Review Board 1.7.22


hbase-3872 Hole in split transaction rollback; edits to .META. need to be rolled back even if it seems like they didn't make it

Review Request #1097 - Created July 13, 2011 and updated

Michael Stack
hbase-3872
Reviewers
hbase
hbase-git
Fix is two-fold.

1. Fix CatalogJanitor so that it does not remove the parent region if the daughter regions are not present in the filesystem.
2. Fix SplitTransaction so that, if we go past the point of no return (PONR), we DO NOT REMOVE the daughter regions as part of cleanup; we leave them in place.  Because we went past the PONR, the server is going to abort, and server shutdown processing has what it needs to do the fixup.
Added two new unit tests.  One tests that we do not remove the parent if there is no daughter region in the filesystem; the other tests that the call to rollback returns the right answer if we go past the PONR (and that the daughter regions are still in place after rollback).
src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java
Revision 4a35d09 New Change
[20] 41 lines
[+20]
42
import org.apache.hadoop.hbase.catalog.MetaReader;
42
import org.apache.hadoop.hbase.catalog.MetaReader;
43
import org.apache.hadoop.hbase.client.Result;
43
import org.apache.hadoop.hbase.client.Result;
44
import org.apache.hadoop.hbase.regionserver.HRegion;
44
import org.apache.hadoop.hbase.regionserver.HRegion;
45
import org.apache.hadoop.hbase.regionserver.Store;
45
import org.apache.hadoop.hbase.regionserver.Store;
46
import org.apache.hadoop.hbase.regionserver.StoreFile;
46
import org.apache.hadoop.hbase.regionserver.StoreFile;

    
   
47
import org.apache.hadoop.hbase.util.Pair;
47
import org.apache.hadoop.hbase.util.Bytes;
48
import org.apache.hadoop.hbase.util.Bytes;
48
import org.apache.hadoop.hbase.util.Writables;
49
import org.apache.hadoop.hbase.util.Writables;
49

    
   
50

   
50

    
   
51

   
51
/**
52
/**
[+20] [20] 107 lines
[+20] [+] static HRegionInfo getHRegionInfo(final Result result)
159
   * <code>metaRegionName</code>
160
   * <code>metaRegionName</code>
160
   * @return True if we removed <code>parent</code> from meta table and from
161
   * @return True if we removed <code>parent</code> from meta table and from
161
   * the filesystem.
162
   * the filesystem.
162
   * @throws IOException
163
   * @throws IOException
163
   */
164
   */
164
  boolean cleanParent(final HRegionInfo parent,
165
  boolean cleanParent(final HRegionInfo parent, Result rowContent)
165
    Result rowContent)

   
166
  throws IOException {
166
  throws IOException {
167
    boolean result = false;
167
    boolean result = false;
168
    // Run checks on each daughter split.
168
    // Run checks on each daughter split.
169
    boolean hasReferencesA =
169
    Pair<Boolean, Boolean> a =
170
      checkDaughter(parent, rowContent, HConstants.SPLITA_QUALIFIER);
170
      checkDaughter(parent, rowContent, HConstants.SPLITA_QUALIFIER);
171
    boolean hasReferencesB =
171
    Pair<Boolean, Boolean> b =
172
      checkDaughter(parent, rowContent, HConstants.SPLITB_QUALIFIER);
172
      checkDaughter(parent, rowContent, HConstants.SPLITB_QUALIFIER);
173
    if (!hasReferencesA && !hasReferencesB) {
173
    if ((a.getFirst() && !a.getSecond()) && (b.getFirst() && !b.getSecond())) {
174
      LOG.debug("Deleting region " + parent.getRegionNameAsString() +
174
      LOG.debug("Deleting region " + parent.getRegionNameAsString() +
175
        " because daughter splits no longer hold references");
175
        " because daughter splits no longer hold references");
176
      // This latter regionOffline should not be necessary but is done for now
176
      // This latter regionOffline should not be necessary but is done for now
177
      // until we let go of regionserver to master heartbeats.  See HBASE-3368.
177
      // until we let go of regionserver to master heartbeats.  See HBASE-3368.
178
      if (this.services.getAssignmentManager() != null) {
178
      if (this.services.getAssignmentManager() != null) {
[+20] [20] 16 lines
[+20] static HRegionInfo getHRegionInfo(final Result result)
195
   * and if not, remove the note of daughter region in the parent row: its
195
   * and if not, remove the note of daughter region in the parent row: its
196
   * column info:splitA or info:splitB.
196
   * column info:splitA or info:splitB.
197
   * @param parent
197
   * @param parent
198
   * @param rowContent
198
   * @param rowContent
199
   * @param qualifier
199
   * @param qualifier
200
   * @return True if this daughter still has references to the parent.
200
   * @return A pair where the first boolean says whether or not the daughter

    
   
201
   * region directory exists in the filesystem and then the second boolean says

    
   
202
   * whether the daughter has references to the parent.
201
   * @throws IOException
203
   * @throws IOException
202
   */
204
   */
203
  boolean checkDaughter(final HRegionInfo parent,
205
  Pair<Boolean, Boolean> checkDaughter(final HRegionInfo parent,
204
    final Result rowContent, final byte [] qualifier)
206
    final Result rowContent, final byte [] qualifier)
205
  throws IOException {
207
  throws IOException {
206
    HRegionInfo hri = getDaughterRegionInfo(rowContent, qualifier);
208
    HRegionInfo hri = getDaughterRegionInfo(rowContent, qualifier);
207
    return hasReferences(parent, rowContent, hri, qualifier);
209
    Pair<Boolean, Boolean> result =

    
   
210
      checkDaughterInFs(parent, rowContent, hri, qualifier);

    
   
211
    if (result.getFirst() && !result.getSecond()) {

    
   
212
      // Remove daughter from the parent IFF the daughter region exists in FS.

    
   
213
      // If there is no daughter region in the filesystem, must be because of

    
   
214
      // a failed split.  The ServerShutdownHandler will do the fixup.  Don't

    
   
215
      // do any deletes in here that could interfere with ServerShutdownHandler

    
   
216
      // fixup

    
   
217
      removeDaughterFromParent(parent, hri, qualifier);

    
   
218
    }

    
   
219
    return result;
208
  }
220
  }
209

    
   
221

   
210
  /**
222
  /**
211
   * Get daughter HRegionInfo out of parent info:splitA/info:splitB columns.
223
   * Get daughter HRegionInfo out of parent info:splitA/info:splitB columns.
212
   * @param result
224
   * @param result
[+20] [20] 27 lines
[+20] [+] private void removeDaughterFromParent(final HRegionInfo parent,
240
  }
252
  }
241

    
   
253

   
242
  /**
254
  /**
243
   * Checks if a daughter region -- either splitA or splitB -- still holds
255
   * Checks if a daughter region -- either splitA or splitB -- still holds
244
   * references to parent.  If not, removes reference to the split from
256
   * references to parent.  If not, removes reference to the split from
245
   * the parent meta region row so we don't check it any more.
257
   * the parent meta region row so we don't check it any more.  Also checks

    
   
258
   * daughter region exists in the filesystem.
246
   * @param parent Parent region name. 
259
   * @param parent Parent region name. 
247
   * @param rowContent Keyed content of the parent row in meta region.
260
   * @param rowContent Keyed content of the parent row in meta region.
248
   * @param split Which column family.
261
   * @param split Which column family.
249
   * @param qualifier Which of the daughters to look at, splitA or splitB.
262
   * @param qualifier Which of the daughters to look at, splitA or splitB.
250
   * @return True if still has references to parent.
263
   * @return A pair where the first boolean says whether or not the daughter

    
   
264
   * region directory exists in the filesystem and then the second boolean says

    
   
265
   * whether the daughter has references to the parent.
251
   * @throws IOException
266
   * @throws IOException
252
   */
267
   */
253
  boolean hasReferences(final HRegionInfo parent,
268
  Pair<Boolean, Boolean> checkDaughterInFs(final HRegionInfo parent,
254
    final Result rowContent, final HRegionInfo split,
269
    final Result rowContent, final HRegionInfo split,
255
    final byte [] qualifier)
270
    final byte [] qualifier)
256
  throws IOException {
271
  throws IOException {
257
    boolean result = false;
272
    boolean references = false;
258
    if (split == null)  return result;
273
    boolean exists = false;

    
   
274
    if (split == null)  {

    
   
275
      return new Pair<Boolean, Boolean>(Boolean.FALSE, Boolean.FALSE);

    
   
276
    }
259
    FileSystem fs = this.services.getMasterFileSystem().getFileSystem();
277
    FileSystem fs = this.services.getMasterFileSystem().getFileSystem();
260
    Path rootdir = this.services.getMasterFileSystem().getRootDir();
278
    Path rootdir = this.services.getMasterFileSystem().getRootDir();
261
    Path tabledir = new Path(rootdir, split.getTableNameAsString());
279
    Path tabledir = new Path(rootdir, split.getTableNameAsString());

    
   
280
    Path regiondir = new Path(tabledir, split.getEncodedName());

    
   
281
    exists = fs.exists(regiondir);

    
   
282
    if (!exists) {

    
   
283
      LOG.warn("Daughter regiondir does not exist: " + regiondir.toString());

    
   
284
      return new Pair<Boolean, Boolean>(exists, Boolean.FALSE);

    
   
285
    }
262
    HTableDescriptor parentDescriptor = getTableDescriptor(parent.getTableName());
286
    HTableDescriptor parentDescriptor = getTableDescriptor(parent.getTableName());
263

    
   
287

   
264
    for (HColumnDescriptor family: parentDescriptor.getFamilies()) {
288
    for (HColumnDescriptor family: parentDescriptor.getFamilies()) {
265
      Path p = Store.getStoreHomedir(tabledir, split.getEncodedName(),
289
      Path p = Store.getStoreHomedir(tabledir, split.getEncodedName(),
266
        family.getName());
290
        family.getName());
[+20] [20] 6 lines
[+20] [+] public boolean accept(Path path) {
273
            }
297
            }
274
          }
298
          }
275
      );
299
      );
276

    
   
300

   
277
      if (ps != null && ps.length > 0) {
301
      if (ps != null && ps.length > 0) {
278
        result = true;
302
        references = true;
279
        break;
303
        break;
280
      }
304
      }
281
    }
305
    }
282
    if (!result) {
306
    return new Pair<Boolean, Boolean>(Boolean.valueOf(exists),
283
      removeDaughterFromParent(parent, split, qualifier);
307
      Boolean.valueOf(references));
284
    }

   
285
    return result;

   
286
  }
308
  }
287

    
   
309

   
288
  private HTableDescriptor getTableDescriptor(byte[] tableName)
310
  private HTableDescriptor getTableDescriptor(byte[] tableName)
289
  throws TableExistsException, FileNotFoundException, IOException {
311
  throws TableExistsException, FileNotFoundException, IOException {
290
    return this.services.getTableDescriptors().get(Bytes.toString(tableName));
312
    return this.services.getTableDescriptors().get(Bytes.toString(tableName));
291
  }
313
  }
292
}
314
}
src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java
Revision 7082342 New Change
 
src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
Revision 977115d New Change
 
src/main/java/org/apache/hadoop/hbase/regionserver/RegionServerServices.java
Revision c402b87 New Change
 
src/main/java/org/apache/hadoop/hbase/regionserver/SplitRequest.java
Revision 16e970e New Change
 
src/main/java/org/apache/hadoop/hbase/regionserver/SplitTransaction.java
Revision ab88766 New Change
 
src/test/java/org/apache/hadoop/hbase/catalog/TestCatalogTracker.java
Revision 49333ac New Change
 
src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitor.java
Revision 56fc0b8 New Change
 
src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransaction.java
Revision 98a7ee7 New Change
 
  1. src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java: Loading...
  2. src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java: Loading...
  3. src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java: Loading...
  4. src/main/java/org/apache/hadoop/hbase/regionserver/RegionServerServices.java: Loading...
  5. src/main/java/org/apache/hadoop/hbase/regionserver/SplitRequest.java: Loading...
  6. src/main/java/org/apache/hadoop/hbase/regionserver/SplitTransaction.java: Loading...
  7. src/test/java/org/apache/hadoop/hbase/catalog/TestCatalogTracker.java: Loading...
  8. src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitor.java: Loading...
  9. src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransaction.java: Loading...