Review Board 1.7.22


HBASE-5801 [hbck] Hbck should handle case where some regions have different HTD settings in .regioninfo files (0.90 specific)

Review Request #4833 - Created April 20, 2012 and submitted

Jimmy Xiang
0.90
HBASE-5801
Reviewers
hbase
jmhsieh
hbase-git
Added option to fix inconsistent table descriptors:
1. sideline the current .regioninfo file
2. create a new one with HTD from HBaseAdmin (meta, first entry)
3. offline the region and wait until it is assigned again
TestHBaseFsck* tests are green. On a live cluster, it performs the fix as expected.
src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
Revision 50f9128 New Change
[20] 162 lines
[+20] [+] public class HBaseFsck {
163
  // limit fixes to listed tables, if empty attempt to fix all
163
  // limit fixes to listed tables, if empty attempt to fix all
164
  private List<byte[]> tablesToFix = new ArrayList<byte[]>();
164
  private List<byte[]> tablesToFix = new ArrayList<byte[]>();
165
  private int maxMerge = DEFAULT_MAX_MERGE; // maximum number of overlapping regions to merge
165
  private int maxMerge = DEFAULT_MAX_MERGE; // maximum number of overlapping regions to merge
166
  private int maxOverlapsToSideline = DEFAULT_OVERLAPS_TO_SIDELINE; // maximum number of overlapping regions to sideline
166
  private int maxOverlapsToSideline = DEFAULT_OVERLAPS_TO_SIDELINE; // maximum number of overlapping regions to sideline
167
  private boolean sidelineBigOverlaps = false; // sideline overlaps with >maxMerge regions
167
  private boolean sidelineBigOverlaps = false; // sideline overlaps with >maxMerge regions

    
   
168
  private boolean fixTableDesc = false; // fix table descriptor inconsistency?
168

    
   
169

   
169
  private boolean rerun = false; // if we tried to fix something, rerun hbck
170
  private boolean rerun = false; // if we tried to fix something, rerun hbck
170
  private static boolean summary = false; // if we want to print less output
171
  private static boolean summary = false; // if we want to print less output
171
  private boolean checkMetaOnly = false;
172
  private boolean checkMetaOnly = false;
172

    
   
173

   
173
  /*********
174
  /*********
174
   * State
175
   * State
175
   *********/
176
   *********/
176
  private ErrorReporter errors = new PrintingErrorReporter();
177
  private ErrorReporter errors = new PrintingErrorReporter();

    
   
178
  private boolean multiTableDescFound = false; // to record if multi table descriptors found
177
  int fixes = 0;
179
  int fixes = 0;
178

    
   
180

   
179
  /**
181
  /**
180
   * This map contains the state of all hbck items.  It maps from encoded region
182
   * This map contains the state of all hbck items.  It maps from encoded region
181
   * name to HbckInfo structure.  The information contained in HbckInfo is used
183
   * name to HbckInfo structure.  The information contained in HbckInfo is used
[+20] [20] 79 lines
[+20] [+] private void clearState() {
261
    emptyRegionInfoQualifiers.clear();
263
    emptyRegionInfoQualifiers.clear();
262
    disabledTables.clear();
264
    disabledTables.clear();
263
    errors.clear();
265
    errors.clear();
264
    tablesInfo.clear();
266
    tablesInfo.clear();
265
    orphanHdfsDirs.clear();
267
    orphanHdfsDirs.clear();

    
   
268
    multiTableDescFound = false;
266
  }
269
  }
267

    
   
270

   
268
  /**
271
  /**
269
   * This repair method analyzes hbase data in hdfs and repairs it to satisfy
272
   * This repair method analyzes hbase data in hdfs and repairs it to satisfy
270
   * the table integrity rules.  HBase doesn't need to be online for this
273
   * the table integrity rules.  HBase doesn't need to be online for this
[+20] [20] 802 lines
[+20] [+] private void loadRegionDeployments(Collection<HServerInfo> regionServerList)
1073
  private void checkAndFixConsistency()
1076
  private void checkAndFixConsistency()
1074
  throws IOException, KeeperException, InterruptedException {
1077
  throws IOException, KeeperException, InterruptedException {
1075
    for (java.util.Map.Entry<String, HbckInfo> e: regionInfoMap.entrySet()) {
1078
    for (java.util.Map.Entry<String, HbckInfo> e: regionInfoMap.entrySet()) {
1076
      checkRegionConsistency(e.getKey(), e.getValue());
1079
      checkRegionConsistency(e.getKey(), e.getValue());
1077
    }
1080
    }

    
   
1081
    if (shouldFixTableDesc() && isMultiTableDescFound()) {

    
   
1082
      setShouldRerun();  // should re-run to verify it is fixed

    
   
1083
      for (java.util.Map.Entry<String, HbckInfo> e: regionInfoMap.entrySet()) {

    
   
1084
        fixTableDescConsistency(e.getKey(), e.getValue());

    
   
1085
      }

    
   
1086
    }
1078
  }
1087
  }
1079

    
   
1088

   
1080
  /**
1089
  /**
1081
   * Deletes region from meta table
1090
   * Deletes region from meta table
1082
   */
1091
   */
[+20] [20] 123 lines
[+20] [+] private void tryAssignmentRepair(HbckInfo hbi, String msg) throws IOException,
1206
      HBaseFsckRepair.waitUntilAssigned(admin, hbi.getHdfsHRI());
1215
      HBaseFsckRepair.waitUntilAssigned(admin, hbi.getHdfsHRI());
1207
    }
1216
    }
1208
  }
1217
  }
1209

    
   
1218

   
1210
  /**
1219
  /**

    
   
1220
   * Check a single region for table desc consistency.

    
   
1221
   */

    
   
1222
  private void fixTableDescConsistency(final String key, final HbckInfo hbi)

    
   
1223
        throws IOException, KeeperException, InterruptedException {

    
   
1224
    String tableName = Bytes.toString(hbi.getTableName());

    
   
1225
    TableInfo tableInfo = tablesInfo.get(tableName);

    
   
1226
    Preconditions.checkNotNull("Table " + tableName + "' not present!", tableInfo);

    
   
1227
    if (tableInfo.htds.size() != 1) {

    
   
1228
      HTableDescriptor htd = tableInfo.getHTD();

    
   
1229
      HRegionInfo hri = hbi.getHdfsHRI();

    
   
1230
      if (!htd.equals(hri.getTableDesc())) {

    
   
1231
        // at first, sideline the current .regioninfo

    
   
1232
        Path regionDir = hbi.getHdfsRegionDir();

    
   
1233
        Path regioninfoPath = new Path(regionDir, HRegion.REGIONINFO_FILE);

    
   
1234
        Path sidelineTableDir= new Path(getSidelineDir(), tableName);

    
   
1235
        Path sidelineRegionDir = new Path(sidelineTableDir, regionDir.getName());

    
   
1236
        Path regioninfoSidelinPath = new Path(sidelineRegionDir, HRegion.REGIONINFO_FILE);

    
   
1237
        FileSystem fs = FileSystem.get(admin.getConfiguration());

    
   
1238
        fs.mkdirs(sidelineRegionDir);

    
   
1239
        boolean success = fs.rename(regioninfoPath, regioninfoSidelinPath);

    
   
1240
        if (!success) {

    
   
1241
          String msg = "Unable to rename file " + regioninfoPath +  " to " + regioninfoSidelinPath;

    
   
1242
          LOG.error(msg);

    
   
1243
          throw new IOException(msg);

    
   
1244
        }

    
   
1245

   

    
   
1246
        // then fix the table desc: create a new .regioninfo,

    
   
1247
        //   offline the region and wait till it's assigned again.

    
   
1248
        hri.setTableDesc(htd);

    
   
1249
        Path tmpDir = new Path(sidelineRegionDir, ".tmp");

    
   
1250
        Path tmpPath = new Path(tmpDir, HRegion.REGIONINFO_FILE);

    
   
1251
        HBaseFsckRepair.fixTableDesc(admin, hri, regioninfoPath, tmpPath);

    
   
1252
      }

    
   
1253
    }

    
   
1254
  }

    
   
1255

   

    
   
1256
  /**
1211
   * Check a single region for consistency and correct deployment.
1257
   * Check a single region for consistency and correct deployment.
1212
   */
1258
   */
1213
  private void checkRegionConsistency(final String key, final HbckInfo hbi)
1259
  private void checkRegionConsistency(final String key, final HbckInfo hbi)
1214
  throws IOException, KeeperException, InterruptedException {
1260
  throws IOException, KeeperException, InterruptedException {
1215
    String descriptiveName = hbi.toString();
1261
    String descriptiveName = hbi.toString();
[+20] [20] 274 lines
[+20] [+] public int mergeRegionDirs(Path targetRegionDir, HbckInfo contained) throws IOException {
1490
   * Maintain information about a particular table.
1536
   * Maintain information about a particular table.
1491
   */
1537
   */
1492
  public class TableInfo {
1538
  public class TableInfo {
1493
    String tableName;
1539
    String tableName;
1494
    TreeSet <HServerAddress> deployedOn;
1540
    TreeSet <HServerAddress> deployedOn;

    
   
1541
    HTableDescriptor htdFromAdmin;
1495

    
   
1542

   
1496
    // backwards regions
1543
    // backwards regions
1497
    final List<HbckInfo> backwards = new ArrayList<HbckInfo>();
1544
    final List<HbckInfo> backwards = new ArrayList<HbckInfo>();
1498

    
   
1545

   
1499
    // sidelined big overlapped regions
1546
    // sidelined big overlapped regions
[+20] [20] 27 lines
[+20] [+] private void trackHTD(HbckInfo hir) {
1527
      boolean added = htds.add(htd);
1574
      boolean added = htds.add(htd);
1528
      if (added) {
1575
      if (added) {
1529
        if (htds.size() > 1) {
1576
        if (htds.size() > 1) {
1530
          LOG.warn("Multiple table descriptors found for table '"
1577
          LOG.warn("Multiple table descriptors found for table '"
1531
              + Bytes.toString(hir.getTableName()) + "' regions: " + htds);
1578
              + Bytes.toString(hir.getTableName()) + "' regions: " + htds);

    
   
1579
          setMultiTableDescFound(true);
1532
        } else {
1580
        } else {
1533
          LOG.info("Added a table descriptor found in table '"
1581
          LOG.info("Added a table descriptor found in table '"
1534
              + Bytes.toString(hir.getTableName()) + "' regions: " + htd);
1582
              + Bytes.toString(hir.getTableName()) + "' regions: " + htd);
1535
        }
1583
        }
1536
      }
1584
      }
1537
    }
1585
    }
1538

    
   
1586

   
1539
    /**
1587
    /**
1540
     * @return descriptor common to all regions.  null if are none or multiple!
1588
     * @return descriptor common to all regions.  null if are none or multiple!
1541
     */
1589
     */
1542
    private HTableDescriptor getHTD() {
1590
    private HTableDescriptor getHTD() throws IOException {
1543
      if (htds.size() == 1) {
1591
      if (htds.size() != 1) {
1544
        return (HTableDescriptor)htds.toArray()[0];
1592
        if (htdFromAdmin == null) {
1545
      } else {
1593
          LOG.warn("None/Multiple table descriptors found for table '"
1546
        LOG.error("None/Multiple table descriptors found for table '"

   
1547
          + tableName + "' regions: " + htds);
1594
            + tableName + "' regions: " + htds);

    
   
1595
          htdFromAdmin = admin.getTableDescriptor(Bytes.toBytes(tableName));

    
   
1596
          LOG.warn("Use this one from meta instead" + htdFromAdmin);
1548
      }
1597
        }
1549
      return null;
1598
        return htdFromAdmin;

    
   
1599
      }

    
   
1600
      return (HTableDescriptor)htds.toArray()[0];
1550
    }
1601
    }
1551

    
   
1602

   
1552
    public void addRegionInfo(HbckInfo hir) {
1603
    public void addRegionInfo(HbckInfo hir) {
1553
      trackHTD(hir);
1604
      trackHTD(hir);
1554

    
   
1605

   
[+20] [20] 868 lines
[+20] [+] public int compare(HbckInfo l, HbckInfo r) {
2423
  /**
2474
  /**
2424
   * Prints summary of all tables found on the system.
2475
   * Prints summary of all tables found on the system.
2425
   */
2476
   */
2426
  private void printTableSummary(TreeMap<String, TableInfo> tablesInfo) {
2477
  private void printTableSummary(TreeMap<String, TableInfo> tablesInfo) {
2427
    System.out.println("Summary:");
2478
    System.out.println("Summary:");

    
   
2479
    if (isMultiTableDescFound()) {

    
   
2480
      System.out.println("  Multiple table descriptors were found.\n"

    
   
2481
        + "    You can ignore it if your cluster is working fine.\n"

    
   
2482
        + "    To fix it, please re-run hbck with option -fixTableDesc\n");

    
   
2483
    }

    
   
2484

   
2428
    for (TableInfo tInfo : tablesInfo.values()) {
2485
    for (TableInfo tInfo : tablesInfo.values()) {
2429
      if (errors.tableHasErrors(tInfo)) {
2486
      if (errors.tableHasErrors(tInfo)) {
2430
        System.out.println("Table " + tInfo.getName() + " is inconsistent.");
2487
        System.out.println("Table " + tInfo.getName() + " is inconsistent.");
2431
      } else {
2488
      } else {
2432
        System.out.println("  " + tInfo.getName() + " is okay.");
2489
        System.out.println("  " + tInfo.getName() + " is okay.");
[+20] [20] 385 lines
[+20] [+] public void setSidelineBigOverlaps(boolean sbo) {
2818

    
   
2875

   
2819
  public boolean shouldSidelineBigOverlaps() {
2876
  public boolean shouldSidelineBigOverlaps() {
2820
    return sidelineBigOverlaps;
2877
    return sidelineBigOverlaps;
2821
  }
2878
  }
2822

    
   
2879

   

    
   
2880
  public void setFixTableDesc(boolean ftd) {

    
   
2881
    this.fixTableDesc = ftd;

    
   
2882
  }

    
   
2883

   

    
   
2884
  public boolean shouldFixTableDesc() {

    
   
2885
    return fixTableDesc;

    
   
2886
  }

    
   
2887

   

    
   
2888
  public void setMultiTableDescFound(boolean multiTableDesc) {

    
   
2889
    multiTableDescFound = multiTableDesc;

    
   
2890
  }

    
   
2891

   

    
   
2892
  public boolean isMultiTableDescFound() {

    
   
2893
    return multiTableDescFound;

    
   
2894
  }

    
   
2895

   
2823
  /**
2896
  /**
2824
   * @param mm maximum number of regions to merge into a single region.
2897
   * @param mm maximum number of regions to merge into a single region.
2825
   */
2898
   */
2826
  public void setMaxMerge(int mm) {
2899
  public void setMaxMerge(int mm) {
2827
    this.maxMerge = mm;
2900
    this.maxMerge = mm;
[+20] [20] 63 lines
[+20] [+] protected static void printUsageAndExit() {
2891
    System.err.println("   -sidelineBigOverlaps  When fixing region overlaps, allow to sideline big overlaps");
2964
    System.err.println("   -sidelineBigOverlaps  When fixing region overlaps, allow to sideline big overlaps");
2892
    System.err.println("   -maxOverlapsToSideline <n>  When fixing region overlaps, allow at most <n> regions to sideline per group. (n=" + DEFAULT_OVERLAPS_TO_SIDELINE +" by default)");
2965
    System.err.println("   -maxOverlapsToSideline <n>  When fixing region overlaps, allow at most <n> regions to sideline per group. (n=" + DEFAULT_OVERLAPS_TO_SIDELINE +" by default)");
2893
    System.err.println("");
2966
    System.err.println("");
2894
    System.err.println("   -repair           Shortcut for -fixAssignments -fixMeta -fixHdfsHoles -fixHdfsOrphans -fixHdfsOverlaps -fixVersionFile -sidelineBigOverlaps");
2967
    System.err.println("   -repair           Shortcut for -fixAssignments -fixMeta -fixHdfsHoles -fixHdfsOrphans -fixHdfsOverlaps -fixVersionFile -sidelineBigOverlaps");
2895
    System.err.println("   -repairHoles      Shortcut for -fixAssignments -fixMeta -fixHdfsHoles -fixHdfsOrphans");
2968
    System.err.println("   -repairHoles      Shortcut for -fixAssignments -fixMeta -fixHdfsHoles -fixHdfsOrphans");

    
   
2969
    System.err.println("   -fixTableDesc     Try to fix table descriptor inconsistency");
2896

    
   
2970

   
2897
    Runtime.getRuntime().exit(-2);
2971
    Runtime.getRuntime().exit(-2);
2898
  }
2972
  }
2899

    
   
2973

   
2900
  /**
2974
  /**
[+20] [20] 55 lines
[+20] [+] public static void main(String[] args) throws Exception {
2956
        fsck.setFixHdfsOverlaps(true);
3030
        fsck.setFixHdfsOverlaps(true);
2957
      } else if (cmd.equals("-fixVersionFile")) {
3031
      } else if (cmd.equals("-fixVersionFile")) {
2958
        fsck.setFixVersionFile(true);
3032
        fsck.setFixVersionFile(true);
2959
      } else if (cmd.equals("-sidelineBigOverlaps")) {
3033
      } else if (cmd.equals("-sidelineBigOverlaps")) {
2960
        fsck.setSidelineBigOverlaps(true);
3034
        fsck.setSidelineBigOverlaps(true);

    
   
3035
      } else if (cmd.equals("-fixTableDesc")) {

    
   
3036
        fsck.setFixTableDesc(true);
2961
      } else if (cmd.equals("-repair")) {
3037
      } else if (cmd.equals("-repair")) {
2962
        // this attempts to merge overlapping hdfs regions, needs testing
3038
        // this attempts to merge overlapping hdfs regions, needs testing
2963
        // under load
3039
        // under load
2964
        fsck.setFixHdfsHoles(true);
3040
        fsck.setFixHdfsHoles(true);
2965
        fsck.setFixHdfsOrphans(true);
3041
        fsck.setFixHdfsOrphans(true);
[+20] [20] 112 lines
src/main/java/org/apache/hadoop/hbase/util/HBaseFsckRepair.java
Revision 06d2b73 New Change
 
src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
Revision 103d8bf New Change
 
  1. src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java: Loading...
  2. src/main/java/org/apache/hadoop/hbase/util/HBaseFsckRepair.java: Loading...
  3. src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java: Loading...