Review Board 1.7.22


HBASE-5801 [hbck] Hbck should handle case where some regions have different HTD settings in .regioninfo files (0.90 specific)

Review Request #4833 - Created April 20, 2012 and submitted

Jimmy Xiang
0.90
HBASE-5801
Reviewers
hbase
jmhsieh
hbase-git
Added option to fix inconsistent table descriptors:
1. sideline the current .regioninfo file
2. create a new one with HTD from HBaseAdmin (meta, first entry)
3. offline the region and wait until it is assigned again
TestHBaseFsck* tests are green. On a live cluster, it performs the fix as expected.

Diff revision 2

This is not the most recent revision of the diff. The latest diff is revision 3. See what's changed.

1 2 3
1 2 3

  1. src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java: Loading...
  2. src/main/java/org/apache/hadoop/hbase/util/HBaseFsckRepair.java: Loading...
  3. src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java: Loading...
src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
Revision 50f9128 New Change
[20] 162 lines
[+20] [+] public class HBaseFsck {
163
  // limit fixes to listed tables, if empty atttempt to fix all
163
  // limit fixes to listed tables, if empty atttempt to fix all
164
  private List<byte[]> tablesToFix = new ArrayList<byte[]>();
164
  private List<byte[]> tablesToFix = new ArrayList<byte[]>();
165
  private int maxMerge = DEFAULT_MAX_MERGE; // maximum number of overlapping regions to merge
165
  private int maxMerge = DEFAULT_MAX_MERGE; // maximum number of overlapping regions to merge
166
  private int maxOverlapsToSideline = DEFAULT_OVERLAPS_TO_SIDELINE; // maximum number of overlapping regions to sideline
166
  private int maxOverlapsToSideline = DEFAULT_OVERLAPS_TO_SIDELINE; // maximum number of overlapping regions to sideline
167
  private boolean sidelineBigOverlaps = false; // sideline overlaps with >maxMerge regions
167
  private boolean sidelineBigOverlaps = false; // sideline overlaps with >maxMerge regions

    
   
168
  private boolean fixTableDesc = false; // fix table descriptor inconsistency?
168

    
   
169

   
169
  private boolean rerun = false; // if we tried to fix something, rerun hbck
170
  private boolean rerun = false; // if we tried to fix something, rerun hbck
170
  private static boolean summary = false; // if we want to print less output
171
  private static boolean summary = false; // if we want to print less output
171
  private boolean checkMetaOnly = false;
172
  private boolean checkMetaOnly = false;
172

    
   
173

   
173
  /*********
174
  /*********
174
   * State
175
   * State
175
   *********/
176
   *********/
176
  private ErrorReporter errors = new PrintingErrorReporter();
177
  private ErrorReporter errors = new PrintingErrorReporter();

    
   
178
  private boolean multiTableDescFound = false; // to record if multi table descriptors found
177
  int fixes = 0;
179
  int fixes = 0;
178

    
   
180

   
179
  /**
181
  /**
180
   * This map contains the state of all hbck items.  It maps from encoded region
182
   * This map contains the state of all hbck items.  It maps from encoded region
181
   * name to HbckInfo structure.  The information contained in HbckInfo is used
183
   * name to HbckInfo structure.  The information contained in HbckInfo is used
[+20] [20] 79 lines
[+20] [+] private void clearState() {
261
    emptyRegionInfoQualifiers.clear();
263
    emptyRegionInfoQualifiers.clear();
262
    disabledTables.clear();
264
    disabledTables.clear();
263
    errors.clear();
265
    errors.clear();
264
    tablesInfo.clear();
266
    tablesInfo.clear();
265
    orphanHdfsDirs.clear();
267
    orphanHdfsDirs.clear();

    
   
268
    multiTableDescFound = false;
266
  }
269
  }
267

    
   
270

   
268
  /**
271
  /**
269
   * This repair method analyzes hbase data in hdfs and repairs it to satisfy
272
   * This repair method analyzes hbase data in hdfs and repairs it to satisfy
270
   * the table integrity rules.  HBase doesn't need to be online for this
273
   * the table integrity rules.  HBase doesn't need to be online for this
[+20] [20] 802 lines
[+20] [+] private void loadRegionDeployments(Collection<HServerInfo> regionServerList)
1073
  private void checkAndFixConsistency()
1076
  private void checkAndFixConsistency()
1074
  throws IOException, KeeperException, InterruptedException {
1077
  throws IOException, KeeperException, InterruptedException {
1075
    for (java.util.Map.Entry<String, HbckInfo> e: regionInfoMap.entrySet()) {
1078
    for (java.util.Map.Entry<String, HbckInfo> e: regionInfoMap.entrySet()) {
1076
      checkRegionConsistency(e.getKey(), e.getValue());
1079
      checkRegionConsistency(e.getKey(), e.getValue());
1077
    }
1080
    }

    
   
1081
    if (shouldFixTableDesc() && isMultiTableDescFound()) {

    
   
1082
      setShouldRerun();  // should re-run to verify it is fixed

    
   
1083
      for (java.util.Map.Entry<String, HbckInfo> e: regionInfoMap.entrySet()) {

    
   
1084
        fixTableDescConsistency(e.getKey(), e.getValue());

    
   
1085
      }

    
   
1086
    }
1078
  }
1087
  }
1079

    
   
1088

   
1080
  /**
1089
  /**
1081
   * Deletes region from meta table
1090
   * Deletes region from meta table
1082
   */
1091
   */
[+20] [20] 123 lines
[+20] [+] private void tryAssignmentRepair(HbckInfo hbi, String msg) throws IOException,
1206
      HBaseFsckRepair.waitUntilAssigned(admin, hbi.getHdfsHRI());
1215
      HBaseFsckRepair.waitUntilAssigned(admin, hbi.getHdfsHRI());
1207
    }
1216
    }
1208
  }
1217
  }
1209

    
   
1218

   
1210
  /**
1219
  /**

    
   
1220
   * Check a single region for table desc consistency.

    
   
1221
   */

    
   
1222
  private void fixTableDescConsistency(final String key, final HbckInfo hbi)

    
   
1223
        throws IOException, KeeperException, InterruptedException {

    
   
1224
    String tableName = Bytes.toString(hbi.getTableName());

    
   
1225
    TableInfo tableInfo = tablesInfo.get(tableName);

    
   
1226
    Preconditions.checkNotNull("Table " + tableName + "' not present!", tableInfo);

    
   
1227
    if (tableInfo.htds.size() != 1) {

    
   
1228
      HTableDescriptor htd = tableInfo.getHTD();

    
   
1229
      HRegionInfo hri = hbi.getHdfsHRI();

    
   
1230
      if (!htd.equals(hri.getTableDesc())) {

    
   
1231
        // at first, sideline the current .regioninfo

    
   
1232
        Path regionDir = hbi.getHdfsRegionDir();

    
   
1233
        Path regioninfoPath = new Path(regionDir, HRegion.REGIONINFO_FILE);

    
   
1234
        Path sidelineTableDir= new Path(getSidelineDir(), tableName);

    
   
1235
        Path sidelineRegionDir = new Path(sidelineTableDir, regionDir.getName());

    
   
1236
        Path regioninfoSidelinPath = new Path(sidelineRegionDir, HRegion.REGIONINFO_FILE);

    
   
1237
        FileSystem fs = FileSystem.get(admin.getConfiguration());

    
   
1238
        fs.mkdirs(sidelineRegionDir);

    
   
1239
        fs.rename(regioninfoPath, regioninfoSidelinPath);

    
   
1240

   

    
   
1241
        // then fix the table desc: create a new .regioninfo,

    
   
1242
        //   offline the region and wait till it's assigned again.

    
   
1243
        hri.setTableDesc(htd);

    
   
1244
        Path tmpDir = new Path(sidelineRegionDir, ".tmp");

    
   
1245
        Path tmpPath = new Path(tmpDir, HRegion.REGIONINFO_FILE);

    
   
1246
        HBaseFsckRepair.fixTableDesc(admin, hri, regioninfoPath, tmpPath);

    
   
1247
      }

    
   
1248
    }

    
   
1249
  }

    
   
1250

   

    
   
1251
  /**
1211
   * Check a single region for consistency and correct deployment.
1252
   * Check a single region for consistency and correct deployment.
1212
   */
1253
   */
1213
  private void checkRegionConsistency(final String key, final HbckInfo hbi)
1254
  private void checkRegionConsistency(final String key, final HbckInfo hbi)
1214
  throws IOException, KeeperException, InterruptedException {
1255
  throws IOException, KeeperException, InterruptedException {
1215
    String descriptiveName = hbi.toString();
1256
    String descriptiveName = hbi.toString();
[+20] [20] 274 lines
[+20] [+] public int mergeRegionDirs(Path targetRegionDir, HbckInfo contained) throws IOException {
1490
   * Maintain information about a particular table.
1531
   * Maintain information about a particular table.
1491
   */
1532
   */
1492
  public class TableInfo {
1533
  public class TableInfo {
1493
    String tableName;
1534
    String tableName;
1494
    TreeSet <HServerAddress> deployedOn;
1535
    TreeSet <HServerAddress> deployedOn;

    
   
1536
    HTableDescriptor htdFromAdmin;
1495

    
   
1537

   
1496
    // backwards regions
1538
    // backwards regions
1497
    final List<HbckInfo> backwards = new ArrayList<HbckInfo>();
1539
    final List<HbckInfo> backwards = new ArrayList<HbckInfo>();
1498

    
   
1540

   
1499
    // sidelined big overlapped regions
1541
    // sidelined big overlapped regions
[+20] [20] 27 lines
[+20] [+] private void trackHTD(HbckInfo hir) {
1527
      boolean added = htds.add(htd);
1569
      boolean added = htds.add(htd);
1528
      if (added) {
1570
      if (added) {
1529
        if (htds.size() > 1) {
1571
        if (htds.size() > 1) {
1530
          LOG.warn("Multiple table descriptors found for table '"
1572
          LOG.warn("Multiple table descriptors found for table '"
1531
              + Bytes.toString(hir.getTableName()) + "' regions: " + htds);
1573
              + Bytes.toString(hir.getTableName()) + "' regions: " + htds);

    
   
1574
          setMultiTableDescFound(true);
1532
        } else {
1575
        } else {
1533
          LOG.info("Added a table descriptor found in table '"
1576
          LOG.info("Added a table descriptor found in table '"
1534
              + Bytes.toString(hir.getTableName()) + "' regions: " + htd);
1577
              + Bytes.toString(hir.getTableName()) + "' regions: " + htd);
1535
        }
1578
        }
1536
      }
1579
      }
1537
    }
1580
    }
1538

    
   
1581

   
1539
    /**
1582
    /**
1540
     * @return descriptor common to all regions.  null if are none or multiple!
1583
     * @return descriptor common to all regions.  null if are none or multiple!
1541
     */
1584
     */
1542
    private HTableDescriptor getHTD() {
1585
    private HTableDescriptor getHTD() throws IOException {
1543
      if (htds.size() == 1) {
1586
      if (htds.size() != 1) {
1544
        return (HTableDescriptor)htds.toArray()[0];
1587
        if (htdFromAdmin == null) {
1545
      } else {
1588
          LOG.warn("None/Multiple table descriptors found for table '"
1546
        LOG.error("None/Multiple table descriptors found for table '"

   
1547
          + tableName + "' regions: " + htds);
1589
            + tableName + "' regions: " + htds);

    
   
1590
          htdFromAdmin = admin.getTableDescriptor(Bytes.toBytes(tableName));

    
   
1591
          LOG.info("Use this one from meta instead" + htdFromAdmin);
1548
      }
1592
        }
1549
      return null;
1593
        return htdFromAdmin;

    
   
1594
      }

    
   
1595
      return (HTableDescriptor)htds.toArray()[0];
1550
    }
1596
    }
1551

    
   
1597

   
1552
    public void addRegionInfo(HbckInfo hir) {
1598
    public void addRegionInfo(HbckInfo hir) {
1553
      trackHTD(hir);
1599
      trackHTD(hir);
1554

    
   
1600

   
[+20] [20] 868 lines
[+20] [+] public int compare(HbckInfo l, HbckInfo r) {
2423
  /**
2469
  /**
2424
   * Prints summary of all tables found on the system.
2470
   * Prints summary of all tables found on the system.
2425
   */
2471
   */
2426
  private void printTableSummary(TreeMap<String, TableInfo> tablesInfo) {
2472
  private void printTableSummary(TreeMap<String, TableInfo> tablesInfo) {
2427
    System.out.println("Summary:");
2473
    System.out.println("Summary:");

    
   
2474
    if (isMultiTableDescFound()) {

    
   
2475
      System.out.println("  Mutliple table descriptors were found.\n"

    
   
2476
        + "    You can ignore it if your cluster is working fine.\n"

    
   
2477
        + "    To fix it, please re-run hbck with option -fixTableDesc\n");

    
   
2478
    }

    
   
2479

   
2428
    for (TableInfo tInfo : tablesInfo.values()) {
2480
    for (TableInfo tInfo : tablesInfo.values()) {
2429
      if (errors.tableHasErrors(tInfo)) {
2481
      if (errors.tableHasErrors(tInfo)) {
2430
        System.out.println("Table " + tInfo.getName() + " is inconsistent.");
2482
        System.out.println("Table " + tInfo.getName() + " is inconsistent.");
2431
      } else {
2483
      } else {
2432
        System.out.println("  " + tInfo.getName() + " is okay.");
2484
        System.out.println("  " + tInfo.getName() + " is okay.");
[+20] [20] 385 lines
[+20] [+] public void setSidelineBigOverlaps(boolean sbo) {
2818

    
   
2870

   
2819
  public boolean shouldSidelineBigOverlaps() {
2871
  public boolean shouldSidelineBigOverlaps() {
2820
    return sidelineBigOverlaps;
2872
    return sidelineBigOverlaps;
2821
  }
2873
  }
2822

    
   
2874

   

    
   
2875
  public void setFixTableDesc(boolean ftd) {

    
   
2876
    this.fixTableDesc = ftd;

    
   
2877
  }

    
   
2878

   

    
   
2879
  public boolean shouldFixTableDesc() {

    
   
2880
    return fixTableDesc;

    
   
2881
  }

    
   
2882

   

    
   
2883
  public void setMultiTableDescFound(boolean multiTableDesc) {

    
   
2884
    multiTableDescFound = multiTableDesc;

    
   
2885
  }

    
   
2886

   

    
   
2887
  public boolean isMultiTableDescFound() {

    
   
2888
    return multiTableDescFound;

    
   
2889
  }

    
   
2890

   
2823
  /**
2891
  /**
2824
   * @param mm maximum number of regions to merge into a single region.
2892
   * @param mm maximum number of regions to merge into a single region.
2825
   */
2893
   */
2826
  public void setMaxMerge(int mm) {
2894
  public void setMaxMerge(int mm) {
2827
    this.maxMerge = mm;
2895
    this.maxMerge = mm;
[+20] [20] 63 lines
[+20] [+] protected static void printUsageAndExit() {
2891
    System.err.println("   -sidelineBigOverlaps  When fixing region overlaps, allow to sideline big overlaps");
2959
    System.err.println("   -sidelineBigOverlaps  When fixing region overlaps, allow to sideline big overlaps");
2892
    System.err.println("   -maxOverlapsToSideline <n>  When fixing region overlaps, allow at most <n> regions to sideline per group. (n=" + DEFAULT_OVERLAPS_TO_SIDELINE +" by default)");
2960
    System.err.println("   -maxOverlapsToSideline <n>  When fixing region overlaps, allow at most <n> regions to sideline per group. (n=" + DEFAULT_OVERLAPS_TO_SIDELINE +" by default)");
2893
    System.err.println("");
2961
    System.err.println("");
2894
    System.err.println("   -repair           Shortcut for -fixAssignments -fixMeta -fixHdfsHoles -fixHdfsOrphans -fixHdfsOverlaps -fixVersionFile -sidelineBigOverlaps");
2962
    System.err.println("   -repair           Shortcut for -fixAssignments -fixMeta -fixHdfsHoles -fixHdfsOrphans -fixHdfsOverlaps -fixVersionFile -sidelineBigOverlaps");
2895
    System.err.println("   -repairHoles      Shortcut for -fixAssignments -fixMeta -fixHdfsHoles -fixHdfsOrphans");
2963
    System.err.println("   -repairHoles      Shortcut for -fixAssignments -fixMeta -fixHdfsHoles -fixHdfsOrphans");

    
   
2964
    System.err.println("   -fixTableDesc     Try to fix table descriptor inconsistency");
2896

    
   
2965

   
2897
    Runtime.getRuntime().exit(-2);
2966
    Runtime.getRuntime().exit(-2);
2898
  }
2967
  }
2899

    
   
2968

   
2900
  /**
2969
  /**
[+20] [20] 55 lines
[+20] [+] public static void main(String[] args) throws Exception {
2956
        fsck.setFixHdfsOverlaps(true);
3025
        fsck.setFixHdfsOverlaps(true);
2957
      } else if (cmd.equals("-fixVersionFile")) {
3026
      } else if (cmd.equals("-fixVersionFile")) {
2958
        fsck.setFixVersionFile(true);
3027
        fsck.setFixVersionFile(true);
2959
      } else if (cmd.equals("-sidelineBigOverlaps")) {
3028
      } else if (cmd.equals("-sidelineBigOverlaps")) {
2960
        fsck.setSidelineBigOverlaps(true);
3029
        fsck.setSidelineBigOverlaps(true);

    
   
3030
      } else if (cmd.equals("-fixTableDesc")) {

    
   
3031
        fsck.setFixTableDesc(true);
2961
      } else if (cmd.equals("-repair")) {
3032
      } else if (cmd.equals("-repair")) {
2962
        // this attempts to merge overlapping hdfs regions, needs testing
3033
        // this attempts to merge overlapping hdfs regions, needs testing
2963
        // under load
3034
        // under load
2964
        fsck.setFixHdfsHoles(true);
3035
        fsck.setFixHdfsHoles(true);
2965
        fsck.setFixHdfsOrphans(true);
3036
        fsck.setFixHdfsOrphans(true);
[+20] [20] 112 lines
src/main/java/org/apache/hadoop/hbase/util/HBaseFsckRepair.java
Revision 06d2b73 New Change
 
src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
Revision 103d8bf New Change
 
  1. src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java: Loading...
  2. src/main/java/org/apache/hadoop/hbase/util/HBaseFsckRepair.java: Loading...
  3. src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java: Loading...