Review Board 1.7.22


HBASE-5712 Parallelize load of .regioninfo files in diagnostic/repair portion of hbck

Review Request #4883 - Created April 26, 2012 and submitted

Jonathan Hsieh
0.92, 0.94, 0.96
HBASE-5712
Reviewers
hbase
jxiang, tedyu
hbase-git
* Parallelized load of .regioninfo files
* changed TreeMap to SortedMap in method signatures
* renamed a test.
Ran patch 10x on trunk, passes.  Ran 1x on 0.92 and 0.94.

The 0.90 version is nearly identical, except that it ignores the changes near HBaseFsck lines 671-680.
src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
Revision 66156c2 New Change
[20] 25 lines
[+20]
26
import java.util.HashSet;
26
import java.util.HashSet;
27
import java.util.List;
27
import java.util.List;
28
import java.util.Map;
28
import java.util.Map;
29
import java.util.Map.Entry;
29
import java.util.Map.Entry;
30
import java.util.Set;
30
import java.util.Set;

    
   
31
import java.util.SortedMap;
31
import java.util.SortedSet;
32
import java.util.SortedSet;
32
import java.util.TreeMap;
33
import java.util.TreeMap;
33
import java.util.TreeSet;
34
import java.util.TreeSet;
34
import java.util.concurrent.SynchronousQueue;
35
import java.util.concurrent.SynchronousQueue;
35
import java.util.concurrent.ThreadPoolExecutor;
36
import java.util.concurrent.ThreadPoolExecutor;
[+20] [20] 162 lines
[+20] [+] public class HBaseFsck {
198
  /**
199
  /**
199
   * This map from Tablename -> TableInfo contains the structures necessary to
200
   * This map from Tablename -> TableInfo contains the structures necessary to
200
   * detect table consistency problems (holes, dupes, overlaps).  It is sorted
201
   * detect table consistency problems (holes, dupes, overlaps).  It is sorted
201
   * to prevent dupes.
202
   * to prevent dupes.
202
   */
203
   */
203
  private TreeMap<String, TableInfo> tablesInfo = new TreeMap<String, TableInfo>();
204
  private SortedMap<String, TableInfo> tablesInfo = Collections.synchronizedSortedMap(new TreeMap<String, TableInfo>());
204

    
   
205

   
205
  /**
206
  /**
206
   * When initially looking at HDFS, we attempt to find any orphaned data.
207
   * When initially looking at HDFS, we attempt to find any orphaned data.
207
   */
208
   */
208
  private List<HbckInfo> orphanHdfsDirs = new ArrayList<HbckInfo>();
209
  private List<HbckInfo> orphanHdfsDirs = Collections.synchronizedList(new ArrayList<HbckInfo>());
209

    
   
210

   
210
  /**
211
  /**
211
   * Constructor
212
   * Constructor
212
   *
213
   *
213
   * @param conf Configuration object
214
   * @param conf Configuration object
[+20] [20] 197 lines
[+20] [+] private void adoptHdfsOrphans(Collection<HbckInfo> orphanHdfsDirs) throws IOException {
411
   */
412
   */
412
  private void adoptHdfsOrphan(HbckInfo hi) throws IOException {
413
  private void adoptHdfsOrphan(HbckInfo hi) throws IOException {
413
    Path p = hi.getHdfsRegionDir();
414
    Path p = hi.getHdfsRegionDir();
414
    FileSystem fs = p.getFileSystem(conf);
415
    FileSystem fs = p.getFileSystem(conf);
415
    FileStatus[] dirs = fs.listStatus(p);
416
    FileStatus[] dirs = fs.listStatus(p);

    
   
417
    if (dirs == null) {

    
   
418
      LOG.warn("Adopting hdfs ophan skipped becuase no files present in " + p + ".  " +

    
   
419
          "This dir could probably be deleted." );

    
   
420
      return ;

    
   
421
    }
416

    
   
422

   
417
    String tableName = Bytes.toString(hi.getTableName());
423
    String tableName = Bytes.toString(hi.getTableName());
418
    TableInfo tableInfo = tablesInfo.get(tableName);
424
    TableInfo tableInfo = tablesInfo.get(tableName);
419
    Preconditions.checkNotNull("Table " + tableName + "' not present!", tableInfo);
425
    Preconditions.checkNotNull("Table " + tableName + "' not present!", tableInfo);
420
    HTableDescriptor template = tableInfo.getHTD();
426
    HTableDescriptor template = tableInfo.getHTD();
[+20] [20] 161 lines
[+20] [+] public ErrorReporter getErrors() {
582
    Path regionDir = hbi.getHdfsRegionDir();
588
    Path regionDir = hbi.getHdfsRegionDir();
583
    if (regionDir == null) {
589
    if (regionDir == null) {
584
      LOG.warn("No HDFS region dir found: " + hbi + " meta=" + hbi.metaEntry);
590
      LOG.warn("No HDFS region dir found: " + hbi + " meta=" + hbi.metaEntry);
585
      return;
591
      return;
586
    }
592
    }

    
   
593

   

    
   
594
    if (hbi.hdfsEntry.hri != null) {

    
   
595
      // already loaded data

    
   
596
      return;

    
   
597
    }

    
   
598

   
587
    Path regioninfo = new Path(regionDir, HRegion.REGIONINFO_FILE);
599
    Path regioninfo = new Path(regionDir, HRegion.REGIONINFO_FILE);
588
    FileSystem fs = regioninfo.getFileSystem(conf);
600
    FileSystem fs = regioninfo.getFileSystem(conf);
589

    
   
601

   
590
    FSDataInputStream in = fs.open(regioninfo);
602
    FSDataInputStream in = fs.open(regioninfo);
591
    HRegionInfo hri = new HRegionInfo();
603
    HRegionInfo hri = new HRegionInfo();
[+20] [20] 17 lines
[+20] [+] public static class RegionRepairException extends IOException {
609
  }
621
  }
610

    
   
622

   
611
  /**
623
  /**
612
   * Populate hbi's from regionInfos loaded from file system.
624
   * Populate hbi's from regionInfos loaded from file system.
613
   */
625
   */
614
  private TreeMap<String, TableInfo> loadHdfsRegionInfos() throws IOException {
626
  private SortedMap<String, TableInfo> loadHdfsRegionInfos() throws IOException, InterruptedException {
615
    tablesInfo.clear(); // regenerating the data
627
    tablesInfo.clear(); // regenerating the data
616
    // generate region split structure
628
    // generate region split structure
617
    for (HbckInfo hbi : regionInfoMap.values()) {
629
    Collection<HbckInfo> hbckInfos = regionInfoMap.values();

    
   
630

   

    
   
631
    // Parallelized read of .regioninfo files.

    
   
632
    WorkItemHdfsRegionInfo[] hbis = new WorkItemHdfsRegionInfo[hbckInfos.size()];

    
   
633
    int num = 0;

    
   
634
    for (HbckInfo hbi : hbckInfos) {

    
   
635
      hbis[num] = new WorkItemHdfsRegionInfo(hbi, this, errors);

    
   
636
      executor.execute(hbis[num]);

    
   
637
      num++;

    
   
638
    }

    
   
639

   

    
   
640
    for (int i=0; i < num; i++) {

    
   
641
      WorkItemHdfsRegionInfo hbi = hbis[i];

    
   
642
      synchronized(hbi) {

    
   
643
        while (!hbi.isDone()) {

    
   
644
          hbi.wait();

    
   
645
        }

    
   
646
      }

    
   
647
    }

    
   
648

   

    
   
649
    // serialized table info gathering.

    
   
650
    for (HbckInfo hbi: hbckInfos) {
618

    
   
651

   
619
      // only load entries that haven't been loaded yet.

   
620
      if (hbi.getHdfsHRI() == null) {
652
      if (hbi.getHdfsHRI() == null) {
621
        try {
653
        // was an orphan
622
          loadHdfsRegioninfo(hbi);

   
623
        } catch (IOException ioe) {
Moved to 2805

   
624
          String msg = "Orphan region in HDFS: Unable to load .regioninfo from table "
Moved to 2806

   
625
            + Bytes.toString(hbi.getTableName()) + " in hdfs dir "
Moved to 2807

   
626
            + hbi.getHdfsRegionDir()
Moved to 2808

   
627
            + "!  It may be an invalid format or version file.  Treating as "
Moved to 2809

   
628
            + "an orphaned regiondir.";
Moved to 2810

   
629
          errors.reportError(ERROR_CODE.ORPHAN_HDFS_REGION, msg);
Moved to 2811

   
630
          debugLsr(hbi.getHdfsRegionDir());

   
631
          orphanHdfsDirs.add(hbi);

   
632
          continue;
654
        continue;
633
        }
655
      }
634
      }

   
635

    
   
656

   

    
   
657

   
636
      // get table name from hdfs, populate various HBaseFsck tables.
658
      // get table name from hdfs, populate various HBaseFsck tables.
637
      String tableName = Bytes.toString(hbi.getTableName());
659
      String tableName = Bytes.toString(hbi.getTableName());
638
      if (tableName == null) {
660
      if (tableName == null) {
639
        // There was an entry in META not in the HDFS?
661
        // There was an entry in META not in the HDFS?
640
        LOG.warn("tableName was null for: " + hbi);
662
        LOG.warn("tableName was null for: " + hbi);
641
        continue;
663
        continue;
642
      }
664
      }
643

    
   
665

   
644
      TableInfo modTInfo = tablesInfo.get(tableName);
666
      TableInfo modTInfo = tablesInfo.get(tableName);
645
      if (modTInfo == null) {
667
      if (modTInfo == null) {
646
        // only executed once per table.
668
        // only executed once per table.
647
        modTInfo = new TableInfo(tableName);
669
        modTInfo = new TableInfo(tableName);
648
        Path hbaseRoot = new Path(conf.get(HConstants.HBASE_DIR));
670
        Path hbaseRoot = new Path(conf.get(HConstants.HBASE_DIR));

    
   
671
        try {
649
        HTableDescriptor htd =
672
          HTableDescriptor htd =
650
          FSTableDescriptors.getTableDescriptor(hbaseRoot.getFileSystem(conf),
673
              FSTableDescriptors.getTableDescriptor(hbaseRoot.getFileSystem(conf),
651
              hbaseRoot, tableName);
674
              hbaseRoot, tableName);
652
        modTInfo.htds.add(htd);
675
          modTInfo.htds.add(htd);

    
   
676
        } catch (IOException ioe) {

    
   
677
          LOG.error("Unable to read .tableinfo from " + hbaseRoot, ioe);

    
   
678
          throw ioe;

    
   
679
        }

    
   
680

   
653
      }
681
      }
654
      modTInfo.addRegionInfo(hbi);
682
      modTInfo.addRegionInfo(hbi);
655
      tablesInfo.put(tableName, modTInfo);
683
      tablesInfo.put(tableName, modTInfo);
656
    }
684
    }
657

    
   
685

   
[+20] [20] 30 lines
[+20] [+] private HRegion createNewRootAndMeta() throws IOException {
688
   * Generate set of puts to add to new meta.  This expects the tables to be 
716
   * Generate set of puts to add to new meta.  This expects the tables to be 
689
   * clean with no overlaps or holes.  If there are any problems it returns null.
717
   * clean with no overlaps or holes.  If there are any problems it returns null.
690
   * 
718
   * 
691
   * @return An array list of puts to do in bulk, null if tables have problems
719
   * @return An array list of puts to do in bulk, null if tables have problems
692
   */
720
   */
693
  private ArrayList<Put> generatePuts(TreeMap<String, TableInfo> tablesInfo) throws IOException {
721
  private ArrayList<Put> generatePuts(SortedMap<String, TableInfo> tablesInfo) throws IOException {
694
    ArrayList<Put> puts = new ArrayList<Put>();
722
    ArrayList<Put> puts = new ArrayList<Put>();
695
    boolean hasProblems = false;
723
    boolean hasProblems = false;
696
    for (Entry<String, TableInfo> e : tablesInfo.entrySet()) {
724
    for (Entry<String, TableInfo> e : tablesInfo.entrySet()) {
697
      String name = e.getKey();
725
      String name = e.getKey();
698

    
   
726

   
[+20] [20] 29 lines
[+20] private HRegion createNewRootAndMeta() throws IOException {
728
  }
756
  }
729

    
   
757

   
730
  /**
758
  /**
731
   * Suggest fixes for each table
759
   * Suggest fixes for each table
732
   */
760
   */
733
  private void suggestFixes(TreeMap<String, TableInfo> tablesInfo) throws IOException {
761
  private void suggestFixes(SortedMap<String, TableInfo> tablesInfo) throws IOException {
734
    for (TableInfo tInfo : tablesInfo.values()) {
762
    for (TableInfo tInfo : tablesInfo.values()) {
735
      TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
763
      TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
736
      tInfo.checkRegionChain(handler);
764
      tInfo.checkRegionChain(handler);
737
    }
765
    }
738
  }
766
  }
[+20] [20] 60 lines
[+20] [+] public boolean rebuildMeta(boolean fix) throws IOException,
799
    meta.getLog().closeAndDelete();
827
    meta.getLog().closeAndDelete();
800
    LOG.info("Success! .META. table rebuilt.");
828
    LOG.info("Success! .META. table rebuilt.");
801
    return true;
829
    return true;
802
  }
830
  }
803

    
   
831

   
804
  private TreeMap<String, TableInfo> checkHdfsIntegrity(boolean fixHoles,
832
  private SortedMap<String, TableInfo> checkHdfsIntegrity(boolean fixHoles,
805
      boolean fixOverlaps) throws IOException {
833
      boolean fixOverlaps) throws IOException {
806
    LOG.info("Checking HBase region split map from HDFS data...");
834
    LOG.info("Checking HBase region split map from HDFS data...");
807
    for (TableInfo tInfo : tablesInfo.values()) {
835
    for (TableInfo tInfo : tablesInfo.values()) {
808
      TableIntegrityErrorHandler handler;
836
      TableIntegrityErrorHandler handler;
809
      if (fixHoles || fixOverlaps) {
837
      if (fixHoles || fixOverlaps) {
[+20] [20] 615 lines
[+20] [+] private void checkRegionConsistency(final String key, final HbckInfo hbi)
1425
   * Checks tables integrity. Goes over all regions and scans the tables.
1453
   * Checks tables integrity. Goes over all regions and scans the tables.
1426
   * Collects all the pieces for each table and checks if there are missing,
1454
   * Collects all the pieces for each table and checks if there are missing,
1427
   * repeated or overlapping ones.
1455
   * repeated or overlapping ones.
1428
   * @throws IOException
1456
   * @throws IOException
1429
   */
1457
   */
1430
  TreeMap<String, TableInfo> checkIntegrity() throws IOException {
1458
  SortedMap<String, TableInfo> checkIntegrity() throws IOException {
1431
    tablesInfo = new TreeMap<String,TableInfo> ();
1459
    tablesInfo = new TreeMap<String,TableInfo> ();
1432
    List<HbckInfo> noHDFSRegionInfos = new ArrayList<HbckInfo>();
1460
    List<HbckInfo> noHDFSRegionInfos = new ArrayList<HbckInfo>();
1433
    LOG.debug("There are " + regionInfoMap.size() + " region info entries");
1461
    LOG.debug("There are " + regionInfoMap.size() + " region info entries");
1434
    for (HbckInfo hbi : regionInfoMap.values()) {
1462
    for (HbckInfo hbi : regionInfoMap.values()) {
1435
      // Check only valid, working regions
1463
      // Check only valid, working regions
[+20] [20] 1009 lines
[+20] [+] public int compare(HbckInfo l, HbckInfo r) {
2445
  };
2473
  };
2446

    
   
2474

   
2447
  /**
2475
  /**
2448
   * Prints summary of all tables found on the system.
2476
   * Prints summary of all tables found on the system.
2449
   */
2477
   */
2450
  private void printTableSummary(TreeMap<String, TableInfo> tablesInfo) {
2478
  private void printTableSummary(SortedMap<String, TableInfo> tablesInfo) {
2451
    System.out.println("Summary:");
2479
    System.out.println("Summary:");
2452
    for (TableInfo tInfo : tablesInfo.values()) {
2480
    for (TableInfo tInfo : tablesInfo.values()) {
2453
      if (errors.tableHasErrors(tInfo)) {
2481
      if (errors.tableHasErrors(tInfo)) {
2454
        System.out.println("Table " + tInfo.getName() + " is inconsistent.");
2482
        System.out.println("Table " + tInfo.getName() + " is inconsistent.");
2455
      } else {
2483
      } else {
[+20] [20] 289 lines
[+20] [+] public synchronized void run() {
2745
      }
2773
      }
2746
    }
2774
    }
2747
  }
2775
  }
2748

    
   
2776

   
2749
  /**
2777
  /**

    
   
2778
   * Contact hdfs and get all information about specified table directory into

    
   
2779
   * regioninfo list.

    
   
2780
   */

    
   
2781
  static class WorkItemHdfsRegionInfo implements Runnable {

    
   
2782
    private HbckInfo hbi;

    
   
2783
    private HBaseFsck hbck;

    
   
2784
    private ErrorReporter errors;

    
   
2785
    private boolean done;

    
   
2786

   

    
   
2787
    WorkItemHdfsRegionInfo(HbckInfo hbi, HBaseFsck hbck, ErrorReporter errors) {

    
   
2788
      this.hbi = hbi;

    
   
2789
      this.hbck = hbck;

    
   
2790
      this.errors = errors;

    
   
2791
      this.done = false;

    
   
2792
    }

    
   
2793

   

    
   
2794
    synchronized boolean isDone() {

    
   
2795
      return done;

    
   
2796
    }

    
   
2797

   

    
   
2798
    @Override

    
   
2799
    public synchronized void run() {

    
   
2800
      try {

    
   
2801
        // only load entries that haven't been loaded yet.

    
   
2802
        if (hbi.getHdfsHRI() == null) {

    
   
2803
          try {

    
   
2804
            hbck.loadHdfsRegioninfo(hbi);
Moved from 623

    
   
2805
          } catch (IOException ioe) {
Moved from 624

    
   
2806
            String msg = "Orphan region in HDFS: Unable to load .regioninfo from table "
Moved from 625

    
   
2807
                + Bytes.toString(hbi.getTableName()) + " in hdfs dir "
Moved from 626

    
   
2808
                + hbi.getHdfsRegionDir()
Moved from 627

    
   
2809
                + "!  It may be an invalid format or version file.  Treating as "
Moved from 628

    
   
2810
                + "an orphaned regiondir.";
Moved from 629

    
   
2811
            errors.reportError(ERROR_CODE.ORPHAN_HDFS_REGION, msg);

    
   
2812
            try {

    
   
2813
              hbck.debugLsr(hbi.getHdfsRegionDir());

    
   
2814
            } catch (IOException ioe2) {

    
   
2815
              LOG.error("Unable to read directory " + hbi.getHdfsRegionDir(), ioe2);

    
   
2816
              return; // TODO convert this in to a future

    
   
2817
            }

    
   
2818
            hbck.orphanHdfsDirs.add(hbi);

    
   
2819
            return;

    
   
2820
          }

    
   
2821
        }

    
   
2822
      } finally {

    
   
2823
        done = true;

    
   
2824
        notifyAll();

    
   
2825
      }

    
   
2826
    }

    
   
2827
  };

    
   
2828

   

    
   
2829
  /**
2750
   * Display the full report from fsck. This displays all live and dead region
2830
   * Display the full report from fsck. This displays all live and dead region
2751
   * servers, and all known regions.
2831
   * servers, and all known regions.
2752
   */
2832
   */
2753
  public void setDisplayFullReport() {
2833
  public void setDisplayFullReport() {
2754
    details = true;
2834
    details = true;
[+20] [20] 352 lines
src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
Revision 6b64f10 New Change
 
  1. src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java: Loading...
  2. src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java: Loading...