Review Board 1.7.22


HBASE-5712 Parallelize load of .regioninfo files in diagnostic/repair portion of hbck

Review Request #4883 - Created April 26, 2012 and submitted

Jonathan Hsieh
0.92, 0.94, 0.96
HBASE-5712
Reviewers
hbase
jxiang, tedyu
hbase-git
* Parallelized load of .regioninfo files
* changed TreeMap to SortedMap in method signatures
* renamed a test's name.
Ran patch 10x on trunk, passes.  Ran 1x on 0.92 and 0.94.

There is a 0.90 version that is nearly identical except for ignoring changes near HBaseFsck lines 671-680.
src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
Revision 66156c2 New Change
1
/**
1
/**
2
 * Licensed to the Apache Software Foundation (ASF) under one
2
 * Licensed to the Apache Software Foundation (ASF) under one
3
 * or more contributor license agreements.  See the NOTICE file
3
 * or more contributor license agreements.  See the NOTICE file
4
 * distributed with this work for additional information
4
 * distributed with this work for additional information
5
 * regarding copyright ownership.  The ASF licenses this file
5
 * regarding copyright ownership.  The ASF licenses this file
6
 * to you under the Apache License, Version 2.0 (the
6
 * to you under the Apache License, Version 2.0 (the
7
 * "License"); you may not use this file except in compliance
7
 * "License"); you may not use this file except in compliance
8
 * with the License.  You may obtain a copy of the License at
8
 * with the License.  You may obtain a copy of the License at
9
 *
9
 *
10
 *     http://www.apache.org/licenses/LICENSE-2.0
10
 *     http://www.apache.org/licenses/LICENSE-2.0
11
 *
11
 *
12
 * Unless required by applicable law or agreed to in writing, software
12
 * Unless required by applicable law or agreed to in writing, software
13
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * distributed under the License is distributed on an "AS IS" BASIS,
14
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
 * See the License for the specific language governing permissions and
15
 * See the License for the specific language governing permissions and
16
 * limitations under the License.
16
 * limitations under the License.
17
 */
17
 */
18
package org.apache.hadoop.hbase.util;
18
package org.apache.hadoop.hbase.util;
19

    
   
19

   
20
import java.io.IOException;
20
import java.io.IOException;
21
import java.util.ArrayList;
21
import java.util.ArrayList;
22
import java.util.Collection;
22
import java.util.Collection;
23
import java.util.Collections;
23
import java.util.Collections;
24
import java.util.Comparator;
24
import java.util.Comparator;
25
import java.util.HashMap;
25
import java.util.HashMap;
26
import java.util.HashSet;
26
import java.util.HashSet;
27
import java.util.List;
27
import java.util.List;
28
import java.util.Map;
28
import java.util.Map;
29
import java.util.Map.Entry;
29
import java.util.Map.Entry;
30
import java.util.Set;
30
import java.util.Set;

    
   
31
import java.util.SortedMap;
31
import java.util.SortedSet;
32
import java.util.SortedSet;
32
import java.util.TreeMap;
33
import java.util.TreeMap;
33
import java.util.TreeSet;
34
import java.util.TreeSet;
34
import java.util.concurrent.SynchronousQueue;
35
import java.util.concurrent.SynchronousQueue;
35
import java.util.concurrent.ThreadPoolExecutor;
36
import java.util.concurrent.ThreadPoolExecutor;
36
import java.util.concurrent.TimeUnit;
37
import java.util.concurrent.TimeUnit;
37
import java.util.concurrent.atomic.AtomicInteger;
38
import java.util.concurrent.atomic.AtomicInteger;
38

    
   
39

   
39
import org.apache.commons.logging.Log;
40
import org.apache.commons.logging.Log;
40
import org.apache.commons.logging.LogFactory;
41
import org.apache.commons.logging.LogFactory;
41
import org.apache.hadoop.classification.InterfaceAudience;
42
import org.apache.hadoop.classification.InterfaceAudience;
42
import org.apache.hadoop.classification.InterfaceStability;
43
import org.apache.hadoop.classification.InterfaceStability;
43
import org.apache.hadoop.conf.Configuration;
44
import org.apache.hadoop.conf.Configuration;
44
import org.apache.hadoop.fs.FSDataInputStream;
45
import org.apache.hadoop.fs.FSDataInputStream;
45
import org.apache.hadoop.fs.FileStatus;
46
import org.apache.hadoop.fs.FileStatus;
46
import org.apache.hadoop.fs.FileSystem;
47
import org.apache.hadoop.fs.FileSystem;
47
import org.apache.hadoop.fs.Path;
48
import org.apache.hadoop.fs.Path;
48
import org.apache.hadoop.hbase.Abortable;
49
import org.apache.hadoop.hbase.Abortable;
49
import org.apache.hadoop.hbase.ClusterStatus;
50
import org.apache.hadoop.hbase.ClusterStatus;
50
import org.apache.hadoop.hbase.HBaseConfiguration;
51
import org.apache.hadoop.hbase.HBaseConfiguration;
51
import org.apache.hadoop.hbase.HConstants;
52
import org.apache.hadoop.hbase.HConstants;
52
import org.apache.hadoop.hbase.HRegionInfo;
53
import org.apache.hadoop.hbase.HRegionInfo;
53
import org.apache.hadoop.hbase.HRegionLocation;
54
import org.apache.hadoop.hbase.HRegionLocation;
54
import org.apache.hadoop.hbase.HTableDescriptor;
55
import org.apache.hadoop.hbase.HTableDescriptor;
55
import org.apache.hadoop.hbase.KeyValue;
56
import org.apache.hadoop.hbase.KeyValue;
56
import org.apache.hadoop.hbase.MasterNotRunningException;
57
import org.apache.hadoop.hbase.MasterNotRunningException;
57
import org.apache.hadoop.hbase.ServerName;
58
import org.apache.hadoop.hbase.ServerName;
58
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
59
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
59
import org.apache.hadoop.hbase.catalog.MetaReader;
60
import org.apache.hadoop.hbase.catalog.MetaReader;
60
import org.apache.hadoop.hbase.client.Delete;
61
import org.apache.hadoop.hbase.client.Delete;
61
import org.apache.hadoop.hbase.client.Get;
62
import org.apache.hadoop.hbase.client.Get;
62
import org.apache.hadoop.hbase.client.HBaseAdmin;
63
import org.apache.hadoop.hbase.client.HBaseAdmin;
63
import org.apache.hadoop.hbase.client.HConnection;
64
import org.apache.hadoop.hbase.client.HConnection;
64
import org.apache.hadoop.hbase.client.HConnectionManager;
65
import org.apache.hadoop.hbase.client.HConnectionManager;
65
import org.apache.hadoop.hbase.client.HConnectionManager.HConnectable;
66
import org.apache.hadoop.hbase.client.HConnectionManager.HConnectable;
66
import org.apache.hadoop.hbase.client.HTable;
67
import org.apache.hadoop.hbase.client.HTable;
67
import org.apache.hadoop.hbase.client.MetaScanner;
68
import org.apache.hadoop.hbase.client.MetaScanner;
68
import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor;
69
import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor;
69
import org.apache.hadoop.hbase.client.Put;
70
import org.apache.hadoop.hbase.client.Put;
70
import org.apache.hadoop.hbase.client.Result;
71
import org.apache.hadoop.hbase.client.Result;
71
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
72
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
72
import org.apache.hadoop.hbase.io.hfile.HFile;
73
import org.apache.hadoop.hbase.io.hfile.HFile;
73
import org.apache.hadoop.hbase.ipc.HRegionInterface;
74
import org.apache.hadoop.hbase.ipc.HRegionInterface;
74
import org.apache.hadoop.hbase.master.MasterFileSystem;
75
import org.apache.hadoop.hbase.master.MasterFileSystem;
75
import org.apache.hadoop.hbase.regionserver.HRegion;
76
import org.apache.hadoop.hbase.regionserver.HRegion;
76
import org.apache.hadoop.hbase.regionserver.wal.HLog;
77
import org.apache.hadoop.hbase.regionserver.wal.HLog;
77
import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE;
78
import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE;
78
import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandler;
79
import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandler;
79
import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandlerImpl;
80
import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandlerImpl;
80
import org.apache.hadoop.hbase.zookeeper.RootRegionTracker;
81
import org.apache.hadoop.hbase.zookeeper.RootRegionTracker;
81
import org.apache.hadoop.hbase.zookeeper.ZKTable;
82
import org.apache.hadoop.hbase.zookeeper.ZKTable;
82
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
83
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
83
import org.apache.zookeeper.KeeperException;
84
import org.apache.zookeeper.KeeperException;
84

    
   
85

   
85
import com.google.common.base.Joiner;
86
import com.google.common.base.Joiner;
86
import com.google.common.base.Preconditions;
87
import com.google.common.base.Preconditions;
87
import com.google.common.collect.Lists;
88
import com.google.common.collect.Lists;
88
import com.google.common.collect.Multimap;
89
import com.google.common.collect.Multimap;
89
import com.google.common.collect.TreeMultimap;
90
import com.google.common.collect.TreeMultimap;
90

    
   
91

   
91
/**
92
/**
92
 * HBaseFsck (hbck) is a tool for checking and repairing region consistency and
93
 * HBaseFsck (hbck) is a tool for checking and repairing region consistency and
93
 * table integrity problems in a corrupted HBase.
94
 * table integrity problems in a corrupted HBase.
94
 * <p>
95
 * <p>
95
 * Region consistency checks verify that .META., region deployment on region
96
 * Region consistency checks verify that .META., region deployment on region
96
 * servers and the state of data in HDFS (.regioninfo files) all are in
97
 * servers and the state of data in HDFS (.regioninfo files) all are in
97
 * accordance.
98
 * accordance.
98
 * <p>
99
 * <p>
99
 * Table integrity checks verify that all possible row keys resolve to exactly
100
 * Table integrity checks verify that all possible row keys resolve to exactly
100
 * one region of a table.  This means there are no individual degenerate
101
 * one region of a table.  This means there are no individual degenerate
101
 * or backwards regions; no holes between regions; and that there are no
102
 * or backwards regions; no holes between regions; and that there are no
102
 * overlapping regions.
103
 * overlapping regions.
103
 * <p>
104
 * <p>
104
 * The general repair strategy works in two phases:
105
 * The general repair strategy works in two phases:
105
 * <ol>
106
 * <ol>
106
 * <li> Repair Table Integrity on HDFS. (merge or fabricate regions)
107
 * <li> Repair Table Integrity on HDFS. (merge or fabricate regions)
107
 * <li> Repair Region Consistency with .META. and assignments
108
 * <li> Repair Region Consistency with .META. and assignments
108
 * </ol>
109
 * </ol>
109
 * <p>
110
 * <p>
110
 * For table integrity repairs, the tables' region directories are scanned
111
 * For table integrity repairs, the tables' region directories are scanned
111
 * for .regioninfo files.  Each table's integrity is then verified.  If there
112
 * for .regioninfo files.  Each table's integrity is then verified.  If there
112
 * are any orphan regions (regions with no .regioninfo files) or holes, new
113
 * are any orphan regions (regions with no .regioninfo files) or holes, new
113
 * regions are fabricated.  Backwards regions are sidelined as well as empty
114
 * regions are fabricated.  Backwards regions are sidelined as well as empty
114
 * degenerate (endkey==startkey) regions.  If there are any overlapping regions,
115
 * degenerate (endkey==startkey) regions.  If there are any overlapping regions,
115
 * a new region is created and all data is merged into the new region.
116
 * a new region is created and all data is merged into the new region.
116
 * <p>
117
 * <p>
117
 * Table integrity repairs deal solely with HDFS and could potentially be done
118
 * Table integrity repairs deal solely with HDFS and could potentially be done
118
 * offline -- the hbase region servers or master do not need to be running.
119
 * offline -- the hbase region servers or master do not need to be running.
119
 * This phase can eventually be used to completely reconstruct the META table in
120
 * This phase can eventually be used to completely reconstruct the META table in
120
 * an offline fashion.
121
 * an offline fashion.
121
 * <p>
122
 * <p>
122
 * Region consistency requires three conditions -- 1) valid .regioninfo file
123
 * Region consistency requires three conditions -- 1) valid .regioninfo file
123
 * present in an HDFS region dir,  2) valid row with .regioninfo data in META,
124
 * present in an HDFS region dir,  2) valid row with .regioninfo data in META,
124
 * and 3) a region is deployed only at the regionserver that was assigned to
125
 * and 3) a region is deployed only at the regionserver that was assigned to
125
 * with proper state in the master.
126
 * with proper state in the master.
126
 * <p>
127
 * <p>
127
 * Region consistency repairs require hbase to be online so that hbck can
128
 * Region consistency repairs require hbase to be online so that hbck can
128
 * contact the HBase master and region servers.  The hbck#connect() method must
129
 * contact the HBase master and region servers.  The hbck#connect() method must
129
 * first be called successfully.  Much of the region consistency information
130
 * first be called successfully.  Much of the region consistency information
130
 * is transient and less risky to repair.
131
 * is transient and less risky to repair.
131
 * <p>
132
 * <p>
132
 * If hbck is run from the command line, there are a handful of arguments that
133
 * If hbck is run from the command line, there are a handful of arguments that
133
 * can be used to limit the kinds of repairs hbck will do.  See the code in
134
 * can be used to limit the kinds of repairs hbck will do.  See the code in
134
 * {@link #printUsageAndExit()} for more details.
135
 * {@link #printUsageAndExit()} for more details.
135
 */
136
 */
136
@InterfaceAudience.Public
137
@InterfaceAudience.Public
137
@InterfaceStability.Evolving
138
@InterfaceStability.Evolving
138
public class HBaseFsck {
139
public class HBaseFsck {
139
  public static final long DEFAULT_TIME_LAG = 60000; // default value of 1 minute
140
  public static final long DEFAULT_TIME_LAG = 60000; // default value of 1 minute
140
  public static final long DEFAULT_SLEEP_BEFORE_RERUN = 10000;
141
  public static final long DEFAULT_SLEEP_BEFORE_RERUN = 10000;
141
  private static final int MAX_NUM_THREADS = 50; // #threads to contact regions
142
  private static final int MAX_NUM_THREADS = 50; // #threads to contact regions
142
  private static final long THREADS_KEEP_ALIVE_SECONDS = 60;
143
  private static final long THREADS_KEEP_ALIVE_SECONDS = 60;
143
  private static boolean rsSupportsOffline = true;
144
  private static boolean rsSupportsOffline = true;
144
  private static final int DEFAULT_OVERLAPS_TO_SIDELINE = 2;
145
  private static final int DEFAULT_OVERLAPS_TO_SIDELINE = 2;
145
  private static final int DEFAULT_MAX_MERGE = 5;
146
  private static final int DEFAULT_MAX_MERGE = 5;
146

    
   
147

   
147
  /**********************
148
  /**********************
148
   * Internal resources
149
   * Internal resources
149
   **********************/
150
   **********************/
150
  private static final Log LOG = LogFactory.getLog(HBaseFsck.class.getName());
151
  private static final Log LOG = LogFactory.getLog(HBaseFsck.class.getName());
151
  private Configuration conf;
152
  private Configuration conf;
152
  private ClusterStatus status;
153
  private ClusterStatus status;
153
  private HConnection connection;
154
  private HConnection connection;
154
  private HBaseAdmin admin;
155
  private HBaseAdmin admin;
155
  private HTable meta;
156
  private HTable meta;
156
  private ThreadPoolExecutor executor; // threads to retrieve data from regionservers
157
  private ThreadPoolExecutor executor; // threads to retrieve data from regionservers
157
  private long startMillis = System.currentTimeMillis();
158
  private long startMillis = System.currentTimeMillis();
158

    
   
159

   
159
  /***********
160
  /***********
160
   * Options
161
   * Options
161
   ***********/
162
   ***********/
162
  private static boolean details = false; // do we display the full report
163
  private static boolean details = false; // do we display the full report
163
  private long timelag = DEFAULT_TIME_LAG; // tables whose modtime is older
164
  private long timelag = DEFAULT_TIME_LAG; // tables whose modtime is older
164
  private boolean fixAssignments = false; // fix assignment errors?
165
  private boolean fixAssignments = false; // fix assignment errors?
165
  private boolean fixMeta = false; // fix meta errors?
166
  private boolean fixMeta = false; // fix meta errors?
166
  private boolean fixHdfsHoles = false; // fix fs holes?
167
  private boolean fixHdfsHoles = false; // fix fs holes?
167
  private boolean fixHdfsOverlaps = false; // fix fs overlaps (risky)
168
  private boolean fixHdfsOverlaps = false; // fix fs overlaps (risky)
168
  private boolean fixHdfsOrphans = false; // fix fs holes (missing .regioninfo)
169
  private boolean fixHdfsOrphans = false; // fix fs holes (missing .regioninfo)
169
  private boolean fixVersionFile = false; // fix missing hbase.version file in hdfs
170
  private boolean fixVersionFile = false; // fix missing hbase.version file in hdfs
170

    
   
171

   
171
  // limit fixes to listed tables, if empty atttempt to fix all
172
  // limit fixes to listed tables, if empty atttempt to fix all
172
  private List<byte[]> tablesToFix = new ArrayList<byte[]>();
173
  private List<byte[]> tablesToFix = new ArrayList<byte[]>();
173
  private int maxMerge = DEFAULT_MAX_MERGE; // maximum number of overlapping regions to merge
174
  private int maxMerge = DEFAULT_MAX_MERGE; // maximum number of overlapping regions to merge
174
  private int maxOverlapsToSideline = DEFAULT_OVERLAPS_TO_SIDELINE; // maximum number of overlapping regions to sideline
175
  private int maxOverlapsToSideline = DEFAULT_OVERLAPS_TO_SIDELINE; // maximum number of overlapping regions to sideline
175
  private boolean sidelineBigOverlaps = false; // sideline overlaps with >maxMerge regions
176
  private boolean sidelineBigOverlaps = false; // sideline overlaps with >maxMerge regions
176

    
   
177

   
177
  private boolean rerun = false; // if we tried to fix something, rerun hbck
178
  private boolean rerun = false; // if we tried to fix something, rerun hbck
178
  private static boolean summary = false; // if we want to print less output
179
  private static boolean summary = false; // if we want to print less output
179
  private boolean checkMetaOnly = false;
180
  private boolean checkMetaOnly = false;
180

    
   
181

   
181
  /*********
182
  /*********
182
   * State
183
   * State
183
   *********/
184
   *********/
184
  private ErrorReporter errors = new PrintingErrorReporter();
185
  private ErrorReporter errors = new PrintingErrorReporter();
185
  int fixes = 0;
186
  int fixes = 0;
186

    
   
187

   
187
  /**
188
  /**
188
   * This map contains the state of all hbck items.  It maps from encoded region
189
   * This map contains the state of all hbck items.  It maps from encoded region
189
   * name to HbckInfo structure.  The information contained in HbckInfo is used
190
   * name to HbckInfo structure.  The information contained in HbckInfo is used
190
   * to detect and correct consistency (hdfs/meta/deployment) problems.
191
   * to detect and correct consistency (hdfs/meta/deployment) problems.
191
   */
192
   */
192
  private TreeMap<String, HbckInfo> regionInfoMap = new TreeMap<String, HbckInfo>();
193
  private TreeMap<String, HbckInfo> regionInfoMap = new TreeMap<String, HbckInfo>();
193
  private TreeSet<byte[]> disabledTables =
194
  private TreeSet<byte[]> disabledTables =
194
    new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR);
195
    new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR);
195
  // Empty regioninfo qualifiers in .META.
196
  // Empty regioninfo qualifiers in .META.
196
  private Set<Result> emptyRegionInfoQualifiers = new HashSet<Result>();
197
  private Set<Result> emptyRegionInfoQualifiers = new HashSet<Result>();
197

    
   
198

   
198
  /**
199
  /**
199
   * This map from Tablename -> TableInfo contains the structures necessary to
200
   * This map from Tablename -> TableInfo contains the structures necessary to
200
   * detect table consistency problems (holes, dupes, overlaps).  It is sorted
201
   * detect table consistency problems (holes, dupes, overlaps).  It is sorted
201
   * to prevent dupes.
202
   * to prevent dupes.
202
   */
203
   */
203
  private TreeMap<String, TableInfo> tablesInfo = new TreeMap<String, TableInfo>();
204
  private SortedMap<String, TableInfo> tablesInfo = Collections.synchronizedSortedMap(new TreeMap<String, TableInfo>());
204

    
   
205

   
205
  /**
206
  /**
206
   * When initially looking at HDFS, we attempt to find any orphaned data.
207
   * When initially looking at HDFS, we attempt to find any orphaned data.
207
   */
208
   */
208
  private List<HbckInfo> orphanHdfsDirs = new ArrayList<HbckInfo>();
209
  private List<HbckInfo> orphanHdfsDirs = Collections.synchronizedList(new ArrayList<HbckInfo>());
209

    
   
210

   
210
  /**
211
  /**
211
   * Constructor
212
   * Constructor
212
   *
213
   *
213
   * @param conf Configuration object
214
   * @param conf Configuration object
214
   * @throws MasterNotRunningException if the master is not running
215
   * @throws MasterNotRunningException if the master is not running
215
   * @throws ZooKeeperConnectionException if unable to connect to ZooKeeper
216
   * @throws ZooKeeperConnectionException if unable to connect to ZooKeeper
216
   */
217
   */
217
  public HBaseFsck(Configuration conf) throws MasterNotRunningException,
218
  public HBaseFsck(Configuration conf) throws MasterNotRunningException,
218
      ZooKeeperConnectionException, IOException {
219
      ZooKeeperConnectionException, IOException {
219
    this.conf = conf;
220
    this.conf = conf;
220

    
   
221

   
221
    int numThreads = conf.getInt("hbasefsck.numthreads", MAX_NUM_THREADS);
222
    int numThreads = conf.getInt("hbasefsck.numthreads", MAX_NUM_THREADS);
222
    executor = new ThreadPoolExecutor(1, numThreads,
223
    executor = new ThreadPoolExecutor(1, numThreads,
223
        THREADS_KEEP_ALIVE_SECONDS, TimeUnit.SECONDS,
224
        THREADS_KEEP_ALIVE_SECONDS, TimeUnit.SECONDS,
224
        new SynchronousQueue<Runnable>());
225
        new SynchronousQueue<Runnable>());
225
    executor.allowCoreThreadTimeOut(true);
226
    executor.allowCoreThreadTimeOut(true);
226
  }
227
  }
227

    
   
228

   
228
  /**
229
  /**
229
   * To repair region consistency, one must call connect() in order to repair
230
   * To repair region consistency, one must call connect() in order to repair
230
   * online state.
231
   * online state.
231
   */
232
   */
232
  public void connect() throws IOException {
233
  public void connect() throws IOException {
233
    admin = new HBaseAdmin(conf);
234
    admin = new HBaseAdmin(conf);
234
    meta = new HTable(conf, HConstants.META_TABLE_NAME);
235
    meta = new HTable(conf, HConstants.META_TABLE_NAME);
235
    status = admin.getMaster().getClusterStatus();
236
    status = admin.getMaster().getClusterStatus();
236
    connection = admin.getConnection();
237
    connection = admin.getConnection();
237
  }
238
  }
238

    
   
239

   
239
  /**
240
  /**
240
   * Get deployed regions according to the region servers.
241
   * Get deployed regions according to the region servers.
241
   */
242
   */
242
  private void loadDeployedRegions() throws IOException, InterruptedException {
243
  private void loadDeployedRegions() throws IOException, InterruptedException {
243
    // From the master, get a list of all known live region servers
244
    // From the master, get a list of all known live region servers
244
    Collection<ServerName> regionServers = status.getServers();
245
    Collection<ServerName> regionServers = status.getServers();
245
    errors.print("Number of live region servers: " + regionServers.size());
246
    errors.print("Number of live region servers: " + regionServers.size());
246
    if (details) {
247
    if (details) {
247
      for (ServerName rsinfo: regionServers) {
248
      for (ServerName rsinfo: regionServers) {
248
        errors.print("  " + rsinfo.getServerName());
249
        errors.print("  " + rsinfo.getServerName());
249
      }
250
      }
250
    }
251
    }
251

    
   
252

   
252
    // From the master, get a list of all dead region servers
253
    // From the master, get a list of all dead region servers
253
    Collection<ServerName> deadRegionServers = status.getDeadServerNames();
254
    Collection<ServerName> deadRegionServers = status.getDeadServerNames();
254
    errors.print("Number of dead region servers: " + deadRegionServers.size());
255
    errors.print("Number of dead region servers: " + deadRegionServers.size());
255
    if (details) {
256
    if (details) {
256
      for (ServerName name: deadRegionServers) {
257
      for (ServerName name: deadRegionServers) {
257
        errors.print("  " + name);
258
        errors.print("  " + name);
258
      }
259
      }
259
    }
260
    }
260

    
   
261

   
261
    // Print the current master name and state
262
    // Print the current master name and state
262
    errors.print("Master: " + status.getMaster());
263
    errors.print("Master: " + status.getMaster());
263

    
   
264

   
264
    // Print the list of all backup masters
265
    // Print the list of all backup masters
265
    Collection<ServerName> backupMasters = status.getBackupMasters();
266
    Collection<ServerName> backupMasters = status.getBackupMasters();
266
    errors.print("Number of backup masters: " + backupMasters.size());
267
    errors.print("Number of backup masters: " + backupMasters.size());
267
    if (details) {
268
    if (details) {
268
      for (ServerName name: backupMasters) {
269
      for (ServerName name: backupMasters) {
269
        errors.print("  " + name);
270
        errors.print("  " + name);
270
      }
271
      }
271
    }
272
    }
272

    
   
273

   
273
    // Determine what's deployed
274
    // Determine what's deployed
274
    processRegionServers(regionServers);
275
    processRegionServers(regionServers);
275
  }
276
  }
276

    
   
277

   
277
  /**
278
  /**
278
   * Clear the current state of hbck.
279
   * Clear the current state of hbck.
279
   */
280
   */
280
  private void clearState() {
281
  private void clearState() {
281
    // Make sure regionInfo is empty before starting
282
    // Make sure regionInfo is empty before starting
282
    fixes = 0;
283
    fixes = 0;
283
    regionInfoMap.clear();
284
    regionInfoMap.clear();
284
    emptyRegionInfoQualifiers.clear();
285
    emptyRegionInfoQualifiers.clear();
285
    disabledTables.clear();
286
    disabledTables.clear();
286
    errors.clear();
287
    errors.clear();
287
    tablesInfo.clear();
288
    tablesInfo.clear();
288
    orphanHdfsDirs.clear();
289
    orphanHdfsDirs.clear();
289
  }
290
  }
290

    
   
291

   
291
  /**
292
  /**
292
   * This repair method analyzes hbase data in hdfs and repairs it to satisfy
293
   * This repair method analyzes hbase data in hdfs and repairs it to satisfy
293
   * the table integrity rules.  HBase doesn't need to be online for this
294
   * the table integrity rules.  HBase doesn't need to be online for this
294
   * operation to work.
295
   * operation to work.
295
   */
296
   */
296
  public void offlineHdfsIntegrityRepair() throws IOException, InterruptedException {
297
  public void offlineHdfsIntegrityRepair() throws IOException, InterruptedException {
297
    // Initial pass to fix orphans.
298
    // Initial pass to fix orphans.
298
    if (shouldFixHdfsOrphans() || shouldFixHdfsHoles() || shouldFixHdfsOverlaps()) {
299
    if (shouldFixHdfsOrphans() || shouldFixHdfsHoles() || shouldFixHdfsOverlaps()) {
299
      LOG.info("Loading regioninfos HDFS");
300
      LOG.info("Loading regioninfos HDFS");
300
      // if nothing is happening this should always complete in two iterations.
301
      // if nothing is happening this should always complete in two iterations.
301
      int maxIterations = conf.getInt("hbase.hbck.integrityrepair.iterations.max", 3);
302
      int maxIterations = conf.getInt("hbase.hbck.integrityrepair.iterations.max", 3);
302
      int curIter = 0;
303
      int curIter = 0;
303
      do {
304
      do {
304
        clearState(); // clears hbck state and reset fixes to 0 and.
305
        clearState(); // clears hbck state and reset fixes to 0 and.
305
        // repair what's on HDFS
306
        // repair what's on HDFS
306
        restoreHdfsIntegrity();
307
        restoreHdfsIntegrity();
307
        curIter++;// limit the number of iterations.
308
        curIter++;// limit the number of iterations.
308
      } while (fixes > 0 && curIter <= maxIterations);
309
      } while (fixes > 0 && curIter <= maxIterations);
309

    
   
310

   
310
      // Repairs should be done in the first iteration and verification in the second.
311
      // Repairs should be done in the first iteration and verification in the second.
311
      // If there are more than 2 passes, something funny has happened.
312
      // If there are more than 2 passes, something funny has happened.
312
      if (curIter > 2) {
313
      if (curIter > 2) {
313
        if (curIter == maxIterations) {
314
        if (curIter == maxIterations) {
314
          LOG.warn("Exiting integrity repairs after max " + curIter + " iterations. "
315
          LOG.warn("Exiting integrity repairs after max " + curIter + " iterations. "
315
              + "Tables integrity may not be fully repaired!");
316
              + "Tables integrity may not be fully repaired!");
316
        } else {
317
        } else {
317
          LOG.info("Successfully exiting integrity repairs after " + curIter + " iterations");
318
          LOG.info("Successfully exiting integrity repairs after " + curIter + " iterations");
318
        }
319
        }
319
      }
320
      }
320
    }
321
    }
321
  }
322
  }
322

    
   
323

   
323
  /**
324
  /**
324
   * This repair method requires the cluster to be online since it contacts
325
   * This repair method requires the cluster to be online since it contacts
325
   * region servers and the masters.  It makes each region's state in HDFS, in
326
   * region servers and the masters.  It makes each region's state in HDFS, in
326
   * .META., and deployments consistent.
327
   * .META., and deployments consistent.
327
   *
328
   *
328
   * @return If > 0 , number of errors detected, if < 0 there was an unrecoverable
329
   * @return If > 0 , number of errors detected, if < 0 there was an unrecoverable
329
   * error.  If 0, we have a clean hbase.
330
   * error.  If 0, we have a clean hbase.
330
   */
331
   */
331
  public int onlineConsistencyRepair() throws IOException, KeeperException,
332
  public int onlineConsistencyRepair() throws IOException, KeeperException,
332
    InterruptedException {
333
    InterruptedException {
333
    clearState();
334
    clearState();
334

    
   
335

   
335
    LOG.info("Loading regionsinfo from the .META. table");
336
    LOG.info("Loading regionsinfo from the .META. table");
336
    boolean success = loadMetaEntries();
337
    boolean success = loadMetaEntries();
337
    if (!success) return -1;
338
    if (!success) return -1;
338

    
   
339

   
339
    // Check if .META. is found only once and in the right place
340
    // Check if .META. is found only once and in the right place
340
    if (!checkMetaRegion()) {
341
    if (!checkMetaRegion()) {
341
      // Will remove later if we can fix it
342
      // Will remove later if we can fix it
342
      errors.reportError("Encountered fatal error. Exiting...");
343
      errors.reportError("Encountered fatal error. Exiting...");
343
      return -2;
344
      return -2;
344
    }
345
    }
345

    
   
346

   
346
    // get a list of all tables that have not changed recently.
347
    // get a list of all tables that have not changed recently.
347
    if (!checkMetaOnly) {
348
    if (!checkMetaOnly) {
348
      reportTablesInFlux();
349
      reportTablesInFlux();
349
    }
350
    }
350

    
   
351

   
351
    // get regions according to what is online on each RegionServer
352
    // get regions according to what is online on each RegionServer
352
    loadDeployedRegions();
353
    loadDeployedRegions();
353

    
   
354

   
354
    // load regiondirs and regioninfos from HDFS
355
    // load regiondirs and regioninfos from HDFS
355
    loadHdfsRegionDirs();
356
    loadHdfsRegionDirs();
356
    loadHdfsRegionInfos();
357
    loadHdfsRegionInfos();
357

    
   
358

   
358
    // Empty cells in .META.?
359
    // Empty cells in .META.?
359
    reportEmptyMetaCells();
360
    reportEmptyMetaCells();
360

    
   
361

   
361
    // Get disabled tables from ZooKeeper
362
    // Get disabled tables from ZooKeeper
362
    loadDisabledTables();
363
    loadDisabledTables();
363

    
   
364

   
364
    // Check and fix consistency
365
    // Check and fix consistency
365
    checkAndFixConsistency();
366
    checkAndFixConsistency();
366

    
   
367

   
367
    // Check integrity (does not fix)
368
    // Check integrity (does not fix)
368
    checkIntegrity();
369
    checkIntegrity();
369
    return errors.getErrorList().size();
370
    return errors.getErrorList().size();
370
  }
371
  }
371

    
   
372

   
372
  /**
373
  /**
373
   * Contacts the master and prints out cluster-wide information
374
   * Contacts the master and prints out cluster-wide information
374
   * @return 0 on success, non-zero on failure
375
   * @return 0 on success, non-zero on failure
375
   */
376
   */
376
  public int onlineHbck() throws IOException, KeeperException, InterruptedException {
377
  public int onlineHbck() throws IOException, KeeperException, InterruptedException {
377
    // print hbase server version
378
    // print hbase server version
378
    errors.print("Version: " + status.getHBaseVersion());
379
    errors.print("Version: " + status.getHBaseVersion());
379
    offlineHdfsIntegrityRepair();
380
    offlineHdfsIntegrityRepair();
380

    
   
381

   
381
    // turn the balancer off
382
    // turn the balancer off
382
    boolean oldBalancer = admin.balanceSwitch(false);
383
    boolean oldBalancer = admin.balanceSwitch(false);
383

    
   
384

   
384
    onlineConsistencyRepair();
385
    onlineConsistencyRepair();
385

    
   
386

   
386
    admin.balanceSwitch(oldBalancer);
387
    admin.balanceSwitch(oldBalancer);
387

    
   
388

   
388
    // Print table summary
389
    // Print table summary
389
    printTableSummary(tablesInfo);
390
    printTableSummary(tablesInfo);
390
    return errors.summarize();
391
    return errors.summarize();
391
  }
392
  }
392

    
   
393

   
393
  /**
394
  /**
394
   * Iterates through the list of all orphan/invalid regiondirs.
395
   * Iterates through the list of all orphan/invalid regiondirs.
395
   */
396
   */
396
  private void adoptHdfsOrphans(Collection<HbckInfo> orphanHdfsDirs) throws IOException {
397
  private void adoptHdfsOrphans(Collection<HbckInfo> orphanHdfsDirs) throws IOException {
397
    for (HbckInfo hi : orphanHdfsDirs) {
398
    for (HbckInfo hi : orphanHdfsDirs) {
398
      LOG.info("Attempting to handle orphan hdfs dir: " + hi.getHdfsRegionDir());
399
      LOG.info("Attempting to handle orphan hdfs dir: " + hi.getHdfsRegionDir());
399
      adoptHdfsOrphan(hi);
400
      adoptHdfsOrphan(hi);
400
    }
401
    }
401
  }
402
  }
402

    
   
403

   
403
  /**
404
  /**
404
   * Orphaned regions are regions without a .regioninfo file in them.  We "adopt"
405
   * Orphaned regions are regions without a .regioninfo file in them.  We "adopt"
405
   * these orphans by creating a new region, and moving the column families,
406
   * these orphans by creating a new region, and moving the column families,
406
   * recovered edits, HLogs, into the new region dir.  We determine the region
407
   * recovered edits, HLogs, into the new region dir.  We determine the region
407
   * startkey and endkeys by looking at all of the hfiles inside the column
408
   * startkey and endkeys by looking at all of the hfiles inside the column
408
   * families to identify the min and max keys. The resulting region will
409
   * families to identify the min and max keys. The resulting region will
409
   * likely violate table integrity but will be dealt with by merging
410
   * likely violate table integrity but will be dealt with by merging
410
   * overlapping regions.
411
   * overlapping regions.
411
   */
412
   */
412
  private void adoptHdfsOrphan(HbckInfo hi) throws IOException {
413
  private void adoptHdfsOrphan(HbckInfo hi) throws IOException {
413
    Path p = hi.getHdfsRegionDir();
414
    Path p = hi.getHdfsRegionDir();
414
    FileSystem fs = p.getFileSystem(conf);
415
    FileSystem fs = p.getFileSystem(conf);
415
    FileStatus[] dirs = fs.listStatus(p);
416
    FileStatus[] dirs = fs.listStatus(p);

    
   
417
    if (dirs == null) {

    
   
418
      LOG.warn("Adopting hdfs ophan skipped becuase no files present in " + p + ".  " +

    
   
419
          "This dir could probably be deleted." );

    
   
420
      return ;

    
   
421
    }
416

    
   
422

   
417
    String tableName = Bytes.toString(hi.getTableName());
423
    String tableName = Bytes.toString(hi.getTableName());
418
    TableInfo tableInfo = tablesInfo.get(tableName);
424
    TableInfo tableInfo = tablesInfo.get(tableName);
419
    Preconditions.checkNotNull("Table " + tableName + "' not present!", tableInfo);
425
    Preconditions.checkNotNull("Table " + tableName + "' not present!", tableInfo);
420
    HTableDescriptor template = tableInfo.getHTD();
426
    HTableDescriptor template = tableInfo.getHTD();
421

    
   
427

   
422
    // find min and max key values
428
    // find min and max key values
423
    Pair<byte[],byte[]> orphanRegionRange = null;
429
    Pair<byte[],byte[]> orphanRegionRange = null;
424
    for (FileStatus cf : dirs) {
430
    for (FileStatus cf : dirs) {
425
      String cfName= cf.getPath().getName();
431
      String cfName= cf.getPath().getName();
426
      // TODO Figure out what the special dirs are
432
      // TODO Figure out what the special dirs are
427
      if (cfName.startsWith(".") || cfName.equals("splitlog")) continue;
433
      if (cfName.startsWith(".") || cfName.equals("splitlog")) continue;
428

    
   
434

   
429
      FileStatus[] hfiles = fs.listStatus(cf.getPath());
435
      FileStatus[] hfiles = fs.listStatus(cf.getPath());
430
      for (FileStatus hfile : hfiles) {
436
      for (FileStatus hfile : hfiles) {
431
        byte[] start, end;
437
        byte[] start, end;
432
        HFile.Reader hf = null;
438
        HFile.Reader hf = null;
433
        try {
439
        try {
434
          CacheConfig cacheConf = new CacheConfig(conf);
440
          CacheConfig cacheConf = new CacheConfig(conf);
435
          hf = HFile.createReader(fs, hfile.getPath(), cacheConf);
441
          hf = HFile.createReader(fs, hfile.getPath(), cacheConf);
436
          hf.loadFileInfo();
442
          hf.loadFileInfo();
437
          KeyValue startKv = KeyValue.createKeyValueFromKey(hf.getFirstKey());
443
          KeyValue startKv = KeyValue.createKeyValueFromKey(hf.getFirstKey());
438
          start = startKv.getRow();
444
          start = startKv.getRow();
439
          KeyValue endKv = KeyValue.createKeyValueFromKey(hf.getLastKey());
445
          KeyValue endKv = KeyValue.createKeyValueFromKey(hf.getLastKey());
440
          end = endKv.getRow();
446
          end = endKv.getRow();
441
        } catch (IOException ioe) {
447
        } catch (IOException ioe) {
442
          LOG.warn("Problem reading orphan file " + hfile + ", skipping");
448
          LOG.warn("Problem reading orphan file " + hfile + ", skipping");
443
          continue;
449
          continue;
444
        } catch (NullPointerException ioe) {
450
        } catch (NullPointerException ioe) {
445
          LOG.warn("Orphan file " + hfile + " is possibly corrupted HFile, skipping");
451
          LOG.warn("Orphan file " + hfile + " is possibly corrupted HFile, skipping");
446
          continue;
452
          continue;
447
        } finally {
453
        } finally {
448
          if (hf != null) {
454
          if (hf != null) {
449
            hf.close();
455
            hf.close();
450
          }
456
          }
451
        }
457
        }
452

    
   
458

   
453
        // expand the range to include the range of all hfiles
459
        // expand the range to include the range of all hfiles
454
        if (orphanRegionRange == null) {
460
        if (orphanRegionRange == null) {
455
          // first range
461
          // first range
456
          orphanRegionRange = new Pair<byte[], byte[]>(start, end);
462
          orphanRegionRange = new Pair<byte[], byte[]>(start, end);
457
        } else {
463
        } else {
458
          // TODO add test
464
          // TODO add test
459

    
   
465

   
460
          // expand range only if the hfile is wider.
466
          // expand range only if the hfile is wider.
461
          if (Bytes.compareTo(orphanRegionRange.getFirst(), start) > 0) {
467
          if (Bytes.compareTo(orphanRegionRange.getFirst(), start) > 0) {
462
            orphanRegionRange.setFirst(start);
468
            orphanRegionRange.setFirst(start);
463
          }
469
          }
464
          if (Bytes.compareTo(orphanRegionRange.getSecond(), end) < 0 ) {
470
          if (Bytes.compareTo(orphanRegionRange.getSecond(), end) < 0 ) {
465
            orphanRegionRange.setSecond(end);
471
            orphanRegionRange.setSecond(end);
466
          }
472
          }
467
        }
473
        }
468
      }
474
      }
469
    }
475
    }
470
    if (orphanRegionRange == null) {
476
    if (orphanRegionRange == null) {
471
      LOG.warn("No data in dir " + p + ", sidelining data");
477
      LOG.warn("No data in dir " + p + ", sidelining data");
472
      fixes++;
478
      fixes++;
473
      sidelineRegionDir(fs, hi);
479
      sidelineRegionDir(fs, hi);
474
      return;
480
      return;
475
    }
481
    }
476
    LOG.info("Min max keys are : [" + Bytes.toString(orphanRegionRange.getFirst()) + ", " +
482
    LOG.info("Min max keys are : [" + Bytes.toString(orphanRegionRange.getFirst()) + ", " +
477
        Bytes.toString(orphanRegionRange.getSecond()) + ")");
483
        Bytes.toString(orphanRegionRange.getSecond()) + ")");
478

    
   
484

   
479
    // create new region on hdfs.  move data into place.
485
    // create new region on hdfs.  move data into place.
480
    HRegionInfo hri = new HRegionInfo(template.getName(), orphanRegionRange.getFirst(), orphanRegionRange.getSecond());
486
    HRegionInfo hri = new HRegionInfo(template.getName(), orphanRegionRange.getFirst(), orphanRegionRange.getSecond());
481
    LOG.info("Creating new region : " + hri);
487
    LOG.info("Creating new region : " + hri);
482
    HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, hri, template);
488
    HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, hri, template);
483
    Path target = region.getRegionDir();
489
    Path target = region.getRegionDir();
484

    
   
490

   
485
    // rename all the data to new region
491
    // rename all the data to new region
486
    mergeRegionDirs(target, hi);
492
    mergeRegionDirs(target, hi);
487
    fixes++;
493
    fixes++;
488
  }
494
  }
489

    
   
495

   
490
  /**
496
  /**
491
   * This method determines if there are table integrity errors in HDFS.  If
497
   * This method determines if there are table integrity errors in HDFS.  If
492
   * there are errors and the appropriate "fix" options are enabled, the method
498
   * there are errors and the appropriate "fix" options are enabled, the method
493
   * will first correct orphan regions making them into legit regiondirs, and
499
   * will first correct orphan regions making them into legit regiondirs, and
494
   * then reload to merge potentially overlapping regions.
500
   * then reload to merge potentially overlapping regions.
495
   *
501
   *
496
   * @return number of table integrity errors found
502
   * @return number of table integrity errors found
497
   */
503
   */
498
  private int restoreHdfsIntegrity() throws IOException, InterruptedException {
504
  private int restoreHdfsIntegrity() throws IOException, InterruptedException {
499
    // Determine what's on HDFS
505
    // Determine what's on HDFS
500
    LOG.info("Loading HBase regioninfo from HDFS...");
506
    LOG.info("Loading HBase regioninfo from HDFS...");
501
    loadHdfsRegionDirs(); // populating regioninfo table.
507
    loadHdfsRegionDirs(); // populating regioninfo table.
502

    
   
508

   
503
    int errs = errors.getErrorList().size();
509
    int errs = errors.getErrorList().size();
504
    // First time just get suggestions.
510
    // First time just get suggestions.
505
    tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
511
    tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
506
    checkHdfsIntegrity(false, false);
512
    checkHdfsIntegrity(false, false);
507

    
   
513

   
508
    if (errors.getErrorList().size() == errs) {
514
    if (errors.getErrorList().size() == errs) {
509
      LOG.info("No integrity errors.  We are done with this phase. Glorious.");
515
      LOG.info("No integrity errors.  We are done with this phase. Glorious.");
510
      return 0;
516
      return 0;
511
    }
517
    }
512

    
   
518

   
513
    if (shouldFixHdfsOrphans() && orphanHdfsDirs.size() > 0) {
519
    if (shouldFixHdfsOrphans() && orphanHdfsDirs.size() > 0) {
514
      adoptHdfsOrphans(orphanHdfsDirs);
520
      adoptHdfsOrphans(orphanHdfsDirs);
515
      // TODO optimize by incrementally adding instead of reloading.
521
      // TODO optimize by incrementally adding instead of reloading.
516
    }
522
    }
517

    
   
523

   
518
    // Make sure there are no holes now.
524
    // Make sure there are no holes now.
519
    if (shouldFixHdfsHoles()) {
525
    if (shouldFixHdfsHoles()) {
520
      clearState(); // this also resets # fixes.
526
      clearState(); // this also resets # fixes.
521
      loadHdfsRegionDirs();
527
      loadHdfsRegionDirs();
522
      tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
528
      tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
523
      tablesInfo = checkHdfsIntegrity(shouldFixHdfsHoles(), false);
529
      tablesInfo = checkHdfsIntegrity(shouldFixHdfsHoles(), false);
524
    }
530
    }
525

    
   
531

   
526
    // Now we fix overlaps
532
    // Now we fix overlaps
527
    if (shouldFixHdfsOverlaps()) {
533
    if (shouldFixHdfsOverlaps()) {
528
      // second pass we fix overlaps.
534
      // second pass we fix overlaps.
529
      clearState(); // this also resets # fixes.
535
      clearState(); // this also resets # fixes.
530
      loadHdfsRegionDirs();
536
      loadHdfsRegionDirs();
531
      tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
537
      tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
532
      tablesInfo = checkHdfsIntegrity(false, shouldFixHdfsOverlaps());
538
      tablesInfo = checkHdfsIntegrity(false, shouldFixHdfsOverlaps());
533
    }
539
    }
534

    
   
540

   
535
    return errors.getErrorList().size();
541
    return errors.getErrorList().size();
536
  }
542
  }
537

    
   
543

   
538
  /**
544
  /**
539
   * TODO -- need to add tests for this.
545
   * TODO -- need to add tests for this.
540
   */
546
   */
541
  private void reportEmptyMetaCells() {
547
  private void reportEmptyMetaCells() {
542
    errors.print("Number of empty REGIONINFO_QUALIFIER rows in .META.: " +
548
    errors.print("Number of empty REGIONINFO_QUALIFIER rows in .META.: " +
543
      emptyRegionInfoQualifiers.size());
549
      emptyRegionInfoQualifiers.size());
544
    if (details) {
550
    if (details) {
545
      for (Result r: emptyRegionInfoQualifiers) {
551
      for (Result r: emptyRegionInfoQualifiers) {
546
        errors.print("  " + r);
552
        errors.print("  " + r);
547
      }
553
      }
548
    }
554
    }
549
  }
555
  }
550

    
   
556

   
551
  /**
557
  /**
552
   * TODO -- need to add tests for this.
558
   * TODO -- need to add tests for this.
553
   */
559
   */
554
  private void reportTablesInFlux() {
560
  private void reportTablesInFlux() {
555
    AtomicInteger numSkipped = new AtomicInteger(0);
561
    AtomicInteger numSkipped = new AtomicInteger(0);
556
    HTableDescriptor[] allTables = getTables(numSkipped);
562
    HTableDescriptor[] allTables = getTables(numSkipped);
557
    errors.print("Number of Tables: " + allTables.length);
563
    errors.print("Number of Tables: " + allTables.length);
558
    if (details) {
564
    if (details) {
559
      if (numSkipped.get() > 0) {
565
      if (numSkipped.get() > 0) {
560
        errors.detail("Number of Tables in flux: " + numSkipped.get());
566
        errors.detail("Number of Tables in flux: " + numSkipped.get());
561
      }
567
      }
562
      for (HTableDescriptor td : allTables) {
568
      for (HTableDescriptor td : allTables) {
563
        String tableName = td.getNameAsString();
569
        String tableName = td.getNameAsString();
564
        errors.detail("  Table: " + tableName + "\t" +
570
        errors.detail("  Table: " + tableName + "\t" +
565
                           (td.isReadOnly() ? "ro" : "rw") + "\t" +
571
                           (td.isReadOnly() ? "ro" : "rw") + "\t" +
566
                           (td.isRootRegion() ? "ROOT" :
572
                           (td.isRootRegion() ? "ROOT" :
567
                            (td.isMetaRegion() ? "META" : "    ")) + "\t" +
573
                            (td.isMetaRegion() ? "META" : "    ")) + "\t" +
568
                           " families: " + td.getFamilies().size());
574
                           " families: " + td.getFamilies().size());
569
      }
575
      }
570
    }
576
    }
571
  }
577
  }
572

    
   
578

   
573
  public ErrorReporter getErrors() {
579
  public ErrorReporter getErrors() {
574
    return errors;
580
    return errors;
575
  }
581
  }
576

    
   
582

   
577
  /**
583
  /**
578
   * Read the .regioninfo file from the file system.  If there is no
584
   * Read the .regioninfo file from the file system.  If there is no
579
   * .regioninfo, add it to the orphan hdfs region list.
585
   * .regioninfo, add it to the orphan hdfs region list.
580
   */
586
   */
581
  private void loadHdfsRegioninfo(HbckInfo hbi) throws IOException {
587
  private void loadHdfsRegioninfo(HbckInfo hbi) throws IOException {
582
    Path regionDir = hbi.getHdfsRegionDir();
588
    Path regionDir = hbi.getHdfsRegionDir();
583
    if (regionDir == null) {
589
    if (regionDir == null) {
584
      LOG.warn("No HDFS region dir found: " + hbi + " meta=" + hbi.metaEntry);
590
      LOG.warn("No HDFS region dir found: " + hbi + " meta=" + hbi.metaEntry);
585
      return;
591
      return;
586
    }
592
    }

    
   
593

   

    
   
594
    if (hbi.hdfsEntry.hri != null) {

    
   
595
      // already loaded data

    
   
596
      return;

    
   
597
    }

    
   
598

   
587
    Path regioninfo = new Path(regionDir, HRegion.REGIONINFO_FILE);
599
    Path regioninfo = new Path(regionDir, HRegion.REGIONINFO_FILE);
588
    FileSystem fs = regioninfo.getFileSystem(conf);
600
    FileSystem fs = regioninfo.getFileSystem(conf);
589

    
   
601

   
590
    FSDataInputStream in = fs.open(regioninfo);
602
    FSDataInputStream in = fs.open(regioninfo);
591
    HRegionInfo hri = new HRegionInfo();
603
    HRegionInfo hri = new HRegionInfo();
592
    hri.readFields(in);
604
    hri.readFields(in);
593
    in.close();
605
    in.close();
594
    LOG.debug("HRegionInfo read: " + hri.toString());
606
    LOG.debug("HRegionInfo read: " + hri.toString());
595
    hbi.hdfsEntry.hri = hri;
607
    hbi.hdfsEntry.hri = hri;
596
  }
608
  }
597

    
   
609

   
598
  /**
610
  /**
599
   * Exception thrown when a integrity repair operation fails in an
611
   * Exception thrown when a integrity repair operation fails in an
600
   * unresolvable way.
612
   * unresolvable way.
601
   */
613
   */
602
  public static class RegionRepairException extends IOException {
614
  public static class RegionRepairException extends IOException {
603
    private static final long serialVersionUID = 1L;
615
    private static final long serialVersionUID = 1L;
604
    final IOException ioe;
616
    final IOException ioe;
605
    public RegionRepairException(String s, IOException ioe) {
617
    public RegionRepairException(String s, IOException ioe) {
606
      super(s);
618
      super(s);
607
      this.ioe = ioe;
619
      this.ioe = ioe;
608
    }
620
    }
609
  }
621
  }
610

    
   
622

   
611
  /**
623
  /**
612
   * Populate hbi's from regionInfos loaded from file system.
624
   * Populate hbi's from regionInfos loaded from file system.
613
   */
625
   */
614
  private TreeMap<String, TableInfo> loadHdfsRegionInfos() throws IOException {
626
  private SortedMap<String, TableInfo> loadHdfsRegionInfos() throws IOException, InterruptedException {
615
    tablesInfo.clear(); // regenerating the data
627
    tablesInfo.clear(); // regenerating the data
616
    // generate region split structure
628
    // generate region split structure
617
    for (HbckInfo hbi : regionInfoMap.values()) {
629
    Collection<HbckInfo> hbckInfos = regionInfoMap.values();

    
   
630

   

    
   
631
    // Parallelized read of .regioninfo files.

    
   
632
    WorkItemHdfsRegionInfo[] hbis = new WorkItemHdfsRegionInfo[hbckInfos.size()];

    
   
633
    int num = 0;

    
   
634
    for (HbckInfo hbi : hbckInfos) {

    
   
635
      hbis[num] = new WorkItemHdfsRegionInfo(hbi, this, errors);

    
   
636
      executor.execute(hbis[num]);

    
   
637
      num++;

    
   
638
    }

    
   
639

   

    
   
640
    for (int i=0; i < num; i++) {

    
   
641
      WorkItemHdfsRegionInfo hbi = hbis[i];

    
   
642
      synchronized(hbi) {

    
   
643
        while (!hbi.isDone()) {

    
   
644
          hbi.wait();

    
   
645
        }

    
   
646
      }

    
   
647
    }

    
   
648

   

    
   
649
    // serialized table info gathering.

    
   
650
    for (HbckInfo hbi: hbckInfos) {
618

    
   
651

   
619
      // only load entries that haven't been loaded yet.

   
620
      if (hbi.getHdfsHRI() == null) {
652
      if (hbi.getHdfsHRI() == null) {
621
        try {
653
        // was an orphan
622
          loadHdfsRegioninfo(hbi);

   
623
        } catch (IOException ioe) {
Moved to 2805

   
624
          String msg = "Orphan region in HDFS: Unable to load .regioninfo from table "
Moved to 2806

   
625
            + Bytes.toString(hbi.getTableName()) + " in hdfs dir "
Moved to 2807

   
626
            + hbi.getHdfsRegionDir()
Moved to 2808

   
627
            + "!  It may be an invalid format or version file.  Treating as "
Moved to 2809

   
628
            + "an orphaned regiondir.";
Moved to 2810

   
629
          errors.reportError(ERROR_CODE.ORPHAN_HDFS_REGION, msg);
Moved to 2811

   
630
          debugLsr(hbi.getHdfsRegionDir());

   
631
          orphanHdfsDirs.add(hbi);

   
632
          continue;
654
        continue;
633
        }
655
      }
634
      }

   
635

    
   
656

   

    
   
657

   
636
      // get table name from hdfs, populate various HBaseFsck tables.
658
      // get table name from hdfs, populate various HBaseFsck tables.
637
      String tableName = Bytes.toString(hbi.getTableName());
659
      String tableName = Bytes.toString(hbi.getTableName());
638
      if (tableName == null) {
660
      if (tableName == null) {
639
        // There was an entry in META not in the HDFS?
661
        // There was an entry in META not in the HDFS?
640
        LOG.warn("tableName was null for: " + hbi);
662
        LOG.warn("tableName was null for: " + hbi);
641
        continue;
663
        continue;
642
      }
664
      }
643

    
   
665

   
644
      TableInfo modTInfo = tablesInfo.get(tableName);
666
      TableInfo modTInfo = tablesInfo.get(tableName);
645
      if (modTInfo == null) {
667
      if (modTInfo == null) {
646
        // only executed once per table.
668
        // only executed once per table.
647
        modTInfo = new TableInfo(tableName);
669
        modTInfo = new TableInfo(tableName);
648
        Path hbaseRoot = new Path(conf.get(HConstants.HBASE_DIR));
670
        Path hbaseRoot = new Path(conf.get(HConstants.HBASE_DIR));

    
   
671
        try {
649
        HTableDescriptor htd =
672
          HTableDescriptor htd =
650
          FSTableDescriptors.getTableDescriptor(hbaseRoot.getFileSystem(conf),
673
              FSTableDescriptors.getTableDescriptor(hbaseRoot.getFileSystem(conf),
651
              hbaseRoot, tableName);
674
              hbaseRoot, tableName);
652
        modTInfo.htds.add(htd);
675
          modTInfo.htds.add(htd);

    
   
676
        } catch (IOException ioe) {

    
   
677
          LOG.error("Unable to read .tableinfo from " + hbaseRoot, ioe);

    
   
678
          throw ioe;

    
   
679
        }

    
   
680

   
653
      }
681
      }
654
      modTInfo.addRegionInfo(hbi);
682
      modTInfo.addRegionInfo(hbi);
655
      tablesInfo.put(tableName, modTInfo);
683
      tablesInfo.put(tableName, modTInfo);
656
    }
684
    }
657

    
   
685

   
658
    return tablesInfo;
686
    return tablesInfo;
659
  }
687
  }
660

    
   
688

   
661
  /**
689
  /**
662
   * This borrows code from MasterFileSystem.bootstrap()
690
   * This borrows code from MasterFileSystem.bootstrap()
663
   * 
691
   * 
664
   * @return an open .META. HRegion
692
   * @return an open .META. HRegion
665
   */
693
   */
666
  private HRegion createNewRootAndMeta() throws IOException {
694
  private HRegion createNewRootAndMeta() throws IOException {
667
    Path rootdir = new Path(conf.get(HConstants.HBASE_DIR));
695
    Path rootdir = new Path(conf.get(HConstants.HBASE_DIR));
668
    Configuration c = conf;
696
    Configuration c = conf;
669
    HRegionInfo rootHRI = new HRegionInfo(HRegionInfo.ROOT_REGIONINFO);
697
    HRegionInfo rootHRI = new HRegionInfo(HRegionInfo.ROOT_REGIONINFO);
670
    MasterFileSystem.setInfoFamilyCachingForRoot(false);
698
    MasterFileSystem.setInfoFamilyCachingForRoot(false);
671
    HRegionInfo metaHRI = new HRegionInfo(HRegionInfo.FIRST_META_REGIONINFO);
699
    HRegionInfo metaHRI = new HRegionInfo(HRegionInfo.FIRST_META_REGIONINFO);
672
    MasterFileSystem.setInfoFamilyCachingForMeta(false);
700
    MasterFileSystem.setInfoFamilyCachingForMeta(false);
673
    HRegion root = HRegion.createHRegion(rootHRI, rootdir, c,
701
    HRegion root = HRegion.createHRegion(rootHRI, rootdir, c,
674
        HTableDescriptor.ROOT_TABLEDESC);
702
        HTableDescriptor.ROOT_TABLEDESC);
675
    HRegion meta = HRegion.createHRegion(metaHRI, rootdir, c,
703
    HRegion meta = HRegion.createHRegion(metaHRI, rootdir, c,
676
        HTableDescriptor.META_TABLEDESC);
704
        HTableDescriptor.META_TABLEDESC);
677
    MasterFileSystem.setInfoFamilyCachingForRoot(true);
705
    MasterFileSystem.setInfoFamilyCachingForRoot(true);
678
    MasterFileSystem.setInfoFamilyCachingForMeta(true);
706
    MasterFileSystem.setInfoFamilyCachingForMeta(true);
679

    
   
707

   
680
    // Add first region from the META table to the ROOT region.
708
    // Add first region from the META table to the ROOT region.
681
    HRegion.addRegionToMETA(root, meta);
709
    HRegion.addRegionToMETA(root, meta);
682
    root.close();
710
    root.close();
683
    root.getLog().closeAndDelete();
711
    root.getLog().closeAndDelete();
684
    return meta;
712
    return meta;
685
  }
713
  }
686

    
   
714

   
687
  /**
715
  /**
688
   * Generate set of puts to add to new meta.  This expects the tables to be 
716
   * Generate set of puts to add to new meta.  This expects the tables to be 
689
   * clean with no overlaps or holes.  If there are any problems it returns null.
717
   * clean with no overlaps or holes.  If there are any problems it returns null.
690
   * 
718
   * 
691
   * @return An array list of puts to do in bulk, null if tables have problems
719
   * @return An array list of puts to do in bulk, null if tables have problems
692
   */
720
   */
693
  private ArrayList<Put> generatePuts(TreeMap<String, TableInfo> tablesInfo) throws IOException {
721
  private ArrayList<Put> generatePuts(SortedMap<String, TableInfo> tablesInfo) throws IOException {
694
    ArrayList<Put> puts = new ArrayList<Put>();
722
    ArrayList<Put> puts = new ArrayList<Put>();
695
    boolean hasProblems = false;
723
    boolean hasProblems = false;
696
    for (Entry<String, TableInfo> e : tablesInfo.entrySet()) {
724
    for (Entry<String, TableInfo> e : tablesInfo.entrySet()) {
697
      String name = e.getKey();
725
      String name = e.getKey();
698

    
   
726

   
699
      // skip "-ROOT-" and ".META."
727
      // skip "-ROOT-" and ".META."
700
      if (Bytes.compareTo(Bytes.toBytes(name), HConstants.ROOT_TABLE_NAME) == 0
728
      if (Bytes.compareTo(Bytes.toBytes(name), HConstants.ROOT_TABLE_NAME) == 0
701
          || Bytes.compareTo(Bytes.toBytes(name), HConstants.META_TABLE_NAME) == 0) {
729
          || Bytes.compareTo(Bytes.toBytes(name), HConstants.META_TABLE_NAME) == 0) {
702
        continue;
730
        continue;
703
      }
731
      }
704

    
   
732

   
705
      TableInfo ti = e.getValue();
733
      TableInfo ti = e.getValue();
706
      for (Entry<byte[], Collection<HbckInfo>> spl : ti.sc.getStarts().asMap()
734
      for (Entry<byte[], Collection<HbckInfo>> spl : ti.sc.getStarts().asMap()
707
          .entrySet()) {
735
          .entrySet()) {
708
        Collection<HbckInfo> his = spl.getValue();
736
        Collection<HbckInfo> his = spl.getValue();
709
        int sz = his.size();
737
        int sz = his.size();
710
        if (sz != 1) {
738
        if (sz != 1) {
711
          // problem
739
          // problem
712
          LOG.error("Split starting at " + Bytes.toStringBinary(spl.getKey())
740
          LOG.error("Split starting at " + Bytes.toStringBinary(spl.getKey())
713
              + " had " +  sz + " regions instead of exactly 1." );
741
              + " had " +  sz + " regions instead of exactly 1." );
714
          hasProblems = true;
742
          hasProblems = true;
715
          continue;
743
          continue;
716
        }
744
        }
717

    
   
745

   
718
        // add the row directly to meta.
746
        // add the row directly to meta.
719
        HbckInfo hi = his.iterator().next();
747
        HbckInfo hi = his.iterator().next();
720
        HRegionInfo hri = hi.getHdfsHRI(); // hi.metaEntry;
748
        HRegionInfo hri = hi.getHdfsHRI(); // hi.metaEntry;
721
        Put p = new Put(hri.getRegionName());
749
        Put p = new Put(hri.getRegionName());
722
        p.add(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER,
750
        p.add(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER,
723
            Writables.getBytes(hri));
751
            Writables.getBytes(hri));
724
        puts.add(p);
752
        puts.add(p);
725
      }
753
      }
726
    }
754
    }
727
    return hasProblems ? null : puts;
755
    return hasProblems ? null : puts;
728
  }
756
  }
729

    
   
757

   
730
  /**
758
  /**
731
   * Suggest fixes for each table
759
   * Suggest fixes for each table
732
   */
760
   */
733
  private void suggestFixes(TreeMap<String, TableInfo> tablesInfo) throws IOException {
761
  private void suggestFixes(SortedMap<String, TableInfo> tablesInfo) throws IOException {
734
    for (TableInfo tInfo : tablesInfo.values()) {
762
    for (TableInfo tInfo : tablesInfo.values()) {
735
      TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
763
      TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
736
      tInfo.checkRegionChain(handler);
764
      tInfo.checkRegionChain(handler);
737
    }
765
    }
738
  }
766
  }
739

    
   
767

   
740
  /**
768
  /**
741
   * Rebuilds meta from information in hdfs/fs.  Depends on configuration
769
   * Rebuilds meta from information in hdfs/fs.  Depends on configuration
742
   * settings passed into hbck constructor to point to a particular fs/dir.
770
   * settings passed into hbck constructor to point to a particular fs/dir.
743
   * 
771
   * 
744
   * @param fix flag that determines if method should attempt to fix holes
772
   * @param fix flag that determines if method should attempt to fix holes
745
   * @return true if successful, false if attempt failed.
773
   * @return true if successful, false if attempt failed.
746
   */
774
   */
747
  public boolean rebuildMeta(boolean fix) throws IOException,
775
  public boolean rebuildMeta(boolean fix) throws IOException,
748
      InterruptedException {
776
      InterruptedException {
749

    
   
777

   
750
    // TODO check to make sure hbase is offline. (or at least the table
778
    // TODO check to make sure hbase is offline. (or at least the table
751
    // currently being worked on is off line)
779
    // currently being worked on is off line)
752

    
   
780

   
753
    // Determine what's on HDFS
781
    // Determine what's on HDFS
754
    LOG.info("Loading HBase regioninfo from HDFS...");
782
    LOG.info("Loading HBase regioninfo from HDFS...");
755
    loadHdfsRegionDirs(); // populating regioninfo table.
783
    loadHdfsRegionDirs(); // populating regioninfo table.
756

    
   
784

   
757
    int errs = errors.getErrorList().size();
785
    int errs = errors.getErrorList().size();
758
    tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
786
    tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
759
    checkHdfsIntegrity(false, false);
787
    checkHdfsIntegrity(false, false);
760

    
   
788

   
761
    // make sure ok.
789
    // make sure ok.
762
    if (errors.getErrorList().size() != errs) {
790
    if (errors.getErrorList().size() != errs) {
763
      // While in error state, iterate until no more fixes possible
791
      // While in error state, iterate until no more fixes possible
764
      while(true) {
792
      while(true) {
765
        fixes = 0;
793
        fixes = 0;
766
        suggestFixes(tablesInfo);
794
        suggestFixes(tablesInfo);
767
        errors.clear();
795
        errors.clear();
768
        loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
796
        loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
769
        checkHdfsIntegrity(shouldFixHdfsHoles(), shouldFixHdfsOverlaps());
797
        checkHdfsIntegrity(shouldFixHdfsHoles(), shouldFixHdfsOverlaps());
770

    
   
798

   
771
        int errCount = errors.getErrorList().size();
799
        int errCount = errors.getErrorList().size();
772

    
   
800

   
773
        if (fixes == 0) {
801
        if (fixes == 0) {
774
          if (errCount > 0) {
802
          if (errCount > 0) {
775
            return false; // failed to fix problems.
803
            return false; // failed to fix problems.
776
          } else {
804
          } else {
777
            break; // no fixes and no problems? drop out and fix stuff!
805
            break; // no fixes and no problems? drop out and fix stuff!
778
          }
806
          }
779
        }
807
        }
780
      }
808
      }
781
    }
809
    }
782

    
   
810

   
783
    // we can rebuild, move old root and meta out of the way and start
811
    // we can rebuild, move old root and meta out of the way and start
784
    LOG.info("HDFS regioninfo's seems good.  Sidelining old .META.");
812
    LOG.info("HDFS regioninfo's seems good.  Sidelining old .META.");
785
    sidelineOldRootAndMeta();
813
    sidelineOldRootAndMeta();
786

    
   
814

   
787
    LOG.info("Creating new .META.");
815
    LOG.info("Creating new .META.");
788
    HRegion meta = createNewRootAndMeta();
816
    HRegion meta = createNewRootAndMeta();
789

    
   
817

   
790
    // populate meta
818
    // populate meta
791
    List<Put> puts = generatePuts(tablesInfo);
819
    List<Put> puts = generatePuts(tablesInfo);
792
    if (puts == null) {
820
    if (puts == null) {
793
      LOG.fatal("Problem encountered when creating new .META. entries.  " +
821
      LOG.fatal("Problem encountered when creating new .META. entries.  " +
794
        "You may need to restore the previously sidelined -ROOT- and .META.");
822
        "You may need to restore the previously sidelined -ROOT- and .META.");
795
      return false;
823
      return false;
796
    }
824
    }
797
    meta.put(puts.toArray(new Put[0]));
825
    meta.put(puts.toArray(new Put[0]));
798
    meta.close();
826
    meta.close();
799
    meta.getLog().closeAndDelete();
827
    meta.getLog().closeAndDelete();
800
    LOG.info("Success! .META. table rebuilt.");
828
    LOG.info("Success! .META. table rebuilt.");
801
    return true;
829
    return true;
802
  }
830
  }
803

    
   
831

   
804
  private TreeMap<String, TableInfo> checkHdfsIntegrity(boolean fixHoles,
832
  private SortedMap<String, TableInfo> checkHdfsIntegrity(boolean fixHoles,
805
      boolean fixOverlaps) throws IOException {
833
      boolean fixOverlaps) throws IOException {
806
    LOG.info("Checking HBase region split map from HDFS data...");
834
    LOG.info("Checking HBase region split map from HDFS data...");
807
    for (TableInfo tInfo : tablesInfo.values()) {
835
    for (TableInfo tInfo : tablesInfo.values()) {
808
      TableIntegrityErrorHandler handler;
836
      TableIntegrityErrorHandler handler;
809
      if (fixHoles || fixOverlaps) {
837
      if (fixHoles || fixOverlaps) {
810
        if (shouldFixTable(Bytes.toBytes(tInfo.getName()))) {
838
        if (shouldFixTable(Bytes.toBytes(tInfo.getName()))) {
811
          handler = tInfo.new HDFSIntegrityFixer(tInfo, errors, conf,
839
          handler = tInfo.new HDFSIntegrityFixer(tInfo, errors, conf,
812
              fixHoles, fixOverlaps);
840
              fixHoles, fixOverlaps);
813
        } else {
841
        } else {
814
          LOG.info("Table " + tInfo.getName() + " is not in the include table " +
842
          LOG.info("Table " + tInfo.getName() + " is not in the include table " +
815
            "list.  Just suggesting fixes.");
843
            "list.  Just suggesting fixes.");
816
          handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
844
          handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
817
        }
845
        }
818
      } else {
846
      } else {
819
        handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
847
        handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
820
      }
848
      }
821
      if (!tInfo.checkRegionChain(handler)) {
849
      if (!tInfo.checkRegionChain(handler)) {
822
        // should dump info as well.
850
        // should dump info as well.
823
        errors.report("Found inconsistency in table " + tInfo.getName());
851
        errors.report("Found inconsistency in table " + tInfo.getName());
824
      }
852
      }
825
    }
853
    }
826
    return tablesInfo;
854
    return tablesInfo;
827
  }
855
  }
828

    
   
856

   
829
  private Path getSidelineDir() throws IOException {
857
  private Path getSidelineDir() throws IOException {
830
    Path hbaseDir = FSUtils.getRootDir(conf);
858
    Path hbaseDir = FSUtils.getRootDir(conf);
831
    Path hbckDir = new Path(hbaseDir.getParent(), "hbck");
859
    Path hbckDir = new Path(hbaseDir.getParent(), "hbck");
832
    Path backupDir = new Path(hbckDir, hbaseDir.getName() + "-"
860
    Path backupDir = new Path(hbckDir, hbaseDir.getName() + "-"
833
        + startMillis);
861
        + startMillis);
834
    return backupDir;
862
    return backupDir;
835
  }
863
  }
836

    
   
864

   
837
  /**
865
  /**
838
   * Sideline a region dir (instead of deleting it)
866
   * Sideline a region dir (instead of deleting it)
839
   */
867
   */
840
  Path sidelineRegionDir(FileSystem fs, HbckInfo hi)
868
  Path sidelineRegionDir(FileSystem fs, HbckInfo hi)
841
    throws IOException {
869
    throws IOException {
842
    String tableName = Bytes.toString(hi.getTableName());
870
    String tableName = Bytes.toString(hi.getTableName());
843
    Path regionDir = hi.getHdfsRegionDir();
871
    Path regionDir = hi.getHdfsRegionDir();
844

    
   
872

   
845
    if (!fs.exists(regionDir)) {
873
    if (!fs.exists(regionDir)) {
846
      LOG.warn("No previous " + regionDir + " exists.  Continuing.");
874
      LOG.warn("No previous " + regionDir + " exists.  Continuing.");
847
      return null;
875
      return null;
848
    }
876
    }
849

    
   
877

   
850
    Path sidelineTableDir= new Path(getSidelineDir(), tableName);
878
    Path sidelineTableDir= new Path(getSidelineDir(), tableName);
851
    Path sidelineRegionDir = new Path(sidelineTableDir, regionDir.getName());
879
    Path sidelineRegionDir = new Path(sidelineTableDir, regionDir.getName());
852
    fs.mkdirs(sidelineRegionDir);
880
    fs.mkdirs(sidelineRegionDir);
853
    boolean success = false;
881
    boolean success = false;
854
    FileStatus[] cfs =  fs.listStatus(regionDir);
882
    FileStatus[] cfs =  fs.listStatus(regionDir);
855
    if (cfs == null) {
883
    if (cfs == null) {
856
      LOG.info("Region dir is empty: " + regionDir);
884
      LOG.info("Region dir is empty: " + regionDir);
857
    } else {
885
    } else {
858
      for (FileStatus cf : cfs) {
886
      for (FileStatus cf : cfs) {
859
        Path src = cf.getPath();
887
        Path src = cf.getPath();
860
        Path dst =  new Path(sidelineRegionDir, src.getName());
888
        Path dst =  new Path(sidelineRegionDir, src.getName());
861
        if (fs.isFile(src)) {
889
        if (fs.isFile(src)) {
862
          // simple file
890
          // simple file
863
          success = fs.rename(src, dst);
891
          success = fs.rename(src, dst);
864
          if (!success) {
892
          if (!success) {
865
            String msg = "Unable to rename file " + src +  " to " + dst;
893
            String msg = "Unable to rename file " + src +  " to " + dst;
866
            LOG.error(msg);
894
            LOG.error(msg);
867
            throw new IOException(msg);
895
            throw new IOException(msg);
868
          }
896
          }
869
          continue;
897
          continue;
870
        }
898
        }
871

    
   
899

   
872
        // is a directory.
900
        // is a directory.
873
        fs.mkdirs(dst);
901
        fs.mkdirs(dst);
874

    
   
902

   
875
        LOG.info("Sidelining files from " + src + " into containing region " + dst);
903
        LOG.info("Sidelining files from " + src + " into containing region " + dst);
876
        // FileSystem.rename is inconsistent with directories -- if the
904
        // FileSystem.rename is inconsistent with directories -- if the
877
        // dst (foo/a) exists and is a dir, and the src (foo/b) is a dir,
905
        // dst (foo/a) exists and is a dir, and the src (foo/b) is a dir,
878
        // it moves the src into the dst dir resulting in (foo/a/b).  If
906
        // it moves the src into the dst dir resulting in (foo/a/b).  If
879
        // the dst does not exist, and the src a dir, src becomes dst. (foo/b)
907
        // the dst does not exist, and the src a dir, src becomes dst. (foo/b)
880
        FileStatus[] hfiles = fs.listStatus(src);
908
        FileStatus[] hfiles = fs.listStatus(src);
881
        if (hfiles != null && hfiles.length > 0) {
909
        if (hfiles != null && hfiles.length > 0) {
882
          for (FileStatus hfile : hfiles) {
910
          for (FileStatus hfile : hfiles) {
883
            success = fs.rename(hfile.getPath(), dst);
911
            success = fs.rename(hfile.getPath(), dst);
884
            if (!success) {
912
            if (!success) {
885
              String msg = "Unable to rename file " + src +  " to " + dst;
913
              String msg = "Unable to rename file " + src +  " to " + dst;
886
              LOG.error(msg);
914
              LOG.error(msg);
887
              throw new IOException(msg);
915
              throw new IOException(msg);
888
            }
916
            }
889
          }
917
          }
890
        }
918
        }
891
        LOG.debug("Sideline directory contents:");
919
        LOG.debug("Sideline directory contents:");
892
        debugLsr(sidelineRegionDir);
920
        debugLsr(sidelineRegionDir);
893
      }
921
      }
894
    }
922
    }
895

    
   
923

   
896
    LOG.info("Removing old region dir: " + regionDir);
924
    LOG.info("Removing old region dir: " + regionDir);
897
    success = fs.delete(regionDir, true);
925
    success = fs.delete(regionDir, true);
898
    if (!success) {
926
    if (!success) {
899
      String msg = "Unable to delete dir " + regionDir;
927
      String msg = "Unable to delete dir " + regionDir;
900
      LOG.error(msg);
928
      LOG.error(msg);
901
      throw new IOException(msg);
929
      throw new IOException(msg);
902
    }
930
    }
903
    return sidelineRegionDir;
931
    return sidelineRegionDir;
904
  }
932
  }
905

    
   
933

   
906
  /**
934
  /**
907
   * Side line an entire table.
935
   * Side line an entire table.
908
   */
936
   */
909
  void sidelineTable(FileSystem fs, byte[] table, Path hbaseDir,
937
  void sidelineTable(FileSystem fs, byte[] table, Path hbaseDir,
910
      Path backupHbaseDir) throws IOException {
938
      Path backupHbaseDir) throws IOException {
911
    String tableName = Bytes.toString(table);
939
    String tableName = Bytes.toString(table);
912
    Path tableDir = new Path(hbaseDir, tableName);
940
    Path tableDir = new Path(hbaseDir, tableName);
913
    if (fs.exists(tableDir)) {
941
    if (fs.exists(tableDir)) {
914
      Path backupTableDir= new Path(backupHbaseDir, tableName);
942
      Path backupTableDir= new Path(backupHbaseDir, tableName);
915
      boolean success = fs.rename(tableDir, backupTableDir);
943
      boolean success = fs.rename(tableDir, backupTableDir);
916
      if (!success) {
944
      if (!success) {
917
        throw new IOException("Failed to move  " + tableName + " from " 
945
        throw new IOException("Failed to move  " + tableName + " from " 
918
            +  tableDir.getName() + " to " + backupTableDir.getName());
946
            +  tableDir.getName() + " to " + backupTableDir.getName());
919
      }
947
      }
920
    } else {
948
    } else {
921
      LOG.info("No previous " + tableName +  " exists.  Continuing.");
949
      LOG.info("No previous " + tableName +  " exists.  Continuing.");
922
    }
950
    }
923
  }
951
  }
924

    
   
952

   
925
  /**
953
  /**
926
   * @return Path to backup of original directory
954
   * @return Path to backup of original directory
927
   */
955
   */
928
  Path sidelineOldRootAndMeta() throws IOException {
956
  Path sidelineOldRootAndMeta() throws IOException {
929
    // put current -ROOT- and .META. aside.
957
    // put current -ROOT- and .META. aside.
930
    Path hbaseDir = new Path(conf.get(HConstants.HBASE_DIR));
958
    Path hbaseDir = new Path(conf.get(HConstants.HBASE_DIR));
931
    FileSystem fs = hbaseDir.getFileSystem(conf);
959
    FileSystem fs = hbaseDir.getFileSystem(conf);
932
    Path backupDir = new Path(hbaseDir.getParent(), hbaseDir.getName() + "-"
960
    Path backupDir = new Path(hbaseDir.getParent(), hbaseDir.getName() + "-"
933
        + startMillis);
961
        + startMillis);
934
    fs.mkdirs(backupDir);
962
    fs.mkdirs(backupDir);
935

    
   
963

   
936
    sidelineTable(fs, HConstants.ROOT_TABLE_NAME, hbaseDir, backupDir);
964
    sidelineTable(fs, HConstants.ROOT_TABLE_NAME, hbaseDir, backupDir);
937
    try {
965
    try {
938
      sidelineTable(fs, HConstants.META_TABLE_NAME, hbaseDir, backupDir);
966
      sidelineTable(fs, HConstants.META_TABLE_NAME, hbaseDir, backupDir);
939
    } catch (IOException e) {
967
    } catch (IOException e) {
940
      LOG.error("Attempt to sideline meta failed, attempt to revert...", e);
968
      LOG.error("Attempt to sideline meta failed, attempt to revert...", e);
941
      try {
969
      try {
942
        // move it back.
970
        // move it back.
943
        sidelineTable(fs, HConstants.ROOT_TABLE_NAME, backupDir, hbaseDir);
971
        sidelineTable(fs, HConstants.ROOT_TABLE_NAME, backupDir, hbaseDir);
944
        LOG.warn("... revert succeed.  -ROOT- and .META. still in "
972
        LOG.warn("... revert succeed.  -ROOT- and .META. still in "
945
            + "original state.");
973
            + "original state.");
946
      } catch (IOException ioe) {
974
      } catch (IOException ioe) {
947
        LOG.fatal("... failed to sideline root and meta and failed to restore "
975
        LOG.fatal("... failed to sideline root and meta and failed to restore "
948
            + "prevoius state.  Currently in inconsistent state.  To restore "
976
            + "prevoius state.  Currently in inconsistent state.  To restore "
949
            + "try to rename -ROOT- in " + backupDir.getName() + " to " 
977
            + "try to rename -ROOT- in " + backupDir.getName() + " to " 
950
            + hbaseDir.getName() + ".", ioe);
978
            + hbaseDir.getName() + ".", ioe);
951
      }
979
      }
952
      throw e; // throw original exception
980
      throw e; // throw original exception
953
    }
981
    }
954
    return backupDir;
982
    return backupDir;
955
  }
983
  }
956

    
   
984

   
957
  /**
985
  /**
958
   * Load the list of disabled tables in ZK into local set.
986
   * Load the list of disabled tables in ZK into local set.
959
   * @throws ZooKeeperConnectionException
987
   * @throws ZooKeeperConnectionException
960
   * @throws IOException
988
   * @throws IOException
961
   */
989
   */
962
  private void loadDisabledTables()
990
  private void loadDisabledTables()
963
  throws ZooKeeperConnectionException, IOException {
991
  throws ZooKeeperConnectionException, IOException {
964
    HConnectionManager.execute(new HConnectable<Void>(conf) {
992
    HConnectionManager.execute(new HConnectable<Void>(conf) {
965
      @Override
993
      @Override
966
      public Void connect(HConnection connection) throws IOException {
994
      public Void connect(HConnection connection) throws IOException {
967
        ZooKeeperWatcher zkw = createZooKeeperWatcher();
995
        ZooKeeperWatcher zkw = createZooKeeperWatcher();
968
        try {
996
        try {
969
          for (String tableName : ZKTable.getDisabledOrDisablingTables(zkw)) {
997
          for (String tableName : ZKTable.getDisabledOrDisablingTables(zkw)) {
970
            disabledTables.add(Bytes.toBytes(tableName));
998
            disabledTables.add(Bytes.toBytes(tableName));
971
          }
999
          }
972
        } catch (KeeperException ke) {
1000
        } catch (KeeperException ke) {
973
          throw new IOException(ke);
1001
          throw new IOException(ke);
974
        } finally {
1002
        } finally {
975
          zkw.close();
1003
          zkw.close();
976
        }
1004
        }
977
        return null;
1005
        return null;
978
      }
1006
      }
979
    });
1007
    });
980
  }
1008
  }
981

    
   
1009

   
982
  /**
1010
  /**
983
   * Check if the specified region's table is disabled.
1011
   * Check if the specified region's table is disabled.
984
   */
1012
   */
985
  private boolean isTableDisabled(HRegionInfo regionInfo) {
1013
  private boolean isTableDisabled(HRegionInfo regionInfo) {
986
    return disabledTables.contains(regionInfo.getTableName());
1014
    return disabledTables.contains(regionInfo.getTableName());
987
  }
1015
  }
988

    
   
1016

   
989
  /**
1017
  /**
990
   * Scan HDFS for all regions, recording their information into
1018
   * Scan HDFS for all regions, recording their information into
991
   * regionInfoMap
1019
   * regionInfoMap
992
   */
1020
   */
993
  public void loadHdfsRegionDirs() throws IOException, InterruptedException {
1021
  public void loadHdfsRegionDirs() throws IOException, InterruptedException {
994
    Path rootDir = new Path(conf.get(HConstants.HBASE_DIR));
1022
    Path rootDir = new Path(conf.get(HConstants.HBASE_DIR));
995
    FileSystem fs = rootDir.getFileSystem(conf);
1023
    FileSystem fs = rootDir.getFileSystem(conf);
996

    
   
1024

   
997
    // list all tables from HDFS
1025
    // list all tables from HDFS
998
    List<FileStatus> tableDirs = Lists.newArrayList();
1026
    List<FileStatus> tableDirs = Lists.newArrayList();
999

    
   
1027

   
1000
    boolean foundVersionFile = false;
1028
    boolean foundVersionFile = false;
1001
    FileStatus[] files = fs.listStatus(rootDir);
1029
    FileStatus[] files = fs.listStatus(rootDir);
1002
    for (FileStatus file : files) {
1030
    for (FileStatus file : files) {
1003
      String dirName = file.getPath().getName();
1031
      String dirName = file.getPath().getName();
1004
      if (dirName.equals(HConstants.VERSION_FILE_NAME)) {
1032
      if (dirName.equals(HConstants.VERSION_FILE_NAME)) {
1005
        foundVersionFile = true;
1033
        foundVersionFile = true;
1006
      } else {
1034
      } else {
1007
        if (!checkMetaOnly ||
1035
        if (!checkMetaOnly ||
1008
            dirName.equals("-ROOT-") ||
1036
            dirName.equals("-ROOT-") ||
1009
            dirName.equals(".META.")) {
1037
            dirName.equals(".META.")) {
1010
          tableDirs.add(file);
1038
          tableDirs.add(file);
1011
        }
1039
        }
1012
      }
1040
      }
1013
    }
1041
    }
1014

    
   
1042

   
1015
    // verify that version file exists
1043
    // verify that version file exists
1016
    if (!foundVersionFile) {
1044
    if (!foundVersionFile) {
1017
      errors.reportError(ERROR_CODE.NO_VERSION_FILE,
1045
      errors.reportError(ERROR_CODE.NO_VERSION_FILE,
1018
          "Version file does not exist in root dir " + rootDir);
1046
          "Version file does not exist in root dir " + rootDir);
1019
      if (shouldFixVersionFile()) {
1047
      if (shouldFixVersionFile()) {
1020
        LOG.info("Trying to create a new " + HConstants.VERSION_FILE_NAME
1048
        LOG.info("Trying to create a new " + HConstants.VERSION_FILE_NAME
1021
            + " file.");
1049
            + " file.");
1022
        setShouldRerun();
1050
        setShouldRerun();
1023
        FSUtils.setVersion(fs, rootDir, conf.getInt(
1051
        FSUtils.setVersion(fs, rootDir, conf.getInt(
1024
            HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000), conf.getInt(
1052
            HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000), conf.getInt(
1025
            HConstants.VERSION_FILE_WRITE_ATTEMPTS,
1053
            HConstants.VERSION_FILE_WRITE_ATTEMPTS,
1026
            HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS));
1054
            HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS));
1027
      }
1055
      }
1028
    }
1056
    }
1029

    
   
1057

   
1030
    // level 1:  <HBASE_DIR>/*
1058
    // level 1:  <HBASE_DIR>/*
1031
    WorkItemHdfsDir[] dirs = new WorkItemHdfsDir[tableDirs.size()];
1059
    WorkItemHdfsDir[] dirs = new WorkItemHdfsDir[tableDirs.size()];
1032
    int num = 0;
1060
    int num = 0;
1033
    for (FileStatus tableDir : tableDirs) {
1061
    for (FileStatus tableDir : tableDirs) {
1034
      LOG.debug("Loading region dirs from " +tableDir.getPath());
1062
      LOG.debug("Loading region dirs from " +tableDir.getPath());
1035
      dirs[num] = new WorkItemHdfsDir(this, fs, errors, tableDir);
1063
      dirs[num] = new WorkItemHdfsDir(this, fs, errors, tableDir);
1036
      executor.execute(dirs[num]);
1064
      executor.execute(dirs[num]);
1037
      num++;
1065
      num++;
1038
    }
1066
    }
1039

    
   
1067

   
1040
    // wait for all directories to be done
1068
    // wait for all directories to be done
1041
    for (int i = 0; i < num; i++) {
1069
    for (int i = 0; i < num; i++) {
1042
      WorkItemHdfsDir dir = dirs[i];
1070
      WorkItemHdfsDir dir = dirs[i];
1043
      synchronized (dir) {
1071
      synchronized (dir) {
1044
        while (!dir.isDone()) {
1072
        while (!dir.isDone()) {
1045
          dir.wait();
1073
          dir.wait();
1046
        }
1074
        }
1047
      }
1075
      }
1048
    }
1076
    }
1049
  }
1077
  }
1050

    
   
1078

   
1051
  /**
1079
  /**
1052
   * Record the location of the ROOT region as found in ZooKeeper,
1080
   * Record the location of the ROOT region as found in ZooKeeper,
1053
   * as if it were in a META table. This is so that we can check
1081
   * as if it were in a META table. This is so that we can check
1054
   * deployment of ROOT.
1082
   * deployment of ROOT.
1055
   */
1083
   */
1056
  private boolean recordRootRegion() throws IOException {
1084
  private boolean recordRootRegion() throws IOException {
1057
    HRegionLocation rootLocation = connection.locateRegion(
1085
    HRegionLocation rootLocation = connection.locateRegion(
1058
      HConstants.ROOT_TABLE_NAME, HConstants.EMPTY_START_ROW);
1086
      HConstants.ROOT_TABLE_NAME, HConstants.EMPTY_START_ROW);
1059

    
   
1087

   
1060
    // Check if Root region is valid and existing
1088
    // Check if Root region is valid and existing
1061
    if (rootLocation == null || rootLocation.getRegionInfo() == null ||
1089
    if (rootLocation == null || rootLocation.getRegionInfo() == null ||
1062
        rootLocation.getHostname() == null) {
1090
        rootLocation.getHostname() == null) {
1063
      errors.reportError(ERROR_CODE.NULL_ROOT_REGION,
1091
      errors.reportError(ERROR_CODE.NULL_ROOT_REGION,
1064
        "Root Region or some of its attributes are null.");
1092
        "Root Region or some of its attributes are null.");
1065
      return false;
1093
      return false;
1066
    }
1094
    }
1067
    ServerName sn;
1095
    ServerName sn;
1068
    try {
1096
    try {
1069
      sn = getRootRegionServerName();
1097
      sn = getRootRegionServerName();
1070
    } catch (KeeperException e) {
1098
    } catch (KeeperException e) {
1071
      throw new IOException(e);
1099
      throw new IOException(e);
1072
    }
1100
    }
1073
    MetaEntry m =
1101
    MetaEntry m =
1074
      new MetaEntry(rootLocation.getRegionInfo(), sn, System.currentTimeMillis());
1102
      new MetaEntry(rootLocation.getRegionInfo(), sn, System.currentTimeMillis());
1075
    HbckInfo hbInfo = new HbckInfo(m);
1103
    HbckInfo hbInfo = new HbckInfo(m);
1076
    regionInfoMap.put(rootLocation.getRegionInfo().getEncodedName(), hbInfo);
1104
    regionInfoMap.put(rootLocation.getRegionInfo().getEncodedName(), hbInfo);
1077
    return true;
1105
    return true;
1078
  }
1106
  }
1079

    
   
1107

   
1080
  private ZooKeeperWatcher createZooKeeperWatcher() throws IOException {
1108
  private ZooKeeperWatcher createZooKeeperWatcher() throws IOException {
1081
    return new ZooKeeperWatcher(conf, "hbase Fsck", new Abortable() {
1109
    return new ZooKeeperWatcher(conf, "hbase Fsck", new Abortable() {
1082
      @Override
1110
      @Override
1083
      public void abort(String why, Throwable e) {
1111
      public void abort(String why, Throwable e) {
1084
        LOG.error(why, e);
1112
        LOG.error(why, e);
1085
        System.exit(1);
1113
        System.exit(1);
1086
      }
1114
      }
1087

    
   
1115

   
1088
      @Override
1116
      @Override
1089
      public boolean isAborted() {
1117
      public boolean isAborted() {
1090
        return false;
1118
        return false;
1091
      }
1119
      }
1092

    
   
1120

   
1093
    });
1121
    });
1094
  }
1122
  }
1095

    
   
1123

   
1096
  private ServerName getRootRegionServerName()
1124
  private ServerName getRootRegionServerName()
1097
  throws IOException, KeeperException {
1125
  throws IOException, KeeperException {
1098
    ZooKeeperWatcher zkw = createZooKeeperWatcher();
1126
    ZooKeeperWatcher zkw = createZooKeeperWatcher();
1099
    ServerName sn = null;
1127
    ServerName sn = null;
1100
    try {
1128
    try {
1101
      sn = RootRegionTracker.getRootRegionLocation(zkw);
1129
      sn = RootRegionTracker.getRootRegionLocation(zkw);
1102
    } finally {
1130
    } finally {
1103
      zkw.close();
1131
      zkw.close();
1104
    }
1132
    }
1105
    return sn;
1133
    return sn;
1106
  }
1134
  }
1107

    
   
1135

   
1108
  /**
1136
  /**
1109
   * Contacts each regionserver and fetches metadata about regions.
1137
   * Contacts each regionserver and fetches metadata about regions.
1110
   * @param regionServerList - the list of region servers to connect to
1138
   * @param regionServerList - the list of region servers to connect to
1111
   * @throws IOException if a remote or network exception occurs
1139
   * @throws IOException if a remote or network exception occurs
1112
   */
1140
   */
1113
  void processRegionServers(Collection<ServerName> regionServerList)
1141
  void processRegionServers(Collection<ServerName> regionServerList)
1114
    throws IOException, InterruptedException {
1142
    throws IOException, InterruptedException {
1115

    
   
1143

   
1116
    WorkItemRegion[] work = new WorkItemRegion[regionServerList.size()];
1144
    WorkItemRegion[] work = new WorkItemRegion[regionServerList.size()];
1117
    int num = 0;
1145
    int num = 0;
1118

    
   
1146

   
1119
    // loop to contact each region server in parallel
1147
    // loop to contact each region server in parallel
1120
    for (ServerName rsinfo: regionServerList) {
1148
    for (ServerName rsinfo: regionServerList) {
1121
      work[num] = new WorkItemRegion(this, rsinfo, errors, connection);
1149
      work[num] = new WorkItemRegion(this, rsinfo, errors, connection);
1122
      executor.execute(work[num]);
1150
      executor.execute(work[num]);
1123
      num++;
1151
      num++;
1124
    }
1152
    }
1125
    
1153
    
1126
    // wait for all submitted tasks to be done
1154
    // wait for all submitted tasks to be done
1127
    for (int i = 0; i < num; i++) {
1155
    for (int i = 0; i < num; i++) {
1128
      synchronized (work[i]) {
1156
      synchronized (work[i]) {
1129
        while (!work[i].isDone()) {
1157
        while (!work[i].isDone()) {
1130
          work[i].wait();
1158
          work[i].wait();
1131
        }
1159
        }
1132
      }
1160
      }
1133
    }
1161
    }
1134
  }
1162
  }
1135

    
   
1163

   
1136
  /**
1164
  /**
1137
   * Check consistency of all regions that have been found in previous phases.
1165
   * Check consistency of all regions that have been found in previous phases.
1138
   */
1166
   */
1139
  private void checkAndFixConsistency()
1167
  private void checkAndFixConsistency()
1140
  throws IOException, KeeperException, InterruptedException {
1168
  throws IOException, KeeperException, InterruptedException {
1141
    for (java.util.Map.Entry<String, HbckInfo> e: regionInfoMap.entrySet()) {
1169
    for (java.util.Map.Entry<String, HbckInfo> e: regionInfoMap.entrySet()) {
1142
      checkRegionConsistency(e.getKey(), e.getValue());
1170
      checkRegionConsistency(e.getKey(), e.getValue());
1143
    }
1171
    }
1144
  }
1172
  }
1145

    
   
1173

   
1146
  /**
1174
  /**
1147
   * Deletes region from meta table
1175
   * Deletes region from meta table
1148
   */
1176
   */
1149
  private void deleteMetaRegion(HbckInfo hi) throws IOException {
1177
  private void deleteMetaRegion(HbckInfo hi) throws IOException {
1150
    Delete d = new Delete(hi.metaEntry.getRegionName());
1178
    Delete d = new Delete(hi.metaEntry.getRegionName());
1151
    meta.delete(d);
1179
    meta.delete(d);
1152
    meta.flushCommits();
1180
    meta.flushCommits();
1153
    LOG.info("Deleted " + hi.metaEntry.getRegionNameAsString() + " from META" );
1181
    LOG.info("Deleted " + hi.metaEntry.getRegionNameAsString() + " from META" );
1154
  }
1182
  }
1155

    
   
1183

   
1156
  /**
1184
  /**
1157
   * This backwards-compatibility wrapper for permanently offlining a region
1185
   * This backwards-compatibility wrapper for permanently offlining a region
1158
   * that should not be alive.  If the region server does not support the
1186
   * that should not be alive.  If the region server does not support the
1159
   * "offline" method, it will use the closest unassign method instead.  This
1187
   * "offline" method, it will use the closest unassign method instead.  This
1160
   * will basically work until one attempts to disable or delete the affected
1188
   * will basically work until one attempts to disable or delete the affected
1161
   * table.  The problem has to do with in-memory only master state, so
1189
   * table.  The problem has to do with in-memory only master state, so
1162
   * restarting the HMaster or failing over to another should fix this.
1190
   * restarting the HMaster or failing over to another should fix this.
1163
   */
1191
   */
1164
  private void offline(byte[] regionName) throws IOException {
1192
  private void offline(byte[] regionName) throws IOException {
1165
    String regionString = Bytes.toStringBinary(regionName);
1193
    String regionString = Bytes.toStringBinary(regionName);
1166
    if (!rsSupportsOffline) {
1194
    if (!rsSupportsOffline) {
1167
      LOG.warn("Using unassign region " + regionString
1195
      LOG.warn("Using unassign region " + regionString
1168
          + " instead of using offline method, you should"
1196
          + " instead of using offline method, you should"
1169
          + " restart HMaster after these repairs");
1197
          + " restart HMaster after these repairs");
1170
      admin.unassign(regionName, true);
1198
      admin.unassign(regionName, true);
1171
      return;
1199
      return;
1172
    }
1200
    }
1173

    
   
1201

   
1174
    // first time we assume the rs's supports #offline.
1202
    // first time we assume the rs's supports #offline.
1175
    try {
1203
    try {
1176
      LOG.info("Offlining region " + regionString);
1204
      LOG.info("Offlining region " + regionString);
1177
      admin.getMaster().offline(regionName);
1205
      admin.getMaster().offline(regionName);
1178
    } catch (IOException ioe) {
1206
    } catch (IOException ioe) {
1179
      String notFoundMsg = "java.lang.NoSuchMethodException: " +
1207
      String notFoundMsg = "java.lang.NoSuchMethodException: " +
1180
        "org.apache.hadoop.hbase.master.HMaster.offline([B)";
1208
        "org.apache.hadoop.hbase.master.HMaster.offline([B)";
1181
      if (ioe.getMessage().contains(notFoundMsg)) {
1209
      if (ioe.getMessage().contains(notFoundMsg)) {
1182
        LOG.warn("Using unassign region " + regionString
1210
        LOG.warn("Using unassign region " + regionString
1183
            + " instead of using offline method, you should"
1211
            + " instead of using offline method, you should"
1184
            + " restart HMaster after these repairs");
1212
            + " restart HMaster after these repairs");
1185
        rsSupportsOffline = false; // in the future just use unassign
1213
        rsSupportsOffline = false; // in the future just use unassign
1186
        admin.unassign(regionName, true);
1214
        admin.unassign(regionName, true);
1187
        return;
1215
        return;
1188
      }
1216
      }
1189
      throw ioe;
1217
      throw ioe;
1190
    }
1218
    }
1191
  }
1219
  }
1192

    
   
1220

   
1193
  private void undeployRegions(HbckInfo hi) throws IOException, InterruptedException {
1221
  private void undeployRegions(HbckInfo hi) throws IOException, InterruptedException {
1194
    for (OnlineEntry rse : hi.deployedEntries) {
1222
    for (OnlineEntry rse : hi.deployedEntries) {
1195
      LOG.debug("Undeploy region "  + rse.hri + " from " + rse.hsa);
1223
      LOG.debug("Undeploy region "  + rse.hri + " from " + rse.hsa);
1196
      try {
1224
      try {
1197
        HBaseFsckRepair.closeRegionSilentlyAndWait(admin, rse.hsa, rse.hri);
1225
        HBaseFsckRepair.closeRegionSilentlyAndWait(admin, rse.hsa, rse.hri);
1198
        offline(rse.hri.getRegionName());
1226
        offline(rse.hri.getRegionName());
1199
      } catch (IOException ioe) {
1227
      } catch (IOException ioe) {
1200
        LOG.warn("Got exception when attempting to offline region "
1228
        LOG.warn("Got exception when attempting to offline region "
1201
            + Bytes.toString(rse.hri.getRegionName()), ioe);
1229
            + Bytes.toString(rse.hri.getRegionName()), ioe);
1202
      }
1230
      }
1203
    }
1231
    }
1204
  }
1232
  }
1205

    
   
1233

   
1206
  /**
1234
  /**
1207
   * Attempts to undeploy a region from a region server based in information in
1235
   * Attempts to undeploy a region from a region server based in information in
1208
   * META.  Any operations that modify the file system should make sure that
1236
   * META.  Any operations that modify the file system should make sure that
1209
   * its corresponding region is not deployed to prevent data races.
1237
   * its corresponding region is not deployed to prevent data races.
1210
   *
1238
   *
1211
   * A separate call is required to update the master in-memory region state
1239
   * A separate call is required to update the master in-memory region state
1212
   * kept in the AssignementManager.  Because disable uses this state instead of
1240
   * kept in the AssignementManager.  Because disable uses this state instead of
1213
   * that found in META, we can't seem to cleanly disable/delete tables that
1241
   * that found in META, we can't seem to cleanly disable/delete tables that
1214
   * have been hbck fixed.  When used on a version of HBase that does not have
1242
   * have been hbck fixed.  When used on a version of HBase that does not have
1215
   * the offline ipc call exposed on the master (<0.90.5, <0.92.0) a master
1243
   * the offline ipc call exposed on the master (<0.90.5, <0.92.0) a master
1216
   * restart or failover may be required.
1244
   * restart or failover may be required.
1217
   */
1245
   */
1218
  private void closeRegion(HbckInfo hi) throws IOException, InterruptedException {
1246
  private void closeRegion(HbckInfo hi) throws IOException, InterruptedException {
1219
    if (hi.metaEntry == null && hi.hdfsEntry == null) {
1247
    if (hi.metaEntry == null && hi.hdfsEntry == null) {
1220
      undeployRegions(hi);
1248
      undeployRegions(hi);
1221
      return;
1249
      return;
1222
    }
1250
    }
1223

    
   
1251

   
1224
    // get assignment info and hregioninfo from meta.
1252
    // get assignment info and hregioninfo from meta.
1225
    Get get = new Get(hi.getRegionName());
1253
    Get get = new Get(hi.getRegionName());
1226
    get.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
1254
    get.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
1227
    get.addColumn(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER);
1255
    get.addColumn(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER);
1228
    get.addColumn(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER);
1256
    get.addColumn(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER);
1229
    Result r = meta.get(get);
1257
    Result r = meta.get(get);
1230
    byte[] value = r.getValue(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER);
1258
    byte[] value = r.getValue(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER);
1231
    byte[] startcodeBytes = r.getValue(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER);
1259
    byte[] startcodeBytes = r.getValue(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER);
1232
    if (value == null || startcodeBytes == null) {
1260
    if (value == null || startcodeBytes == null) {
1233
      errors.reportError("Unable to close region "
1261
      errors.reportError("Unable to close region "
1234
          + hi.getRegionNameAsString() +  " because meta does not "
1262
          + hi.getRegionNameAsString() +  " because meta does not "
1235
          + "have handle to reach it.");
1263
          + "have handle to reach it.");
1236
      return;
1264
      return;
1237
    }
1265
    }
1238
    long startcode = Bytes.toLong(startcodeBytes);
1266
    long startcode = Bytes.toLong(startcodeBytes);
1239

    
   
1267

   
1240
    ServerName hsa = new ServerName(Bytes.toString(value), startcode);
1268
    ServerName hsa = new ServerName(Bytes.toString(value), startcode);
1241
    byte[] hriVal = r.getValue(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
1269
    byte[] hriVal = r.getValue(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
1242
    HRegionInfo hri= Writables.getHRegionInfoOrNull(hriVal);
1270
    HRegionInfo hri= Writables.getHRegionInfoOrNull(hriVal);
1243
    if (hri == null) {
1271
    if (hri == null) {
1244
      LOG.warn("Unable to close region " + hi.getRegionNameAsString()
1272
      LOG.warn("Unable to close region " + hi.getRegionNameAsString()
1245
          + " because META had invalid or missing "
1273
          + " because META had invalid or missing "
1246
          + HConstants.CATALOG_FAMILY_STR + ":"
1274
          + HConstants.CATALOG_FAMILY_STR + ":"
1247
          + Bytes.toString(HConstants.REGIONINFO_QUALIFIER)
1275
          + Bytes.toString(HConstants.REGIONINFO_QUALIFIER)
1248
          + " qualifier value.");
1276
          + " qualifier value.");
1249
      return;
1277
      return;
1250
    }
1278
    }
1251

    
   
1279

   
1252
    // close the region -- close files and remove assignment
1280
    // close the region -- close files and remove assignment
1253
    HBaseFsckRepair.closeRegionSilentlyAndWait(admin, hsa, hri);
1281
    HBaseFsckRepair.closeRegionSilentlyAndWait(admin, hsa, hri);
1254
  }
1282
  }
1255

    
   
1283

   
1256
  private void tryAssignmentRepair(HbckInfo hbi, String msg) throws IOException,
1284
  private void tryAssignmentRepair(HbckInfo hbi, String msg) throws IOException,
1257
    KeeperException, InterruptedException {
1285
    KeeperException, InterruptedException {
1258
    // If we are trying to fix the errors
1286
    // If we are trying to fix the errors
1259
    if (shouldFixAssignments()) {
1287
    if (shouldFixAssignments()) {
1260
      errors.print(msg);
1288
      errors.print(msg);
1261
      undeployRegions(hbi);
1289
      undeployRegions(hbi);
1262
      setShouldRerun();
1290
      setShouldRerun();
1263
      HBaseFsckRepair.fixUnassigned(admin, hbi.getHdfsHRI());
1291
      HBaseFsckRepair.fixUnassigned(admin, hbi.getHdfsHRI());
1264
      HBaseFsckRepair.waitUntilAssigned(admin, hbi.getHdfsHRI());
1292
      HBaseFsckRepair.waitUntilAssigned(admin, hbi.getHdfsHRI());
1265
    }
1293
    }
1266
  }
1294
  }
1267

    
   
1295

   
1268
  /**
1296
  /**
1269
   * Check a single region for consistency and correct deployment.
1297
   * Check a single region for consistency and correct deployment.
1270
   */
1298
   */
1271
  private void checkRegionConsistency(final String key, final HbckInfo hbi)
1299
  private void checkRegionConsistency(final String key, final HbckInfo hbi)
1272
  throws IOException, KeeperException, InterruptedException {
1300
  throws IOException, KeeperException, InterruptedException {
1273
    String descriptiveName = hbi.toString();
1301
    String descriptiveName = hbi.toString();
1274

    
   
1302

   
1275
    boolean inMeta = hbi.metaEntry != null;
1303
    boolean inMeta = hbi.metaEntry != null;
1276
    boolean inHdfs = hbi.getHdfsRegionDir()!= null;
1304
    boolean inHdfs = hbi.getHdfsRegionDir()!= null;
1277
    boolean hasMetaAssignment = inMeta && hbi.metaEntry.regionServer != null;
1305
    boolean hasMetaAssignment = inMeta && hbi.metaEntry.regionServer != null;
1278
    boolean isDeployed = !hbi.deployedOn.isEmpty();
1306
    boolean isDeployed = !hbi.deployedOn.isEmpty();
1279
    boolean isMultiplyDeployed = hbi.deployedOn.size() > 1;
1307
    boolean isMultiplyDeployed = hbi.deployedOn.size() > 1;
1280
    boolean deploymentMatchesMeta =
1308
    boolean deploymentMatchesMeta =
1281
      hasMetaAssignment && isDeployed && !isMultiplyDeployed &&
1309
      hasMetaAssignment && isDeployed && !isMultiplyDeployed &&
1282
      hbi.metaEntry.regionServer.equals(hbi.deployedOn.get(0));
1310
      hbi.metaEntry.regionServer.equals(hbi.deployedOn.get(0));
1283
    boolean splitParent =
1311
    boolean splitParent =
1284
      (hbi.metaEntry == null)? false: hbi.metaEntry.isSplit() && hbi.metaEntry.isOffline();
1312
      (hbi.metaEntry == null)? false: hbi.metaEntry.isSplit() && hbi.metaEntry.isOffline();
1285
    boolean shouldBeDeployed = inMeta && !isTableDisabled(hbi.metaEntry);
1313
    boolean shouldBeDeployed = inMeta && !isTableDisabled(hbi.metaEntry);
1286
    boolean recentlyModified = hbi.getHdfsRegionDir() != null &&
1314
    boolean recentlyModified = hbi.getHdfsRegionDir() != null &&
1287
      hbi.getModTime() + timelag > System.currentTimeMillis();
1315
      hbi.getModTime() + timelag > System.currentTimeMillis();
1288

    
   
1316

   
1289
    // ========== First the healthy cases =============
1317
    // ========== First the healthy cases =============
1290
    if (hbi.containsOnlyHdfsEdits()) {
1318
    if (hbi.containsOnlyHdfsEdits()) {
1291
      return;
1319
      return;
1292
    }
1320
    }
1293
    if (inMeta && inHdfs && isDeployed && deploymentMatchesMeta && shouldBeDeployed) {
1321
    if (inMeta && inHdfs && isDeployed && deploymentMatchesMeta && shouldBeDeployed) {
1294
      return;
1322
      return;
1295
    } else if (inMeta && inHdfs && !isDeployed && splitParent) {
1323
    } else if (inMeta && inHdfs && !isDeployed && splitParent) {
1296
      LOG.warn("Region " + descriptiveName + " is a split parent in META and in HDFS");
1324
      LOG.warn("Region " + descriptiveName + " is a split parent in META and in HDFS");
1297
      return;
1325
      return;
1298
    } else if (inMeta && inHdfs && !shouldBeDeployed && !isDeployed) {
1326
    } else if (inMeta && inHdfs && !shouldBeDeployed && !isDeployed) {
1299
      LOG.info("Region " + descriptiveName + " is in META, and in a disabled " +
1327
      LOG.info("Region " + descriptiveName + " is in META, and in a disabled " +
1300
        "tabled that is not deployed");
1328
        "tabled that is not deployed");
1301
      return;
1329
      return;
1302
    } else if (recentlyModified) {
1330
    } else if (recentlyModified) {
1303
      LOG.warn("Region " + descriptiveName + " was recently modified -- skipping");
1331
      LOG.warn("Region " + descriptiveName + " was recently modified -- skipping");
1304
      return;
1332
      return;
1305
    }
1333
    }
1306
    // ========== Cases where the region is not in META =============
1334
    // ========== Cases where the region is not in META =============
1307
    else if (!inMeta && !inHdfs && !isDeployed) {
1335
    else if (!inMeta && !inHdfs && !isDeployed) {
1308
      // We shouldn't have record of this region at all then!
1336
      // We shouldn't have record of this region at all then!
1309
      assert false : "Entry for region with no data";
1337
      assert false : "Entry for region with no data";
1310
    } else if (!inMeta && !inHdfs && isDeployed) {
1338
    } else if (!inMeta && !inHdfs && isDeployed) {
1311
      errors.reportError(ERROR_CODE.NOT_IN_META_HDFS, "Region "
1339
      errors.reportError(ERROR_CODE.NOT_IN_META_HDFS, "Region "
1312
          + descriptiveName + ", key=" + key + ", not on HDFS or in META but " +
1340
          + descriptiveName + ", key=" + key + ", not on HDFS or in META but " +
1313
          "deployed on " + Joiner.on(", ").join(hbi.deployedOn));
1341
          "deployed on " + Joiner.on(", ").join(hbi.deployedOn));
1314
      if (shouldFixAssignments()) {
1342
      if (shouldFixAssignments()) {
1315
        undeployRegions(hbi);
1343
        undeployRegions(hbi);
1316
      }
1344
      }
1317

    
   
1345

   
1318
    } else if (!inMeta && inHdfs && !isDeployed) {
1346
    } else if (!inMeta && inHdfs && !isDeployed) {
1319
      errors.reportError(ERROR_CODE.NOT_IN_META_OR_DEPLOYED, "Region "
1347
      errors.reportError(ERROR_CODE.NOT_IN_META_OR_DEPLOYED, "Region "
1320
          + descriptiveName + " on HDFS, but not listed in META " +
1348
          + descriptiveName + " on HDFS, but not listed in META " +
1321
          "or deployed on any region server");
1349
          "or deployed on any region server");
1322
      // restore region consistency of an adopted orphan
1350
      // restore region consistency of an adopted orphan
1323
      if (shouldFixMeta()) {
1351
      if (shouldFixMeta()) {
1324
        if (!hbi.isHdfsRegioninfoPresent()) {
1352
        if (!hbi.isHdfsRegioninfoPresent()) {
1325
          LOG.error("Region " + hbi.getHdfsHRI() + " could have been repaired"
1353
          LOG.error("Region " + hbi.getHdfsHRI() + " could have been repaired"
1326
              +  " in table integrity repair phase if -fixHdfsOrphans was" +
1354
              +  " in table integrity repair phase if -fixHdfsOrphans was" +
1327
              " used.");
1355
              " used.");
1328
          return;
1356
          return;
1329
        }
1357
        }
1330

    
   
1358

   
1331
        LOG.info("Patching .META. with .regioninfo: " + hbi.getHdfsHRI());
1359
        LOG.info("Patching .META. with .regioninfo: " + hbi.getHdfsHRI());
1332
        HBaseFsckRepair.fixMetaHoleOnline(conf, hbi.getHdfsHRI());
1360
        HBaseFsckRepair.fixMetaHoleOnline(conf, hbi.getHdfsHRI());
1333

    
   
1361

   
1334
        tryAssignmentRepair(hbi, "Trying to reassign region...");
1362
        tryAssignmentRepair(hbi, "Trying to reassign region...");
1335
      }
1363
      }
1336

    
   
1364

   
1337
    } else if (!inMeta && inHdfs && isDeployed) {
1365
    } else if (!inMeta && inHdfs && isDeployed) {
1338
      errors.reportError(ERROR_CODE.NOT_IN_META, "Region " + descriptiveName
1366
      errors.reportError(ERROR_CODE.NOT_IN_META, "Region " + descriptiveName
1339
          + " not in META, but deployed on " + Joiner.on(", ").join(hbi.deployedOn));
1367
          + " not in META, but deployed on " + Joiner.on(", ").join(hbi.deployedOn));
1340
      debugLsr(hbi.getHdfsRegionDir());
1368
      debugLsr(hbi.getHdfsRegionDir());
1341
      if (shouldFixMeta()) {
1369
      if (shouldFixMeta()) {
1342
        if (!hbi.isHdfsRegioninfoPresent()) {
1370
        if (!hbi.isHdfsRegioninfoPresent()) {
1343
          LOG.error("This should have been repaired in table integrity repair phase");
1371
          LOG.error("This should have been repaired in table integrity repair phase");
1344
          return;
1372
          return;
1345
        }
1373
        }
1346

    
   
1374

   
1347
        LOG.info("Patching .META. with with .regioninfo: " + hbi.getHdfsHRI());
1375
        LOG.info("Patching .META. with with .regioninfo: " + hbi.getHdfsHRI());
1348
        HBaseFsckRepair.fixMetaHoleOnline(conf, hbi.getHdfsHRI());
1376
        HBaseFsckRepair.fixMetaHoleOnline(conf, hbi.getHdfsHRI());
1349

    
   
1377

   
1350
        tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
1378
        tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
1351
      }
1379
      }
1352

    
   
1380

   
1353
    // ========== Cases where the region is in META =============
1381
    // ========== Cases where the region is in META =============
1354
    } else if (inMeta && !inHdfs && !isDeployed) {
1382
    } else if (inMeta && !inHdfs && !isDeployed) {
1355
      errors.reportError(ERROR_CODE.NOT_IN_HDFS_OR_DEPLOYED, "Region "
1383
      errors.reportError(ERROR_CODE.NOT_IN_HDFS_OR_DEPLOYED, "Region "
1356
          + descriptiveName + " found in META, but not in HDFS "
1384
          + descriptiveName + " found in META, but not in HDFS "
1357
          + "or deployed on any region server.");
1385
          + "or deployed on any region server.");
1358
      if (shouldFixMeta()) {
1386
      if (shouldFixMeta()) {
1359
        deleteMetaRegion(hbi);
1387
        deleteMetaRegion(hbi);
1360
      }
1388
      }
1361
    } else if (inMeta && !inHdfs && isDeployed) {
1389
    } else if (inMeta && !inHdfs && isDeployed) {
1362
      errors.reportError(ERROR_CODE.NOT_IN_HDFS, "Region " + descriptiveName
1390
      errors.reportError(ERROR_CODE.NOT_IN_HDFS, "Region " + descriptiveName
1363
          + " found in META, but not in HDFS, " +
1391
          + " found in META, but not in HDFS, " +
1364
          "and deployed on " + Joiner.on(", ").join(hbi.deployedOn));
1392
          "and deployed on " + Joiner.on(", ").join(hbi.deployedOn));
1365
      // We treat HDFS as ground truth.  Any information in meta is transient
1393
      // We treat HDFS as ground truth.  Any information in meta is transient
1366
      // and equivalent data can be regenerated.  So, lets unassign and remove
1394
      // and equivalent data can be regenerated.  So, lets unassign and remove
1367
      // these problems from META.
1395
      // these problems from META.
1368
      if (shouldFixAssignments()) {
1396
      if (shouldFixAssignments()) {
1369
        errors.print("Trying to fix unassigned region...");
1397
        errors.print("Trying to fix unassigned region...");
1370
        closeRegion(hbi);// Close region will cause RS to abort.
1398
        closeRegion(hbi);// Close region will cause RS to abort.
1371
      }
1399
      }
1372
      if (shouldFixMeta()) {
1400
      if (shouldFixMeta()) {
1373
        // wait for it to complete
1401
        // wait for it to complete
1374
        deleteMetaRegion(hbi);
1402
        deleteMetaRegion(hbi);
1375
      }
1403
      }
1376
    } else if (inMeta && inHdfs && !isDeployed && shouldBeDeployed) {
1404
    } else if (inMeta && inHdfs && !isDeployed && shouldBeDeployed) {
1377
      errors.reportError(ERROR_CODE.NOT_DEPLOYED, "Region " + descriptiveName
1405
      errors.reportError(ERROR_CODE.NOT_DEPLOYED, "Region " + descriptiveName
1378
          + " not deployed on any region server.");
1406
          + " not deployed on any region server.");
1379
      tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
1407
      tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
1380
    } else if (inMeta && inHdfs && isDeployed && !shouldBeDeployed) {
1408
    } else if (inMeta && inHdfs && isDeployed && !shouldBeDeployed) {
1381
      errors.reportError(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED,
1409
      errors.reportError(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED,
1382
          "Region " + descriptiveName + " should not be deployed according " +
1410
          "Region " + descriptiveName + " should not be deployed according " +
1383
          "to META, but is deployed on " + Joiner.on(", ").join(hbi.deployedOn));
1411
          "to META, but is deployed on " + Joiner.on(", ").join(hbi.deployedOn));
1384
      if (shouldFixAssignments()) {
1412
      if (shouldFixAssignments()) {
1385
        errors.print("Trying to close the region " + descriptiveName);
1413
        errors.print("Trying to close the region " + descriptiveName);
1386
        setShouldRerun();
1414
        setShouldRerun();
1387
        HBaseFsckRepair.fixMultiAssignment(admin, hbi.metaEntry, hbi.deployedOn);
1415
        HBaseFsckRepair.fixMultiAssignment(admin, hbi.metaEntry, hbi.deployedOn);
1388
      }
1416
      }
1389
    } else if (inMeta && inHdfs && isMultiplyDeployed) {
1417
    } else if (inMeta && inHdfs && isMultiplyDeployed) {
1390
      errors.reportError(ERROR_CODE.MULTI_DEPLOYED, "Region " + descriptiveName
1418
      errors.reportError(ERROR_CODE.MULTI_DEPLOYED, "Region " + descriptiveName
1391
          + " is listed in META on region server " + hbi.metaEntry.regionServer
1419
          + " is listed in META on region server " + hbi.metaEntry.regionServer
1392
          + " but is multiply assigned to region servers " +
1420
          + " but is multiply assigned to region servers " +
1393
          Joiner.on(", ").join(hbi.deployedOn));
1421
          Joiner.on(", ").join(hbi.deployedOn));
1394
      // If we are trying to fix the errors
1422
      // If we are trying to fix the errors
1395
      if (shouldFixAssignments()) {
1423
      if (shouldFixAssignments()) {
1396
        errors.print("Trying to fix assignment error...");
1424
        errors.print("Trying to fix assignment error...");
1397
        setShouldRerun();
1425
        setShouldRerun();
1398
        HBaseFsckRepair.fixMultiAssignment(admin, hbi.metaEntry, hbi.deployedOn);
1426
        HBaseFsckRepair.fixMultiAssignment(admin, hbi.metaEntry, hbi.deployedOn);
1399
      }
1427
      }
1400
    } else if (inMeta && inHdfs && isDeployed && !deploymentMatchesMeta) {
1428
    } else if (inMeta && inHdfs && isDeployed && !deploymentMatchesMeta) {
1401
      errors.reportError(ERROR_CODE.SERVER_DOES_NOT_MATCH_META, "Region "
1429
      errors.reportError(ERROR_CODE.SERVER_DOES_NOT_MATCH_META, "Region "
1402
          + descriptiveName + " listed in META on region server " +
1430
          + descriptiveName + " listed in META on region server " +
1403
          hbi.metaEntry.regionServer + " but found on region server " +
1431
          hbi.metaEntry.regionServer + " but found on region server " +
1404
          hbi.deployedOn.get(0));
1432
          hbi.deployedOn.get(0));
1405
      // If we are trying to fix the errors
1433
      // If we are trying to fix the errors
1406
      if (shouldFixAssignments()) {
1434
      if (shouldFixAssignments()) {
1407
        errors.print("Trying to fix assignment error...");
1435
        errors.print("Trying to fix assignment error...");
1408
        setShouldRerun();
1436
        setShouldRerun();
1409
        HBaseFsckRepair.fixMultiAssignment(admin, hbi.metaEntry, hbi.deployedOn);
1437
        HBaseFsckRepair.fixMultiAssignment(admin, hbi.metaEntry, hbi.deployedOn);
1410
        HBaseFsckRepair.waitUntilAssigned(admin, hbi.getHdfsHRI());
1438
        HBaseFsckRepair.waitUntilAssigned(admin, hbi.getHdfsHRI());
1411
      }
1439
      }
1412
    } else {
1440
    } else {
1413
      errors.reportError(ERROR_CODE.UNKNOWN, "Region " + descriptiveName +
1441
      errors.reportError(ERROR_CODE.UNKNOWN, "Region " + descriptiveName +
1414
          " is in an unforeseen state:" +
1442
          " is in an unforeseen state:" +
1415
          " inMeta=" + inMeta +
1443
          " inMeta=" + inMeta +
1416
          " inHdfs=" + inHdfs +
1444
          " inHdfs=" + inHdfs +
1417
          " isDeployed=" + isDeployed +
1445
          " isDeployed=" + isDeployed +
1418
          " isMultiplyDeployed=" + isMultiplyDeployed +
1446
          " isMultiplyDeployed=" + isMultiplyDeployed +
1419
          " deploymentMatchesMeta=" + deploymentMatchesMeta +
1447
          " deploymentMatchesMeta=" + deploymentMatchesMeta +
1420
          " shouldBeDeployed=" + shouldBeDeployed);
1448
          " shouldBeDeployed=" + shouldBeDeployed);
1421
    }
1449
    }
1422
  }
1450
  }
1423

    
   
1451

   
1424
  /**
1452
  /**
1425
   * Checks tables integrity. Goes over all regions and scans the tables.
1453
   * Checks tables integrity. Goes over all regions and scans the tables.
1426
   * Collects all the pieces for each table and checks if there are missing,
1454
   * Collects all the pieces for each table and checks if there are missing,
1427
   * repeated or overlapping ones.
1455
   * repeated or overlapping ones.
1428
   * @throws IOException
1456
   * @throws IOException
1429
   */
1457
   */
1430
  TreeMap<String, TableInfo> checkIntegrity() throws IOException {
1458
  SortedMap<String, TableInfo> checkIntegrity() throws IOException {
1431
    tablesInfo = new TreeMap<String,TableInfo> ();
1459
    tablesInfo = new TreeMap<String,TableInfo> ();
1432
    List<HbckInfo> noHDFSRegionInfos = new ArrayList<HbckInfo>();
1460
    List<HbckInfo> noHDFSRegionInfos = new ArrayList<HbckInfo>();
1433
    LOG.debug("There are " + regionInfoMap.size() + " region info entries");
1461
    LOG.debug("There are " + regionInfoMap.size() + " region info entries");
1434
    for (HbckInfo hbi : regionInfoMap.values()) {
1462
    for (HbckInfo hbi : regionInfoMap.values()) {
1435
      // Check only valid, working regions
1463
      // Check only valid, working regions
1436
      if (hbi.metaEntry == null) {
1464
      if (hbi.metaEntry == null) {
1437
        // this assumes that consistency check has run loadMetaEntry
1465
        // this assumes that consistency check has run loadMetaEntry
1438
        noHDFSRegionInfos.add(hbi);
1466
        noHDFSRegionInfos.add(hbi);
1439
        Path p = hbi.getHdfsRegionDir();
1467
        Path p = hbi.getHdfsRegionDir();
1440
        if (p == null) {
1468
        if (p == null) {
1441
          errors.report("No regioninfo in Meta or HDFS. " + hbi);
1469
          errors.report("No regioninfo in Meta or HDFS. " + hbi);
1442
        }
1470
        }
1443

    
   
1471

   
1444
        // TODO test.
1472
        // TODO test.
1445
        continue;
1473
        continue;
1446
      }
1474
      }
1447
      if (hbi.metaEntry.regionServer == null) {
1475
      if (hbi.metaEntry.regionServer == null) {
1448
        errors.detail("Skipping region because no region server: " + hbi);
1476
        errors.detail("Skipping region because no region server: " + hbi);
1449
        continue;
1477
        continue;
1450
      }
1478
      }
1451
      if (hbi.metaEntry.isOffline()) {
1479
      if (hbi.metaEntry.isOffline()) {
1452
        errors.detail("Skipping region because it is offline: " + hbi);
1480
        errors.detail("Skipping region because it is offline: " + hbi);
1453
        continue;
1481
        continue;
1454
      }
1482
      }
1455
      if (hbi.containsOnlyHdfsEdits()) {
1483
      if (hbi.containsOnlyHdfsEdits()) {
1456
        errors.detail("Skipping region because it only contains edits" + hbi);
1484
        errors.detail("Skipping region because it only contains edits" + hbi);
1457
        continue;
1485
        continue;
1458
      }
1486
      }
1459

    
   
1487

   
1460
      // Missing regionDir or over-deployment is checked elsewhere. Include
1488
      // Missing regionDir or over-deployment is checked elsewhere. Include
1461
      // these cases in modTInfo, so we can evaluate those regions as part of
1489
      // these cases in modTInfo, so we can evaluate those regions as part of
1462
      // the region chain in META
1490
      // the region chain in META
1463
      //if (hbi.foundRegionDir == null) continue;
1491
      //if (hbi.foundRegionDir == null) continue;
1464
      //if (hbi.deployedOn.size() != 1) continue;
1492
      //if (hbi.deployedOn.size() != 1) continue;
1465
      if (hbi.deployedOn.size() == 0) continue;
1493
      if (hbi.deployedOn.size() == 0) continue;
1466

    
   
1494

   
1467
      // We should be safe here
1495
      // We should be safe here
1468
      String tableName = hbi.metaEntry.getTableNameAsString();
1496
      String tableName = hbi.metaEntry.getTableNameAsString();
1469
      TableInfo modTInfo = tablesInfo.get(tableName);
1497
      TableInfo modTInfo = tablesInfo.get(tableName);
1470
      if (modTInfo == null) {
1498
      if (modTInfo == null) {
1471
        modTInfo = new TableInfo(tableName);
1499
        modTInfo = new TableInfo(tableName);
1472
      }
1500
      }
1473
      for (ServerName server : hbi.deployedOn) {
1501
      for (ServerName server : hbi.deployedOn) {
1474
        modTInfo.addServer(server);
1502
        modTInfo.addServer(server);
1475
      }
1503
      }
1476

    
   
1504

   
1477
      modTInfo.addRegionInfo(hbi);
1505
      modTInfo.addRegionInfo(hbi);
1478

    
   
1506

   
1479
      tablesInfo.put(tableName, modTInfo);
1507
      tablesInfo.put(tableName, modTInfo);
1480
    }
1508
    }
1481

    
   
1509

   
1482
    for (TableInfo tInfo : tablesInfo.values()) {
1510
    for (TableInfo tInfo : tablesInfo.values()) {
1483
      TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
1511
      TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
1484
      if (!tInfo.checkRegionChain(handler)) {
1512
      if (!tInfo.checkRegionChain(handler)) {
1485
        errors.report("Found inconsistency in table " + tInfo.getName());
1513
        errors.report("Found inconsistency in table " + tInfo.getName());
1486
      }
1514
      }
1487
    }
1515
    }
1488
    return tablesInfo;
1516
    return tablesInfo;
1489
  }
1517
  }
1490

    
   
1518

   
1491
  /**
1519
  /**
1492
   * Merge hdfs data by moving from contained HbckInfo into targetRegionDir.
1520
   * Merge hdfs data by moving from contained HbckInfo into targetRegionDir.
1493
   * @return number of file move fixes done to merge regions.
1521
   * @return number of file move fixes done to merge regions.
1494
   */
1522
   */
1495
  public int mergeRegionDirs(Path targetRegionDir, HbckInfo contained) throws IOException {
1523
  public int mergeRegionDirs(Path targetRegionDir, HbckInfo contained) throws IOException {
1496
    int fileMoves = 0;
1524
    int fileMoves = 0;
1497

    
   
1525

   
1498
    LOG.debug("Contained region dir after close and pause");
1526
    LOG.debug("Contained region dir after close and pause");
1499
    debugLsr(contained.getHdfsRegionDir());
1527
    debugLsr(contained.getHdfsRegionDir());
1500

    
   
1528

   
1501
    // rename the contained into the container.
1529
    // rename the contained into the container.
1502
    FileSystem fs = targetRegionDir.getFileSystem(conf);
1530
    FileSystem fs = targetRegionDir.getFileSystem(conf);
1503
    FileStatus[] dirs = fs.listStatus(contained.getHdfsRegionDir());
1531
    FileStatus[] dirs = fs.listStatus(contained.getHdfsRegionDir());
1504

    
   
1532

   
1505
    if (dirs == null) {
1533
    if (dirs == null) {
1506
      if (!fs.exists(contained.getHdfsRegionDir())) {
1534
      if (!fs.exists(contained.getHdfsRegionDir())) {
1507
        LOG.warn("HDFS region dir " + contained.getHdfsRegionDir() + " already sidelined.");
1535
        LOG.warn("HDFS region dir " + contained.getHdfsRegionDir() + " already sidelined.");
1508
      } else {
1536
      } else {
1509
        sidelineRegionDir(fs, contained);
1537
        sidelineRegionDir(fs, contained);
1510
      }
1538
      }
1511
      return fileMoves;
1539
      return fileMoves;
1512
    }
1540
    }
1513

    
   
1541

   
1514
    for (FileStatus cf : dirs) {
1542
    for (FileStatus cf : dirs) {
1515
      Path src = cf.getPath();
1543
      Path src = cf.getPath();
1516
      Path dst =  new Path(targetRegionDir, src.getName());
1544
      Path dst =  new Path(targetRegionDir, src.getName());
1517

    
   
1545

   
1518
      if (src.getName().equals(HRegion.REGIONINFO_FILE)) {
1546
      if (src.getName().equals(HRegion.REGIONINFO_FILE)) {
1519
        // do not copy the old .regioninfo file.
1547
        // do not copy the old .regioninfo file.
1520
        continue;
1548
        continue;
1521
      }
1549
      }
1522

    
   
1550

   
1523
      if (src.getName().equals(HConstants.HREGION_OLDLOGDIR_NAME)) {
1551
      if (src.getName().equals(HConstants.HREGION_OLDLOGDIR_NAME)) {
1524
        // do not copy the .oldlogs files
1552
        // do not copy the .oldlogs files
1525
        continue;
1553
        continue;
1526
      }
1554
      }
1527

    
   
1555

   
1528
      LOG.info("Moving files from " + src + " into containing region " + dst);
1556
      LOG.info("Moving files from " + src + " into containing region " + dst);
1529
      // FileSystem.rename is inconsistent with directories -- if the
1557
      // FileSystem.rename is inconsistent with directories -- if the
1530
      // dst (foo/a) exists and is a dir, and the src (foo/b) is a dir,
1558
      // dst (foo/a) exists and is a dir, and the src (foo/b) is a dir,
1531
      // it moves the src into the dst dir resulting in (foo/a/b).  If
1559
      // it moves the src into the dst dir resulting in (foo/a/b).  If
1532
      // the dst does not exist, and the src a dir, src becomes dst. (foo/b)
1560
      // the dst does not exist, and the src a dir, src becomes dst. (foo/b)
1533
      for (FileStatus hfile : fs.listStatus(src)) {
1561
      for (FileStatus hfile : fs.listStatus(src)) {
1534
        boolean success = fs.rename(hfile.getPath(), dst);
1562
        boolean success = fs.rename(hfile.getPath(), dst);
1535
        if (success) {
1563
        if (success) {
1536
          fileMoves++;
1564
          fileMoves++;
1537
        }
1565
        }
1538
      }
1566
      }
1539
      LOG.debug("Sideline directory contents:");
1567
      LOG.debug("Sideline directory contents:");
1540
      debugLsr(targetRegionDir);
1568
      debugLsr(targetRegionDir);
1541
    }
1569
    }
1542

    
   
1570

   
1543
    // if all success.
1571
    // if all success.
1544
    sidelineRegionDir(fs, contained);
1572
    sidelineRegionDir(fs, contained);
1545
    LOG.info("Sidelined region dir "+ contained.getHdfsRegionDir() + " into " +
1573
    LOG.info("Sidelined region dir "+ contained.getHdfsRegionDir() + " into " +
1546
        getSidelineDir());
1574
        getSidelineDir());
1547
    debugLsr(contained.getHdfsRegionDir());
1575
    debugLsr(contained.getHdfsRegionDir());
1548

    
   
1576

   
1549
    return fileMoves;
1577
    return fileMoves;
1550
  }
1578
  }
1551

    
   
1579

   
1552
  /**
1580
  /**
1553
   * Maintain information about a particular table.
1581
   * Maintain information about a particular table.
1554
   */
1582
   */
1555
  public class TableInfo {
1583
  public class TableInfo {
1556
    String tableName;
1584
    String tableName;
1557
    TreeSet <ServerName> deployedOn;
1585
    TreeSet <ServerName> deployedOn;
1558

    
   
1586

   
1559
    // backwards regions
1587
    // backwards regions
1560
    final List<HbckInfo> backwards = new ArrayList<HbckInfo>();
1588
    final List<HbckInfo> backwards = new ArrayList<HbckInfo>();
1561

    
   
1589

   
1562
    // sidelined big overlapped regions
1590
    // sidelined big overlapped regions
1563
    final Map<Path, HbckInfo> sidelinedRegions = new HashMap<Path, HbckInfo>();
1591
    final Map<Path, HbckInfo> sidelinedRegions = new HashMap<Path, HbckInfo>();
1564

    
   
1592

   
1565
    // region split calculator
1593
    // region split calculator
1566
    final RegionSplitCalculator<HbckInfo> sc = new RegionSplitCalculator<HbckInfo>(cmp);
1594
    final RegionSplitCalculator<HbckInfo> sc = new RegionSplitCalculator<HbckInfo>(cmp);
1567

    
   
1595

   
1568
    // Histogram of different HTableDescriptors found.  Ideally there is only one!
1596
    // Histogram of different HTableDescriptors found.  Ideally there is only one!
1569
    final Set<HTableDescriptor> htds = new HashSet<HTableDescriptor>();
1597
    final Set<HTableDescriptor> htds = new HashSet<HTableDescriptor>();
1570

    
   
1598

   
1571
    // key = start split, values = set of splits in problem group
1599
    // key = start split, values = set of splits in problem group
1572
    final Multimap<byte[], HbckInfo> overlapGroups =
1600
    final Multimap<byte[], HbckInfo> overlapGroups =
1573
      TreeMultimap.create(RegionSplitCalculator.BYTES_COMPARATOR, cmp);
1601
      TreeMultimap.create(RegionSplitCalculator.BYTES_COMPARATOR, cmp);
1574

    
   
1602

   
1575
    TableInfo(String name) {
1603
    TableInfo(String name) {
1576
      this.tableName = name;
1604
      this.tableName = name;
1577
      deployedOn = new TreeSet <ServerName>();
1605
      deployedOn = new TreeSet <ServerName>();
1578
    }
1606
    }
1579

    
   
1607

   
1580
    /**
1608
    /**
1581
     * @return descriptor common to all regions.  null if are none or multiple!
1609
     * @return descriptor common to all regions.  null if are none or multiple!
1582
     */
1610
     */
1583
    private HTableDescriptor getHTD() {
1611
    private HTableDescriptor getHTD() {
1584
      if (htds.size() == 1) {
1612
      if (htds.size() == 1) {
1585
        return (HTableDescriptor)htds.toArray()[0];
1613
        return (HTableDescriptor)htds.toArray()[0];
1586
      } else {
1614
      } else {
1587
        LOG.error("None/Multiple table descriptors found for table '"
1615
        LOG.error("None/Multiple table descriptors found for table '"
1588
          + tableName + "' regions: " + htds);
1616
          + tableName + "' regions: " + htds);
1589
      }
1617
      }
1590
      return null;
1618
      return null;
1591
    }
1619
    }
1592

    
   
1620

   
1593
    public void addRegionInfo(HbckInfo hir) {
1621
    public void addRegionInfo(HbckInfo hir) {
1594
      if (Bytes.equals(hir.getEndKey(), HConstants.EMPTY_END_ROW)) {
1622
      if (Bytes.equals(hir.getEndKey(), HConstants.EMPTY_END_ROW)) {
1595
        // end key is absolute end key, just add it.
1623
        // end key is absolute end key, just add it.
1596
        sc.add(hir);
1624
        sc.add(hir);
1597
        return;
1625
        return;
1598
      }
1626
      }
1599

    
   
1627

   
1600
      // if not the absolute end key, check for cycle 
1628
      // if not the absolute end key, check for cycle 
1601
      if (Bytes.compareTo(hir.getStartKey(), hir.getEndKey()) > 0) {
1629
      if (Bytes.compareTo(hir.getStartKey(), hir.getEndKey()) > 0) {
1602
        errors.reportError(
1630
        errors.reportError(
1603
            ERROR_CODE.REGION_CYCLE,
1631
            ERROR_CODE.REGION_CYCLE,
1604
            String.format("The endkey for this region comes before the "
1632
            String.format("The endkey for this region comes before the "
1605
                + "startkey, startkey=%s, endkey=%s",
1633
                + "startkey, startkey=%s, endkey=%s",
1606
                Bytes.toStringBinary(hir.getStartKey()),
1634
                Bytes.toStringBinary(hir.getStartKey()),
1607
                Bytes.toStringBinary(hir.getEndKey())), this, hir);
1635
                Bytes.toStringBinary(hir.getEndKey())), this, hir);
1608
        backwards.add(hir);
1636
        backwards.add(hir);
1609
        return;
1637
        return;
1610
      }
1638
      }
1611

    
   
1639

   
1612
      // main case, add to split calculator
1640
      // main case, add to split calculator
1613
      sc.add(hir);
1641
      sc.add(hir);
1614
    }
1642
    }
1615

    
   
1643

   
1616
    public void addServer(ServerName server) {
1644
    public void addServer(ServerName server) {
1617
      this.deployedOn.add(server);
1645
      this.deployedOn.add(server);
1618
    }
1646
    }
1619

    
   
1647

   
1620
    public String getName() {
1648
    public String getName() {
1621
      return tableName;
1649
      return tableName;
1622
    }
1650
    }
1623

    
   
1651

   
1624
    public int getNumRegions() {
1652
    public int getNumRegions() {
1625
      return sc.getStarts().size() + backwards.size();
1653
      return sc.getStarts().size() + backwards.size();
1626
    }
1654
    }
1627

    
   
1655

   
1628
    private class IntegrityFixSuggester extends TableIntegrityErrorHandlerImpl {
1656
    private class IntegrityFixSuggester extends TableIntegrityErrorHandlerImpl {
1629
      ErrorReporter errors;
1657
      ErrorReporter errors;
1630

    
   
1658

   
1631
      IntegrityFixSuggester(TableInfo ti, ErrorReporter errors) {
1659
      IntegrityFixSuggester(TableInfo ti, ErrorReporter errors) {
1632
        this.errors = errors;
1660
        this.errors = errors;
1633
        setTableInfo(ti);
1661
        setTableInfo(ti);
1634
      }
1662
      }
1635

    
   
1663

   
1636
      @Override
1664
      @Override
1637
      public void handleRegionStartKeyNotEmpty(HbckInfo hi) throws IOException{
1665
      public void handleRegionStartKeyNotEmpty(HbckInfo hi) throws IOException{
1638
        errors.reportError(ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY,
1666
        errors.reportError(ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY,
1639
            "First region should start with an empty key.  You need to "
1667
            "First region should start with an empty key.  You need to "
1640
            + " create a new region and regioninfo in HDFS to plug the hole.",
1668
            + " create a new region and regioninfo in HDFS to plug the hole.",
1641
            getTableInfo(), hi);
1669
            getTableInfo(), hi);
1642
      }
1670
      }
1643

    
   
1671

   
1644
      @Override
1672
      @Override
1645
      public void handleDegenerateRegion(HbckInfo hi) throws IOException{
1673
      public void handleDegenerateRegion(HbckInfo hi) throws IOException{
1646
        errors.reportError(ERROR_CODE.DEGENERATE_REGION,
1674
        errors.reportError(ERROR_CODE.DEGENERATE_REGION,
1647
            "Region has the same start and end key.", getTableInfo(), hi);
1675
            "Region has the same start and end key.", getTableInfo(), hi);
1648
      }
1676
      }
1649

    
   
1677

   
1650
      @Override
1678
      @Override
1651
      public void handleDuplicateStartKeys(HbckInfo r1, HbckInfo r2) throws IOException{
1679
      public void handleDuplicateStartKeys(HbckInfo r1, HbckInfo r2) throws IOException{
1652
        byte[] key = r1.getStartKey();
1680
        byte[] key = r1.getStartKey();
1653
        // dup start key
1681
        // dup start key
1654
        errors.reportError(ERROR_CODE.DUPE_STARTKEYS,
1682
        errors.reportError(ERROR_CODE.DUPE_STARTKEYS,
1655
            "Multiple regions have the same startkey: "
1683
            "Multiple regions have the same startkey: "
1656
            + Bytes.toStringBinary(key), getTableInfo(), r1);
1684
            + Bytes.toStringBinary(key), getTableInfo(), r1);
1657
        errors.reportError(ERROR_CODE.DUPE_STARTKEYS,
1685
        errors.reportError(ERROR_CODE.DUPE_STARTKEYS,
1658
            "Multiple regions have the same startkey: "
1686
            "Multiple regions have the same startkey: "
1659
            + Bytes.toStringBinary(key), getTableInfo(), r2);
1687
            + Bytes.toStringBinary(key), getTableInfo(), r2);
1660
      }
1688
      }
1661

    
   
1689

   
1662
      @Override
1690
      @Override
1663
      public void handleOverlapInRegionChain(HbckInfo hi1, HbckInfo hi2) throws IOException{
1691
      public void handleOverlapInRegionChain(HbckInfo hi1, HbckInfo hi2) throws IOException{
1664
        errors.reportError(ERROR_CODE.OVERLAP_IN_REGION_CHAIN,
1692
        errors.reportError(ERROR_CODE.OVERLAP_IN_REGION_CHAIN,
1665
            "There is an overlap in the region chain.",
1693
            "There is an overlap in the region chain.",
1666
            getTableInfo(), hi1, hi2);
1694
            getTableInfo(), hi1, hi2);
1667
      }
1695
      }
1668

    
   
1696

   
1669
      @Override
1697
      @Override
1670
      public void handleHoleInRegionChain(byte[] holeStart, byte[] holeStop) throws IOException{
1698
      public void handleHoleInRegionChain(byte[] holeStart, byte[] holeStop) throws IOException{
1671
        errors.reportError(
1699
        errors.reportError(
1672
            ERROR_CODE.HOLE_IN_REGION_CHAIN,
1700
            ERROR_CODE.HOLE_IN_REGION_CHAIN,
1673
            "There is a hole in the region chain between "
1701
            "There is a hole in the region chain between "
1674
                + Bytes.toStringBinary(holeStart) + " and "
1702
                + Bytes.toStringBinary(holeStart) + " and "
1675
                + Bytes.toStringBinary(holeStop)
1703
                + Bytes.toStringBinary(holeStop)
1676
                + ".  You need to create a new .regioninfo and region "
1704
                + ".  You need to create a new .regioninfo and region "
1677
                + "dir in hdfs to plug the hole.");
1705
                + "dir in hdfs to plug the hole.");
1678
      }
1706
      }
1679
    };
1707
    };
1680

    
   
1708

   
1681
    /**
1709
    /**
1682
     * This handler fixes integrity errors from hdfs information.  There are
1710
     * This handler fixes integrity errors from hdfs information.  There are
1683
     * basically three classes of integrity problems 1) holes, 2) overlaps, and
1711
     * basically three classes of integrity problems 1) holes, 2) overlaps, and
1684
     * 3) invalid regions.
1712
     * 3) invalid regions.
1685
     *
1713
     *
1686
     * This class overrides methods that fix holes and the overlap group case.
1714
     * This class overrides methods that fix holes and the overlap group case.
1687
     * Individual cases of particular overlaps are handled by the general
1715
     * Individual cases of particular overlaps are handled by the general
1688
     * overlap group merge repair case.
1716
     * overlap group merge repair case.
1689
     *
1717
     *
1690
     * If hbase is online, this forces regions offline before doing merge
1718
     * If hbase is online, this forces regions offline before doing merge
1691
     * operations.
1719
     * operations.
1692
     */
1720
     */
1693
    private class HDFSIntegrityFixer extends IntegrityFixSuggester {
1721
    private class HDFSIntegrityFixer extends IntegrityFixSuggester {
1694
      Configuration conf;
1722
      Configuration conf;
1695

    
   
1723

   
1696
      boolean fixOverlaps = true;
1724
      boolean fixOverlaps = true;
1697

    
   
1725

   
1698
      HDFSIntegrityFixer(TableInfo ti, ErrorReporter errors, Configuration conf,
1726
      HDFSIntegrityFixer(TableInfo ti, ErrorReporter errors, Configuration conf,
1699
          boolean fixHoles, boolean fixOverlaps) {
1727
          boolean fixHoles, boolean fixOverlaps) {
1700
        super(ti, errors);
1728
        super(ti, errors);
1701
        this.conf = conf;
1729
        this.conf = conf;
1702
        this.fixOverlaps = fixOverlaps;
1730
        this.fixOverlaps = fixOverlaps;
1703
        // TODO properly use fixHoles
1731
        // TODO properly use fixHoles
1704
      }
1732
      }
1705

    
   
1733

   
1706
      /**
1734
      /**
1707
       * This is a special case hole -- when the first region of a table is
1735
       * This is a special case hole -- when the first region of a table is
1708
       * missing from META, HBase doesn't acknowledge the existance of the
1736
       * missing from META, HBase doesn't acknowledge the existance of the
1709
       * table.
1737
       * table.
1710
       */
1738
       */
1711
      public void handleRegionStartKeyNotEmpty(HbckInfo next) throws IOException {
1739
      public void handleRegionStartKeyNotEmpty(HbckInfo next) throws IOException {
1712
        errors.reportError(ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY,
1740
        errors.reportError(ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY,
1713
            "First region should start with an empty key.  Creating a new " +
1741
            "First region should start with an empty key.  Creating a new " +
1714
            "region and regioninfo in HDFS to plug the hole.",
1742
            "region and regioninfo in HDFS to plug the hole.",
1715
            getTableInfo(), next);
1743
            getTableInfo(), next);
1716
        HTableDescriptor htd = getTableInfo().getHTD();
1744
        HTableDescriptor htd = getTableInfo().getHTD();
1717
        // from special EMPTY_START_ROW to next region's startKey
1745
        // from special EMPTY_START_ROW to next region's startKey
1718
        HRegionInfo newRegion = new HRegionInfo(htd.getName(),
1746
        HRegionInfo newRegion = new HRegionInfo(htd.getName(),
1719
            HConstants.EMPTY_START_ROW, next.getStartKey());
1747
            HConstants.EMPTY_START_ROW, next.getStartKey());
1720

    
   
1748

   
1721
        // TODO test
1749
        // TODO test
1722
        HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
1750
        HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
1723
        LOG.info("Table region start key was not empty.  Created new empty region: "
1751
        LOG.info("Table region start key was not empty.  Created new empty region: "
1724
            + newRegion + " " +region);
1752
            + newRegion + " " +region);
1725
        fixes++;
1753
        fixes++;
1726
      }
1754
      }
1727

    
   
1755

   
1728
      /**
1756
      /**
1729
       * There is a hole in the hdfs regions that violates the table integrity
1757
       * There is a hole in the hdfs regions that violates the table integrity
1730
       * rules.  Create a new empty region that patches the hole.
1758
       * rules.  Create a new empty region that patches the hole.
1731
       */
1759
       */
1732
      public void handleHoleInRegionChain(byte[] holeStartKey, byte[] holeStopKey) throws IOException {
1760
      public void handleHoleInRegionChain(byte[] holeStartKey, byte[] holeStopKey) throws IOException {
1733
        errors.reportError(
1761
        errors.reportError(
1734
            ERROR_CODE.HOLE_IN_REGION_CHAIN,
1762
            ERROR_CODE.HOLE_IN_REGION_CHAIN,
1735
            "There is a hole in the region chain between "
1763
            "There is a hole in the region chain between "
1736
                + Bytes.toStringBinary(holeStartKey) + " and "
1764
                + Bytes.toStringBinary(holeStartKey) + " and "
1737
                + Bytes.toStringBinary(holeStopKey)
1765
                + Bytes.toStringBinary(holeStopKey)
1738
                + ".  Creating a new regioninfo and region "
1766
                + ".  Creating a new regioninfo and region "
1739
                + "dir in hdfs to plug the hole.");
1767
                + "dir in hdfs to plug the hole.");
1740
        HTableDescriptor htd = getTableInfo().getHTD();
1768
        HTableDescriptor htd = getTableInfo().getHTD();
1741
        HRegionInfo newRegion = new HRegionInfo(htd.getName(), holeStartKey, holeStopKey);
1769
        HRegionInfo newRegion = new HRegionInfo(htd.getName(), holeStartKey, holeStopKey);
1742
        HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
1770
        HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
1743
        LOG.info("Plugged hold by creating new empty region: "+ newRegion + " " +region);
1771
        LOG.info("Plugged hold by creating new empty region: "+ newRegion + " " +region);
1744
        fixes++;
1772
        fixes++;
1745
      }
1773
      }
1746

    
   
1774

   
1747
      /**
1775
      /**
1748
       * This takes set of overlapping regions and merges them into a single
1776
       * This takes set of overlapping regions and merges them into a single
1749
       * region.  This covers cases like degenerate regions, shared start key,
1777
       * region.  This covers cases like degenerate regions, shared start key,
1750
       * general overlaps, duplicate ranges, and partial overlapping regions.
1778
       * general overlaps, duplicate ranges, and partial overlapping regions.
1751
       *
1779
       *
1752
       * Cases:
1780
       * Cases:
1753
       * - Clean regions that overlap
1781
       * - Clean regions that overlap
1754
       * - Only .oldlogs regions (can't find start/stop range, or figure out)
1782
       * - Only .oldlogs regions (can't find start/stop range, or figure out)
1755
       */
1783
       */
1756
      @Override
1784
      @Override
1757
      public void handleOverlapGroup(Collection<HbckInfo> overlap)
1785
      public void handleOverlapGroup(Collection<HbckInfo> overlap)
1758
          throws IOException {
1786
          throws IOException {
1759
        Preconditions.checkNotNull(overlap);
1787
        Preconditions.checkNotNull(overlap);
1760
        Preconditions.checkArgument(overlap.size() >0);
1788
        Preconditions.checkArgument(overlap.size() >0);
1761

    
   
1789

   
1762
        if (!this.fixOverlaps) {
1790
        if (!this.fixOverlaps) {
1763
          LOG.warn("Not attempting to repair overlaps.");
1791
          LOG.warn("Not attempting to repair overlaps.");
1764
          return;
1792
          return;
1765
        }
1793
        }
1766

    
   
1794

   
1767
        if (overlap.size() > maxMerge) {
1795
        if (overlap.size() > maxMerge) {
1768
          LOG.warn("Overlap group has " + overlap.size() + " overlapping " +
1796
          LOG.warn("Overlap group has " + overlap.size() + " overlapping " +
1769
            "regions which is greater than " + maxMerge + ", the max number of regions to merge");
1797
            "regions which is greater than " + maxMerge + ", the max number of regions to merge");
1770
          if (sidelineBigOverlaps) {
1798
          if (sidelineBigOverlaps) {
1771
            // we only sideline big overlapped groups that exceeds the max number of regions to merge
1799
            // we only sideline big overlapped groups that exceeds the max number of regions to merge
1772
            sidelineBigOverlaps(overlap);
1800
            sidelineBigOverlaps(overlap);
1773
          }
1801
          }
1774
          return;
1802
          return;
1775
        }
1803
        }
1776

    
   
1804

   
1777
        mergeOverlaps(overlap);
1805
        mergeOverlaps(overlap);
1778
      }
1806
      }
1779

    
   
1807

   
1780
      void mergeOverlaps(Collection<HbckInfo> overlap)
1808
      void mergeOverlaps(Collection<HbckInfo> overlap)
1781
          throws IOException {
1809
          throws IOException {
1782
        LOG.info("== Merging regions into one region: "
1810
        LOG.info("== Merging regions into one region: "
1783
          + Joiner.on(",").join(overlap));
1811
          + Joiner.on(",").join(overlap));
1784
        // get the min / max range and close all concerned regions
1812
        // get the min / max range and close all concerned regions
1785
        Pair<byte[], byte[]> range = null;
1813
        Pair<byte[], byte[]> range = null;
1786
        for (HbckInfo hi : overlap) {
1814
        for (HbckInfo hi : overlap) {
1787
          if (range == null) {
1815
          if (range == null) {
1788
            range = new Pair<byte[], byte[]>(hi.getStartKey(), hi.getEndKey());
1816
            range = new Pair<byte[], byte[]>(hi.getStartKey(), hi.getEndKey());
1789
          } else {
1817
          } else {
1790
            if (RegionSplitCalculator.BYTES_COMPARATOR
1818
            if (RegionSplitCalculator.BYTES_COMPARATOR
1791
                .compare(hi.getStartKey(), range.getFirst()) < 0) {
1819
                .compare(hi.getStartKey(), range.getFirst()) < 0) {
1792
              range.setFirst(hi.getStartKey());
1820
              range.setFirst(hi.getStartKey());
1793
            }
1821
            }
1794
            if (RegionSplitCalculator.BYTES_COMPARATOR
1822
            if (RegionSplitCalculator.BYTES_COMPARATOR
1795
                .compare(hi.getEndKey(), range.getSecond()) > 0) {
1823
                .compare(hi.getEndKey(), range.getSecond()) > 0) {
1796
              range.setSecond(hi.getEndKey());
1824
              range.setSecond(hi.getEndKey());
1797
            }
1825
            }
1798
          }
1826
          }
1799
          // need to close files so delete can happen.
1827
          // need to close files so delete can happen.
1800
          LOG.debug("Closing region before moving data around: " +  hi);
1828
          LOG.debug("Closing region before moving data around: " +  hi);
1801
          LOG.debug("Contained region dir before close");
1829
          LOG.debug("Contained region dir before close");
1802
          debugLsr(hi.getHdfsRegionDir());
1830
          debugLsr(hi.getHdfsRegionDir());
1803
          try {
1831
          try {
1804
            closeRegion(hi);
1832
            closeRegion(hi);
1805
          } catch (IOException ioe) {
1833
          } catch (IOException ioe) {
1806
            // TODO exercise this
1834
            // TODO exercise this
1807
            LOG.warn("Was unable to close region " + hi.getRegionNameAsString()
1835
            LOG.warn("Was unable to close region " + hi.getRegionNameAsString()
1808
                + ".  Just continuing... ");
1836
                + ".  Just continuing... ");
1809
          } catch (InterruptedException e) {
1837
          } catch (InterruptedException e) {
1810
            // TODO exercise this
1838
            // TODO exercise this
1811
            LOG.warn("Was unable to close region " + hi.getRegionNameAsString()
1839
            LOG.warn("Was unable to close region " + hi.getRegionNameAsString()
1812
                + ".  Just continuing... ");
1840
                + ".  Just continuing... ");
1813
          }
1841
          }
1814

    
   
1842

   
1815
          try {
1843
          try {
1816
            LOG.info("Offlining region: " + hi);
1844
            LOG.info("Offlining region: " + hi);
1817
            offline(hi.getRegionName());
1845
            offline(hi.getRegionName());
1818
          } catch (IOException ioe) {
1846
          } catch (IOException ioe) {
1819
            LOG.warn("Unable to offline region from master: " + hi, ioe);
1847
            LOG.warn("Unable to offline region from master: " + hi, ioe);
1820
          }
1848
          }
1821
        }
1849
        }
1822

    
   
1850

   
1823
        // create new empty container region.
1851
        // create new empty container region.
1824
        HTableDescriptor htd = getTableInfo().getHTD();
1852
        HTableDescriptor htd = getTableInfo().getHTD();
1825
        // from start key to end Key
1853
        // from start key to end Key
1826
        HRegionInfo newRegion = new HRegionInfo(htd.getName(), range.getFirst(),
1854
        HRegionInfo newRegion = new HRegionInfo(htd.getName(), range.getFirst(),
1827
            range.getSecond());
1855
            range.getSecond());
1828
        HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
1856
        HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
1829
        LOG.info("Created new empty container region: " +
1857
        LOG.info("Created new empty container region: " +
1830
            newRegion + " to contain regions: " + Joiner.on(",").join(overlap));
1858
            newRegion + " to contain regions: " + Joiner.on(",").join(overlap));
1831
        debugLsr(region.getRegionDir());
1859
        debugLsr(region.getRegionDir());
1832

    
   
1860

   
1833
        // all target regions are closed, should be able to safely cleanup.
1861
        // all target regions are closed, should be able to safely cleanup.
1834
        boolean didFix= false;
1862
        boolean didFix= false;
1835
        Path target = region.getRegionDir();
1863
        Path target = region.getRegionDir();
1836
        for (HbckInfo contained : overlap) {
1864
        for (HbckInfo contained : overlap) {
1837
          LOG.info("Merging " + contained  + " into " + target );
1865
          LOG.info("Merging " + contained  + " into " + target );
1838
          int merges = mergeRegionDirs(target, contained);
1866
          int merges = mergeRegionDirs(target, contained);
1839
          if (merges > 0) {
1867
          if (merges > 0) {
1840
            didFix = true;
1868
            didFix = true;
1841
          }
1869
          }
1842
        }
1870
        }
1843
        if (didFix) {
1871
        if (didFix) {
1844
          fixes++;
1872
          fixes++;
1845
        }
1873
        }
1846
      }
1874
      }
1847

    
   
1875

   
1848
      /**
       * Sideline some regions in a big overlap group so that it
       * will have fewer regions, and it is easier to merge them later on.
       *
       * For each chosen region the sequence is: close it on the region server,
       * offline it in the master, then move its HDFS directory aside so the
       * data can be bulk loaded back later.
       *
       * @param bigOverlap the overlapped group with regions more than maxMerge
       * @throws IOException
       */
      void sidelineBigOverlaps(
          Collection<HbckInfo> bigOverlap) throws IOException {
        // Cap the number of regions we sideline in one pass.
        int overlapsToSideline = bigOverlap.size() - maxMerge;
        if (overlapsToSideline > maxOverlapsToSideline) {
          overlapsToSideline = maxOverlapsToSideline;
        }
        // Pick the regions that span the most other regions; removing those
        // shrinks the group fastest.
        List<HbckInfo> regionsToSideline =
          RegionSplitCalculator.findBigRanges(bigOverlap, overlapsToSideline);
        FileSystem fs = FileSystem.get(conf);
        for (HbckInfo regionToSideline: regionsToSideline) {
          try {
            LOG.info("Closing region: " + regionToSideline);
            closeRegion(regionToSideline);
          } catch (InterruptedException ie) {
            LOG.warn("Was unable to close region " + regionToSideline.getRegionNameAsString()
              + ".  Interrupted.");
            // Wrap so callers only need to handle IOException.
            throw new IOException(ie);
          }

          LOG.info("Offlining region: " + regionToSideline);
          offline(regionToSideline.getRegionName());

          LOG.info("Before sideline big overlapped region: " + regionToSideline.toString());
          Path sidelineRegionDir = sidelineRegionDir(fs, regionToSideline);
          if (sidelineRegionDir != null) {
            // Remember where the data went so it can be reported / bulk loaded.
            sidelinedRegions.put(sidelineRegionDir, regionToSideline);
            LOG.info("After sidelined big overlapped region: "
              + regionToSideline.getRegionNameAsString()
              + " to " + sidelineRegionDir.toString());
            fixes++;
          }
        }
      }
1888
    }
1916
    }
1889

    
   
1917

   
1890
    /**
     * Check the region chain (from META) of this table.  We are looking for
     * holes, overlaps, and cycles.
     *
     * Walks every split key in order, classifying each by how many regions
     * start there: exactly one is healthy, more than one is an overlap group,
     * zero is a hole.  Problems are delegated to the supplied handler.
     *
     * @return false if there are errors
     * @throws IOException
     */
    public boolean checkRegionChain(TableIntegrityErrorHandler handler) throws IOException {
      // Snapshot the error count so we can tell whether this walk added any.
      int originalErrorsCount = errors.getErrorList().size();
      Multimap<byte[], HbckInfo> regions = sc.calcCoverage();
      SortedSet<byte[]> splits = sc.getSplits();

      byte[] prevKey = null;
      byte[] problemKey = null;  // start key naming the current overlap group, if any
      for (byte[] key : splits) {
        Collection<HbckInfo> ranges = regions.get(key);
        // The very first region of a table must start at the empty key.
        if (prevKey == null && !Bytes.equals(key, HConstants.EMPTY_BYTE_ARRAY)) {
          for (HbckInfo rng : ranges) {
            handler.handleRegionStartKeyNotEmpty(rng);
          }
        }

        // check for degenerate ranges (startKey == endKey)
        for (HbckInfo rng : ranges) {
          // special endkey case converts '' to null
          byte[] endKey = rng.getEndKey();
          endKey = (endKey.length == 0) ? null : endKey;
          if (Bytes.equals(rng.getStartKey(),endKey)) {
            handler.handleDegenerateRegion(rng);
          }
        }

        if (ranges.size() == 1) {
          // this split key is ok -- no overlap, not a hole.
          if (problemKey != null) {
            LOG.warn("reached end of problem group: " + Bytes.toStringBinary(key));
          }
          problemKey = null; // fell through, no more problem.
        } else if (ranges.size() > 1) {
          // set the new problem key group name, if already have problem key, just
          // keep using it.
          if (problemKey == null) {
            // only for overlap regions.
            LOG.warn("Naming new problem group: " + Bytes.toStringBinary(key));
            problemKey = key;
          }
          // All regions at this split key belong to the current problem group.
          overlapGroups.putAll(problemKey, ranges);

          // record errors for each distinct pair in the group
          ArrayList<HbckInfo> subRange = new ArrayList<HbckInfo>(ranges);
          //  this dumb and n^2 but this shouldn't happen often
          for (HbckInfo r1 : ranges) {
            subRange.remove(r1);
            for (HbckInfo r2 : subRange) {
              if (Bytes.compareTo(r1.getStartKey(), r2.getStartKey())==0) {
                handler.handleDuplicateStartKeys(r1,r2);
              } else {
                // overlap
                handler.handleOverlapInRegionChain(r1, r2);
              }
            }
          }

        } else if (ranges.size() == 0) {
          // No region starts at this key: a hole, which also terminates any
          // in-progress problem group.
          if (problemKey != null) {
            LOG.warn("reached end of problem group: " + Bytes.toStringBinary(key));
          }
          problemKey = null;

          byte[] holeStopKey = sc.getSplits().higher(key);
          // if higher key is null we reached the top.
          if (holeStopKey != null) {
            // hole
            handler.handleHoleInRegionChain(key, holeStopKey);
          }
        }
        prevKey = key;
      }

      // Report each accumulated overlap group as a unit.
      for (Collection<HbckInfo> overlap : overlapGroups.asMap().values()) {
        handler.handleOverlapGroup(overlap);
      }

      if (details) {
        // do full region split map dump
        System.out.println("---- Table '"  +  this.tableName
            + "': region split map");
        dump(splits, regions);
        System.out.println("---- Table '"  +  this.tableName
            + "': overlap groups");
        dumpOverlapProblems(overlapGroups);
        System.out.println("There are " + overlapGroups.keySet().size()
            + " overlap groups with " + overlapGroups.size()
            + " overlapping regions");
      }
      if (!sidelinedRegions.isEmpty()) {
        LOG.warn("Sidelined big overlapped regions, please bulk load them!");
        System.out.println("---- Table '"  +  this.tableName
            + "': sidelined big overlapped regions");
        dumpSidelinedRegions(sidelinedRegions);
      }
      // True only if this walk produced no new errors.
      return errors.getErrorList().size() == originalErrorsCount;
    }
1992

    
   
2020

   
1993
    /**
2021
    /**
1994
     * This dumps data in a visually reasonable way for visual debugging
2022
     * This dumps data in a visually reasonable way for visual debugging
1995
     * 
2023
     * 
1996
     * @param splits
2024
     * @param splits
1997
     * @param regions
2025
     * @param regions
1998
     */
2026
     */
1999
    void dump(SortedSet<byte[]> splits, Multimap<byte[], HbckInfo> regions) {
2027
    void dump(SortedSet<byte[]> splits, Multimap<byte[], HbckInfo> regions) {
2000
      // we display this way because the last end key should be displayed as well.
2028
      // we display this way because the last end key should be displayed as well.
2001
      for (byte[] k : splits) {
2029
      for (byte[] k : splits) {
2002
        System.out.print(Bytes.toStringBinary(k) + ":\t");
2030
        System.out.print(Bytes.toStringBinary(k) + ":\t");
2003
        for (HbckInfo r : regions.get(k)) {
2031
        for (HbckInfo r : regions.get(k)) {
2004
          System.out.print("[ "+ r.toString() + ", "
2032
          System.out.print("[ "+ r.toString() + ", "
2005
              + Bytes.toStringBinary(r.getEndKey())+ "]\t");
2033
              + Bytes.toStringBinary(r.getEndKey())+ "]\t");
2006
        }
2034
        }
2007
        System.out.println();
2035
        System.out.println();
2008
      }
2036
      }
2009
    }
2037
    }
2010
  }
2038
  }
2011

    
   
2039

   
2012
  public void dumpOverlapProblems(Multimap<byte[], HbckInfo> regions) {
2040
  public void dumpOverlapProblems(Multimap<byte[], HbckInfo> regions) {
2013
    // we display this way because the last end key should be displayed as
2041
    // we display this way because the last end key should be displayed as
2014
    // well.
2042
    // well.
2015
    for (byte[] k : regions.keySet()) {
2043
    for (byte[] k : regions.keySet()) {
2016
      System.out.print(Bytes.toStringBinary(k) + ":\n");
2044
      System.out.print(Bytes.toStringBinary(k) + ":\n");
2017
      for (HbckInfo r : regions.get(k)) {
2045
      for (HbckInfo r : regions.get(k)) {
2018
        System.out.print("[ " + r.toString() + ", "
2046
        System.out.print("[ " + r.toString() + ", "
2019
            + Bytes.toStringBinary(r.getEndKey()) + "]\n");
2047
            + Bytes.toStringBinary(r.getEndKey()) + "]\n");
2020
      }
2048
      }
2021
      System.out.println("----");
2049
      System.out.println("----");
2022
    }
2050
    }
2023
  }
2051
  }
2024

    
   
2052

   
2025
  public void dumpSidelinedRegions(Map<Path, HbckInfo> regions) {
2053
  public void dumpSidelinedRegions(Map<Path, HbckInfo> regions) {
2026
    for (Path k : regions.keySet()) {
2054
    for (Path k : regions.keySet()) {
2027
      System.out.println("To be bulk loaded sidelined region dir: "
2055
      System.out.println("To be bulk loaded sidelined region dir: "
2028
        + k.toString());
2056
        + k.toString());
2029
    }
2057
    }
2030
  }
2058
  }
2031

    
   
2059

   
2032
  public Multimap<byte[], HbckInfo> getOverlapGroups(
2060
  public Multimap<byte[], HbckInfo> getOverlapGroups(
2033
      String table) {
2061
      String table) {
2034
    TableInfo ti = tablesInfo.get(table);
2062
    TableInfo ti = tablesInfo.get(table);
2035
    return ti.overlapGroups;
2063
    return ti.overlapGroups;
2036
  }
2064
  }
2037

    
   
2065

   
2038
  /**
2066
  /**
2039
   * Return a list of user-space table names whose metadata have not been
2067
   * Return a list of user-space table names whose metadata have not been
2040
   * modified in the last few milliseconds specified by timelag
2068
   * modified in the last few milliseconds specified by timelag
2041
   * if any of the REGIONINFO_QUALIFIER, SERVER_QUALIFIER, STARTCODE_QUALIFIER,
2069
   * if any of the REGIONINFO_QUALIFIER, SERVER_QUALIFIER, STARTCODE_QUALIFIER,
2042
   * SPLITA_QUALIFIER, SPLITB_QUALIFIER have not changed in the last
2070
   * SPLITA_QUALIFIER, SPLITB_QUALIFIER have not changed in the last
2043
   * milliseconds specified by timelag, then the table is a candidate to be returned.
2071
   * milliseconds specified by timelag, then the table is a candidate to be returned.
2044
   * @return tables that have not been modified recently
2072
   * @return tables that have not been modified recently
2045
   * @throws IOException if an error is encountered
2073
   * @throws IOException if an error is encountered
2046
   */
2074
   */
2047
   HTableDescriptor[] getTables(AtomicInteger numSkipped) {
2075
   HTableDescriptor[] getTables(AtomicInteger numSkipped) {
2048
    List<String> tableNames = new ArrayList<String>();
2076
    List<String> tableNames = new ArrayList<String>();
2049
    long now = System.currentTimeMillis();
2077
    long now = System.currentTimeMillis();
2050

    
   
2078

   
2051
    for (HbckInfo hbi : regionInfoMap.values()) {
2079
    for (HbckInfo hbi : regionInfoMap.values()) {
2052
      MetaEntry info = hbi.metaEntry;
2080
      MetaEntry info = hbi.metaEntry;
2053

    
   
2081

   
2054
      // if the start key is zero, then we have found the first region of a table.
2082
      // if the start key is zero, then we have found the first region of a table.
2055
      // pick only those tables that were not modified in the last few milliseconds.
2083
      // pick only those tables that were not modified in the last few milliseconds.
2056
      if (info != null && info.getStartKey().length == 0 && !info.isMetaRegion()) {
2084
      if (info != null && info.getStartKey().length == 0 && !info.isMetaRegion()) {
2057
        if (info.modTime + timelag < now) {
2085
        if (info.modTime + timelag < now) {
2058
          tableNames.add(info.getTableNameAsString());
2086
          tableNames.add(info.getTableNameAsString());
2059
        } else {
2087
        } else {
2060
          numSkipped.incrementAndGet(); // one more in-flux table
2088
          numSkipped.incrementAndGet(); // one more in-flux table
2061
        }
2089
        }
2062
      }
2090
      }
2063
    }
2091
    }
2064
    return getHTableDescriptors(tableNames);
2092
    return getHTableDescriptors(tableNames);
2065
  }
2093
  }
2066

    
   
2094

   
2067
   HTableDescriptor[] getHTableDescriptors(List<String> tableNames) {
2095
   HTableDescriptor[] getHTableDescriptors(List<String> tableNames) {
2068
    HTableDescriptor[] htd = null;
2096
    HTableDescriptor[] htd = null;
2069
     try {
2097
     try {
2070
       LOG.info("getHTableDescriptors == tableNames => " + tableNames);
2098
       LOG.info("getHTableDescriptors == tableNames => " + tableNames);
2071
       htd = new HBaseAdmin(conf).getTableDescriptors(tableNames);
2099
       htd = new HBaseAdmin(conf).getTableDescriptors(tableNames);
2072
     } catch (IOException e) {
2100
     } catch (IOException e) {
2073
       LOG.debug("Exception getting table descriptors", e);
2101
       LOG.debug("Exception getting table descriptors", e);
2074
     }
2102
     }
2075
     return htd;
2103
     return htd;
2076
  }
2104
  }
2077

    
   
2105

   
2078

    
   
2106

   
2079
  /**
2107
  /**
2080
   * Gets the entry in regionInfo corresponding to the the given encoded
2108
   * Gets the entry in regionInfo corresponding to the the given encoded
2081
   * region name. If the region has not been seen yet, a new entry is added
2109
   * region name. If the region has not been seen yet, a new entry is added
2082
   * and returned.
2110
   * and returned.
2083
   */
2111
   */
2084
  private synchronized HbckInfo getOrCreateInfo(String name) {
2112
  private synchronized HbckInfo getOrCreateInfo(String name) {
2085
    HbckInfo hbi = regionInfoMap.get(name);
2113
    HbckInfo hbi = regionInfoMap.get(name);
2086
    if (hbi == null) {
2114
    if (hbi == null) {
2087
      hbi = new HbckInfo(null);
2115
      hbi = new HbckInfo(null);
2088
      regionInfoMap.put(name, hbi);
2116
      regionInfoMap.put(name, hbi);
2089
    }
2117
    }
2090
    return hbi;
2118
    return hbi;
2091
  }
2119
  }
2092

    
   
2120

   
2093
  /**
2121
  /**
2094
    * Check values in regionInfo for .META.
2122
    * Check values in regionInfo for .META.
2095
    * Check if zero or more than one regions with META are found.
2123
    * Check if zero or more than one regions with META are found.
2096
    * If there are inconsistencies (i.e. zero or more than one regions
2124
    * If there are inconsistencies (i.e. zero or more than one regions
2097
    * pretend to be holding the .META.) try to fix that and report an error.
2125
    * pretend to be holding the .META.) try to fix that and report an error.
2098
    * @throws IOException from HBaseFsckRepair functions
2126
    * @throws IOException from HBaseFsckRepair functions
2099
   * @throws KeeperException
2127
   * @throws KeeperException
2100
   * @throws InterruptedException
2128
   * @throws InterruptedException
2101
    */
2129
    */
2102
  boolean checkMetaRegion()
2130
  boolean checkMetaRegion()
2103
    throws IOException, KeeperException, InterruptedException {
2131
    throws IOException, KeeperException, InterruptedException {
2104
    List <HbckInfo> metaRegions = Lists.newArrayList();
2132
    List <HbckInfo> metaRegions = Lists.newArrayList();
2105
    for (HbckInfo value : regionInfoMap.values()) {
2133
    for (HbckInfo value : regionInfoMap.values()) {
2106
      if (value.metaEntry.isMetaRegion()) {
2134
      if (value.metaEntry.isMetaRegion()) {
2107
        metaRegions.add(value);
2135
        metaRegions.add(value);
2108
      }
2136
      }
2109
    }
2137
    }
2110

    
   
2138

   
2111
    // If something is wrong
2139
    // If something is wrong
2112
    if (metaRegions.size() != 1) {
2140
    if (metaRegions.size() != 1) {
2113
      HRegionLocation rootLocation = connection.locateRegion(
2141
      HRegionLocation rootLocation = connection.locateRegion(
2114
        HConstants.ROOT_TABLE_NAME, HConstants.EMPTY_START_ROW);
2142
        HConstants.ROOT_TABLE_NAME, HConstants.EMPTY_START_ROW);
2115
      HbckInfo root =
2143
      HbckInfo root =
2116
          regionInfoMap.get(rootLocation.getRegionInfo().getEncodedName());
2144
          regionInfoMap.get(rootLocation.getRegionInfo().getEncodedName());
2117

    
   
2145

   
2118
      // If there is no region holding .META.
2146
      // If there is no region holding .META.
2119
      if (metaRegions.size() == 0) {
2147
      if (metaRegions.size() == 0) {
2120
        errors.reportError(ERROR_CODE.NO_META_REGION, ".META. is not found on any region.");
2148
        errors.reportError(ERROR_CODE.NO_META_REGION, ".META. is not found on any region.");
2121
        if (shouldFixAssignments()) {
2149
        if (shouldFixAssignments()) {
2122
          errors.print("Trying to fix a problem with .META...");
2150
          errors.print("Trying to fix a problem with .META...");
2123
          setShouldRerun();
2151
          setShouldRerun();
2124
          // try to fix it (treat it as unassigned region)
2152
          // try to fix it (treat it as unassigned region)
2125
          HBaseFsckRepair.fixUnassigned(admin, root.metaEntry);
2153
          HBaseFsckRepair.fixUnassigned(admin, root.metaEntry);
2126
          HBaseFsckRepair.waitUntilAssigned(admin, root.getHdfsHRI());
2154
          HBaseFsckRepair.waitUntilAssigned(admin, root.getHdfsHRI());
2127
        }
2155
        }
2128
      }
2156
      }
2129
      // If there are more than one regions pretending to hold the .META.
2157
      // If there are more than one regions pretending to hold the .META.
2130
      else if (metaRegions.size() > 1) {
2158
      else if (metaRegions.size() > 1) {
2131
        errors.reportError(ERROR_CODE.MULTI_META_REGION, ".META. is found on more than one region.");
2159
        errors.reportError(ERROR_CODE.MULTI_META_REGION, ".META. is found on more than one region.");
2132
        if (shouldFixAssignments()) {
2160
        if (shouldFixAssignments()) {
2133
          errors.print("Trying to fix a problem with .META...");
2161
          errors.print("Trying to fix a problem with .META...");
2134
          setShouldRerun();
2162
          setShouldRerun();
2135
          // try fix it (treat is a dupe assignment)
2163
          // try fix it (treat is a dupe assignment)
2136
          List <ServerName> deployedOn = Lists.newArrayList();
2164
          List <ServerName> deployedOn = Lists.newArrayList();
2137
          for (HbckInfo mRegion : metaRegions) {
2165
          for (HbckInfo mRegion : metaRegions) {
2138
            deployedOn.add(mRegion.metaEntry.regionServer);
2166
            deployedOn.add(mRegion.metaEntry.regionServer);
2139
          }
2167
          }
2140
          HBaseFsckRepair.fixMultiAssignment(admin, root.metaEntry, deployedOn);
2168
          HBaseFsckRepair.fixMultiAssignment(admin, root.metaEntry, deployedOn);
2141
        }
2169
        }
2142
      }
2170
      }
2143
      // rerun hbck with hopefully fixed META
2171
      // rerun hbck with hopefully fixed META
2144
      return false;
2172
      return false;
2145
    }
2173
    }
2146
    // no errors, so continue normally
2174
    // no errors, so continue normally
2147
    return true;
2175
    return true;
2148
  }
2176
  }
2149

    
   
2177

   
2150
  /**
2178
  /**
2151
   * Scan .META. and -ROOT-, adding all regions found to the regionInfo map.
2179
   * Scan .META. and -ROOT-, adding all regions found to the regionInfo map.
2152
   * @throws IOException if an error is encountered
2180
   * @throws IOException if an error is encountered
2153
   */
2181
   */
2154
  boolean loadMetaEntries() throws IOException {
2182
  boolean loadMetaEntries() throws IOException {
2155

    
   
2183

   
2156
    // get a list of all regions from the master. This involves
2184
    // get a list of all regions from the master. This involves
2157
    // scanning the META table
2185
    // scanning the META table
2158
    if (!recordRootRegion()) {
2186
    if (!recordRootRegion()) {
2159
      // Will remove later if we can fix it
2187
      // Will remove later if we can fix it
2160
      errors.reportError("Fatal error: unable to get root region location. Exiting...");
2188
      errors.reportError("Fatal error: unable to get root region location. Exiting...");
2161
      return false;
2189
      return false;
2162
    }
2190
    }
2163

    
   
2191

   
2164
    MetaScannerVisitor visitor = new MetaScannerVisitor() {
2192
    MetaScannerVisitor visitor = new MetaScannerVisitor() {
2165
      int countRecord = 1;
2193
      int countRecord = 1;
2166

    
   
2194

   
2167
      // comparator to sort KeyValues with latest modtime
2195
      // comparator to sort KeyValues with latest modtime
2168
      final Comparator<KeyValue> comp = new Comparator<KeyValue>() {
2196
      final Comparator<KeyValue> comp = new Comparator<KeyValue>() {
2169
        public int compare(KeyValue k1, KeyValue k2) {
2197
        public int compare(KeyValue k1, KeyValue k2) {
2170
          return (int)(k1.getTimestamp() - k2.getTimestamp());
2198
          return (int)(k1.getTimestamp() - k2.getTimestamp());
2171
        }
2199
        }
2172
      };
2200
      };
2173

    
   
2201

   
2174
      public boolean processRow(Result result) throws IOException {
2202
      public boolean processRow(Result result) throws IOException {
2175
        try {
2203
        try {
2176

    
   
2204

   
2177
          // record the latest modification of this META record
2205
          // record the latest modification of this META record
2178
          long ts =  Collections.max(result.list(), comp).getTimestamp();
2206
          long ts =  Collections.max(result.list(), comp).getTimestamp();
2179
          Pair<HRegionInfo, ServerName> pair = MetaReader.parseCatalogResult(result);
2207
          Pair<HRegionInfo, ServerName> pair = MetaReader.parseCatalogResult(result);
2180
          if (pair == null || pair.getFirst() == null) {
2208
          if (pair == null || pair.getFirst() == null) {
2181
            emptyRegionInfoQualifiers.add(result);
2209
            emptyRegionInfoQualifiers.add(result);
2182
            return true;
2210
            return true;
2183
          }
2211
          }
2184
          ServerName sn = null;
2212
          ServerName sn = null;
2185
          if (pair.getSecond() != null) {
2213
          if (pair.getSecond() != null) {
2186
            sn = pair.getSecond();
2214
            sn = pair.getSecond();
2187
          }
2215
          }
2188
          MetaEntry m = new MetaEntry(pair.getFirst(), sn, ts);
2216
          MetaEntry m = new MetaEntry(pair.getFirst(), sn, ts);
2189
          HbckInfo hbInfo = new HbckInfo(m);
2217
          HbckInfo hbInfo = new HbckInfo(m);
2190
          HbckInfo previous = regionInfoMap.put(pair.getFirst().getEncodedName(), hbInfo);
2218
          HbckInfo previous = regionInfoMap.put(pair.getFirst().getEncodedName(), hbInfo);
2191
          if (previous != null) {
2219
          if (previous != null) {
2192
            throw new IOException("Two entries in META are same " + previous);
2220
            throw new IOException("Two entries in META are same " + previous);
2193
          }
2221
          }
2194

    
   
2222

   
2195
          // show proof of progress to the user, once for every 100 records.
2223
          // show proof of progress to the user, once for every 100 records.
2196
          if (countRecord % 100 == 0) {
2224
          if (countRecord % 100 == 0) {
2197
            errors.progress();
2225
            errors.progress();
2198
          }
2226
          }
2199
          countRecord++;
2227
          countRecord++;
2200
          return true;
2228
          return true;
2201
        } catch (RuntimeException e) {
2229
        } catch (RuntimeException e) {
2202
          LOG.error("Result=" + result);
2230
          LOG.error("Result=" + result);
2203
          throw e;
2231
          throw e;
2204
        }
2232
        }
2205
      }
2233
      }
2206
    };
2234
    };
2207

    
   
2235

   
2208
    // Scan -ROOT- to pick up META regions
2236
    // Scan -ROOT- to pick up META regions
2209
    MetaScanner.metaScan(conf, visitor, null, null,
2237
    MetaScanner.metaScan(conf, visitor, null, null,
2210
      Integer.MAX_VALUE, HConstants.ROOT_TABLE_NAME);
2238
      Integer.MAX_VALUE, HConstants.ROOT_TABLE_NAME);
2211

    
   
2239

   
2212
    if (!checkMetaOnly) {
2240
    if (!checkMetaOnly) {
2213
      // Scan .META. to pick up user regions
2241
      // Scan .META. to pick up user regions
2214
      MetaScanner.metaScan(conf, visitor);
2242
      MetaScanner.metaScan(conf, visitor);
2215
    }
2243
    }
2216
    
2244
    
2217
    errors.print("");
2245
    errors.print("");
2218
    return true;
2246
    return true;
2219
  }
2247
  }
2220

    
   
2248

   
2221
  /**
2249
  /**
2222
   * Stores the regioninfo entries scanned from META
2250
   * Stores the regioninfo entries scanned from META
2223
   */
2251
   */
2224
  static class MetaEntry extends HRegionInfo {
2252
  static class MetaEntry extends HRegionInfo {
2225
    ServerName regionServer;   // server hosting this region
2253
    ServerName regionServer;   // server hosting this region
2226
    long modTime;          // timestamp of most recent modification metadata
2254
    long modTime;          // timestamp of most recent modification metadata
2227

    
   
2255

   
2228
    public MetaEntry(HRegionInfo rinfo, ServerName regionServer, long modTime) {
2256
    public MetaEntry(HRegionInfo rinfo, ServerName regionServer, long modTime) {
2229
      super(rinfo);
2257
      super(rinfo);
2230
      this.regionServer = regionServer;
2258
      this.regionServer = regionServer;
2231
      this.modTime = modTime;
2259
      this.modTime = modTime;
2232
    }
2260
    }
2233

    
   
2261

   
2234
    public boolean equals(Object o) {
2262
    public boolean equals(Object o) {
2235
      boolean superEq = super.equals(o);
2263
      boolean superEq = super.equals(o);
2236
      if (!superEq) {
2264
      if (!superEq) {
2237
        return superEq;
2265
        return superEq;
2238
      }
2266
      }
2239

    
   
2267

   
2240
      MetaEntry me = (MetaEntry) o;
2268
      MetaEntry me = (MetaEntry) o;
2241
      if (!regionServer.equals(me.regionServer)) {
2269
      if (!regionServer.equals(me.regionServer)) {
2242
        return false;
2270
        return false;
2243
      }
2271
      }
2244
      return (modTime == me.modTime);
2272
      return (modTime == me.modTime);
2245
    }
2273
    }
2246
  }
2274
  }
2247

    
   
2275

   
2248
  /**
   * Stores the regioninfo entries from HDFS
   */
  static class HdfsEntry {
    HRegionInfo hri;                            // deserialized from the .regioninfo file
    Path hdfsRegionDir = null;                  // region directory in HDFS
    long hdfsRegionDirModTime  = 0;             // modification time of the region dir
    boolean hdfsRegioninfoFilePresent = false;  // true if a .regioninfo file was found
    boolean hdfsOnlyEdits = false;              // true if the region dir holds only recovered edits
  }
2258

    
   
2286

   
2259
  /**
2287
  /**
2260
   * Stores the regioninfo retrieved from Online region servers.
2288
   * Stores the regioninfo retrieved from Online region servers.
2261
   */
2289
   */
2262
  static class OnlineEntry {
2290
  static class OnlineEntry {
2263
    HRegionInfo hri;
2291
    HRegionInfo hri;
2264
    ServerName hsa;
2292
    ServerName hsa;
2265

    
   
2293

   
2266
    public String toString() {
2294
    public String toString() {
2267
      return hsa.toString() + ";" + hri.getRegionNameAsString();
2295
      return hsa.toString() + ";" + hri.getRegionNameAsString();
2268
    }
2296
    }
2269
  }
2297
  }
2270

    
   
2298

   
2271
  /**
2299
  /**
2272
   * Maintain information about a particular region.  It gathers information
2300
   * Maintain information about a particular region.  It gathers information
2273
   * from three places -- HDFS, META, and region servers.
2301
   * from three places -- HDFS, META, and region servers.
2274
   */
2302
   */
2275
  public static class HbckInfo implements KeyRange {
2303
  public static class HbckInfo implements KeyRange {
2276
    private MetaEntry metaEntry = null; // info in META
2304
    private MetaEntry metaEntry = null; // info in META
2277
    private HdfsEntry hdfsEntry = null; // info in HDFS
2305
    private HdfsEntry hdfsEntry = null; // info in HDFS
2278
    private List<OnlineEntry> deployedEntries = Lists.newArrayList(); // on Region Server
2306
    private List<OnlineEntry> deployedEntries = Lists.newArrayList(); // on Region Server
2279
    private List<ServerName> deployedOn = Lists.newArrayList(); // info on RS's
2307
    private List<ServerName> deployedOn = Lists.newArrayList(); // info on RS's
2280

    
   
2308

   
2281
    HbckInfo(MetaEntry metaEntry) {
2309
    HbckInfo(MetaEntry metaEntry) {
2282
      this.metaEntry = metaEntry;
2310
      this.metaEntry = metaEntry;
2283
    }
2311
    }
2284

    
   
2312

   
2285
    public synchronized void addServer(HRegionInfo hri, ServerName server) {
2313
    public synchronized void addServer(HRegionInfo hri, ServerName server) {
2286
      OnlineEntry rse = new OnlineEntry() ;
2314
      OnlineEntry rse = new OnlineEntry() ;
2287
      rse.hri = hri;
2315
      rse.hri = hri;
2288
      rse.hsa = server;
2316
      rse.hsa = server;
2289
      this.deployedEntries.add(rse);
2317
      this.deployedEntries.add(rse);
2290
      this.deployedOn.add(server);
2318
      this.deployedOn.add(server);
2291
    }
2319
    }
2292

    
   
2320

   
2293
    public synchronized String toString() {
2321
    public synchronized String toString() {
2294
      StringBuilder sb = new StringBuilder();
2322
      StringBuilder sb = new StringBuilder();
2295
      sb.append("{ meta => ");
2323
      sb.append("{ meta => ");
2296
      sb.append((metaEntry != null)? metaEntry.getRegionNameAsString() : "null");
2324
      sb.append((metaEntry != null)? metaEntry.getRegionNameAsString() : "null");
2297
      sb.append( ", hdfs => " + getHdfsRegionDir());
2325
      sb.append( ", hdfs => " + getHdfsRegionDir());
2298
      sb.append( ", deployed => " + Joiner.on(", ").join(deployedEntries));
2326
      sb.append( ", deployed => " + Joiner.on(", ").join(deployedEntries));
2299
      sb.append(" }");
2327
      sb.append(" }");
2300
      return sb.toString();
2328
      return sb.toString();
2301
    }
2329
    }
2302

    
   
2330

   
2303
    @Override
2331
    @Override
2304
    public byte[] getStartKey() {
2332
    public byte[] getStartKey() {
2305
      if (this.metaEntry != null) {
2333
      if (this.metaEntry != null) {
2306
        return this.metaEntry.getStartKey();
2334
        return this.metaEntry.getStartKey();
2307
      } else if (this.hdfsEntry != null) {
2335
      } else if (this.hdfsEntry != null) {
2308
        return this.hdfsEntry.hri.getStartKey();
2336
        return this.hdfsEntry.hri.getStartKey();
2309
      } else {
2337
      } else {
2310
        LOG.error("Entry " + this + " has no meta or hdfs region start key.");
2338
        LOG.error("Entry " + this + " has no meta or hdfs region start key.");
2311
        return null;
2339
        return null;
2312
      }
2340
      }
2313
    }
2341
    }
2314

    
   
2342

   
2315
    @Override
2343
    @Override
2316
    public byte[] getEndKey() {
2344
    public byte[] getEndKey() {
2317
      if (this.metaEntry != null) {
2345
      if (this.metaEntry != null) {
2318
        return this.metaEntry.getEndKey();
2346
        return this.metaEntry.getEndKey();
2319
      } else if (this.hdfsEntry != null) {
2347
      } else if (this.hdfsEntry != null) {
2320
        return this.hdfsEntry.hri.getEndKey();
2348
        return this.hdfsEntry.hri.getEndKey();
2321
      } else {
2349
      } else {
2322
        LOG.error("Entry " + this + " has no meta or hdfs region start key.");
2350
        LOG.error("Entry " + this + " has no meta or hdfs region start key.");
2323
        return null;
2351
        return null;
2324
      }
2352
      }
2325
    }
2353
    }
2326

    
   
2354

   
2327
    public byte[] getTableName() {
2355
    public byte[] getTableName() {
2328
      if (this.metaEntry != null) {
2356
      if (this.metaEntry != null) {
2329
        return this.metaEntry.getTableName();
2357
        return this.metaEntry.getTableName();
2330
      } else if (this.hdfsEntry != null) {
2358
      } else if (this.hdfsEntry != null) {
2331
        // we are only guaranteed to have a path and not an HRI for hdfsEntry,
2359
        // we are only guaranteed to have a path and not an HRI for hdfsEntry,
2332
        // so we get the name from the Path
2360
        // so we get the name from the Path
2333
        Path tableDir = this.hdfsEntry.hdfsRegionDir.getParent();
2361
        Path tableDir = this.hdfsEntry.hdfsRegionDir.getParent();
2334
        return Bytes.toBytes(tableDir.getName());
2362
        return Bytes.toBytes(tableDir.getName());
2335
      } else {
2363
      } else {
2336
        // Currently no code exercises this path, but we could add one for
2364
        // Currently no code exercises this path, but we could add one for
2337
        // getting table name from OnlineEntry
2365
        // getting table name from OnlineEntry
2338
        return null;
2366
        return null;
2339
      }
2367
      }
2340
    }
2368
    }
2341

    
   
2369

   
2342
    public String getRegionNameAsString() {
2370
    public String getRegionNameAsString() {
2343
      if (metaEntry != null) {
2371
      if (metaEntry != null) {
2344
        return metaEntry.getRegionNameAsString();
2372
        return metaEntry.getRegionNameAsString();
2345
      } else if (hdfsEntry != null) {
2373
      } else if (hdfsEntry != null) {
2346
        return hdfsEntry.hri.getRegionNameAsString();
2374
        return hdfsEntry.hri.getRegionNameAsString();
2347
      } else {
2375
      } else {
2348
        return null;
2376
        return null;
2349
      }
2377
      }
2350
    }
2378
    }
2351

    
   
2379

   
2352
    public byte[] getRegionName() {
2380
    public byte[] getRegionName() {
2353
      if (metaEntry != null) {
2381
      if (metaEntry != null) {
2354
        return metaEntry.getRegionName();
2382
        return metaEntry.getRegionName();
2355
      } else if (hdfsEntry != null) {
2383
      } else if (hdfsEntry != null) {
2356
        return hdfsEntry.hri.getRegionName();
2384
        return hdfsEntry.hri.getRegionName();
2357
      } else {
2385
      } else {
2358
        return null;
2386
        return null;
2359
      }
2387
      }
2360
    }
2388
    }
2361

    
   
2389

   
2362
    Path getHdfsRegionDir() {
2390
    Path getHdfsRegionDir() {
2363
      if (hdfsEntry == null) {
2391
      if (hdfsEntry == null) {
2364
        return null;
2392
        return null;
2365
      }
2393
      }
2366
      return hdfsEntry.hdfsRegionDir;
2394
      return hdfsEntry.hdfsRegionDir;
2367
    }
2395
    }
2368

    
   
2396

   
2369
    boolean containsOnlyHdfsEdits() {
2397
    boolean containsOnlyHdfsEdits() {
2370
      if (hdfsEntry == null) {
2398
      if (hdfsEntry == null) {
2371
        return false;
2399
        return false;
2372
      }
2400
      }
2373
      return hdfsEntry.hdfsOnlyEdits;
2401
      return hdfsEntry.hdfsOnlyEdits;
2374
    }
2402
    }
2375

    
   
2403

   
2376
    boolean isHdfsRegioninfoPresent() {
2404
    boolean isHdfsRegioninfoPresent() {
2377
      if (hdfsEntry == null) {
2405
      if (hdfsEntry == null) {
2378
        return false;
2406
        return false;
2379
      }
2407
      }
2380
      return hdfsEntry.hdfsRegioninfoFilePresent;
2408
      return hdfsEntry.hdfsRegioninfoFilePresent;
2381
    }
2409
    }
2382

    
   
2410

   
2383
    long getModTime() {
2411
    long getModTime() {
2384
      if (hdfsEntry == null) {
2412
      if (hdfsEntry == null) {
2385
        return 0;
2413
        return 0;
2386
      }
2414
      }
2387
      return hdfsEntry.hdfsRegionDirModTime;
2415
      return hdfsEntry.hdfsRegionDirModTime;
2388
    }
2416
    }
2389

    
   
2417

   
2390
    HRegionInfo getHdfsHRI() {
2418
    HRegionInfo getHdfsHRI() {
2391
      if (hdfsEntry == null) {
2419
      if (hdfsEntry == null) {
2392
        return null;
2420
        return null;
2393
      }
2421
      }
2394
      return hdfsEntry.hri;
2422
      return hdfsEntry.hri;
2395
    }
2423
    }
2396
  }
2424
  }
2397

    
   
2425

   
2398
  final static Comparator<HbckInfo> cmp = new Comparator<HbckInfo>() {
2426
  final static Comparator<HbckInfo> cmp = new Comparator<HbckInfo>() {
2399
    @Override
2427
    @Override
2400
    public int compare(HbckInfo l, HbckInfo r) {
2428
    public int compare(HbckInfo l, HbckInfo r) {
2401
      if (l == r) {
2429
      if (l == r) {
2402
        // same instance
2430
        // same instance
2403
        return 0;
2431
        return 0;
2404
      }
2432
      }
2405

    
   
2433

   
2406
      int tableCompare = RegionSplitCalculator.BYTES_COMPARATOR.compare(
2434
      int tableCompare = RegionSplitCalculator.BYTES_COMPARATOR.compare(
2407
          l.getTableName(), r.getTableName());
2435
          l.getTableName(), r.getTableName());
2408
      if (tableCompare != 0) {
2436
      if (tableCompare != 0) {
2409
        return tableCompare;
2437
        return tableCompare;
2410
      }
2438
      }
2411

    
   
2439

   
2412
      int startComparison = RegionSplitCalculator.BYTES_COMPARATOR.compare(
2440
      int startComparison = RegionSplitCalculator.BYTES_COMPARATOR.compare(
2413
          l.getStartKey(), r.getStartKey());
2441
          l.getStartKey(), r.getStartKey());
2414
      if (startComparison != 0) {
2442
      if (startComparison != 0) {
2415
        return startComparison;
2443
        return startComparison;
2416
      }
2444
      }
2417

    
   
2445

   
2418
      // Special case for absolute endkey
2446
      // Special case for absolute endkey
2419
      byte[] endKey = r.getEndKey();
2447
      byte[] endKey = r.getEndKey();
2420
      endKey = (endKey.length == 0) ? null : endKey;
2448
      endKey = (endKey.length == 0) ? null : endKey;
2421
      byte[] endKey2 = l.getEndKey();
2449
      byte[] endKey2 = l.getEndKey();
2422
      endKey2 = (endKey2.length == 0) ? null : endKey2;
2450
      endKey2 = (endKey2.length == 0) ? null : endKey2;
2423
      int endComparison = RegionSplitCalculator.BYTES_COMPARATOR.compare(
2451
      int endComparison = RegionSplitCalculator.BYTES_COMPARATOR.compare(
2424
          endKey2,  endKey);
2452
          endKey2,  endKey);
2425

    
   
2453

   
2426
      if (endComparison != 0) {
2454
      if (endComparison != 0) {
2427
        return endComparison;
2455
        return endComparison;
2428
      }
2456
      }
2429

    
   
2457

   
2430
      // use regionId as tiebreaker.
2458
      // use regionId as tiebreaker.
2431
      // Null is considered after all possible values so make it bigger.
2459
      // Null is considered after all possible values so make it bigger.
2432
      if (l.hdfsEntry == null && r.hdfsEntry == null) {
2460
      if (l.hdfsEntry == null && r.hdfsEntry == null) {
2433
        return 0;
2461
        return 0;
2434
      }
2462
      }
2435
      if (l.hdfsEntry == null && r.hdfsEntry != null) {
2463
      if (l.hdfsEntry == null && r.hdfsEntry != null) {
2436
        return 1;
2464
        return 1;
2437
      }
2465
      }
2438
      // l.hdfsEntry must not be null
2466
      // l.hdfsEntry must not be null
2439
      if (r.hdfsEntry == null) {
2467
      if (r.hdfsEntry == null) {
2440
        return -1;
2468
        return -1;
2441
      }
2469
      }
2442
      // both l.hdfsEntry and r.hdfsEntry must not be null.
2470
      // both l.hdfsEntry and r.hdfsEntry must not be null.
2443
      return (int) (l.hdfsEntry.hri.getRegionId()- r.hdfsEntry.hri.getRegionId());
2471
      return (int) (l.hdfsEntry.hri.getRegionId()- r.hdfsEntry.hri.getRegionId());
2444
    }
2472
    }
2445
  };
2473
  };
2446

    
   
2474

   
2447
  /**
2475
  /**
2448
   * Prints summary of all tables found on the system.
2476
   * Prints summary of all tables found on the system.
2449
   */
2477
   */
2450
  private void printTableSummary(TreeMap<String, TableInfo> tablesInfo) {
2478
  private void printTableSummary(SortedMap<String, TableInfo> tablesInfo) {
2451
    System.out.println("Summary:");
2479
    System.out.println("Summary:");
2452
    for (TableInfo tInfo : tablesInfo.values()) {
2480
    for (TableInfo tInfo : tablesInfo.values()) {
2453
      if (errors.tableHasErrors(tInfo)) {
2481
      if (errors.tableHasErrors(tInfo)) {
2454
        System.out.println("Table " + tInfo.getName() + " is inconsistent.");
2482
        System.out.println("Table " + tInfo.getName() + " is inconsistent.");
2455
      } else {
2483
      } else {
2456
        System.out.println("  " + tInfo.getName() + " is okay.");
2484
        System.out.println("  " +