Review Board 1.7.22


HBASE-6004 Adding more logging to help debugging MR job

Review Request #5131 - Created May 15, 2012 and submitted

Jimmy Xiang
HBASE-6004
Reviewers
hbase
hbase-git
Added some logging for MR debugging in case scanner times out.  The logging is disabled by default.
It will be helpful to know how much time spent in the scanner and, how much in the mapper task.
In case scanner issue, it is helpful to know the region server id, last successful rows and so on.

 
src/main/java/org/apache/hadoop/hbase/client/ScannerCallable.java
Revision 46b1c56 New Change
[20] 22 lines
[+20]
23

    
   
23

   
24
import org.apache.commons.logging.Log;
24
import org.apache.commons.logging.Log;
25
import org.apache.commons.logging.LogFactory;
25
import org.apache.commons.logging.LogFactory;
26
import org.apache.hadoop.classification.InterfaceAudience;
26
import org.apache.hadoop.classification.InterfaceAudience;
27
import org.apache.hadoop.classification.InterfaceStability;
27
import org.apache.hadoop.classification.InterfaceStability;

    
   
28
import org.apache.hadoop.conf.Configuration;
28
import org.apache.hadoop.hbase.DoNotRetryIOException;
29
import org.apache.hadoop.hbase.DoNotRetryIOException;
29
import org.apache.hadoop.hbase.HRegionInfo;
30
import org.apache.hadoop.hbase.HRegionInfo;

    
   
31
import org.apache.hadoop.hbase.HRegionLocation;
30
import org.apache.hadoop.hbase.NotServingRegionException;
32
import org.apache.hadoop.hbase.NotServingRegionException;
31
import org.apache.hadoop.hbase.RemoteExceptionHandler;
33
import org.apache.hadoop.hbase.RemoteExceptionHandler;

    
   
34
import org.apache.hadoop.hbase.UnknownScannerException;
32
import org.apache.hadoop.hbase.client.metrics.ScanMetrics;
35
import org.apache.hadoop.hbase.client.metrics.ScanMetrics;
33
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
36
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
34
import org.apache.hadoop.hbase.protobuf.RequestConverter;
37
import org.apache.hadoop.hbase.protobuf.RequestConverter;
35
import org.apache.hadoop.hbase.protobuf.ResponseConverter;
38
import org.apache.hadoop.hbase.protobuf.ResponseConverter;
36
import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.ScanRequest;
39
import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.ScanRequest;
[+20] [20] 9 lines
[+20]
46
 * Used by {@link ResultScanner}s made by {@link HTable}.
49
 * Used by {@link ResultScanner}s made by {@link HTable}.
47
 */
50
 */
48
@InterfaceAudience.Public
51
@InterfaceAudience.Public
49
@InterfaceStability.Stable
52
@InterfaceStability.Stable
50
public class ScannerCallable extends ServerCallable<Result[]> {
53
public class ScannerCallable extends ServerCallable<Result[]> {

    
   
54
  public static final String LOG_SCANNER_LATENCY_CUTOFF

    
   
55
    = "hbase.client.log.scanner.latency.cutoff";

    
   
56
  public static final String LOG_SCANNER_ACTIVITY = "hbase.client.log.scanner.activity";

    
   
57

   
51
  private static final Log LOG = LogFactory.getLog(ScannerCallable.class);
58
  private static final Log LOG = LogFactory.getLog(ScannerCallable.class);
52
  private long scannerId = -1L;
59
  private long scannerId = -1L;
53
  private boolean instantiated = false;
60
  private boolean instantiated = false;
54
  private boolean closed = false;
61
  private boolean closed = false;
55
  private Scan scan;
62
  private Scan scan;
56
  private int caching = 1;
63
  private int caching = 1;
57
  private ScanMetrics scanMetrics;
64
  private ScanMetrics scanMetrics;

    
   
65
  private boolean logScannerActivity = false;

    
   
66
  private int logCutOffLatency = 1000;
58

    
   
67

   
59
  // indicate if it is a remote server call
68
  // indicate if it is a remote server call
60
  private boolean isRegionServerRemote = true;
69
  private boolean isRegionServerRemote = true;
61

    
   
70

   
62
  /**
71
  /**
[+20] [20] 6 lines
[+20]
69
  public ScannerCallable (HConnection connection, byte [] tableName, Scan scan,
78
  public ScannerCallable (HConnection connection, byte [] tableName, Scan scan,
70
    ScanMetrics scanMetrics) {
79
    ScanMetrics scanMetrics) {
71
    super(connection, tableName, scan.getStartRow());
80
    super(connection, tableName, scan.getStartRow());
72
    this.scan = scan;
81
    this.scan = scan;
73
    this.scanMetrics = scanMetrics;
82
    this.scanMetrics = scanMetrics;

    
   
83
    Configuration conf = connection.getConfiguration();

    
   
84
    logScannerActivity = conf.getBoolean(LOG_SCANNER_ACTIVITY, false);

    
   
85
    logCutOffLatency = conf.getInt(LOG_SCANNER_LATENCY_CUTOFF, 1000);
74
  }
86
  }
75

    
   
87

   
76
  /**
88
  /**
77
   * @param reload force reload of server location
89
   * @param reload force reload of server location
78
   * @throws IOException
90
   * @throws IOException
[+20] [20] 48 lines
[+20] [+] private void checkIfRegionServerIsRemote() throws UnknownHostException {
127
          incRPCcallsMetrics();
139
          incRPCcallsMetrics();
128
          ScanRequest request =
140
          ScanRequest request =
129
            RequestConverter.buildScanRequest(scannerId, caching, false);
141
            RequestConverter.buildScanRequest(scannerId, caching, false);
130
          try {
142
          try {
131
            ScanResponse response = server.scan(null, request);
143
            ScanResponse response = server.scan(null, request);

    
   
144
            long timestamp = System.currentTimeMillis();
132
            rrs = ResponseConverter.getResults(response);
145
            rrs = ResponseConverter.getResults(response);

    
   
146
            if (logScannerActivity) {

    
   
147
              long now = System.currentTimeMillis();

    
   
148
              if (now - timestamp > logCutOffLatency) {

    
   
149
                int rows = rrs == null ? 0 : rrs.length;

    
   
150
                LOG.info("Took " + (now-timestamp) + "ms to fetch "

    
   
151
                  + rows + " rows from scanner=" + scannerId);

    
   
152
              }

    
   
153
            }
133
            if (response.hasMoreResults()
154
            if (response.hasMoreResults()
134
                && !response.getMoreResults()) {
155
                && !response.getMoreResults()) {
135
              scannerId = -1L;
156
              scannerId = -1L;
136
              closed = true;
157
              closed = true;
137
              return null;
158
              return null;
138
            }
159
            }
139
          } catch (ServiceException se) {
160
          } catch (ServiceException se) {
140
            throw ProtobufUtil.getRemoteException(se);
161
            throw ProtobufUtil.getRemoteException(se);
141
          }
162
          }
142
          updateResultsMetrics(rrs);
163
          updateResultsMetrics(rrs);
143
        } catch (IOException e) {
164
        } catch (IOException e) {

    
   
165
          if (logScannerActivity) {

    
   
166
            LOG.info("Got exception in fetching from scanner="

    
   
167
              + scannerId, e);

    
   
168
          }
144
          IOException ioe = e;
169
          IOException ioe = e;
145
          if (e instanceof RemoteException) {
170
          if (e instanceof RemoteException) {
146
            ioe = RemoteExceptionHandler.decodeRemoteException((RemoteException)e);
171
            ioe = RemoteExceptionHandler.decodeRemoteException((RemoteException)e);
147
          }
172
          }

    
   
173
          if (logScannerActivity && (ioe instanceof UnknownScannerException)) {

    
   
174
            try {

    
   
175
              HRegionLocation location =

    
   
176
                connection.relocateRegion(tableName, scan.getStartRow());

    
   
177
              LOG.info("Scanner=" + scannerId

    
   
178
                + " expired, current region location is " + location.toString()

    
   
179
                + " ip:" + location.getServerAddress().getBindAddress());

    
   
180
            } catch (Throwable t) {

    
   
181
              LOG.info("Failed to relocate region", t);

    
   
182
            }

    
   
183
          }
148
          if (ioe instanceof NotServingRegionException) {
184
          if (ioe instanceof NotServingRegionException) {
149
            // Throw a DNRE so that we break out of cycle of calling NSRE
185
            // Throw a DNRE so that we break out of cycle of calling NSRE
150
            // when what we need is to open scanner against new location.
186
            // when what we need is to open scanner against new location.
151
            // Attach NSRE to signal client that it needs to resetup scanner.
187
            // Attach NSRE to signal client that it needs to resetup scanner.
152
            if (this.scanMetrics != null) {
188
            if (this.scanMetrics != null) {
[+20] [20] 66 lines
[+20] [+] protected long openScanner() throws IOException {
219
      RequestConverter.buildScanRequest(
255
      RequestConverter.buildScanRequest(
220
        this.location.getRegionInfo().getRegionName(),
256
        this.location.getRegionInfo().getRegionName(),
221
        this.scan, 0, false);
257
        this.scan, 0, false);
222
    try {
258
    try {
223
      ScanResponse response = server.scan(null, request);
259
      ScanResponse response = server.scan(null, request);
224
      return response.getScannerId();
260
      long id = response.getScannerId();

    
   
261
      if (logScannerActivity) {

    
   
262
        LOG.info("Open scanner=" + id + " for scan=" + scan.toString()

    
   
263
          + " on region " + this.location.toString() + " ip:"

    
   
264
          + this.location.getServerAddress().getBindAddress());

    
   
265
      }

    
   
266
      return id;
225
    } catch (ServiceException se) {
267
    } catch (ServiceException se) {
226
      throw ProtobufUtil.getRemoteException(se);
268
      throw ProtobufUtil.getRemoteException(se);
227
    }
269
    }
228
  }
270
  }
229

    
   
271

   
[+20] [20] 37 lines
src/main/java/org/apache/hadoop/hbase/mapred/TableRecordReaderImpl.java
Revision 42569fb New Change
 
src/main/java/org/apache/hadoop/hbase/mapreduce/TableRecordReaderImpl.java
Revision 1c8a393 New Change
 
  1. src/main/java/org/apache/hadoop/hbase/client/ScannerCallable.java: Loading...
  2. src/main/java/org/apache/hadoop/hbase/mapred/TableRecordReaderImpl.java: Loading...
  3. src/main/java/org/apache/hadoop/hbase/mapreduce/TableRecordReaderImpl.java: Loading...