Review Board 1.7.22


HBASE-3996 Support multiple tables and scanners as input to the mapper in map/reduce jobs

Review Request #4411 - Created March 20, 2012 and updated

Ted Yu
trunk
Reviewers
hbase
hbase
It seems that in many cases feeding data from multiple tables or multiple scanners on a single table can save a lot of time when running map/reduce jobs.
A new MultiTableInputFormat class would allow doing this.
TestMultiTableInputFormat.java is added
/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputCollection.java
New File

    
   
1
/**

    
   
2
 * Licensed to the Apache Software Foundation (ASF) under one

    
   
3
 * or more contributor license agreements.  See the NOTICE file

    
   
4
 * distributed with this work for additional information

    
   
5
 * regarding copyright ownership.  The ASF licenses this file

    
   
6
 * to you under the Apache License, Version 2.0 (the

    
   
7
 * "License"); you may not use this file except in compliance

    
   
8
 * with the License.  You may obtain a copy of the License at

    
   
9
 *

    
   
10
 *     http://www.apache.org/licenses/LICENSE-2.0

    
   
11
 *

    
   
12
 * Unless required by applicable law or agreed to in writing, software

    
   
13
 * distributed under the License is distributed on an "AS IS" BASIS,

    
   
14
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

    
   
15
 * See the License for the specific language governing permissions and

    
   
16
 * limitations under the License.

    
   
17
 */

    
   
18
package org.apache.hadoop.hbase.mapreduce;

    
   
19

   

    
   
20
import java.util.ArrayList;

    
   
21
import java.util.Iterator;

    
   
22

   

    
   
23
import org.apache.hadoop.classification.InterfaceAudience;

    
   
24
import org.apache.hadoop.hbase.client.Scan;

    
   
25

   

    
   
26
/**

    
   
27
 * A collection of input tables and scanners to be used as the source data for a

    
   
28
 * mapper

    
   
29
 */

    
   
30
@InterfaceAudience.Public

    
   
31
public class MultiTableInputCollection implements

    
   
32
    Iterable<MultiTableInputCollection.TableInputConf> {

    
   
33
  /**

    
   
34
   * An internal structure to hold table names + associated scanners

    
   
35
   */

    
   
36
  static class TableInputConf {

    
   
37
    private final String tableName;

    
   
38
    private final Scan scan;

    
   
39
    /**

    
   
40
     * The empty constructor

    
   
41
     */

    
   
42
    public TableInputConf() {

    
   
43
      this(null, null);

    
   
44
    }

    
   
45

   

    
   
46
    /**

    
   
47
     * @param tableName Input table name

    
   
48
     * @param scan A Scanner associated with this table

    
   
49
     */

    
   
50
    public TableInputConf(String tableName, Scan scan) {

    
   
51
      this.tableName = tableName;

    
   
52
      this.scan = scan;

    
   
53
    }

    
   
54

   

    
   
55
    /**

    
   
56
     * @return The table name

    
   
57
     */

    
   
58
    public String getTableName() {

    
   
59
      return this.tableName;

    
   
60
    }

    
   
61

   

    
   
62
    /**

    
   
63
     * Returns the scan

    
   
64
     *

    
   
65
     * @return The scan

    
   
66
     */

    
   
67
    public Scan getScan() {

    
   
68
      return this.scan;

    
   
69
    }

    
   
70

   

    
   
71
  }

    
   
72

   

    
   
73
  private final ArrayList<TableInputConf> tables =

    
   
74
      new ArrayList<TableInputConf>();

    
   
75

   

    
   
76
  /**

    
   
77
   * Add new table and scan pair to the collection

    
   
78
   *

    
   
79
   * @param tableName An input table name

    
   
80
   * @param scan An associated scan

    
   
81
   */

    
   
82
  public void add(String tableName, Scan scan) {

    
   
83
    TableInputConf tic = new TableInputConf(tableName, scan);

    
   
84
    tables.add(tic);

    
   
85
  }

    
   
86

   

    
   
87
  @Override

    
   
88
  public Iterator<TableInputConf> iterator() {

    
   
89
    return tables.iterator();

    
   
90
  }

    
   
91

   

    
   
92
  /**

    
   
93
   * Checks if the tables collection is empty

    
   
94
   *

    
   
95
   * @return true if empty false otherwise

    
   
96
   */

    
   
97
  public boolean isEmpty() {

    
   
98
    return tables.isEmpty();

    
   
99
  }

    
   
100
}
/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputFormat.java
New File
 
/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputFormatBase.java
New File
 
/src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapReduceUtil.java
Revision 1306515 New Change
 
/src/main/java/org/apache/hadoop/hbase/mapreduce/TableRecordReaderImpl.java
Revision 1306515 New Change
 
/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSplit.java
Revision 1306515 New Change
 
/src/test/java/org/apache/hadoop/hbase/mapreduce/TestMultiTableInputFormat.java
New File
 
  1. /src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputCollection.java: Loading...
  2. /src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputFormat.java: Loading...
  3. /src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputFormatBase.java: Loading...
  4. /src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapReduceUtil.java: Loading...
  5. /src/main/java/org/apache/hadoop/hbase/mapreduce/TableRecordReaderImpl.java: Loading...
  6. /src/main/java/org/apache/hadoop/hbase/mapreduce/TableSplit.java: Loading...
  7. /src/test/java/org/apache/hadoop/hbase/mapreduce/TestMultiTableInputFormat.java: Loading...