Review Board 1.7.22


HIVE-2128: Automatic Indexing with multiple tables

Review Request #1010 - Created July 5, 2011 and updated

Syed Albiz
trunk
HIVE-2128
Reviewers
hive
jvs
hive-git
During optimized query generation, collect the indexed tables and their associated path URIs, and store them in the Configuration object. When the job is passed to ExecDriver, this data is extracted and used in HiveIndexedInputFormat to decide whether to use the index file or to delegate to the parent class (HiveInputFormat). I am not sure whether this approach is robust.
Added a new test case, index_auto_mult_tables.q.
ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexResult.java
Revision b9b586e New Change
[20] 79 lines
[+20] [+] public boolean equals(Object obj) {
80

    
   
80

   
81
  JobConf job = null;
81
  JobConf job = null;
82
  BytesRefWritable[] bytesRef = new BytesRefWritable[2];
82
  BytesRefWritable[] bytesRef = new BytesRefWritable[2];
83
  boolean ignoreHdfsLoc = false;
83
  boolean ignoreHdfsLoc = false;
84

    
   
84

   
85
  public HiveIndexResult(String indexFile, JobConf conf) throws IOException,
85
  public HiveIndexResult(List<String> indexFiles, JobConf conf) throws IOException,
86
      HiveException {
86
      HiveException {
87
    job = conf;
87
    job = conf;
88

    
   
88

   
89
    bytesRef[0] = new BytesRefWritable();
89
    bytesRef[0] = new BytesRefWritable();
90
    bytesRef[1] = new BytesRefWritable();
90
    bytesRef[1] = new BytesRefWritable();
91
    ignoreHdfsLoc = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_INDEX_IGNORE_HDFS_LOC);
91
    ignoreHdfsLoc = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_INDEX_IGNORE_HDFS_LOC);
92

    
   
92

   
93
    if (indexFile != null) {
93
    if (indexFiles != null && indexFiles.size() > 0) {
94
      Path indexFilePath = new Path(indexFile);

   
95
      FileSystem fs = FileSystem.get(conf);
94
      FileSystem fs = FileSystem.get(conf);
96
      FileStatus indexStat = fs.getFileStatus(indexFilePath);

   
97
      List<Path> paths = new ArrayList<Path>();
95
      List<Path> paths = new ArrayList<Path>();

    
   
96
      for (String indexFile : indexFiles) {

    
   
97
        Path indexFilePath = new Path(indexFile);

    
   
98
        FileStatus indexStat = fs.getFileStatus(indexFilePath);
98
      if (indexStat.isDir()) {
99
        if (indexStat.isDir()) {
99
        FileStatus[] fss = fs.listStatus(indexFilePath);
100
          FileStatus[] fss = fs.listStatus(indexFilePath);
100
        for (FileStatus f : fss) {
101
          for (FileStatus f : fss) {
101
          paths.add(f.getPath());
102
            paths.add(f.getPath());
102
        }
103
          }
103
      } else {
104
        } else {
104
        paths.add(indexFilePath);
105
          paths.add(indexFilePath);
105
      }
106
        }

    
   
107
      }
106

    
   
108

   
107
      long maxEntriesToLoad = HiveConf.getLongVar(conf, HiveConf.ConfVars.HIVE_INDEX_COMPACT_QUERY_MAX_ENTRIES);
109
      long maxEntriesToLoad = HiveConf.getLongVar(conf, HiveConf.ConfVars.HIVE_INDEX_COMPACT_QUERY_MAX_ENTRIES);
108
      if (maxEntriesToLoad < 0) {
110
      if (maxEntriesToLoad < 0) {
109
        maxEntriesToLoad=Long.MAX_VALUE;
111
        maxEntriesToLoad=Long.MAX_VALUE;
110
      }
112
      }
[+20] [20] 93 lines
ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexedInputFormat.java
Revision f1ee95d New Change
 
ql/src/java/org/apache/hadoop/hive/ql/index/bitmap/BitmapIndexHandler.java
Revision 61bbbf5 New Change
 
ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java
Revision 7c91946 New Change
 
ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereProcessor.java
Revision dbc489f New Change
 
ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereTaskDispatcher.java
Revision da084f6 New Change
 
ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java
Revision a03a9a6 New Change
 
ql/src/test/queries/clientpositive/index_auto_mult_tables.q
New File
 
ql/src/test/queries/clientpositive/index_auto_mult_tables_compact.q
New File
 
ql/src/test/queries/clientpositive/index_auto_self_join.q
New File
 
ql/src/test/results/clientpositive/index_auto_mult_tables.q.out
New File
 
ql/src/test/results/clientpositive/index_auto_mult_tables_compact.q.out
New File
 
ql/src/test/results/clientpositive/index_auto_self_join.q.out
New File
 
ql/src/test/results/clientpositive/index_bitmap_auto_partitioned.q.out
Revision 4c9efd1 New Change
 
  1. ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexResult.java: Loading...
  2. ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexedInputFormat.java: Loading...
  3. ql/src/java/org/apache/hadoop/hive/ql/index/bitmap/BitmapIndexHandler.java: Loading...
  4. ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java: Loading...
  5. ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereProcessor.java: Loading...
  6. ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereTaskDispatcher.java: Loading...
  7. ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java: Loading...
  8. ql/src/test/queries/clientpositive/index_auto_mult_tables.q: Loading...
  9. ql/src/test/queries/clientpositive/index_auto_mult_tables_compact.q: Loading...
  10. ql/src/test/queries/clientpositive/index_auto_self_join.q: Loading...
  11. ql/src/test/results/clientpositive/index_auto_mult_tables.q.out: Loading...
  12. ql/src/test/results/clientpositive/index_auto_mult_tables_compact.q.out: Loading...
  13. ql/src/test/results/clientpositive/index_auto_self_join.q.out: Loading...
  14. ql/src/test/results/clientpositive/index_bitmap_auto_partitioned.q.out: Loading...