Review Board 1.7.22


HIVE-4113: Optimize select count(1) with RCFile and Orc

Review Request #11770 - Created June 9, 2013 and updated

Brock Noland
trunk
HIVE-4113
Reviewers
hive
hive-git
Modifies ColumnProjectionUtils such that there are two flags: one for the column ids and one indicating whether all columns should be read. Additionally, the patch updates all locations which use the old method of an empty string indicating all columns should be read.

The automatic formatter generated by ant eclipse-files is fairly aggressive, so there is some unrelated import/whitespace cleanup.
All unit tests pass with the patch. ColumnProjectionUtils has new unit tests covering its functionality. Additionally, I manually verified that select count(1) from RCFile/Orc results in less I/O after the change.

Before:

hive> select count(1) from users_orc;
Job 0: Map: 1  Reduce: 1   Cumulative CPU: 17.75 sec   HDFS Read: 28782851 HDFS Write: 9 SUCCESS

hive> select count(1) from users_rc; 
Job 0: Map: 3  Reduce: 1   Cumulative CPU: 23.72 sec   HDFS Read: 825865962 HDFS Write: 9 SUCCESS

After:


hive> select count(1) from users_orc;
Job 0: Map: 1  Reduce: 1   Cumulative CPU: 9.9 sec   HDFS Read: 67325 HDFS Write: 9 SUCCESS

hive> select count(1) from users_rc; 
Job 0: Map: 3  Reduce: 1   Cumulative CPU: 16.96 sec   HDFS Read: 96045618 HDFS Write: 9 SUCCESS

Diff revision 4 (Latest)

1 2 3 4
1 2 3 4

  1. hbase-handler/src/java/org/apache/hadoop/hive/hbase/HiveHBaseTableInputFormat.java: Loading...
  2. hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatBaseInputFormat.java: Loading...
  3. hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatRecordReader.java: Loading...
  4. hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/InitializeInput.java: Loading...
  5. hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/InternalUtil.java: Loading...
  6. hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatMultiOutputFormat.java: Loading...
  7. hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatPartitioned.java: Loading...
  8. hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatLoader.java: Loading...
  9. ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java: Loading...
  10. ql/src/java/org/apache/hadoop/hive/ql/exec/mr/MapredLocalTask.java: Loading...
  11. ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputFormat.java: Loading...
  12. ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java: Loading...
  13. ql/src/java/org/apache/hadoop/hive/ql/io/RCFile.java: Loading...
  14. ql/src/java/org/apache/hadoop/hive/ql/io/RCFileRecordReader.java: Loading...
  15. ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java: Loading...
  16. ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/merge/RCFileBlockMergeRecordReader.java: Loading...
  17. ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java: Loading...
  18. ql/src/test/org/apache/hadoop/hive/ql/io/PerformTestRCFileAndSeqFile.java: Loading...
  19. ql/src/test/org/apache/hadoop/hive/ql/io/TestRCFile.java: Loading...
  20. ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java: Loading...
  21. serde/src/java/org/apache/hadoop/hive/serde2/ColumnProjectionUtils.java: Loading...
  22. serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarSerDe.java: Loading...
  23. serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarStruct.java: Loading...
  24. serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarStructBase.java: Loading...
  25. serde/src/java/org/apache/hadoop/hive/serde2/columnar/LazyBinaryColumnarSerDe.java: Loading...
  26. serde/src/java/org/apache/hadoop/hive/serde2/columnar/LazyBinaryColumnarStruct.java: Loading...
  27. serde/src/test/org/apache/hadoop/hive/serde2/TestColumnProjectionUtils.java: Loading...
  28. serde/src/test/org/apache/hadoop/hive/serde2/TestStatsSerde.java: Loading...
  29. serde/src/test/org/apache/hadoop/hive/serde2/columnar/TestLazyBinaryColumnarSerDe.java: Loading...
hbase-handler/src/java/org/apache/hadoop/hive/hbase/HiveHBaseTableInputFormat.java
Revision da85501 New Change
[20] 101 lines
[+20] [+] public class HiveHBaseTableInputFormat extends TableInputFormatBase
102

    
   
102

   
103
    if (columnsMapping.size() < readColIDs.size()) {
103
    if (columnsMapping.size() < readColIDs.size()) {
104
      throw new IOException("Cannot read more columns than the given table contains.");
104
      throw new IOException("Cannot read more columns than the given table contains.");
105
    }
105
    }
106

    
   
106

   
107
    boolean addAll = (readColIDs.size() == 0);
107
    boolean readAllColumns = ColumnProjectionUtils.isReadAllColumns(jobConf);
108
    Scan scan = new Scan();
108
    Scan scan = new Scan();
109
    boolean empty = true;
109
    boolean empty = true;
110

    
   
110

   
111
    if (!addAll) {
111
    if (!readAllColumns) {
112
      for (int i : readColIDs) {
112
      for (int i : readColIDs) {
113
        ColumnMapping colMap = columnsMapping.get(i);
113
        ColumnMapping colMap = columnsMapping.get(i);
114
        if (colMap.hbaseRowKey) {
114
        if (colMap.hbaseRowKey) {
115
          continue;
115
          continue;
116
        }
116
        }
[+20] [20] 24 lines
[+20] public class HiveHBaseTableInputFormat extends TableInputFormatBase
141
          scan.addFamily(colMap.familyNameBytes);
141
          scan.addFamily(colMap.familyNameBytes);
142
        } else {
142
        } else {
143
          scan.addColumn(colMap.familyNameBytes, colMap.qualifierNameBytes);
143
          scan.addColumn(colMap.familyNameBytes, colMap.qualifierNameBytes);
144
        }
144
        }
145

    
   
145

   
146
        if (!addAll) {
146
        if (!readAllColumns) {
147
          break;
147
          break;
148
        }
148
        }
149
      }
149
      }
150
    }
150
    }
151

    
   
151

   
[+20] [20] 359 lines
hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatBaseInputFormat.java
Revision bc0e04c New Change
 
hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatRecordReader.java
Revision ac3753f New Change
 
hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/InitializeInput.java
Revision 02ec37f New Change
 
hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/InternalUtil.java
Revision 4167afa New Change
 
hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatMultiOutputFormat.java
Revision b5f22af New Change
 
hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatPartitioned.java
Revision dd2ac10 New Change
 
hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatLoader.java
Revision e907c73 New Change
 
ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java
Revision 1a784b2 New Change
 
ql/src/java/org/apache/hadoop/hive/ql/exec/mr/MapredLocalTask.java
Revision f72ecfb New Change
 
ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputFormat.java
Revision 49145b7 New Change
 
ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
Revision adf4923 New Change
 
ql/src/java/org/apache/hadoop/hive/ql/io/RCFile.java
Revision d18d403 New Change
 
ql/src/java/org/apache/hadoop/hive/ql/io/RCFileRecordReader.java
Revision 9521060 New Change
 
ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
Revision 96ac584 New Change
 
ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/merge/RCFileBlockMergeRecordReader.java
Revision cbdc2db New Change
 
ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java
Revision 400abf3 New Change
 
ql/src/test/org/apache/hadoop/hive/ql/io/PerformTestRCFileAndSeqFile.java
Revision fb9fca1 New Change
 
ql/src/test/org/apache/hadoop/hive/ql/io/TestRCFile.java
Revision ae6a5ee New Change
 
ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
Revision 785f0b1 New Change
 
serde/src/java/org/apache/hadoop/hive/serde2/ColumnProjectionUtils.java
Revision 23180cf New Change
 
serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarSerDe.java
Revision 11f5f07 New Change
 
serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarStruct.java
Revision 1335446 New Change
 
serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarStructBase.java
Revision e1270cc New Change
 
serde/src/java/org/apache/hadoop/hive/serde2/columnar/LazyBinaryColumnarSerDe.java
Revision b717278 New Change
 
serde/src/java/org/apache/hadoop/hive/serde2/columnar/LazyBinaryColumnarStruct.java
Revision 0317024 New Change
 
serde/src/test/org/apache/hadoop/hive/serde2/TestColumnProjectionUtils.java
New File
 
serde/src/test/org/apache/hadoop/hive/serde2/TestStatsSerde.java
Revision 3ba2699 New Change
 
serde/src/test/org/apache/hadoop/hive/serde2/columnar/TestLazyBinaryColumnarSerDe.java
Revision 99420ca New Change
 
  1. hbase-handler/src/java/org/apache/hadoop/hive/hbase/HiveHBaseTableInputFormat.java: Loading...
  2. hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatBaseInputFormat.java: Loading...
  3. hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatRecordReader.java: Loading...
  4. hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/InitializeInput.java: Loading...
  5. hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/InternalUtil.java: Loading...
  6. hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatMultiOutputFormat.java: Loading...
  7. hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatPartitioned.java: Loading...
  8. hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatLoader.java: Loading...
  9. ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java: Loading...
  10. ql/src/java/org/apache/hadoop/hive/ql/exec/mr/MapredLocalTask.java: Loading...
  11. ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputFormat.java: Loading...
  12. ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java: Loading...
  13. ql/src/java/org/apache/hadoop/hive/ql/io/RCFile.java: Loading...
  14. ql/src/java/org/apache/hadoop/hive/ql/io/RCFileRecordReader.java: Loading...
  15. ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java: Loading...
  16. ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/merge/RCFileBlockMergeRecordReader.java: Loading...
  17. ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java: Loading...
  18. ql/src/test/org/apache/hadoop/hive/ql/io/PerformTestRCFileAndSeqFile.java: Loading...
  19. ql/src/test/org/apache/hadoop/hive/ql/io/TestRCFile.java: Loading...
  20. ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java: Loading...
  21. serde/src/java/org/apache/hadoop/hive/serde2/ColumnProjectionUtils.java: Loading...
  22. serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarSerDe.java: Loading...
  23. serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarStruct.java: Loading...
  24. serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarStructBase.java: Loading...
  25. serde/src/java/org/apache/hadoop/hive/serde2/columnar/LazyBinaryColumnarSerDe.java: Loading...
  26. serde/src/java/org/apache/hadoop/hive/serde2/columnar/LazyBinaryColumnarStruct.java: Loading...
  27. serde/src/test/org/apache/hadoop/hive/serde2/TestColumnProjectionUtils.java: Loading...
  28. serde/src/test/org/apache/hadoop/hive/serde2/TestStatsSerde.java: Loading...
  29. serde/src/test/org/apache/hadoop/hive/serde2/columnar/TestLazyBinaryColumnarSerDe.java: Loading...