Review Board 1.7.22


HIVE-2206: add a new optimizer for query correlation discovery and optimization

Review Request #7126 - Created Sept. 15, 2012 and updated

Yin Huai
trunk
HIVE-2206
Reviewers
hive
hive-git
This optimizer exploits intra-query correlations and merges multiple correlated MapReduce jobs into a single job. I opened a new review request because I have been working on hive-git.
All tests pass.

Diff revision 12 (Latest)

1 2 3 4 5 6 7 8 9 10 11 12
1 2 3 4 5 6 7 8 9 10 11 12

  1. common/src/java/org/apache/hadoop/hive/conf/HiveConf.java: Loading...
  2. conf/hive-default.xml.template: Loading...
  3. ql/if/queryplan.thrift: Loading...
  4. ql/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/ql/plan/api/OperatorType.java: Loading...
  5. ql/src/java/org/apache/hadoop/hive/ql/exec/BaseReduceSinkOperator.java: Loading...
  6. ql/src/java/org/apache/hadoop/hive/ql/exec/CorrelationCompositeOperator.java: Loading...
  7. ql/src/java/org/apache/hadoop/hive/ql/exec/CorrelationLocalSimulativeReduceSinkOperator.java: Loading...
  8. ql/src/java/org/apache/hadoop/hive/ql/exec/CorrelationReducerDispatchOperator.java: Loading...
  9. ql/src/java/org/apache/hadoop/hive/ql/exec/ExecReducer.java: Loading...
  10. ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java: Loading...
  11. ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java: Loading...
  12. ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java: Loading...
  13. ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java: Loading...
  14. ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java: Loading...
  15. ql/src/java/org/apache/hadoop/hive/ql/optimizer/CorrelationOptimizer.java: Loading...
  16. ql/src/java/org/apache/hadoop/hive/ql/optimizer/CorrelationOptimizerUtils.java: Loading...
  17. ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java: Loading...
  18. ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java: Loading...
  19. ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinResolver.java: Loading...
  20. ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java: Loading...
This diff has been split across 2 pages: 1 2 >
common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
Revision 15fce1e New Change
[20] 518 lines
[+20] [+] public class HiveConf extends Configuration {
519
    HIVEOPTREDUCEDEDUPLICATION("hive.optimize.reducededuplication", true),
519
    HIVEOPTREDUCEDEDUPLICATION("hive.optimize.reducededuplication", true),
520
    // whether to optimize union followed by select followed by filesink
520
    // whether to optimize union followed by select followed by filesink
521
    // It creates sub-directories in the final output, so should not be turned on in systems
521
    // It creates sub-directories in the final output, so should not be turned on in systems
522
    // where MAPREDUCE-1501 is not present
522
    // where MAPREDUCE-1501 is not present
523
    HIVE_OPTIMIZE_UNION_REMOVE("hive.optimize.union.remove", false),
523
    HIVE_OPTIMIZE_UNION_REMOVE("hive.optimize.union.remove", false),

    
   
524
    HIVEOPTCORRELATION("hive.optimize.correlation", false), // exploit intra-query correlations
524

    
   
525

   
525
    // whether hadoop map-reduce supports sub-directories. It was added by MAPREDUCE-1501.
526
    // whether hadoop map-reduce supports sub-directories. It was added by MAPREDUCE-1501.
526
    // Some optimizations can only be performed if the version of hadoop being used supports
527
    // Some optimizations can only be performed if the version of hadoop being used supports
527
    // sub-directories
528
    // sub-directories
528
    HIVE_HADOOP_SUPPORTS_SUBDIRECTORIES("hive.mapred.supports.subdirectories", false),
529
    HIVE_HADOOP_SUPPORTS_SUBDIRECTORIES("hive.mapred.supports.subdirectories", false),
[+20] [20] 616 lines
conf/hive-default.xml.template
Revision cdc1afd New Change
 
ql/if/queryplan.thrift
Revision 4427929 New Change
 
ql/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/ql/plan/api/OperatorType.java
Revision 7c4c413 New Change
 
ql/src/java/org/apache/hadoop/hive/ql/exec/BaseReduceSinkOperator.java
New File
 
ql/src/java/org/apache/hadoop/hive/ql/exec/CorrelationCompositeOperator.java
New File
 
ql/src/java/org/apache/hadoop/hive/ql/exec/CorrelationLocalSimulativeReduceSinkOperator.java
New File
 
ql/src/java/org/apache/hadoop/hive/ql/exec/CorrelationReducerDispatchOperator.java
New File
 
ql/src/java/org/apache/hadoop/hive/ql/exec/ExecReducer.java
Revision 18a9bd2 New Change
 
ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java
Revision 7f9ad24 New Change
 
ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java
Revision 68302f8 New Change
 
ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java
Revision 69fff0e New Change
 
ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java
Revision 919a140 New Change
 
ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java
Revision 18b5540 New Change
 
ql/src/java/org/apache/hadoop/hive/ql/optimizer/CorrelationOptimizer.java
New File
 
ql/src/java/org/apache/hadoop/hive/ql/optimizer/CorrelationOptimizerUtils.java
New File
 
ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
Revision 09f8139 New Change
 
ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
Revision d1555e2 New Change
 
ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinResolver.java
Revision 2f5140a New Change
 
ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
Revision cb43d84 New Change
 
  1. common/src/java/org/apache/hadoop/hive/conf/HiveConf.java: Loading...
  2. conf/hive-default.xml.template: Loading...
  3. ql/if/queryplan.thrift: Loading...
  4. ql/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/ql/plan/api/OperatorType.java: Loading...
  5. ql/src/java/org/apache/hadoop/hive/ql/exec/BaseReduceSinkOperator.java: Loading...
  6. ql/src/java/org/apache/hadoop/hive/ql/exec/CorrelationCompositeOperator.java: Loading...
  7. ql/src/java/org/apache/hadoop/hive/ql/exec/CorrelationLocalSimulativeReduceSinkOperator.java: Loading...
  8. ql/src/java/org/apache/hadoop/hive/ql/exec/CorrelationReducerDispatchOperator.java: Loading...
  9. ql/src/java/org/apache/hadoop/hive/ql/exec/ExecReducer.java: Loading...
  10. ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java: Loading...
  11. ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java: Loading...
  12. ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java: Loading...
  13. ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java: Loading...
  14. ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java: Loading...
  15. ql/src/java/org/apache/hadoop/hive/ql/optimizer/CorrelationOptimizer.java: Loading...
  16. ql/src/java/org/apache/hadoop/hive/ql/optimizer/CorrelationOptimizerUtils.java: Loading...
  17. ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java: Loading...
  18. ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java: Loading...
  19. ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinResolver.java: Loading...
  20. ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java: Loading...
This diff has been split across 2 pages: 1 2 >