Review Board 1.7.22


HIVE-2206: add a new optimizer for query correlation discovery and optimization

Review Request #7126 - Created Sept. 15, 2012 and updated

Yin Huai
trunk
HIVE-2206
Reviewers
hive
hive-git
This optimizer exploits intra-query correlations and merges multiple correlated MapReduce jobs into a single job. Opened a new request since I have been working on hive-git.
All tests pass.

Diff revision 11

This is not the most recent revision of the diff. The latest diff is revision 12. See what's changed.

1 2 3 4 5 6 7 8 9 10 11 12
1 2 3 4 5 6 7 8 9 10 11 12

  1. common/src/java/org/apache/hadoop/hive/conf/HiveConf.java: Loading...
  2. conf/hive-default.xml.template: Loading...
  3. ql/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/ql/plan/api/OperatorType.java: Loading...
  4. ql/src/java/org/apache/hadoop/hive/ql/exec/BaseReduceSinkOperator.java: Loading...
  5. ql/src/java/org/apache/hadoop/hive/ql/exec/CorrelationCompositeOperator.java: Loading...
  6. ql/src/java/org/apache/hadoop/hive/ql/exec/CorrelationLocalSimulativeReduceSinkOperator.java: Loading...
  7. ql/src/java/org/apache/hadoop/hive/ql/exec/CorrelationReducerDispatchOperator.java: Loading...
  8. ql/src/java/org/apache/hadoop/hive/ql/exec/ExecReducer.java: Loading...
  9. ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java: Loading...
  10. ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java: Loading...
  11. ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java: Loading...
  12. ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java: Loading...
  13. ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java: Loading...
  14. ql/src/java/org/apache/hadoop/hive/ql/optimizer/CorrelationOptimizer.java: Loading...
  15. ql/src/java/org/apache/hadoop/hive/ql/optimizer/CorrelationOptimizerUtils.java: Loading...
  16. ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java: Loading...
  17. ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java: Loading...
  18. ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java: Loading...
  19. ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java: Loading...
  20. ql/src/java/org/apache/hadoop/hive/ql/plan/BaseReduceSinkDesc.java: Loading...
This diff has been split across 2 pages: 1 2 >
common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
Revision 9fa9525 New Change
[20] 512 lines
[+20] [+] public class HiveConf extends Configuration {
513
    HIVEOPTREDUCEDEDUPLICATION("hive.optimize.reducededuplication", true),
513
    HIVEOPTREDUCEDEDUPLICATION("hive.optimize.reducededuplication", true),
514
    // whether to optimize union followed by select followed by filesink
514
    // whether to optimize union followed by select followed by filesink
515
    // It creates sub-directories in the final output, so should not be turned on in systems
515
    // It creates sub-directories in the final output, so should not be turned on in systems
516
    // where MAPREDUCE-1501 is not present
516
    // where MAPREDUCE-1501 is not present
517
    HIVE_OPTIMIZE_UNION_REMOVE("hive.optimize.union.remove", false),
517
    HIVE_OPTIMIZE_UNION_REMOVE("hive.optimize.union.remove", false),

    
   
518
    HIVEOPTCORRELATION("hive.optimize.correlation", false), // exploit intra-query correlations
518

    
   
519

   
519
    // whether hadoop map-reduce supports sub-directories. It was added by MAPREDUCE-1501.
520
    // whether hadoop map-reduce supports sub-directories. It was added by MAPREDUCE-1501.
520
    // Some optimizations can only be performed if the version of hadoop being used supports
521
    // Some optimizations can only be performed if the version of hadoop being used supports
521
    // sub-directories
522
    // sub-directories
522
    HIVE_HADOOP_SUPPORTS_SUBDIRECTORIES("hive.mapred.supports.subdirectories", false),
523
    HIVE_HADOOP_SUPPORTS_SUBDIRECTORIES("hive.mapred.supports.subdirectories", false),
[+20] [20] 599 lines
conf/hive-default.xml.template
Revision f332f3a New Change
 
ql/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/ql/plan/api/OperatorType.java
Revision 7c4c413 New Change
 
ql/src/java/org/apache/hadoop/hive/ql/exec/BaseReduceSinkOperator.java
New File
 
ql/src/java/org/apache/hadoop/hive/ql/exec/CorrelationCompositeOperator.java
New File
 
ql/src/java/org/apache/hadoop/hive/ql/exec/CorrelationLocalSimulativeReduceSinkOperator.java
New File
 
ql/src/java/org/apache/hadoop/hive/ql/exec/CorrelationReducerDispatchOperator.java
New File
 
ql/src/java/org/apache/hadoop/hive/ql/exec/ExecReducer.java
Revision 18a9bd2 New Change
 
ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java
Revision 46daeb2 New Change
 
ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java
Revision 68302f8 New Change
 
ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java
Revision 0c22141 New Change
 
ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java
Revision 919a140 New Change
 
ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java
Revision 1469325 New Change
 
ql/src/java/org/apache/hadoop/hive/ql/optimizer/CorrelationOptimizer.java
New File
 
ql/src/java/org/apache/hadoop/hive/ql/optimizer/CorrelationOptimizerUtils.java
New File
 
ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
Revision edde378 New Change
 
ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
Revision d1555e2 New Change
 
ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
Revision 2bf284d New Change
 
ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
Revision 330aa52 New Change
 
ql/src/java/org/apache/hadoop/hive/ql/plan/BaseReduceSinkDesc.java
New File
 
  1. common/src/java/org/apache/hadoop/hive/conf/HiveConf.java: Loading...
  2. conf/hive-default.xml.template: Loading...
  3. ql/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/ql/plan/api/OperatorType.java: Loading...
  4. ql/src/java/org/apache/hadoop/hive/ql/exec/BaseReduceSinkOperator.java: Loading...
  5. ql/src/java/org/apache/hadoop/hive/ql/exec/CorrelationCompositeOperator.java: Loading...
  6. ql/src/java/org/apache/hadoop/hive/ql/exec/CorrelationLocalSimulativeReduceSinkOperator.java: Loading...
  7. ql/src/java/org/apache/hadoop/hive/ql/exec/CorrelationReducerDispatchOperator.java: Loading...
  8. ql/src/java/org/apache/hadoop/hive/ql/exec/ExecReducer.java: Loading...
  9. ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java: Loading...
  10. ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java: Loading...
  11. ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java: Loading...
  12. ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java: Loading...
  13. ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java: Loading...
  14. ql/src/java/org/apache/hadoop/hive/ql/optimizer/CorrelationOptimizer.java: Loading...
  15. ql/src/java/org/apache/hadoop/hive/ql/optimizer/CorrelationOptimizerUtils.java: Loading...
  16. ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java: Loading...
  17. ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java: Loading...
  18. ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java: Loading...
  19. ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java: Loading...
  20. ql/src/java/org/apache/hadoop/hive/ql/plan/BaseReduceSinkDesc.java: Loading...
This diff has been split across 2 pages: 1 2 >