Review Board 1.7.22


HIVE-2206: add a new optimizer for query correlation discovery and optimization

Review Request #2001 - Created Sept. 21, 2011 and updated

Yin Huai
trunk
HIVE-2206
Reviewers
hive
hive
This optimizer exploits intra-query correlations and merges multiple correlated MapReduce jobs into a single job.

 

Diff revision 4

This is not the most recent revision of the diff. The latest diff is revision 5. See what's changed.

1 2 3 4 5
1 2 3 4 5

  1. trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java: Loading...
  2. trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/BaseReduceSinkOperator.java: Loading...
  3. trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/CorrelationCompositeOperator.java: Loading...
  4. trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/CorrelationLocalSimulativeReduceSinkOperator.java: Loading...
  5. trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/CorrelationReducerDispatchOperator.java: Loading...
  6. trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ExecReducer.java: Loading...
  7. trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java: Loading...
  8. trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java: Loading...
  9. trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java: Loading...
  10. trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java: Loading...
  11. trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java: Loading...
  12. trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/CorrelationOptimizer.java: Loading...
  13. trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/CorrelationOptimizerUtils.java: Loading...
  14. trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java: Loading...
  15. trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java: Loading...
  16. trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java: Loading...
  17. trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java: Loading...
  18. trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseReduceSinkDesc.java: Loading...
  19. trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/CorrelationCompositeDesc.java: Loading...
  20. trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/CorrelationLocalSimulativeReduceSinkDesc.java: Loading...
  21. trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/CorrelationReducerDispatchDesc.java: Loading...
  22. trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java: Loading...
  23. trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java: Loading...
  24. trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java: Loading...
  25. trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java: Loading...
  26. trunk/ql/src/test/results/compiler/plan/groupby1.q.xml: Loading...
  27. trunk/ql/src/test/results/compiler/plan/groupby2.q.xml: Loading...
  28. trunk/ql/src/test/results/compiler/plan/groupby3.q.xml: Loading...
  29. trunk/ql/src/test/results/compiler/plan/groupby5.q.xml: Loading...
trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
Revision 1237326 New Change
[20] 447 lines
[+20] [+] public class HiveConf extends Configuration {
448
    HIVEOPTPPD_STORAGE("hive.optimize.ppd.storage", true),
448
    HIVEOPTPPD_STORAGE("hive.optimize.ppd.storage", true),
449
    HIVEOPTGROUPBY("hive.optimize.groupby", true), // optimize group by
449
    HIVEOPTGROUPBY("hive.optimize.groupby", true), // optimize group by
450
    HIVEOPTBUCKETMAPJOIN("hive.optimize.bucketmapjoin", false), // optimize bucket map join
450
    HIVEOPTBUCKETMAPJOIN("hive.optimize.bucketmapjoin", false), // optimize bucket map join
451
    HIVEOPTSORTMERGEBUCKETMAPJOIN("hive.optimize.bucketmapjoin.sortedmerge", false), // try to use sorted merge bucket map join
451
    HIVEOPTSORTMERGEBUCKETMAPJOIN("hive.optimize.bucketmapjoin.sortedmerge", false), // try to use sorted merge bucket map join
452
    HIVEOPTREDUCEDEDUPLICATION("hive.optimize.reducededuplication", true),
452
    HIVEOPTREDUCEDEDUPLICATION("hive.optimize.reducededuplication", true),

    
   
453
    HIVEOPTCORRELATION("hive.optimize.correlation", false), // exploit intra-query correlations
453

    
   
454

   
454
    // Indexes
455
    // Indexes
455
    HIVEOPTINDEXFILTER_COMPACT_MINSIZE("hive.optimize.index.filter.compact.minsize", (long) 5 * 1024 * 1024 * 1024), // 5G
456
    HIVEOPTINDEXFILTER_COMPACT_MINSIZE("hive.optimize.index.filter.compact.minsize", (long) 5 * 1024 * 1024 * 1024), // 5G
456
    HIVEOPTINDEXFILTER_COMPACT_MAXSIZE("hive.optimize.index.filter.compact.maxsize", (long) -1), // infinity
457
    HIVEOPTINDEXFILTER_COMPACT_MAXSIZE("hive.optimize.index.filter.compact.maxsize", (long) -1), // infinity
457
    HIVE_INDEX_COMPACT_QUERY_MAX_ENTRIES("hive.index.compact.query.max.entries", (long) 10000000), // 10M
458
    HIVE_INDEX_COMPACT_QUERY_MAX_ENTRIES("hive.index.compact.query.max.entries", (long) 10000000), // 10M
[+20] [20] 482 lines
trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/BaseReduceSinkOperator.java
New File
 
trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/CorrelationCompositeOperator.java
New File
 
trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/CorrelationLocalSimulativeReduceSinkOperator.java
New File
 
trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/CorrelationReducerDispatchOperator.java
New File
 
trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ExecReducer.java
Revision 1237326 New Change
 
trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java
Revision 1237326 New Change
 
trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java
Revision 1237326 New Change
 
trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java
Revision 1237326 New Change
 
trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java
Revision 1237326 New Change
 
trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java
Revision 1237326 New Change
 
trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/CorrelationOptimizer.java
New File
 
trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/CorrelationOptimizerUtils.java
New File
 
trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
Revision 1237326 New Change
 
trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
Revision 1237326 New Change
 
trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
Revision 1237326 New Change
 
trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
Revision 1237326 New Change
 
trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseReduceSinkDesc.java
New File
 
trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/CorrelationCompositeDesc.java
New File
 
trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/CorrelationLocalSimulativeReduceSinkDesc.java
New File
 
trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/CorrelationReducerDispatchDesc.java
New File
 
trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java
Revision 1237326 New Change
 
trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java
Revision 1237326 New Change
 
trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java
Revision 1237326 New Change
 
trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java
Revision 1237326 New Change
 
trunk/ql/src/test/results/compiler/plan/groupby1.q.xml
Revision 1237326 New Change
 
trunk/ql/src/test/results/compiler/plan/groupby2.q.xml
Revision 1237326 New Change
 
trunk/ql/src/test/results/compiler/plan/groupby3.q.xml
Revision 1237326 New Change
 
trunk/ql/src/test/results/compiler/plan/groupby5.q.xml
Revision 1237326 New Change
 
  1. trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java: Loading...
  2. trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/BaseReduceSinkOperator.java: Loading...
  3. trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/CorrelationCompositeOperator.java: Loading...
  4. trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/CorrelationLocalSimulativeReduceSinkOperator.java: Loading...
  5. trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/CorrelationReducerDispatchOperator.java: Loading...
  6. trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ExecReducer.java: Loading...
  7. trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java: Loading...
  8. trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java: Loading...
  9. trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java: Loading...
  10. trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java: Loading...
  11. trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java: Loading...
  12. trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/CorrelationOptimizer.java: Loading...
  13. trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/CorrelationOptimizerUtils.java: Loading...
  14. trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java: Loading...
  15. trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java: Loading...
  16. trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java: Loading...
  17. trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java: Loading...
  18. trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseReduceSinkDesc.java: Loading...
  19. trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/CorrelationCompositeDesc.java: Loading...
  20. trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/CorrelationLocalSimulativeReduceSinkDesc.java: Loading...
  21. trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/CorrelationReducerDispatchDesc.java: Loading...
  22. trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java: Loading...
  23. trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java: Loading...
  24. trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java: Loading...
  25. trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java: Loading...
  26. trunk/ql/src/test/results/compiler/plan/groupby1.q.xml: Loading...
  27. trunk/ql/src/test/results/compiler/plan/groupby2.q.xml: Loading...
  28. trunk/ql/src/test/results/compiler/plan/groupby3.q.xml: Loading...
  29. trunk/ql/src/test/results/compiler/plan/groupby5.q.xml: Loading...