Review Board 1.7.22


HIVE-4878: With Dynamic partitioning, some queries would scan default partition even if query is not using it.

Review Request #12705 - Created July 17, 2013 and updated

John Pullokkaran
Reviewers
hive
ashutoshc
hive-git
With Dynamic partitioning, Hive would scan default partitions in some cases even if query excludes it. As part of partition pruning, predicate is narrowed down to those pieces that involve partition columns only. This predicate is then evaluated with partition values to determine, if scan should include those partitions.
But in some cases (like when comparing "_HIVE_DEFAULT_PARTITION_" to numeric data types) expression evaluation would fail and would return NULL instead of true/false. In such cases the partition is added to unknown partitions which is then subsequently scanned.

This fix avoids scanning default partition if all of the following is true:
a) Hive dynamic partition mode is strict (hive.exec.dynamic.partition.mode=strict).
b) partition pruning expression failed to evaluate for a given partition.
c) at the least one of the columns in the partition is default partition.
Hive Unit Tests Passed.
ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java
Revision 6a4a360 New Change
[20] 214 lines
[+20] [+] public static PrunedPartitionList prune(Table tab, ExprNodeDesc prunerExpr,
215
              }
215
              }
216
            } else {
216
            } else {
217
              LOG.info(ErrorMsg.INVALID_JDO_FILTER_EXPRESSION.getMsg("by condition '"
217
              LOG.info(ErrorMsg.INVALID_JDO_FILTER_EXPRESSION.getMsg("by condition '"
218
                  + message + "'"));
218
                  + message + "'"));
219
              pruneBySequentialScan(tab, true_parts, unkn_parts, denied_parts,
219
              pruneBySequentialScan(tab, true_parts, unkn_parts, denied_parts,
220
                  prunerExpr, rowObjectInspector);
220
                  prunerExpr, rowObjectInspector, conf);
221
            }
221
            }
222
          }
222
          }
223
        }
223
        }
224
        LOG.debug("tabname = " + tab.getTableName() + " is partitioned");
224
        LOG.debug("tabname = " + tab.getTableName() + " is partitioned");
225
      } else {
225
      } else {
[+20] [20] 72 lines
[+20] [+] static private void pruneByPushDown(Table tab, Set<Partition> true_parts, String filter)
298
   * @param unkn_parts the resulting partitions if the partition pruning expression that only contains
298
   * @param unkn_parts the resulting partitions if the partition pruning expression that only contains
299
   *        non-partition columns.
299
   *        non-partition columns.
300
   * @param denied_parts pruned out partitions.
300
   * @param denied_parts pruned out partitions.
301
   * @param prunerExpr the SQL predicate that involves partition columns.
301
   * @param prunerExpr the SQL predicate that involves partition columns.
302
   * @param rowObjectInspector object inspector used by the evaluator
302
   * @param rowObjectInspector object inspector used by the evaluator

    
   
303
   * @param conf Hive Configuration object, can not be NULL.
303
   * @throws Exception
304
   * @throws Exception
304
   */
305
   */
305
  static private void pruneBySequentialScan(Table tab, Set<Partition> true_parts, Set<Partition> unkn_parts,
306
  static private void pruneBySequentialScan(Table tab, Set<Partition> true_parts, Set<Partition> unkn_parts,
306
      Set<Partition> denied_parts, ExprNodeDesc prunerExpr, StructObjectInspector rowObjectInspector)
307
      Set<Partition> denied_parts, ExprNodeDesc prunerExpr, StructObjectInspector rowObjectInspector, HiveConf conf)
307
      throws Exception {
308
      throws Exception {
308

    
   
309

   
309
    List<String> trueNames = null;
310
    List<String> trueNames = null;
310
    List<String> unknNames = null;
311
    List<String> unknNames = null;
311

    
   
312

   
[+20] [20] 6 lines
[+20] static private void pruneByPushDown(Table tab, Set<Partition> true_parts, String filter)
318

    
   
319

   
319
    List<FieldSchema> pCols = tab.getPartCols();
320
    List<FieldSchema> pCols = tab.getPartCols();
320
    List<String> partCols = new ArrayList<String>(pCols.size());
321
    List<String> partCols = new ArrayList<String>(pCols.size());
321
    List<String> values = new ArrayList<String>(pCols.size());
322
    List<String> values = new ArrayList<String>(pCols.size());
322
    Object[] objectWithPart = new Object[2];
323
    Object[] objectWithPart = new Object[2];

    
   
324
    String defaultPartitionName = conf.getVar(HiveConf.ConfVars.DEFAULTPARTITIONNAME);
323

    
   
325

   
324
    for (FieldSchema pCol : pCols) {
326
    for (FieldSchema pCol : pCols) {
325
      partCols.add(pCol.getName());
327
      partCols.add(pCol.getName());
326
    }
328
    }
327

    
   
329

   
[+20] [20] 14 lines
[+20] static private void pruneByPushDown(Table tab, Set<Partition> true_parts, String filter)
342

    
   
344

   
343
      // evaluate the expression tree
345
      // evaluate the expression tree
344
      Boolean r = (Boolean) PartExprEvalUtils.evaluateExprOnPart(handle, objectWithPart);
346
      Boolean r = (Boolean) PartExprEvalUtils.evaluateExprOnPart(handle, objectWithPart);
345

    
   
347

   
346
      if (r == null) {
348
      if (r == null) {

    
   
349
        // Reject default partitions if we couldn't determine whether we should include it or not.

    
   
350
        // Note that predicate would only contains partition column parts of original predicate.

    
   
351
        if (values.contains(defaultPartitionName)) {

    
   
352
          LOG.debug("skipping default/bad partition: " + partName);

    
   
353
        }else {
347
        if (unknNames == null) {
354
          if (unknNames == null) {
348
          unknNames = new LinkedList<String>();
355
            unknNames = new LinkedList<String>();
349
        }
356
          }
350
        unknNames.add(partName);
357
          unknNames.add(partName);
351
        LOG.debug("retained unknown partition: " + partName);
358
          LOG.debug("retained unknown partition: " + partName);

    
   
359
        }
352
      } else if (Boolean.TRUE.equals(r)) {
360
      } else if (Boolean.TRUE.equals(r)) {
353
        if (trueNames == null) {
361
        if (trueNames == null) {
354
          trueNames = new LinkedList<String>();
362
          trueNames = new LinkedList<String>();
355
        }
363
        }
356
        trueNames.add(partName);
364
        trueNames.add(partName);
[+20] [20] 43 lines
ql/src/test/queries/clientpositive/dynamic_partition_skip_default.q
New File
 
ql/src/test/results/clientpositive/dynamic_partition_skip_default.q.out
New File
 
  1. ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java: Loading...
  2. ql/src/test/queries/clientpositive/dynamic_partition_skip_default.q: Loading...
  3. ql/src/test/results/clientpositive/dynamic_partition_skip_default.q.out: Loading...