Review Board 1.7.22


Implemented end-to-end support for CONCAT() with changes to VectorizationContext class

Review Request #12926 - Created July 24, 2013 and updated

Eric Hanson
vectorization
HIVE-4512
Reviewers
hive
hive-git
Implemented end-to-end support for CONCAT() with changes to VectorizationContext class.
Verified all these queries run vectorized:

explain 
select l_shipmode, concat(l_shipmode, "<<<")
from lineitem_orc 
where l_orderkey = 1;

select l_shipmode, concat('>>>', l_shipmode)
from lineitem_orc 
where l_orderkey = 1;

select l_shipmode, concat('>>>', l_shipmode)
from lineitem_orc 
where l_orderkey = 1
and lower(l_shipmode) = 'mail';


select l_shipmode, concat(l_shipmode, l_shipmode)
from lineitem_orc 
where l_orderkey = 1;

select l_shipmode, concat(lower(l_shipmode), upper(lower(l_shipmode)))
from lineitem_orc 
where l_orderkey = 1;
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
Revision 9c90230 New Change
[20] 37 lines
[+20]
38
import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterStringColLikeStringScalar;
38
import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterStringColLikeStringScalar;
39
import org.apache.hadoop.hive.ql.exec.vector.expressions.IdentityExpression;
39
import org.apache.hadoop.hive.ql.exec.vector.expressions.IdentityExpression;
40
import org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsNotNull;
40
import org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsNotNull;
41
import org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsNull;
41
import org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsNull;
42
import org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsTrue;
42
import org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsTrue;

    
   
43
import org.apache.hadoop.hive.ql.exec.vector.expressions.StringConcatColCol;

    
   
44
import org.apache.hadoop.hive.ql.exec.vector.expressions.StringConcatColScalar;

    
   
45
import org.apache.hadoop.hive.ql.exec.vector.expressions.StringConcatScalarCol;
43
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
46
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
44
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFUnixTimeStampLong;
47
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFUnixTimeStampLong;
45
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression;
48
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression;
46
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFCount;
49
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFCount;
47
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFCountStar;
50
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFCountStar;
[+20] [20] 20 lines
[+20]
68
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
71
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
69
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
72
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
70
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
73
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
71
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
74
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
72
import org.apache.hadoop.hive.ql.plan.api.OperatorType;
75
import org.apache.hadoop.hive.ql.plan.api.OperatorType;

    
   
76
import org.apache.hadoop.hive.ql.udf.UDFConcat;
73
import org.apache.hadoop.hive.ql.udf.UDFDayOfMonth;
77
import org.apache.hadoop.hive.ql.udf.UDFDayOfMonth;
74
import org.apache.hadoop.hive.ql.udf.UDFHour;
78
import org.apache.hadoop.hive.ql.udf.UDFHour;
75
import org.apache.hadoop.hive.ql.udf.UDFLength;
79
import org.apache.hadoop.hive.ql.udf.UDFLength;
76
import org.apache.hadoop.hive.ql.udf.UDFLike;
80
import org.apache.hadoop.hive.ql.udf.UDFLike;
77
import org.apache.hadoop.hive.ql.udf.UDFLower;
81
import org.apache.hadoop.hive.ql.udf.UDFLower;
[+20] [20] 382 lines
[+20] [+] private VectorExpression getVectorExpression(GenericUDFBridge udf,
460
      return getUnaryStringExpression("StringLower", "String", childExpr);
464
      return getUnaryStringExpression("StringLower", "String", childExpr);
461
    } else if (cl.equals(UDFUpper.class)) {
465
    } else if (cl.equals(UDFUpper.class)) {
462
      return getUnaryStringExpression("StringUpper", "String", childExpr);
466
      return getUnaryStringExpression("StringUpper", "String", childExpr);
463
    } else if (cl.equals(UDFLength.class)) {
467
    } else if (cl.equals(UDFLength.class)) {
464
      return getUnaryStringExpression("StringLength", "Long", childExpr);
468
      return getUnaryStringExpression("StringLength", "Long", childExpr);
465
    }
469
    } else if (cl.equals(UDFConcat.class)) {

    
   
470
      return getConcatExpression(childExpr);

    
   
471
    } 
466

    
   
472

   
467
    throw new HiveException("Udf: "+udf.getClass().getSimpleName()+", is not supported");
473
    throw new HiveException("Udf: "+udf.getClass().getSimpleName()+", is not supported");
468
  }
474
  }
469

    
   
475

   

    
   
476
  /* Return a vector expression for string concatenation, including the column-scalar,

    
   
477
   * scalar-column, and column-column cases.

    
   
478
   */

    
   
479
  private VectorExpression getConcatExpression(List<ExprNodeDesc> childExprList)

    
   
480
      throws HiveException {

    
   
481
    ExprNodeDesc left = childExprList.get(0);

    
   
482
    ExprNodeDesc right = childExprList.get(1);

    
   
483
    int inputColLeft = -1;

    
   
484
    int inputColRight = -1;

    
   
485
    VectorExpression vLeft = null;

    
   
486
    VectorExpression vRight = null;

    
   
487
    VectorExpression expr = null;

    
   
488
    

    
   
489
    // Generate trees to evaluate non-leaf inputs, if there are any.

    
   
490
    if (left instanceof ExprNodeGenericFuncDesc) {

    
   
491
      vLeft = getVectorExpression(left);

    
   
492
      inputColLeft = vLeft.getOutputColumn();

    
   
493
    }

    
   
494
    

    
   
495
    if (right instanceof ExprNodeGenericFuncDesc) {

    
   
496
      vRight = getVectorExpression(right);

    
   
497
      inputColRight = vRight.getOutputColumn();

    
   
498
    }

    
   
499
    

    
   
500
    // Handle case for left input a column and right input a constant

    
   
501
    if ((left instanceof ExprNodeColumnDesc || inputColLeft != -1) && 

    
   
502
        right instanceof ExprNodeConstantDesc) {

    
   
503
      if (inputColLeft == -1) {

    
   
504
        inputColLeft = getInputColumnIndex(((ExprNodeColumnDesc) left).getColumn());

    
   
505
      }

    
   
506
      int outputCol = ocm.allocateOutputColumn("String");

    
   
507
      byte[] constant = (byte[]) getScalarValue((ExprNodeConstantDesc) right);

    
   
508
      expr = new StringConcatColScalar(inputColLeft, outputCol, constant);

    
   
509
      if (vLeft != null) {

    
   
510
        expr.setChildExpressions(new VectorExpression [] {vLeft});

    
   
511
      }

    
   
512
    }

    
   
513
    

    
   
514
    // Handle case for left input a constant and right input a column

    
   
515
    else if ((left instanceof ExprNodeConstantDesc) &&

    
   
516
        (right instanceof ExprNodeColumnDesc || inputColRight != -1)) {

    
   
517
      if (inputColRight == -1) {

    
   
518
        inputColRight = getInputColumnIndex(((ExprNodeColumnDesc) right).getColumn());

    
   
519
      }

    
   
520
      int outputCol = ocm.allocateOutputColumn("String");

    
   
521
      byte[] constant = (byte[]) getScalarValue((ExprNodeConstantDesc) left);

    
   
522
      expr = new StringConcatScalarCol(constant, inputColRight, outputCol);

    
   
523
      if (vRight != null) {

    
   
524
        expr.setChildExpressions(new VectorExpression [] {vRight});

    
   
525
      }

    
   
526
    }

    
   
527
    

    
   
528
    // Handle case where both left and right inputs are columns

    
   
529
    else if ((left instanceof ExprNodeColumnDesc || inputColLeft != -1) &&

    
   
530
        (right instanceof ExprNodeColumnDesc || inputColRight != -1)) {

    
   
531
      if (inputColLeft == -1) {

    
   
532
        inputColLeft = getInputColumnIndex(((ExprNodeColumnDesc) left).getColumn());

    
   
533
      }

    
   
534
      if (inputColRight == -1) {

    
   
535
        inputColRight = getInputColumnIndex(((ExprNodeColumnDesc) right).getColumn());

    
   
536
      }

    
   
537
      int outputCol = ocm.allocateOutputColumn("String");

    
   
538
      expr = new StringConcatColCol(inputColLeft, inputColRight, outputCol);

    
   
539
      if (vLeft == null && vRight != null) {

    
   
540
        expr.setChildExpressions(new VectorExpression [] {vRight});

    
   
541
      } else if (vLeft != null && vRight == null) {

    
   
542
        expr.setChildExpressions(new VectorExpression [] {vLeft});

    
   
543
      } else if (vLeft != null && vRight != null) {

    
   
544
        

    
   
545
        // Both left and right have child expressions

    
   
546
        expr.setChildExpressions(new VectorExpression [] {vLeft, vRight});

    
   
547
      }

    
   
548
    } else {

    
   
549
      throw new HiveException("Failed to vectorize Substr()");      

    
   
550
    }

    
   
551
    

    
   
552
    // Free output columns if inputs have non-leaf expression trees.

    
   
553
    if (vLeft != null) {

    
   
554
      ocm.freeOutputColumn(vLeft.getOutputColumn());

    
   
555
    }

    
   
556
    if (vRight != null) {

    
   
557
      ocm.freeOutputColumn(vRight.getOutputColumn());

    
   
558
    }

    
   
559
    return expr;

    
   
560
  }

    
   
561

   
470
  /* Return a unary string vector expression. This is used for functions like
562
  /* Return a unary string vector expression. This is used for functions like
471
   * UPPER() and LOWER().
563
   * UPPER() and LOWER().
472
   */
564
   */
473
  private VectorExpression getUnaryStringExpression(String vectorExprClassName, 
565
  private VectorExpression getUnaryStringExpression(String vectorExprClassName, 
474
      String resultType, // result type name
566
      String resultType, // result type name
[+20] [20] 356 lines
[+20] [+] private VectorExpression getVectorExpression(GenericUDFOPNotNull udf,
831
    } else {
923
    } else {
832
      throw new HiveException("Not supported");
924
      throw new HiveException("Not supported");
833
    }
925
    }
834
  }
926
  }
835

    
   
927

   
836
  private Object getScalarValue(ExprNodeConstantDesc constDesc) {
928
  private Object getScalarValue(ExprNodeConstantDesc constDesc) 

    
   
929
      throws HiveException {
837
    if (constDesc.getTypeString().equalsIgnoreCase("String")) {
930
    if (constDesc.getTypeString().equalsIgnoreCase("String")) {
838
      return ((String) constDesc.getValue()).getBytes();
931
      try {

    
   
932
         byte[] bytes = ((String) constDesc.getValue()).getBytes("UTF-8");

    
   
933
         return bytes;

    
   
934
      } catch (Exception ex) {

    
   
935
        throw new HiveException(ex);

    
   
936
      }
839
    } else {
937
    } else {
840
      return constDesc.getValue();
938
      return constDesc.getValue();
841
    }
939
    }
842
  }
940
  }
843

    
   
941

   
[+20] [20] 455 lines
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringConcatColScalar.java
Revision 29ef244 New Change
 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringConcatScalarCol.java
Revision 14205ee New Change
 
  1. ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java: Loading...
  2. ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringConcatColScalar.java: Loading...
  3. ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringConcatScalarCol.java: Loading...