Review Board 1.7.22


ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java
Revision 80bf671 New Change
[... 15 unchanged lines ...]
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.exec.vector;

+import java.io.IOException;
+import java.nio.ByteBuffer;
import java.sql.Timestamp;
import java.util.List;

import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde2.ByteStream;
+import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.io.ByteWritable;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.io.ShortWritable;
import org.apache.hadoop.hive.serde2.io.TimestampWritable;
+import org.apache.hadoop.hive.serde2.lazy.LazyLong;
+import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.SerDeParameters;
+import org.apache.hadoop.hive.serde2.lazy.LazyTimestamp;
+import org.apache.hadoop.hive.serde2.lazy.LazyUtils;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
[... 8 unchanged lines ...]
public class VectorizedBatchUtil {
  /**
   * Sets the IsNull value for ColumnVector at specified index
   * @param cv
   * @param rowIndex
   */
-  public static void SetNullColIsNullValue(ColumnVector cv, int rowIndex) {
+  public static void setNullColIsNullValue(ColumnVector cv, int rowIndex) {
    cv.isNull[rowIndex] = true;
    if (cv.noNulls) {
      cv.noNulls = false;
    }
  }
[... 5 unchanged lines ...]
   * @param valueToSet
   *          noNull value to set
   * @param batch
   *          Batch on which noNull is set
   */
-  public static void SetNoNullFields(boolean valueToSet, VectorizedRowBatch batch) {
+  public static void setNoNullFields(boolean valueToSet, VectorizedRowBatch batch) {
    for (int i = 0; i < batch.numCols; i++) {
      batch.cols[i].noNulls = valueToSet;
    }
  }

  /**
   * Iterates through all the columns in a given row and populates the batch
   * @param row Deserialized row object
   * @param oi Object inspector for that row
   * @param rowIndex index at which the row should be added to the batch
   * @param batch Vectorized batch to which the row is added at rowIndex
   * @throws HiveException
   */
-  public static void AddRowToBatch(Object row, StructObjectInspector oi, int rowIndex,
+  public static void addRowToBatch(Object row, StructObjectInspector oi, int rowIndex,
      VectorizedRowBatch batch) throws HiveException {
    List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
    // Iterate through the columns and load the batch
    for (int i = 0; i < fieldRefs.size(); i++) {
      Object fieldData = oi.getStructFieldData(row, fieldRefs.get(i));
[... 14 unchanged lines ...]
        if (writableCol != null) {
          lcv.vector[rowIndex] = ((BooleanWritable) writableCol).get() ? 1 : 0;
          lcv.isNull[rowIndex] = false;
        } else {
          lcv.vector[rowIndex] = 1;
-          SetNullColIsNullValue(lcv, rowIndex);
+          setNullColIsNullValue(lcv, rowIndex);
        }
      }
        break;
      case BYTE: {
        LongColumnVector lcv = (LongColumnVector) batch.cols[i];
        if (writableCol != null) {
          lcv.vector[rowIndex] = ((ByteWritable) writableCol).get();
          lcv.isNull[rowIndex] = false;
        } else {
          lcv.vector[rowIndex] = 1;
-          SetNullColIsNullValue(lcv, rowIndex);
+          setNullColIsNullValue(lcv, rowIndex);
        }
      }
        break;
      case SHORT: {
        LongColumnVector lcv = (LongColumnVector) batch.cols[i];
        if (writableCol != null) {
          lcv.vector[rowIndex] = ((ShortWritable) writableCol).get();
          lcv.isNull[rowIndex] = false;
        } else {
          lcv.vector[rowIndex] = 1;
-          SetNullColIsNullValue(lcv, rowIndex);
+          setNullColIsNullValue(lcv, rowIndex);
        }
      }
        break;
      case INT: {
        LongColumnVector lcv = (LongColumnVector) batch.cols[i];
        if (writableCol != null) {
          lcv.vector[rowIndex] = ((IntWritable) writableCol).get();
          lcv.isNull[rowIndex] = false;
        } else {
          lcv.vector[rowIndex] = 1;
-          SetNullColIsNullValue(lcv, rowIndex);
+          setNullColIsNullValue(lcv, rowIndex);
        }
      }
        break;
      case LONG: {
        LongColumnVector lcv = (LongColumnVector) batch.cols[i];
        if (writableCol != null) {
          lcv.vector[rowIndex] = ((LongWritable) writableCol).get();
          lcv.isNull[rowIndex] = false;
        } else {
          lcv.vector[rowIndex] = 1;
-          SetNullColIsNullValue(lcv, rowIndex);
+          setNullColIsNullValue(lcv, rowIndex);
        }
      }
        break;
      case FLOAT: {
        DoubleColumnVector dcv = (DoubleColumnVector) batch.cols[i];
        if (writableCol != null) {
          dcv.vector[rowIndex] = ((FloatWritable) writableCol).get();
          dcv.isNull[rowIndex] = false;
        } else {
          dcv.vector[rowIndex] = Double.NaN;
-          SetNullColIsNullValue(dcv, rowIndex);
+          setNullColIsNullValue(dcv, rowIndex);
        }
      }
        break;
      case DOUBLE: {
        DoubleColumnVector dcv = (DoubleColumnVector) batch.cols[i];
        if (writableCol != null) {
          dcv.vector[rowIndex] = ((DoubleWritable) writableCol).get();
          dcv.isNull[rowIndex] = false;
        } else {
          dcv.vector[rowIndex] = Double.NaN;
-          SetNullColIsNullValue(dcv, rowIndex);
+          setNullColIsNullValue(dcv, rowIndex);
        }
      }
        break;
      case TIMESTAMP: {
        LongColumnVector lcv = (LongColumnVector) batch.cols[i];
        if (writableCol != null) {
          Timestamp t = ((TimestampWritable) writableCol).getTimestamp();
          lcv.vector[rowIndex] = TimestampUtils.getTimeNanoSec(t);
          lcv.isNull[rowIndex] = false;
        } else {
          lcv.vector[rowIndex] = 1;
-          SetNullColIsNullValue(lcv, rowIndex);
+          setNullColIsNullValue(lcv, rowIndex);
        }
      }
        break;
      case STRING: {
        BytesColumnVector bcv = (BytesColumnVector) batch.cols[i];
        if (writableCol != null) {
          bcv.isNull[rowIndex] = false;
          Text colText = (Text) writableCol;
          bcv.setRef(rowIndex, colText.getBytes(), 0, colText.getLength());
        } else {
-          SetNullColIsNullValue(bcv, rowIndex);
+          setNullColIsNullValue(bcv, rowIndex);
        }
      }
        break;
      default:
        throw new HiveException("Vectorization is not supported for datatype:"
            + poi.getPrimitiveCategory());
      }
    }
  }

+  /**
+   * Serializes a vector field (column data) into a byte stream.
+   * @param fieldOI Field object inspector
+   * @param currentColVector ColumnVector that contains the field to serialize
+   * @param rowIndex index of the row in currentColVector that contains the field data
+   * @param out byte stream to which the field is serialized
+   * @param serdeParams SerDe params used to serialize the field
+   * @throws IOException
+   * @throws SerDeException
+   */
+  public static void serializeVectorField(ObjectInspector fieldOI, ColumnVector currentColVector, int rowIndex,
+      ByteStream.Output out, SerDeParameters serdeParams) throws IOException, SerDeException {
+
+    switch (fieldOI.getCategory()) {
+    case PRIMITIVE: {
+      PrimitiveObjectInspector poi = (PrimitiveObjectInspector) fieldOI;
+      if (!currentColVector.noNulls
+          && (currentColVector.isRepeating || currentColVector.isNull[rowIndex])) {
+        // The column is null, hence write the null value
+        out.write(serdeParams.getNullSequence().getBytes(), 0, serdeParams
+            .getNullSequence().getLength());
+      } else {
+        // If here then the vector value is not null.
+        if (currentColVector.isRepeating) {
+          // If the vector has repeating values then set rowIndex to zero
+          rowIndex = 0;
+        }
+
+        switch (poi.getPrimitiveCategory()) {
+        case BOOLEAN: {
+          LongColumnVector lcv = (LongColumnVector) currentColVector;
+          // In vectorization true is stored as 1 and false as 0
+          boolean b = lcv.vector[rowIndex] == 1 ? true : false;
+          if (b) {
+            out.write(LazyUtils.trueBytes, 0, LazyUtils.trueBytes.length);
+          } else {
+            out.write(LazyUtils.falseBytes, 0, LazyUtils.falseBytes.length);
+          }
+        }
+          break;
+        case BYTE:
+        case SHORT:
+        case INT:
+        case LONG:
+          LongColumnVector lcv = (LongColumnVector) currentColVector;
+          LazyLong.writeUTF8(out, lcv.vector[rowIndex]);
+          break;
+        case FLOAT:
+        case DOUBLE:
+          DoubleColumnVector dcv = (DoubleColumnVector) currentColVector;
+          ByteBuffer b = Text.encode(String.valueOf(dcv.vector[rowIndex]));
+          out.write(b.array(), 0, b.limit());
+          break;
+        case STRING:
+          BytesColumnVector bcv = (BytesColumnVector) currentColVector;
+          LazyUtils.writeEscaped(out, bcv.vector[rowIndex],
+              bcv.start[rowIndex],
+              bcv.length[rowIndex],
+              serdeParams.isEscaped(), serdeParams.getEscapeChar(), serdeParams
+                  .getNeedsEscape());
+          break;
+        case TIMESTAMP:
+          LongColumnVector tcv = (LongColumnVector) currentColVector;
+          long timeInNanoSec = tcv.vector[rowIndex];
+          Timestamp t = new Timestamp(0);
+          TimestampUtils.assignTimeInNanoSec(timeInNanoSec, t);
+          TimestampWritable tw = new TimestampWritable();
+          tw.set(t);
+          LazyTimestamp.writeUTF8(out, tw);
+          break;
+        default:
+          throw new UnsupportedOperationException(
+              "Vectorization is not supported for datatype:"
+                  + poi.getPrimitiveCategory());
+        }
+      }
+      break;
+    }
+    case LIST:
+    case MAP:
+    case STRUCT:
+    case UNION:
+      throw new UnsupportedOperationException("Vectorization is not supported for datatype:"
+          + fieldOI.getCategory());
+    default:
+      throw new SerDeException("Unknown ObjectInspector category!");
+    }
+  }
}
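For orientation only (not part of the patch under review): a minimal, untested sketch of how a caller might drive the utilities above, first filling a batch with addRowToBatch and then writing one row back out field by field with serializeVectorField. The method name roundTripExample, its parameters, and the hand-built three-column batch are hypothetical; in real use the batch wiring presumably comes from VectorizedRowBatchCtx, and LazySimpleSerDe.initSerdeParams / ByteStream.Output are assumed to have the signatures they had in this era of Hive. Imports are omitted for brevity.

  // Hypothetical driver; everything not defined in VectorizedBatchUtil is an assumption.
  static void roundTripExample(List<Object> deserializedRows, StructObjectInspector rowOI,
      Configuration conf, Properties tableProperties)
      throws HiveException, SerDeException, IOException {
    // Build a three-column batch by hand (INT, DOUBLE, STRING for illustration).
    VectorizedRowBatch batch = new VectorizedRowBatch(3, VectorizedRowBatch.DEFAULT_SIZE);
    batch.cols[0] = new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
    batch.cols[1] = new DoubleColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
    batch.cols[2] = new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
    VectorizedBatchUtil.setNoNullFields(true, batch);

    // Load each deserialized row into the batch at the next free slot.
    int rowIndex = 0;
    for (Object row : deserializedRows) {
      VectorizedBatchUtil.addRowToBatch(row, rowOI, rowIndex++, batch);
    }
    batch.size = rowIndex;

    // Serialize the first row, separating fields with the configured delimiter.
    SerDeParameters serdeParams =
        LazySimpleSerDe.initSerdeParams(conf, tableProperties, LazySimpleSerDe.class.getName());
    ByteStream.Output out = new ByteStream.Output();
    List<? extends StructField> fields = rowOI.getAllStructFieldRefs();
    for (int c = 0; c < batch.numCols; c++) {
      if (c > 0) {
        out.write(serdeParams.getSeparators()[0]); // field delimiter
      }
      VectorizedBatchUtil.serializeVectorField(
          fields.get(c).getFieldObjectInspector(), batch.cols[c], 0, out, serdeParams);
    }
  }

The serializing SerDes in this review (VectorizedColumnarSerDe, VectorizedLazySimpleSerDe) presumably use a loop of this shape per row; the sketch is only meant to illustrate the calling convention of the new utility methods.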
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedColumnarSerDe.java
Revision 69553d9 New Change
 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedLazySimpleSerDe.java
New File
 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
Revision 5018ea1 New Change
 
ql/src/java/org/apache/hadoop/hive/ql/io/CommonRCFileInputFormat.java
Revision 4bfeb20 New Change
 
ql/src/java/org/apache/hadoop/hive/ql/io/CommonTextFileInputFormat.java
New File
 
ql/src/java/org/apache/hadoop/hive/ql/io/VectorizedLineRecordReader.java
New File
 
ql/src/java/org/apache/hadoop/hive/ql/io/VectorizedRCFileRecordReader.java
Revision 25b3aed New Change
 
ql/src/java/org/apache/hadoop/hive/ql/io/VectorizedTextInputFormat.java
New File
 
ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java
Revision 2c20987 New Change
 
ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizedRowBatchCtx.java
Revision 78ebb17 New Change
 
serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazySimpleSerDe.java
Revision d6b31a6 New Change
 