Review Board 1.7.22


Hive-3159 Update AvroSerde to determine schema of new tables

Review Request #11925 - Created June 18, 2013 and updated

Mohammad Islam
trunk
HIVE-3159
Reviewers
hive
ashutoshc, jghoman
hive-git
Problem:
Hive doesn't support to create a Avro-based table using HQL create table command. It currently requires to specify Avro schema literal or schema file name.
For multiple cases, it is very inconvenient for user.
Some of the un-supported use cases:
1. Create table ... <Avro-SERDE etc.> as SELECT ... from <NON-AVRO FILE>
2. Create table ... <Avro-SERDE etc.> as SELECT from <AVRO TABLE>
3. Create  table  without specifying Avro schema.

Wrote a new java Test class for a new Java class. Added a new test case into existing java test class. In addition, there are 4 .q file for testing multiple use-cases.
ql/src/test/queries/clientpositive/avro_create_as_select.q
New File

    
   
1
-- verify two concepts:

    
   
2
-- 1. Can copy AVRO table into another AVRO table using CTAS.

    
   
3
-- Also Read data from new table using SELECT.

    
   
4
-- 2. Can copy NON-AVRO table structure into AVRO table using CTAS.

    
   
5
-- [Known Bug:  Can't read data from new table using SELECT - HIVE-5803]

    
   
6
-- But it can load new data into AVRO table and query using SELECT

    
   
7

   

    
   
8
CREATE TABLE doctors

    
   
9
ROW FORMAT

    
   
10
SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'

    
   
11
STORED AS

    
   
12
INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'

    
   
13
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'

    
   
14
TBLPROPERTIES ('avro.schema.literal'='{

    
   
15
  "namespace": "testing.hive.avro.serde",

    
   
16
  "name": "doctors",

    
   
17
  "type": "record",

    
   
18
  "fields": [

    
   
19
    {

    
   
20
      "name":"number",

    
   
21
      "type":"int",

    
   
22
      "doc":"Order of playing the role"

    
   
23
    },

    
   
24
    {

    
   
25
      "name":"first_name",

    
   
26
      "type":"string",

    
   
27
      "doc":"first name of actor playing role"

    
   
28
    },

    
   
29
    {

    
   
30
      "name":"last_name",

    
   
31
      "type":"string",

    
   
32
      "doc":"last name of actor playing role"

    
   
33
    }

    
   
34
  ]

    
   
35
}');

    
   
36

   

    
   
37
DESCRIBE doctors;

    
   
38

   

    
   
39
LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' INTO TABLE doctors;

    
   
40

   

    
   
41
SELECT * FROM doctors ORDER BY number;

    
   
42

   

    
   
43
CREATE TABLE copy_doctors as SELECT * FROM doctors;

    
   
44
DESCRIBE copy_doctors;

    
   
45

   

    
   
46
SELECT * FROM copy_doctors ORDER BY number;

    
   
47

   

    
   
48
-- NON-AVRO table to AVRO Table

    
   
49

   

    
   
50
CREATE TABLE non_avro_doctors(number int, first_name STRING, last_name STRING) STORED AS TEXTFILE;

    
   
51
DESCRIBE non_avro_doctors;

    
   
52

   

    
   
53
CREATE TABLE avro_doctors

    
   
54
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'

    
   
55
STORED AS

    
   
56
   INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'

    
   
57
   OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'

    
   
58
AS SELECT * FROM non_avro_doctors;

    
   
59
DESCRIBE avro_doctors;

    
   
60

   

    
   
61
LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' INTO TABLE avro_doctors;

    
   
62

   

    
   
63
SELECT * FROM avro_doctors ORDER BY number;

    
   
64

   

    
   
65

   

    
   
66

   
ql/src/test/queries/clientpositive/avro_nested_complex.q
New File
 
ql/src/test/queries/clientpositive/avro_nullable_fields.q
Revision f90ceb9 New Change
 
ql/src/test/queries/clientpositive/avro_without_schema.q
New File
 
ql/src/test/results/clientpositive/avro_create_as_select.q.out
New File
 
ql/src/test/results/clientpositive/avro_nested_complex.q.out
New File
 
ql/src/test/results/clientpositive/avro_nullable_fields.q.out
Revision 81950b0 New Change
 
ql/src/test/results/clientpositive/avro_without_schema.q.out
New File
 
serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java
Revision 9d58d13 New Change
 
serde/src/java/org/apache/hadoop/hive/serde2/avro/TypeInfoToSchema.java
New File
 
serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroSerdeUtils.java
Revision 67d5570 New Change
 
serde/src/test/org/apache/hadoop/hive/serde2/avro/TestTypeInfoToSchema.java
New File
 
  1. ql/src/test/queries/clientpositive/avro_create_as_select.q: Loading...
  2. ql/src/test/queries/clientpositive/avro_nested_complex.q: Loading...
  3. ql/src/test/queries/clientpositive/avro_nullable_fields.q: Loading...
  4. ql/src/test/queries/clientpositive/avro_without_schema.q: Loading...
  5. ql/src/test/results/clientpositive/avro_create_as_select.q.out: Loading...
  6. ql/src/test/results/clientpositive/avro_nested_complex.q.out: Loading...
  7. ql/src/test/results/clientpositive/avro_nullable_fields.q.out: Loading...
  8. ql/src/test/results/clientpositive/avro_without_schema.q.out: Loading...
  9. serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java: Loading...
  10. serde/src/java/org/apache/hadoop/hive/serde2/avro/TypeInfoToSchema.java: Loading...
  11. serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroSerdeUtils.java: Loading...
  12. serde/src/test/org/apache/hadoop/hive/serde2/avro/TestTypeInfoToSchema.java: Loading...