Review Board 1.7.22


PIG-3015 Rewrite of AvroStorage

Review Request #8104 - Created Nov. 17, 2012 and updated

Joseph Adler
PIG-3015
Reviewers
pig
cheolsoo
pig-git
The current AvroStorage implementation has a lot of issues: it requires old versions of Avro, it copies data far more often than necessary, and it's verbose and complicated. (One pet peeve of mine is that old versions of Avro don't support Snappy compression.)

I rewrote AvroStorage from scratch to fix these issues. In early tests, the new implementation is significantly faster, and the code is a lot simpler. Rewriting AvroStorage also enabled me to implement support for Trevni.

This is the latest version of the patch, complete with AvroStorage test cases and the new TrevniStorage. (Test cases for TrevniStorage are still missing.)

 
ivy.xml
Revision 70e8d50 New Change
[20] 169 lines
[+20]
170
      conf="compile->master;checkstyle->master"/>
170
      conf="compile->master;checkstyle->master"/>
171
     <dependency org="org.slf4j" name="slf4j-log4j12" rev="${slf4j-log4j12.version}"
171
     <dependency org="org.slf4j" name="slf4j-log4j12" rev="${slf4j-log4j12.version}"
172
      conf="compile->master;test->master"/>
172
      conf="compile->master;test->master"/>
173
    <dependency org="commons-cli" name="commons-cli" rev="${commons-cli.version}"
173
    <dependency org="commons-cli" name="commons-cli" rev="${commons-cli.version}"
174
      conf="compile->master;checkstyle->master"/>
174
      conf="compile->master;checkstyle->master"/>
175

    
   

   
176
    <dependency org="org.apache.avro" name="avro" rev="${avro.version}"
175
    <dependency org="org.apache.avro" name="avro" rev="${avro.version}"
177
      conf="compile->default;checkstyle->master"/>
176
      conf="compile->default;checkstyle->master"/>

    
   
177
    <dependency org="org.apache.avro" name="avro-mapred" rev="${avro.version}"

    
   
178
      conf="compile->default;checkstyle->master"/>

    
   
179
    <dependency org="org.apache.avro" name="trevni-core" rev="${avro.version}"

    
   
180
      conf="compile->default;checkstyle->master"/>

    
   
181
    <dependency org="org.apache.avro" name="trevni-avro" rev="${avro.version}"

    
   
182
      conf="compile->default;checkstyle->master">

    
   
183
      <exclude org="org.apache.hadoop" module="hadoop-core"/>

    
   
184
    </dependency>

    
   
185
    <dependency org="org.apache.avro" name="avro-tools" rev="${avro.version}"

    
   
186
      conf="test->default">

    
   
187
      <artifact name="nodeps" type="jar"/>

    
   
188
    </dependency>
178
    <dependency org="com.googlecode.json-simple" name="json-simple" rev="${json-simple.version}"
189
    <dependency org="com.googlecode.json-simple" name="json-simple" rev="${json-simple.version}"
179
      conf="compile->master;checkstyle->master"/>
190
      conf="compile->master;checkstyle->master"/>
180

    
   

   
181
    <dependency org="jdiff" name="jdiff" rev="${jdiff.version}"
191
    <dependency org="jdiff" name="jdiff" rev="${jdiff.version}"
182
      conf="jdiff->default"/>
192
      conf="jdiff->default"/>
183
    <dependency org="xalan" name="xalan" rev="${xalan.version}"
193
    <dependency org="xalan" name="xalan" rev="${xalan.version}"
184
      conf="test->default"/>
194
      conf="test->default"/>
185
    <dependency org="xerces" name="xercesImpl" rev="${xerces.version}"
195
    <dependency org="xerces" name="xercesImpl" rev="${xerces.version}"
[+20] [20] 79 lines
.eclipse.templates/.classpath
Revision c7b83b8 New Change
 
ivy/libraries.properties
Revision 7b07c7e New Change
 
src/org/apache/pig/builtin/AvroStorage.java
New File
 
src/org/apache/pig/builtin/TrevniStorage.java
New File
 
src/org/apache/pig/impl/util/avro/AvroArrayReader.java
New File
 
src/org/apache/pig/impl/util/avro/AvroBagWrapper.java
New File
 
src/org/apache/pig/impl/util/avro/AvroMapWrapper.java
New File
 
src/org/apache/pig/impl/util/avro/AvroRecordReader.java
New File
 
src/org/apache/pig/impl/util/avro/AvroRecordWriter.java
New File
 
src/org/apache/pig/impl/util/avro/AvroStorageDataConversionUtilities.java
New File
 
src/org/apache/pig/impl/util/avro/AvroStorageSchemaConversionUtilities.java
New File
 
src/org/apache/pig/impl/util/avro/AvroTupleWrapper.java
New File
 
test/commit-tests
Revision 5081fbc New Change
 
test/unit-tests
Revision 7cede06 New Change
 
test/org/apache/pig/builtin/TestAvroStorage.java
New File
 
test/org/apache/pig/builtin/avro/code/pig/directory_test.pig
New File
 
test/org/apache/pig/builtin/avro/code/pig/identity.pig
New File
 
test/org/apache/pig/builtin/avro/code/pig/identity_ai1_ao2.pig
New File
 
test/org/apache/pig/builtin/avro/code/pig/identity_ao2.pig
New File
 
  1. ivy.xml: Loading...
  2. .eclipse.templates/.classpath: Loading...
  3. ivy/libraries.properties: Loading...
  4. src/org/apache/pig/builtin/AvroStorage.java: Loading...
  5. src/org/apache/pig/builtin/TrevniStorage.java: Loading...
  6. src/org/apache/pig/impl/util/avro/AvroArrayReader.java: Loading...
  7. src/org/apache/pig/impl/util/avro/AvroBagWrapper.java: Loading...
  8. src/org/apache/pig/impl/util/avro/AvroMapWrapper.java: Loading...
  9. src/org/apache/pig/impl/util/avro/AvroRecordReader.java: Loading...
  10. src/org/apache/pig/impl/util/avro/AvroRecordWriter.java: Loading...
  11. src/org/apache/pig/impl/util/avro/AvroStorageDataConversionUtilities.java: Loading...
  12. src/org/apache/pig/impl/util/avro/AvroStorageSchemaConversionUtilities.java: Loading...
  13. src/org/apache/pig/impl/util/avro/AvroTupleWrapper.java: Loading...
  14. test/commit-tests: Loading...
  15. test/unit-tests: Loading...
  16. test/org/apache/pig/builtin/TestAvroStorage.java: Loading...
  17. test/org/apache/pig/builtin/avro/code/pig/directory_test.pig: Loading...
  18. test/org/apache/pig/builtin/avro/code/pig/identity.pig: Loading...
  19. test/org/apache/pig/builtin/avro/code/pig/identity_ai1_ao2.pig: Loading...
  20. test/org/apache/pig/builtin/avro/code/pig/identity_ao2.pig: Loading...
This diff has been split across 3 pages: 1 2 3 >