Review of SQOOP-435

Review Request #14779 - Created Oct. 20, 2013 and updated

Submitter: James Anderson
Bugs: SQOOP-435
Reviewers: Sqoop
Repository: sqoop-trunk

Description:
Automatically generate an Avro schema file (*.avsc) in the JAR output directory when --as-avrodatafile is specified.

Testing done:
- Compared the contents of the generated *.avsc file against the output of the avro-tools getschema operation (see the sketch after this list).
- Created an external Hive table with the AvroSerDe, pointing avro.schema.url at a copy of the schema indexed in Elasticsearch.
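For reference, the getschema comparison in the first item can also be scripted in Java. This is a minimal sketch, not part of the patch; the file names SOMETABLE.avsc and part-m-00000.avro are placeholders for the generated schema file and one of the imported Avro part files.

import java.io.File;

import org.apache.avro.Schema;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;

// Hypothetical helper, not part of this patch: checks that the generated
// .avsc matches the schema embedded in an imported Avro data file
// (the same schema that "avro-tools getschema" prints).
public class AvroSchemaCheck {
  public static void main(String[] args) throws Exception {
    // Placeholder paths; substitute the real JAR output directory and part file.
    Schema declared = new Schema.Parser().parse(new File("SOMETABLE.avsc"));
    DataFileReader<GenericRecord> reader = new DataFileReader<GenericRecord>(
        new File("part-m-00000.avro"), new GenericDatumReader<GenericRecord>());
    try {
      Schema embedded = reader.getSchema();
      System.out.println("Schemas match: " + declared.equals(embedded));
    } finally {
      reader.close();
    }
  }
}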

Diff revision 2 (Latest)

  1. src/java/org/apache/sqoop/mapreduce/DataDrivenImportJob.java
  2. src/test/com/cloudera/sqoop/TestAvroImport.java
src/java/org/apache/sqoop/mapreduce/DataDrivenImportJob.java (Revision 5afd90c)

@@ -16,24 +16,28 @@
  * limitations under the License.
  */
 
 package org.apache.sqoop.mapreduce;
 
+import java.io.File;
 import java.io.IOException;
 import java.sql.SQLException;
+
 import org.apache.avro.Schema;
+import org.apache.commons.io.FileUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapreduce.InputFormat;
 import org.apache.hadoop.mapreduce.Job;
 import org.apache.hadoop.mapreduce.Mapper;
 import org.apache.hadoop.mapreduce.OutputFormat;
 import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
 import org.apache.sqoop.mapreduce.hcat.SqoopHCatUtilities;
+
 import com.cloudera.sqoop.SqoopOptions;
 import com.cloudera.sqoop.config.ConfigurationHelper;
 import com.cloudera.sqoop.lib.LargeObjectLoader;
 import com.cloudera.sqoop.manager.ConnManager;
 import com.cloudera.sqoop.manager.ImportJobContext;
@@ -81,16 +85,39 @@ protected void configureMapper(Job job, String tableName,
         == SqoopOptions.FileLayout.AvroDataFile) {
       ConnManager connManager = getContext().getConnManager();
       AvroSchemaGenerator generator = new AvroSchemaGenerator(options,
           connManager, tableName);
       Schema schema = generator.generate();
+
+      try {
+        writeAvroSchema(schema);
+      } catch (final IOException e) {
+        LOG.error("Error while writing Avro schema.", e);
+      }
+
       AvroJob.setMapOutputSchema(job.getConfiguration(), schema);
     }
 
     job.setMapperClass(getMapperClass());
   }
 
+  private void writeAvroSchema(final Schema schema) throws IOException {
+    // Generate schema in JAR output directory.
+    final File schemaFile = new File(options.getJarOutputDir(), schema.getName() + ".avsc");
+
+    LOG.info("Writing Avro schema file: " + schemaFile);
+    FileUtils.forceMkdir(schemaFile.getParentFile());
+    FileUtils.writeStringToFile(schemaFile, schema.toString(true), null);
+
+    // Copy schema to code output directory.
+    try {
+      FileUtils.moveFileToDirectory(schemaFile, new File(options.getCodeOutputDir()), true);
+    } catch (final IOException e) {
+      LOG.debug("Could not move Avro schema file to code output directory.", e);
+    }
+  }
+
   @Override
   protected Class<? extends Mapper> getMapperClass() {
     if (options.getHCatTableName() != null) {
       return SqoopHCatUtilities.getImportMapperClass();
     }
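Because writeAvroSchema() writes schema.toString(true), the generated file is plain Avro schema JSON, so any consumer handed its location (for example the avro.schema.url used in the Hive test above) can load it directly. A minimal, hypothetical consumer sketch; the path is illustrative only.

import java.io.File;

import org.apache.avro.Schema;

// Hypothetical consumer of the generated schema file, not part of this patch.
// Assumes a record schema, which is what Sqoop generates for a table.
public class LoadGeneratedSchema {
  public static void main(String[] args) throws Exception {
    File avsc = new File("outdir/SOMETABLE.avsc");  // placeholder location
    Schema schema = new Schema.Parser().parse(avsc);
    System.out.println("Loaded schema " + schema.getFullName()
        + " with " + schema.getFields().size() + " fields");
  }
}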
src/test/com/cloudera/sqoop/TestAvroImport.java (Revision 34a7d41)
 