Review Board 1.7.22


SQOOP-931 - Integration of Sqoop and HCatalog

Review Request #10688 - Created April 21, 2013 and updated

Venkat Ranganathan
Reviewers
Sqoop
jarcec
sqoop-trunk
This patch implements a new feature: the integration of HCatalog and Sqoop. With this feature, it is possible to import and export data between Sqoop and HCatalog tables. The document attached to the SQOOP-931 JIRA issue discusses the high-level approaches.

With this integration, data can be moved between enterprise data stores and the Hadoop ecosystem with greater fidelity.
Two new integration test suites, with more than 20 tests in total, have been added to test various aspects of the integration. A unit test covering the option management has also been added. All tests pass.
build.xml
Revision 1c33fee New Change
[20] 48 lines
[+20]
49
    <then>
49
    <then>
50
      <property name="hadoop.version" value="0.20.2-cdh3u1" />
50
      <property name="hadoop.version" value="0.20.2-cdh3u1" />
51
      <property name="hbase.version" value="0.90.3-cdh3u1" />
51
      <property name="hbase.version" value="0.90.3-cdh3u1" />
52
      <property name="zookeeper.version" value="3.3.3-cdh3u1" />
52
      <property name="zookeeper.version" value="3.3.3-cdh3u1" />
53
      <property name="hadoop.version.full" value="0.20" />
53
      <property name="hadoop.version.full" value="0.20" />

    
   
54
      <property name="hcatalog.version" value="0.6.0-SNAPSHOT" />
54
    </then>
55
    </then>
55

    
   
56

   
56
    <elseif>
57
    <elseif>
57
      <equals arg1="${hadoopversion}" arg2="23" />
58
      <equals arg1="${hadoopversion}" arg2="23" />
58
      <then>
59
      <then>
59
        <property name="hadoop.version" value="0.23.1" />
60
        <property name="hadoop.version" value="0.23.1" />
60
        <property name="hbase.version" value="0.92.0" />
61
        <property name="hbase.version" value="0.92.0" />
61
        <property name="zookeeper.version" value="3.4.2" />
62
        <property name="zookeeper.version" value="3.4.2" />
62
        <property name="hadoop.version.full" value="0.23" />
63
        <property name="hadoop.version.full" value="0.23" />

    
   
64
        <property name="hcatalog.version" value="0.6.0-SNAPSHOT" />
63
      </then>
65
      </then>
64
    </elseif>
66
    </elseif>
65

    
   
67

   
66
    <elseif>
68
    <elseif>
67
      <equals arg1="${hadoopversion}" arg2="100" />
69
      <equals arg1="${hadoopversion}" arg2="100" />
68
      <then>
70
      <then>
69
        <property name="hadoop.version" value="1.0.0" />
71
        <property name="hadoop.version" value="1.0.0" />
70
        <property name="hbase.version" value="0.92.0" />
72
        <property name="hbase.version" value="0.92.0" />
71
        <property name="zookeeper.version" value="3.4.2" />
73
        <property name="zookeeper.version" value="3.4.2" />
72
        <property name="hadoop.version.full" value="1.0.0" />
74
        <property name="hadoop.version.full" value="1.0.0" />

    
   
75
        <property name="hcatalog.version" value="0.6.0-SNAPSHOT" />
73
      </then>
76
      </then>
74
    </elseif>
77
    </elseif>
75

    
   
78

   
76
    <elseif>
79
    <elseif>
77
      <equals arg1="${hadoopversion}" arg2="200" />
80
      <equals arg1="${hadoopversion}" arg2="200" />
78
      <then>
81
      <then>
79
        <property name="hadoop.version" value="2.0.0-alpha" />
82
        <property name="hadoop.version" value="2.0.0-alpha" />
80
        <property name="hbase.version" value="0.94.2" />
83
        <property name="hbase.version" value="0.94.2" />
81
        <property name="zookeeper.version" value="3.4.2" />
84
        <property name="zookeeper.version" value="3.4.2" />
82
        <property name="hadoop.version.full" value="2.0.0-alpha" />
85
        <property name="hadoop.version.full" value="2.0.0-alpha" />

    
   
86
        <property name="hcatalog.version" value="0.6.0-SNAPSHOT" />
83
      </then>
87
      </then>
84
    </elseif>
88
    </elseif>
85

    
   
89

   
86
    <else>
90
    <else>
87
      <fail message="Unrecognized hadoopversion. Can only be 20, 23, 100, or 200." />
91
      <fail message="Unrecognized hadoopversion. Can only be 20, 23, 100, or 200." />
[+20] [20] 510 lines
[+20]
598
        <include name="${bin.artifact.name}/**" />
602
        <include name="${bin.artifact.name}/**" />
599
      </tarfileset>
603
      </tarfileset>
600
      <tarfileset dir="${build.dir}" mode="755">
604
      <tarfileset dir="${build.dir}" mode="755">
601
        <include name="${bin.artifact.name}/bin/*" />
605
        <include name="${bin.artifact.name}/bin/*" />
602
        <include name="${bin.artifact.name}/testdata/hive/bin/*" />
606
        <include name="${bin.artifact.name}/testdata/hive/bin/*" />

    
   
607
        <include name="${bin.artifact.name}/testdata/hcatalog/conf/*" />
603
        <include name="${bin.artifact.name}/**/*.sh" />
608
        <include name="${bin.artifact.name}/**/*.sh" />
604
      </tarfileset>
609
      </tarfileset>
605
    </tar>
610
    </tar>
606
  </target>
611
  </target>
607

    
   
612

   
[+20] [20] 33 lines
[+20]
641
  <target name="srctar" depends="srcpackage" description="Create release source tarball">
646
  <target name="srctar" depends="srcpackage" description="Create release source tarball">
642
    <tar compression="gzip" longfile="gnu" destfile="${src.tar.file}">
647
    <tar compression="gzip" longfile="gnu" destfile="${src.tar.file}">
643
      <tarfileset dir="${build.dir}" mode="664">
648
      <tarfileset dir="${build.dir}" mode="664">
644
        <exclude name="${src.artifact.name}/bin/*" />
649
        <exclude name="${src.artifact.name}/bin/*" />
645
        <exclude name="${src.artifact.name}/testdata/hive/bin/*" />
650
        <exclude name="${src.artifact.name}/testdata/hive/bin/*" />

    
   
651
        <exclude name="${src.artifact.name}/testdata/hcatalog/conf/*" />
646
        <exclude name="${src.artifact.name}/**/*.sh" />
652
        <exclude name="${src.artifact.name}/**/*.sh" />
647
        <include name="${src.artifact.name}/**" />
653
        <include name="${src.artifact.name}/**" />
648
      </tarfileset>
654
      </tarfileset>
649
      <tarfileset dir="${build.dir}" mode="755">
655
      <tarfileset dir="${build.dir}" mode="755">
650
        <include name="${src.artifact.name}/bin/*" />
656
        <include name="${src.artifact.name}/bin/*" />
651
        <include name="${src.artifact.name}/testdata/hive/bin/*" />
657
        <include name="${src.artifact.name}/testdata/hive/bin/*" />

    
   
658
        <include name="${src.artifact.name}/testdata/hcatalog/conf/*" />
652
        <include name="${src.artifact.name}/**/*.sh" />
659
        <include name="${src.artifact.name}/**/*.sh" />
653
      </tarfileset>
660
      </tarfileset>
654
    </tar>
661
    </tar>
655
  </target>
662
  </target>
656

    
   
663

   
[+20] [20] 8 lines
[+20]
665
        <isset property="manual"/>
672
        <isset property="manual"/>
666
      </or>
673
      </or>
667
    </condition>
674
    </condition>
668
  </target>
675
  </target>
669

    
   
676

   

    
   
677

   

    
   
678

   
670
  <target name="test-prep-normal" unless="thirdparty_or_manual"
679
  <target name="test-prep-normal" unless="thirdparty_or_manual"
671
                                  depends="test-eval-condition">
680
                                  depends="test-eval-condition">
672
    <!-- Set this to run all the "standard" tests -->
681
    <!-- Set this to run all the "standard" tests -->
673
    <property name="test.pattern" value="Test*" />
682
    <property name="test.pattern" value="Test*" />
674
    <property name="cobertura.testset" value="base" />
683
    <property name="cobertura.testset" value="base" />

    
   
684
    <path id="hcatalog.conf.dir">

    
   
685
       <pathelement location="${basedir}/testdata/hcatalog/conf"/>

    
   
686
    </path>
675
  </target>
687
  </target>
676

    
   
688

   
677
  <target name="test-prep-thirdparty" if="thirdparty">
689
  <target name="test-prep-thirdparty" if="thirdparty">
678
    <!-- Run tests that *end* with the name Test, instead of starting with it;
690
    <!-- Run tests that *end* with the name Test, instead of starting with it;
679
         this runs non-standard tests e.g. third-party database tests. -->
691
         this runs non-standard tests e.g. third-party database tests. -->
[+20] [20] 30 lines
[+20]
710
    -->
722
    -->
711

    
   
723

   
712
    <delete dir="${test.log.dir}"/>
724
    <delete dir="${test.log.dir}"/>
713
    <mkdir dir="${test.log.dir}"/>
725
    <mkdir dir="${test.log.dir}"/>
714
    <delete dir="${build.test}/data"/>
726
    <delete dir="${build.test}/data"/>
715
    <mkdir dir="${build.test}/data" />
727
    <mkdir dir="${build.test}/data/sqoop" />
716
    <mkdir dir="${cobertura.class.dir}" />
728
    <mkdir dir="${cobertura.class.dir}" />
717
    <junit
729
    <junit
718
      printsummary="yes" showoutput="${test.output}"
730
      printsummary="yes" showoutput="${test.output}"
719
      haltonfailure="no" fork="yes" maxmemory="512m"
731
      haltonfailure="no" fork="yes" maxmemory="512m"
720
      errorProperty="tests.failed" failureProperty="tests.failed"
732
      errorProperty="tests.failed" failureProperty="tests.failed"
[+20] [20] 80 lines
[+20]
801
                   value="${java.security.krb5.realm}"/>
813
                   value="${java.security.krb5.realm}"/>
802

    
   
814

   
803
      <sysproperty key="java.security.krb5.kdc"
815
      <sysproperty key="java.security.krb5.kdc"
804
                   value="${java.security.krb5.kdc}"/>
816
                   value="${java.security.krb5.kdc}"/>
805

    
   
817

   

    
   
818
      <!-- Location of Hive logs -->

    
   
819
      <!--<sysproperty key="hive.log.dir"

    
   
820
                   value="${test.build.data}/sqoop/logs"/> -->

    
   
821

   
806
      <classpath>
822
      <classpath>
807
        <!-- instrumented classes go ahead of normal classes -->
823
        <!-- instrumented classes go ahead of normal classes -->
808
        <pathelement location="${cobertura.class.dir}" />
824
        <pathelement location="${cobertura.class.dir}" />
809

    
   
825

   

    
   
826
        <!-- Location of hive-site xml and other hadoop config files -->

    
   
827
        <path refid="hcatalog.conf.dir" />

    
   
828

   
810
        <!-- main classpath here. -->
829
        <!-- main classpath here. -->
811
        <path refid="test.classpath" />
830
        <path refid="test.classpath" />
812

    
   
831

   
813
        <!-- need thirdparty JDBC drivers for thirdparty tests -->
832
        <!-- need thirdparty JDBC drivers for thirdparty tests -->
814
        <fileset dir="${sqoop.thirdparty.lib.dir}"
833
        <fileset dir="${sqoop.thirdparty.lib.dir}"
[+20] [20] 462 lines
ivy.xml
Revision 1fa4dd1 New Change
 
ivy/ivysettings.xml
Revision c4cc561 New Change
 
src/java/org/apache/sqoop/SqoopOptions.java
Revision f18d43e New Change
 
src/java/org/apache/sqoop/config/ConfigurationConstants.java
Revision 5354063 New Change
 
src/java/org/apache/sqoop/hive/HiveImport.java
Revision 838f083 New Change
 
src/java/org/apache/sqoop/manager/ConnManager.java
Revision a1ac38e New Change
 
src/java/org/apache/sqoop/mapreduce/DataDrivenImportJob.java
Revision ef1d363 New Change
 
src/java/org/apache/sqoop/mapreduce/ExportJobBase.java
Revision 1065d0b New Change
 
src/java/org/apache/sqoop/mapreduce/ImportJobBase.java
Revision 2465f3f New Change
 
src/java/org/apache/sqoop/mapreduce/JdbcExportJob.java
Revision 20636a0 New Change
 
src/java/org/apache/sqoop/mapreduce/JobBase.java
Revision 0df1156 New Change
 
src/java/org/apache/sqoop/mapreduce/hcat/SqoopHCatExportFormat.java
New File
 
src/java/org/apache/sqoop/mapreduce/hcat/SqoopHCatExportMapper.java
New File
 
src/java/org/apache/sqoop/mapreduce/hcat/SqoopHCatImportMapper.java
New File
 
src/java/org/apache/sqoop/mapreduce/hcat/SqoopHCatInputSplit.java
New File
 
src/java/org/apache/sqoop/mapreduce/hcat/SqoopHCatRecordReader.java
New File
 
src/java/org/apache/sqoop/mapreduce/hcat/SqoopHCatUtilities.java
New File
 
src/java/org/apache/sqoop/tool/BaseSqoopTool.java
Revision 9417d57 New Change
 
src/java/org/apache/sqoop/tool/CodeGenTool.java
Revision dd34a97 New Change
 
  1. build.xml: Loading...
  2. ivy.xml: Loading...
  3. ivy/ivysettings.xml: Loading...
  4. src/java/org/apache/sqoop/SqoopOptions.java: Loading...
  5. src/java/org/apache/sqoop/config/ConfigurationConstants.java: Loading...
  6. src/java/org/apache/sqoop/hive/HiveImport.java: Loading...
  7. src/java/org/apache/sqoop/manager/ConnManager.java: Loading...
  8. src/java/org/apache/sqoop/mapreduce/DataDrivenImportJob.java: Loading...
  9. src/java/org/apache/sqoop/mapreduce/ExportJobBase.java: Loading...
  10. src/java/org/apache/sqoop/mapreduce/ImportJobBase.java: Loading...
  11. src/java/org/apache/sqoop/mapreduce/JdbcExportJob.java: Loading...
  12. src/java/org/apache/sqoop/mapreduce/JobBase.java: Loading...
  13. src/java/org/apache/sqoop/mapreduce/hcat/SqoopHCatExportFormat.java: Loading...
  14. src/java/org/apache/sqoop/mapreduce/hcat/SqoopHCatExportMapper.java: Loading...
  15. src/java/org/apache/sqoop/mapreduce/hcat/SqoopHCatImportMapper.java: Loading...
  16. src/java/org/apache/sqoop/mapreduce/hcat/SqoopHCatInputSplit.java: Loading...
  17. src/java/org/apache/sqoop/mapreduce/hcat/SqoopHCatRecordReader.java: Loading...
  18. src/java/org/apache/sqoop/mapreduce/hcat/SqoopHCatUtilities.java: Loading...
  19. src/java/org/apache/sqoop/tool/BaseSqoopTool.java: Loading...
  20. src/java/org/apache/sqoop/tool/CodeGenTool.java: Loading...
This diff has been split across 2 pages: 1 2 >