SQOOP-931 - Integration of Sqoop and HCatalog

Review Request #10688 - Created April 21, 2013 (since updated)

Submitter: Venkat Ranganathan
Reviewers: Sqoop (group), jarcec
Branch: sqoop-trunk
This patch implements the new feature of integrating HCatalog and Sqoop. With this feature, it is possible to import and export data between Sqoop and HCatalog tables. The document attached to the SQOOP-931 JIRA issue discusses the high-level approaches.

With this integration, data can be moved between enterprise data stores and the Hadoop ecosystem with greater fidelity, since HCatalog carries table schema and storage-format information that plain files do not.

Testing done: Two new integration test suites with more than 20 tests in total have been added to exercise various aspects of the integration. A unit test for the new option management has also been added. All tests pass.
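
As a usage illustration (not part of the patch itself): the new HCatalog support is driven by command-line options such as --hcatalog-database, --hcatalog-table, --create-hcatalog-table and --hcatalog-storage-stanza, per the design document attached to the JIRA; the exact option set may still change during review. The sketch below invokes an import through Sqoop's existing Sqoop.runTool() entry point with a hypothetical MySQL source; the same arguments map one-to-one onto the sqoop import command line.

    import org.apache.sqoop.Sqoop;

    public class HCatalogImportExample {
      public static void main(String[] args) {
        // Hypothetical JDBC source; the --hcatalog-* options are the ones
        // proposed by SQOOP-931 and may change before the patch is committed.
        String[] importArgs = {
            "import",
            "--connect", "jdbc:mysql://db.example.com/sales",
            "--username", "sqoop",
            "--table", "ORDERS",
            "--hcatalog-database", "default",
            "--hcatalog-table", "orders_hcat",
            "--create-hcatalog-table",
            "--hcatalog-storage-stanza", "stored as rcfile"
        };
        int ret = Sqoop.runTool(importArgs);
        System.exit(ret);
      }
    }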
build.xml
Revision 636c103 New Change
  [... 48 lines of context not shown ...]
     <then>
       <property name="hadoop.version" value="0.20.2-cdh3u1" />
       <property name="hbase.version" value="0.90.3-cdh3u1" />
       <property name="zookeeper.version" value="3.3.3-cdh3u1" />
       <property name="hadoop.version.full" value="0.20" />
+      <property name="hcatalog.version" value="0.11.0" />
     </then>

     <elseif>
       <equals arg1="${hadoopversion}" arg2="23" />
       <then>
         <property name="hadoop.version" value="0.23.1" />
         <property name="hbase.version" value="0.92.0" />
         <property name="zookeeper.version" value="3.4.2" />
         <property name="hadoop.version.full" value="0.23" />
+        <property name="hcatalog.version" value="0.11.0" />
       </then>
     </elseif>

     <elseif>
       <equals arg1="${hadoopversion}" arg2="100" />
       <then>
         <property name="hadoop.version" value="1.0.0" />
         <property name="hbase.version" value="0.92.0" />
         <property name="zookeeper.version" value="3.4.2" />
         <property name="hadoop.version.full" value="1.0.0" />
+        <property name="hcatalog.version" value="0.11.0" />
       </then>
     </elseif>

     <elseif>
       <equals arg1="${hadoopversion}" arg2="200" />
       <then>
         <property name="hadoop.version" value="2.0.4-alpha" />
         <property name="hbase.version" value="0.94.2" />
         <property name="zookeeper.version" value="3.4.2" />
         <property name="hadoop.version.full" value="2.0.4-alpha" />
+        <property name="hcatalog.version" value="0.11.0" />
       </then>
     </elseif>

     <else>
       <fail message="Unrecognized hadoopversion. Can only be 20, 23, 100 or 200." />
  [... 510 lines of context not shown ...]
         <include name="${bin.artifact.name}/**" />
       </tarfileset>
       <tarfileset dir="${build.dir}" mode="755">
         <include name="${bin.artifact.name}/bin/*" />
         <include name="${bin.artifact.name}/testdata/hive/bin/*" />
+        <include name="${bin.artifact.name}/testdata/hcatalog/conf/*" />
         <include name="${bin.artifact.name}/**/*.sh" />
       </tarfileset>
     </tar>
   </target>

  [... 33 lines of context not shown ...]
   <target name="srctar" depends="srcpackage" description="Create release source tarball">
     <tar compression="gzip" longfile="gnu" destfile="${src.tar.file}">
       <tarfileset dir="${build.dir}" mode="664">
         <exclude name="${src.artifact.name}/bin/*" />
         <exclude name="${src.artifact.name}/testdata/hive/bin/*" />
+        <exclude name="${src.artifact.name}/testdata/hcatalog/conf/*" />
         <exclude name="${src.artifact.name}/**/*.sh" />
         <include name="${src.artifact.name}/**" />
       </tarfileset>
       <tarfileset dir="${build.dir}" mode="755">
         <include name="${src.artifact.name}/bin/*" />
         <include name="${src.artifact.name}/testdata/hive/bin/*" />
+        <include name="${src.artifact.name}/testdata/hcatalog/conf/*" />
         <include name="${src.artifact.name}/**/*.sh" />
       </tarfileset>
     </tar>
   </target>

   <!-- set variables that configure the actual test -->
   <target name="test-prep" depends="test-prep-normal,test-prep-thirdparty,
                                     test-prep-manual"/>

+  <path id="hcatalog.conf.dir">
+     <pathelement location="${basedir}/testdata/hcatalog/conf"/>
+  </path>
   <target name="test-eval-condition">
     <condition property="thirdparty_or_manual">
       <or>
         <isset property="thirdparty"/>
         <isset property="manual"/>
       </or>
     </condition>
   </target>

   <target name="test-prep-normal" unless="thirdparty_or_manual"
                                   depends="test-eval-condition">
     <!-- Set this to run all the "standard" tests -->
     <property name="test.pattern" value="Test*" />
     <property name="cobertura.testset" value="base" />
  [... 35 lines of context not shown ...]
     -->

     <delete dir="${test.log.dir}"/>
     <mkdir dir="${test.log.dir}"/>
     <delete dir="${build.test}/data"/>
-    <mkdir dir="${build.test}/data" />
+    <mkdir dir="${build.test}/data/sqoop" />
     <mkdir dir="${cobertura.class.dir}" />
     <junit
       printsummary="yes" showoutput="${test.output}"
       haltonfailure="no" fork="yes" maxmemory="512m"
       errorProperty="tests.failed" failureProperty="tests.failed"
  [... 80 lines of context not shown ...]
                    value="${java.security.krb5.realm}"/>

       <sysproperty key="java.security.krb5.kdc"
                    value="${java.security.krb5.kdc}"/>

+      <!-- Location of Hive logs -->
+      <!--<sysproperty key="hive.log.dir"
+                   value="${test.build.data}/sqoop/logs"/> -->

       <classpath>
         <!-- instrumented classes go ahead of normal classes -->
         <pathelement location="${cobertura.class.dir}" />

+        <!-- Location of hive-site xml and other hadoop config files -->
+        <path refid="hcatalog.conf.dir" />

         <!-- main classpath here. -->
         <path refid="test.classpath" />

         <!-- need thirdparty JDBC drivers for thirdparty tests -->
         <fileset dir="${sqoop.thirdparty.lib.dir}"
  [... 462 lines of context not shown ...]
ivy.xml
Revision 1fa4dd1 New Change
 
bin/configure-sqoop.cmd
Revision f5fd608 New Change
 
bin/configure-sqoop
Revision 61ff3f2 New Change
 
ivy/ivysettings.xml
Revision c4cc561 New Change
 
src/docs/user/SqoopUserGuide.txt
Revision 01ac1cf New Change
 
src/docs/user/hcatalog.txt
New File
 
src/java/org/apache/sqoop/SqoopOptions.java
Revision f18d43e New Change
 
src/java/org/apache/sqoop/config/ConfigurationConstants.java
Revision 5354063 New Change
 
src/java/org/apache/sqoop/hive/HiveImport.java
Revision 838f083 New Change
 
src/java/org/apache/sqoop/manager/ConnManager.java
Revision a1ac38e New Change
 
src/java/org/apache/sqoop/mapreduce/DataDrivenImportJob.java
Revision ef1d363 New Change
 
src/java/org/apache/sqoop/mapreduce/ExportJobBase.java
Revision 1065d0b New Change
 
src/java/org/apache/sqoop/mapreduce/ImportJobBase.java
Revision 2465f3f New Change
 
src/java/org/apache/sqoop/mapreduce/JdbcExportJob.java
Revision 20636a0 New Change
 
src/java/org/apache/sqoop/mapreduce/JobBase.java
Revision 0df1156 New Change
 
src/java/org/apache/sqoop/mapreduce/hcat/SqoopHCatExportFormat.java
New File
 
src/java/org/apache/sqoop/mapreduce/hcat/SqoopHCatExportMapper.java
New File
 
src/java/org/apache/sqoop/mapreduce/hcat/SqoopHCatImportMapper.java
New File
 
src/java/org/apache/sqoop/mapreduce/hcat/SqoopHCatInputSplit.java
New File
 
[The diff is split across two pages; the remaining files appear on page 2.]
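
For orientation only, and not code from this patch: the new classes under src/java/org/apache/sqoop/mapreduce/hcat listed above wire Sqoop's import/export MapReduce jobs into HCatalog's InputFormat/OutputFormat layer. The sketch below shows the generic HCatalog 0.11 MapReduce write path (HCatOutputFormat plus HCatRecord) that a class like SqoopHCatImportMapper presumably builds on; the table name default.orders_hcat and the two-column layout are hypothetical, and the HCatalog method signatures should be verified against the 0.11 release.

    import java.io.IOException;
    import java.util.Map;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.NullWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hcatalog.data.DefaultHCatRecord;
    import org.apache.hcatalog.data.HCatRecord;
    import org.apache.hcatalog.mapreduce.HCatOutputFormat;
    import org.apache.hcatalog.mapreduce.OutputJobInfo;

    public class HCatWriteSketch {

      /** Turns a line of "id,name" text into a two-column HCatRecord. */
      public static class ToHCatRecordMapper
          extends Mapper<LongWritable, Text, NullWritable, HCatRecord> {
        @Override
        protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
          String[] fields = value.toString().split(",", 2);
          HCatRecord record = new DefaultHCatRecord(2);
          record.set(0, Integer.valueOf(fields[0]));
          record.set(1, fields[1]);
          context.write(NullWritable.get(), record);
        }
      }

      public static Job createJob(Configuration conf) throws IOException {
        Job job = new Job(conf, "hcat-write-sketch");
        job.setJarByClass(HCatWriteSketch.class);
        job.setMapperClass(ToHCatRecordMapper.class);
        job.setNumReduceTasks(0);
        // Write into the hypothetical default.orders_hcat table (no static partition).
        // A real job would also publish the output schema via HCatOutputFormat.setSchema(...).
        HCatOutputFormat.setOutput(job,
            OutputJobInfo.create("default", "orders_hcat", (Map<String, String>) null));
        job.setOutputFormatClass(HCatOutputFormat.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(DefaultHCatRecord.class);
        return job;
      }
    }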