Review Board 1.7.22


SQOOP-658 Solve hadoop dependency distribution/loading on server side

Review Request #7860 - Created Nov. 4, 2012 and submitted

Jarek Cecho
SQOOP-658
Reviewers
Sqoop
sqoop-sqoop2
I've ported Oozie's script for populating Hadoop binaries into the Oozie server to Sqoop (and the Sqoop server). Basically, the user has to manually execute the command ./bin/addtowar.sh to install the Hadoop binaries. Right now, only Hadoop 2.0.0 is supported.
I've tested it on CDH 4.1.1 with the following execution:

./bin/addtowar.sh -hadoop 2.0 /usr/lib/hadoop/client/ -jars /usr/lib/sqoop/lib/mysql-connector-java-5.1.21-bin.jar:/usr/lib/hadoop-0.20-mapreduce/hadoop-core-2.0.0-mr1-cdh4.1.1.jar
* Base hadoop version is 2.0
* Adding MySQL JDBC driver to test MySQL data import
* Adding MR1 implementation that is specific to CDH

Diff revision 1 (Latest)

  1. dist/src/main/bin/addtowar.sh: Loading...
dist/src/main/bin/addtowar.sh
New File

    
   
1
#!/bin/bash

    
   
2
#

    
   
3
# Licensed to the Apache Software Foundation (ASF) under one

    
   
4
# or more contributor license agreements.  See the NOTICE file

    
   
5
# distributed with this work for additional information

    
   
6
# regarding copyright ownership.  The ASF licenses this file

    
   
7
# to you under the Apache License, Version 2.0 (the

    
   
8
# "License"); you may not use this file except in compliance

    
   
9
# with the License.  You may obtain a copy of the License at

    
   
10
#

    
   
11
#      http://www.apache.org/licenses/LICENSE-2.0

    
   
12
#

    
   
13
# Unless required by applicable law or agreed to in writing, software

    
   
14
# distributed under the License is distributed on an "AS IS" BASIS,

    
   
15
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

    
   
16
# See the License for the specific language governing permissions and

    
   
17
# limitations under the License.

    
   
18
#

    
   
19

   

    
   
20
# Creates the staging area used to unpack and repack the WAR.
# Globals:   tmpDir (written)    - freshly created, private staging root
#            tmpWarDir (written) - "${tmpDir}/sqoop-war", receives the exploded WAR
# Exits through checkExec when either directory cannot be created.
function prepare() {
  # mktemp chooses an unpredictable name and creates the directory itself, so
  # nothing that already exists under /tmp is ever removed or reused.
  tmpDir=$(mktemp -d /tmp/sqoop-war-packing-XXXXXX)
  checkExec "Creating staging directory ${tmpDir}"
  tmpWarDir="${tmpDir}/sqoop-war"
  mkdir "${tmpWarDir}"
  checkExec "Creating staging directory ${tmpDir}"
}

    
   
29

   

    
   
30
# Removes the staging directory.
# Globals: tmpDir (read) - directory to delete; nothing happens when it is empty or unset
# Exits through checkExec when the directory cannot be deleted.
function cleanUp() {
  if [ -n "${tmpDir}" ]; then
    # Quoted and preceded by "--" so that a path containing whitespace or
    # starting with a dash is removed as exactly one operand.
    rm -rf -- "${tmpDir}"
    checkExec "Deleting staging directory ${tmpDir}"
  fi
}

    
   
37

   

    
   
38
# Aborts the script when the command executed immediately before this call failed.
# It has to be the very next command after the one being checked, because it
# reads "$?".
# Arguments: $1 - description of the step, used in the failure message
# Globals:   checkExecAborting (written) - set once the failure path has started
# On failure prints "Failed: <description>", calls cleanUp and exits with -1.
function checkExec() {
  # Capture immediately: every later command overwrites "$?"
  local rc=$?
  if [ "${rc}" -ne 0 ]; then
    echo
    echo "Failed: $1"
    echo
    # cleanUp reports its own failure through checkExec. Without this guard a
    # failing cleanUp would call back into checkExec again and again.
    if [ -z "${checkExecAborting}" ]; then
      checkExecAborting=true
      cleanUp
    fi
    exit -1
  fi
}

    
   
49

   

    
   
50
# Verifies that a path exists and aborts the script otherwise.
# Arguments: $1 - file or directory that must exist
# On failure prints a message, calls cleanUp and exits with -1.
function checkFileExists() {
  # The argument is quoted: unquoted, an empty path or one containing spaces
  # turns the test into a malformed expression and the missing file slips through.
  if [ ! -e "${1}" ]; then
    echo
    echo "File/Dir does not exist: ${1}"
    echo
    cleanUp
    exit -1
  fi
}

    
   
60

   

    
   
61
# Verifies that a path does not exist yet and aborts the script otherwise.
# Arguments: $1 - file or directory that must be absent
# On failure prints a message, calls cleanUp and exits with -1.
function checkFileDoesNotExist() {
  # The argument is quoted so that an existing path containing spaces is
  # detected instead of producing a malformed test expression.
  if [ -e "${1}" ]; then
    echo
    echo "File/Dir already exists: ${1}"
    echo
    cleanUp
    exit -1
  fi
}

    
   
71

   

    
   
72
# Finds a JAR below a directory (at any depth) and returns its path in RET.
# Arguments: $1 - directory to search; "find -H" follows it if it is a symlink
#            $2 - file name pattern for "find -name", e.g. "guava*.jar"
# Globals:   RET (written)           - first matching path
#            hadoopJarsSuffix (read) - optional extra characters accepted right
#                                      before ".jar"; may be unset
# When nothing matches: prints a message, calls cleanUp and exits with -1.
function findFile() {
  # The pattern is quoted so that find receives it untouched; unquoted, the
  # shell would expand it against the current directory first.
  # grep keeps only names whose last character before ".jar" is a digit, a
  # dot, an "a" or one of the characters in hadoopJarsSuffix.
  # head keeps the first hit, which also works for paths containing spaces.
  RET=$(find -H "${1}" -name "${2}" \
    | grep -e "[0-9.a${hadoopJarsSuffix}]\.jar\$" \
    | head -n 1)
  if [ -z "${RET}" ]; then
    echo
    echo "File '${2}' not found in '${1}'"
    echo
    cleanUp
    exit -1
  fi
}

    
   
84

   

    
   
85
# Ensures that a command line option carries a value.
# Arguments: $1 - option name shown in the error message
#            $2 - value supplied for that option
# When the value is empty: prints the error followed by the usage text and
# exits with -1.
function checkOption() {
  # A non-empty value means there is nothing to report
  [ -n "$2" ] && return
  echo
  echo "Missing option: ${1}"
  echo
  printUsage
  exit -1
}

    
   
94

   

    
   
95
# Selects the Hadoop JARs that will be injected for the requested Hadoop version.
# Arguments: $1 - Hadoop version given on the command line (e.g. "2.0")
# Globals:   version (written)    - copy of $1
#            suffix (written)     - name pattern fragment matching the version part
#            hadoopJars (written) - ":"-separated list of "find -name" patterns
# For an unsupported version: prints a message, calls cleanUp and exits with -1.
# TODO(jarcec): Add configuration specific to Hadoop 1.x
function getHadoopJars() {
  version=$1
  # Kept in a variable so the regular expression reaches "=~" without any
  # shell quoting surprises. It is anchored: only a leading major version "2"
  # is accepted, not every version string that merely contains a "2".
  local supportedVersion='^2(\.|$)'
#   Commented distributions are not tested
#  if [ "${version}" = "0.20.1" ]; then
#    #List is separated by ":"
#    hadoopJars="hadoop-core*.jar"
#  elif [ "${version}" = "0.20.2" ]; then
#    #List is separated by ":"
#    hadoopJars="hadoop-core*.jar"
#  elif [ "${version}" = "0.20.104" ]; then
#    #List is separated by ":"
#    hadoopJars="hadoop-core*.jar:jackson-core-asl-*.jar:jackson-mapper-asl-*.jar"
#  elif [ "${version}" = "0.20.200" ]; then
#    #List is separated by ":"
#    hadoopJars="hadoop-core*.jar:jackson-core-asl-*.jar:jackson-mapper-asl-*.jar:commons-configuration-*.jar"
#  elif [[ "${version}" =~ .*23 ]]; then
#    suffix="-[0-9.]*"
#    #List is separated by ":"
#    hadoopJars="hadoop-mapreduce-client-core${suffix}.jar:hadoop-mapreduce-client-common${suffix}.jar:hadoop-mapreduce-client-jobclient${suffix}.jar:hadoop-mapreduce-client-app${suffix}.jar:hadoop-yarn-common${suffix}.jar:hadoop-yarn-api${suffix}.jar:hadoop-hdfs${suffix}.jar:hadoop-common${suffix}.jar:hadoop-auth${suffix}.jar:guava*.jar:protobuf-*.jar:avro-ipc-*.jar:jackson-core-asl-*.jar:jackson-mapper-asl-*.jar:commons-configuration-*.jar"
#  elif [[ "${version}" =~ 2.* ]]; then
  if [[ "${version}" =~ ${supportedVersion} ]]; then
    suffix="-[0-9.]*"
    # List is separated by ":"
    # Removed hadoop-yarn-client${suffix}.jar, my distribution do not have such artifact?
    hadoopJars="hadoop-mapreduce-client-core${suffix}.jar:hadoop-mapreduce-client-common${suffix}.jar:hadoop-mapreduce-client-jobclient${suffix}.jar:hadoop-mapreduce-client-app${suffix}.jar:hadoop-yarn-common${suffix}.jar:hadoop-yarn-api${suffix}.jar:hadoop-hdfs${suffix}.jar:hadoop-common${suffix}.jar:hadoop-auth${suffix}.jar:guava*.jar:protobuf-*.jar:jackson-core-asl-*.jar:jackson-mapper-asl-*.jar:commons-configuration-*.jar:commons-cli-*.jar:commons-logging-*.jar:slf4j-api-*.jar:slf4j-log4j*.jar:avro-*.jar"
  else
    echo
    # Report the value that was actually checked
    echo "Exiting: Unsupported Hadoop version '${version}', supported versions: 2.x"
#    echo "Exiting: Unsupported Hadoop version '${version}', supported versions: 0.20.1, 0.20.2, 0.20.104, 0.20.200, 0.23.x and 2.x"
    echo
    cleanUp
    exit -1
  fi
}

    
   
131

   

    
   
132
# Prints the command line synopsis, followed by one empty line, to stdout.
function printUsage() {
  # One printf call emits every argument on its own line
  printf '%s\n' \
    " Usage  : addtowar.sh <OPTIONS>" \
    " Options: -hadoop HADOOP_VERSION HADOOP_PATH" \
    "          [-jars JARS_PATH] (multiple JAR path separated by ':')" \
    "          [-war SQOOP_WAR]" \
    ""
}

    
   
139

   

    
   
140
# We need at least some arguments
if [ $# -eq 0 ]; then
  echo
  echo "Missing options"
  echo
  printUsage
  exit -1
fi

# Variables that will be populated by our command line arguments
addHadoop=""
addJars=""
hadoopVersion=""
hadoopHome=""
jarsPath=""
# The default WAR location is resolved relative to this script. "$0" is quoted
# so that an installation path containing whitespace still resolves correctly.
warPath="$(dirname "$0")/../server/webapps/sqoop.war"

# Parse command line arguments
#   -hadoop VERSION PATH  sets hadoopVersion, hadoopHome and addHadoop
#   -jars PATHS           sets jarsPath and addJars
#   -war PATH             overrides warPath
while [ $# -gt 0 ]; do
  case "$1" in
    -hadoop)
      shift
      if [ $# -eq 0 ]; then
        echo
        echo "Missing option value, Hadoop version"
        echo
        printUsage
        exit -1
      fi
      hadoopVersion=$1
      shift
      if [ $# -eq 0 ]; then
        echo
        echo "Missing option value, Hadoop path"
        echo
        printUsage
        exit -1
      fi
      hadoopHome=$1
      addHadoop=true
      ;;
    -jars)
      shift
      if [ $# -eq 0 ]; then
        echo
        echo "Missing option value, JARs path"
        echo
        printUsage
        exit -1
      fi
      jarsPath=$1
      addJars=true
      ;;
    -war)
      shift
      if [ $# -eq 0 ]; then
        echo
        echo "Missing option value, Input Sqoop WAR path"
        echo
        printUsage
        exit -1
      fi
      warPath=$1
      ;;
    *)
      # Unknown arguments are still skipped, but no longer silently: a
      # mistyped option would otherwise go unnoticed.
      echo "Ignoring unrecognized argument: $1" >&2
      ;;
  esac

  # Drop the argument (or the last option value) that was just processed
  shift
done

# Check that we have something to do
if [ -z "${addHadoop}${addJars}" ]; then
  echo
  echo "Nothing to do"
  echo
  printUsage
  exit -1
fi

    
   
214

   

    
   
215
# Validate the -war value before the staging directory exists: checkOption
# exits without calling cleanUp, so running it first cannot leave an orphaned
# staging directory behind.
checkOption "-war" "${warPath}"

prepare

# All paths are passed quoted so that names containing whitespace stay intact
checkFileExists "${warPath}"

if [ "${addHadoop}" = "true" ]; then
  checkFileExists "${hadoopHome}"
  getHadoopJars "${hadoopVersion}"
fi

if [ "${addJars}" = "true" ]; then
  # Split on ":" only; entries are neither split on whitespace nor expanded
  # as glob patterns.
  IFS=':' read -r -a jarPathList <<< "${jarsPath}"
  for jarPath in "${jarPathList[@]}"; do
    # Empty entries (e.g. from "a::b") are skipped
    [ -n "${jarPath}" ] || continue
    checkFileExists "${jarPath}"
  done
fi

    
   
231

   

    
   
232
# Unpacking original war
unzip "${warPath}" -d "${tmpWarDir}" > /dev/null
checkExec "Unzipping Sqoop WAR"

# Human readable list of what was added, used in the final message
components=""

# Adding hadoop binaries to WAR file
if [ "${addHadoop}" = "true" ]; then
  components="Hadoop JARs";

  # Refuse to touch a WAR that already ships a Hadoop core JAR. The glob is
  # tested directly; when nothing matches, the loop sees the literal pattern,
  # which does not exist.
  found=0
  for existingJar in "${tmpWarDir}"/WEB-INF/lib/hadoop*core*jar; do
    if [ -e "${existingJar}" ]; then
      found=1
      break
    fi
  done
  if [ "${found}" -ne 0 ]; then
    echo
    echo "Specified Sqoop WAR '${warPath}' already contains Hadoop JAR files"
    echo
    cleanUp
    exit -1
  fi

  ## adding hadoop
  echo "Injecting following Hadoop JARs"
  echo
  # Split the ":"-separated pattern list without letting the shell expand the
  # patterns against the current directory.
  IFS=':' read -r -a hadoopJarPatterns <<< "${hadoopJars}"
  for jar in "${hadoopJarPatterns[@]}"; do
    # findFile exits on its own when the pattern matches nothing; otherwise the
    # resolved path is returned in RET.
    findFile "${hadoopHome}" "${jar}"
    jar=${RET}
    echo "${jar}"
    cp "${jar}" "${tmpWarDir}/WEB-INF/lib/"
    checkExec "Copying jar ${jar} to staging"
  done
fi

    
   
262

   

    
   
263
# Adding new jars to WAR file
if [ "${addJars}" = "true" ]; then
  if [ -n "${components}" ]; then
    components="${components}, "
  fi
  components="${components}JARs"

  # Split on ":" only, so entries containing whitespace stay intact
  IFS=':' read -r -a extraJarPaths <<< "${jarsPath}"
  for jarPath in "${extraJarPaths[@]}"; do
    # Empty entries (e.g. from "a::b") are skipped
    [ -n "${jarPath}" ] || continue

    # jarPath points outside the WAR, so the duplicate check has to compare
    # the bare file name against the contents of WEB-INF/lib.
    jarName=${jarPath##*/}
    if [ -e "${tmpWarDir}/WEB-INF/lib/${jarName}" ]; then
      echo
      echo "Specified Sqoop WAR '${warPath}' already contains JAR ${jarPath}"
      echo
      cleanUp
      exit -1
    fi

    cp "${jarPath}" "${tmpWarDir}/WEB-INF/lib/"
    checkExec "Copying jar ${jarPath} to staging"
  done
fi

    
   
285

   

    
   
286
# Creating new Sqoop WAR
# The archive is built inside a subshell: the working directory of the script
# itself never changes, and zip only runs when the cd succeeded, so it can
# never pack the wrong directory.
( cd "${tmpWarDir}" && zip -r sqoop.war * > /dev/null )
checkExec "Creating new Sqoop WAR"

# Save original WAR file as a backup in case that something went wrong
backupPath="${warPath}_$(date +%Y-%m-%d_%H:%M:%S.%N)"
echo
echo "Backing up original WAR file to $backupPath"
mv "${warPath}" "${backupPath}"
checkExec "Backing up original WAR file to $backupPath"

# Move our jar to new position
mv "${tmpWarDir}/sqoop.war" "${warPath}"
checkExec "Moving generated WAR file to original location"

echo
echo "New Sqoop WAR file with added '${components}' at ${warPath}"
echo
cleanUp
exit 0
  1. dist/src/main/bin/addtowar.sh: Loading...