Review Board 1.7.22


SQOOP-720 Improve error handling when exporting malformed text data

Review Request #8175 - Created Nov. 22, 2012 and submitted

Submitter: Jarek Cecho
Bugs: SQOOP-720
Reviewers: Sqoop
Repository: sqoop-trunk
Description: I've significantly improved the level of information that is printed to the log. I believe the user will get all of the information needed to properly investigate the malformed data.
Testing Done: I've tested this on a real cluster by triggering the error and verifying that it is printed out.
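
In short, the patch replaces the narrow RecordParser.ParseError catch in TextExportMapper.map() with a catch-all that logs the offending row and its input split before failing the task. A condensed sketch of that pattern follows (LOG and recordImpl are the class members shown in the diff below; this is not the exact hunk):

  public void map(LongWritable key, Text val, Context context)
      throws IOException, InterruptedException {
    try {
      recordImpl.parse(val);
      context.write(recordImpl, NullWritable.get());
    } catch (Exception e) {
      // Log everything that helps locate the malformed record...
      LOG.error("Exception raised during data export", e);
      LOG.error("On input: " + val);
      LOG.error("At position " + key);
      LOG.error("Currently processing split: " + context.getInputSplit());
      // ...then rethrow so the export fails instead of silently dropping data.
      throw new IOException("Can't export data, please check task tracker logs", e);
    }
  }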

Diff revision 1 (Latest)

src/java/org/apache/sqoop/mapreduce/TextExportMapper.java
(diff against revision 7b7f3314ff58bfd7b780aa78447f65d67be901f0)

@@ -21,23 +21,29 @@
 import java.io.IOException;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.lib.input.FileSplit;
 import org.apache.hadoop.util.ReflectionUtils;
-import com.cloudera.sqoop.lib.RecordParser;
 import com.cloudera.sqoop.lib.SqoopRecord;
 import com.cloudera.sqoop.mapreduce.AutoProgressMapper;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;

 /**
  * Converts an input record from a string representation to a parsed Sqoop
  * record and emits that DBWritable to the OutputFormat for writeback to the
  * database.
  */
 public class TextExportMapper
     extends AutoProgressMapper<LongWritable, Text, SqoopRecord, NullWritable> {

+  public static final Log LOG =
+    LogFactory.getLog(TextExportMapper.class.getName());
+
   private SqoopRecord recordImpl;

   public TextExportMapper() {
   }

@@ -74,10 +80,37 @@
   public void map(LongWritable key, Text val, Context context)
       throws IOException, InterruptedException {
     try {
       recordImpl.parse(val);
       context.write(recordImpl, NullWritable.get());
-    } catch (RecordParser.ParseError pe) {
-      throw new IOException("Could not parse record: " + val, pe);
+    } catch (Exception e) {
+      // Something bad has happened
+      LOG.error("");
+      LOG.error("Exception raised during data export");
+      LOG.error("");
+
+      LOG.error("Exception: ", e);
+      LOG.error("On input: " + val);
+
+      InputSplit is = context.getInputSplit();
+      if (is instanceof FileSplit) {
+        LOG.error("On input file: " + ((FileSplit)is).getPath());
+      } else if (is instanceof CombineFileSplit) {
+        LOG.error("On input file: "
+          + context.getConfiguration().get("map.input.file"));
+      }
+      LOG.error("At position " + key);
+
+      LOG.error("");
+      LOG.error("Currently processing split:");
+      LOG.error(is);
+
+      LOG.error("");
+      LOG.error("This issue might not necessarily be caused by current input");
+      LOG.error("due to the batching nature of export.");
+      LOG.error("");
+
+      throw new IOException("Can't export data, please check task tracker logs",
+        e);
     }
   }
 }