Review Board 1.7.22


FLUME-1277: Error parsing Syslog rfc 3164 messages with null values

Review Request #7780 - Created Oct. 30, 2012 and submitted

Brock Noland
trunk
FLUME-1277
Reviewers
Flume
flume-git
The SyslogUtils class doesn't properly parse RFC 3164-style messages that contain a null (hyphen) value, e.g.:
<10>Apr 1 13:14:04 ubuntu-11.cloudera.com - rest_of_message

SyslogUtils tries to parse such a message as an RFC 5424-style message, skips over the date information, and interprets the first hyphen as a null timestamp. Part of the problem is the use of a Scanner with a regex: Scanner.findInLine does not anchor the match at the start of the input, so it skips over a properly formatted RFC 3164-style message until it finds anything that matches the RFC 5424 regex, including a hyphen.
Unit test added; all unit tests pass.
flume-ng-core/src/main/java/org/apache/flume/source/SyslogUtils.java
Revision 4cded11 New Change
[20] 16 lines
[+20]
17
 * under the License.
17
 * under the License.
18
 */
18
 */
19

    
   
19

   
20
package org.apache.flume.source;
20
package org.apache.flume.source;
21

    
   
21

   
Moved from 33

    
   
22
import org.apache.flume.Event;
Moved from 34

    
   
23
import org.apache.flume.event.EventBuilder;
Moved from 35

    
   
24
import org.jboss.netty.buffer.ChannelBuffer;
Moved from 36

    
   
25
import org.slf4j.Logger;
Moved from 37

    
   
26
import org.slf4j.LoggerFactory;

    
   
27

   
22
import java.io.ByteArrayOutputStream;
28
import java.io.ByteArrayOutputStream;
23
import java.io.IOException;
29
import java.io.IOException;
24
import java.text.ParseException;
30
import java.text.ParseException;
25
import java.text.SimpleDateFormat;
31
import java.text.SimpleDateFormat;
26
import java.util.ArrayList;
32
import java.util.ArrayList;
27
import java.util.Calendar;
33
import java.util.Calendar;
28
import java.util.HashMap;
34
import java.util.HashMap;
29
import java.util.Map;
35
import java.util.Map;
30
import java.util.Scanner;

   
31
import java.util.regex.MatchResult;
36
import java.util.regex.MatchResult;

    
   
37
import java.util.regex.Matcher;

    
   
38
import java.util.regex.Pattern;
32

    
   
39

   
33
import org.apache.flume.Event;
Moved to 22

   
34
import org.apache.flume.event.EventBuilder;
Moved to 23

   
35
import org.jboss.netty.buffer.ChannelBuffer;
Moved to 24

   
36
import org.slf4j.Logger;
Moved to 25

   
37
import org.slf4j.LoggerFactory;
Moved to 26

   
38

    
   

   
39
public class SyslogUtils {
40
public class SyslogUtils {
40
  final public static String SYSLOG_TIMESTAMP_FORMAT_RFC5424_2 = "yyyy-MM-dd'T'HH:mm:ss.SZ";
41
  final public static String SYSLOG_TIMESTAMP_FORMAT_RFC5424_2 = "yyyy-MM-dd'T'HH:mm:ss.SZ";
41
  final public static String SYSLOG_TIMESTAMP_FORMAT_RFC5424_1 = "yyyy-MM-dd'T'HH:mm:ss.S";
42
  final public static String SYSLOG_TIMESTAMP_FORMAT_RFC5424_1 = "yyyy-MM-dd'T'HH:mm:ss.S";
42
  final public static String SYSLOG_TIMESTAMP_FORMAT_RFC5424_3 = "yyyy-MM-dd'T'HH:mm:ssZ";
43
  final public static String SYSLOG_TIMESTAMP_FORMAT_RFC5424_3 = "yyyy-MM-dd'T'HH:mm:ssZ";
43
  final public static String SYSLOG_TIMESTAMP_FORMAT_RFC5424_4 = "yyyy-MM-dd'T'HH:mm:ss";
44
  final public static String SYSLOG_TIMESTAMP_FORMAT_RFC5424_4 = "yyyy-MM-dd'T'HH:mm:ss";
44
  final public static String SYSLOG_TIMESTAMP_FORMAT_RFC3164_1 = "yyyyMMM d HH:mm:ss";
45
  final public static String SYSLOG_TIMESTAMP_FORMAT_RFC3164_1 = "yyyyMMM d HH:mm:ss";
45

    
   
46

   
46
  final public static String SYSLOG_MSG_RFC5424_0 =
47
  final public static String SYSLOG_MSG_RFC5424_0 =
47
      "[(?:\\d\\s)]?" +// version
48
      "(?:\\d\\s)?" +// version
48
  // yyyy-MM-dd'T'HH:mm:ss.SZ or yyyy-MM-dd'T'HH:mm:ss.S+hh:mm or - (null stamp)
49
  // yyyy-MM-dd'T'HH:mm:ss.SZ or yyyy-MM-dd'T'HH:mm:ss.S+hh:mm or - (null stamp)
49
      "(?:(\\d{4}[-]\\d{2}[-]\\d{2}[T]\\d{2}[:]\\d{2}[:]\\d{2}(?:\\.\\d{1,6})?(?:[+-]\\d{2}[:]\\d{2}|Z)?)|-)" + // stamp
50
      "(?:(\\d{4}[-]\\d{2}[-]\\d{2}[T]\\d{2}[:]\\d{2}[:]\\d{2}(?:\\.\\d{1,6})?(?:[+-]\\d{2}[:]\\d{2}|Z)?)|-)" + // stamp
50
      "\\s" + // separator
51
      "\\s" + // separator
51
      "(?:([\\w][\\w\\d\\.@-]*)|-)" + // host name or - (null)
52
      "(?:([\\w][\\w\\d\\.@-]*)|-)" + // host name or - (null)
52
      "\\s" + // separator
53
      "\\s" + // separator
[+20] [20] 24 lines
[+20] public class SyslogUtils {
77
  private final boolean isUdp;
78
  private final boolean isUdp;
78
  private boolean isBadEvent;
79
  private boolean isBadEvent;
79
  private boolean isIncompleteEvent;
80
  private boolean isIncompleteEvent;
80
  private Integer maxSize;
81
  private Integer maxSize;
81

    
   
82

   
82
  private class SyslogFormater {
83
  private class SyslogFormatter {
83
    public String regexPattern;
84
    public Pattern regexPattern;
84
    public ArrayList<String> searchPattern = new ArrayList<String>();
85
    public ArrayList<String> searchPattern = new ArrayList<String>();
85
    public ArrayList<String> replacePattern = new ArrayList<String>();
86
    public ArrayList<String> replacePattern = new ArrayList<String>();
86
    public ArrayList<SimpleDateFormat> dateFormat = new ArrayList<SimpleDateFormat>();
87
    public ArrayList<SimpleDateFormat> dateFormat = new ArrayList<SimpleDateFormat>();
87
    public boolean addYear;
88
    public boolean addYear;
88
  }
89
  }
89
  private ArrayList<SyslogFormater> formats = new ArrayList<SyslogFormater>();
90
  private ArrayList<SyslogFormatter> formats = new ArrayList<SyslogFormatter>();
90

    
   
91

   
91
  private String timeStamp = null;
92
  private String timeStamp = null;
92
  private String hostName = null;
93
  private String hostName = null;
93
  private String msgBody = null;
94
  private String msgBody = null;
94

    
   
95

   
[+20] [20] 18 lines
[+20] private class SyslogFormater { private class SyslogFormatter {
113
  public void addFormats(Map<String, String> formatProp) {
114
  public void addFormats(Map<String, String> formatProp) {
114
    if (formatProp.isEmpty() || !formatProp.containsKey(
115
    if (formatProp.isEmpty() || !formatProp.containsKey(
115
        SyslogSourceConfigurationConstants.CONFIG_REGEX)) {
116
        SyslogSourceConfigurationConstants.CONFIG_REGEX)) {
116
      return;
117
      return;
117
    }
118
    }
118
    SyslogFormater fmt1 = new SyslogFormater();
119
    SyslogFormatter fmt1 = new SyslogFormatter();
119
    fmt1.regexPattern = formatProp.get(
120
    fmt1.regexPattern = Pattern.compile( formatProp.get(
120
        SyslogSourceConfigurationConstants.CONFIG_REGEX);
121
        SyslogSourceConfigurationConstants.CONFIG_REGEX) );
121
    if (formatProp.containsKey(
122
    if (formatProp.containsKey(
122
        SyslogSourceConfigurationConstants.CONFIG_SEARCH)) {
123
        SyslogSourceConfigurationConstants.CONFIG_SEARCH)) {
123
      fmt1.searchPattern.add(formatProp.get(
124
      fmt1.searchPattern.add(formatProp.get(
124
          SyslogSourceConfigurationConstants.CONFIG_SEARCH));
125
          SyslogSourceConfigurationConstants.CONFIG_SEARCH));
125
    }
126
    }
[+20] [20] 11 lines
[+20] public void addFormats(Map<String, String> formatProp) {
137
  }
138
  }
138

    
   
139

   
139
  // setup built-in formats
140
  // setup built-in formats
140
  private void initHeaderFormats() {
141
  private void initHeaderFormats() {
141
    // setup RFC5424 formater
142
    // setup RFC5424 formater
142
    SyslogFormater fmt1 = new SyslogFormater();
143
    SyslogFormatter fmt1 = new SyslogFormatter();
143
    fmt1.regexPattern = SYSLOG_MSG_RFC5424_0;
144
    fmt1.regexPattern = Pattern.compile(SYSLOG_MSG_RFC5424_0);
144
    // 'Z' in timestamp indicates UTC zone, so replace it it with '+0000' for date formatting
145
    // 'Z' in timestamp indicates UTC zone, so replace it it with '+0000' for date formatting
145
    fmt1.searchPattern.add("Z");
146
    fmt1.searchPattern.add("Z");
146
    fmt1.replacePattern.add("+0000");
147
    fmt1.replacePattern.add("+0000");
147
    // timezone in RFC5424 is [+-]tt:tt, so remove the ':' for java date formatting
148
    // timezone in RFC5424 is [+-]tt:tt, so remove the ':' for java date formatting
148
    fmt1.searchPattern.add("([+-])(\\d{2})[:](\\d{2})");
149
    fmt1.searchPattern.add("([+-])(\\d{2})[:](\\d{2})");
149
    fmt1.replacePattern.add("$1$2$3");
150
    fmt1.replacePattern.add("$1$2$3");
150
    fmt1.dateFormat.add(new SimpleDateFormat(SYSLOG_TIMESTAMP_FORMAT_RFC5424_1));
151
    fmt1.dateFormat.add(new SimpleDateFormat(SYSLOG_TIMESTAMP_FORMAT_RFC5424_1));
151
    fmt1.dateFormat.add(new SimpleDateFormat(SYSLOG_TIMESTAMP_FORMAT_RFC5424_2));
152
    fmt1.dateFormat.add(new SimpleDateFormat(SYSLOG_TIMESTAMP_FORMAT_RFC5424_2));
152
    fmt1.dateFormat.add(new SimpleDateFormat(SYSLOG_TIMESTAMP_FORMAT_RFC5424_3));
153
    fmt1.dateFormat.add(new SimpleDateFormat(SYSLOG_TIMESTAMP_FORMAT_RFC5424_3));
153
    fmt1.dateFormat.add(new SimpleDateFormat(SYSLOG_TIMESTAMP_FORMAT_RFC5424_4));
154
    fmt1.dateFormat.add(new SimpleDateFormat(SYSLOG_TIMESTAMP_FORMAT_RFC5424_4));
154
    fmt1.addYear = false;
155
    fmt1.addYear = false;
155

    
   
156

   
156
    // setup RFC3164 formater
157
    // setup RFC3164 formater
157
    SyslogFormater fmt2 = new SyslogFormater();
158
    SyslogFormatter fmt2 = new SyslogFormatter();
158
    fmt2.regexPattern = SYSLOG_MSG_RFC3164_0;
159
    fmt2.regexPattern = Pattern.compile(SYSLOG_MSG_RFC3164_0);
159
    // the single digit date has two spaces, so trim it
160
    // the single digit date has two spaces, so trim it
160
    fmt2.searchPattern.add("  ");
161
    fmt2.searchPattern.add("  ");
161
    fmt2.replacePattern.add(" ");
162
    fmt2.replacePattern.add(" ");
162
    fmt2.dateFormat.add(new SimpleDateFormat(SYSLOG_TIMESTAMP_FORMAT_RFC3164_1));
163
    fmt2.dateFormat.add(new SimpleDateFormat(SYSLOG_TIMESTAMP_FORMAT_RFC3164_1));
163
    fmt2.addYear = true;
164
    fmt2.addYear = true;
[+20] [20] 64 lines
[+20] [+] public String getSyslogStatus(){
228
    return EventBuilder.withBody(body, headers);
229
    return EventBuilder.withBody(body, headers);
229
  }
230
  }
230

    
   
231

   
231
  // Apply each known pattern to message
232
  // Apply each known pattern to message
232
  private void formatHeaders() {
233
  private void formatHeaders() {
233
    Scanner scanner = new Scanner(baos.toString());
234
    String eventStr = baos.toString();
234
    MatchResult res = null;

   
235

    
   

   
236
    for(int p=0; p < formats.size(); p++) {
235
    for(int p=0; p < formats.size(); p++) {
237
      SyslogFormater fmt = formats.get(p);
236
      SyslogFormatter fmt = formats.get(p);
238
      try {
237
      Pattern pattern = fmt.regexPattern;
239
        scanner.findInLine(fmt.regexPattern);
238
      Matcher matcher = pattern.matcher(eventStr);
240
        res = scanner.match();
239
      if (! matcher.matches()) {
241
      } catch (IllegalStateException e) {

   
242
        // Ignore and move on ..

   
243
        continue;
240
        continue;
244
      }
241
      }

    
   
242
      MatchResult res = matcher.toMatchResult();
245
      for (int grp=1; grp <= res.groupCount(); grp++) {
243
      for (int grp=1; grp <= res.groupCount(); grp++) {
246
        String value = res.group(grp);
244
        String value = res.group(grp);
247
        if (grp == SYSLOG_TIMESTAMP_POS) {
245
        if (grp == SYSLOG_TIMESTAMP_POS) {
248
          // apply available format replacements to timestamp
246
          // apply available format replacements to timestamp
249
          if (value != null) {
247
          if (value != null) {
[+20] [20] 136 lines
flume-ng-core/src/test/java/org/apache/flume/source/TestSyslogUtils.java
Revision acfb29d New Change
 
  1. flume-ng-core/src/main/java/org/apache/flume/source/SyslogUtils.java: Loading...
  2. flume-ng-core/src/test/java/org/apache/flume/source/TestSyslogUtils.java: Loading...