Review Board 1.7.22


PIG-2248 Pig parser does not detect when a macro name masks a UDF name

Review Request #10435 - Created April 12, 2013 and updated

Johnny Zhang
trunk
PIG-2248
Reviewers
pig
pig-git
Pig accepts a macro like:
define COUNT(in_relation, min_gpa) returns c {
                           b = filter $in_relation by gpa >= $min_gpa;
                           $c = foreach b generate age, name;
                       }
This should produce a warning that it is masking a UDF.
I tested it with two macros defined in one Pig script; only one warning message is emitted for each macro. The messages look like:
[main] WARN  org.apache.pig.parser.PigMacro - macro name 'COUNT' masks a builtin UDF org.apache.pig.builtin.COUNT
[main] WARN  org.apache.pig.parser.PigMacro - macro name 'ABS' masks a builtin UDF org.apache.pig.builtin.ABS

Diff revision 2 (Latest)

1 2
1 2

  1. src/org/apache/pig/parser/PigMacro.java: Loading...
src/org/apache/pig/parser/PigMacro.java
Revision 435fc13 New Change
[20] 15 lines
[+20]
16
 * limitations under the License.
16
 * limitations under the License.
17
 */
17
 */
18
package org.apache.pig.parser;
18
package org.apache.pig.parser;
19

    
   
19

   
20
import java.io.BufferedReader;
20
import java.io.BufferedReader;

    
   
21
import java.io.File;
21
import java.io.IOException;
22
import java.io.IOException;
22
import java.io.StreamTokenizer;
23
import java.io.StreamTokenizer;
23
import java.io.StringReader;
24
import java.io.StringReader;
24
import java.io.StringWriter;
25
import java.io.StringWriter;

    
   
26
import java.net.URL;
25
import java.util.ArrayList;
27
import java.util.ArrayList;

    
   
28
import java.util.Enumeration;
26
import java.util.HashSet;
29
import java.util.HashSet;

    
   
30
import java.util.Hashtable;
27
import java.util.List;
31
import java.util.List;
28
import java.util.Map;
32
import java.util.Map;
29
import java.util.Set;
33
import java.util.Set;
30

    
   
34

   
31
import org.antlr.runtime.CharStream;
35
import org.antlr.runtime.CharStream;
32
import org.antlr.runtime.CommonTokenStream;
36
import org.antlr.runtime.CommonTokenStream;
33
import org.antlr.runtime.RecognitionException;
37
import org.antlr.runtime.RecognitionException;
34
import org.antlr.runtime.tree.CommonTree;
38
import org.antlr.runtime.tree.CommonTree;
35
import org.antlr.runtime.tree.CommonTreeNodeStream;
39
import org.antlr.runtime.tree.CommonTreeNodeStream;
36
import org.antlr.runtime.tree.Tree;
40
import org.antlr.runtime.tree.Tree;
37
import org.apache.commons.logging.Log;
41
import org.apache.commons.logging.Log;
38
import org.apache.commons.logging.LogFactory;
42
import org.apache.commons.logging.LogFactory;

    
   
43
import org.apache.pig.impl.PigContext;
39
import org.apache.pig.parser.PigParserNode.InvocationPoint;
44
import org.apache.pig.parser.PigParserNode.InvocationPoint;
40
import org.apache.pig.tools.parameters.ParameterSubstitutionPreprocessor;
45
import org.apache.pig.tools.parameters.ParameterSubstitutionPreprocessor;
41

    
   
46

   

    
   
47
import com.google.common.collect.ImmutableList;
42
import com.google.common.collect.ImmutableSet;
48
import com.google.common.collect.ImmutableSet;
43

    
   
49

   
44
class PigMacro {
50
class PigMacro {
45

    
   
51

   
46
    private static final Log LOG = LogFactory.getLog(PigMacro.class);
52
    private static final Log LOG = LogFactory.getLog(PigMacro.class);
[+20] [20] 4 lines
[+20] class PigMacro {
51
    private List<String> params;
57
    private List<String> params;
52
    private List<String> rets;
58
    private List<String> rets;
53
    private Map<String, PigMacro> seen;
59
    private Map<String, PigMacro> seen;
54
    private Set<String> macroStack;
60
    private Set<String> macroStack;
55
    private long idx = 0;
61
    private long idx = 0;

    
   
62
    private static Hashtable<String, Integer> nameMacro = new Hashtable<String, Integer>();
56
    
63
    
57
    // The start line number of this macro in the script
64
    // The start line number of this macro in the script
58
    private int startLine = 0;
65
    private int startLine = 0;
59

    
   
66

   
60
    PigMacro(String name, String file, List<String> params,
67
    PigMacro(String name, String file, List<String> params,
61
            List<String> returns, String body, Map<String, PigMacro> seen) {
68
            List<String> returns, String body, Map<String, PigMacro> seen) {
62
        this.name = name;
69
        this.name = name;
63
        this.params = (params == null) ? new ArrayList<String>() : params;
70
        this.params = (params == null) ? new ArrayList<String>() : params;
64
        this.rets = (returns == null) ? new ArrayList<String>() : returns;
71
        this.rets = (returns == null) ? new ArrayList<String>() : returns;
65
        this.fileName = file;
72
        this.fileName = file;
66
        this.body = body;
73
        this.body = body;
67
        this.seen = seen;
74
        this.seen = seen;
68
        this.macroStack = new HashSet<String>(); 
75
        this.macroStack = new HashSet<String>();
69
        LOG.debug("Macro '" + name + "' is defined");
76
        LOG.debug("Macro '" + name + "' is defined");
70
    }
77
    }
71
    
78
    
72
    String getName() { return name; }
79
    String getName() { return name; }
73
    
80
    
[+20] [20] 332 lines
[+20] [+] private static void traverseMacro(Tree t, List<CommonTree> nodes,
406
        for (int i = 0; i < n; i++) {
413
        for (int i = 0; i < n; i++) {
407
            Tree t0 = t.getChild(i);
414
            Tree t0 = t.getChild(i);
408
            traverseMacro(t0, nodes, nodeType);
415
            traverseMacro(t0, nodes, nodeType);
409
        }
416
        }
410
    }
417
    }
411
     
418

   

    
   
419
    /**

    
   
420
     * check if macro name duplicates with any existing builtin UDF

    
   
421
     */

    
   
422
    private static void checkMacroNameMasking(String name, int line, String file)

    
   
423
            throws ParserException {

    
   
424
        try {

    
   
425
            ClassLoader classLoader = Thread.currentThread()

    
   
426
                    .getContextClassLoader();

    
   
427
            ImmutableList<String> imporPackages = ImmutableSet.copyOf(

    
   
428
                    PigContext.getPackageImportList()).asList();

    
   
429
            for (String imporPackage : imporPackages) {

    
   
430
                if (imporPackage.endsWith(".")) {

    
   
431
                    String path = imporPackage.replace('.', '/');

    
   
432
                    Enumeration<URL> resources = classLoader.getResources(path);

    
   
433
                    List<File> dirs = new ArrayList<File>();

    
   
434
                    ArrayList<Class> classes = new ArrayList<Class>();

    
   
435
                    while (resources.hasMoreElements()) {

    
   
436
                        URL resource = resources.nextElement();

    
   
437
                        if (resource.toString().contains("build/classes/" + path)) {

    
   
438
                            dirs.add(new File(resource.getFile()));

    
   
439
                        }

    
   
440
                    }

    
   
441
                    if (dirs.size() > 0) {

    
   
442
                        classes.addAll(findClasses(dirs.get(0), imporPackage));

    
   
443
                        ImmutableList<Class> builtinUDF = ImmutableSet.copyOf(classes)

    
   
444
                                .asList();

    
   
445
                        if (builtinUDF.contains(Class.forName(imporPackage + name))) {

    
   
446
                            nameMacro.put(name, 1);

    
   
447
                            LOG.warn("macro name '" + name + "' masks a builtin UDF "

    
   
448
                                    + imporPackage + name);

    
   
449
                        }

    
   
450
                    }

    
   
451
                }

    
   
452
            }

    
   
453
        } catch (ClassNotFoundException e) {

    
   
454
            LOG.debug("the macro name " + name

    
   
455
                    + " doesn't match any classes under current lookup package");

    
   
456
        } catch (IOException e) {

    
   
457
            String msg = getErrorMessage(file, line, "Resouce doesn't exist",

    
   
458
                    e.getMessage() + "\n");

    
   
459
            throw new ParserException(msg);

    
   
460
        }

    
   
461
    }

    
   
462

   

    
   
463
    /**

    
   
464
     * Recursive method used to find all classes in a given directory and

    
   
465
     * subdirs.

    
   
466
     */

    
   
467
    private static List<Class> findClasses(File directory, String packageName)

    
   
468
            throws ClassNotFoundException {

    
   
469
        List<Class> classes = new ArrayList<Class>();

    
   
470
        File[] files = directory.listFiles();

    
   
471
        for (File file : files) {

    
   
472
            if (file.isDirectory()) {

    
   
473
                classes.addAll(findClasses(file, packageName + file.getName() + "."));

    
   
474
            } else if (file.getName().endsWith(".class")

    
   
475
                    && !file.getName().contains("$")

    
   
476
                    && !file.getName().contains("Test")) {

    
   
477
                classes.add(Class.forName(packageName

    
   
478
                        + file.getName().substring(0, file.getName().length() - 6)));

    
   
479
            }

    
   
480
        }

    
   
481
        return classes;

    
   
482
    }

    
   
483

   
412
    /*
484
    /*
413
     * Macro inline nodes have the following form:
485
     * Macro inline nodes have the following form:
414
     * 
486
     * 
415
     * (MACRO_INLINE <name> (RETURN_VAL <values>) (PARAMS <values>)) 
487
     * (MACRO_INLINE <name> (RETURN_VAL <values>) (PARAMS <values>)) 
416
     * 
488
     * 
[+20] [20] 15 lines
[+20] [+] static CommonTree macroInline(CommonTree t, List<PigMacro> macroDefs, Set<String> macroStack)
432
                break;
504
                break;
433
            }
505
            }
434
        }
506
        }
435

    
   
507

   
436
        String file = ((PigParserNode)t).getFileName();
508
        String file = ((PigParserNode)t).getFileName();

    
   
509

   

    
   
510
        // check if macro masks a UDF

    
   
511
        if (!nameMacro.containsKey(mn)) {

    
   
512
            checkMacroNameMasking(mn, t.getLine(), file);

    
   
513
        }
437
        
514
        
438
        if (macro == null) {
515
        if (macro == null) {
439
            String msg = getErrorMessage(file, t.getLine(),
516
            String msg = getErrorMessage(file, t.getLine(),
440
                    "Cannot expand macro '" + mn + "'",
517
                    "Cannot expand macro '" + mn + "'",
441
                    "Macro must be defined before expansion.");
518
                    "Macro must be defined before expansion.");
[+20] [20] 46 lines
  1. src/org/apache/pig/parser/PigMacro.java: Loading...