Review Board 1.7.22


[PIG-3204] Reduce the number of getSchema calls during script parsing

Review Request #13535 - Created Aug. 13, 2013 and submitted

Rohini Palaniswamy
PIG-3204
Reviewers
pig
pig
Change script parsing from line-by-line to parsing the whole script at once, which reduces the number of getSchema calls made during parsing.
Added new unit tests that track the number of times a line is parsed. Fixed the test failures in TestGrunt and TestShortcuts.
http://svn.apache.org/repos/asf/pig/trunk/src/org/apache/pig/PigServer.java
Revision 1510960 New Change
[20] 152 lines
[+20] [+] public class PigServer {
153

    
   
153

   
154
    private boolean isMultiQuery = true;
154
    private boolean isMultiQuery = true;
155
    private boolean aggregateWarning = true;
155
    private boolean aggregateWarning = true;
156

    
   
156

   
157
    private boolean validateEachStatement = false;
157
    private boolean validateEachStatement = false;

    
   
158
    private boolean skipParseInRegisterForBatch = false;
158

    
   
159

   
159
    private String constructScope() {
160
    private String constructScope() {
160
        // scope servers for now as a session id
161
        // scope servers for now as a session id
161

    
   
162

   
162
        // String user = System.getProperty("user.name", "DEFAULT_USER_ID");
163
        // String user = System.getProperty("user.name", "DEFAULT_USER_ID");
[+20] [20] 405 lines
[+20] [+] public void registerCode(String path, String scriptingLang, String namespace)
568
     * @param startLine
569
     * @param startLine
569
     *            line number of the query within the whole script
570
     *            line number of the query within the whole script
570
     * @throws IOException
571
     * @throws IOException
571
     */
572
     */
572
    public void registerQuery(String query, int startLine) throws IOException {
573
    public void registerQuery(String query, int startLine) throws IOException {
573
        currDAG.registerQuery(query, startLine, validateEachStatement);
574
        currDAG.registerQuery(query, startLine, validateEachStatement, skipParseInRegisterForBatch);
574
    }
575
    }
575

    
   
576

   
576
    /**
577
    /**
577
     * Register a query with the Pig runtime. The query is parsed and registered, but it is not
578
     * Register a query with the Pig runtime. The query is parsed and registered, but it is not
578
     * executed until it is needed.  Equivalent to calling {@link #registerQuery(String, int)}
579
     * executed until it is needed.  Equivalent to calling {@link #registerQuery(String, int)}
[+20] [20] 985 lines
[+20] [+] private void skipStores() throws IOException {
1564

    
   
1565

   
1565
        /**
1566
        /**
1566
         * Accumulate the given statement to previous query statements and generate
1567
         * Accumulate the given statement to previous query statements and generate
1567
         * an overall (raw) plan.
1568
         * an overall (raw) plan.
1568
         */
1569
         */
1569
        void registerQuery(String query, int startLine, boolean validateEachStatement)
1570
        void registerQuery(String query, int startLine, boolean validateEachStatement,
1570
        throws IOException {
1571
                boolean skipParseForBatch) throws IOException {
1571
            if( batchMode ) {
1572
            if( batchMode ) {
1572
                if( startLine == currentLineNum ) {
1573
                if( startLine == currentLineNum ) {
1573
                    String line = scriptCache.remove( scriptCache.size() - 1 );
1574
                    String line = scriptCache.remove( scriptCache.size() - 1 );
1574
                    scriptCache.add( line + query );
1575
                    scriptCache.add( line + query );
1575
                } else {
1576
                } else {
[+20] [20] 7 lines
[+20] private void skipStores() throws IOException {
1583
                        scriptCache.add(line);
1584
                        scriptCache.add(line);
1584
                        currentLineNum++;
1585
                        currentLineNum++;
1585
                        line = br.readLine();
1586
                        line = br.readLine();
1586
                    }
1587
                    }
1587
                }
1588
                }

    
   
1589
                if (skipParseForBatch) {

    
   
1590
                    return;

    
   
1591
                }
1588
            } else {
1592
            } else {
1589
                scriptCache.add( query );
1593
                scriptCache.add( query );
1590
            }
1594
            }
1591

    
   
1595

   
1592
            if(validateEachStatement){
1596
            if(validateEachStatement){
[+20] [20] 207 lines
[+20] [+] protected Graph duplicate() {
1800
                for (Iterator<String> it = scriptCache.iterator(); it.hasNext(); lineNumber++) {
1804
                for (Iterator<String> it = scriptCache.iterator(); it.hasNext(); lineNumber++) {
1801
                    // always doing registerQuery irrespective of the batch mode
1805
                    // always doing registerQuery irrespective of the batch mode
1802
                    // TODO: Need to figure out if anything different needs to happen if batch
1806
                    // TODO: Need to figure out if anything different needs to happen if batch
1803
                    // mode is not on
1807
                    // mode is not on
1804
                    // Don't have to do the validation again, so set validateEachStatement param to false
1808
                    // Don't have to do the validation again, so set validateEachStatement param to false
1805
                    graph.registerQuery(it.next(), lineNumber, false);
1809
                    graph.registerQuery(it.next(), lineNumber, false, false);
1806
                }
1810
                }
1807
                graph.postProcess();
1811
                graph.postProcess();
1808
            } catch (IOException ioe) {
1812
            } catch (IOException ioe) {
1809
                ioe.printStackTrace();
1813
                ioe.printStackTrace();
1810
                graph = null;
1814
                graph = null;
[+20] [20] 10 lines
[+20] protected Graph duplicate() {
1821
     */
1825
     */
1822
    public void setValidateEachStatement(boolean validateEachStatement) {
1826
    public void setValidateEachStatement(boolean validateEachStatement) {
1823
        this.validateEachStatement = validateEachStatement;
1827
        this.validateEachStatement = validateEachStatement;
1824
    }
1828
    }
1825

    
   
1829

   

    
   
1830
    /**

    
   
1831
     * Set whether to skip parsing while registering the query in batch mode

    
   
1832
     * @param skipParseInRegisterForBatch

    
   
1833
     */

    
   
1834
    public void setSkipParseInRegisterForBatch(boolean skipParseInRegisterForBatch) {

    
   
1835
        this.skipParseInRegisterForBatch = skipParseInRegisterForBatch;

    
   
1836
    }

    
   
1837

   
1826
    public String getLastRel() {
1838
    public String getLastRel() {
1827
        return currDAG.getLastRel();
1839
        return currDAG.getLastRel();
1828
    }
1840
    }
1829
}
1841
}
http://svn.apache.org/repos/asf/pig/trunk/src/org/apache/pig/tools/grunt/GruntParser.java
Revision 1510960 New Change
 
http://svn.apache.org/repos/asf/pig/trunk/test/org/apache/pig/test/TestGrunt.java
Revision 1510960 New Change
 
http://svn.apache.org/repos/asf/pig/trunk/test/org/apache/pig/test/TestPigServer.java
Revision 1510960 New Change
 
  1. http://svn.apache.org/repos/asf/pig/trunk/src/org/apache/pig/PigServer.java: Loading...
  2. http://svn.apache.org/repos/asf/pig/trunk/src/org/apache/pig/tools/grunt/GruntParser.java: Loading...
  3. http://svn.apache.org/repos/asf/pig/trunk/test/org/apache/pig/test/TestGrunt.java: Loading...
  4. http://svn.apache.org/repos/asf/pig/trunk/test/org/apache/pig/test/TestPigServer.java: Loading...