Review Board 1.7.22


PIG-3395 Large filter expression makes Pig hang

Review Request #13186 - Created Aug. 1, 2013 and updated

Cheolsoo Park
PIG-3395
Reviewers
pig
pig-git
Refactors PColFilterExtractor so that a large filter expression can be processed w/ reasonable heap size. Currently, a large filter expression that consists of more than 50 or/and expressions chokes Pig.
Adds new test cases to TestPartitionFilterPushDown.

All unit tests pass.
src/org/apache/pig/newplan/PColFilterExtractor.java
Revision 32ba788 New Change
[20] 181 lines
[+20] [+] protected void visit(ProjectExpression project) throws FrontendException {
182
		} else {
182
		} else {
183
			sawNonKeyCol = true;
183
			sawNonKeyCol = true;
184
		}
184
		}
185
	}
185
	}
186

    
   
186

   

    
   
187
     /**

    
   
188
      * Detect whether a non-partition column is present in the expression.

    
   
189
      * @param binOp

    
   
190
      * @return true or false

    
   
191
      * @throws FrontendException

    
   
192
      */

    
   
193
     private boolean detectNonPartitionColumn(BinaryExpression binOp) throws FrontendException {

    
   
194
         LogicalExpression lhs = binOp.getLhs();

    
   
195
         LogicalExpression rhs = binOp.getRhs();

    
   
196
         if (lhs instanceof ProjectExpression) {

    
   
197
             String fieldName = ((ProjectExpression)lhs).getFieldSchema().alias;

    
   
198
             if(!partitionCols.contains(fieldName)) {

    
   
199
                 return true;

    
   
200
             }

    
   
201
         }

    
   
202
         if (rhs instanceof ProjectExpression) {

    
   
203
             String fieldName = ((ProjectExpression)rhs).getFieldSchema().alias;

    
   
204
             if(!partitionCols.contains(fieldName)) {

    
   
205
                 return true;

    
   
206
             }

    
   
207
         }

    
   
208
 

    
   
209
         boolean lhsSawNonKeyCol = false;

    
   
210
         boolean rhsSawNonKeyCol = false;

    
   
211
         if (lhs instanceof BinaryExpression) {

    
   
212
             lhsSawNonKeyCol = detectNonPartitionColumn((BinaryExpression)lhs);

    
   
213
         }

    
   
214
         if (rhs instanceof BinaryExpression) {

    
   
215
             rhsSawNonKeyCol = detectNonPartitionColumn((BinaryExpression)rhs);

    
   
216
         }

    
   
217
 

    
   
218
         return lhsSawNonKeyCol || rhsSawNonKeyCol;

    
   
219
     }

    
   
220
 

    
   
221
     /**

    
   
222
      * Detect and/or expressions that contain both partition and non-partition

    
   
223
      * conditions such as '(pcond and non-pcond) or (pcond and non-pcond)'.

    
   
224
      * @param binOp

    
   
225
      * @return true or false

    
   
226
      * @throws FrontendException

    
   
227
      */

    
   
228
     private boolean detectAndOrConditionWithMixedColumns(BinaryExpression binOp) throws FrontendException {

    
   
229
         LogicalExpression lhs = binOp.getLhs();

    
   
230
         LogicalExpression rhs = binOp.getRhs();

    
   
231
 

    
   
232
         if ( (binOp instanceof OrExpression) &&

    
   
233
              ( (lhs instanceof AndExpression && rhs instanceof AndExpression) ||

    
   
234
                (lhs instanceof OrExpression || rhs instanceof OrExpression) ) ) {

    
   
235
             return detectNonPartitionColumn(binOp);

    
   
236
         }

    
   
237
 

    
   
238
         return false;

    
   
239
     }

    
   
240
 
187
	private void visit(BinaryExpression binOp) throws FrontendException {
241
	private void visit(BinaryExpression binOp) throws FrontendException {
188
		boolean lhsSawKey = false;
242
		boolean lhsSawKey = false;
189
		boolean rhsSawKey = false;
243
		boolean rhsSawKey = false;
190
		boolean lhsSawNonKeyCol = false;
244
		boolean lhsSawNonKeyCol = false;
191
		boolean rhsSawNonKeyCol = false;
245
		boolean rhsSawNonKeyCol = false;
192
        sawKey = false;
246
        sawKey = false;
193
        sawNonKeyCol = false;
247
        sawNonKeyCol = false;
194

    
   
248

   
195
        LogicalExpression binLHS = binOp.getLhs();
249
        if (detectAndOrConditionWithMixedColumns(binOp)) {
196
        LogicalExpression binRHS = binOp.getRhs();

   
197
        // Take care of nested OR as in

   
198
        // ((cond1 and cond2) or (cond3 and cond4) or (cond5 and cond6)) or (cond7 and cond8)

   
199
        if (binOp instanceof OrExpression &&

   
200
                ((binLHS instanceof AndExpression && binRHS instanceof AndExpression) ||

   
201
                  binLHS instanceof OrExpression || binRHS instanceof OrExpression)) {

   
202
            visit(binLHS);

   
203
            lhsSawNonKeyCol = sawNonKeyCol;

   
204
            this.replaceSide = Side.NONE;

   
205
            visit(binRHS);

   
206
            rhsSawNonKeyCol = sawNonKeyCol;

   
207
            this.replaceSide = Side.NONE;

   
208
            if (lhsSawNonKeyCol || rhsSawNonKeyCol || !canPushDown) {

   
209
                sawKey = false;

   
210
                sawNonKeyCol = true;
250
            sawNonKeyCol = true;
211
                // Don't set canPushDown to false. If there are other AND
251
            // Don't set canPushDown to false. If there are other AND
212
                // conditions on a partition column we want to push that down
252
            // conditions on a partition column we want to push that down
213
                LOG.warn("No partition filter push down: You have partition and non-partition "
253
            LOG.warn("No partition filter push down: You have partition and non-partition "
214
                        + "columns  in a construction like: "
254
                    + "columns  in a construction like: "
215
                        + "(pcond and non-pcond ..) or (pcond and non-pcond ...) "
255
                    + "(pcond and non-pcond ..) or (pcond and non-pcond ...) "
216
                        + "where pcond is a condition on a partition column and "
256
                    + "where pcond is a condition on a partition column and "
217
                        + "non-pcond is a condition on a non-partition column.");
257
                    + "non-pcond is a condition on a non-partition column.");
218
                return;
258
            return;
219
            }
259
        }
220
        }

   
221
		visit( binOp.getLhs() );
260
		visit( binOp.getLhs() );
222
		replaceChild(binOp.getLhs());
261
		replaceChild(binOp.getLhs());
223
		lhsSawKey = sawKey;
262
		lhsSawKey = sawKey;
224
		lhsSawNonKeyCol = sawNonKeyCol;
263
		lhsSawNonKeyCol = sawNonKeyCol;
225

    
   
264

   
[+20] [20] 334 lines
test/org/apache/pig/test/TestPartitionFilterPushDown.java
Revision b6b26f4 New Change
 
  1. src/org/apache/pig/newplan/PColFilterExtractor.java: Loading...
  2. test/org/apache/pig/test/TestPartitionFilterPushDown.java: Loading...