Review Board 1.7.22


select * may incorrectly return empty columns for hbase-handler.

Review Request #13263 - Created Aug. 4, 2013 and updated

Swarnim Kulkarni
HIVE-4995
Reviewers
hive
hive-git
select * may incorrectly return empty columns for hbase-handler. Please refer to the JIRA (HIVE-4995) for a detailed description of the issue.
Additional integration tests have been added to verify that the attached patch resolves the issue.
hbase-handler/src/java/org/apache/hadoop/hive/hbase/HiveHBaseTableInputFormat.java
Revision e3a8133 New Change
[20] 106 lines
[+20] [+] public class HiveHBaseTableInputFormat extends TableInputFormatBase
107

    
   
107

   
108
    boolean addAll = (readColIDs.size() == 0);
108
    boolean addAll = (readColIDs.size() == 0);
109
    Scan scan = new Scan();
109
    Scan scan = new Scan();
110
    boolean empty = true;
110
    boolean empty = true;
111

    
   
111

   

    
   
112
    // The list of families that have been added to the scan

    
   
113
    List<String> addedFamilies = new ArrayList<String>();

    
   
114

   
112
    if (!addAll) {
115
    if (!addAll) {
113
      for (int i : readColIDs) {
116
      for (int i : readColIDs) {
114
        ColumnMapping colMap = columnsMapping.get(i);
117
        ColumnMapping colMap = columnsMapping.get(i);
115
        if (colMap.hbaseRowKey) {
118
        if (colMap.hbaseRowKey) {
116
          continue;
119
          continue;
117
        }
120
        }
118

    
   
121

   
119
        if (colMap.qualifierName == null) {
122
        if (colMap.qualifierName == null) {
120
          scan.addFamily(colMap.familyNameBytes);
123
          scan.addFamily(colMap.familyNameBytes);

    
   
124
          addedFamilies.add(colMap.familyName);
121
        } else {
125
        } else {

    
   
126
          if(!addedFamilies.contains(colMap.familyName)){

    
   
127
            // add only if the corresponding family has not already been added
122
          scan.addColumn(colMap.familyNameBytes, colMap.qualifierNameBytes);
128
            scan.addColumn(colMap.familyNameBytes, colMap.qualifierNameBytes);
123
        }
129
          }

    
   
130
        }
124

    
   
131

   
125
        empty = false;
132
        empty = false;
126
      }
133
      }
127
    }
134
    }
128

    
   
135

   
[+20] [20] 327 lines
[+20] [+] public InputSplit[] getSplits(JobConf jobConf, int numSplits) throws IOException {
456
      throw new IOException(e);
463
      throw new IOException(e);
457
    }
464
    }
458

    
   
465

   
459
    Scan scan = new Scan();
466
    Scan scan = new Scan();
460

    
   
467

   

    
   
468
    // The list of families that have been added to the scan

    
   
469
    List<String> addedFamilies = new ArrayList<String>();

    
   
470

   
461
    // REVIEW:  are we supposed to be applying the getReadColumnIDs
471
    // REVIEW:  are we supposed to be applying the getReadColumnIDs
462
    // same as in getRecordReader?
472
    // same as in getRecordReader?
463
    for (int i = 0; i <columnsMapping.size(); i++) {
473
    for (int i = 0; i <columnsMapping.size(); i++) {
464
      ColumnMapping colMap = columnsMapping.get(i);
474
      ColumnMapping colMap = columnsMapping.get(i);
465
      if (colMap.hbaseRowKey) {
475
      if (colMap.hbaseRowKey) {
466
        continue;
476
        continue;
467
      }
477
      }
468

    
   
478

   
469
      if (colMap.qualifierName == null) {
479
      if (colMap.qualifierName == null) {
470
        scan.addFamily(colMap.familyNameBytes);
480
        scan.addFamily(colMap.familyNameBytes);

    
   
481
        addedFamilies.add(colMap.familyName);
471
      } else {
482
      } else {

    
   
483
        if(!addedFamilies.contains(colMap.familyName)){

    
   
484
          // add the column only if the family has not already been added
472
        scan.addColumn(colMap.familyNameBytes, colMap.qualifierNameBytes);
485
          scan.addColumn(colMap.familyNameBytes, colMap.qualifierNameBytes);
473
      }
486
        }
474
    }
487
      }

    
   
488
    }
475

    
   
489

   
476
    // Take filter pushdown into account while calculating splits; this
490
    // Take filter pushdown into account while calculating splits; this
477
    // allows us to prune off regions immediately.  Note that although
491
    // allows us to prune off regions immediately.  Note that although
478
    // the Javadoc for the superclass getSplits says that it returns one
492
    // the Javadoc for the superclass getSplits says that it returns one
479
    // split per region, the implementation actually takes the scan
493
    // split per region, the implementation actually takes the scan
[+20] [20] 46 lines
hbase-handler/src/test/queries/positive/hbase_binary_map_queries_prefix.q
New File
 
hbase-handler/src/test/results/positive/hbase_binary_map_queries_prefix.q.out
New File
 
  1. hbase-handler/src/java/org/apache/hadoop/hive/hbase/HiveHBaseTableInputFormat.java: Loading...
  2. hbase-handler/src/test/queries/positive/hbase_binary_map_queries_prefix.q: Loading...
  3. hbase-handler/src/test/results/positive/hbase_binary_map_queries_prefix.q.out: Loading...