Review Board 1.7.22


HBASE-2214 per scan max buffersize

Review Request #4726 - Created April 15, 2012 and updated

ferdy
0.94
HBASE-2214
Reviewers
hbase
tedyu
hbase
HBASE-2214: per-scan maximum buffer size.
It works when running this test:



    new HBaseTestingUtility(conf).startMiniCluster();
 
    HBaseAdmin admin = new HBaseAdmin(conf);
    if (!admin.tableExists("test")) {
      HTableDescriptor tableDesc = new HTableDescriptor("test");
      tableDesc.addFamily(new HColumnDescriptor("fam"));
      admin.createTable(tableDesc);
    }
    
    
    HTable table = new HTable(conf, "test");
    Put put; 
    
    put = new Put(Bytes.toBytes("row1"));
    put.add(Bytes.toBytes("fam"),Bytes.toBytes("qual1"),Bytes.toBytes("val1"));
    table.put(put);
    
    put = new Put(Bytes.toBytes("row2"));
    put.add(Bytes.toBytes("fam"),Bytes.toBytes("qual2"),Bytes.toBytes("val2"));
    table.put(put);
    
    put = new Put(Bytes.toBytes("row3"));
    put.add(Bytes.toBytes("fam"),Bytes.toBytes("qual3"),Bytes.toBytes("val3"));
    table.put(put);
    
    table.flushCommits();
    // Add a logging statement to ClientScanner#next() to see the effect.
    {
      System.out.println("returns all rows at once because of the caching");
      Scan scan = new Scan();
      scan.setCaching(100);
      ResultScanner scanner = table.getScanner(scan);
      scanner.next(100);
    }
    {
      System.out.println("returns one row at a time because of the maxResultSize");
      Scan scan = new Scan();
      scan.setCaching(100);
      scan.setMaxResultSize(1);
      ResultScanner scanner = table.getScanner(scan);
      scanner.next(100);
    }



See output:

returns all rows at once because of the caching
2012-04-25 22:18:47,494 DEBUG [main] client.ClientScanner(94): Creating scanner over test starting at key ''
2012-04-25 22:18:47,494 DEBUG [main] client.ClientScanner(206): Advancing internal scanner to startKey at ''
2012-04-25 22:18:47,499 DEBUG [main] client.ClientScanner(323): Rows returned 3
2012-04-25 22:18:47,502 DEBUG [main] client.ClientScanner(193): Finished with scanning at {NAME => 'test,,1335385126388.ed23a82f3d6ca2eab571918843796259.', STARTKEY => '', ENDKEY => '', ENCODED => ed23a82f3d6ca2eab571918843796259,}
returns one row at a time because of the maxResultSize
2012-04-25 22:18:47,504 DEBUG [main] client.ClientScanner(94): Creating scanner over test starting at key ''
2012-04-25 22:18:47,505 DEBUG [main] client.ClientScanner(206): Advancing internal scanner to startKey at ''
2012-04-25 22:18:47,514 DEBUG [main] client.ClientScanner(323): Rows returned 1
2012-04-25 22:18:47,517 DEBUG [main] client.ClientScanner(323): Rows returned 1
2012-04-25 22:18:47,522 DEBUG [main] client.ClientScanner(323): Rows returned 1
Review request changed
Updated (May 1, 2012, 7:50 a.m.)
  • It works when running this test:
    
    
    
        new HBaseTestingUtility(conf).startMiniCluster();
     
        HBaseAdmin admin = new HBaseAdmin(conf);
        if (!admin.tableExists("test")) {
          HTableDescriptor tableDesc = new HTableDescriptor("test");
          tableDesc.addFamily(new HColumnDescriptor("fam"));
          admin.createTable(tableDesc);
        }
        
        
        HTable table = new HTable(conf, "test");
        Put put; 
        
        put = new Put(Bytes.toBytes("row1"));
        put.add(Bytes.toBytes("fam"),Bytes.toBytes("qual1"),Bytes.toBytes("val1"));
        table.put(put);
        
        put = new Put(Bytes.toBytes("row2"));
        put.add(Bytes.toBytes("fam"),Bytes.toBytes("qual2"),Bytes.toBytes("val2"));
        table.put(put);
        
        put = new Put(Bytes.toBytes("row3"));
        put.add(Bytes.toBytes("fam"),Bytes.toBytes("qual3"),Bytes.toBytes("val3"));
        table.put(put);
        
        table.flushCommits();
        {
          System.out.println("returns all rows at once because of the caching");
          Scan scan = new Scan();
          scan.setCaching(100);
          ResultScanner scanner = table.getScanner(scan);
          scanner.next(100);
        }
        {
          System.out.println("returns one row at a time because of the maxResultSize");
          Scan scan = new Scan();
          scan.setCaching(100);
          scan.setMaxResultSize(1);
          ResultScanner scanner = table.getScanner(scan);
          scanner.next(100);
        }
    
    
    
    See output:
    
    returns all rows at once because of the caching
    2012-04-25 22:18:47,494 DEBUG [main] client.ClientScanner(94): Creating scanner over test starting at key ''
    2012-04-25 22:18:47,494 DEBUG [main] client.ClientScanner(206): Advancing internal scanner to startKey at ''
    2012-04-25 22:18:47,499 DEBUG [main] client.ClientScanner(323): Rows returned 3
    2012-04-25 22:18:47,502 DEBUG [main] client.ClientScanner(193): Finished with scanning at {NAME => 'test,,1335385126388.ed23a82f3d6ca2eab571918843796259.', STARTKEY => '', ENDKEY => '', ENCODED => ed23a82f3d6ca2eab571918843796259,}
    returns one row at a time because of the maxResultSize
    2012-04-25 22:18:47,504 DEBUG [main] client.ClientScanner(94): Creating scanner over test starting at key ''
    2012-04-25 22:18:47,505 DEBUG [main] client.ClientScanner(206): Advancing internal scanner to startKey at ''
    2012-04-25 22:18:47,514 DEBUG [main] client.ClientScanner(323): Rows returned 1
    2012-04-25 22:18:47,517 DEBUG [main] client.ClientScanner(323): Rows returned 1
    2012-04-25 22:18:47,522 DEBUG [main] client.ClientScanner(323): Rows returned 1

    It works when running this test:
    
    
    
        new HBaseTestingUtility(conf).startMiniCluster();
     
        HBaseAdmin admin = new HBaseAdmin(conf);
        if (!admin.tableExists("test")) {
          HTableDescriptor tableDesc = new HTableDescriptor("test");
          tableDesc.addFamily(new HColumnDescriptor("fam"));
          admin.createTable(tableDesc);
        }
        
        
        HTable table = new HTable(conf, "test");
        Put put; 
        
        put = new Put(Bytes.toBytes("row1"));
        put.add(Bytes.toBytes("fam"),Bytes.toBytes("qual1"),Bytes.toBytes("val1"));
        table.put(put);
        
        put = new Put(Bytes.toBytes("row2"));
        put.add(Bytes.toBytes("fam"),Bytes.toBytes("qual2"),Bytes.toBytes("val2"));
        table.put(put);
        
        put = new Put(Bytes.toBytes("row3"));
        put.add(Bytes.toBytes("fam"),Bytes.toBytes("qual3"),Bytes.toBytes("val3"));
        table.put(put);
        
        table.flushCommits();
        // Add a logging statement to ClientScanner#next() to see the effect.
        {
          System.out.println("returns all rows at once because of the caching");
          Scan scan = new Scan();
          scan.setCaching(100);
          ResultScanner scanner = table.getScanner(scan);
          scanner.next(100);
        }
        {
          System.out.println("returns one row at a time because of the maxResultSize");
          Scan scan = new Scan();
          scan.setCaching(100);
          scan.setMaxResultSize(1);
          ResultScanner scanner = table.getScanner(scan);
          scanner.next(100);
        }
    
    
    
    See output:
    
    returns all rows at once because of the caching
    2012-04-25 22:18:47,494 DEBUG [main] client.ClientScanner(94): Creating scanner over test starting at key ''
    2012-04-25 22:18:47,494 DEBUG [main] client.ClientScanner(206): Advancing internal scanner to startKey at ''
    2012-04-25 22:18:47,499 DEBUG [main] client.ClientScanner(323): Rows returned 3
    2012-04-25 22:18:47,502 DEBUG [main] client.ClientScanner(193): Finished with scanning at {NAME => 'test,,1335385126388.ed23a82f3d6ca2eab571918843796259.', STARTKEY => '', ENDKEY => '', ENCODED => ed23a82f3d6ca2eab571918843796259,}
    returns one row at a time because of the maxResultSize
    2012-04-25 22:18:47,504 DEBUG [main] client.ClientScanner(94): Creating scanner over test starting at key ''
    2012-04-25 22:18:47,505 DEBUG [main] client.ClientScanner(206): Advancing internal scanner to startKey at ''
    2012-04-25 22:18:47,514 DEBUG [main] client.ClientScanner(323): Rows returned 1
    2012-04-25 22:18:47,517 DEBUG [main] client.ClientScanner(323): Rows returned 1
    2012-04-25 22:18:47,522 DEBUG [main] client.ClientScanner(323): Rows returned 1
v5 (updated to HEAD and removed the debug "Rows returned" statement)