From 744e859f79b00f464109c92311acfb272e7f5844 Mon Sep 17 00:00:00 2001
From: Connor McCoy <connormccoy@google.com>
Date: Fri, 18 Sep 2015 17:00:56 -0700
Subject: [PATCH] [hbase] Use a PageFilter in HBase scans.

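Other bindings limit the number of results retrieved from the server.
The HBase bindings only stop on the client side: they close the scanner
once they have received the desired number of records. Adding a
PageFilter lets the server stop early as well, which matches the
behavior of other bindings and may improve performance. The client-side
cutoff is kept because PageFilter does not guarantee that at most
pageSize rows are returned.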
---
 .../main/java/com/yahoo/ycsb/db/HBaseClient.java  | 15 ++++++---------
 .../java/com/yahoo/ycsb/db/HBaseClient10.java     |  5 +++++
 2 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/hbase098/src/main/java/com/yahoo/ycsb/db/HBaseClient.java b/hbase098/src/main/java/com/yahoo/ycsb/db/HBaseClient.java
index 4dc5bc72..bd1299db 100644
--- a/hbase098/src/main/java/com/yahoo/ycsb/db/HBaseClient.java
+++ b/hbase098/src/main/java/com/yahoo/ycsb/db/HBaseClient.java
@@ -17,32 +17,25 @@
 
 package com.yahoo.ycsb.db;
 
-
 import com.yahoo.ycsb.DBException;
 import com.yahoo.ycsb.ByteIterator;
 import com.yahoo.ycsb.ByteArrayByteIterator;
+import com.yahoo.ycsb.measurements.Measurements;
 
 import java.io.IOException;
 import java.util.*;
-//import java.util.HashMap;
-//import java.util.Properties;
-//import java.util.Set;
-//import java.util.Vector;
 
-import com.yahoo.ycsb.measurements.Measurements;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.HTableDescriptor;
 import org.apache.hadoop.hbase.client.HTable;
-//import org.apache.hadoop.hbase.client.Scanner;
 import org.apache.hadoop.hbase.client.Get;
 import org.apache.hadoop.hbase.client.Put;
 import org.apache.hadoop.hbase.client.Delete;
 import org.apache.hadoop.hbase.client.Scan;
 import org.apache.hadoop.hbase.client.Result;
 import org.apache.hadoop.hbase.client.ResultScanner;
-//import org.apache.hadoop.hbase.io.Cell;
-//import org.apache.hadoop.hbase.io.RowResult;
+import org.apache.hadoop.hbase.filter.PageFilter;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.HBaseConfiguration;
 
@@ -246,6 +239,7 @@ public class HBaseClient extends com.yahoo.ycsb.DB
         //HBase has no record limit.  Here, assume recordcount is small enough to bring back in one call.
         //We get back recordcount records
         s.setCaching(recordcount);
+        s.setFilter(new PageFilter(recordcount));
 
         //add specified fields or else all fields
         if (fields == null)
@@ -284,6 +278,9 @@ public class HBaseClient extends com.yahoo.ycsb.DB
                 //add rowResult to result vector
                 result.add(rowResult);
                 numResults++;
+
+                // PageFilter is applied separately on each region server, so a scan that spans
+                // regions can still return more than pageSize rows; this break is required.
                 if (numResults >= recordcount) //if hit recordcount, bail out
                 {
                     break;
diff --git a/hbase10/src/main/java/com/yahoo/ycsb/db/HBaseClient10.java b/hbase10/src/main/java/com/yahoo/ycsb/db/HBaseClient10.java
index f77e37c0..16189add 100644
--- a/hbase10/src/main/java/com/yahoo/ycsb/db/HBaseClient10.java
+++ b/hbase10/src/main/java/com/yahoo/ycsb/db/HBaseClient10.java
@@ -42,6 +42,7 @@ import org.apache.hadoop.hbase.client.Result;
 import org.apache.hadoop.hbase.client.ResultScanner;
 import org.apache.hadoop.hbase.client.Scan;
 import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.filter.PageFilter;
 import org.apache.hadoop.hbase.util.Bytes;
 
 import java.io.IOException;
@@ -291,6 +292,7 @@ public class HBaseClient10 extends com.yahoo.ycsb.DB
         //HBase has no record limit.  Here, assume recordcount is small enough to bring back in one call.
         //We get back recordcount records
         s.setCaching(recordcount);
+        s.setFilter(new PageFilter(recordcount));
 
         //add specified fields or else all fields
         if (fields == null)
@@ -332,6 +334,9 @@ public class HBaseClient10 extends com.yahoo.ycsb.DB
                 //add rowResult to result vector
                 result.add(rowResult);
                 numResults++;
+
+                // PageFilter is applied separately on each region server, so a scan that spans
+                // regions can still return more than pageSize rows; this break is required.
                 if (numResults >= recordcount) //if hit recordcount, bail out
                 {
                     break;
-- 
GitLab