From eaff913d28ab736d517fb4df2488ecb563fd2fc2 Mon Sep 17 00:00:00 2001
From: Solomon Duskis <sduskis@google.com>
Date: Tue, 6 Jun 2017 09:47:40 -0400
Subject: [PATCH] Upgrading googlebigtable to the latest version.

The API used by googlebigtable has had quite a bit of churn. This is
the minimal set of changes required for the upgrade.

Signed-off-by: Chris Larsen <clarsen@yahoo-inc.com>
---
 googlebigtable/README.md                      |  23 ++--
 googlebigtable/pom.xml                        |   8 +-
 .../yahoo/ycsb/db/GoogleBigtableClient.java   | 100 ++++++++++--------
 pom.xml                                       |   2 +-
 4 files changed, 72 insertions(+), 61 deletions(-)

diff --git a/googlebigtable/README.md b/googlebigtable/README.md
index 3938b525..81b6cf48 100644
--- a/googlebigtable/README.md
+++ b/googlebigtable/README.md
@@ -21,9 +21,9 @@ This driver provides a YCSB workload binding for Google's hosted Bigtable, the i
 
 ## Quickstart
 
-### 1. Setup a Bigtable Cluster
+### 1. Set up a Bigtable Instance
 
-Login to the Google Cloud Console and follow the [Creating Cluster](https://cloud.google.com/bigtable/docs/creating-cluster) steps. Make a note of your cluster name, zone and project ID.
+Log in to the Google Cloud Console and follow the [Creating Instance](https://cloud.google.com/bigtable/docs/creating-instance) steps. Make a note of your instance ID and project ID.
 
 ### 2. Launch the Bigtable Shell
 
@@ -40,29 +40,25 @@ hbase(main):002:0> create 'usertable', 'cf', {SPLITS => (1..n_splits).map {|i| "
 
 Make a note of the column family, in this example it's `cf``.
 
-### 4. Fetch the Proper ALPN Boot Jar
-
-The Bigtable protocol uses HTTP/2 which requires an ALPN protocol negotiation implementation. On JVM instantiation the implementation must be loaded before attempting to connect to the cluster. If you're using Java 7 or 8, use this [Jetty Version Table](http://www.eclipse.org/jetty/documentation/current/alpn-chapter.html#alpn-versions) to determine the version appropriate for your JVM. (ALPN is included in JDK 9+). Download the proper jar from [Maven](http://search.maven.org/#search%7Cgav%7C1%7Cg%3A%22org.mortbay.jetty.alpn%22%20AND%20a%3A%22alpn-boot%22) somewhere on your system.
-
-### 5. Download JSON Credentials
+### 4. Download JSON Credentials
 
 Follow these instructions for [Generating a JSON key](https://cloud.google.com/bigtable/docs/installing-hbase-shell#service-account) and save it to your host.
 
-### 6. Load a Workload
+### 5. Load a Workload
 
-Switch to the root of the YCSB repo and choose the workload you want to run and `load` it first. With the CLI you must provide the column family, cluster properties and the ALPN jar to load.
+Switch to the root of the YCSB repo, choose the workload you want to run, and `load` it first. With the CLI you must provide the column family and instance properties to load.
 
 ```
-bin/ycsb load googlebigtable -p columnfamily=cf -p google.bigtable.project.id=<PROJECT_ID> -p google.bigtable.cluster.name=<CLUSTER> -p google.bigtable.zone.name=<ZONE> -p google.bigtable.auth.service.account.enable=true -p google.bigtable.auth.json.keyfile=<PATH_TO_JSON_KEY> -jvm-args='-Xbootclasspath/p:<PATH_TO_ALPN_JAR>' -P workloads/workloada
+bin/ycsb load googlebigtable -p columnfamily=cf -p google.bigtable.project.id=<PROJECT_ID> -p google.bigtable.instance.id=<INSTANCE> -p google.bigtable.auth.json.keyfile=<PATH_TO_JSON_KEY> -P workloads/workloada
 ```
 
-Make sure to replace the variables in the angle brackets above with the proper value from your cluster. Additional configuration parameters are available below.
+Make sure to replace the variables in the angle brackets above with the proper values from your instance. Additional configuration parameters are available below.
 
 The `load` step only executes inserts into the datastore. After loading data, run the same workload to mix reads with writes.
 
 ```
-bin/ycsb run googlebigtable -p columnfamily=cf -p google.bigtable.project.id=<PROJECT_ID> -p google.bigtable.cluster.name=<CLUSTER> -p google.bigtable.zone.name=<ZONE> -p google.bigtable.auth.service.account.enable=true -p google.bigtable.auth.json.keyfile=<PATH_TO_JSON_KEY> -jvm-args='-Xbootclasspath/p:<PATH_TO_ALPN_JAR>' -P workloads/workloada
+bin/ycsb run googlebigtable -p columnfamily=cf -p google.bigtable.project.id=<PROJECT_ID> -p google.bigtable.instance.id=<INSTANCE> -p google.bigtable.auth.json.keyfile=<PATH_TO_JSON_KEY> -P workloads/workloada
 ```
 
@@ -72,8 +68,7 @@ The following options can be configured using CLI (using the `-p` parameter) or
 
 * `columnfamily`: (Required) The Bigtable column family to target.
 * `google.bigtable.project.id`: (Required) The ID of a Bigtable project.
-* `google.bigtable.cluster.name`: (Required) The name of a Bigtable cluster.
-* `google.bigtable.zone.name`: (Required) Zone where the Bigtable cluster is running.
+* `google.bigtable.instance.id`: (Required) The ID of a Bigtable instance.
 * `google.bigtable.auth.service.account.enable`: Whether or not to authenticate with a service account. The default is true.
 * `google.bigtable.auth.json.keyfile`: (Required) A service account key for authentication.
 * `debug`: If true, prints debug information to standard out. The default is false.
diff --git a/googlebigtable/pom.xml b/googlebigtable/pom.xml
index 61eb48c4..7353e53a 100644
--- a/googlebigtable/pom.xml
+++ b/googlebigtable/pom.xml
@@ -36,6 +36,12 @@ LICENSE file.
       <version>${googlebigtable.version}</version>
     </dependency>
 
+    <dependency>
+      <groupId>io.netty</groupId>
+      <artifactId>netty-tcnative-boringssl-static</artifactId>
+      <version>1.1.33.Fork26</version>
+    </dependency>
+
     <dependency>
       <groupId>com.yahoo.ycsb</groupId>
       <artifactId>core</artifactId>
@@ -44,4 +50,4 @@ LICENSE file.
     </dependency>
   </dependencies>
-</project>
\ No newline at end of file
+</project>
diff --git a/googlebigtable/src/main/java/com/yahoo/ycsb/db/GoogleBigtableClient.java b/googlebigtable/src/main/java/com/yahoo/ycsb/db/GoogleBigtableClient.java
index d0d21dda..c035032c 100644
--- a/googlebigtable/src/main/java/com/yahoo/ycsb/db/GoogleBigtableClient.java
+++ b/googlebigtable/src/main/java/com/yahoo/ycsb/db/GoogleBigtableClient.java
@@ -34,23 +34,24 @@ import java.util.Vector;
 import java.util.concurrent.ExecutionException;
 
 import com.google.bigtable.repackaged.com.google.protobuf.ByteString;
-import com.google.bigtable.repackaged.com.google.protobuf.ServiceException;
-import com.google.bigtable.v1.Column;
-import com.google.bigtable.v1.Family;
-import com.google.bigtable.v1.MutateRowRequest;
-import com.google.bigtable.v1.Mutation;
-import com.google.bigtable.v1.ReadRowsRequest;
-import com.google.bigtable.v1.Row;
-import com.google.bigtable.v1.RowFilter;
-import com.google.bigtable.v1.RowRange;
-import com.google.bigtable.v1.Mutation.DeleteFromRow;
-import com.google.bigtable.v1.Mutation.SetCell;
-import com.google.bigtable.v1.RowFilter.Chain.Builder;
+import com.google.bigtable.v2.Column;
+import com.google.bigtable.v2.Family;
+import com.google.bigtable.v2.MutateRowRequest;
+import com.google.bigtable.v2.Mutation;
+import com.google.bigtable.v2.ReadRowsRequest;
+import com.google.bigtable.v2.Row;
+import com.google.bigtable.v2.RowFilter;
+import com.google.bigtable.v2.RowRange;
+import com.google.bigtable.v2.RowSet;
+import com.google.bigtable.v2.Mutation.DeleteFromRow;
+import com.google.bigtable.v2.Mutation.SetCell;
+import com.google.bigtable.v2.RowFilter.Chain.Builder;
 import com.google.cloud.bigtable.config.BigtableOptions;
 import com.google.cloud.bigtable.grpc.BigtableDataClient;
 import com.google.cloud.bigtable.grpc.BigtableSession;
+import com.google.cloud.bigtable.grpc.BigtableTableName;
 import com.google.cloud.bigtable.grpc.async.AsyncExecutor;
-import com.google.cloud.bigtable.grpc.async.HeapSizeManager;
+import com.google.cloud.bigtable.grpc.async.BulkMutation;
 import com.google.cloud.bigtable.hbase.BigtableOptionsFactory;
 import com.google.cloud.bigtable.util.ByteStringer;
 import com.yahoo.ycsb.ByteArrayByteIterator;
@@ -89,7 +90,6 @@ public class GoogleBigtableClient extends com.yahoo.ycsb.DB {
 
   /** Thread loacal Bigtable native API objects. */
   private BigtableDataClient client;
-  private HeapSizeManager heapSizeManager;
   private AsyncExecutor asyncExecutor;
 
   /** The column family use for the workload. */
@@ -105,13 +105,21 @@ public class GoogleBigtableClient extends com.yahoo.ycsb.DB {
    */
  private boolean clientSideBuffering = false;
 
+  private BulkMutation bulkMutation;
+
   @Override
   public void init() throws DBException {
     Properties props = getProperties();
 
     // Defaults the user can override if needed
-    CONFIG.set("google.bigtable.auth.service.account.enable", "true");
-
+    if (getProperties().containsKey(ASYNC_MUTATOR_MAX_MEMORY)) {
+      CONFIG.set(BigtableOptionsFactory.BIGTABLE_BUFFERED_MUTATOR_MAX_MEMORY_KEY,
+          getProperties().getProperty(ASYNC_MUTATOR_MAX_MEMORY));
+    }
+    if (getProperties().containsKey(ASYNC_MAX_INFLIGHT_RPCS)) {
+      CONFIG.set(BigtableOptionsFactory.BIGTABLE_BULK_MAX_ROW_KEY_COUNT,
+          getProperties().getProperty(ASYNC_MAX_INFLIGHT_RPCS));
+    }
 
     // make it easy on ourselves by copying all CLI properties into the config object.
     final Iterator<Entry<Object, Object>> it = props.entrySet().iterator();
     while (it.hasNext()) {
@@ -143,14 +151,7 @@ public class GoogleBigtableClient extends com.yahoo.ycsb.DB {
     }
 
     if (clientSideBuffering) {
-      heapSizeManager = new HeapSizeManager(
-          Long.parseLong(
-              getProperties().getProperty(ASYNC_MUTATOR_MAX_MEMORY,
-                  Long.toString(AsyncExecutor.ASYNC_MUTATOR_MAX_MEMORY_DEFAULT))),
-          Integer.parseInt(
-              getProperties().getProperty(ASYNC_MAX_INFLIGHT_RPCS,
-                  Integer.toString(AsyncExecutor.MAX_INFLIGHT_RPCS_DEFAULT))));
-      asyncExecutor = new AsyncExecutor(client, heapSizeManager);
+      asyncExecutor = session.createAsyncExecutor();
     }
   }
 
@@ -169,6 +170,13 @@ public class GoogleBigtableClient extends com.yahoo.ycsb.DB {
 
   @Override
   public void cleanup() throws DBException {
+    if (bulkMutation != null) {
+      try {
+        bulkMutation.flush();
+      } catch (RuntimeException e) {
+        throw new DBException(e);
+      }
+    }
     if (asyncExecutor != null) {
       try {
         asyncExecutor.flush();
@@ -226,7 +234,8 @@ public class GoogleBigtableClient extends com.yahoo.ycsb.DB {
     final ReadRowsRequest.Builder rrr = ReadRowsRequest.newBuilder()
         .setTableNameBytes(ByteStringer.wrap(lastTableBytes))
         .setFilter(filter)
-        .setRowKey(ByteStringer.wrap(key.getBytes()));
+        .setRows(RowSet.newBuilder()
+            .addRowKeys(ByteStringer.wrap(key.getBytes())));
 
     List<Row> rows;
     try {
@@ -292,13 +301,17 @@ public class GoogleBigtableClient extends com.yahoo.ycsb.DB {
     }
 
     final RowRange range = RowRange.newBuilder()
-        .setStartKey(ByteStringer.wrap(startkey.getBytes()))
+        .setStartKeyClosed(ByteStringer.wrap(startkey.getBytes()))
         .build();
-
+
+    final RowSet rowSet = RowSet.newBuilder()
+        .addRowRanges(range)
+        .build();
+
     final ReadRowsRequest.Builder rrr = ReadRowsRequest.newBuilder()
         .setTableNameBytes(ByteStringer.wrap(lastTableBytes))
         .setFilter(filter)
-        .setRowRange(range);
+        .setRows(rowSet);
 
     List<Row> rows;
     try {
@@ -372,19 +385,14 @@ public class GoogleBigtableClient extends com.yahoo.ycsb.DB {
 
     try {
       if (clientSideBuffering) {
-        asyncExecutor.mutateRowAsync(rowMutation.build());
+        bulkMutation.add(rowMutation.build());
       } else {
         client.mutateRow(rowMutation.build());
       }
       return Status.OK;
-    } catch (ServiceException e) {
+    } catch (RuntimeException e) {
       System.err.println("Failed to insert key: " + key + " "
           + e.getMessage());
       return Status.ERROR;
-    } catch (InterruptedException e) {
-      System.err.println("Interrupted while inserting key: " + key + " "
-          + e.getMessage());
-      Thread.currentThread().interrupt();
-      return Status.ERROR; // never get here, but lets make the compiler happy
     }
   }
@@ -410,19 +418,14 @@ public class GoogleBigtableClient extends com.yahoo.ycsb.DB {
 
     try {
       if (clientSideBuffering) {
-        asyncExecutor.mutateRowAsync(rowMutation.build());
+        bulkMutation.add(rowMutation.build());
       } else {
         client.mutateRow(rowMutation.build());
       }
       return Status.OK;
-    } catch (ServiceException e) {
+    } catch (RuntimeException e) {
       System.err.println("Failed to delete key: " + key + " "
          + e.getMessage());
       return Status.ERROR;
-    } catch (InterruptedException e) {
-      System.err.println("Interrupted while delete key: " + key + " "
-          + e.getMessage());
-      Thread.currentThread().interrupt();
-      return Status.ERROR; // never get here, but lets make the compiler happy
     }
   }
@@ -434,11 +437,18 @@ private void setTable(final String table) {
     if (!lastTable.equals(table)) {
       lastTable = table;
-      lastTableBytes = options
-          .getClusterName()
-          .toTableName(table)
+      BigtableTableName tableName = options
+          .getInstanceName()
+          .toTableName(table);
+      lastTableBytes = tableName
           .toString()
           .getBytes();
+      synchronized (this) {
+        if (bulkMutation != null) {
+          bulkMutation.flush();
+        }
+        bulkMutation = session.createBulkMutation(tableName, asyncExecutor);
+      }
     }
   }
diff --git a/pom.xml b/pom.xml
index 96ea2ceb..9910871e 100644
--- a/pom.xml
+++ b/pom.xml
@@ -77,7 +77,7 @@ LICENSE file.
     <cassandra.cql.version>3.0.0</cassandra.cql.version>
     <geode.version>1.2.0</geode.version>
     <azuredocumentdb.version>1.8.1</azuredocumentdb.version>
-    <googlebigtable.version>0.2.3</googlebigtable.version>
+    <googlebigtable.version>0.9.7</googlebigtable.version>
    <infinispan.version>7.2.2.Final</infinispan.version>
     <kudu.version>1.1.0</kudu.version>
     <openjpa.jdbc.version>2.1.1</openjpa.jdbc.version>
-- 
GitLab
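
Note on the API change (not part of the patch): the hunks above move the read path from the v1 single-key/row-range setters to the v2 `RowSet`-based request, and switch client-side buffering from `AsyncExecutor.mutateRowAsync()` to `BulkMutation.add()`. The sketch below is only an illustration of how the v2 read requests are assembled after the upgrade; it uses classes and methods that appear in the diff, while the class name, table-name bytes, and keys are hypothetical placeholders.

```
// Illustrative sketch only -- not code added by this change.
import com.google.bigtable.v2.ReadRowsRequest;
import com.google.bigtable.v2.RowRange;
import com.google.bigtable.v2.RowSet;
import com.google.cloud.bigtable.util.ByteStringer;

public final class V2ReadRequestSketch {

  /** Point read: v1 used setRowKey(); v2 wraps the key in a RowSet. */
  public static ReadRowsRequest pointRead(byte[] tableNameBytes, String key) {
    return ReadRowsRequest.newBuilder()
        .setTableNameBytes(ByteStringer.wrap(tableNameBytes))
        .setRows(RowSet.newBuilder()
            .addRowKeys(ByteStringer.wrap(key.getBytes())))
        .build();
  }

  /** Scan: v1 RowRange.setStartKey() became setStartKeyClosed() in v2. */
  public static ReadRowsRequest scanFrom(byte[] tableNameBytes, String startKey) {
    // The closed (inclusive) start key mirrors the scan() change in the diff.
    RowRange range = RowRange.newBuilder()
        .setStartKeyClosed(ByteStringer.wrap(startKey.getBytes()))
        .build();
    return ReadRowsRequest.newBuilder()
        .setTableNameBytes(ByteStringer.wrap(tableNameBytes))
        .setRows(RowSet.newBuilder().addRowRanges(range))
        .build();
  }
}
```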