diff --git a/bin/ycsb b/bin/ycsb index 8036dc71bbb7f8116f1cf6cd273e8ff953615234..54dcfbbdc77cf566f6e68f48e84f575afb1fd632 100755 --- a/bin/ycsb +++ b/bin/ycsb @@ -56,6 +56,7 @@ DATABASES = { "dynamodb" : "com.yahoo.ycsb.db.DynamoDBClient", "elasticsearch": "com.yahoo.ycsb.db.ElasticSearchClient", "gemfire" : "com.yahoo.ycsb.db.GemFireClient", + "googledatastore" : "com.yahoo.ycsb.db.GoogleDatastoreClient", "hbase094" : "com.yahoo.ycsb.db.HBaseClient", "hbase098" : "com.yahoo.ycsb.db.HBaseClient", "hbase10" : "com.yahoo.ycsb.db.HBaseClient10", diff --git a/distribution/pom.xml b/distribution/pom.xml index 0920467e7ad8cfcabb8703ab2fae69058188fd73..9c11636af9d992b493c94b5f8d6509cc1dcb90c8 100644 --- a/distribution/pom.xml +++ b/distribution/pom.xml @@ -74,6 +74,11 @@ LICENSE file. <artifactId>gemfire-binding</artifactId> <version>${project.version}</version> </dependency> + <dependency> + <groupId>com.yahoo.ycsb</groupId> + <artifactId>googledatastore-binding</artifactId> + <version>${project.version}</version> + </dependency> <dependency> <groupId>com.yahoo.ycsb</groupId> <artifactId>hbase094-binding</artifactId> diff --git a/googledatastore/README.md b/googledatastore/README.md new file mode 100644 index 0000000000000000000000000000000000000000..a6755a6522ee100c5ab1a3c3ebad088a2ba31d7b --- /dev/null +++ b/googledatastore/README.md @@ -0,0 +1,91 @@ +<!-- +Copyright (c) 2015 YCSB contributors. +All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); you +may not use this file except in compliance with the License. You +may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +implied. See the License for the specific language governing +permissions and limitations under the License. 
See accompanying +LICENSE file. +--> + +# Google Cloud Datastore Binding + +https://cloud.google.com/datastore/docs/concepts/overview?hl=en + +## Configure + + YCSB_HOME - YCSB home directory + DATASTORE_HOME - Google Cloud Datastore YCSB client package files + +Please refer to https://github.com/brianfrankcooper/YCSB/wiki/Using-the-Database-Libraries +for more information on setup. + +# Benchmark + + $YCSB_HOME/bin/ycsb load googledatastore -P workloads/workloada -P googledatastore.properties + $YCSB_HOME/bin/ycsb run googledatastore -P workloads/workloada -P googledatastore.properties + +# Properties + + $DATASTORE_HOME/conf/googledatastore.properties + +# Details + +A. Configuration and setup: + +See this link for instructions about setting up Google Cloud Datastore and +authentication: + +https://cloud.google.com/datastore/docs/getstarted/start_java/ + +After you set up your environment, you will have 3 pieces of information ready: +- datasetId, +- service account email, and +- a private key file in P12 format. + +These will be configured via corresponding properties in the googledatastore.properties file. + +B. EntityGroupingMode + +In Google Datastore, an entity group is the unit within which the user can +perform strongly consistent queries on multiple items. However, an entity group +also has certain performance limitations, especially on write QPS. + +We support two modes here: + +1. [default] One entity per group (ONE_ENTITY_PER_GROUP) + +In this mode, every entity is a "root" entity and sits in one group, +and every entity group has only one entity. Write QPS is high in this +mode (and there is no documented limitation on this). But queries across +multiple entities are eventually consistent. + +When this mode is set, every entity is created with no ancestor key (meaning +the entity itself is the "root" entity). + +2. 
Multiple entities per group (MULTI_ENTITY_PER_GROUP) + +In this mode, all entities in one benchmark run are placed under one +ancestor (root) node and are therefore inside one entity group. Queries/scans +performed on these entities will be strongly consistent, but write QPS +will be subject to the documented limitation (currently 1 QPS). + +Because of the write QPS limit, it's highly recommended that you rate +limit your benchmark's test rate to avoid excessive errors. + +The goal of this MULTI_ENTITY_PER_GROUP mode is to allow users to +benchmark and understand the performance characteristics of a single entity +group of the Google Datastore. + +While in this mode, one can optionally specify a root key name. If not +specified, a default name will be used. + + diff --git a/googledatastore/conf/googledatastore.properties b/googledatastore/conf/googledatastore.properties new file mode 100644 index 0000000000000000000000000000000000000000..ac95b570c4ec4f469d6d6ceda0c6c13b3bdd9d29 --- /dev/null +++ b/googledatastore/conf/googledatastore.properties @@ -0,0 +1,56 @@ +# Copyright (c) 2015 YCSB contributors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you +# may not use this file except in compliance with the License. You +# may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. See accompanying +# LICENSE file. + +# +# Sample property file for Google Cloud Datastore DB client + +## Mandatory parameters +# +# Your credentials to Google datastore. See README.md for details. 
+# +# googledatastore.datasetId=<string id of your dataset> +# googledatastore.privateKeyFile=<full path to your private key file> +# googledatastore.serviceAccountEmail=<Your service account email> + +# Google Cloud Datastore's read and update APIs do not support +# reading or updating a select subset of properties for an entity. +# (as of version v1beta2-rev1-3.0.2) +# Therefore, it's recommended that you set writeallfields and readallfields +# to true to get stable and comparable performance numbers. +writeallfields = true +readallfields = true + +## Optional parameters +# +# Decides the consistency level of read requests. Acceptable values are: +# EVENTUAL, STRONG (default is STRONG) +# +# googledatastore.readConsistency=STRONG + +# Decides how we group entities into entity groups. +# (See the details section in README.md for documentation) +# +# googledatastore.entityGroupingMode=ONE_ENTITY_PER_GROUP + +# If you set the googledatastore.entityGroupingMode property to +# MULTI_ENTITY_PER_GROUP, you can optionally specify the name of the root entity +# +# googledatastore.rootEntityName=YCSB_ROOT_ENTITY + +# It is strongly recommended to set the request distribution to uniform. +# requestdistribution = uniform + +# Enable/disable debug messages; default is false. +# googledatastore.debug = false \ No newline at end of file diff --git a/googledatastore/pom.xml b/googledatastore/pom.xml new file mode 100644 index 0000000000000000000000000000000000000000..e7ab3eac4c9f9b57bb2d63d8ab3499df3ae63586 --- /dev/null +++ b/googledatastore/pom.xml @@ -0,0 +1,73 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- +Copyright (c) 2015 YCSB contributors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); you +may not use this file except in compliance with the License. 
You +may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +implied. See the License for the specific language governing +permissions and limitations under the License. See accompanying +LICENSE file. +--> + +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd"> + <modelVersion>4.0.0</modelVersion> + <parent> + <groupId>com.yahoo.ycsb</groupId> + <artifactId>binding-parent</artifactId> + <version>0.6.0-SNAPSHOT</version> + <relativePath>../binding-parent</relativePath> + </parent> + + <artifactId>googledatastore-binding</artifactId> + <name>Google Cloud Datastore Binding</name> + <url>https://github.com/GoogleCloudPlatform/google-cloud-datastore</url> + <dependencies> + <dependency> + <groupId>com.google.apis</groupId> + <artifactId>google-api-services-datastore-protobuf</artifactId> + <version>v1beta2-rev1-3.0.2</version> + </dependency> + <dependency> + <groupId>log4j</groupId> + <artifactId>log4j</artifactId> + <version>1.2.17</version> + </dependency> + <dependency> + <groupId>com.yahoo.ycsb</groupId> + <artifactId>core</artifactId> + <version>${project.version}</version> + <scope>provided</scope> + </dependency> + </dependencies> + <build> + <plugins> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-checkstyle-plugin</artifactId> + <version>2.15</version> + <configuration> + <consoleOutput>true</consoleOutput> + <configLocation>../checkstyle.xml</configLocation> + <failOnViolation>true</failOnViolation> + <failsOnError>false</failsOnError> + </configuration> + <executions> + <execution> + <id>validate</id> + <phase>validate</phase> + <goals> + 
<goal>checkstyle</goal> + </goals> + </execution> + </executions> + </plugin> + </plugins> + </build> +</project> diff --git a/googledatastore/src/main/java/com/yahoo/ycsb/db/GoogleDatastoreClient.java b/googledatastore/src/main/java/com/yahoo/ycsb/db/GoogleDatastoreClient.java new file mode 100644 index 0000000000000000000000000000000000000000..fb3a81ecb1d7c1ebf491b148db14b494a2315c30 --- /dev/null +++ b/googledatastore/src/main/java/com/yahoo/ycsb/db/GoogleDatastoreClient.java @@ -0,0 +1,338 @@ +/* + * Copyright 2015 YCSB contributors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. 
+ */
+
+package com.yahoo.ycsb.db;
+
+import com.google.api.client.auth.oauth2.Credential;
+import com.google.api.services.datastore.DatastoreV1.*;
+import com.google.api.services.datastore.DatastoreV1.CommitRequest.Mode;
+import com.google.api.services.datastore.DatastoreV1.ReadOptions
+  .ReadConsistency;
+import com.google.api.services.datastore.client.Datastore;
+import com.google.api.services.datastore.client.DatastoreException;
+import com.google.api.services.datastore.client.DatastoreFactory;
+import com.google.api.services.datastore.client.DatastoreHelper;
+import com.google.api.services.datastore.client.DatastoreOptions;
+
+import com.yahoo.ycsb.ByteIterator;
+import com.yahoo.ycsb.DB;
+import com.yahoo.ycsb.DBException;
+import com.yahoo.ycsb.Status;
+import com.yahoo.ycsb.StringByteIterator;
+
+import org.apache.log4j.Level;
+import org.apache.log4j.Logger;
+
+import java.io.IOException;
+import java.security.GeneralSecurityException;
+import java.util.HashMap;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+import java.util.Vector;
+
+import javax.annotation.Nullable;
+
+/**
+ * Google Cloud Datastore Client for YCSB.
+ *
+ * <p>All settings are read from "googledatastore.*" properties in
+ * {@link #init()}. Reads are issued as a lookup request; inserts, updates
+ * and deletes are each sent as a single non-transactional commit.
+ */
+public class GoogleDatastoreClient extends DB {
+  /**
+   * Defines a MutationType used in this class.
+   */
+  private enum MutationType {
+    UPSERT,
+    UPDATE,
+    DELETE
+  }
+
+  /**
+   * Defines a EntityGroupingMode enum used in this class.
+   */
+  private enum EntityGroupingMode {
+    ONE_ENTITY_PER_GROUP,
+    MULTI_ENTITY_PER_GROUP
+  }
+
+  private static Logger logger =
+      Logger.getLogger(GoogleDatastoreClient.class);
+
+  // Read consistency defaults to "STRONG" per YCSB guidance.
+  // User can override this via configure.
+  private ReadConsistency readConsistency = ReadConsistency.STRONG;
+
+  private EntityGroupingMode entityGroupingMode =
+      EntityGroupingMode.ONE_ENTITY_PER_GROUP;
+
+  private String rootEntityName;
+
+  private Datastore datastore = null;
+
+  public GoogleDatastoreClient() {}
+
+  /**
+   * Initialize any state for this DB. Called once per DB instance; there is
+   * one DB instance per client thread.
+   *
+   * @throws DBException if a required property is missing, an optional
+   *     property holds an unrecognized value, or the connection to the
+   *     datastore cannot be set up.
+   */
+  @Override
+  public void init() throws DBException {
+    String debug = getProperties().getProperty("googledatastore.debug", null);
+    // equalsIgnoreCase(null) is false, so no separate null check is needed.
+    if ("true".equalsIgnoreCase(debug)) {
+      logger.setLevel(Level.DEBUG);
+    }
+
+    // We need the following 3 essential properties to initialize datastore:
+    //
+    // - DatasetId,
+    // - Path to private key file,
+    // - Service account email address.
+    String datasetId = getRequiredProperty("googledatastore.datasetId");
+    String privateKeyFile =
+        getRequiredProperty("googledatastore.privateKeyFile");
+    String serviceAccountEmail =
+        getRequiredProperty("googledatastore.serviceAccountEmail");
+
+    // Below are properties related to benchmarking.
+
+    String readConsistencyConfig = getProperties().getProperty(
+        "googledatastore.readConsistency", null);
+    if (readConsistencyConfig != null) {
+      try {
+        // Locale.ROOT keeps the upper-casing independent of the JVM's
+        // default locale, so the result always matches the enum constants.
+        this.readConsistency = ReadConsistency.valueOf(
+            readConsistencyConfig.trim().toUpperCase(Locale.ROOT));
+      } catch (IllegalArgumentException e) {
+        throw new DBException("Invalid read consistency specified: " +
+            readConsistencyConfig + ". Expecting STRONG or EVENTUAL.", e);
+      }
+    }
+
+    //
+    // Entity Grouping Mode (googledatastore.entityGroupingMode), see
+    // documentation in conf/googledatastore.properties.
+    //
+    String entityGroupingConfig = getProperties().getProperty(
+        "googledatastore.entityGroupingMode", null);
+    if (entityGroupingConfig != null) {
+      try {
+        // Locale.ROOT for the same reason as above: with a Turkish default
+        // locale, 'i' would otherwise upper-case to a dotted capital I and
+        // a valid mode name would be rejected.
+        this.entityGroupingMode = EntityGroupingMode.valueOf(
+            entityGroupingConfig.trim().toUpperCase(Locale.ROOT));
+      } catch (IllegalArgumentException e) {
+        throw new DBException("Invalid entity grouping mode specified: " +
+            entityGroupingConfig + ". Expecting ONE_ENTITY_PER_GROUP or " +
+            "MULTI_ENTITY_PER_GROUP.", e);
+      }
+    }
+
+    this.rootEntityName = getProperties().getProperty(
+        "googledatastore.rootEntityName", "YCSB_ROOT_ENTITY");
+
+    try {
+      // Setup the connection to Google Cloud Datastore with the credentials
+      // obtained from the configure.
+      DatastoreOptions.Builder options = new DatastoreOptions.Builder();
+      Credential credential = DatastoreHelper.getServiceAccountCredential(
+          serviceAccountEmail, privateKeyFile);
+      logger.info("Using JWT Service Account credential.");
+      logger.info("DatasetID: " + datasetId + ", Service Account Email: " +
+          serviceAccountEmail + ", Private Key File Path: " + privateKeyFile);
+
+      datastore = DatastoreFactory.get().create(
+          options.credential(credential).dataset(datasetId).build());
+
+    } catch (GeneralSecurityException exception) {
+      throw new DBException("Security error connecting to the datastore: " +
+          exception.getMessage(), exception);
+
+    } catch (IOException exception) {
+      throw new DBException("I/O error connecting to the datastore: " +
+          exception.getMessage(), exception);
+    }
+
+    logger.info("Datastore client instance created: " +
+        datastore.toString());
+  }
+
+  /**
+   * Looks up a mandatory configuration property.
+   *
+   * @param name the full property key, e.g. "googledatastore.datasetId".
+   * @return the configured value, never null.
+   * @throws DBException if the property is not set; the message names the
+   *     full key so the user knows exactly what to add to the config.
+   */
+  private String getRequiredProperty(String name) throws DBException {
+    String value = getProperties().getProperty(name, null);
+    if (value == null) {
+      throw new DBException("Required property \"" + name + "\" missing.");
+    }
+    return value;
+  }
+
+  /**
+   * Reads one entity by key and copies its string properties into result.
+   *
+   * @param table used as the entity kind of the key.
+   * @param key used as the entity name of the key.
+   * @param fields property names to return, or null to return all of them.
+   * @param result receives one entry per returned property.
+   * @return Status.OK on success; a Status named "ERROR-404" when no entity
+   *     is found; a Status named "ERROR-" plus the datastore error code when
+   *     the lookup fails; Status.UNEXPECTED_STATE if more than one entity
+   *     comes back.
+   */
+  @Override
+  public Status read(String table, String key, Set<String> fields,
+          HashMap<String, ByteIterator> result) {
+    LookupRequest.Builder lookupRequest = LookupRequest.newBuilder();
+    lookupRequest.addKey(buildPrimaryKey(table, key));
+    lookupRequest.getReadOptionsBuilder().setReadConsistency(
+        this.readConsistency);
+    // Note above, datastore lookupRequest always reads the entire entity, it
+    // does not support reading a subset of "fields" (properties) of an entity.
+
+    // Only render the request as text when it will actually be logged; this
+    // runs once per benchmark operation.
+    if (logger.isDebugEnabled()) {
+      logger.debug("Built lookup request as: " + lookupRequest.toString());
+    }
+
+    LookupResponse response = null;
+    try {
+      response = datastore.lookup(lookupRequest.build());
+
+    } catch (DatastoreException exception) {
+      logger.error(
+          String.format("Datastore Exception when reading (%s): %s %s",
+              exception.getMessage(),
+              exception.getMethodName(),
+              exception.getCode()));
+
+      // DatastoreException.getCode() returns an HTTP response code which we
+      // will bubble up to the user as part of the YCSB Status "name".
+      return new Status("ERROR-" + exception.getCode(), exception.getMessage());
+    }
+
+    if (response.getFoundCount() == 0) {
+      return new Status("ERROR-404", "Not Found, key is: " + key);
+    } else if (response.getFoundCount() > 1) {
+      // We only asked to lookup for one key, shouldn't have got more than one
+      // entity back. Unexpected State.
+      return Status.UNEXPECTED_STATE;
+    }
+
+    Entity entity = response.getFound(0).getEntity();
+    if (logger.isDebugEnabled()) {
+      logger.debug("Read entity: " + entity.toString());
+    }
+
+    Map<String, Value> properties = DatastoreHelper.getPropertyMap(entity);
+    Set<String> propertiesToReturn =
+        (fields == null ? properties.keySet() : fields);
+
+    // Requested fields that the entity does not have are silently skipped.
+    for (String name : propertiesToReturn) {
+      if (properties.containsKey(name)) {
+        result.put(name, new StringByteIterator(properties.get(name)
+            .getStringValue()));
+      }
+    }
+
+    return Status.OK;
+  }
+
+  /**
+   * Scan is not supported by this binding yet.
+   *
+   * @return always Status.NOT_IMPLEMENTED.
+   */
+  @Override
+  public Status scan(String table, String startkey, int recordcount,
+      Set<String> fields, Vector<HashMap<String, ByteIterator>> result) {
+    // TODO: Implement Scan as query on primary key.
+    return Status.NOT_IMPLEMENTED;
+  }
+
+  /**
+   * Updates an entity by committing a single "update" mutation.
+   */
+  @Override
+  public Status update(String table, String key,
+      HashMap<String, ByteIterator> values) {
+
+    return doSingleItemMutation(table, key, values, MutationType.UPDATE);
+  }
+
+  /**
+   * Inserts an entity by committing a single "upsert" mutation.
+   */
+  @Override
+  public Status insert(String table, String key,
+      HashMap<String, ByteIterator> values) {
+    // Use Upsert to allow overwrite of existing key instead of failing the
+    // load (or run) just because the DB already has the key.
+    // This is the same behavior as what other DB does here (such as
+    // the DynamoDB client).
+    return doSingleItemMutation(table, key, values, MutationType.UPSERT);
+  }
+
+  /**
+   * Deletes an entity by committing a single "delete" mutation.
+   */
+  @Override
+  public Status delete(String table, String key) {
+    return doSingleItemMutation(table, key, null, MutationType.DELETE);
+  }
+
+  /**
+   * Builds the datastore key for a YCSB record.
+   *
+   * <p>The key always ends with a path element of kind {@code table} and
+   * name {@code key}. In MULTI_ENTITY_PER_GROUP mode it is preceded by a
+   * path element of the same kind named after the configured root entity.
+   */
+  private Key.Builder buildPrimaryKey(String table, String key) {
+    Key.Builder result = Key.newBuilder();
+
+    if (this.entityGroupingMode == EntityGroupingMode.MULTI_ENTITY_PER_GROUP) {
+      // All entities are inside the same group when we are in this mode.
+      result.addPathElement(Key.PathElement.newBuilder().setKind(table).
+          setName(rootEntityName));
+    }
+
+    return result.addPathElement(Key.PathElement.newBuilder().setKind(table)
+        .setName(key));
+  }
+
+  /**
+   * Commits exactly one upsert, update or delete for the given key.
+   *
+   * @param table used as the entity kind of the key.
+   * @param key used as the entity name of the key.
+   * @param values properties to store, each written as a string value; only
+   *     the DELETE mutation may pass null here.
+   * @param mutationType which mutation to add to the commit request.
+   * @return Status.OK on success, or a Status named "ERROR-" plus the
+   *     datastore error code when the commit fails.
+   */
+  private Status doSingleItemMutation(String table, String key,
+      @Nullable HashMap<String, ByteIterator> values,
+      MutationType mutationType) {
+    // First build the key.
+    Key.Builder datastoreKey = buildPrimaryKey(table, key);
+
+    // Build a commit request in non-transactional mode.
+    // Single item mutation to google datastore
+    // is always atomic and strongly consistent. Transaction is only necessary
+    // for multi-item mutation, or Read-modify-write operation.
+    CommitRequest.Builder commitRequest = CommitRequest.newBuilder();
+    commitRequest.setMode(Mode.NON_TRANSACTIONAL);
+
+    if (mutationType == MutationType.DELETE) {
+      commitRequest.getMutationBuilder().addDelete(datastoreKey);
+
+    } else {
+      // If this is not for delete, build the entity.
+      Entity.Builder entityBuilder = Entity.newBuilder();
+      entityBuilder.setKey(datastoreKey);
+      for (Entry<String, ByteIterator> val : values.entrySet()) {
+        entityBuilder.addProperty(Property.newBuilder()
+            .setName(val.getKey())
+            .setValue(Value.newBuilder()
+                .setStringValue(val.getValue().toString())));
+      }
+      Entity entity = entityBuilder.build();
+      // Only render the entity as text when it will actually be logged.
+      if (logger.isDebugEnabled()) {
+        logger.debug("entity built as: " + entity.toString());
+      }
+
+      if (mutationType == MutationType.UPSERT) {
+        commitRequest.getMutationBuilder().addUpsert(entity);
+      } else if (mutationType == MutationType.UPDATE) {
+        commitRequest.getMutationBuilder().addUpdate(entity);
+      } else {
+        throw new RuntimeException("Impossible MutationType, code bug.");
+      }
+    }
+
+    try {
+      datastore.commit(commitRequest.build());
+      logger.debug("successfully committed.");
+
+    } catch (DatastoreException exception) {
+      // Catch all Datastore rpc errors.
+      // Log the exception, the name of the method called and the error code.
+      logger.error(
+          String.format("Datastore Exception when committing (%s): %s %s",
+              exception.getMessage(),
+              exception.getMethodName(),
+              exception.getCode()));
+
+      // DatastoreException.getCode() returns an HTTP response code which we
+      // will bubble up to the user as part of the YCSB Status "name".
+      return new Status("ERROR-" + exception.getCode(), exception.getMessage());
+    }
+
+    return Status.OK;
+  }
+}
diff --git a/googledatastore/src/main/java/com/yahoo/ycsb/db/package-info.java b/googledatastore/src/main/java/com/yahoo/ycsb/db/package-info.java
new file mode 100644
index 0000000000000000000000000000000000000000..bccdf1e648e6b9cab0f7c84660c2e884efa2a5db
--- /dev/null
+++ b/googledatastore/src/main/java/com/yahoo/ycsb/db/package-info.java
@@ -0,0 +1,22 @@
+/**
+ * Copyright (c) 2015 YCSB contributors. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you
+ * may not use this file except in compliance with the License.
You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. + */ + +/** + * YCSB binding for +<a href="https://cloud.google.com/datastore/">Google Cloud Datastore</a>. + */ +package com.yahoo.ycsb.db; diff --git a/googledatastore/src/main/resources/log4j.properties b/googledatastore/src/main/resources/log4j.properties new file mode 100644 index 0000000000000000000000000000000000000000..f8f8689ef8d7d311f1f3e6e77e343c9fab303482 --- /dev/null +++ b/googledatastore/src/main/resources/log4j.properties @@ -0,0 +1,25 @@ +# Copyright (c) 2015 YCSB contributors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you +# may not use this file except in compliance with the License. You +# may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. See accompanying +# LICENSE file. 
+ +# define the console appender +log4j.appender.consoleAppender = org.apache.log4j.ConsoleAppender + +# now define the layout for the appender +log4j.appender.consoleAppender.layout = org.apache.log4j.PatternLayout +log4j.appender.consoleAppender.layout.ConversionPattern=%-4r [%t] %-5p %c %x -%m%n + +# now attach our console appender to the root logger, so that all log messages go +# to this appender +log4j.rootLogger = INFO, consoleAppender diff --git a/pom.xml b/pom.xml index b47ab1faaf2403310c0c43aa483ec65d6d054a5b..a948a5de7b3c22f8d5e679d75acc78686de32d20 100644 --- a/pom.xml +++ b/pom.xml @@ -108,6 +108,7 @@ LICENSE file. <module>dynamodb</module> <module>elasticsearch</module> <module>gemfire</module> + <module>googledatastore</module> <module>hbase094</module> <module>hbase098</module> <module>hbase10</module>