From f64194ba25b987515ed7b33b3d1ddca49d5a430c Mon Sep 17 00:00:00 2001 From: Michael Nitschinger Date: Wed, 6 Apr 2016 08:32:30 +0200 Subject: [PATCH 1/9] [couchbase2] Add Support for Couchbase SDK 2.x --- bin/ycsb | 1 + couchbase2/README.md | 115 +++ couchbase2/pom.xml | 48 + .../ycsb/db/couchbase2/Couchbase2Client.java | 896 ++++++++++++++++++ .../ycsb/db/couchbase2/package-info.java | 22 + distribution/pom.xml | 5 + pom.xml | 2 + 7 files changed, 1089 insertions(+) create mode 100644 couchbase2/README.md create mode 100644 couchbase2/pom.xml create mode 100644 couchbase2/src/main/java/com/yahoo/ycsb/db/couchbase2/Couchbase2Client.java create mode 100644 couchbase2/src/main/java/com/yahoo/ycsb/db/couchbase2/package-info.java diff --git a/bin/ycsb b/bin/ycsb index 16c5a82a..a1a4ea20 100755 --- a/bin/ycsb +++ b/bin/ycsb @@ -58,6 +58,7 @@ DATABASES = { "cassandra-cql": "com.yahoo.ycsb.db.CassandraCQLClient", "cassandra2-cql": "com.yahoo.ycsb.db.CassandraCQLClient", "couchbase" : "com.yahoo.ycsb.db.CouchbaseClient", + "couchbase2" : "com.yahoo.ycsb.db.couchbase2.Couchbase2Client", "dynamodb" : "com.yahoo.ycsb.db.DynamoDBClient", "elasticsearch": "com.yahoo.ycsb.db.ElasticsearchClient", "geode" : "com.yahoo.ycsb.db.GeodeClient", diff --git a/couchbase2/README.md b/couchbase2/README.md new file mode 100644 index 00000000..455a4eea --- /dev/null +++ b/couchbase2/README.md @@ -0,0 +1,115 @@ + + +# Couchbase (SDK 2.x) Driver for YCSB +This driver is a binding for the YCSB facilities to operate against a Couchbase Server cluster. It uses the official +Couchbase Java SDK (version 2.x) and provides a rich set of configuration options, including support for the N1QL +query language. + +## Quickstart + +### 1. Start Couchbase Server +You need to start a single node or a cluster to point the client at. Please see [http://couchbase.com](couchbase.com) +for more details and instructions. + +### 2. Set up YCSB +You can either download the release zip and run it, or just clone from master. + +``` +git clone git://github.com/brianfrankcooper/YCSB.git +cd YCSB +mvn clean package +``` + +### 3. Run the Workload +Before you can actually run the workload, you need to "load" the data first. + +``` +bin/ycsb load couchbase2 -s -P workloads/workloada +``` + +Then, you can run the workload: + +``` +bin/ycsb run couchbase2 -s -P workloads/workloada +``` + +Please see the general instructions in the `doc` folder if you are not sure how it all works. You can apply a property +(as seen in the next section) like this: + +``` +bin/ycsb run couchbase -s -P workloads/workloada -p couchbase.epoll=true +``` + +## N1QL Index Setup +In general, every time N1QL is used (either implicitly through using `workloade` or through setting `kv=false`) some +kind of index must be present to make it work. Depending on the workload and data size, choosing the right index is +crucial at runtime in order to get the best performance. If in doubt, please ask at the +[forums](http://forums.couchbase.com) or get in touch with our team at Couchbase. + +For `workloade` and the default `readallfields=true` we recommend creating the following index, and if using Couchbase +Server 4.5 or later with the "Memory Optimized Index" setting on the bucket. + +``` +CREATE INDEX wle_idx ON `bucketname`(meta().id); +``` + +For other workloads, different index setups might be even more performant. + +## Performance Considerations +As it is with any benchmark, there are lot of knobs to tune in order to get great or (if you are reading +this and trying to write a competitor benchmark ;-)) bad performance. + +The first setting you should consider, if you are running on Linux 64bit is setting `-p couchbase.epoll=true`. This will +then turn on the Epoll IO mechanisms in the underlying Netty library which provides better performance since it has less +synchronization to do than the NIO default. This only works on Linux, but you are benchmarking on the OS you are +deploying to, right? + +The second option, `boost`, sounds more magic than it actually is. By default this benchmark trades CPU for throughput, +but this can be disabled by setting `-p couchbase.boost=0`. This defaults to 3, and 3 is the number of event loops run +in the IO layer. 3 is a reasonable default but you should set it to the number of **physical** cores you have available +on the machine if you only plan to run one YCSB instance. Make sure (using profiling) to max out your cores, but don't +overdo it. + +## Sync vs Async +By default, since YCSB is sync the code will always wait for the operation to complete. In some cases it can be useful +to just "drive load" and disable the waiting. Note that when the "-p couchbase.syncMutationResponse=false" option is +used, the measured results by YCSB can basically be thrown away. Still helpful sometimes during load phases to speed +them up :) + +## Configuration Options +Since no setup is the same and the goal of YCSB is to deliver realistic benchmarks, here are some setups that you can +tune. Note that if you need more flexibility (let's say a custom transcoder), you still need to extend this driver and +implement the facilities on your own. + +You can set the following properties (with the default settings applied): + + - couchbase.host=127.0.0.1: The hostname from one server. + - couchbase.bucket=default: The bucket name to use. + - couchbase.password=: The password of the bucket. + - couchbase.syncMutationResponse=true: If mutations should wait for the response to complete. + - couchbase.persistTo=0: Persistence durability requirement + - couchbase.replicateTo=0: Replication durability requirement + - couchbase.upsert=false: Use upsert instead of insert or replace. + - couchbase.adhoc=false: If set to true, prepared statements are not used. + - couchbase.kv=true: If set to false, mutation operations will also be performed through N1QL. + - couchbase.maxParallelism=1: The server parallelism for all n1ql queries. + - couchbase.kvEndpoints=1: The number of KV sockets to open per server. + - couchbase.queryEndpoints=5: The number of N1QL Query sockets to open per server. + - couchbase.epoll=false: If Epoll instead of NIO should be used (only available for linux. + - couchbase.boost=3: If > 0 trades CPU for higher throughput. N is the number of event loops, ideally + set to the number of physical cores. Setting higher than that will likely degrade performance. \ No newline at end of file diff --git a/couchbase2/pom.xml b/couchbase2/pom.xml new file mode 100644 index 00000000..f152a856 --- /dev/null +++ b/couchbase2/pom.xml @@ -0,0 +1,48 @@ + + + + + 4.0.0 + + com.yahoo.ycsb + binding-parent + 0.9.0-SNAPSHOT + ../binding-parent + + + couchbase2-binding + Couchbase Java SDK 2.x Binding + jar + + + + com.couchbase.client + java-client + ${couchbase2.version} + + + com.yahoo.ycsb + core + ${project.version} + provided + + + + diff --git a/couchbase2/src/main/java/com/yahoo/ycsb/db/couchbase2/Couchbase2Client.java b/couchbase2/src/main/java/com/yahoo/ycsb/db/couchbase2/Couchbase2Client.java new file mode 100644 index 00000000..41695243 --- /dev/null +++ b/couchbase2/src/main/java/com/yahoo/ycsb/db/couchbase2/Couchbase2Client.java @@ -0,0 +1,896 @@ +/** + * Copyright (c) 2016 Yahoo! Inc. All rights reserved. + *

+ * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. + */ + +package com.yahoo.ycsb.db.couchbase2; + +import com.couchbase.client.core.env.DefaultCoreEnvironment; +import com.couchbase.client.core.env.resources.IoPoolShutdownHook; +import com.couchbase.client.core.logging.CouchbaseLogger; +import com.couchbase.client.core.logging.CouchbaseLoggerFactory; +import com.couchbase.client.deps.com.fasterxml.jackson.core.JsonFactory; +import com.couchbase.client.deps.com.fasterxml.jackson.core.JsonGenerator; +import com.couchbase.client.deps.com.fasterxml.jackson.databind.JsonNode; +import com.couchbase.client.deps.com.fasterxml.jackson.databind.node.ObjectNode; +import com.couchbase.client.deps.io.netty.channel.DefaultSelectStrategyFactory; +import com.couchbase.client.deps.io.netty.channel.EventLoopGroup; +import com.couchbase.client.deps.io.netty.channel.SelectStrategy; +import com.couchbase.client.deps.io.netty.channel.SelectStrategyFactory; +import com.couchbase.client.deps.io.netty.channel.epoll.EpollEventLoopGroup; +import com.couchbase.client.deps.io.netty.channel.nio.NioEventLoopGroup; +import com.couchbase.client.deps.io.netty.util.IntSupplier; +import com.couchbase.client.deps.io.netty.util.concurrent.DefaultThreadFactory; +import com.couchbase.client.java.Bucket; +import com.couchbase.client.java.Cluster; +import com.couchbase.client.java.CouchbaseCluster; +import com.couchbase.client.java.PersistTo; +import com.couchbase.client.java.ReplicateTo; +import com.couchbase.client.java.document.Document; +import com.couchbase.client.java.document.RawJsonDocument; +import com.couchbase.client.java.document.json.JsonArray; +import com.couchbase.client.java.document.json.JsonObject; +import com.couchbase.client.java.env.CouchbaseEnvironment; +import com.couchbase.client.java.env.DefaultCouchbaseEnvironment; +import com.couchbase.client.java.error.TemporaryFailureException; +import com.couchbase.client.java.query.*; +import com.couchbase.client.java.transcoder.JacksonTransformers; +import com.couchbase.client.java.util.Blocking; +import com.yahoo.ycsb.ByteIterator; +import com.yahoo.ycsb.DB; +import com.yahoo.ycsb.DBException; +import com.yahoo.ycsb.Status; +import com.yahoo.ycsb.StringByteIterator; +import rx.Observable; +import rx.Subscriber; +import rx.functions.Action1; +import rx.functions.Func1; + +import java.io.StringWriter; +import java.io.Writer; +import java.nio.channels.spi.SelectorProvider; +import java.util.*; +import java.util.concurrent.ThreadFactory; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.locks.LockSupport; + +/** + * A class that wraps the 2.x Couchbase SDK to be used with YCSB. + * + *

The following options can be passed when using this database client to override the defaults. + * + *

+ */ +public class Couchbase2Client extends DB { + + static { + // No need to send the full encoded_plan for this benchmark workload, less network overhead! + System.setProperty("com.couchbase.query.encodedPlanEnabled", "false"); + } + + private static final CouchbaseLogger LOGGER = CouchbaseLoggerFactory.getInstance(Couchbase2Client.class); + private static final Object INIT_COORDINATOR = new Object(); + + private static volatile CouchbaseEnvironment env = null; + + private Cluster cluster; + private Bucket bucket; + private String bucketName; + private boolean upsert; + private PersistTo persistTo; + private ReplicateTo replicateTo; + private boolean syncMutResponse; + private boolean epoll; + private long kvTimeout; + private boolean adhoc; + private boolean kv; + private int maxParallelism; + private String host; + private int kvEndpoints; + private int queryEndpoints; + private int boost; + + @Override + public void init() throws DBException { + Properties props = getProperties(); + + host = props.getProperty("couchbase.host", "127.0.0.1"); + bucketName = props.getProperty("couchbase.bucket", "default"); + String bucketPassword = props.getProperty("couchbase.password", ""); + + upsert = props.getProperty("couchbase.upsert", "false").equals("true"); + persistTo = parsePersistTo(props.getProperty("couchbase.persistTo", "0")); + replicateTo = parseReplicateTo(props.getProperty("couchbase.replicateTo", "0")); + syncMutResponse = props.getProperty("couchbase.syncMutationResponse", "true").equals("true"); + adhoc = props.getProperty("couchbase.adhoc", "false").equals("true"); + kv = props.getProperty("couchbase.kv", "true").equals("true"); + maxParallelism = Integer.parseInt(props.getProperty("couchbase.maxParallelism", "1")); + kvEndpoints = Integer.parseInt(props.getProperty("couchbase.kvEndpoints", "1")); + queryEndpoints = Integer.parseInt(props.getProperty("couchbase.queryEndpoints", "5")); + epoll = props.getProperty("couchbase.epoll", "false").equals("true"); + boost = Integer.parseInt(props.getProperty("couchbase.boost", "3")); + + try { + synchronized (INIT_COORDINATOR) { + if (env == null) { + DefaultCouchbaseEnvironment.Builder builder = DefaultCouchbaseEnvironment + .builder() + .queryEndpoints(queryEndpoints) + .callbacksOnIoPool(true) + .kvEndpoints(kvEndpoints); + + // Tune boosting and epoll based on settings + SelectStrategyFactory factory = boost > 0 ? + new BackoffSelectStrategyFactory() : DefaultSelectStrategyFactory.INSTANCE; + + int poolSize = boost > 0 ? boost : Integer.parseInt( + System.getProperty("com.couchbase.ioPoolSize", Integer.toString(DefaultCoreEnvironment.IO_POOL_SIZE)) + ); + ThreadFactory threadFactory = new DefaultThreadFactory("cb-io", true); + + EventLoopGroup group = epoll ? new EpollEventLoopGroup(poolSize, threadFactory, factory) + : new NioEventLoopGroup(poolSize, threadFactory, SelectorProvider.provider(), factory); + builder.ioPool(group, new IoPoolShutdownHook(group)); + + env = builder.build(); + logParams(); + } + } + + cluster = CouchbaseCluster.create(env, host); + bucket = cluster.openBucket(bucketName, bucketPassword); + kvTimeout = env.kvTimeout(); + } catch (Exception ex) { + throw new DBException("Could not connect to Couchbase Bucket.", ex); + } + + if (!kv && !syncMutResponse) { + throw new DBException("Not waiting for N1QL responses on mutations not yet implemented."); + } + } + + /** + * Helper method to log the CLI params so that on the command line debugging is easier. + */ + private void logParams() { + StringBuilder sb = new StringBuilder(); + + sb.append("host=").append(host); + sb.append(", bucket=").append(bucketName); + sb.append(", upsert=").append(upsert); + sb.append(", persistTo=").append(persistTo); + sb.append(", replicateTo=").append(replicateTo); + sb.append(", syncMutResponse=").append(syncMutResponse); + sb.append(", adhoc=").append(adhoc); + sb.append(", kv=").append(kv); + sb.append(", maxParallelism=").append(maxParallelism); + sb.append(", queryEndpoints=").append(queryEndpoints); + sb.append(", kvEndpoints=").append(kvEndpoints); + sb.append(", queryEndpoints=").append(queryEndpoints); + sb.append(", epoll=").append(epoll); + sb.append(", boost=").append(boost); + + LOGGER.info("===> Using Params: " + sb.toString()); + } + + @Override + public Status read(final String table, final String key, Set fields, + final HashMap result) { + try { + String docId = formatId(table, key); + if (kv) { + return readKv(docId, fields, result); + } else { + return readN1ql(docId, fields, result); + } + } catch (Exception ex) { + ex.printStackTrace(); + return Status.ERROR; + } + } + + /** + * Performs the {@link #read(String, String, Set, HashMap)} operation via Key/Value ("get"). + * + * @param docId the document ID + * @param fields the fields to be loaded + * @param result the result map where the doc needs to be converted into + * @return The result of the operation. + */ + private Status readKv(final String docId, final Set fields, final HashMap result) + throws Exception { + RawJsonDocument loaded = bucket.get(docId, RawJsonDocument.class); + if (loaded == null) { + return Status.NOT_FOUND; + } + decode(loaded.content(), fields, result); + return Status.OK; + } + + /** + * Performs the {@link #read(String, String, Set, HashMap)} operation via N1QL ("SELECT"). + * + * If this option should be used, the "-p couchbase.kv=false" property must be set. + * + * @param docId the document ID + * @param fields the fields to be loaded + * @param result the result map where the doc needs to be converted into + * @return The result of the operation. + */ + private Status readN1ql(final String docId, Set fields, final HashMap result) + throws Exception { + String readQuery = "SELECT " + joinFields(fields) + " FROM `" + bucketName + "` USE KEYS [$1]"; + N1qlQueryResult queryResult = bucket.query(N1qlQuery.parameterized( + readQuery, + JsonArray.from(docId), + N1qlParams.build().adhoc(adhoc).maxParallelism(maxParallelism) + )); + + if (!queryResult.parseSuccess() || !queryResult.finalSuccess()) { + throw new DBException("Error while parsing N1QL Result. Query: " + readQuery + + ", Errors: " + queryResult.errors()); + } + + N1qlQueryRow row; + try { + row = queryResult.rows().next(); + } catch (NoSuchElementException ex) { + return Status.NOT_FOUND; + } + + JsonObject content = row.value(); + if (fields == null) { + content = content.getObject(bucketName); // n1ql result set scoped under *.bucketName + fields = content.getNames(); + } + + for (String field : fields) { + Object value = content.get(field); + result.put(field, new StringByteIterator(value != null ? value.toString() : "")); + } + + return Status.OK; + } + + @Override + public Status update(final String table, final String key, final HashMap values) { + if (upsert) { + return upsert(table, key, values); + } + + try { + String docId = formatId(table, key); + if (kv) { + return updateKv(docId, values); + } else { + return updateN1ql(docId, values); + } + } catch (Exception ex) { + ex.printStackTrace(); + return Status.ERROR; + } + } + + /** + * Performs the {@link #update(String, String, HashMap)} operation via Key/Value ("replace"). + * + * @param docId the document ID + * @param values the values to update the document with. + * @return The result of the operation. + */ + private Status updateKv(final String docId, final HashMap values) { + waitForMutationResponse(bucket.async().replace( + RawJsonDocument.create(docId, encode(values)), + persistTo, + replicateTo + )); + return Status.OK; + } + + /** + * Performs the {@link #update(String, String, HashMap)} operation via N1QL ("UPDATE"). + * + * If this option should be used, the "-p couchbase.kv=false" property must be set. + * + * @param docId the document ID + * @param values the values to update the document with. + * @return The result of the operation. + */ + private Status updateN1ql(final String docId, final HashMap values) + throws Exception { + String fields = encodeN1qlFields(values); + String updateQuery = "UPDATE `" + bucketName + "` USE KEYS [$1] SET " + fields; + + N1qlQueryResult queryResult = bucket.query(N1qlQuery.parameterized( + updateQuery, + JsonArray.from(docId), + N1qlParams.build().adhoc(adhoc).maxParallelism(maxParallelism) + )); + + if (!queryResult.parseSuccess() || !queryResult.finalSuccess()) { + throw new DBException("Error while parsing N1QL Result. Query: " + updateQuery + + ", Errors: " + queryResult.errors()); + } + return Status.OK; + } + + @Override + public Status insert(final String table, final String key, final HashMap values) { + if (upsert) { + return upsert(table, key, values); + } + + try { + String docId = formatId(table, key); + if (kv) { + return insertKv(docId, values); + } else { + return insertN1ql(docId, values); + } + } catch (Exception ex) { + ex.printStackTrace(); + return Status.ERROR; + } + } + + /** + * Performs the {@link #insert(String, String, HashMap)} operation via Key/Value ("INSERT"). + * + * Note that during the "load" phase it makes sense to retry TMPFAILS (so that even if the server is + * overloaded temporarily the ops will succeed eventually). The current code will retry TMPFAILs + * for maximum of one minute and then bubble up the error. + * + * @param docId the document ID + * @param values the values to update the document with. + * @return The result of the operation. + */ + private Status insertKv(final String docId, final HashMap values) { + int tries = 60; // roughly 60 seconds with the 1 second sleep, not 100% accurate. + + for(int i = 0; i < tries; i++) { + try { + waitForMutationResponse(bucket.async().insert( + RawJsonDocument.create(docId, encode(values)), + persistTo, + replicateTo + )); + return Status.OK; + } catch (TemporaryFailureException ex) { + try { + Thread.sleep(1000); + } catch (InterruptedException e) { + throw new RuntimeException("Interrupted while sleeping on TMPFAIL backoff.", ex); + } + } + } + + throw new RuntimeException("Still receiving TMPFAIL from the server after trying " + tries + " times. " + + "Check your server."); + } + + /** + * Performs the {@link #insert(String, String, HashMap)} operation via N1QL ("INSERT"). + * + * If this option should be used, the "-p couchbase.kv=false" property must be set. + * + * @param docId the document ID + * @param values the values to update the document with. + * @return The result of the operation. + */ + private Status insertN1ql(final String docId, final HashMap values) + throws Exception { + String insertQuery = "INSERT INTO `" + bucketName + "`(KEY,VALUE) VALUES ($1,$2)"; + + N1qlQueryResult queryResult = bucket.query(N1qlQuery.parameterized( + insertQuery, + JsonArray.from(docId, valuesToJsonObject(values)), + N1qlParams.build().adhoc(adhoc).maxParallelism(maxParallelism) + )); + + if (!queryResult.parseSuccess() || !queryResult.finalSuccess()) { + throw new DBException("Error while parsing N1QL Result. Query: " + insertQuery + + ", Errors: " + queryResult.errors()); + } + return Status.OK; + } + + /** + * Performs an upsert instead of insert or update using either Key/Value or N1QL. + * + * If this option should be used, the "-p couchbase.upsert=true" property must be set. + * + * @param table The name of the table + * @param key The record key of the record to insert. + * @param values A HashMap of field/value pairs to insert in the record + * @return The result of the operation. + */ + private Status upsert(final String table, final String key, final HashMap values) { + try { + String docId = formatId(table, key); + if (kv) { + return upsertKv(docId, values); + } else { + return upsertN1ql(docId, values); + } + } catch (Exception ex) { + ex.printStackTrace(); + return Status.ERROR; + } + } + + /** + * Performs the {@link #upsert(String, String, HashMap)} operation via Key/Value ("upsert"). + * + * If this option should be used, the "-p couchbase.upsert=true" property must be set. + * + * @param docId the document ID + * @param values the values to update the document with. + * @return The result of the operation. + */ + private Status upsertKv(final String docId, final HashMap values) { + waitForMutationResponse(bucket.async().upsert( + RawJsonDocument.create(docId, encode(values)), + persistTo, + replicateTo + )); + return Status.OK; + } + + /** + * Performs the {@link #upsert(String, String, HashMap)} operation via N1QL ("UPSERT"). + * + * If this option should be used, the "-p couchbase.upsert=true -p couchbase.kv=false" properties must be set. + * + * @param docId the document ID + * @param values the values to update the document with. + * @return The result of the operation. + */ + private Status upsertN1ql(final String docId, final HashMap values) + throws Exception { + String upsertQuery = "UPSERT INTO `" + bucketName + "`(KEY,VALUE) VALUES ($1,$2)"; + + N1qlQueryResult queryResult = bucket.query(N1qlQuery.parameterized( + upsertQuery, + JsonArray.from(docId, valuesToJsonObject(values)), + N1qlParams.build().adhoc(adhoc).maxParallelism(maxParallelism) + )); + + if (!queryResult.parseSuccess() || !queryResult.finalSuccess()) { + throw new DBException("Error while parsing N1QL Result. Query: " + upsertQuery + + ", Errors: " + queryResult.errors()); + } + return Status.OK; + } + + @Override + public Status delete(final String table, final String key) { + try { + String docId = formatId(table, key); + if (kv) { + return deleteKv(docId); + } else { + return deleteN1ql(docId); + } + } catch (Exception ex) { + ex.printStackTrace(); + return Status.ERROR; + } + } + + /** + * Performs the {@link #delete(String, String)} (String, String)} operation via Key/Value ("remove"). + * + * @param docId the document ID. + * @return The result of the operation. + */ + private Status deleteKv(final String docId) { + waitForMutationResponse(bucket.async().remove( + docId, + persistTo, + replicateTo + )); + return Status.OK; + } + + /** + * Performs the {@link #delete(String, String)} (String, String)} operation via N1QL ("DELETE"). + * + * If this option should be used, the "-p couchbase.kv=false" property must be set. + * + * @param docId the document ID. + * @return The result of the operation. + */ + private Status deleteN1ql(final String docId) throws Exception { + String deleteQuery = "DELETE FROM `" + bucketName + "` USE KEYS [$1]"; + N1qlQueryResult queryResult = bucket.query(N1qlQuery.parameterized( + deleteQuery, + JsonArray.from(docId), + N1qlParams.build().adhoc(adhoc).maxParallelism(maxParallelism) + )); + + if (!queryResult.parseSuccess() || !queryResult.finalSuccess()) { + throw new DBException("Error while parsing N1QL Result. Query: " + deleteQuery + + ", Errors: " + queryResult.errors()); + } + return Status.OK; + } + + @Override + public Status scan(final String table, final String startkey, final int recordcount, final Set fields, + final Vector> result) { + try { + if (fields == null || fields.isEmpty()) { + return scanAllFields(table, startkey, recordcount, result); + } else { + return scanSpecificFields(table, startkey, recordcount, fields, result); + } + } catch (Exception ex) { + ex.printStackTrace(); + return Status.ERROR; + } + } + + /** + * Performs the {@link #scan(String, String, int, Set, Vector)} operation, optimized for all fields. + * + * Since the full document bodies need to be loaded anyways, it makes sense to just grab the document IDs + * from N1QL and then perform the bulk loading via KV for better performance. This is a usual pattern with + * Couchbase and shows the benefits of using both N1QL and KV together. + * + * @param table The name of the table + * @param startkey The record key of the first record to read. + * @param recordcount The number of records to read + * @param result A Vector of HashMaps, where each HashMap is a set field/value pairs for one record + * @return The result of the operation. + */ + private Status scanAllFields(final String table, final String startkey, final int recordcount, + final Vector> result) { + final String scanQuery = "SELECT meta().id as id FROM `" + bucketName + "` WHERE meta().id >= '$1' LIMIT $2"; + Collection> documents = bucket.async() + .query(N1qlQuery.parameterized( + scanQuery, + JsonArray.from(formatId(table, startkey), recordcount), + N1qlParams.build().adhoc(adhoc).maxParallelism(maxParallelism) + )) + .doOnNext(new Action1() { + @Override + public void call(AsyncN1qlQueryResult result) { + if (!result.parseSuccess()) { + throw new RuntimeException("Error while parsing N1QL Result. Query: " + scanQuery + + ", Errors: " + result.errors()); + } + } + }) + .flatMap(new Func1>() { + @Override + public Observable call(AsyncN1qlQueryResult result) { + return result.rows(); + } + }) + .flatMap(new Func1>() { + @Override + public Observable call(AsyncN1qlQueryRow row) { + return bucket.async().get(row.value().getString("id"), RawJsonDocument.class); + } + }) + .map(new Func1>() { + @Override + public HashMap call(RawJsonDocument document) { + HashMap tuple = new HashMap(); + decode(document.content(), null, tuple); + return tuple; + } + }) + .toList() + .toBlocking() + .single(); + + result.addAll(documents); + return Status.OK; + } + + /** + * Performs the {@link #scan(String, String, int, Set, Vector)} operation N1Ql only for a subset of the fields. + * + * @param table The name of the table + * @param startkey The record key of the first record to read. + * @param recordcount The number of records to read + * @param fields The list of fields to read, or null for all of them + * @param result A Vector of HashMaps, where each HashMap is a set field/value pairs for one record + * @return The result of the operation. + */ + private Status scanSpecificFields(final String table, final String startkey, final int recordcount, + final Set fields, final Vector> result) { + String scanQuery = "SELECT " + joinFields(fields) + " FROM `" + bucketName + "` WHERE meta().id >= '$1' LIMIT $2"; + N1qlQueryResult queryResult = bucket.query(N1qlQuery.parameterized( + scanQuery, + JsonArray.from(formatId(table, startkey), recordcount), + N1qlParams.build().adhoc(adhoc).maxParallelism(maxParallelism) + )); + + if (!queryResult.parseSuccess() || !queryResult.finalSuccess()) { + throw new RuntimeException("Error while parsing N1QL Result. Query: " + scanQuery + + ", Errors: " + queryResult.errors()); + } + + boolean allFields = fields == null || fields.isEmpty(); + result.ensureCapacity(recordcount); + + for (N1qlQueryRow row : queryResult) { + JsonObject value = row.value(); + if (fields == null) { + value = value.getObject(bucketName); + } + Set f = allFields ? value.getNames() : fields; + HashMap tuple = new HashMap(f.size()); + for (String field : f) { + tuple.put(field, new StringByteIterator(value.getString(field))); + } + result.add(tuple); + } + return Status.OK; + } + + /** + * Helper method to block on the response, depending on the property set. + * + * By default, since YCSB is sync the code will always wait for the operation to complete. In some + * cases it can be useful to just "drive load" and disable the waiting. Note that when the + * "-p couchbase.syncMutationResponse=false" option is used, the measured results by YCSB can basically + * be thrown away. Still helpful sometimes during load phases to speed them up :) + * + * @param input the async input observable. + */ + private void waitForMutationResponse(final Observable> input) { + if (!syncMutResponse) { + input.subscribe(new Subscriber>() { + @Override + public void onCompleted() { + } + + @Override + public void onError(Throwable e) { + } + + @Override + public void onNext(Document document) { + } + }); + } else { + Blocking.blockForSingle(input, kvTimeout, TimeUnit.MILLISECONDS); + } + } + + /** + * Helper method to turn the values into a String, used with {@link #upsertN1ql(String, HashMap)}. + * + * @param values the values to encode. + * @return the encoded string. + */ + private static String encodeN1qlFields(final HashMap values) { + if (values.isEmpty()) { + return ""; + } + + StringBuilder sb = new StringBuilder(); + for (Map.Entry entry : values.entrySet()) { + String raw = entry.getValue().toString(); + String escaped = raw.replace("\"", "\\\"").replace("\'", "\\\'"); + sb.append(entry.getKey()).append("=\"").append(escaped).append("\" "); + } + String toReturn = sb.toString(); + return toReturn.substring(0, toReturn.length() - 1); + } + + /** + * Helper method to turn the map of values into a {@link JsonObject} for further use. + * + * @param values the values to transform. + * @return the created json object. + */ + private static JsonObject valuesToJsonObject(final HashMap values) { + JsonObject result = JsonObject.create(); + for (Map.Entry entry : values.entrySet()) { + result.put(entry.getKey(), entry.getValue().toString()); + } + return result; + } + + /** + * Helper method to join the set of fields into a String suitable for N1QL. + * + * @param fields the fields to join. + * @return the joined fields as a String. + */ + private static String joinFields(final Set fields) { + if (fields == null || fields.isEmpty()) { + return "*"; + } + StringBuilder builder = new StringBuilder(); + for (String f : fields) { + builder.append("`").append(f).append("`").append(","); + } + String toReturn = builder.toString(); + return toReturn.substring(0, toReturn.length() - 1); + } + + /** + * Helper method to turn the prefix and key into a proper document ID. + * + * @param prefix the prefix (table). + * @param key the key itself. + * @return a document ID that can be used with Couchbase. + */ + private static String formatId(final String prefix, final String key) { + return prefix + ":" + key; + } + + /** + * Helper method to parse the "ReplicateTo" property on startup. + * + * @param property the proeprty to parse. + * @return the parsed setting. + */ + private static ReplicateTo parseReplicateTo(final String property) throws DBException { + int value = Integer.parseInt(property); + + switch (value) { + case 0: + return ReplicateTo.NONE; + case 1: + return ReplicateTo.ONE; + case 2: + return ReplicateTo.TWO; + case 3: + return ReplicateTo.THREE; + default: + throw new DBException("\"couchbase.replicateTo\" must be between 0 and 3"); + } + } + + /** + * Helper method to parse the "PersistTo" property on startup. + * + * @param property the proeprty to parse. + * @return the parsed setting. + */ + private static PersistTo parsePersistTo(final String property) throws DBException { + int value = Integer.parseInt(property); + + switch (value) { + case 0: + return PersistTo.NONE; + case 1: + return PersistTo.ONE; + case 2: + return PersistTo.TWO; + case 3: + return PersistTo.THREE; + case 4: + return PersistTo.FOUR; + default: + throw new DBException("\"couchbase.persistTo\" must be between 0 and 4"); + } + } + + /** + * Decode the String from server and pass it into the decoded destination. + * + * @param source the loaded object. + * @param fields the fields to check. + * @param dest the result passed back to YCSB. + */ + private void decode(final String source, final Set fields, + final HashMap dest) { + try { + JsonNode json = JacksonTransformers.MAPPER.readTree(source); + boolean checkFields = fields != null && !fields.isEmpty(); + for (Iterator> jsonFields = json.fields(); jsonFields.hasNext();) { + Map.Entry jsonField = jsonFields.next(); + String name = jsonField.getKey(); + if (checkFields && fields.contains(name)) { + continue; + } + JsonNode jsonValue = jsonField.getValue(); + if (jsonValue != null && !jsonValue.isNull()) { + dest.put(name, new StringByteIterator(jsonValue.asText())); + } + } + } catch (Exception e) { + throw new RuntimeException("Could not decode JSON"); + } + } + + /** + * Encode the source into a String for storage. + * + * @param source the source value. + * @return the encoded string. + */ + private String encode(final HashMap source) { + HashMap stringMap = StringByteIterator.getStringMap(source); + ObjectNode node = JacksonTransformers.MAPPER.createObjectNode(); + for (Map.Entry pair : stringMap.entrySet()) { + node.put(pair.getKey(), pair.getValue()); + } + JsonFactory jsonFactory = new JsonFactory(); + Writer writer = new StringWriter(); + try { + JsonGenerator jsonGenerator = jsonFactory.createGenerator(writer); + JacksonTransformers.MAPPER.writeTree(jsonGenerator, node); + } catch (Exception e) { + throw new RuntimeException("Could not encode JSON value"); + } + return writer.toString(); + } +} + +/** + * Factory for the {@link BackoffSelectStrategy} to be used with boosting. + */ +class BackoffSelectStrategyFactory implements SelectStrategyFactory { + @Override + public SelectStrategy newSelectStrategy() { + return new BackoffSelectStrategy(); + } +} + +/** + * Custom IO select strategy which trades CPU for throughput, used with the boost setting. + */ +class BackoffSelectStrategy implements SelectStrategy { + + private int counter = 0; + + @Override + public int calculateStrategy(final IntSupplier supplier, final boolean hasTasks) throws Exception { + int selectNowResult = supplier.get(); + if (hasTasks || selectNowResult != 0) { + counter = 0; + return selectNowResult; + } + counter++; + + if (counter > 2000) { + LockSupport.parkNanos(1); + } else if (counter > 3000) { + Thread.yield(); + } else if (counter > 4000) { + LockSupport.parkNanos(1000); + } else if (counter > 5000) { + // defer to blocking select + counter = 0; + return SelectStrategy.SELECT; + } + + return SelectStrategy.CONTINUE; + } +} diff --git a/couchbase2/src/main/java/com/yahoo/ycsb/db/couchbase2/package-info.java b/couchbase2/src/main/java/com/yahoo/ycsb/db/couchbase2/package-info.java new file mode 100644 index 00000000..0eb3b399 --- /dev/null +++ b/couchbase2/src/main/java/com/yahoo/ycsb/db/couchbase2/package-info.java @@ -0,0 +1,22 @@ +/* + * Copyright (c) 2015 - 2016 YCSB contributors. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. + */ + +/** + * The YCSB binding for Couchbase, new driver. + */ +package com.yahoo.ycsb.db.couchbase2; + diff --git a/distribution/pom.xml b/distribution/pom.xml index e61e5b03..0027c615 100644 --- a/distribution/pom.xml +++ b/distribution/pom.xml @@ -59,6 +59,11 @@ LICENSE file. couchbase-binding ${project.version} + + com.yahoo.ycsb + couchbase2-binding + ${project.version} + com.yahoo.ycsb dynamodb-binding diff --git a/pom.xml b/pom.xml index d1f662fb..5f6e1509 100644 --- a/pom.xml +++ b/pom.xml @@ -90,6 +90,7 @@ LICENSE file. 0.8.0 0.9.5.6 1.4.10 + 2.2.6 1.6.5 3.1.2 5.4.0 @@ -105,6 +106,7 @@ LICENSE file. cassandra cassandra2 couchbase + couchbase2 distribution dynamodb elasticsearch From 57e1ab5a0cae13d1766c8fa6bdbf9d9117ee50d0 Mon Sep 17 00:00:00 2001 From: stack Date: Fri, 8 Apr 2016 16:13:43 -0700 Subject: [PATCH 2/9] [hbase10] Still too many threads #691. Fix broken synchronization meant to guard against over-creation of cluster connection instances on startup when multiple threads. --- .../java/com/yahoo/ycsb/db/HBaseClient10.java | 56 ++++++++++++------- 1 file changed, 35 insertions(+), 21 deletions(-) diff --git a/hbase10/src/main/java/com/yahoo/ycsb/db/HBaseClient10.java b/hbase10/src/main/java/com/yahoo/ycsb/db/HBaseClient10.java index a41c1987..da72f4f8 100644 --- a/hbase10/src/main/java/com/yahoo/ycsb/db/HBaseClient10.java +++ b/hbase10/src/main/java/com/yahoo/ycsb/db/HBaseClient10.java @@ -51,6 +51,7 @@ import java.util.HashMap; import java.util.Map; import java.util.Set; import java.util.Vector; +import java.util.concurrent.atomic.AtomicInteger; /** * HBase 1.0 client for YCSB framework. @@ -63,14 +64,22 @@ import java.util.Vector; */ public class HBaseClient10 extends com.yahoo.ycsb.DB { private Configuration config = HBaseConfiguration.create(); - - // Must be an object for synchronization and tracking running thread counts. - private static Integer threadCount = 0; + + private static AtomicInteger threadCount = new AtomicInteger(0); private boolean debug = false; private String tableName = ""; + + /** + * A Cluster Connection instance that is shared by all running ycsb threads. + * Needs to be initialized late so we pick up command-line configs if any. + * To ensure one instance only in a multi-threaded context, guard access + * with a 'lock' object. + * @See #CONNECTION_LOCK. + */ private static Connection connection = null; + private static final Object CONNECTION_LOCK = new Object(); // Depending on the value of clientSideBuffering, either bufferedMutator // (clientSideBuffering) or currentTable (!clientSideBuffering) will be used. @@ -121,10 +130,10 @@ public class HBaseClient10 extends com.yahoo.ycsb.DB { UserGroupInformation.setConfiguration(config); } - if ((getProperties().getProperty("principal")!=null) + if ((getProperties().getProperty("principal")!=null) && (getProperties().getProperty("keytab")!=null)) { try { - UserGroupInformation.loginUserFromKeytab(getProperties().getProperty("principal"), + UserGroupInformation.loginUserFromKeytab(getProperties().getProperty("principal"), getProperties().getProperty("keytab")); } catch (IOException e) { System.err.println("Keytab file is not readable or not found"); @@ -133,9 +142,10 @@ public class HBaseClient10 extends com.yahoo.ycsb.DB { } try { - synchronized(threadCount) { - ++threadCount; + threadCount.getAndIncrement(); + synchronized (CONNECTION_LOCK) { if (connection == null) { + // Initialize if not set up already. connection = ConnectionFactory.createConnection(config); } } @@ -166,7 +176,9 @@ public class HBaseClient10 extends com.yahoo.ycsb.DB { String table = com.yahoo.ycsb.workloads.CoreWorkload.table; try { final TableName tName = TableName.valueOf(table); - connection.getTable(tName).getTableDescriptor(); + synchronized (CONNECTION_LOCK) { + connection.getTable(tName).getTableDescriptor(); + } } catch (IOException e) { throw new DBException(e); } @@ -193,11 +205,14 @@ public class HBaseClient10 extends com.yahoo.ycsb.DB { long en = System.nanoTime(); final String type = clientSideBuffering ? "UPDATE" : "CLEANUP"; measurements.measure(type, (int) ((en - st) / 1000)); - synchronized(threadCount) { - --threadCount; - if (threadCount <= 0 && connection != null) { - connection.close(); - connection = null; + threadCount.decrementAndGet(); + if (threadCount.get() <= 0) { + // Means we are done so ok to shut down the Connection. + synchronized (CONNECTION_LOCK) { + if (connection != null) { + connection.close(); + connection = null; + } } } } catch (IOException e) { @@ -207,14 +222,13 @@ public class HBaseClient10 extends com.yahoo.ycsb.DB { public void getHTable(String table) throws IOException { final TableName tName = TableName.valueOf(table); - this.currentTable = this.connection.getTable(tName); - // suggestions from - // http://ryantwopointoh.blogspot.com/2009/01/ - // performance-of-hbase-importing.html - if (clientSideBuffering) { - final BufferedMutatorParams p = new BufferedMutatorParams(tName); - p.writeBufferSize(writeBufferSize); - this.bufferedMutator = this.connection.getBufferedMutator(p); + synchronized (CONNECTION_LOCK) { + this.currentTable = connection.getTable(tName); + if (clientSideBuffering) { + final BufferedMutatorParams p = new BufferedMutatorParams(tName); + p.writeBufferSize(writeBufferSize); + this.bufferedMutator = connection.getBufferedMutator(p); + } } } From 710ef6d9cabfd8686e56ae6a6ea6f6b704a29a81 Mon Sep 17 00:00:00 2001 From: Jason Tedor Date: Sat, 9 Apr 2016 09:37:04 -0400 Subject: [PATCH 3/9] [elasticsearch] Fix race condition This commit fixes a race condition that exists between the shards being ready to serve requests and the execution of the workload. --- .../src/main/java/com/yahoo/ycsb/db/ElasticsearchClient.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/elasticsearch/src/main/java/com/yahoo/ycsb/db/ElasticsearchClient.java b/elasticsearch/src/main/java/com/yahoo/ycsb/db/ElasticsearchClient.java index 9de4cc52..3c0762cd 100644 --- a/elasticsearch/src/main/java/com/yahoo/ycsb/db/ElasticsearchClient.java +++ b/elasticsearch/src/main/java/com/yahoo/ycsb/db/ElasticsearchClient.java @@ -149,8 +149,8 @@ public class ElasticsearchClient extends DB { .put("index.number_of_replicas", numberOfReplicas) .put("index.mapping._id.indexed", true) )).actionGet(); - client.admin().cluster().health(new ClusterHealthRequest().waitForGreenStatus()).actionGet(); } + client.admin().cluster().health(new ClusterHealthRequest().waitForGreenStatus()).actionGet(); } private int parseIntegerProperty(Properties properties, String key, int defaultValue) { From 71f00d1fe309c69ccdb388de65f0fe916d42155b Mon Sep 17 00:00:00 2001 From: Chris Larsen Date: Fri, 8 Apr 2016 12:13:55 -0700 Subject: [PATCH 4/9] [googlebigtable] Add a Google Bigtable binding for testing load using the native Protobuf GRPC based API. --- bin/ycsb | 1 + distribution/pom.xml | 5 + googlebigtable/README.md | 80 ++++ googlebigtable/pom.xml | 47 ++ .../yahoo/ycsb/db/GoogleBigtableClient.java | 443 ++++++++++++++++++ .../java/com/yahoo/ycsb/db/package-info.java | 22 + pom.xml | 2 + 7 files changed, 600 insertions(+) create mode 100644 googlebigtable/README.md create mode 100644 googlebigtable/pom.xml create mode 100644 googlebigtable/src/main/java/com/yahoo/ycsb/db/GoogleBigtableClient.java create mode 100644 googlebigtable/src/main/java/com/yahoo/ycsb/db/package-info.java diff --git a/bin/ycsb b/bin/ycsb index 16c5a82a..a5774c75 100755 --- a/bin/ycsb +++ b/bin/ycsb @@ -61,6 +61,7 @@ DATABASES = { "dynamodb" : "com.yahoo.ycsb.db.DynamoDBClient", "elasticsearch": "com.yahoo.ycsb.db.ElasticsearchClient", "geode" : "com.yahoo.ycsb.db.GeodeClient", + "googlebigtable" : "com.yahoo.ycsb.db.GoogleBigtableClient", "googledatastore" : "com.yahoo.ycsb.db.GoogleDatastoreClient", "hbase094" : "com.yahoo.ycsb.db.HBaseClient", "hbase098" : "com.yahoo.ycsb.db.HBaseClient", diff --git a/distribution/pom.xml b/distribution/pom.xml index e61e5b03..a9a84a02 100644 --- a/distribution/pom.xml +++ b/distribution/pom.xml @@ -79,6 +79,11 @@ LICENSE file. googledatastore-binding ${project.version} + + com.yahoo.ycsb + googlebigtable-binding + ${project.version} + com.yahoo.ycsb hbase094-binding diff --git a/googlebigtable/README.md b/googlebigtable/README.md new file mode 100644 index 00000000..3938b525 --- /dev/null +++ b/googlebigtable/README.md @@ -0,0 +1,80 @@ + + +# Google Bigtable Driver for YCSB + +This driver provides a YCSB workload binding for Google's hosted Bigtable, the inspiration for a number of key-value stores like HBase and Cassandra. The Bigtable Java client provides both Protobuf based GRPC and HBase client APIs. This binding implements the Protobuf API for testing the native client. To test Bigtable using the HBase API, see the `hbase10` binding. + +## Quickstart + +### 1. Setup a Bigtable Cluster + +Login to the Google Cloud Console and follow the [Creating Cluster](https://cloud.google.com/bigtable/docs/creating-cluster) steps. Make a note of your cluster name, zone and project ID. + +### 2. Launch the Bigtable Shell + +From the Cloud Console, launch a shell and follow the [Quickstart](https://cloud.google.com/bigtable/docs/quickstart) up to step 4 where you launch the HBase shell. + +### 3. Create a Table + +For best results, use the pre-splitting strategy recommended in [HBASE-4163](https://issues.apache.org/jira/browse/HBASE-4163): + +``` +hbase(main):001:0> n_splits = 200 # HBase recommends (10 * number of regionservers) +hbase(main):002:0> create 'usertable', 'cf', {SPLITS => (1..n_splits).map {|i| "user#{1000+i*(9999-1000)/n_splits}"}} +``` + +Make a note of the column family, in this example it's `cf``. + +### 4. Fetch the Proper ALPN Boot Jar + +The Bigtable protocol uses HTTP/2 which requires an ALPN protocol negotiation implementation. On JVM instantiation the implementation must be loaded before attempting to connect to the cluster. If you're using Java 7 or 8, use this [Jetty Version Table](http://www.eclipse.org/jetty/documentation/current/alpn-chapter.html#alpn-versions) to determine the version appropriate for your JVM. (ALPN is included in JDK 9+). Download the proper jar from [Maven](http://search.maven.org/#search%7Cgav%7C1%7Cg%3A%22org.mortbay.jetty.alpn%22%20AND%20a%3A%22alpn-boot%22) somewhere on your system. + +### 5. Download JSON Credentials + +Follow these instructions for [Generating a JSON key](https://cloud.google.com/bigtable/docs/installing-hbase-shell#service-account) and save it to your host. + +### 6. Load a Workload + +Switch to the root of the YCSB repo and choose the workload you want to run and `load` it first. With the CLI you must provide the column family, cluster properties and the ALPN jar to load. + +``` +bin/ycsb load googlebigtable -p columnfamily=cf -p google.bigtable.project.id= -p google.bigtable.cluster.name= -p google.bigtable.zone.name= -p google.bigtable.auth.service.account.enable=true -p google.bigtable.auth.json.keyfile= -jvm-args='-Xbootclasspath/p:' -P workloads/workloada + +``` + +Make sure to replace the variables in the angle brackets above with the proper value from your cluster. Additional configuration parameters are available below. + +The `load` step only executes inserts into the datastore. After loading data, run the same workload to mix reads with writes. + +``` +bin/ycsb run googlebigtable -p columnfamily=cf -p google.bigtable.project.id= -p google.bigtable.cluster.name= -p google.bigtable.zone.name= -p google.bigtable.auth.service.account.enable=true -p google.bigtable.auth.json.keyfile= -jvm-args='-Xbootclasspath/p:' -P workloads/workloada + +``` + +## Configuration Options + +The following options can be configured using CLI (using the `-p` parameter) or hbase-site.xml (add the HBase config directory to YCSB's class path via CLI). Check the [Cloud Bigtable Client](https://github.com/manolama/cloud-bigtable-client) project for additional tuning parameters. + +* `columnfamily`: (Required) The Bigtable column family to target. +* `google.bigtable.project.id`: (Required) The ID of a Bigtable project. +* `google.bigtable.cluster.name`: (Required) The name of a Bigtable cluster. +* `google.bigtable.zone.name`: (Required) Zone where the Bigtable cluster is running. +* `google.bigtable.auth.service.account.enable`: Whether or not to authenticate with a service account. The default is true. +* `google.bigtable.auth.json.keyfile`: (Required) A service account key for authentication. +* `debug`: If true, prints debug information to standard out. The default is false. +* `clientbuffering`: Whether or not to use client side buffering and batching of write operations. This can significantly improve performance and defaults to true. diff --git a/googlebigtable/pom.xml b/googlebigtable/pom.xml new file mode 100644 index 00000000..5e70aa44 --- /dev/null +++ b/googlebigtable/pom.xml @@ -0,0 +1,47 @@ + + + + + 4.0.0 + + com.yahoo.ycsb + binding-parent + 0.9.0-SNAPSHOT + ../binding-parent/ + + + googlebigtable-binding + Google Cloud Bigtable Binding + jar + + + + com.google.cloud.bigtable + bigtable-hbase-1.0 + ${googlebigtable.version} + + + + com.yahoo.ycsb + core + ${project.version} + provided + + + + \ No newline at end of file diff --git a/googlebigtable/src/main/java/com/yahoo/ycsb/db/GoogleBigtableClient.java b/googlebigtable/src/main/java/com/yahoo/ycsb/db/GoogleBigtableClient.java new file mode 100644 index 00000000..b405d1a2 --- /dev/null +++ b/googlebigtable/src/main/java/com/yahoo/ycsb/db/GoogleBigtableClient.java @@ -0,0 +1,443 @@ +/** + * Copyright (c) 2016 YCSB contributors. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. + */ +package com.yahoo.ycsb.db; + +import java.io.IOException; +import java.nio.charset.Charset; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map.Entry; +import java.util.Properties; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.HBaseConfiguration; +import org.apache.hadoop.hbase.util.Bytes; + +import java.util.Set; +import java.util.Vector; +import java.util.concurrent.ExecutionException; + +import com.google.bigtable.repackaged.com.google.protobuf.ByteString; +import com.google.bigtable.repackaged.com.google.protobuf.ServiceException; +import com.google.bigtable.v1.Column; +import com.google.bigtable.v1.Family; +import com.google.bigtable.v1.MutateRowRequest; +import com.google.bigtable.v1.Mutation; +import com.google.bigtable.v1.ReadRowsRequest; +import com.google.bigtable.v1.Row; +import com.google.bigtable.v1.RowFilter; +import com.google.bigtable.v1.RowRange; +import com.google.bigtable.v1.Mutation.DeleteFromRow; +import com.google.bigtable.v1.Mutation.SetCell; +import com.google.bigtable.v1.RowFilter.Chain.Builder; +import com.google.cloud.bigtable.config.BigtableOptions; +import com.google.cloud.bigtable.grpc.BigtableDataClient; +import com.google.cloud.bigtable.grpc.BigtableSession; +import com.google.cloud.bigtable.grpc.async.AsyncExecutor; +import com.google.cloud.bigtable.grpc.async.HeapSizeManager; +import com.google.cloud.bigtable.hbase.BigtableOptionsFactory; +import com.google.cloud.bigtable.util.ByteStringer; +import com.yahoo.ycsb.ByteArrayByteIterator; +import com.yahoo.ycsb.ByteIterator; +import com.yahoo.ycsb.DBException; +import com.yahoo.ycsb.Status; + +/** + * Google Bigtable Proto client for YCSB framework. + * + * Bigtable offers two APIs. These include a native Protobuf GRPC API as well as + * an HBase API wrapper for the GRPC API. This client implements the Protobuf + * API to test the underlying calls wrapped up in the HBase API. To use the + * HBase API, see the hbase10 client binding. + */ +public class GoogleBigtableClient extends com.yahoo.ycsb.DB { + public static final Charset UTF8_CHARSET = Charset.forName("UTF8"); + + /** Property names for the CLI. */ + private static final String ASYNC_MUTATOR_MAX_MEMORY = "mutatorMaxMemory"; + private static final String ASYNC_MAX_INFLIGHT_RPCS = "mutatorMaxInflightRPCs"; + private static final String CLIENT_SIDE_BUFFERING = "clientbuffering"; + + /** Must be an object for synchronization and tracking running thread counts. */ + private static Integer threadCount = 0; + + /** This will load the hbase-site.xml config file and/or store CLI options. */ + private static final Configuration CONFIG = HBaseConfiguration.create(); + + /** Print debug information to standard out. */ + private boolean debug = false; + + /** Global Bigtable native API objects. */ + private static BigtableOptions options; + private static BigtableSession session; + + /** Thread loacal Bigtable native API objects. */ + private BigtableDataClient client; + private HeapSizeManager heapSizeManager; + private AsyncExecutor asyncExecutor; + + /** The column family use for the workload. */ + private byte[] columnFamilyBytes; + + /** Cache for the last table name/ID to avoid byte conversions. */ + private String lastTable = ""; + private byte[] lastTableBytes; + + /** + * If true, buffer mutations on the client. For measuring insert/update/delete + * latencies, client side buffering should be disabled. + */ + private boolean clientSideBuffering = false; + + @Override + public void init() throws DBException { + Properties props = getProperties(); + + // Defaults the user can override if needed + CONFIG.set("google.bigtable.auth.service.account.enable", "true"); + + // make it easy on ourselves by copying all CLI properties into the config object. + final Iterator> it = props.entrySet().iterator(); + while (it.hasNext()) { + Entry entry = it.next(); + CONFIG.set((String)entry.getKey(), (String)entry.getValue()); + } + + clientSideBuffering = getProperties().getProperty(CLIENT_SIDE_BUFFERING, "false") + .equals("true") ? true : false; + + System.err.println("Running Google Bigtable with Proto API" + + (clientSideBuffering ? " and client side buffering." : ".")); + + synchronized (threadCount) { + ++threadCount; + if (session == null) { + try { + options = BigtableOptionsFactory.fromConfiguration(CONFIG); + session = new BigtableSession(options); + // important to instantiate the first client here, otherwise the + // other threads may receive an NPE from the options when they try + // to read the cluster name. + client = session.getDataClient(); + } catch (IOException e) { + throw new DBException("Error loading options from config: ", e); + } + } else { + client = session.getDataClient(); + } + + if (clientSideBuffering) { + heapSizeManager = new HeapSizeManager( + Long.parseLong( + getProperties().getProperty(ASYNC_MUTATOR_MAX_MEMORY, + Long.toString(AsyncExecutor.ASYNC_MUTATOR_MAX_MEMORY_DEFAULT))), + Integer.parseInt( + getProperties().getProperty(ASYNC_MAX_INFLIGHT_RPCS, + Integer.toString(AsyncExecutor.MAX_INFLIGHT_RPCS_DEFAULT)))); + asyncExecutor = new AsyncExecutor(client, heapSizeManager); + } + } + + if ((getProperties().getProperty("debug") != null) + && (getProperties().getProperty("debug").compareTo("true") == 0)) { + debug = true; + } + + final String columnFamily = getProperties().getProperty("columnfamily"); + if (columnFamily == null) { + System.err.println("Error, must specify a columnfamily for Bigtable table"); + throw new DBException("No columnfamily specified"); + } + columnFamilyBytes = Bytes.toBytes(columnFamily); + } + + @Override + public void cleanup() throws DBException { + if (asyncExecutor != null) { + try { + asyncExecutor.flush(); + } catch (IOException e) { + throw new DBException(e); + } + } + synchronized (threadCount) { + --threadCount; + if (threadCount <= 0) { + try { + session.close(); + } catch (IOException e) { + throw new DBException(e); + } + } + } + } + + @Override + public Status read(String table, String key, Set fields, + HashMap result) { + if (debug) { + System.out.println("Doing read from Bigtable columnfamily " + + new String(columnFamilyBytes)); + System.out.println("Doing read for key: " + key); + } + + setTable(table); + + RowFilter filter = RowFilter.newBuilder() + .setFamilyNameRegexFilterBytes(ByteStringer.wrap(columnFamilyBytes)) + .build(); + if (fields != null && fields.size() > 0) { + Builder filterChain = RowFilter.Chain.newBuilder(); + filterChain.addFilters(filter); + filterChain.addFilters(RowFilter.newBuilder() + .setCellsPerColumnLimitFilter(1) + .build()); + int count = 0; + // usually "field#" so pre-alloc + final StringBuilder regex = new StringBuilder(fields.size() * 6); + for (final String field : fields) { + if (count++ > 0) { + regex.append("|"); + } + regex.append(field); + } + filterChain.addFilters(RowFilter.newBuilder() + .setColumnQualifierRegexFilter( + ByteStringer.wrap(regex.toString().getBytes()))).build(); + filter = RowFilter.newBuilder().setChain(filterChain.build()).build(); + } + + final ReadRowsRequest.Builder rrr = ReadRowsRequest.newBuilder() + .setTableNameBytes(ByteStringer.wrap(lastTableBytes)) + .setFilter(filter) + .setRowKey(ByteStringer.wrap(key.getBytes())); + + List rows; + try { + rows = client.readRowsAsync(rrr.build()).get(); + if (rows == null || rows.isEmpty()) { + return Status.NOT_FOUND; + } + for (final Row row : rows) { + for (final Family family : row.getFamiliesList()) { + if (Arrays.equals(family.getNameBytes().toByteArray(), columnFamilyBytes)) { + for (final Column column : family.getColumnsList()) { + // we should only have a single cell per column + result.put(column.getQualifier().toString(UTF8_CHARSET), + new ByteArrayByteIterator(column.getCells(0).getValue().toByteArray())); + if (debug) { + System.out.println( + "Result for field: " + column.getQualifier().toString(UTF8_CHARSET) + + " is: " + column.getCells(0).getValue().toString(UTF8_CHARSET)); + } + } + } + } + } + + return Status.OK; + } catch (InterruptedException e) { + System.err.println("Interrupted during get: " + e); + Thread.currentThread().interrupt(); + return Status.ERROR; + } catch (ExecutionException e) { + System.err.println("Exception during get: " + e); + return Status.ERROR; + } + } + + @Override + public Status scan(String table, String startkey, int recordcount, + Set fields, Vector> result) { + setTable(table); + + RowFilter filter = RowFilter.newBuilder() + .setFamilyNameRegexFilterBytes(ByteStringer.wrap(columnFamilyBytes)) + .build(); + if (fields != null && fields.size() > 0) { + Builder filterChain = RowFilter.Chain.newBuilder(); + filterChain.addFilters(filter); + filterChain.addFilters(RowFilter.newBuilder() + .setCellsPerColumnLimitFilter(1) + .build()); + int count = 0; + // usually "field#" so pre-alloc + final StringBuilder regex = new StringBuilder(fields.size() * 6); + for (final String field : fields) { + if (count++ > 0) { + regex.append("|"); + } + regex.append(field); + } + filterChain.addFilters(RowFilter.newBuilder() + .setColumnQualifierRegexFilter( + ByteStringer.wrap(regex.toString().getBytes()))).build(); + filter = RowFilter.newBuilder().setChain(filterChain.build()).build(); + } + + final RowRange range = RowRange.newBuilder() + .setStartKey(ByteStringer.wrap(startkey.getBytes())) + .build(); + + final ReadRowsRequest.Builder rrr = ReadRowsRequest.newBuilder() + .setTableNameBytes(ByteStringer.wrap(lastTableBytes)) + .setFilter(filter) + .setRowRange(range); + + List rows; + try { + rows = client.readRowsAsync(rrr.build()).get(); + if (rows == null || rows.isEmpty()) { + return Status.NOT_FOUND; + } + int numResults = 0; + + for (final Row row : rows) { + final HashMap rowResult = + new HashMap(fields != null ? fields.size() : 10); + + for (final Family family : row.getFamiliesList()) { + if (Arrays.equals(family.getNameBytes().toByteArray(), columnFamilyBytes)) { + for (final Column column : family.getColumnsList()) { + // we should only have a single cell per column + rowResult.put(column.getQualifier().toString(UTF8_CHARSET), + new ByteArrayByteIterator(column.getCells(0).getValue().toByteArray())); + if (debug) { + System.out.println( + "Result for field: " + column.getQualifier().toString(UTF8_CHARSET) + + " is: " + column.getCells(0).getValue().toString(UTF8_CHARSET)); + } + } + } + } + + result.add(rowResult); + + numResults++; + if (numResults >= recordcount) {// if hit recordcount, bail out + break; + } + } + return Status.OK; + } catch (InterruptedException e) { + System.err.println("Interrupted during scan: " + e); + Thread.currentThread().interrupt(); + return Status.ERROR; + } catch (ExecutionException e) { + System.err.println("Exception during scan: " + e); + return Status.ERROR; + } + } + + @Override + public Status update(String table, String key, + HashMap values) { + if (debug) { + System.out.println("Setting up put for key: " + key); + } + + setTable(table); + + final MutateRowRequest.Builder rowMutation = MutateRowRequest.newBuilder(); + rowMutation.setRowKey(ByteString.copyFromUtf8(key)); + rowMutation.setTableNameBytes(ByteStringer.wrap(lastTableBytes)); + + for (final Entry entry : values.entrySet()) { + final Mutation.Builder mutationBuilder = rowMutation.addMutationsBuilder(); + final SetCell.Builder setCellBuilder = mutationBuilder.getSetCellBuilder(); + + setCellBuilder.setFamilyNameBytes(ByteStringer.wrap(columnFamilyBytes)); + setCellBuilder.setColumnQualifier(ByteStringer.wrap(entry.getKey().getBytes())); + setCellBuilder.setValue(ByteStringer.wrap(entry.getValue().toArray())); + setCellBuilder.setTimestampMicros(System.nanoTime() / 1000); + } + + try { + if (clientSideBuffering) { + asyncExecutor.mutateRowAsync(rowMutation.build()); + } else { + client.mutateRow(rowMutation.build()); + } + return Status.OK; + } catch (ServiceException e) { + System.err.println("Failed to insert key: " + key + " " + e.getMessage()); + return Status.ERROR; + } catch (InterruptedException e) { + System.err.println("Interrupted while inserting key: " + key + " " + + e.getMessage()); + Thread.currentThread().interrupt(); + return Status.ERROR; // never get here, but lets make the compiler happy + } + } + + @Override + public Status insert(String table, String key, + HashMap values) { + return update(table, key, values); + } + + @Override + public Status delete(String table, String key) { + if (debug) { + System.out.println("Doing delete for key: " + key); + } + + setTable(table); + + final MutateRowRequest.Builder rowMutation = MutateRowRequest.newBuilder() + .setRowKey(ByteString.copyFromUtf8(key)) + .setTableNameBytes(ByteStringer.wrap(lastTableBytes)); + rowMutation.addMutationsBuilder().setDeleteFromRow( + DeleteFromRow.getDefaultInstance()); + + try { + if (clientSideBuffering) { + asyncExecutor.mutateRowAsync(rowMutation.build()); + } else { + client.mutateRow(rowMutation.build()); + } + return Status.OK; + } catch (ServiceException e) { + System.err.println("Failed to delete key: " + key + " " + e.getMessage()); + return Status.ERROR; + } catch (InterruptedException e) { + System.err.println("Interrupted while delete key: " + key + " " + + e.getMessage()); + Thread.currentThread().interrupt(); + return Status.ERROR; // never get here, but lets make the compiler happy + } + } + + /** + * Little helper to set the table byte array. If it's different than the last + * table we reset the byte array. Otherwise we just use the existing array. + * @param table The table we're operating against + */ + private void setTable(final String table) { + if (!lastTable.equals(table)) { + lastTable = table; + lastTableBytes = options + .getClusterName() + .toTableName(table) + .toString() + .getBytes(); + } + } + +} \ No newline at end of file diff --git a/googlebigtable/src/main/java/com/yahoo/ycsb/db/package-info.java b/googlebigtable/src/main/java/com/yahoo/ycsb/db/package-info.java new file mode 100644 index 00000000..f0ab9e74 --- /dev/null +++ b/googlebigtable/src/main/java/com/yahoo/ycsb/db/package-info.java @@ -0,0 +1,22 @@ +/* + * Copyright (c) 2016 YCSB contributors. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. + */ + +/** + * The YCSB binding for Google's + * Bigtable. + */ +package com.yahoo.ycsb.db; diff --git a/pom.xml b/pom.xml index d1f662fb..f664aec3 100644 --- a/pom.xml +++ b/pom.xml @@ -76,6 +76,7 @@ LICENSE file. 1.0.3 3.0.0 1.0.0-incubating.M1 + 0.2.3 7.2.2.Final 0.6.0 2.1.1 @@ -109,6 +110,7 @@ LICENSE file. dynamodb elasticsearch geode + googlebigtable googledatastore hbase094 hbase098 From b2e302d0ba023f2f9f90ef35f73e2f7d5351f195 Mon Sep 17 00:00:00 2001 From: Chris Larsen Date: Fri, 8 Apr 2016 11:50:15 -0700 Subject: [PATCH 5/9] [hbase10] Add missing `durability` documentation to readme. Add documentation on how to use the hbase10 client with Google's Bigtable. --- hbase10/README.md | 99 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 96 insertions(+), 3 deletions(-) diff --git a/hbase10/README.md b/hbase10/README.md index 1da5bc43..dd01249e 100644 --- a/hbase10/README.md +++ b/hbase10/README.md @@ -1,5 +1,5 @@ # HBase (1.0.x) Driver for YCSB -This driver is a binding for the YCSB facilities to operate against a HBase 1.0.x Server cluster. +This driver is a binding for the YCSB facilities to operate against a HBase 1.0.x Server cluster or Google's hosted Bigtable. To run against an HBase 0.94.x cluster, use the `hbase094` binding. To run against an HBase 0.98.x cluster, use the `hbase098` binding. -See `hbase098/README.md` for configuration details. +See `hbase098/README.md` for a quickstart to setup HBase for load testing and common configuration details. + +## Configuration Options +In addition to those options available for the `hbase098` binding, the following options are available for the `hbase10` binding: + +* `durability`: Whether or not writes should be appended to the WAL. Bypassing the WAL can improve throughput but data cannot be recovered in the event of a crash. The default is true. + +## Bigtable + +Google's Bigtable service provides an implementation of the HBase API for migrating existing applications. Users can perform load tests against Bigtable using this binding. + +### 1. Setup a Bigtable Cluster + +Login to the Google Cloud Console and follow the [Creating Cluster](https://cloud.google.com/bigtable/docs/creating-cluster) steps. Make a note of your cluster name, zone and project ID. + +### 2. Launch the Bigtable Shell + +From the Cloud Console, launch a shell and follow the [Quickstart](https://cloud.google.com/bigtable/docs/quickstart) up to step 4 where you launch the HBase shell. + +### 3. Create a Table + +For best results, use the pre-splitting strategy recommended in [HBASE-4163](https://issues.apache.org/jira/browse/HBASE-4163): + +``` +hbase(main):001:0> n_splits = 200 # HBase recommends (10 * number of regionservers) +hbase(main):002:0> create 'usertable', 'cf', {SPLITS => (1..n_splits).map {|i| "user#{1000+i*(9999-1000)/n_splits}"}} +``` + +Make a note of the column family, in this example it's `cf``. + +### 4. Fetch the Proper ALPN Boot Jar + +The Bigtable protocol uses HTTP/2 which requires an ALPN protocol negotiation implementation. On JVM instantiation the implementation must be loaded before attempting to connect to the cluster. If you're using Java 7 or 8, use this [Jetty Version Table](http://www.eclipse.org/jetty/documentation/current/alpn-chapter.html#alpn-versions) to determine the version appropriate for your JVM. (ALPN is included in JDK 9+). Download the proper jar from [Maven](http://search.maven.org/#search%7Cgav%7C1%7Cg%3A%22org.mortbay.jetty.alpn%22%20AND%20a%3A%22alpn-boot%22) somewhere on your system. + +### 5. Download the Bigtable Client Jar + +Download one of the `bigtable-hbase-1.#` jars from [Maven](http://search.maven.org/#search%7Cga%7C1%7Ccom.google.cloud.bigtable) to your host. + +### 6. Download JSON Credentials + +Follow these instructions for [Generating a JSON key](https://cloud.google.com/bigtable/docs/installing-hbase-shell#service-account) and save it to your host. + +### 7. Create or Edit hbase-site.xml + +If you have an existing HBase configuration directory with an `hbase-site.xml` file, edit the file as per below. If not, create a directory called `conf` under the `hbase10` directory. Create a file in the conf directory named `hbase-site.xml`. Provide the following settings in the XML file, making sure to replace the bracketed examples with the proper values from your Cloud console. + +``` + + + hbase.client.connection.impl + com.google.cloud.bigtable.hbase1_0.BigtableConnection + + + google.bigtable.cluster.name + [YOUR-CLUSTER-ID] + + + google.bigtable.project.id + [YOUR-PROJECT-ID] + + + google.bigtable.zone.name + [YOUR-ZONE-NAME] + + + google.bigtable.auth.service.account.enable + true + + + google.bigtable.auth.json.keyfile + [PATH-TO-YOUR-KEY-FILE] + + +``` + +If you wish to try other API implementations (1.1.x or 1.2.x) change the `hbase.client.connection.impl` appropriately to match the JAR you downloaded. + +If you have an existing HBase config directory, make sure to add it to the class path via `-cp :`. + +### 8. Execute a Workload + +Switch to the root of the YCSB repo and choose the workload you want to run and `load` it first. With the CLI you must provide the column family, cluster properties and the ALPN jar to load. + +``` +bin/ycsb load hbase10 -p columnfamily=cf -cp -jvm-args='-Xbootclasspath/p:' -P workloads/workloada + +``` + +The `load` step only executes inserts into the datastore. After loading data, run the same workload to mix reads with writes. + +``` +bin/ycsb run hbase10 -p columnfamily=cf -jvm-args='-Xbootclasspath/p:' -P workloads/workloada + +``` \ No newline at end of file From 1479818c81f21db5e86505fd2b80783467d8c952 Mon Sep 17 00:00:00 2001 From: Chris Larsen Date: Fri, 8 Apr 2016 11:48:16 -0700 Subject: [PATCH 6/9] [hbase098] Add missing `writebuffersize` documentation and note to modify hbase-site.xml --- hbase098/README.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/hbase098/README.md b/hbase098/README.md index e6a7fb41..83c3c7a0 100644 --- a/hbase098/README.md +++ b/hbase098/README.md @@ -1,5 +1,5 @@