gh-88 YCSB client for Amazon DynamoDB

This commit is contained in:
Janani Narayanan 2012-08-02 19:09:30 -07:00 коммит произвёл Michi Mutsuzaki
Родитель be8d7f1337
Коммит 5813dbcaaf
9 изменённых файлов: 459 добавлений и 8 удалений

8
.gitignore поставляемый
Просмотреть файл

@ -1,8 +0,0 @@
.classpath
.project
.settings
db/*/lib/*.jar
db/*/lib/*.zip
*.iml
.idea

Просмотреть файл

@ -28,6 +28,7 @@ DATABASES = {
"cassandra-7" : "com.yahoo.ycsb.db.CassandraClient7",
"cassandra-8" : "com.yahoo.ycsb.db.CassandraClient8",
"cassandra-10" : "com.yahoo.ycsb.db.CassandraClient10",
"dynamodb" : "com.yahoo.ycsb.db.DynamoDBClient",
"gemfire" : "com.yahoo.ycsb.db.GemFireClient",
"hbase" : "com.yahoo.ycsb.db.HBaseClient",
"hypertable" : "com.yahoo.ycsb.db.HypertableClient",

51
dynamodb/README Normal file
Просмотреть файл

@ -0,0 +1,51 @@
CONFIGURE
YCSB_HOME - YCSB home directory
DYNAMODB_HOME - Amazon DynamoDB package files
BENCHMARK
$YCSB_HOME/bin/ycsb load dynamodb -P workloads/workloada -P dynamodb.properties
$YCSB_HOME/bin/ycsb run dynamodb -P workloads/workloada -P dynamodb.properties
PROPERTIES
$DYNAMODB_HOME/conf/dynamodb.properties
$DYNAMODB_HOME/conf/AWSCredentials.properties
FAQs
* Why is the recommended workload distribution set to 'uniform'?
This is to conform with the best practices for using DynamoDB - uniform,
evenly distributed workload is the recommended pattern for scaling and
getting predictable performance out of DynamoDB
For more information refer to
http://docs.amazonwebservices.com/amazondynamodb/latest/developerguide/BestPractices.html
* How does workload size affect provisioned throughput?
The default payload size requires double the provisioned throughput to execute
the workload. This translates to double the provisioned throughput cost for testing.
The default item size in YCSB is 1000 bytes plus metadata overhead, which makes the
item exceed 1024 bytes. DynamoDB charges one capacity unit per 1024 bytes for reads
or writes. An item that is greater than 1024 bytes but less than or equal to 2048 bytes
would cost 2 capacity units. With the change in payload size, each request would cost
1 capacity unit as opposed to 2, saving the cost of running the benchmark.
For more information refer to
http://docs.amazonwebservices.com/amazondynamodb/latest/developerguide/WorkingWithDDTables.html
* How do you know if DynamoDB throttling is affecting benchmarking?
Monitor CloudWatch for ThrottledRequests and if ThrottledRequests is greater
than zero, either increase the DynamoDB table provisioned throughput or reduce
YCSB throughput by reducing the YCSB target throughput, adjusting the number of YCSB
client threads, or a combination of both.
For more information please refer to
https://github.com/brianfrankcooper/YCSB/blob/master/doc/tipsfaq.html
When requests are throttled, latency measurements by YCSB can increase.
Please refer to http://aws.amazon.com/dynamodb/faqs/ for more information.
Please refer to Amazon DynamoDB docs here:
http://aws.amazon.com/documentation/dynamodb/

Просмотреть файл

@ -0,0 +1,4 @@
# Fill in your AWS Access Key ID and Secret Access Key
# http://aws.amazon.com/security-credentials
#accessKey =
#secretKey =

Просмотреть файл

@ -0,0 +1,37 @@
#
# Sample property file for Amazon DynamoDB database client
## Mandatory parameters
# AWS credentials associated with your aws account.
#dynamodb.awsCredentialsFile = <path to AWSCredentials.properties>
# Name of the primary key attribute of table 'usertable'
#dynamodb.primaryKey = <firstname>
## Optional parameters
# Endpoint to connect to. For best latency, it is recommended
# to choose the endpoint that is closest to the client.
# Default is us-east-1
#dynamodb.endpoint = http://dynamodb.us-east-1.amazonaws.com
# Strongly recommended to set to uniform. Refer to the FAQs in the README
#requestdistribution = uniform
# Enable/disable debug messages. Defaults to false
# "true" or "false"
#dynamodb.debug = false
# Maximum number of concurrent connections
#dynamodb.connectMax = 50
# Read consistency. Consistent reads are expensive and consume twice
# as many resources as eventually consistent reads. Defaults to false.
# "true" or "false"
#dynamodb.consistentReads = false
# Workload size has implications on provisioned read and write
# capacity units. Refer to the FAQs in the README
#fieldcount = 10
#fieldlength = 90

55
dynamodb/pom.xml Normal file
Просмотреть файл

@ -0,0 +1,55 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven module descriptor for the YCSB DynamoDB binding. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<!-- Inherits shared settings from the YCSB root POM. -->
<parent>
<groupId>com.yahoo.ycsb</groupId>
<artifactId>root</artifactId>
<version>0.1.4</version>
</parent>
<artifactId>dynamodb-binding</artifactId>
<name>DynamoDB DB Binding</name>
<dependencies>
<!-- AWS SDK providing the DynamoDB client used by the binding. -->
<dependency>
<groupId>com.amazonaws</groupId>
<artifactId>aws-java-sdk</artifactId>
<version>1.3.14</version>
</dependency>
<!-- Logging library used by the binding. -->
<dependency>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
<version>1.2.17</version>
</dependency>
<!-- YCSB core module, pinned to the same version as this module. -->
<dependency>
<groupId>com.yahoo.ycsb</groupId>
<artifactId>core</artifactId>
<version>${project.version}</version>
</dependency>
</dependencies>
<build>
<plugins>
<!-- Builds a single jar that bundles all dependencies during the package phase. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-assembly-plugin</artifactId>
<!-- maven.assembly.version is not defined in this file; presumably set by the parent POM (confirm). -->
<version>${maven.assembly.version}</version>
<configuration>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
<!-- Keep the plain artifact name instead of appending the assembly id suffix. -->
<appendAssemblyId>false</appendAssemblyId>
</configuration>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>

Просмотреть файл

@ -0,0 +1,300 @@
/*
* Copyright 2012 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://aws.amazon.com/apache2.0
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
package com.yahoo.ycsb.db;
import java.io.FileInputStream;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Properties;
import java.util.Set;
import java.util.Vector;
import java.io.File;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import com.amazonaws.auth.AWSCredentials;
import com.amazonaws.auth.PropertiesCredentials;
import com.amazonaws.ClientConfiguration;
import com.amazonaws.services.dynamodb.AmazonDynamoDBClient;
import com.amazonaws.AmazonClientException;
import com.amazonaws.AmazonServiceException;
import com.amazonaws.services.dynamodb.model.AttributeValue;
import com.amazonaws.services.dynamodb.model.AttributeValueUpdate;
import com.amazonaws.services.dynamodb.model.DeleteItemRequest;
import com.amazonaws.services.dynamodb.model.DeleteItemResult;
import com.amazonaws.services.dynamodb.model.GetItemRequest;
import com.amazonaws.services.dynamodb.model.GetItemResult;
import com.amazonaws.services.dynamodb.model.Key;
import com.amazonaws.services.dynamodb.model.PutItemRequest;
import com.amazonaws.services.dynamodb.model.PutItemResult;
import com.amazonaws.services.dynamodb.model.ScanRequest;
import com.amazonaws.services.dynamodb.model.ScanResult;
import com.amazonaws.services.dynamodb.model.UpdateItemRequest;
import com.yahoo.ycsb.ByteIterator;
import com.yahoo.ycsb.DB;
import com.yahoo.ycsb.DBException;
import com.yahoo.ycsb.StringByteIterator;
/**
 * YCSB database binding for Amazon DynamoDB, written against the AWS SDK for
 * Java 1.3.14.
 *
 * <p>Every operation returns {@code OK} (0) on success, {@code SERVER_ERROR}
 * (1) when the service rejects the request, or {@code CLIENT_ERROR} (2) when
 * the request fails on the client side.
 *
 * <p>Configuration is read from the YCSB properties:
 * <ul>
 *   <li>{@code dynamodb.awsCredentialsFile} - path to the credentials file (mandatory)</li>
 *   <li>{@code dynamodb.primaryKey} - name of the hash key attribute (mandatory)</li>
 *   <li>{@code dynamodb.endpoint} - service endpoint, defaults to us-east-1</li>
 *   <li>{@code dynamodb.connectMax} - maximum number of connections, defaults to 50</li>
 *   <li>{@code dynamodb.consistentReads} - "true" for consistent reads, defaults to false</li>
 *   <li>{@code dynamodb.debug} - "true" to switch the logger to DEBUG</li>
 * </ul>
 */
public class DynamoDBClient extends DB {
    private static final int OK = 0;
    private static final int SERVER_ERROR = 1;
    private static final int CLIENT_ERROR = 2;

    private AmazonDynamoDBClient dynamoDB;
    private String primaryKeyName;
    private boolean debug = false;
    private boolean consistentRead = false;
    private String endpoint = "http://dynamodb.us-east-1.amazonaws.com";
    private int maxConnects = 50;
    private static Logger logger = Logger.getLogger(DynamoDBClient.class);

    public DynamoDBClient() {}

    /**
     * Initialize any state for this DB. Called once per DB instance; there is
     * one DB instance per client thread.
     *
     * @throws DBException if a mandatory property is missing, a property is
     *         malformed, or the DynamoDB client cannot be created. Failing
     *         here avoids a NullPointerException on the first operation.
     */
    @Override
    public void init() throws DBException {
        Properties props = getProperties();

        String debugProp = props.getProperty("dynamodb.debug", null);
        if ("true".equalsIgnoreCase(debugProp)) {
            this.debug = true;
            logger.setLevel(Level.DEBUG);
        }

        String endpointProp = props.getProperty("dynamodb.endpoint", null);
        String credentialsFile = props.getProperty("dynamodb.awsCredentialsFile", null);
        String primaryKey = props.getProperty("dynamodb.primaryKey", null);
        String consistentReads = props.getProperty("dynamodb.consistentReads", null);
        String connectMax = props.getProperty("dynamodb.connectMax", null);

        if (null != connectMax) {
            try {
                this.maxConnects = Integer.parseInt(connectMax.trim());
            } catch (NumberFormatException nfe) {
                String errMsg = "Invalid value for dynamodb.connectMax: " + connectMax;
                logger.error(errMsg);
                throw new DBException(errMsg, nfe);
            }
        }

        if ("true".equalsIgnoreCase(consistentReads)) {
            this.consistentRead = true;
        }

        if (null != endpointProp) {
            this.endpoint = endpointProp;
        }

        if (null == primaryKey || primaryKey.length() < 1) {
            String errMsg = "Missing primary key attribute name, cannot continue";
            logger.error(errMsg);
            throw new DBException(errMsg);
        }

        if (null == credentialsFile || credentialsFile.length() < 1) {
            String errMsg = "Missing dynamodb.awsCredentialsFile property, cannot continue";
            logger.error(errMsg);
            throw new DBException(errMsg);
        }

        try {
            AWSCredentials credentials = new PropertiesCredentials(new File(credentialsFile));
            ClientConfiguration cconfig = new ClientConfiguration();
            cconfig.setMaxConnections(maxConnects);
            dynamoDB = new AmazonDynamoDBClient(credentials, cconfig);
            dynamoDB.setEndpoint(this.endpoint);
            primaryKeyName = primaryKey;
            logger.info("dynamodb connection created with " + this.endpoint);
        } catch (Exception e1) {
            String errMsg = "DynamoDBClient.init(): Could not initialize DynamoDB client: " + e1.getMessage();
            logger.error(errMsg, e1);
            throw new DBException(errMsg, e1);
        }
    }

    /**
     * Reads one item by key.
     *
     * @param table  table to read from
     * @param key    hash key value of the item
     * @param fields attributes to fetch, passed to the request as given
     * @param result receives the item's attributes; left untouched when the
     *               item does not exist
     * @return OK, SERVER_ERROR or CLIENT_ERROR
     */
    @Override
    public int read(String table, String key, Set<String> fields,
            HashMap<String, ByteIterator> result) {
        if (logger.isDebugEnabled()) {
            logger.debug("readkey: " + key + " from table: " + table);
        }

        GetItemRequest req = new GetItemRequest(table, createPrimaryKey(key));
        req.setAttributesToGet(fields);
        req.setConsistentRead(consistentRead);

        GetItemResult res;
        try {
            res = dynamoDB.getItem(req);
        } catch (AmazonServiceException ex) {
            // Must be caught first: it is a subclass of AmazonClientException.
            logger.error(ex.getMessage());
            return SERVER_ERROR;
        } catch (AmazonClientException ex) {
            logger.error(ex.getMessage());
            return CLIENT_ERROR;
        }

        if (null != res.getItem()) {
            result.putAll(extractResult(res.getItem()));
            if (logger.isDebugEnabled()) {
                logger.debug("Result: " + res.toString());
            }
        }
        return OK;
    }

    /**
     * Reads up to {@code recordcount} items beginning at {@code startkey}.
     *
     * @param table       table to scan
     * @param startkey    hash key value of the first item to return
     * @param recordcount maximum number of items to return
     * @param fields      attributes to fetch, passed to the requests as given
     * @param result      receives one map per item found
     * @return OK, SERVER_ERROR or CLIENT_ERROR
     */
    @Override
    public int scan(String table, String startkey, int recordcount,
            Set<String> fields, Vector<HashMap<String, ByteIterator>> result) {
        if (logger.isDebugEnabled()) {
            logger.debug("scan " + recordcount + " records from key: " + startkey + " on table: " + table);
        }

        /*
         * on DynamoDB's scan, startkey is *exclusive* so we need to
         * getItem(startKey) and then use scan for the rest
         */
        GetItemRequest greq = new GetItemRequest(table, createPrimaryKey(startkey));
        greq.setAttributesToGet(fields);

        GetItemResult gres;
        try {
            gres = dynamoDB.getItem(greq);
        } catch (AmazonServiceException ex) {
            logger.error(ex.getMessage());
            return SERVER_ERROR;
        } catch (AmazonClientException ex) {
            logger.error(ex.getMessage());
            return CLIENT_ERROR;
        }

        // Count the start item only if it exists, so a missing start key does
        // not shorten the result by one record.
        int count = 0;
        if (null != gres.getItem()) {
            result.add(extractResult(gres.getItem()));
            count = 1;
        }

        Key startKey = createPrimaryKey(startkey);
        ScanRequest req = new ScanRequest(table);
        req.setAttributesToGet(fields);

        while (count < recordcount) {
            req.setExclusiveStartKey(startKey);
            req.setLimit(recordcount - count);

            ScanResult res;
            try {
                res = dynamoDB.scan(req);
            } catch (AmazonServiceException ex) {
                logger.error(ex.getMessage(), ex);
                return SERVER_ERROR;
            } catch (AmazonClientException ex) {
                logger.error(ex.getMessage(), ex);
                return CLIENT_ERROR;
            }

            if (null != res.getItems()) {
                for (Map<String, AttributeValue> item : res.getItems()) {
                    result.add(extractResult(item));
                    count++;
                }
            }

            startKey = res.getLastEvaluatedKey();
            if (null == startKey) {
                // No more pages. Continuing with a null start key would
                // restart the scan from the beginning: duplicate records, or
                // an endless loop on a table with fewer items than requested.
                break;
            }
        }
        return OK;
    }

    /**
     * Overwrites the given attributes of an item using the PUT update action.
     *
     * @param table  table holding the item
     * @param key    hash key value of the item
     * @param values attribute names mapped to their new values
     * @return OK, SERVER_ERROR or CLIENT_ERROR
     */
    @Override
    public int update(String table, String key, HashMap<String, ByteIterator> values) {
        if (logger.isDebugEnabled()) {
            logger.debug("updatekey: " + key + " from table: " + table);
        }

        Map<String, AttributeValueUpdate> attributes =
                new HashMap<String, AttributeValueUpdate>(values.size());
        for (Entry<String, ByteIterator> val : values.entrySet()) {
            AttributeValue v = new AttributeValue(val.getValue().toString());
            attributes.put(val.getKey(), new AttributeValueUpdate().withValue(v).withAction("PUT"));
        }

        UpdateItemRequest req = new UpdateItemRequest(table, createPrimaryKey(key), attributes);
        try {
            dynamoDB.updateItem(req);
        } catch (AmazonServiceException ex) {
            logger.error(ex.getMessage());
            return SERVER_ERROR;
        } catch (AmazonClientException ex) {
            logger.error(ex.getMessage());
            return CLIENT_ERROR;
        }
        return OK;
    }

    /**
     * Writes a complete item, stored under the configured primary key attribute.
     *
     * @param table  table to write to
     * @param key    hash key value of the new item
     * @param values attribute names mapped to their values
     * @return OK, SERVER_ERROR or CLIENT_ERROR
     */
    @Override
    public int insert(String table, String key, HashMap<String, ByteIterator> values) {
        if (logger.isDebugEnabled()) {
            logger.debug("insertkey: " + primaryKeyName + "-" + key + " from table: " + table);
        }

        Map<String, AttributeValue> attributes = createAttributes(values);
        // adding primary key
        attributes.put(primaryKeyName, new AttributeValue(key));

        PutItemRequest putItemRequest = new PutItemRequest(table, attributes);
        try {
            dynamoDB.putItem(putItemRequest);
        } catch (AmazonServiceException ex) {
            logger.error(ex.getMessage());
            return SERVER_ERROR;
        } catch (AmazonClientException ex) {
            logger.error(ex.getMessage());
            return CLIENT_ERROR;
        }
        // Return a status code, not the consumed capacity units: a non-zero
        // capacity value would collide with the error codes declared above.
        return OK;
    }

    /**
     * Deletes one item by key.
     *
     * @param table table holding the item
     * @param key   hash key value of the item
     * @return OK, SERVER_ERROR or CLIENT_ERROR
     */
    @Override
    public int delete(String table, String key) {
        if (logger.isDebugEnabled()) {
            logger.debug("deletekey: " + key + " from table: " + table);
        }

        DeleteItemRequest req = new DeleteItemRequest(table, createPrimaryKey(key));
        try {
            dynamoDB.deleteItem(req);
        } catch (AmazonServiceException ex) {
            logger.error(ex.getMessage());
            return SERVER_ERROR;
        } catch (AmazonClientException ex) {
            logger.error(ex.getMessage());
            return CLIENT_ERROR;
        }
        // Status code, not consumed capacity units (see insert).
        return OK;
    }

    /**
     * Converts YCSB field values into DynamoDB string attributes.
     *
     * @param values field names mapped to their values
     * @return a mutable map, pre-sized to also hold the primary key
     */
    private static Map<String, AttributeValue> createAttributes(
            HashMap<String, ByteIterator> values) {
        Map<String, AttributeValue> attributes =
                new HashMap<String, AttributeValue>(values.size() + 1); // leave space for the PrimaryKey
        for (Entry<String, ByteIterator> val : values.entrySet()) {
            attributes.put(val.getKey(), new AttributeValue(val.getValue().toString()));
        }
        return attributes;
    }

    /**
     * Converts a DynamoDB item into a YCSB result map, using each attribute's
     * string value.
     *
     * @param item attributes of one item, may be null
     * @return the converted map, or null when {@code item} is null
     */
    private HashMap<String, ByteIterator> extractResult(Map<String, AttributeValue> item) {
        if (null == item) {
            return null;
        }
        // Checked once so String.format is skipped when debug logging is off.
        boolean debugEnabled = logger.isDebugEnabled();
        HashMap<String, ByteIterator> rItems = new HashMap<String, ByteIterator>(item.size());
        for (Entry<String, AttributeValue> attr : item.entrySet()) {
            if (debugEnabled) {
                logger.debug(String.format("Result- key: %s, value: %s", attr.getKey(), attr.getValue()));
            }
            rItems.put(attr.getKey(), new StringByteIterator(attr.getValue().getS()));
        }
        return rItems;
    }

    /**
     * Builds a hash-key-only DynamoDB key from a YCSB record key.
     *
     * @param key record key, used as a string hash key element
     * @return the DynamoDB key
     */
    private static Key createPrimaryKey(String key) {
        return new Key().withHashKeyElement(new AttributeValue().withS(key));
    }
}

Просмотреть файл

@ -0,0 +1,10 @@
#define the console appender
log4j.appender.consoleAppender = org.apache.log4j.ConsoleAppender
# now define the layout for the appender
log4j.appender.consoleAppender.layout = org.apache.log4j.PatternLayout
log4j.appender.consoleAppender.layout.ConversionPattern=%-4r [%t] %-5p %c %x -%m%n
# now map our console appender as a root logger, means all log messages will go
# to this appender
log4j.rootLogger = INFO, consoleAppender

Просмотреть файл

@ -64,6 +64,7 @@
<module>hbase</module>
<module>hypertable</module>
<module>cassandra</module>
<module>dynamodb</module>
<!--<module>gemfire</module>-->
<module>infinispan</module>
<module>jdbc</module>