Hypertable plugin in YCSB framework

nixon 2012-07-09 13:38:55 -07:00
Parent 5b19cde032
Commit a438f8f719
5 changed files: 488 additions and 0 deletions


@ -30,6 +30,7 @@ DATABASES = {
"cassandra-10" : "com.yahoo.ycsb.db.CassandraClient10",
"gemfire" : "com.yahoo.ycsb.db.GemFireClient",
"hbase" : "com.yahoo.ycsb.db.HBaseClient",
"hypertable" : "com.yahoo.ycsb.db.HypertableClient",
"infinispan" : "com.yahoo.ycsb.db.InfinispanClient",
"jdbc" : "com.yahoo.ycsb.db.JdbcDBClient",
"mapkeeper" : "com.yahoo.ycsb.db.MapKeeperClient",

hypertable/README (new file, 84 lines)

@ -0,0 +1,84 @@
1 Install Hypertable
Installation instructions for Hypertable can be found at:
code.google.com/p/hypertable/wiki/HypertableManual
2 Set Up YCSB
Clone the YCSB git repository and compile:
]$ git clone git://github.com/brianfrankcooper/YCSB.git
]$ cd YCSB
]$ mvn clean package
3 Run Hypertable
Once Hypertable has been installed, start it by running
]$ ./bin/ht start all-servers hadoop
if an instance of HDFS is running or
]$ ./bin/ht start all-servers local
if the database is backed by the local file system. YCSB accesses
a table called 'usertable' by default. Create this table through the
Hypertable shell by running
]$ ./bin/ht shell
hypertable> use '/ycsb';
hypertable> create table usertable(family);
hypertable> quit
All interactions by YCSB take place under the Hypertable namespace '/ycsb'.
Hypertable also uses an additional data grouping structure called a column
family that must be set. YCSB doesn't offer fine-grained operations on
column families, so in this example the table is created with a single
column family named 'family' to which all columns will belong.
The name of this column family must be passed to YCSB. The table can be
manipulated from within the Hypertable shell without interfering with the
operation of YCSB.
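For illustration (the row key and values below are made up), a record written
by the YCSB core workloads, which by default use field names field0 through
field9, is stored as one cell per field under that single column family:
  row key     column          value
  user1000    family:field0   <random bytes>
  user1000    family:field1   <random bytes>
  ...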
4 Run YCSB
Make sure that an instance of Hypertable is running. To access the database
through the YCSB shell, from the YCSB directory run:
]$ ./bin/ycsb shell hypertable -p columnfamily=family
where the value passed to columnfamily matches that used in the table
creation. To run a workload, first load the data:
]$ ./bin/ycsb load hypertable -P workloads/workloada -p columnfamily=family
Then run the workload:
]$ ./bin/ycsb run hypertable -P workloads/workloada -p columnfamily=family
This example runs the core workload 'workloada' that comes packaged with YCSB.
The state of the YCSB data in the Hypertable database can be reset by dropping
usertable and recreating it.
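For example, the reset can be performed from the Hypertable shell with the same
commands used to create the table, plus a DROP TABLE:
]$ ./bin/ht shell
hypertable> use '/ycsb';
hypertable> drop table usertable;
hypertable> create table usertable(family);
hypertable> quit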
5 Configuration Parameters
Hypertable configuration settings can be found in conf/hypertable.cfg under
your main Hypertable directory. Make sure that the constant THRIFTBROKER_PORT
in the class HypertableClient matches the setting ThriftBroker.Port in
hypertable.cfg.
To change the amount of data returned on each call to the ThriftClient during
a Hypertable scan, add the parameter ThriftBroker.NextThreshold=x to
hypertable.cfg, where x is the desired size in bytes. The default value of
this parameter is 128000.
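As an example, a minimal conf/hypertable.cfg fragment matching the defaults
assumed by this client would contain (adjust both values to your deployment):
ThriftBroker.Port=38080
ThriftBroker.NextThreshold=128000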
To alter the Hypertable namespace YCSB operates under, change the constant
NAMESPACE in the class HypertableClient.

hypertable/pom.xml (new file, 55 lines)

@ -0,0 +1,55 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <parent>
    <groupId>com.yahoo.ycsb</groupId>
    <artifactId>root</artifactId>
    <version>0.1.4</version>
  </parent>
  <artifactId>hypertable-binding</artifactId>
  <name>Hypertable DB Binding</name>
  <dependencies>
    <dependency>
      <groupId>com.yahoo.ycsb</groupId>
      <artifactId>core</artifactId>
      <version>${project.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.thrift</groupId>
      <artifactId>libthrift</artifactId>
      <version>${thrift.version}</version>
    </dependency>
    <dependency>
      <groupId>org.hypertable</groupId>
      <artifactId>hypertable</artifactId>
      <version>${hypertable.version}</version>
    </dependency>
  </dependencies>
  <build>
    <plugins>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-assembly-plugin</artifactId>
        <version>${maven.assembly.version}</version>
        <configuration>
          <descriptorRefs>
            <descriptorRef>jar-with-dependencies</descriptorRef>
          </descriptorRefs>
          <appendAssemblyId>false</appendAssemblyId>
        </configuration>
        <executions>
          <execution>
            <phase>package</phase>
            <goals>
              <goal>single</goal>
            </goals>
          </execution>
        </executions>
      </plugin>
    </plugins>
  </build>
</project>
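To rebuild only this binding from the top of the YCSB checkout, a standard
multi-module Maven invocation should work (a sketch; the module name comes
from the root pom.xml below):
]$ mvn clean package -pl hypertable -am
Because the assembly uses jar-with-dependencies with appendAssemblyId disabled,
the result is a single self-contained hypertable-binding jar under
hypertable/target/.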

HypertableClient.java (new file, 345 lines)

@ -0,0 +1,345 @@
/**
* Licensed under the Apache License, Version 2.0 (the "License"); you
* may not use this file except in compliance with the License. You
* may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. See the License for the specific language governing
* permissions and limitations under the License. See accompanying
* LICENSE file.
*/
package com.yahoo.ycsb.db;
import java.nio.ByteBuffer;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.Vector;
import org.apache.thrift.TException;
import org.hypertable.thrift.SerializedCellsFlag;
import org.hypertable.thrift.SerializedCellsWriter;
import org.hypertable.thrift.ThriftClient;
import org.hypertable.thriftgen.Cell;
import org.hypertable.thriftgen.ClientException;
import org.hypertable.thriftgen.Key;
import org.hypertable.thriftgen.KeyFlag;
import org.hypertable.thriftgen.RowInterval;
import org.hypertable.thriftgen.ScanSpec;
import org.hypertable.thrift.SerializedCellsReader;
import com.yahoo.ycsb.ByteArrayByteIterator;
import com.yahoo.ycsb.ByteIterator;
import com.yahoo.ycsb.DBException;
/**
* Hypertable client for YCSB framework
*/
public class HypertableClient extends com.yahoo.ycsb.DB
{
private boolean _debug = false;
private ThriftClient connection;
private long ns;
private String _columnFamily = "";
public static final int OK = 0;
public static final int SERVERERROR = -1;
public static final String NAMESPACE = "/ycsb";
public static final int THRIFTBROKER_PORT = 38080;
//TODO: make dynamic
public static final int BUFFER_SIZE = 4096;
/**
* Initialize any state for this DB.
* Called once per DB instance; there is one DB instance per client thread.
*/
@Override
public void init() throws DBException
{
if ( (getProperties().getProperty("debug") != null) &&
(getProperties().getProperty("debug").equals("true")) )
{
_debug = true;
}
try {
connection = ThriftClient.create("localhost", THRIFTBROKER_PORT);
if (!connection.namespace_exists(NAMESPACE)) {
connection.namespace_create(NAMESPACE);
}
ns = connection.open_namespace(NAMESPACE);
} catch (ClientException e) {
throw new DBException("Could not open namespace", e);
} catch (TException e) {
throw new DBException("Could not open namespace", e);
}
_columnFamily = getProperties().getProperty("columnfamily");
if (_columnFamily == null)
{
System.err.println("Error, must specify a " +
"columnfamily for Hypertable table");
throw new DBException("No columnfamily specified");
}
}
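// The two properties read above ("debug" and "columnfamily") are supplied on
// the ycsb command line, for example:
//   ./bin/ycsb run hypertable -P workloads/workloada -p columnfamily=family -p debug=true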
/**
* Cleanup any state for this DB.
* Called once per DB instance; there is one DB instance per client thread.
*/
@Override
public void cleanup() throws DBException
{
try {
connection.namespace_close(ns);
} catch (ClientException e) {
throw new DBException("Could not close namespace", e);
} catch (TException e) {
throw new DBException("Could not close namespace", e);
}
}
/**
* Read a record from the database. Each field/value pair from the result
* will be stored in a HashMap.
*
* @param table The name of the table
* @param key The record key of the record to read.
* @param fields The list of fields to read, or null for all of them
* @param result A HashMap of field/value pairs for the result
* @return Zero on success, a non-zero error code on error
*/
@Override
public int read(String table, String key, Set<String> fields,
HashMap<String, ByteIterator> result)
{
//SELECT _column_family:field[i]
// FROM table WHERE ROW=key MAX_VERSIONS 1;
if (_debug) {
System.out.println("Doing read from Hypertable columnfamily " +
_columnFamily);
System.out.println("Doing read for key: " + key);
}
try {
if (null != fields) {
Vector<HashMap<String, ByteIterator>> resMap =
new Vector<HashMap<String, ByteIterator>>();
if (0 != scan(table, key, 1, fields, resMap)) {
return SERVERERROR;
}
if (!resMap.isEmpty())
result.putAll(resMap.firstElement());
} else {
SerializedCellsReader reader = new SerializedCellsReader(null);
reader.reset(connection.get_row_serialized(ns, table, key));
while (reader.next()) {
result.put(new String(reader.get_column_qualifier()),
new ByteArrayByteIterator(reader.get_value()));
}
}
} catch (ClientException e) {
if (_debug) {
System.err.println("Error doing read: " + e.message);
}
return SERVERERROR;
} catch (TException e) {
if (_debug)
System.err.println("Error doing read");
return SERVERERROR;
}
return OK;
}
/**
* Perform a range scan for a set of records in the database. Each
* field/value pair from the result will be stored in a HashMap.
*
* @param table The name of the table
* @param startkey The record key of the first record to read.
* @param recordcount The number of records to read
* @param fields The list of fields to read, or null for all of them
* @param result A Vector of HashMaps, where each HashMap is a set
* field/value pairs for one record
* @return Zero on success, a non-zero error code on error
*/
@Override
public int scan(String table, String startkey, int recordcount,
Set<String> fields,
Vector<HashMap<String, ByteIterator>> result)
{
//SELECT _columnFamily:fields FROM table WHERE (ROW >= startkey)
// LIMIT recordcount MAX_VERSIONS 1;
ScanSpec spec = new ScanSpec();
RowInterval elem = new RowInterval();
elem.setStart_inclusive(true);
elem.setStart_row(startkey);
spec.addToRow_intervals(elem);
if (null != fields) {
for (String field : fields) {
spec.addToColumns(_columnFamily + ":" + field);
}
}
spec.setVersions(1);
spec.setRow_limit(recordcount);
SerializedCellsReader reader = new SerializedCellsReader(null);
try {
long sc = connection.scanner_open(ns, table, spec);
String lastRow = null;
boolean eos = false;
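// Each scanner_get_cells_serialized() call returns one buffer of cells (its
// size is governed by ThriftBroker.NextThreshold, see the README); keep
// fetching until the reader signals end-of-scan via eos().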
while (!eos) {
reader.reset(connection.scanner_get_cells_serialized(sc));
while (reader.next()) {
String currentRow = new String(reader.get_row());
if (!currentRow.equals(lastRow)) {
result.add(new HashMap<String, ByteIterator>());
lastRow = currentRow;
}
result.lastElement().put(
new String(reader.get_column_qualifier()),
new ByteArrayByteIterator(reader.get_value()));
}
eos = reader.eos();
if (_debug) {
System.out.println("Number of rows retrieved so far: " +
result.size());
}
}
connection.scanner_close(sc);
} catch (ClientException e) {
if (_debug) {
System.err.println("Error doing scan: " + e.message);
}
return SERVERERROR;
} catch (TException e) {
if (_debug)
System.err.println("Error doing scan");
return SERVERERROR;
}
return OK;
}
/**
* Update a record in the database. Any field/value pairs in the specified
* values HashMap will be written into the record with the specified
* record key, overwriting any existing values with the same field name.
*
* @param table The name of the table
* @param key The record key of the record to write
* @param values A HashMap of field/value pairs to update in the record
* @return Zero on success, a non-zero error code on error
*/
@Override
public int update(String table, String key,
HashMap<String, ByteIterator> values)
{
return insert(table, key, values);
}
/**
* Insert a record in the database. Any field/value pairs in the specified
* values HashMap will be written into the record with the specified
* record key.
*
* @param table The name of the table
* @param key The record key of the record to insert.
* @param values A HashMap of field/value pairs to insert in the record
* @return Zero on success, a non-zero error code on error
*/
@Override
public int insert(String table, String key,
HashMap<String, ByteIterator> values)
{
//INSERT INTO table VALUES
// (key, _columnFamily:entry.getKey(), entry.getValue()), (...);
if (_debug) {
System.out.println("Setting up put for key: " + key);
}
try {
long mutator = connection.mutator_open(ns, table, 0, 0);
SerializedCellsWriter writer =
new SerializedCellsWriter(BUFFER_SIZE*values.size(), true);
for (Map.Entry<String, ByteIterator> entry : values.entrySet()) {
writer.add(key, _columnFamily, entry.getKey(),
SerializedCellsFlag.AUTO_ASSIGN,
ByteBuffer.wrap(entry.getValue().toArray()));
}
connection.mutator_set_cells_serialized(mutator,
writer.buffer(), true);
connection.mutator_close(mutator);
} catch (ClientException e) {
if (_debug) {
System.err.println("Error doing set: " + e.message);
}
return SERVERERROR;
} catch (TException e) {
if (_debug)
System.err.println("Error doing set");
return SERVERERROR;
}
return OK;
}
/**
* Delete a record from the database.
*
* @param table The name of the table
* @param key The record key of the record to delete.
* @return Zero on success, a non-zero error code on error
*/
@Override
public int delete(String table, String key)
{
//DELETE * FROM table WHERE ROW=key;
if (_debug) {
System.out.println("Doing delete for key: "+key);
}
Cell entry = new Cell();
entry.key = new Key();
entry.key.row = key;
entry.key.flag = KeyFlag.DELETE_ROW;
try {
connection.set_cell(ns, table, entry);
} catch (ClientException e) {
if (_debug) {
System.err.println("Error doing delete: " + e.message);
}
return SERVERERROR;
} catch (TException e) {
if (_debug)
System.err.println("Error doing delete");
return SERVERERROR;
}
return OK;
}
}
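For reference, a minimal sketch of driving this client directly instead of
through the ycsb launcher (hedged: the table, key, and field values below are
made up; setProperties, init, insert, read, and cleanup are the standard YCSB
DB API methods used by the framework):

import java.util.HashMap;
import java.util.Properties;
import com.yahoo.ycsb.ByteIterator;
import com.yahoo.ycsb.StringByteIterator;
import com.yahoo.ycsb.db.HypertableClient;

public class HypertableClientExample {
    public static void main(String[] args) throws Exception {
        Properties props = new Properties();
        props.setProperty("columnfamily", "family"); // must match the table's column family

        HypertableClient client = new HypertableClient();
        client.setProperties(props);
        client.init(); // connects to the local ThriftBroker and opens '/ycsb'

        // Write a single record, then read it back by key.
        HashMap<String, ByteIterator> values = new HashMap<String, ByteIterator>();
        values.put("field0", new StringByteIterator("hello"));
        client.insert("usertable", "user1000", values);

        HashMap<String, ByteIterator> result = new HashMap<String, ByteIterator>();
        client.read("usertable", "user1000", null, result);
        System.out.println("fields read back: " + result.keySet());

        client.cleanup();
    }
}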

pom.xml (root POM)

@ -53,12 +53,15 @@
    <redis.version>2.0.0</redis.version>
    <voldemort.version>0.81</voldemort.version>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <thrift.version>0.8.0</thrift.version>
    <hypertable.version>0.9.5.6</hypertable.version>
  </properties>

  <modules>
    <!--module>build-tools</module-->
    <module>core</module>
    <module>hbase</module>
    <module>hypertable</module>
    <module>cassandra</module>
    <module>gemfire</module>
    <module>infinispan</module>