From 989b930d47644b13bd702fdc0ae41fcf1c08a667 Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Fri, 27 Nov 2020 00:06:16 -0800 Subject: [PATCH] [seaweedfs] add seaweedfs (#1443) --- bin/ycsb | 1 + distribution/pom.xml | 5 + pom.xml | 2 + seaweedfs/README.md | 59 ++++ seaweedfs/pom.xml | 54 +++ .../site/ycsb/db/seaweed/SeaweedClient.java | 320 ++++++++++++++++++ .../site/ycsb/db/seaweed/package-info.java | 21 ++ 7 files changed, 462 insertions(+) create mode 100644 seaweedfs/README.md create mode 100644 seaweedfs/pom.xml create mode 100644 seaweedfs/src/main/java/site/ycsb/db/seaweed/SeaweedClient.java create mode 100644 seaweedfs/src/main/java/site/ycsb/db/seaweed/package-info.java diff --git a/bin/ycsb b/bin/ycsb index c0761c5b..1c24a413 100755 --- a/bin/ycsb +++ b/bin/ycsb @@ -98,6 +98,7 @@ DATABASES = { "riak" : "site.ycsb.db.riak.RiakKVClient", "rocksdb" : "site.ycsb.db.rocksdb.RocksDBClient", "s3" : "site.ycsb.db.S3Client", + "seaweedfs" : "site.ycsb.db.seaweed.SeaweedClient", "solr" : "site.ycsb.db.solr.SolrClient", "solr6" : "site.ycsb.db.solr6.SolrClient", "solr7" : "site.ycsb.db.solr7.SolrClient", diff --git a/distribution/pom.xml b/distribution/pom.xml index 0b9e87aa..c3ad21e9 100644 --- a/distribution/pom.xml +++ b/distribution/pom.xml @@ -234,6 +234,11 @@ LICENSE file. s3-binding ${project.version} + + site.ycsb + seaweedfs-binding + ${project.version} + site.ycsb solr-binding diff --git a/pom.xml b/pom.xml index 6236ab11..d5691317 100644 --- a/pom.xml +++ b/pom.xml @@ -143,6 +143,7 @@ LICENSE file. 2.0.5 6.2.2 1.10.20 + 1.4.1 5.5.3 6.4.1 7.7.2 @@ -198,6 +199,7 @@ LICENSE file. riak rocksdb s3 + seaweedfs solr solr6 solr7 diff --git a/seaweedfs/README.md b/seaweedfs/README.md new file mode 100644 index 00000000..1e9f3fa4 --- /dev/null +++ b/seaweedfs/README.md @@ -0,0 +1,59 @@ + +Quick Start +=============== +[SeaweedFS](https://github.com/chrislusf/seaweedfs) is a distributed file system with optimization for small files. 
It can also be used as a key-value store for large values.

### 1. Set Up YCSB

Download the YCSB from this website:

    https://github.com/brianfrankcooper/YCSB/releases/

You can choose to download either the full stable version or just one of the available bindings.

### 2. Run YCSB

To execute the benchmark using the SeaweedFS storage binding, first files must be uploaded using the "load" option with
this command:

    ./bin/ycsb load seaweedfs -p seaweed.filerHost=localhost -p seaweed.filerPort=8888 -p seaweed.folder=/ycsb -p fieldlength=10 -p fieldcount=20 -p recordcount=10000 -P workloads/workloada

With this command, the workload A will be executed with the loading phase. The file size is determined by the number
of fields (fieldcount) and by the field size (fieldlength). In this case each file is 200 bytes (10 bytes for each
field multiplied by 20 fields).

Running the command:

    ./bin/ycsb run seaweedfs -p seaweed.filerHost=localhost -p seaweed.filerPort=8888 -p seaweed.folder=/ycsb -p fieldlength=10 -p fieldcount=20 -p recordcount=10000 -P workloads/workloada

the workload A will be executed with file size 200 bytes.

#### SeaweedFS Storage Configuration Parameters

- `seaweed.filerHost`
  - This indicates the filer host or IP address.
  - Default value is `localhost`.

- `seaweed.filerPort`
  - This indicates the filer port.
  - Default value is `8888`.

- `seaweed.folder`
  - This indicates the folder on the filer in which all files are stored.
  - Default value is `/ycsb`.
diff --git a/seaweedfs/pom.xml b/seaweedfs/pom.xml new file mode 100644 index 00000000..54869f16 --- /dev/null +++ b/seaweedfs/pom.xml @@ -0,0 +1,54 @@ + + + + + 4.0.0 + + site.ycsb + binding-parent + 0.18.0-SNAPSHOT + ../binding-parent + + + seaweedfs-binding + SeaweedFS Storage Binding + jar + + + + com.github.chrislusf + seaweedfs-client + ${seaweed.client.version} + + + site.ycsb + core + ${project.version} + provided + + + org.slf4j + slf4j-api + 1.7.25 + + + org.slf4j + slf4j-simple + 1.7.25 + runtime + + + diff --git a/seaweedfs/src/main/java/site/ycsb/db/seaweed/SeaweedClient.java b/seaweedfs/src/main/java/site/ycsb/db/seaweed/SeaweedClient.java new file mode 100644 index 00000000..6267c19e --- /dev/null +++ b/seaweedfs/src/main/java/site/ycsb/db/seaweed/SeaweedClient.java @@ -0,0 +1,320 @@ +/** + * Copyright (c) 2015 YCSB contributors. All rights reserved. + *

+ * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. + *

+ * SeaweedFS storage client binding for YCSB. + */ +package site.ycsb.db.seaweed; + +import seaweedfs.client.FilerProto; +import seaweedfs.client.FilerClient; +import seaweedfs.client.FilerGrpcClient; +import seaweedfs.client.SeaweedRead; +import seaweedfs.client.SeaweedWrite; +import site.ycsb.ByteIterator; +import site.ycsb.DB; +import site.ycsb.DBException; +import site.ycsb.Status; +import site.ycsb.StringByteIterator; + +import java.io.IOException; +import java.io.StringWriter; +import java.io.Writer; +import java.nio.charset.StandardCharsets; +import java.util.List; +import java.util.HashMap; +import java.util.Iterator; +import java.util.Map; +import java.util.Set; +import java.util.Vector; + +import org.codehaus.jackson.JsonFactory; +import org.codehaus.jackson.JsonGenerator; +import org.codehaus.jackson.JsonNode; +import org.codehaus.jackson.map.ObjectMapper; +import org.codehaus.jackson.node.ObjectNode; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * SeaweedFS Storage client for YCSB framework. + * + * The size of the file to upload is determined by two parameters: + * - fieldcount this is the number of fields of a record in YCSB + * - fieldlength this is the size in bytes of a single field in the record + * together these two parameters define the size of the file to upload, + * the size in bytes is given by the fieldlength multiplied by the fieldcount. + * The name of the file is determined by the parameter key. + * This key is automatically generated by YCSB. + */ +public class SeaweedClient extends DB { + + private static final Logger LOG = LoggerFactory.getLogger(SeaweedClient.class); + protected static final ObjectMapper MAPPER = new ObjectMapper(); + + private FilerClient filerClient; + private FilerGrpcClient filerGrpcClient; + private String filerHost; + private int filerPort; + private String folder; + + /** + * Cleanup any state for this storage. 
+ * Called once per instance; + */ + @Override + public void cleanup() throws DBException { + } + + /** + * Delete a file from SeaweedFS Storage. + * + * @param tableName The name of the table + * @param key The record key of the file to delete. + * @return OK on success, otherwise ERROR. See the + * {@link DB} class's description for a discussion of error codes. + */ + @Override + public Status delete(String tableName, String key) { + if (!filerClient.rm(this.folder + "/" + tableName + "/" + key, true, true)) { + return Status.ERROR; + } + return Status.OK; + } + + /** + * Initialize any state for the storage. + * Called once per SeaweedFS instance; If the client is not null it is re-used. + */ + @Override + public void init() throws DBException { + filerHost = getProperties().getProperty("seaweed.filerHost", "localhost"); + filerPort = Integer.parseInt(getProperties().getProperty("seaweed.filerPort", "8888")); + folder = getProperties().getProperty("seaweed.folder", "/ycsb"); + filerGrpcClient = new FilerGrpcClient(filerHost, filerPort+10000); + filerClient = new FilerClient(filerGrpcClient); + filerClient.mkdirs(this.folder, 0755); + } + + /** + * Create a new File in the table. Any field/value pairs in the specified + * values HashMap will be written into the file with the specified record + * key. + * + * @param tableName The name of the table + * @param key The record key of the file to insert. + * @param values A HashMap of field/value pairs to insert in the file. + * Only the content of the first field is written to a byteArray + * multiplied by the number of field. In this way the size + * of the file to upload is determined by the fieldlength + * and fieldcount parameters. + * @return OK on success, ERROR otherwise. See the + * {@link DB} class's description for a discussion of error codes. 
+ */ + @Override + public Status insert(String tableName, String key, + Map values) { + return writeToStorage(tableName, key, values); + } + + /** + * Read a file from the table. Each field/value pair from the result + * will be stored in a HashMap. + * + * @param tableName The name of the table + * @param key The record key of the file to read. + * @param fields The list of fields to read, or null for all of them, + * it is null by default + * @param result A HashMap of field/value pairs for the result + * @return OK on success, ERROR otherwise. + */ + @Override + public Status read(String tableName, String key, Set fields, + Map result) { + return readFromStorage(tableName, key, fields, result); + } + + /** + * Update a file in the table. Any field/value pairs in the specified + * values HashMap will be written into the file with the specified file + * key, overwriting any existing values with the same field name. + * + * @param tableName The name of the table + * @param key The file key of the file to write. + * @param values A HashMap of field/value pairs to update in the record + * @return OK on success, ERORR otherwise. + */ + @Override + public Status update(String tableName, String key, + Map values) { + Map existingValues = new HashMap<>(); + Status readStatus = readFromStorage(tableName, key, null, existingValues); + if (readStatus != Status.OK) { + return readStatus; + } + existingValues.putAll(values); + return writeToStorage(tableName, key, existingValues); + } + + /** + * Perform a range scan for a set of files in the table. Each + * field/value pair from the result will be stored in a HashMap. + * + * @param tableName The name of the table + * @param startkey The file key of the first file to read. 
+ * @param recordcount The number of files to read + * @param fields The list of fields to read, or null for all of them + * @param result A Vector of HashMaps, where each HashMap is a set field/value + * pairs for one file + * @return OK on success, ERROR otherwise. + */ + @Override + public Status scan(String tableName, String startkey, int recordcount, + Set fields, Vector> result) { + return scanFromStorage(tableName, startkey, recordcount, fields, result); + } + + /** + * Write a new object to SeaweedFS. + * + * @param tableName The name of the table + * @param key The file key of the object to upload/update. + * @param values The data to be written on the object + */ + protected Status writeToStorage(String tableName, String key, Map values) { + try { + byte[] jsonData = toJson(values).getBytes(StandardCharsets.UTF_8); + + long now = System.currentTimeMillis() / 1000L; + FilerProto.Entry.Builder entry = FilerProto.Entry.newBuilder() + .setName(key) + .setIsDirectory(false) + .setAttributes( + FilerProto.FuseAttributes.newBuilder() + .setCrtime(now) + .setMtime(now) + .setFileMode(0755) + ); + + SeaweedWrite.writeData(entry, "000", this.filerGrpcClient, 0, jsonData, 0, jsonData.length); + + SeaweedWrite.writeMeta(this.filerGrpcClient, this.folder + "/" + tableName, entry); + + } catch (Exception e) { + LOG.error("Not possible to write the object {}", key, e); + return Status.ERROR; + } + + return Status.OK; + } + + /** + * Download an object from SeaweedFS. + * + * @param tableName The name of the table + * @param key The file key of the object to upload/update. 
+ * @param result The Hash map where data from the object are written + */ + protected Status readFromStorage(String tableName, String key, Set fields, Map result) { + try { + FilerProto.Entry entry = this.filerClient.lookupEntry(this.folder + "/" + tableName, key); + if (entry!=null) { + readOneEntry(entry, key, fields, result); + }else{ + LOG.error("Fail to read the object {}", key); + return Status.NOT_FOUND; + } + } catch (Exception e) { + LOG.error("Not possible to get the object {}", key, e); + return Status.ERROR; + } + + return Status.OK; + } + + protected void readOneEntry( + FilerProto.Entry entry, String key, Set fields, Map result) throws IOException { + List visibleIntervalList = + SeaweedRead.nonOverlappingVisibleIntervals(filerGrpcClient, entry.getChunksList()); + int length = (int) SeaweedRead.totalSize(entry.getChunksList()); + byte[] buffer = new byte[length]; + SeaweedRead.read(this.filerGrpcClient, visibleIntervalList, 0, buffer, 0, buffer.length); + fromJson(new String(buffer, StandardCharsets.UTF_8), fields, result); + } + + /** + * Perform an emulation of a database scan operation on a SeaweedFS table. + * + * @param tableName The name of the table + * @param startkey The file key of the first file to read. 
+ * @param recordcount The number of files to read + * @param fields The list of fields to read, or null for all of them + * @param result A Vector of HashMaps, where each HashMap is a set field/value + * pairs for one file + */ + protected Status scanFromStorage(String tableName, String startkey, + int recordcount, Set fields, Vector> result) { + + try { + List entryList = this.filerClient.listEntries( + this.folder + "/" + tableName, "", startkey, recordcount, true); + for (FilerProto.Entry entry : entryList) { + HashMap ret = new HashMap(); + readOneEntry(entry, entry.getName(), fields, ret); + result.add(ret); + } + } catch (Exception e) { + LOG.error("Not possible to list the object {} limit {}", startkey, recordcount, e); + return Status.ERROR; + } + + return Status.OK; + } + + protected static void fromJson( + String value, Set fields, + Map result) throws IOException { + JsonNode json = MAPPER.readTree(value); + boolean checkFields = fields != null && !fields.isEmpty(); + for (Iterator> jsonFields = json.getFields(); + jsonFields.hasNext(); + /* increment in loop body */) { + Map.Entry jsonField = jsonFields.next(); + String name = jsonField.getKey(); + if (checkFields && !fields.contains(name)) { + continue; + } + JsonNode jsonValue = jsonField.getValue(); + if (jsonValue != null && !jsonValue.isNull()) { + result.put(name, new StringByteIterator(jsonValue.asText())); + } + } + } + + protected static String toJson(Map values) + throws IOException { + ObjectNode node = MAPPER.createObjectNode(); + Map stringMap = StringByteIterator.getStringMap(values); + for (Map.Entry pair : stringMap.entrySet()) { + node.put(pair.getKey(), pair.getValue()); + } + JsonFactory jsonFactory = new JsonFactory(); + Writer writer = new StringWriter(); + JsonGenerator jsonGenerator = jsonFactory.createJsonGenerator(writer); + MAPPER.writeTree(jsonGenerator, node); + return writer.toString(); + } + +} diff --git a/seaweedfs/src/main/java/site/ycsb/db/seaweed/package-info.java 
b/seaweedfs/src/main/java/site/ycsb/db/seaweed/package-info.java new file mode 100644 index 00000000..15416cef --- /dev/null +++ b/seaweedfs/src/main/java/site/ycsb/db/seaweed/package-info.java @@ -0,0 +1,21 @@ +/** + * Copyright (c) 2015 YCSB contributors. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. + * + * SeaweedFS storage client binding for YCSB. + */ + +package site.ycsb.db.seaweed; +