addressed review comments, code cleaned up

Parent: d22e4aefc6
Commit: 208fcd3d9c

@@ -134,7 +134,6 @@ class BatchInserter {
 
    public Iterator<Callable<InsertMetrics>> miniBatchInsertExecutionCallableIterator() {
 
-       // TODO: FIXME handle scenario where due to a non-retriable error we should break out of the stream loop
        Stream<Callable<InsertMetrics>> stream = batchesToInsert.stream().map(miniBatch -> {
            return new Callable<InsertMetrics>() {
 
@@ -26,6 +26,8 @@ import java.util.Collections;
 import java.util.Iterator;
 
 import org.json.JSONObject;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import com.fasterxml.jackson.databind.JsonNode;
 import com.fasterxml.jackson.databind.ObjectMapper;
@@ -34,7 +36,8 @@ import com.microsoft.azure.documentdb.Undefined;
 import com.microsoft.azure.documentdb.internal.routing.PartitionKeyInternal;
 
 class DocumentAnalyzer {
-
+    private final static ObjectMapper objectMapper = new ObjectMapper();
+    private final static Logger LOGGER = LoggerFactory.getLogger(DocumentAnalyzer.class);
    /**
     * Extracts effective {@link PartitionKeyInternal} from serialized document.
     * @param documentAsString Serialized document to extract partition key value from.
@@ -52,43 +55,53 @@ class DocumentAnalyzer {
    }
 
    private static PartitionKeyInternal extractPartitionKeyValueInternal(String documentAsString, PartitionKeyDefinition partitionKeyDefinition) {
-       ObjectMapper objectMapper = new ObjectMapper();
        JsonNode root;
        try {
            root = objectMapper.readTree(documentAsString);
-       }catch (Exception e) {
-           throw new RuntimeException(e);
-       }
 
-       Iterator<String> path = partitionKeyDefinition.getPaths().iterator();
-       JsonNode node = root.path(path.next().substring(1));
+           Iterator<String> path = partitionKeyDefinition.getPaths().iterator();
+           JsonNode node = root.path(path.next().substring(1));
 
-       while(path.hasNext()) {
-           node = node.path(path.next());
-       }
+           while(path.hasNext()) {
+               node = node.path(path.next());
+           }
 
-       Object partitionKeyValue = null;
+           Object partitionKeyValue = null;
 
-       switch (node.getNodeType()) {
-       case BOOLEAN:
-           partitionKeyValue = node.booleanValue();
-           break;
-       case MISSING:
-           partitionKeyValue = Undefined.Value();
-           break;
-       case NULL:
-           partitionKeyValue = JSONObject.NULL;
-           break;
-       case NUMBER:
-           partitionKeyValue = node.numberValue();
-           break;
-       case STRING:
-           partitionKeyValue = node.textValue();
-           break;
-       default:
-           throw new RuntimeException(String.format("undefined json type %s", node.getNodeType()));
-       }
+           switch (node.getNodeType()) {
+           case BOOLEAN:
+               partitionKeyValue = node.booleanValue();
+               break;
+           case MISSING:
+               partitionKeyValue = Undefined.Value();
+               break;
+           case NULL:
+               partitionKeyValue = JSONObject.NULL;
+               break;
+           case NUMBER:
+               partitionKeyValue = node.numberValue();
+               break;
+           case STRING:
+               partitionKeyValue = node.textValue();
+               break;
+           default:
+               throw new RuntimeException(String.format("undefined json type %s", node.getNodeType()));
+           }
 
-       return PartitionKeyInternal.fromObjectArray(Collections.singletonList(partitionKeyValue), true);
+           return fromPartitionKeyvalue(partitionKeyValue);
+
+       } catch (Exception e) {
+           LOGGER.error("Failed to extract partition key value from document {}", documentAsString, e);
+           throw ExceptionUtils.toRuntimeException(e);
+       }
+   }
+
+   public static PartitionKeyInternal fromPartitionKeyvalue(Object partitionKeyValue) {
+       try {
+           return PartitionKeyInternal.fromObjectArray(Collections.singletonList(partitionKeyValue), true);
+       } catch (Exception e) {
+           LOGGER.error("Failed to instantiate ParitionKeyInternal from {}", partitionKeyValue, e);
+           throw ExceptionUtils.toRuntimeException(e);
+       }
    }
 }
@@ -45,6 +45,7 @@ import java.util.stream.Collectors;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import com.google.common.base.Function;
 import com.google.common.base.Preconditions;
 import com.google.common.base.Stopwatch;
 import com.google.common.util.concurrent.AsyncCallable;
@@ -62,7 +63,6 @@ import com.microsoft.azure.documentdb.Offer;
 import com.microsoft.azure.documentdb.PartitionKeyDefinition;
 import com.microsoft.azure.documentdb.PartitionKeyRange;
 import com.microsoft.azure.documentdb.internal.HttpConstants;
-import com.microsoft.azure.documentdb.internal.query.funcs.Func2;
 import com.microsoft.azure.documentdb.internal.routing.CollectionRoutingMap;
 import com.microsoft.azure.documentdb.internal.routing.PartitionKeyInternal;
 import com.microsoft.azure.documentdb.internal.routing.PartitionKeyRangeCache;
@@ -248,7 +248,14 @@ public class DocumentBulkImporter implements AutoCloseable {
     * Executes a bulk import in the Azure Cosmos DB database service.
     *
     * <code>
-    * DocumentClient client = new DocumentClient(HOST, MASTER_KEY, null, null);
+    * ConnectionPolicy connectionPolicy = new ConnectionPolicy();
+    * RetryOptions retryOptions = new RetryOptions();
+    * // set to 0 to let bulk importer handles throttling
+    * retryOptions.setMaxRetryAttemptsOnThrottledRequests(0);
+    * connectionPolicy.setRetryOptions(retryOptions);
+    * connectionPolicy.setMaxPoolSize(200);
+    *
+    * DocumentClient client = new DocumentClient(HOST, MASTER_KEY, connectionPolicy, null);
     *
     * String collectionLink = String.format("/dbs/%s/colls/%s", "mydb", "mycol");
     * DocumentCollection collection = client.readCollection(collectionLink, null).getResource();
@@ -264,6 +271,7 @@ public class DocumentBulkImporter implements AutoCloseable {
     * // bulkImportResponse.getNumberOfDocumentsImported() == documents.size()
     * }
     *
+    * importer.close();
     * client.close();
     * </code>
     * @param documents to insert
@@ -272,28 +280,24 @@ public class DocumentBulkImporter implements AutoCloseable {
     * @throws DocumentClientException if any failure happens
     */
    public BulkImportResponse importAll(Collection<String> documents, boolean isUpsert) throws DocumentClientException {
-       Func2<Collection<String>, ConcurrentHashMap<String, Set<String>>, Void> bucketingFunction = new Func2<Collection<String>, ConcurrentHashMap<String,Set<String>>, Void>() {
-
-           @Override
-           public Void apply(Collection<String> documents, ConcurrentHashMap<String, Set<String>> partitionKeyToBucket) throws Exception {
-
-               documents.parallelStream().forEach(document -> {
-                   PartitionKeyInternal partitionKeyValue = DocumentAnalyzer.extractPartitionKeyValue(document, partitionKeyDefinition);
-                   String effectivePartitionKey = partitionKeyValue.getEffectivePartitionKeyString(partitionKeyDefinition, true);
-                   String partitionRangeId = collectionRoutingMap.getRangeByEffectivePartitionKey(effectivePartitionKey).getId();
-                   partitionKeyToBucket.get(partitionRangeId).add(document);
-               });
-               return null;
-           }
-       };
-       return executeBulkImportInternal(documents, bucketingFunction, isUpsert);
+       return executeBulkImportInternal(documents,
+               document -> document,
+               document -> DocumentAnalyzer.extractPartitionKeyValue(document, partitionKeyDefinition),
+               isUpsert);
    }
 
    /**
     * Executes a bulk import in the Azure Cosmos DB database service.
     *
     * <code>
-    * DocumentClient client = new DocumentClient(HOST, MASTER_KEY, null, null);
+    * ConnectionPolicy connectionPolicy = new ConnectionPolicy();
+    * RetryOptions retryOptions = new RetryOptions();
+    * // set to 0 to let bulk importer handles throttling
+    * retryOptions.setMaxRetryAttemptsOnThrottledRequests(0);
+    * connectionPolicy.setRetryOptions(retryOptions);
+    * connectionPolicy.setMaxPoolSize(200);
+    *
+    * DocumentClient client = new DocumentClient(HOST, MASTER_KEY, connectionPolicy, null);
     *
     * String collectionLink = String.format("/dbs/%s/colls/%s", "mydb", "mycold");
     * DocumentCollection collection = client.readCollection(collectionLink, null).getResource();
|
@ -301,7 +305,7 @@ public class DocumentBulkImporter implements AutoCloseable {
|
||||||
* BulkImporter importer = new BulkImporter(client, collection);
|
* BulkImporter importer = new BulkImporter(client, collection);
|
||||||
*
|
*
|
||||||
* for(int i = 0; i < 10; i++) {
|
* for(int i = 0; i < 10; i++) {
|
||||||
* List<Tuple> tuples = documentSource.getMoreDocumentsPartitionKeyValueTuples();
|
* List<DocumentPKValuePair> tuples = documentSource.getMoreDocumentsPartitionKeyValueTuples();
|
||||||
*
|
*
|
||||||
* BulkImportResponse bulkImportResponse = importer.importAllWithPartitionKey(tuples, false);
|
* BulkImportResponse bulkImportResponse = importer.importAllWithPartitionKey(tuples, false);
|
||||||
*
|
*
|
||||||
|
@@ -309,7 +313,9 @@ public class DocumentBulkImporter implements AutoCloseable {
     * // bulkImportResponse.getNumberOfDocumentsImported() == documents.size()
     * }
     *
+    * importer.close();
     * client.close();
+    *
     * </code>
     * @param documentPartitionKeyValueTuples list of {@link DocumentPKValuePair}
     * @param isUpsert whether enable upsert (overwrite if it exists)
@@ -318,31 +324,20 @@ public class DocumentBulkImporter implements AutoCloseable {
     */
    public BulkImportResponse importAllWithPartitionKey(Collection<DocumentPKValuePair> documentPartitionKeyValueTuples, boolean isUpsert) throws DocumentClientException {
 
-       Func2<Collection<DocumentPKValuePair>, ConcurrentHashMap<String, Set<String>>, Void> bucketingFunction =
-               new Func2<Collection<DocumentPKValuePair>, ConcurrentHashMap<String,Set<String>>, Void>() {
-
-           @Override
-           public Void apply(Collection<DocumentPKValuePair> input, ConcurrentHashMap<String, Set<String>> partitionKeyToBucket) throws Exception {
-
-               input.parallelStream().forEach(tuple -> {
-                   PartitionKeyInternal partitionKeyValue = PartitionKeyInternal.fromObjectArray(Collections.singletonList(tuple.partitionKeyValue), true);
-                   String effectivePartitionKey = partitionKeyValue.getEffectivePartitionKeyString(partitionKeyDefinition, true);
-                   String partitionRangeId = collectionRoutingMap.getRangeByEffectivePartitionKey(effectivePartitionKey).getId();
-                   partitionKeyToBucket.get(partitionRangeId).add(tuple.document);
-               });
-               return null;
-           }
-       };
-       return executeBulkImportInternal(documentPartitionKeyValueTuples, bucketingFunction, isUpsert);
+       return executeBulkImportInternal(documentPartitionKeyValueTuples,
+               tuple -> tuple.document,
+               tuple -> DocumentAnalyzer.fromPartitionKeyvalue(tuple.partitionKeyValue),
+               isUpsert);
    }
 
    private <T> BulkImportResponse executeBulkImportInternal(Collection<T> input,
-           Func2<Collection<T>, ConcurrentHashMap<String, Set<String>>, Void> bucketByPartitionFunc,
+           Function<T, String> getDocument,
+           Function<T, PartitionKeyInternal> getPartitionKey,
            boolean isUpsert) throws DocumentClientException {
        Preconditions.checkNotNull(input, "document collection cannot be null");
        try {
            initializationFuture.get();
-           return executeBulkImportAsyncImpl(input, bucketByPartitionFunc, isUpsert).get();
+           return executeBulkImportAsyncImpl(input, getDocument, getPartitionKey, isUpsert).get();
 
        } catch (ExecutionException e) {
            logger.debug("Failed to import documents", e);
@@ -367,7 +362,8 @@ public class DocumentBulkImporter implements AutoCloseable {
    }
 
    private <T> ListenableFuture<BulkImportResponse> executeBulkImportAsyncImpl(Collection<T> input,
-           Func2<Collection<T>, ConcurrentHashMap<String, Set<String>>, Void> bucketByPartitionFunc,
+           Function<T, String> getDocument,
+           Function<T, PartitionKeyInternal> getPartitionKey,
            boolean isUpsert) throws Exception {
        Stopwatch watch = Stopwatch.createStarted();
 
@@ -385,7 +381,12 @@ public class DocumentBulkImporter implements AutoCloseable {
 
        logger.debug("Bucketing documents ...");
 
-       bucketByPartitionFunc.apply(input, documentsToImportByPartition);
+       input.parallelStream().forEach(item -> {
+           PartitionKeyInternal partitionKeyValue = getPartitionKey.apply(item);
+           String effectivePartitionKey = partitionKeyValue.getEffectivePartitionKeyString(partitionKeyDefinition, true);
+           String partitionRangeId = collectionRoutingMap.getRangeByEffectivePartitionKey(effectivePartitionKey).getId();
+           documentsToImportByPartition.get(partitionRangeId).add(getDocument.apply(item));
+       });
 
        logger.trace("Creating mini batches within each partition bucket");
        int maxMiniBatchSize = (int)Math.floor(MAX_BULK_IMPORT_SCRIPT_INPUT_SIZE * FRACTION_OF_MAX_BULK_IMPORT_SCRIPT_INPUT_SIZE_ALLOWED);
@@ -27,7 +27,6 @@ public class DocumentPKValuePair {
    public final Object partitionKeyValue;
 
    public DocumentPKValuePair(String documentAsString, Object partitionKeyValue) {
-       //Preconditions.checkNotNull(documentAsString);
        this.document = documentAsString;
        this.partitionKeyValue = partitionKeyValue;
    }
@@ -58,4 +58,12 @@ class ExceptionUtils {
        }
        return dce;
    }
+
+   public static RuntimeException toRuntimeException(Exception e) {
+       if (e instanceof RuntimeException) {
+           return (RuntimeException) e;
+       } else {
+           return new RuntimeException(e);
+       }
+   }
 }
@@ -49,83 +49,80 @@ public class Main {
 
        CmdLineConfiguration cfg = parseCommandLineArgs(args);
 
-       DocumentClient client = documentClientFrom(cfg);
+       try(DocumentClient client = documentClientFrom(cfg)) {
 
        String collectionLink = String.format("/dbs/%s/colls/%s", cfg.getDatabaseId(), cfg.getCollectionId());
        // this assumes database and collection already exists
        // also it is a good idea to set your connection pool size to be equal to the number of partitions serving your collection.
        DocumentCollection collection = client.readCollection(collectionLink, null).getResource();
 
        // instantiates bulk importer
-       DocumentBulkImporter bulkImporter = new DocumentBulkImporter(client, collection);
+       try(DocumentBulkImporter bulkImporter = new DocumentBulkImporter(client, collection)) {
 
        Stopwatch totalWatch = Stopwatch.createUnstarted();
 
        double totalRequestCharge = 0;
        long totalTimeInMillis = 0;
        long totalNumberOfDocumentsImported = 0;
 
        for(int i = 0 ; i < cfg.getNumberOfCheckpoints(); i++) {
 
            BulkImportResponse bulkImportResponse;
            if (cfg.isWithPreprocessedPartitionKeyValue()) {
                Collection<DocumentPKValuePair> documentPartitionKeyValueTuples = DataMigrationDocumentSource.loadDocumentPartitionKeyValueTuples(cfg.getNumberOfDocumentsForEachCheckpoint(), collection.getPartitionKey());
 
                // NOTE: only sum the bulk import time,
                // loading/generating documents is out of the scope of bulk importer and so has to be excluded
                totalWatch.start();
                bulkImportResponse = bulkImporter.importAllWithPartitionKey(documentPartitionKeyValueTuples, false);
                totalWatch.stop();
 
            } else {
                Collection<String> documents = DataMigrationDocumentSource.loadDocuments(cfg.getNumberOfDocumentsForEachCheckpoint(), collection.getPartitionKey());
 
                // NOTE: only sum the bulk import time,
                // loading/generating documents is out of the scope of bulk importer and so has to be excluded
                totalWatch.start();
                bulkImportResponse = bulkImporter.importAll(documents, false);
                totalWatch.stop();
 
            }
 
            System.out.println("##########################################################################################");
 
            totalNumberOfDocumentsImported += bulkImportResponse.getNumberOfDocumentsImported();
            totalTimeInMillis += bulkImportResponse.getTotalTimeTaken().toMillis();
            totalRequestCharge += bulkImportResponse.getTotalRequestUnitsConsumed();
 
            // check the number of imported documents to ensure everything is successfully imported
            // bulkImportResponse.getNumberOfDocumentsImported() == documents.size()
            if (bulkImportResponse.getNumberOfDocumentsImported() != cfg.getNumberOfDocumentsForEachCheckpoint()) {
                System.err.println("Some documents failed to get inserted in this checkpoint");
            }
 
            // print stats
            System.out.println("Number of documents inserted in this checkpoint: " + bulkImportResponse.getNumberOfDocumentsImported());
            System.out.println("Import time for this checkpoint in milli seconds " + bulkImportResponse.getTotalTimeTaken().toMillis());
            System.out.println("Total request unit consumed in this checkpoint: " + bulkImportResponse.getTotalRequestUnitsConsumed());
 
            System.out.println("Average RUs/second in this checkpoint: " + bulkImportResponse.getTotalRequestUnitsConsumed() / (0.001 * bulkImportResponse.getTotalTimeTaken().toMillis()));
            System.out.println("Average #Inserts/second in this checkpoint: " + bulkImportResponse.getNumberOfDocumentsImported() / (0.001 * bulkImportResponse.getTotalTimeTaken().toMillis()));
            System.out.println("##########################################################################################");
        }
 
        // print average stats
        System.out.println("##########################################################################################");
 
        System.out.println("Total import time in milli seconds measured by stopWatch: " + totalWatch.elapsed().toMillis());
        System.out.println("Total import time in milli seconds measured by api : " + totalTimeInMillis);
        System.out.println("Total Number of documents inserted " + totalNumberOfDocumentsImported);
        System.out.println("Total request unit consumed: " + totalRequestCharge);
        System.out.println("Average RUs/second:" + totalRequestCharge / (totalWatch.elapsed().toMillis() * 0.001));
        System.out.println("Average #Inserts/second: " + totalNumberOfDocumentsImported / (totalWatch.elapsed().toMillis() * 0.001));
 
-       // close bulk importer to release any existing resources
-       bulkImporter.close();
-
-       // close document client
-       client.close();
-   }
+       } // close bulk importer
+       } // closes client
+   }
 
    static class DataMigrationDocumentSource {
 
@@ -225,7 +222,7 @@ public class Main {
        policy.setRetryOptions(retryOptions);
        policy.setConnectionMode(cfg.getConnectionMode());
        policy.setMaxPoolSize(cfg.getMaxConnectionPoolSize());
 
        return new DocumentClient(cfg.getServiceEndpoint(), cfg.getMasterKey(),
                policy, cfg.getConsistencyLevel());
    }
@@ -40,14 +40,14 @@ public class Sample {
 
    public static void main(String[] args) throws DocumentClientException, InterruptedException, ExecutionException {
 
-       ConnectionPolicy policy = new ConnectionPolicy();
+       ConnectionPolicy connectionPolicy = new ConnectionPolicy();
        RetryOptions retryOptions = new RetryOptions();
        // set to 0 to let bulk importer handles throttling
        retryOptions.setMaxRetryAttemptsOnThrottledRequests(0);
-       policy.setRetryOptions(retryOptions);
-       policy.setMaxPoolSize(200);
+       connectionPolicy.setRetryOptions(retryOptions);
+       connectionPolicy.setMaxPoolSize(200);
 
-       try(DocumentClient client = new DocumentClient(HOST, MASTER_KEY, policy, ConsistencyLevel.Session)) {
+       try(DocumentClient client = new DocumentClient(HOST, MASTER_KEY, connectionPolicy, ConsistencyLevel.Session)) {
 
            String collectionLink = String.format("/dbs/%s/colls/%s", "mydb", "mycol");
            // this assumes database and collection already exists
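
For quick reference, this is the usage pattern that the updated Javadoc and the Sample/Main changes above describe, gathered into one sketch. It is not code from this commit: HOST, MASTER_KEY, the "mydb"/"mycol" names, the "/pk" partition key path, and the generated documents are placeholders, and the DocumentBulkImporter and BulkImportResponse types are assumed to come from this project's package.

// Hypothetical example, assembled from the Javadoc and sample code in this commit.
import java.util.ArrayList;
import java.util.Collection;

import com.microsoft.azure.documentdb.ConnectionPolicy;
import com.microsoft.azure.documentdb.ConsistencyLevel;
import com.microsoft.azure.documentdb.DocumentClient;
import com.microsoft.azure.documentdb.DocumentCollection;
import com.microsoft.azure.documentdb.RetryOptions;
// plus the DocumentBulkImporter and BulkImportResponse types from this project

public class BulkImportSketch {
    // placeholders: supply your own account endpoint and key
    private static final String HOST = "https://<your-account>.documents.azure.com:443/";
    private static final String MASTER_KEY = "<your-master-key>";

    public static void main(String[] args) throws Exception {
        // let the bulk importer own throttling: zero client-level retries on throttled
        // requests, and a pool size as in the Javadoc example
        ConnectionPolicy connectionPolicy = new ConnectionPolicy();
        RetryOptions retryOptions = new RetryOptions();
        retryOptions.setMaxRetryAttemptsOnThrottledRequests(0);
        connectionPolicy.setRetryOptions(retryOptions);
        connectionPolicy.setMaxPoolSize(200);

        try (DocumentClient client = new DocumentClient(HOST, MASTER_KEY, connectionPolicy, ConsistencyLevel.Session)) {
            // assumes the database and partitioned collection already exist
            String collectionLink = String.format("/dbs/%s/colls/%s", "mydb", "mycol");
            DocumentCollection collection = client.readCollection(collectionLink, null).getResource();

            try (DocumentBulkImporter importer = new DocumentBulkImporter(client, collection)) {
                // toy documents; "/pk" is an assumed partition key path for this collection
                Collection<String> documents = new ArrayList<>();
                for (int i = 0; i < 1000; i++) {
                    documents.add(String.format("{\"id\":\"doc-%d\",\"pk\":\"%d\"}", i, i % 10));
                }

                BulkImportResponse response = importer.importAll(documents, false);
                System.out.println("Imported " + response.getNumberOfDocumentsImported()
                        + " documents, RUs consumed: " + response.getTotalRequestUnitsConsumed());
            }
        }
    }
}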