From 67f31ecadaeac2f17cdefece933652a2ef0dce4d Mon Sep 17 00:00:00 2001 From: Ryan CrawCour Date: Tue, 2 Jun 2020 14:02:45 +1200 Subject: [PATCH] moving old scala code to its own branch --- .dockerignore | 3 + .gitignore | 86 +++++- Dockerfile | 29 ++ README.md | 40 ++- azure-pipelines.yml | 38 +++ build.sbt | 30 +++ pom.xml | 87 ------ .../kafka/connect/CosmosDBProvider.java | 5 - .../connect/sink/CosmosDBSinkConnector.java | 42 --- .../source/CosmosDBSourceConnector.java | 41 --- src/main/resources/logback.xml | 11 + .../connect/CosmosDBClientSettings.scala | 43 +++ .../kafka/connect/CosmosDBProvider.scala | 9 + .../kafka/connect/CosmosDBProviderImpl.scala | 249 ++++++++++++++++++ .../azure/cosmosdb/kafka/connect/Runner.scala | 45 ++++ .../ErrorHandler/HandleRetriableError.scala | 76 ++++++ .../kafka/connect/config/CosmosDBConfig.scala | 83 ++++++ .../config/CosmosDBConfigConstants.scala | 70 +++++ .../kafka/connect/kafka/EmbeddedConnect.scala | 113 ++++++++ .../kafka/connect/kafka/KafkaCluster.scala | 70 +++++ .../connect/processor/JsonPostProcessor.scala | 46 ++++ .../connect/processor/PostProcessor.scala | 32 +++ .../SampleConsoleWriterPostProcessor.scala | 22 ++ .../sink/DocumentIdSinkPostProcessor.scala | 57 ++++ .../sink/SelectorSinkPostProcessor.scala | 13 + .../source/SelectorSourcePostProcessor.scala | 13 + .../connect/processor/trait/Selector.scala | 106 ++++++++ .../connect/sink/ConnectCosmosConverter.scala | 56 ++++ .../connect/sink/CosmosDBSinkConnector.scala | 56 ++++ .../connect/sink/CosmosDBSinkSettings.scala | 11 + .../kafka/connect/sink/CosmosDBSinkTask.scala | 125 +++++++++ .../kafka/connect/sink/CosmosDBWriter.scala | 87 ++++++ .../connect/source/ChangeFeedObserver.scala | 5 + .../connect/source/ChangeFeedProcessor.scala | 64 +++++ .../source/ChangeFeedProcessorBuilder.scala | 57 ++++ .../source/ChangeFeedProcessorOptions.scala | 5 + .../kafka/connect/source/CosmosDBReader.scala | 170 ++++++++++++ .../CosmosDBReaderChangeFeedState.scala | 7 + .../source/CosmosDBSourceConnector.scala | 83 ++++++ .../source/CosmosDBSourceSettings.scala | 12 + .../connect/source/CosmosDBSourceTask.scala | 150 +++++++++++ .../source/DocumentClientBuilder.scala | 27 ++ .../source/DocumentCollectionInfo.scala | 5 + .../cosmosdb/kafka/connect/source/Main.scala | 65 +++++ .../connect/source/PartitionFeedReader.scala | 56 ++++ .../connect/source/PartitionFeedState.scala | 5 + .../source/PartitionLeaseStateManager.scala | 47 ++++ .../kafka/connect/source/SampleConsumer.scala | 47 ++++ .../java/com/microsoft/azure/AppTest.java | 20 -- .../kafka/connect/MockCosmosDBProvider.scala | 32 +++ .../kafka/connect/MockCosmosDBReader.scala | 52 ++++ .../ErrorHandler/ErrorHandlerTest.scala | 37 +++ .../connect/config/CosmosDBConfigTest.scala | 62 +++++ .../connect/config/TestConfigurations.scala | 113 ++++++++ .../kafka/connect/model/Address.scala | 21 ++ .../connect/model/CosmosDBDocumentTest.scala | 29 ++ .../connect/model/KafkaPayloadTest.scala | 14 + .../DocumentIdSinkPostProcessorTest.scala | 76 ++++++ .../processor/SelectorPostProcessorTest.scala | 151 +++++++++++ .../processor/SinkPostProcessorTest.scala | 72 +++++ .../processor/SourcePostProcessorTest.scala | 75 ++++++ .../provider/CosmosDBProviderImplTest.scala | 43 +++ .../sink/CosmosDBSinkConnectorTest.scala | 34 +++ .../CosmosDBSinkConnectorWriterTest.scala | 19 ++ .../connect/sink/CosmosDBSinkTaskTest.scala | 156 +++++++++++ .../connect/sink/CosmosDBWriterTest.scala | 216 +++++++++++++++ .../CosmosDBSourceConnectorReaderTest.scala | 65 +++++ 
.../CosmosDBSourceConnectorTaskTestMock.scala | 226 ++++++++++++++++ .../source/CosmosDBSourceConnectorTest.scala | 35 +++ .../source/CosmosDBSourceTaskTest.scala | 230 ++++++++++++++++ 70 files changed, 4178 insertions(+), 199 deletions(-) create mode 100644 .dockerignore create mode 100644 Dockerfile create mode 100644 azure-pipelines.yml create mode 100644 build.sbt delete mode 100644 pom.xml delete mode 100644 src/main/java/com/microsoft/azure/cosmosdb/kafka/connect/CosmosDBProvider.java delete mode 100644 src/main/java/com/microsoft/azure/cosmosdb/kafka/connect/sink/CosmosDBSinkConnector.java delete mode 100644 src/main/java/com/microsoft/azure/cosmosdb/kafka/connect/source/CosmosDBSourceConnector.java create mode 100644 src/main/resources/logback.xml create mode 100644 src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/CosmosDBClientSettings.scala create mode 100644 src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/CosmosDBProvider.scala create mode 100644 src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/CosmosDBProviderImpl.scala create mode 100644 src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/Runner.scala create mode 100644 src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/common/ErrorHandler/HandleRetriableError.scala create mode 100644 src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/config/CosmosDBConfig.scala create mode 100644 src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/config/CosmosDBConfigConstants.scala create mode 100644 src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/kafka/EmbeddedConnect.scala create mode 100644 src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/kafka/KafkaCluster.scala create mode 100644 src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/processor/JsonPostProcessor.scala create mode 100644 src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/processor/PostProcessor.scala create mode 100644 src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/processor/SampleConsoleWriterPostProcessor.scala create mode 100644 src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/processor/sink/DocumentIdSinkPostProcessor.scala create mode 100644 src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/processor/sink/SelectorSinkPostProcessor.scala create mode 100644 src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/processor/source/SelectorSourcePostProcessor.scala create mode 100644 src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/processor/trait/Selector.scala create mode 100644 src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/sink/ConnectCosmosConverter.scala create mode 100644 src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/sink/CosmosDBSinkConnector.scala create mode 100644 src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/sink/CosmosDBSinkSettings.scala create mode 100644 src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/sink/CosmosDBSinkTask.scala create mode 100644 src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/sink/CosmosDBWriter.scala create mode 100644 src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/ChangeFeedObserver.scala create mode 100644 src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/ChangeFeedProcessor.scala create mode 100644 src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/ChangeFeedProcessorBuilder.scala create mode 100644 src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/ChangeFeedProcessorOptions.scala create mode 
100644 src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/CosmosDBReader.scala create mode 100644 src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/CosmosDBReaderChangeFeedState.scala create mode 100644 src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/CosmosDBSourceConnector.scala create mode 100644 src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/CosmosDBSourceSettings.scala create mode 100644 src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/CosmosDBSourceTask.scala create mode 100644 src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/DocumentClientBuilder.scala create mode 100644 src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/DocumentCollectionInfo.scala create mode 100644 src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/Main.scala create mode 100644 src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/PartitionFeedReader.scala create mode 100644 src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/PartitionFeedState.scala create mode 100644 src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/PartitionLeaseStateManager.scala create mode 100644 src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/SampleConsumer.scala delete mode 100644 src/test/java/com/microsoft/azure/AppTest.java create mode 100644 src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/MockCosmosDBProvider.scala create mode 100644 src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/MockCosmosDBReader.scala create mode 100644 src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/common/ErrorHandler/ErrorHandlerTest.scala create mode 100644 src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/config/CosmosDBConfigTest.scala create mode 100644 src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/config/TestConfigurations.scala create mode 100644 src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/model/Address.scala create mode 100644 src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/model/CosmosDBDocumentTest.scala create mode 100644 src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/model/KafkaPayloadTest.scala create mode 100644 src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/processor/DocumentIdSinkPostProcessorTest.scala create mode 100644 src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/processor/SelectorPostProcessorTest.scala create mode 100644 src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/processor/SinkPostProcessorTest.scala create mode 100644 src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/processor/SourcePostProcessorTest.scala create mode 100644 src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/provider/CosmosDBProviderImplTest.scala create mode 100644 src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/sink/CosmosDBSinkConnectorTest.scala create mode 100644 src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/sink/CosmosDBSinkConnectorWriterTest.scala create mode 100644 src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/sink/CosmosDBSinkTaskTest.scala create mode 100644 src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/sink/CosmosDBWriterTest.scala create mode 100644 src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/CosmosDBSourceConnectorReaderTest.scala create mode 100644 src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/CosmosDBSourceConnectorTaskTestMock.scala create mode 100644 
src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/CosmosDBSourceConnectorTest.scala create mode 100644 src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/CosmosDBSourceTaskTest.scala diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..9f924e3 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,3 @@ +azure-pipelines.yaml +.git +.vscode \ No newline at end of file diff --git a/.gitignore b/.gitignore index e6ebc35..9dc8d41 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,85 @@ -.idea/ +/target/* +/project/* + +### Intellij ### + +.idea/** + +### mac ## +.DS_Store +C:/ + +# User-specific stuff +.idea/**/workspace.xml +.idea/**/tasks.xml +.idea/**/usage.statistics.xml +.idea/**/dictionaries +.idea/**/shelf + +# Generated files +.idea/**/contentModel.xml + +# Sensitive or high-churn files +.idea/**/dataSources/ +.idea/**/dataSources.ids +.idea/**/dataSources.local.xml +.idea/**/sqlDataSources.xml +.idea/**/dynamic.xml +.idea/**/uiDesigner.xml +.idea/**/dbnavigator.xml + +# Gradle +.idea/**/gradle.xml +.idea/**/libraries + +# Gradle and Maven with auto-import +# When using Gradle or Maven with auto-import, you should exclude module files, +# since they will be recreated, and may cause churn. Uncomment if using +# auto-import. +.idea/modules.xml +.idea/*.iml +.idea/modules +.idea/misc.xml +.idea/sbt.xml +.idea/vcs.xml + +# CMake +cmake-build-*/ + +# Mongo Explorer plugin +.idea/**/mongoSettings.xml + +# File-based project format +*.iws + +# IntelliJ +out/ + +# mpeltonen/sbt-idea plugin +.idea_modules/ + +# JIRA plugin +atlassian-ide-plugin.xml + +# Created by https://www.gitignore.io/api/sbt,scala + +### SBT ### +# Simple Build Tool +# http://www.scala-sbt.org/release/docs/Getting-Started/Directories.html#configuring-version-control + +dist/* target/ -*.iml \ No newline at end of file +lib_managed/ +src_managed/ +project/boot/ +project/plugins/project/ +.history +.cache +.lib/ + +### Scala ### +*.class +*.log + +# End of https://www.gitignore.io/api/sbt,scala + diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..0ccf7b7 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,29 @@ +# ----- Base Java - Check Dependencies ---- +FROM openjdk:8u212-b04-jdk-stretch AS base +# Env variables +ENV SCALA_VERSION=2.12.8 +ENV SBT_VERSION=1.2.8 +ENV HOME=/app +WORKDIR $HOME + +# Install sbt +RUN \ + curl -L -o sbt-$SBT_VERSION.deb https://dl.bintray.com/sbt/debian/sbt-$SBT_VERSION.deb && \ + dpkg -i sbt-$SBT_VERSION.deb && \ + rm sbt-$SBT_VERSION.deb && \ + apt-get update && \ + apt-get install sbt + +# +# ----Build the app ---- +FROM base AS build +ADD . $HOME +RUN sbt compile + +# +# ---- Publish the App ---- +FROM build AS release +EXPOSE 8888 +CMD sbt run + + diff --git a/README.md b/README.md index ee24b25..db4037e 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,41 @@ # Kafka Connect for Azure Cosmos DB ________________________ -This connector is currently undergoing a major refresh. -Stay tuned for new Java version targeted at Cosmos DB V4 Java SDK +**Kafka Connect for Azure Cosmos DB** consists of 2 connectors - + +A **Source Connector** which is used to pump data from [Azure Cosmos DB](https://azure.microsoft.com/services/cosmos-db//) via its Change Feed to [Apache Kafka](https://kafka.apache.org/). + +A **Sink Connector** reads messages from Kafka and sends them to Cosmos DB. + +## Contribute +This project welcomes contributions, feedback and suggestions. 
+If you would like to become a contributor to this project, please refer to our [Contribution Guide](CONTRIBUTING.MD).
+
+## Setup
+
+### [Source Connector](doc/README_Source.md)
+
+### [Sink Connector](doc/README_Sink.md)
+
+
+## Configuration
+
+
+## References
+It is worth looking through this material to get a better understanding of how Kafka Connect and these connectors work and how to use them.
+
+[Kafka Connect](https://docs.confluent.io/current/connect/index.html)
+
+[Kafka Connect Concepts](https://docs.confluent.io/current/connect/concepts.html)
+
+[Installing and Configuring Kafka Connect](https://docs.confluent.io/current/connect/userguide.html)
+
+[Tutorial: Moving Data In and Out of Kafka](https://docs.confluent.io/current/connect/quickstart.html)
+
+It is also worth understanding how Cosmos DB and its Change Feed work.
+
+[Cosmos DB](https://docs.microsoft.com/en-us/azure/cosmos-db/introduction)
+
+[Cosmos DB Change feed](https://docs.microsoft.com/azure/cosmos-db/change-feed)
+
+[Cosmos DB Change feed processor](https://docs.microsoft.com/en-us/azure/cosmos-db/change-feed-processor)
\ No newline at end of file
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
new file mode 100644
index 0000000..715e60e
--- /dev/null
+++ b/azure-pipelines.yml
@@ -0,0 +1,38 @@
+# Starter pipeline
+# Start with a minimal pipeline that you can customize to build and deploy your code.
+# Add steps that build, run tests, deploy, and more:
+# https://aka.ms/yaml
+
+trigger:
+- master
+
+variables: # pipeline-level
+  projName: 'kafka-connect-cosmosdb'
+  topicName: '$(Build.SourceBranchName)'
+  releaseversion: '$(Build.BuildNumber)'
+  appName: 'kafkaconnectcosmosdb'
+
+stages:
+- stage: Build_Container
+  displayName: Build the App and publish it in Azure Container Registry
+  jobs:
+  - job: BuildJob
+    pool:
+      vmImage: 'ubuntu-latest'
+
+    steps:
+    - task: Bash@3
+      inputs:
+        targetType: 'inline'
+        script: 'docker build --target=build -t $(appName)acr.azurecr.io/$(appname):canary .'
+ + - task: Bash@3 + inputs: + targetType: 'inline' + script: 'docker cp app/cosmosdbkafkaconnector.jar $(Build.ArtifactStagingDirectory)' + + - task: PublishBuildArtifacts@1 + inputs: + PathtoPublish: '$(Build.ArtifactStagingDirectory)' + ArtifactName: 'drop' + publishLocation: 'Container' diff --git a/build.sbt b/build.sbt new file mode 100644 index 0000000..2c6299f --- /dev/null +++ b/build.sbt @@ -0,0 +1,30 @@ +name := "com.microsoft.azure.cosmosdb.kafka.connect" +organization := "com.microsoft.azure" +version := "0.0.1-preview" +scalaVersion := "2.12.8" + +libraryDependencies += "com.microsoft.azure" % "azure-cosmosdb" % "2.4.4" + +libraryDependencies += "javax.ws.rs" % "javax.ws.rs-api" % "2.1.1" artifacts Artifact("javax.ws.rs-api", "jar", "jar") +libraryDependencies += "com.typesafe.scala-logging" %% "scala-logging" % "3.9.2" +libraryDependencies += "ch.qos.logback" % "logback-classic" % "1.2.3" +libraryDependencies += "com.google.code.gson" % "gson" % "2.8.5" +libraryDependencies += "io.reactivex" %% "rxscala" % "0.26.5" +libraryDependencies += "org.json4s" %% "json4s-jackson" % "3.5.0" +libraryDependencies += "org.mockito" % "mockito-scala_2.12" % "1.5.11" + +libraryDependencies += "org.apache.kafka" %% "kafka" % "2.2.0" % Compile classifier "test" +libraryDependencies += "org.apache.kafka" %% "kafka" % "2.2.0" % Compile +libraryDependencies += "org.apache.kafka" % "kafka-clients" % "2.2.0" % Compile classifier "test" +libraryDependencies += "org.apache.kafka" % "kafka-clients" % "2.2.0" % Compile +libraryDependencies += "org.apache.kafka" % "connect-api" % "2.2.0" % Compile +libraryDependencies += "org.apache.kafka" % "connect-runtime" % "2.2.0" % Compile + +trapExit := false +fork in run := true + +libraryDependencies += "org.scalactic" %% "scalactic" % "3.0.5" +libraryDependencies += "org.scalatest" %% "scalatest" % "3.0.7" % "test" +libraryDependencies += "com.typesafe" % "config" % "1.3.3" % "test" + +licenses += ("MIT", url("https://github.com/Microsoft/kafka-connect-cosmosdb/blob/master/LICENSE")) \ No newline at end of file diff --git a/pom.xml b/pom.xml deleted file mode 100644 index 10235dc..0000000 --- a/pom.xml +++ /dev/null @@ -1,87 +0,0 @@ - - - - 4.0.0 - - com.microsoft.azure - cosmosdb.kafka.connect - 1.0-SNAPSHOT - - cosmosdb.kafka.connect - - http://www.example.com - - - UTF-8 - 1.8 - 1.8 - 2.5.0 - - - - - org.apache.kafka - connect-api - ${kafka.version} - - - org.apache.kafka - kafka-clients - ${kafka.version} - - - com.microsoft.azure - azure-cosmosdb - 2.6.6 - - - junit - junit - 4.11 - test - - - - - - - - - maven-clean-plugin - 3.1.0 - - - - maven-resources-plugin - 3.0.2 - - - maven-compiler-plugin - 3.8.0 - - - maven-jar-plugin - 3.0.2 - - - maven-install-plugin - 2.5.2 - - - maven-deploy-plugin - 2.8.2 - - - - maven-site-plugin - 3.7.1 - - - maven-project-info-reports-plugin - 3.0.0 - - - - - diff --git a/src/main/java/com/microsoft/azure/cosmosdb/kafka/connect/CosmosDBProvider.java b/src/main/java/com/microsoft/azure/cosmosdb/kafka/connect/CosmosDBProvider.java deleted file mode 100644 index 7ad6bcd..0000000 --- a/src/main/java/com/microsoft/azure/cosmosdb/kafka/connect/CosmosDBProvider.java +++ /dev/null @@ -1,5 +0,0 @@ -package com.microsoft.azure.cosmosdb.kafka.connect; - -public class CosmosDBProvider{ - -} \ No newline at end of file diff --git a/src/main/java/com/microsoft/azure/cosmosdb/kafka/connect/sink/CosmosDBSinkConnector.java b/src/main/java/com/microsoft/azure/cosmosdb/kafka/connect/sink/CosmosDBSinkConnector.java deleted file mode 100644 
index e4d02fd..0000000 --- a/src/main/java/com/microsoft/azure/cosmosdb/kafka/connect/sink/CosmosDBSinkConnector.java +++ /dev/null @@ -1,42 +0,0 @@ -package com.microsoft.azure.cosmosdb.kafka.connect.sink; - -import org.apache.kafka.common.config.ConfigDef; -import org.apache.kafka.connect.connector.Task; -import org.apache.kafka.connect.sink.SinkConnector; - -import java.util.List; -import java.util.Map; - -public class CosmosDBSinkConnector extends SinkConnector { - - @Override - public void start(Map map) { - - } - - @Override - public Class taskClass() { - throw new IllegalStateException("Not implemented"); - } - - @Override - public List> taskConfigs(int i) { - throw new IllegalStateException("Not implemented"); - } - - @Override - public void stop() { - throw new IllegalStateException("Not implemented"); - } - - @Override - public ConfigDef config() { - throw new IllegalStateException("Not implemented"); - } - - @Override - public String version() { - return this.getClass().getPackage().getImplementationVersion(); - } - -} \ No newline at end of file diff --git a/src/main/java/com/microsoft/azure/cosmosdb/kafka/connect/source/CosmosDBSourceConnector.java b/src/main/java/com/microsoft/azure/cosmosdb/kafka/connect/source/CosmosDBSourceConnector.java deleted file mode 100644 index 3ef5bfa..0000000 --- a/src/main/java/com/microsoft/azure/cosmosdb/kafka/connect/source/CosmosDBSourceConnector.java +++ /dev/null @@ -1,41 +0,0 @@ -package com.microsoft.azure.cosmosdb.kafka.connect.source; - -import org.apache.kafka.common.config.ConfigDef; -import org.apache.kafka.connect.connector.Task; -import org.apache.kafka.connect.source.SourceConnector; - -import java.util.List; -import java.util.Map; - -public class CosmosDBSourceConnector extends SourceConnector { - - @Override - public void start(Map map) { - throw new IllegalStateException("Not implemented"); - } - - @Override - public Class taskClass() { - throw new IllegalStateException("Not implemented"); - } - - @Override - public List> taskConfigs(int i) { - throw new IllegalStateException("Not implemented"); - } - - @Override - public void stop() { - throw new IllegalStateException("Not implemented"); - } - - @Override - public ConfigDef config() { - throw new IllegalStateException("Not implemented"); - } - - @Override - public String version() { - return this.getClass().getPackage().getImplementationVersion(); - } -} diff --git a/src/main/resources/logback.xml b/src/main/resources/logback.xml new file mode 100644 index 0000000..218703f --- /dev/null +++ b/src/main/resources/logback.xml @@ -0,0 +1,11 @@ + + + + %d{HH:mm:ss} [%thread] %-5level %logger{36} - %msg%n + + + + + + + \ No newline at end of file diff --git a/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/CosmosDBClientSettings.scala b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/CosmosDBClientSettings.scala new file mode 100644 index 0000000..c19850b --- /dev/null +++ b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/CosmosDBClientSettings.scala @@ -0,0 +1,43 @@ +package com.microsoft.azure.cosmosdb.kafka.connect + +import com.microsoft.azure.cosmosdb.kafka.connect.config.{CosmosDBConfig, CosmosDBConfigConstants} +import com.microsoft.azure.cosmosdb.{ConnectionPolicy, ConsistencyLevel} + +case class CosmosDBClientSettings( + endpoint:String, + masterkey:String, + database:String, + collection:String, + connectionPolicy:ConnectionPolicy, + consistencyLevel:ConsistencyLevel + ) + +object CosmosDBClientSettings{ + def apply(config: CosmosDBConfig): 
CosmosDBClientSettings = { + val endpoint:String = config.getString(CosmosDBConfigConstants.CONNECTION_ENDPOINT_CONFIG) + require(endpoint.trim.nonEmpty, s"Invalid value for ${CosmosDBConfigConstants.CONNECTION_ENDPOINT_CONFIG}") + require(endpoint.startsWith("https://"), s"""Invalid value for ${CosmosDBConfigConstants.CONNECTION_ENDPOINT_CONFIG} - endpoint must start with "https://"""") + + val masterKey:String = config.getPassword(CosmosDBConfigConstants.CONNECTION_MASTERKEY_CONFIG).value() + require(masterKey.trim.nonEmpty, s"Invalid value for ${CosmosDBConfigConstants.CONNECTION_MASTERKEY_CONFIG}") + + val database:String = config.getString(CosmosDBConfigConstants.DATABASE_CONFIG) + require(database.trim.nonEmpty, s"Invalid value for ${CosmosDBConfigConstants.DATABASE_CONFIG}") + + val collection:String = config.getString(CosmosDBConfigConstants.COLLECTION_CONFIG) + require(collection.trim.nonEmpty, s"Invalid value for ${CosmosDBConfigConstants.COLLECTION_CONFIG}") + + //TODO: make this configurable + val connectionPolicy = ConnectionPolicy.GetDefault() + + //TODO: make this configurable + val consistencyLevel = ConsistencyLevel.Session + + new CosmosDBClientSettings(endpoint, + masterKey, + database, + collection, + connectionPolicy, + consistencyLevel) + } +} \ No newline at end of file diff --git a/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/CosmosDBProvider.scala b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/CosmosDBProvider.scala new file mode 100644 index 0000000..afaeddc --- /dev/null +++ b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/CosmosDBProvider.scala @@ -0,0 +1,9 @@ +package com.microsoft.azure.cosmosdb.kafka.connect + +import java.util.concurrent.CountDownLatch +import com.microsoft.azure.cosmosdb.rx.AsyncDocumentClient + +trait CosmosDBProvider { + def getClient(settings: CosmosDBClientSettings): AsyncDocumentClient + def upsertDocuments[T](docs: scala.List[T], databaseName: String, collectionName: String, completionLatch: CountDownLatch): Unit +} diff --git a/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/CosmosDBProviderImpl.scala b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/CosmosDBProviderImpl.scala new file mode 100644 index 0000000..c402fef --- /dev/null +++ b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/CosmosDBProviderImpl.scala @@ -0,0 +1,249 @@ +package com.microsoft.azure.cosmosdb.kafka.connect + +import java.util +import java.util.List +import java.util.concurrent.CountDownLatch + +import _root_.rx.Observable +import _root_.rx.lang.scala.JavaConversions._ +import com.microsoft.azure.cosmosdb._ +import com.microsoft.azure.cosmosdb.kafka.connect.common.ErrorHandler.HandleRetriableError +import com.microsoft.azure.cosmosdb.rx.AsyncDocumentClient + +import scala.util.{Failure, Success} + +object CosmosDBProviderImpl extends HandleRetriableError with CosmosDBProvider { + + private val requestOptionsInsert = new RequestOptions + requestOptionsInsert.setConsistencyLevel(ConsistencyLevel.Session) + + initializeErrorHandler(2) + + var client: AsyncDocumentClient = _ + + def getClient(settings: CosmosDBClientSettings): AsyncDocumentClient = synchronized { + if (client == null) { + client = new AsyncDocumentClient.Builder() + .withServiceEndpoint(settings.endpoint) + .withMasterKeyOrResourceToken(settings.masterkey) + .withConnectionPolicy(settings.connectionPolicy) + .withConsistencyLevel(settings.consistencyLevel) + .build() + } + + client + } + + def getCollectionLink(databaseName: 
String, collectionName: String) = "/dbs/%s/colls/%s".format(databaseName, collectionName) + + def createDatabaseIfNotExists(databaseName: String): Unit = { + + if (!isDatabaseExists(databaseName)) { + val dbDefinition = new Database() + dbDefinition.setId(databaseName) + + logger.info(s"Creating Database $databaseName") + + client.createDatabase(dbDefinition, null).toCompletable.await() + } + } + + def createCollectionIfNotExists(databaseName: String, collectionName: String): Unit = { + if (!isCollectionExists(databaseName, collectionName)) { + val dbLnk = String.format("/dbs/%s", databaseName) + val collDefinition = new DocumentCollection + collDefinition.setId(collectionName) + + logger.info(s"Creating Collection $collectionName") + + client.createCollection(dbLnk, collDefinition, null).toCompletable.await() + } + } + + def isDatabaseExists(databaseName: String): Boolean = { + val databaseLink = s"/dbs/$databaseName" + val databaseReadObs = client.readDatabase(databaseLink, null) + var isDatabaseExists = false + + val db = databaseReadObs + .doOnNext((x: ResourceResponse[Database]) => { + def foundDataBase(x: ResourceResponse[Database]): Unit = { + logger.info(s"Database $databaseName already exists.") + isDatabaseExists = true + } + + foundDataBase(x) + }) + .onErrorResumeNext((e: Throwable) => { + def tryCreateDatabaseOnError(e: Throwable) = { + e match { + case de: DocumentClientException => + if (de.getStatusCode == 404) { + logger.info(s"Database $databaseName does not exist") + isDatabaseExists = false + } + } + Observable.empty() + } + + tryCreateDatabaseOnError(e) + }) + + db.toCompletable.await() + + isDatabaseExists + } + + def isCollectionExists(databaseName: String, collectionName: String): Boolean = { + + var isCollectionExists = false + val dbLnk = s"/dbs/$databaseName" + val params = new SqlParameterCollection(new SqlParameter("@id", collectionName)) + + val qry = new SqlQuerySpec("SELECT * FROM r where r.id = @id", params) + + client.queryCollections(dbLnk, qry, null).single.flatMap(page => { + def foundCollection(page: FeedResponse[DocumentCollection]) = { + isCollectionExists = !page.getResults.isEmpty + Observable.empty + } + + foundCollection(page) + }).toCompletable.await() + + isCollectionExists + } + + def close(): Unit = { + client.close() + } + + def readChangeFeed(databaseName: String, collectionName: String): Unit = { + //TODO: call Allan's ChangeFeedProcessor here + //TODO: ultimately replace Allan's ChangeFeedProcessor with the PG one + } + + def createDocuments[T](docs: scala.List[T], databaseName: String, collectionName: String, completionLatch: CountDownLatch): Unit = { + val colLnk = s"/dbs/$databaseName/colls/$collectionName" + val createDocumentsOBs: List[Observable[ResourceResponse[Document]]] = new util.ArrayList[Observable[ResourceResponse[Document]]] + + docs.foreach(f = t => { + val obs = client.createDocument(colLnk, t, null, false) + createDocumentsOBs.add(obs) + }) + + val forcedScalaObservable: _root_.rx.lang.scala.Observable[ResourceResponse[Document]] = Observable.merge(createDocumentsOBs) + + forcedScalaObservable + .map(r => r.getRequestCharge) + .reduce((sum, value) => sum + value) + .subscribe( + t => { + logger.debug(s"createDocuments total RU charge is $t") + HandleRetriableError(Success()) + }, + e => { + logger.debug(s"error creating documents e:${e.getMessage()} stack:${e.getStackTrace().toString()}") + HandleRetriableError(Failure(e)) + completionLatch.countDown() + }, + () => { + logger.info("createDocuments completed") + 
completionLatch.countDown() + }) + } + + + def upsertDocuments[T](docs: scala.List[T], databaseName: String, collectionName: String, completionLatch: CountDownLatch): Unit = { + val colLnk = s"/dbs/$databaseName/colls/$collectionName" + val upsertDocumentsOBs: List[Observable[ResourceResponse[Document]]] = new util.ArrayList[Observable[ResourceResponse[Document]]] + + docs.foreach(f = t => { + val obs = client.upsertDocument(colLnk, t, null, false) + upsertDocumentsOBs.add(obs) + }) + + val forcedScalaObservable: _root_.rx.lang.scala.Observable[ResourceResponse[Document]] = Observable.merge(upsertDocumentsOBs) + + forcedScalaObservable + .map(r => r.getRequestCharge) + .reduce((sum, value) => sum + value) + .subscribe( + t => { + logger.debug(s"upsertDocuments total RU charge is $t") + HandleRetriableError(Success()) + }, + e => { + logger.debug(s"error upserting documents e:${e.getMessage()} stack:${e.getStackTrace().toString()}") + HandleRetriableError(Failure(e)) + completionLatch.countDown() + }, + () => { + logger.info("upsertDocuments completed") + completionLatch.countDown() + }) + } + + + + def readCollection(databaseName: String, collectionName: String, completionLatch: CountDownLatch): _root_.rx.lang.scala.Observable[ResourceResponse[DocumentCollection]]= { // Create a Collection + val colLnk = s"/dbs/$databaseName/colls/$collectionName" + logger.info("reading collection " + colLnk) + + val readDocumentsOBs = client.readCollection(colLnk, null) + val forcedScalaObservable: _root_.rx.lang.scala.Observable[ResourceResponse[DocumentCollection]] = readDocumentsOBs + + forcedScalaObservable + .subscribe( + t => { + logger.debug(s"activityId" + t.getActivityId + s"id" + t.getResource.getId) + HandleRetriableError(Success()) + }, + e => { + logger.debug(s"error reading document collection e:${e.getMessage()} stack:${e.getStackTrace().toString()}") + HandleRetriableError(Failure(e)) + completionLatch.countDown() + }, + () => { + logger.info("readDocuments completed") + completionLatch.countDown() + }) + return forcedScalaObservable + + } + + + def queryCollection(databaseName: String, collectionName: String, completionLatch: CountDownLatch): _root_.rx.lang.scala.Observable[FeedResponse[DocumentCollection]]= { // Create a Collection + val colLnk = s"/dbs/$databaseName/colls/$collectionName" + val dbLink = s"/dbs/$databaseName" + logger.info("reading collection " + colLnk) + + //val query = "SELECT * from c" + val query = String.format("SELECT * from c where c.id = '%s'", collectionName) + val options = new FeedOptions + options.setMaxItemCount(2) + + val queryCollectionObservable = client.queryCollections(dbLink, query, options) + + val forcedScalaObservable: _root_.rx.lang.scala.Observable[FeedResponse[DocumentCollection]] = queryCollectionObservable + + forcedScalaObservable + .subscribe( + t => { + logger.debug(s"activityId" + t.getActivityId + s"id" + t.getResults.toString) + HandleRetriableError(Success()) + }, + e => { + logger.debug(s"error reading document collection e:${e.getMessage()} stack:${e.getStackTrace().toString()}") + HandleRetriableError(Failure(e)) + completionLatch.countDown() + }, + () => { + logger.debug("readDocuments completed") + completionLatch.countDown() + }) + return forcedScalaObservable + + } + +} \ No newline at end of file diff --git a/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/Runner.scala b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/Runner.scala new file mode 100644 index 0000000..a2f3871 --- /dev/null +++ 
b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/Runner.scala @@ -0,0 +1,45 @@ +package com.microsoft.azure.cosmosdb.kafka.connect + +import java.util.concurrent.CountDownLatch + +import com.microsoft.azure.cosmosdb._ +import com.microsoft.azure.cosmosdb.kafka.connect.config.{CosmosDBConfig, CosmosDBConfigConstants} + +import scala.collection.JavaConverters._ + +// TODO: Please follow getter and setter model +// Otherwise document create fails +class SampleDoc() { + private var name = "" + private var age = 0 +} + +object Runner extends App{ + + val connectionPolicy=new ConnectionPolicy(); + connectionPolicy.setConnectionMode(ConnectionMode.Direct) + connectionPolicy.setMaxPoolSize(600) + + val consistencyLevel = ConsistencyLevel.Session + + val cosmosDBClientSettings=CosmosDBClientSettings( + endpoint = "test", + masterkey = "test", + database = "test", + collection = "test", + connectionPolicy = connectionPolicy, + consistencyLevel = consistencyLevel) + + val client = CosmosDBProviderImpl.getClient(cosmosDBClientSettings) + + CosmosDBProviderImpl.createDatabaseIfNotExists("test8") + + CosmosDBProviderImpl.createCollectionIfNotExists("test8","collection") + + val sampleDoc = new SampleDoc() + val docs=List[SampleDoc](sampleDoc) + + CosmosDBProviderImpl.createDocuments[SampleDoc](docs,"test8","collection", new CountDownLatch(1)) + + println("End of the Runner.") +} diff --git a/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/common/ErrorHandler/HandleRetriableError.scala b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/common/ErrorHandler/HandleRetriableError.scala new file mode 100644 index 0000000..5d1b205 --- /dev/null +++ b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/common/ErrorHandler/HandleRetriableError.scala @@ -0,0 +1,76 @@ +package com.microsoft.azure.cosmosdb.kafka.connect.common.ErrorHandler + +import java.util.Date + +import com.microsoft.azure.cosmosdb.kafka.connect.config.CosmosDBConfigConstants +import com.typesafe.scalalogging.StrictLogging +import org.apache.kafka.connect.errors.{ConnectException, RetriableException} + +import scala.util.{Failure, Success, Try} + + +case class ErrorHandlerObj(remainingRetries: Int, maxRetries: Int, errorMessage: String, lastErrorTimestamp: Date) + + +trait HandleRetriableError extends StrictLogging{ + + var errorHandlerObj: Option[ErrorHandlerObj] = None + private var maxRetriesDefault = CosmosDBConfigConstants.ERROR_MAX_RETRIES_DEFAULT + + + def initializeErrorHandler(maxRetries: Int): Unit = { + errorHandlerObj = Some(ErrorHandlerObj(maxRetries, maxRetries, "", new Date())) + } + + def HandleRetriableError[A](t : Try[A]) : Option[A] = { + if(!errorHandlerObj.isDefined) { + logger.info(s"HandleRetriableError not initialized, getting max retries value") + maxRetriesDefault = CosmosDBConfigConstants.ERROR_MAX_RETRIES_DEFAULT + initializeErrorHandler(maxRetriesDefault) + } + t + match { + case Success(s) => { + //in case we had previous errors. 
+ if (errorHandlerObj.get.remainingRetries != errorHandlerObj.get.maxRetries) { + logger.info(s"Message retry is successful.") + } + //reset ErrorHandlerObj + resetErrorHandlerObj() + Some(s) + } + case Failure(f) => + + //decrement the retry count + logger.error(s"Encountered error ${f.getMessage}", f) + this.errorHandlerObj = Some(decrementErrorHandlerRetries(errorHandlerObj.get, f.getMessage)) + //handle policy error + handleError(f, errorHandlerObj.get.remainingRetries, errorHandlerObj.get.maxRetries) + None + } + } + + def resetErrorHandlerObj() = { + errorHandlerObj = Some(ErrorHandlerObj(errorHandlerObj.get.maxRetries, errorHandlerObj.get.maxRetries, "", new Date())) + } + + private def decrementErrorHandlerRetries(errorHandlerObj: ErrorHandlerObj, msg: String): ErrorHandlerObj = { + if (errorHandlerObj.maxRetries == -1) { + ErrorHandlerObj(errorHandlerObj.remainingRetries, errorHandlerObj.maxRetries, msg, new Date()) + } else { + ErrorHandlerObj(errorHandlerObj.remainingRetries - 1, errorHandlerObj.maxRetries, msg, new Date()) + } + } + + private def handleError(error: Throwable, retryCount: Int, maxRetries: Int) = { + + //throw connectException + if (maxRetries > 0 && retryCount == 0) { + throw new ConnectException(error) + } + else { + logger.warn(s"Error policy set to RETRY. Remaining attempts $retryCount") + throw new RetriableException(error) + } + } +} diff --git a/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/config/CosmosDBConfig.scala b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/config/CosmosDBConfig.scala new file mode 100644 index 0000000..67e73a9 --- /dev/null +++ b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/config/CosmosDBConfig.scala @@ -0,0 +1,83 @@ +package com.microsoft.azure.cosmosdb.kafka.connect.config + +import java.util + +import org.apache.kafka.common.config.ConfigDef.{Importance, Type, Width} +import org.apache.kafka.common.config.{AbstractConfig, ConfigDef} + +object ConnectorConfig { + lazy val baseConfigDef: ConfigDef = new ConfigDef() + .define(CosmosDBConfigConstants.CONNECTION_ENDPOINT_CONFIG, Type.STRING, Importance.HIGH, + CosmosDBConfigConstants.CONNECTION_ENDPOINT_DOC, "Connection", 1, Width.LONG, + CosmosDBConfigConstants.CONNECTION_ENDPOINT_DISPLAY) + + .define(CosmosDBConfigConstants.CONNECTION_MASTERKEY_CONFIG, Type.PASSWORD, Importance.HIGH, + CosmosDBConfigConstants.CONNECTION_MASTERKEY_DOC, "Connection", 2, Width.LONG, + CosmosDBConfigConstants.CONNECTION_MASTERKEY_DISPLAY) + + .define(CosmosDBConfigConstants.DATABASE_CONFIG, Type.STRING, Importance.HIGH, + CosmosDBConfigConstants.DATABASE_CONFIG_DOC, "Database", 1, Width.MEDIUM, + CosmosDBConfigConstants.DATABASE_CONFIG_DISPLAY) + + .define(CosmosDBConfigConstants.COLLECTION_CONFIG, Type.STRING, Importance.HIGH, + CosmosDBConfigConstants.COLLECTION_CONFIG_DOC, "Collection", 1, Width.MEDIUM, + CosmosDBConfigConstants.COLLECTION_CONFIG_DISPLAY) + + .define(CosmosDBConfigConstants.TOPIC_CONFIG, Type.STRING, Importance.HIGH, + CosmosDBConfigConstants.TOPIC_CONFIG_DOC, "Topic", 1, Width.MEDIUM, + CosmosDBConfigConstants.TOPIC_CONFIG_DISPLAY) + + .define(CosmosDBConfigConstants.ERRORS_RETRY_TIMEOUT_CONFIG, Type.INT, CosmosDBConfigConstants.ERROR_MAX_RETRIES_DEFAULT, Importance.MEDIUM, + CosmosDBConfigConstants.ERRORS_RETRY_TIMEOUT_DOC, "Common", 1, + Width.MEDIUM , CosmosDBConfigConstants.ERRORS_RETRY_TIMEOUT_DISPLAY) + + + /** + * Holds the extra configurations for the source on top of + * the base. 
+ **/ + lazy val sourceConfigDef: ConfigDef = ConnectorConfig.baseConfigDef + .define(CosmosDBConfigConstants.ASSIGNED_PARTITIONS, Type.STRING, "", Importance.HIGH, + CosmosDBConfigConstants.ASSIGNED_PARTITIONS_DOC, "Source", 1, Width.MEDIUM, + CosmosDBConfigConstants.ASSIGNED_PARTITIONS_DISPLAY) + .define(CosmosDBConfigConstants.READER_BUFFER_SIZE, Type.INT, CosmosDBConfigConstants.READER_BUFFER_SIZE_DEFAULT, Importance.MEDIUM, + CosmosDBConfigConstants.READER_BUFFER_SIZE_DOC, "Source", 2, Width.LONG, + CosmosDBConfigConstants.READER_BUFFER_SIZE_DISPLAY) + .define(CosmosDBConfigConstants.BATCH_SIZE, Type.INT, CosmosDBConfigConstants.BATCH_SIZE_DEFAULT, Importance.MEDIUM, + CosmosDBConfigConstants.BATCH_SIZE_DOC, "Source", 3, Width.LONG, + CosmosDBConfigConstants.BATCH_SIZE_DISPLAY) + .define(CosmosDBConfigConstants.SOURCE_POST_PROCESSOR, Type.STRING, CosmosDBConfigConstants.SOURCE_POST_PROCESSOR_DEFAULT, Importance.MEDIUM, + CosmosDBConfigConstants.SOURCE_POST_PROCESSOR_DOC, "Source", 4, Width.LONG, + CosmosDBConfigConstants.SOURCE_POST_PROCESSOR_DISPLAY) + .define(CosmosDBConfigConstants.TIMEOUT, Type.INT, CosmosDBConfigConstants.TIMEOUT_DEFAULT, Importance.MEDIUM, + CosmosDBConfigConstants.TIMEOUT_DOC, "Source", 4, Width.LONG, + CosmosDBConfigConstants.TIMEOUT_DISPLAY) + + /** + * Holds the extra configurations for the sink on top of + * the base. + **/ + + lazy val sinkConfigDef: ConfigDef = ConnectorConfig.baseConfigDef + .define(CosmosDBConfigConstants.COLLECTION_TOPIC_MAP_CONFIG, Type.STRING, Importance.HIGH, + CosmosDBConfigConstants.COLLECTION_TOPIC_MAP_CONFIG_DOC, "Map", 1, Width.MEDIUM, + CosmosDBConfigConstants.COLLECTION_TOPIC_MAP_CONFIG_DISPLAY) + .define(CosmosDBConfigConstants.SINK_POST_PROCESSOR, Type.STRING, CosmosDBConfigConstants.SINK_POST_PROCESSOR_DEFAULT, Importance.MEDIUM, + CosmosDBConfigConstants.SINK_POST_PROCESSOR_DOC, "Sink", 1, Width.LONG, + CosmosDBConfigConstants.SINK_POST_PROCESSOR_DISPLAY) + // .define(CosmosDBConfigConstants.EXTRA_SINK_CONFIG_01, Type.STRING, Importance.HIGH, + // CosmosDBConfigConstants.EXTRA_SINK_CONFIG_01_DOC, "Sink", 1, Width.MEDIUM, + // CosmosDBConfigConstants.EXTRA_SINK_CONFIG_01_DISPLAY) + // .define(CosmosDBConfigConstants.EXTRA_SINK_CONFIG_02, Type.STRING, Importance.HIGH, + // CosmosDBConfigConstants.EXTRA_SINK_CONFIG_02_DOC, "Sink", 2, Width.MEDIUM, + // CosmosDBConfigConstants.EXTRA_SINK_CONFIG_02_DISPLAY) + + lazy val commonConfigDef: ConfigDef = ConnectorConfig.baseConfigDef + .define(CosmosDBConfigConstants.ERRORS_RETRY_TIMEOUT_CONFIG, Type.INT, CosmosDBConfigConstants.ERROR_MAX_RETRIES_DEFAULT, Importance.MEDIUM, + CosmosDBConfigConstants.ERRORS_RETRY_TIMEOUT_DOC, "Common", 1, + Width.MEDIUM , CosmosDBConfigConstants.ERRORS_RETRY_TIMEOUT_DISPLAY) + +} + +case class CosmosDBConfig(config: ConfigDef, props: util.Map[String, String]) + extends AbstractConfig(config, props) \ No newline at end of file diff --git a/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/config/CosmosDBConfigConstants.scala b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/config/CosmosDBConfigConstants.scala new file mode 100644 index 0000000..9c437fc --- /dev/null +++ b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/config/CosmosDBConfigConstants.scala @@ -0,0 +1,70 @@ +package com.microsoft.azure.cosmosdb.kafka.connect.config + +object CosmosDBConfigConstants { + val CONNECTOR_PREFIX = "connect.cosmosdb" + + val CONNECTION_ENDPOINT_CONFIG = s"$CONNECTOR_PREFIX.connection.endpoint" + val CONNECTION_ENDPOINT_DOC = "The 
Cosmos DB endpoint."
+  val CONNECTION_ENDPOINT_DISPLAY = "Endpoint"
+
+  val CONNECTION_MASTERKEY_CONFIG = s"$CONNECTOR_PREFIX.master.key"
+  val CONNECTION_MASTERKEY_DOC = "The connection master key."
+  val CONNECTION_MASTERKEY_DISPLAY = "Master Key"
+
+  val DATABASE_CONFIG = s"$CONNECTOR_PREFIX.database"
+  val DATABASE_CONFIG_DISPLAY = "Database Name."
+  val DATABASE_CONFIG_DOC = "The Cosmos DB target database."
+
+  val COLLECTION_CONFIG = s"$CONNECTOR_PREFIX.collections"
+  val COLLECTION_CONFIG_DISPLAY = "Collection Names List."
+  val COLLECTION_CONFIG_DOC = "A comma delimited list of target collection names."
+
+  val TOPIC_CONFIG = s"$CONNECTOR_PREFIX.topic.name"
+  val TOPIC_CONFIG_DISPLAY = "Topic Names List."
+  val TOPIC_CONFIG_DOC = "A comma delimited list of target Kafka Topics."
+
+  val COLLECTION_TOPIC_MAP_CONFIG = s"$CONNECTOR_PREFIX.collections.topicmap"
+  val COLLECTION_TOPIC_MAP_CONFIG_DISPLAY = "Collection Topic Map."
+  val COLLECTION_TOPIC_MAP_CONFIG_DOC = "A comma delimited list of collections mapped to their topics. Formatted coll1#topic1,coll2#topic2."
+
+  //for the source task, the connector will set this for each source task
+  val ASSIGNED_PARTITIONS = s"$CONNECTOR_PREFIX.assigned.partitions"
+  val ASSIGNED_PARTITIONS_DOC = "The CosmosDB partitions a task has been assigned."
+  val ASSIGNED_PARTITIONS_DISPLAY = "Assigned Partitions."
+
+  val BATCH_SIZE = s"$CONNECTOR_PREFIX.task.batch.size"
+  val BATCH_SIZE_DISPLAY = "Batch Size."
+  val BATCH_SIZE_DOC = "The max number of documents the source task will buffer before sending them to Kafka."
+  val BATCH_SIZE_DEFAULT = 100
+
+  val READER_BUFFER_SIZE = s"$CONNECTOR_PREFIX.task.buffer.size"
+  val READER_BUFFER_SIZE_DISPLAY = "Reader Buffer Size."
+  val READER_BUFFER_SIZE_DOC = "The max size of the collection of documents the source task will buffer before sending them to Kafka."
+  val READER_BUFFER_SIZE_DEFAULT = 10000
+
+  val SOURCE_POST_PROCESSOR = s"$CONNECTOR_PREFIX.source.post-processor"
+  val SOURCE_POST_PROCESSOR_DISPLAY = "Source Post-Processor List"
+  val SOURCE_POST_PROCESSOR_DOC = "Comma-separated list of Source Post-Processor class names to use for post-processing"
+  val SOURCE_POST_PROCESSOR_DEFAULT = ""
+
+  val SINK_POST_PROCESSOR = s"$CONNECTOR_PREFIX.sink.post-processor"
+  val SINK_POST_PROCESSOR_DISPLAY = "Sink Post-Processor List"
+  val SINK_POST_PROCESSOR_DOC = "Comma-separated list of Sink Post-Processor class names to use for post-processing"
+  val SINK_POST_PROCESSOR_DEFAULT = ""
+
+  val DEFAULT_POLL_INTERVAL = 1000
+
+  val ERRORS_RETRY_TIMEOUT_CONFIG = "errors.retry.timeout"
+  val ERROR_MAX_RETRIES_DEFAULT = 3
+  val ERRORS_RETRY_TIMEOUT_DISPLAY = "Retry Timeout for Errors"
+  val ERRORS_RETRY_TIMEOUT_DOC = "The maximum number of times a failed operation " +
+    "will be reattempted. The default is 3. Use -1 for infinite retries."
+
+  val TIMEOUT = s"$CONNECTOR_PREFIX.task.timeout"
+  val TIMEOUT_DISPLAY = "Timeout."
+  val TIMEOUT_DOC = "The max number of milliseconds the source task will use to read documents before sending them to Kafka."
+ val TIMEOUT_DEFAULT = 5000 + +} + + diff --git a/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/kafka/EmbeddedConnect.scala b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/kafka/EmbeddedConnect.scala new file mode 100644 index 0000000..bc14c27 --- /dev/null +++ b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/kafka/EmbeddedConnect.scala @@ -0,0 +1,113 @@ +package com.microsoft.azure.cosmosdb.kafka.connect.kafka + + +import org.apache.kafka.common.utils.SystemTime +import org.apache.kafka.common.utils.Time +import org.apache.kafka.common.utils.Utils +import org.apache.kafka.connect.runtime.{ConnectorConfig, Herder, Worker} +import org.apache.kafka.connect.runtime.distributed.DistributedConfig +import org.apache.kafka.connect.runtime.distributed.DistributedHerder +import org.apache.kafka.connect.runtime.rest.entities.ConnectorInfo +import org.apache.kafka.connect.storage._ +import org.apache.kafka.connect.util.FutureCallback +import java.util.Properties +import java.util.UUID +import java.util.concurrent.CountDownLatch +import java.util.concurrent.ExecutionException +import java.util.concurrent.TimeUnit +import java.util.concurrent.TimeoutException +import java.util.concurrent.atomic.AtomicBoolean +import scala.collection.JavaConversions._ +import com.typesafe.scalalogging.StrictLogging +import org.apache.kafka.connect.runtime.isolation.Plugins + +/** + * Embedded Kafka Connect server as per KIP-26 + */ +case class EmbeddedConnect(workerConfig: Properties, connectorConfigs: List[Properties]) extends StrictLogging { + + private val REQUEST_TIMEOUT_MS = 120000 + private val startLatch: CountDownLatch = new CountDownLatch(1) + private val shutdown: AtomicBoolean = new AtomicBoolean(false) + private val stopLatch: CountDownLatch = new CountDownLatch(1) + + private var worker: Worker = _ + private var herder: DistributedHerder = _ + + // ConnectEmbedded - throws Exception + val time: Time = new SystemTime() + val config: DistributedConfig = new DistributedConfig(Utils.propsToStringMap(workerConfig)) + + val offsetBackingStore: KafkaOffsetBackingStore = new KafkaOffsetBackingStore() + offsetBackingStore.configure(config) + //not sure if this is going to work but because we don't have advertised url we can get at least a fairly random + val workerId: String = UUID.randomUUID().toString + println("---> " + config.toString) + worker = new Worker(workerId, time, new Plugins(Map.empty[String, String]), config, offsetBackingStore) + + val statusBackingStore: StatusBackingStore = new KafkaStatusBackingStore(time, worker.getInternalValueConverter) + statusBackingStore.configure(config) + + val configBackingStore: ConfigBackingStore = new KafkaConfigBackingStore(worker.getInternalValueConverter, config, worker.configTransformer()) + + //advertisedUrl = "" as we don't have the rest server - hopefully this will not break anything + herder = new DistributedHerder(config, time, worker, "KafkaCluster1",statusBackingStore, configBackingStore, "") + + def start(): Unit = { + try { + logger.info("Kafka ConnectEmbedded starting") + + sys.ShutdownHookThread { + logger.info("exiting") + try { + startLatch.await() + EmbeddedConnect.this.stop() + } catch { + case e: InterruptedException => logger.error("Interrupted in shutdown hook while waiting for Kafka Connect startup to finish"); + } + } + worker.start() + herder.start() + + logger.info("Kafka ConnectEmbedded started") + + connectorConfigs.foreach { connectorConfig: Properties => + val callback = new 
FutureCallback[Herder.Created[ConnectorInfo]]() + val name = connectorConfig.getProperty(ConnectorConfig.NAME_CONFIG) + herder.putConnectorConfig(name, Utils.propsToStringMap(connectorConfig), true, callback) + callback.get(REQUEST_TIMEOUT_MS, TimeUnit.MILLISECONDS) + } + + } catch { + case e: InterruptedException => logger.error("Starting interrupted ", e) + case e: ExecutionException => logger.error("Submitting connector config failed", e.getCause) + case e: TimeoutException => logger.error("Submitting connector config timed out", e) + case e: Exception => logger.error("Starting failed", e) + } finally { + startLatch.countDown() + } + } + + def stop(): Unit = { + try { + val wasShuttingDown = shutdown.getAndSet(true) + if (!wasShuttingDown) { + logger.info("Kafka ConnectEmbedded stopping") + herder.stop() + worker.stop() + logger.info("Kafka ConnectEmbedded stopped") + } + } finally { + stopLatch.countDown() + } + } + + def awaitStop(): Unit = { + try { + stopLatch.await() + } catch { + case e: InterruptedException => logger.error("Interrupted waiting for Kafka Connect to shutdown") + } + } + +} diff --git a/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/kafka/KafkaCluster.scala b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/kafka/KafkaCluster.scala new file mode 100644 index 0000000..06afd6a --- /dev/null +++ b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/kafka/KafkaCluster.scala @@ -0,0 +1,70 @@ +package com.microsoft.azure.cosmosdb.kafka.connect.kafka + +import java.util.Properties + +import kafka.server.{KafkaConfig, KafkaServer} +import kafka.utils.{CoreUtils, TestUtils} +import kafka.zk.EmbeddedZookeeper +import org.apache.kafka.common.security.auth.SecurityProtocol +import org.apache.kafka.common.utils.SystemTime + +import scala.collection.immutable.IndexedSeq + + +object KafkaCluster extends AutoCloseable { + + private val Zookeeper = new EmbeddedZookeeper + val brokersNumber = 1 + val ZookeeperConnection = s"localhost:${Zookeeper.port}" + var Connect: EmbeddedConnect = _ + var kafkaConnectEnabled: Boolean = false + val BrokersConfig: IndexedSeq[KafkaConfig] = (1 to brokersNumber).map(i => getKafkaConfig(i)) + val Brokers: IndexedSeq[KafkaServer] = BrokersConfig.map(TestUtils.createServer(_, new SystemTime())) + val BrokersList: String = TestUtils.getBrokerListStrFromServers(Brokers, SecurityProtocol.PLAINTEXT) + System.setProperty("http.nonProxyHosts", "localhost|0.0.0.0|127.0.0.1") + + def startEmbeddedConnect(workerConfig: Properties, connectorConfigs: List[Properties]): Unit = { + kafkaConnectEnabled = true + Connect = EmbeddedConnect(workerConfig, connectorConfigs) + Connect.start() + } + + private def injectProperties(props: Properties, brokerId: Int): Unit = { + props.setProperty("log.dir", s"C:/Temp/kafka-logs-${brokerId}") + props.setProperty("auto.create.topics.enable", "true") + props.setProperty("num.partitions", "1") + } + + private def getKafkaConfig(brokerId: Int): KafkaConfig = { + val props: Properties = TestUtils.createBrokerConfig( + brokerId, + ZookeeperConnection, + enableControlledShutdown = false, + enableDeleteTopic = false, + TestUtils.RandomPort, + interBrokerSecurityProtocol = None, + trustStoreFile = None, + None, + enablePlaintext = true, + enableSaslPlaintext = false, + TestUtils.RandomPort, + enableSsl = false, + TestUtils.RandomPort, + enableSaslSsl = false, + TestUtils.RandomPort, + None) + injectProperties(props, brokerId) + KafkaConfig.fromProps(props) + } + + def close(): Unit = { + if (kafkaConnectEnabled) 
{ + Connect.stop() + } + Brokers.foreach { server => + server.shutdown + CoreUtils.delete(server.config.logDirs) + } + Zookeeper.shutdown() + } +} \ No newline at end of file diff --git a/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/processor/JsonPostProcessor.scala b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/processor/JsonPostProcessor.scala new file mode 100644 index 0000000..5931436 --- /dev/null +++ b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/processor/JsonPostProcessor.scala @@ -0,0 +1,46 @@ +package com.microsoft.azure.cosmosdb.kafka.connect.processor + +import com.google.gson._ +import org.apache.kafka.connect.sink.SinkRecord +import org.apache.kafka.connect.source.SourceRecord + +abstract class JsonPostProcessor extends PostProcessor { + + override final def runPostProcess(sourceRecord: SourceRecord): SourceRecord = { + val jsonParser = new JsonParser() + val json: JsonObject = jsonParser.parse(sourceRecord.value().toString).getAsJsonObject + + val processedJson = runJsonPostProcess(json) + + val result = new SourceRecord( + sourceRecord.sourcePartition, + sourceRecord.sourceOffset, + sourceRecord.topic, + null, + processedJson.toString + ) + + result + } + + override def runPostProcess(sinkRecord: SinkRecord): SinkRecord = { + val jsonParser = new JsonParser() + val json: JsonObject = jsonParser.parse(sinkRecord.value().toString).getAsJsonObject + + val processedJson = runJsonPostProcess(json) + + val result = new SinkRecord( + sinkRecord.topic, + sinkRecord.kafkaPartition, + sinkRecord.keySchema, + sinkRecord.key, + sinkRecord.valueSchema, + processedJson.toString, + sinkRecord.kafkaOffset + ) + + result + } + + def runJsonPostProcess(json: JsonObject): JsonObject +} diff --git a/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/processor/PostProcessor.scala b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/processor/PostProcessor.scala new file mode 100644 index 0000000..9edf0ae --- /dev/null +++ b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/processor/PostProcessor.scala @@ -0,0 +1,32 @@ +package com.microsoft.azure.cosmosdb.kafka.connect.processor + +import com.microsoft.azure.cosmosdb.kafka.connect.config.CosmosDBConfig +import com.typesafe.scalalogging.LazyLogging +import org.apache.kafka.connect.sink.SinkRecord +import org.apache.kafka.connect.source.SourceRecord + +abstract class PostProcessor { + + def configure(config: CosmosDBConfig): Unit + + def runPostProcess(sourceRecord: SourceRecord): SourceRecord + + def runPostProcess(sinkRecord: SinkRecord): SinkRecord + +} + +object PostProcessor extends AnyRef with LazyLogging { + + def createPostProcessorList(processorClassNames: String, config: CosmosDBConfig): List[PostProcessor] = + processorClassNames.split(',').map(c => { + logger.info(s"Instantiating ${c} as Post-Processor") + if (c.isEmpty) { + null + } else { + val postProcessor = Class.forName(c).newInstance().asInstanceOf[PostProcessor] + postProcessor.configure(config) + postProcessor + } + }).filter( e => e != null).toList + +} \ No newline at end of file diff --git a/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/processor/SampleConsoleWriterPostProcessor.scala b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/processor/SampleConsoleWriterPostProcessor.scala new file mode 100644 index 0000000..c768e40 --- /dev/null +++ b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/processor/SampleConsoleWriterPostProcessor.scala @@ -0,0 +1,22 @@ +package 
com.microsoft.azure.cosmosdb.kafka.connect.processor + +import com.microsoft.azure.cosmosdb.kafka.connect.config.CosmosDBConfig +import org.apache.kafka.connect.sink.SinkRecord +import org.apache.kafka.connect.source.SourceRecord + +class SampleConsoleWriterPostProcessor extends PostProcessor { + + override def configure(config: CosmosDBConfig): Unit = { + + } + + override def runPostProcess(sourceRecord: SourceRecord): SourceRecord = { + println(sourceRecord.value()) + sourceRecord + } + + override def runPostProcess(sinkRecord: SinkRecord): SinkRecord = { + println(sinkRecord.value()) + sinkRecord + } +} diff --git a/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/processor/sink/DocumentIdSinkPostProcessor.scala b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/processor/sink/DocumentIdSinkPostProcessor.scala new file mode 100644 index 0000000..12e0832 --- /dev/null +++ b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/processor/sink/DocumentIdSinkPostProcessor.scala @@ -0,0 +1,57 @@ +package com.microsoft.azure.cosmosdb.kafka.connect.processor.sink + +import com.google.gson._ +import com.microsoft.azure.cosmosdb.kafka.connect.config.{ConnectorConfig, CosmosDBConfig, CosmosDBConfigConstants} +import com.microsoft.azure.cosmosdb.kafka.connect.processor.JsonPostProcessor +import org.apache.kafka.common.config.ConfigDef.{Importance, Type, Width} + +class DocumentIdSinkPostProcessor extends JsonPostProcessor { + + var documentIdField: String = "" + + override def configure(config: CosmosDBConfig): Unit = { + + val field = getPostProcessorConfiguration(config) + if (field.isDefined) documentIdField = field.get + + } + + override def runJsonPostProcess(json: JsonObject): JsonObject = { + + if (!json.has("id")) { + if (json.has(documentIdField)) + json.addProperty("id", json.get(documentIdField).getAsString) + else + json.add("id", JsonNull.INSTANCE) + } + + json + } + + private def getPostProcessorConfiguration(config: CosmosDBConfig): Option[String] = + { + val CONFIG = s"${CosmosDBConfigConstants.CONNECTOR_PREFIX}.sink.post-processor.documentId.field" + val DOC = "JSON field to be used as the Cosmos DB id" + val DISPLAY = "JSON Field Path" + val DEFAULT = "" + + val postProcessorConfigDef = ConnectorConfig.baseConfigDef + + if(ConnectorConfig.baseConfigDef.configKeys().containsKey(CONFIG)) { + ConnectorConfig.baseConfigDef.configKeys().remove(CONFIG) + } + + postProcessorConfigDef.define( + CONFIG, Type.STRING, DEFAULT, Importance.MEDIUM, + DOC, s"PostProcessor:DocumentId", + 1, Width.LONG, DISPLAY + ) + + val postProcessorConfig: CosmosDBConfig = CosmosDBConfig(postProcessorConfigDef, config.props) + + val field = Option(postProcessorConfig.getString(CONFIG)) + + field + } + +} diff --git a/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/processor/sink/SelectorSinkPostProcessor.scala b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/processor/sink/SelectorSinkPostProcessor.scala new file mode 100644 index 0000000..bd322c1 --- /dev/null +++ b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/processor/sink/SelectorSinkPostProcessor.scala @@ -0,0 +1,13 @@ +package com.microsoft.azure.cosmosdb.kafka.connect.processor.sink + +import com.google.gson._ +import com.microsoft.azure.cosmosdb.kafka.connect.processor.JsonPostProcessor +import com.microsoft.azure.cosmosdb.kafka.connect.processor.`trait`._ + +class SelectorSinkPostProcessor extends JsonPostProcessor with Selector { + + override def pipelineStage = "sink" + + override def 
runJsonPostProcess(json: JsonObject): JsonObject = processor(json) + +} diff --git a/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/processor/source/SelectorSourcePostProcessor.scala b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/processor/source/SelectorSourcePostProcessor.scala new file mode 100644 index 0000000..de0e7d5 --- /dev/null +++ b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/processor/source/SelectorSourcePostProcessor.scala @@ -0,0 +1,13 @@ +package com.microsoft.azure.cosmosdb.kafka.connect.processor.source + +import com.google.gson._ +import com.microsoft.azure.cosmosdb.kafka.connect.processor.JsonPostProcessor +import com.microsoft.azure.cosmosdb.kafka.connect.processor.`trait`._ + +class SelectorSourcePostProcessor extends JsonPostProcessor with Selector { + + override def pipelineStage = "source" + + override def runJsonPostProcess(json: JsonObject): JsonObject = processor(json) + +} diff --git a/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/processor/trait/Selector.scala b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/processor/trait/Selector.scala new file mode 100644 index 0000000..8b70c40 --- /dev/null +++ b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/processor/trait/Selector.scala @@ -0,0 +1,106 @@ +package com.microsoft.azure.cosmosdb.kafka.connect.processor.`trait` + +import com.google.gson._ +import com.microsoft.azure.cosmosdb.kafka.connect.config.{ConnectorConfig, CosmosDBConfig, CosmosDBConfigConstants} +import com.microsoft.azure.cosmosdb.kafka.connect.processor.PostProcessor +import org.apache.kafka.common.config.ConfigDef.{Importance, Type, Width} + +object SelectorType extends Enumeration { + type SelectorType = Value + val Include, Exclude, All = Value + + def fromString(s: String): Value = values.find(_.toString == s).getOrElse(All) +} + +import SelectorType._ + +trait Selector extends PostProcessor { + + var selectorFields = Seq.empty[String] + var selectorType: SelectorType = SelectorType.Include + var processor: JsonObject => JsonObject = includeFields + + def pipelineStage: String + + override def configure(config: CosmosDBConfig): Unit = { + + val configValues = getPostProcessorConfiguration(config) + selectorFields = configValues._1 + selectorType = configValues._2 + + processor = selectorType match { + case Include => includeFields + case Exclude => excludeFields + case _ => includeAll + } + + } + + private def includeAll(json: JsonObject): JsonObject = json + + private def includeFields(json: JsonObject): JsonObject = { + + val toInclude = selectorFields + + val newJson: JsonObject = new JsonObject() + + toInclude.foreach(e => { + val j = json.get(e) + if (j != null) newJson.add(e, j) + }) + + newJson + + } + + private def excludeFields(json: JsonObject): JsonObject = { + + val toRemove = selectorFields + + toRemove.foreach(e => json.remove(e)) + + json + + } + + private def getPostProcessorConfiguration(config: CosmosDBConfig): (Seq[String], SelectorType) = + { + val FIELD_CONFIG = s"${CosmosDBConfigConstants.CONNECTOR_PREFIX}.$pipelineStage.post-processor.selector.fields" + val FIELD_DOC = "List of fields to be included or excluded in the generated JSON" + val FIELD_DISPLAY = "List of fields" + val FIELD_DEFAULT = "" + + val TYPE_CONFIG = s"${CosmosDBConfigConstants.CONNECTOR_PREFIX}.$pipelineStage.post-processor.selector.type" + val TYPE_DOC = "How the selector should behave: Include or Exclude specified fields in the processed JSON" + val TYPE_DISPLAY = "Selector 
behaviour: Include or Exclude" + val TYPE_DEFAULT = "" + + if(ConnectorConfig.baseConfigDef.configKeys().containsKey(FIELD_CONFIG)) { + ConnectorConfig.baseConfigDef.configKeys().remove(FIELD_CONFIG) + } + + if(ConnectorConfig.baseConfigDef.configKeys().containsKey(TYPE_CONFIG)) { + ConnectorConfig.baseConfigDef.configKeys().remove(TYPE_CONFIG) + } + + val postProcessorConfigDef = ConnectorConfig.baseConfigDef + .define( + FIELD_CONFIG, Type.STRING, FIELD_DEFAULT, Importance.MEDIUM, + FIELD_DOC, s"PostProcessor:Selector:${pipelineStage}", + 1, Width.LONG, FIELD_DISPLAY + ).define( + TYPE_CONFIG, Type.STRING, TYPE_DEFAULT, Importance.MEDIUM, + TYPE_DOC, s"PostProcessor:Selector:${pipelineStage}", + 2, Width.LONG, TYPE_DISPLAY + ) + + val postProcessorConfig: CosmosDBConfig = CosmosDBConfig(postProcessorConfigDef, config.props) + + selectorFields = postProcessorConfig.getString(FIELD_CONFIG).split(',').map(e => e.trim).toSeq + selectorType = SelectorType.fromString(postProcessorConfig.getString(TYPE_CONFIG)) + + (selectorFields, selectorType) + } + +} + diff --git a/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/sink/ConnectCosmosConverter.scala b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/sink/ConnectCosmosConverter.scala new file mode 100644 index 0000000..5a30184 --- /dev/null +++ b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/sink/ConnectCosmosConverter.scala @@ -0,0 +1,56 @@ +package com.microsoft.azure.cosmosdb.kafka.connect.sink + +import java.util + +import org.apache.kafka.connect.data.Struct +import org.apache.kafka.connect.data.Schema._ +import sun.reflect.generics.reflectiveObjects.NotImplementedException + +import scala.collection.JavaConversions._ + +trait ConnectCosmosConverter { + /** + * Converts connect data to json tuples. + * + * @return converted data + */ + def toJsonMap(value: Object): List[(String, Object)] +} + +/** + * Converter of connect data with schema to json tuples. + */ +object SchemaConnectCosmosConverter extends ConnectCosmosConverter { + override def toJsonMap(value: Object): List[(String, Object)] = { + val struct = value.asInstanceOf[Struct] + var res : Map[String,Object] = Map() + + for (field <- struct.schema().fields()){ + val fieldName = field.name() + val fieldType = field.schema().`type`() + + fieldType match { + case Type.INT8 => res += (fieldName-> struct.getInt8(fieldName)) + case Type.INT16 => res += (fieldName-> struct.getInt16(fieldName)) + case Type.INT32 => res += (fieldName-> struct.getInt32(fieldName)) + case Type.INT64 => res += (fieldName-> struct.getInt64(fieldName)) + case Type.FLOAT32 => res += (fieldName-> struct.getFloat32(fieldName)) + case Type.FLOAT64 => res += (fieldName-> struct.getFloat64(fieldName)) + case Type.BOOLEAN => res += (fieldName-> struct.getBoolean(fieldName)) + case Type.STRING => res += (fieldName-> struct.getString(fieldName)) + case _ => throw new NotImplementedException() + } + } + + res.toList + } +} + +/** + * Converter of connect data without schema to json tuples.
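+ * Values are cast to java.util.HashMap, so map-like payloads from a schemaless converter are expected here.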
+ */ +object NoSchemaConnectCosmosConverter extends ConnectCosmosConverter { + override def toJsonMap(value: Object): List[(String, Object)] = { + value.asInstanceOf[util.HashMap[String,Object]].toList + } +} \ No newline at end of file diff --git a/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/sink/CosmosDBSinkConnector.scala b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/sink/CosmosDBSinkConnector.scala new file mode 100644 index 0000000..cdff303 --- /dev/null +++ b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/sink/CosmosDBSinkConnector.scala @@ -0,0 +1,56 @@ +package com.microsoft.azure.cosmosdb.kafka.connect.sink + +import java.util + +import com.microsoft.azure.cosmosdb.kafka.connect.common.ErrorHandler.HandleRetriableError +import com.microsoft.azure.cosmosdb.kafka.connect.config.{ConnectorConfig, CosmosDBConfig} +import org.apache.kafka.common.config.ConfigDef +import org.apache.kafka.connect.connector.Task +import org.apache.kafka.connect.sink.SinkConnector + +import scala.collection.JavaConverters._ +import scala.util.{Failure, Success} + +class CosmosDBSinkConnector extends SinkConnector with HandleRetriableError { + + + private var configProps: util.Map[String, String] = _ + + + override def version(): String = getClass.getPackage.getImplementationVersion + + override def start(props: util.Map[String, String]): Unit = { + logger.info("Starting CosmosDBSinkConnector") + + try { + initializeErrorHandler(props.get(org.apache.kafka.connect.runtime.ConnectorConfig.ERRORS_RETRY_TIMEOUT_CONFIG).toInt) // TODO: test + + val config = CosmosDBConfig(ConnectorConfig.sinkConfigDef, props) + HandleRetriableError(Success(config)) + } + catch{ + case f: Throwable => + logger.error(s"Couldn't start Cosmos DB Sink due to configuration error: ${f.getMessage}", f) + HandleRetriableError(Failure(f)) + } + + configProps = props + + } + + override def stop(): Unit = { + logger.info("Stopping CosmosDBSinkConnector") + } + + override def taskClass(): Class[_ <: Task] = classOf[CosmosDBSinkTask] + + override def taskConfigs(maxTasks: Int): util.List[util.Map[String, String]] = { + logger.info(s"Setting task configurations for $maxTasks workers with properties $this.configProps") + println(this.configProps) + + (1 to maxTasks).map(_ => this.configProps).toList.asJava + + } + override def config(): ConfigDef = ConnectorConfig.sinkConfigDef + +} \ No newline at end of file diff --git a/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/sink/CosmosDBSinkSettings.scala b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/sink/CosmosDBSinkSettings.scala new file mode 100644 index 0000000..4cbf3c0 --- /dev/null +++ b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/sink/CosmosDBSinkSettings.scala @@ -0,0 +1,11 @@ +package com.microsoft.azure.cosmosdb.kafka.connect.sink + + +import scala.collection.mutable.HashMap + + +case class CosmosDBSinkSettings(endpoint: String, + masterKey: String, + database: String, + collectionTopicMap: HashMap[String, String]) { +} \ No newline at end of file diff --git a/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/sink/CosmosDBSinkTask.scala b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/sink/CosmosDBSinkTask.scala new file mode 100644 index 0000000..c56516d --- /dev/null +++ b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/sink/CosmosDBSinkTask.scala @@ -0,0 +1,125 @@ +package com.microsoft.azure.cosmosdb.kafka.connect.sink + +import java.util + +import 
scala.collection.mutable.HashMap +import com.microsoft.azure.cosmosdb.kafka.connect.config.{ConnectorConfig, CosmosDBConfig, CosmosDBConfigConstants} +import com.microsoft.azure.cosmosdb.kafka.connect.{CosmosDBClientSettings, CosmosDBProviderImpl, CosmosDBProvider} +import com.microsoft.azure.cosmosdb.kafka.connect.processor._ +import com.microsoft.azure.cosmosdb.rx.AsyncDocumentClient +import com.microsoft.azure.cosmosdb.{ConnectionPolicy, ConsistencyLevel} +import com.typesafe.scalalogging.LazyLogging +import org.apache.kafka.clients.consumer.OffsetAndMetadata +import org.apache.kafka.common.TopicPartition +import org.apache.kafka.connect.errors.ConnectException +import org.apache.kafka.connect.sink.{SinkRecord, SinkTask} + +import scala.collection.JavaConverters._ +import scala.util.{Failure, Success, Try} + +class CosmosDBSinkTask extends SinkTask with LazyLogging { + + private var writer: Option[CosmosDBWriter] = None + + private var client: AsyncDocumentClient = null + private var database: String = "" + private var taskConfig: Option[CosmosDBConfig] = None + private var topicNames: Array[String] = null + private var postProcessors = List.empty[PostProcessor] + val collectionTopicMap: HashMap[String, String] = HashMap.empty[String, String] // Public to allow for testing + val cosmosDBProvider: CosmosDBProvider = CosmosDBProviderImpl + + override def start(props: util.Map[String, String]): Unit = { + logger.info("Starting CosmosDBSinkTask") + + var config: util.Map[String, String] = null + if (context != null) { + config = if (context.configs().isEmpty) props else context.configs() + } + else { + config = props + } + + // Get Configuration for this Task + taskConfig = Try(CosmosDBConfig(ConnectorConfig.sinkConfigDef, config)) match { + case Failure(f) => throw new ConnectException("Couldn't start CosmosDBSink due to configuration error.", f) + case Success(s) => Some(s) + } + + // Add configured Post-Processors + val processorClassNames = taskConfig.get.getString(CosmosDBConfigConstants.SINK_POST_PROCESSOR) + postProcessors = PostProcessor.createPostProcessorList(processorClassNames, taskConfig.get) + + // Get CosmosDB Connection + val endpoint: String = taskConfig.get.getString(CosmosDBConfigConstants.CONNECTION_ENDPOINT_CONFIG) + val masterKey: String = taskConfig.get.getPassword(CosmosDBConfigConstants.CONNECTION_MASTERKEY_CONFIG).value() + database = taskConfig.get.getString(CosmosDBConfigConstants.DATABASE_CONFIG) + + // Populate collection topic map + // TODO: add support for many to many mapping, this only assumes each topic writes to one collection and multiple topics can write to the same collection + val collectionTopicMapString = taskConfig.get.getString(CosmosDBConfigConstants.COLLECTION_TOPIC_MAP_CONFIG) + if(collectionTopicMapString.contains("#")) { // There is at least one pair + collectionTopicMapString.split(",").map(_.trim).foreach( + m => { + val map = m.split("#").map(_.trim) + collectionTopicMap.put(map(1), map(0)) // topic, collection + }) + } + + // If there are topics with no mapping, add them to the map with topic name as collection name + topicNames = taskConfig.get.getString(CosmosDBConfigConstants.TOPIC_CONFIG).split(",").map(_.trim) + topicNames.foreach( + t => { + if (!collectionTopicMap.contains(t)) { + collectionTopicMap.put(t, t) // topic, collection + } + }) + + val clientSettings = CosmosDBClientSettings( + endpoint, + masterKey, + database, + null, // Don't pass a collection because our client is potentially for multiple collections + 
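// Default SDK connection policy and Session consistency; this single client is shared by every mapped collection +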
ConnectionPolicy.GetDefault(), + ConsistencyLevel.Session + ) + client = Try(cosmosDBProvider.getClient(clientSettings)) match { + case Success(conn) => + logger.info("Connection to CosmosDB established.") + conn + case Failure(f) => throw new ConnectException(s"Couldn't connect to CosmosDB.", f) + } + + // Set up Writer + val setting = new CosmosDBSinkSettings(endpoint, masterKey, database, collectionTopicMap) + writer = Option(new CosmosDBWriter(setting, cosmosDBProvider)) + } + + + override def put(records: util.Collection[SinkRecord]): Unit = { + val seq = records.asScala.toList + logger.info(s"Sending ${seq.length} records to writer to be written") + + // Execute PostProcessing + val postProcessed = seq.map(sr => applyPostProcessing(sr)) + + // Currently only built for messages with JSON payload without schema + writer.foreach(w => w.write(postProcessed)) + } + + override def stop(): Unit = { + logger.info("Stopping CosmosDBSinkTask") + } + + override def flush(map: util.Map[TopicPartition, OffsetAndMetadata]): Unit = {} + + override def version(): String = getClass.getPackage.getImplementationVersion + + private def applyPostProcessing(sinkRecord: SinkRecord): SinkRecord = + postProcessors.foldLeft(sinkRecord)((r, p) => { + //println(p.getClass.toString) + p.runPostProcess(r) + }) + +} + diff --git a/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/sink/CosmosDBWriter.scala b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/sink/CosmosDBWriter.scala new file mode 100644 index 0000000..fba5bae --- /dev/null +++ b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/sink/CosmosDBWriter.scala @@ -0,0 +1,87 @@ + +package com.microsoft.azure.cosmosdb.kafka.connect.sink + +import java.util.concurrent.CountDownLatch + +import com.fasterxml.jackson.databind.ObjectMapper +import com.microsoft.azure.cosmosdb._ +import com.microsoft.azure.cosmosdb.kafka.connect.CosmosDBProvider +import com.typesafe.scalalogging.StrictLogging +import org.apache.kafka.connect.sink.SinkRecord + + +class CosmosDBWriter(val settings: CosmosDBSinkSettings, val cosmosDBProvider: CosmosDBProvider) extends StrictLogging +{ + private val requestOptionsInsert = new RequestOptions + requestOptionsInsert.setConsistencyLevel(ConsistencyLevel.Session) + + def write(records: Seq[SinkRecord]): Unit = { + if (records.isEmpty) { + logger.info("No records received.") + } else { + logger.info(s"Received ${records.size} records.") + insert(records) + } + } + + private def insert(records: Seq[SinkRecord]) = { + try { + + var docs = List.empty[Document] + var collection: String = "" + + records.groupBy(_.topic()).foreach { case (_, groupedRecords) => + groupedRecords.foreach { record => + // Determine which collection to write to + if (settings.collectionTopicMap.contains(record.topic)) + collection = settings.collectionTopicMap(record.topic) + else + throw new Exception("No sink collection specified for this topic.") // TODO: tie this in with the exception handler + + val content: String = serializeValue(record.value()) + val document = new Document(content) + + logger.info("Upserting Document object id " + document.get("id") + " into collection " + collection) + docs = docs :+ document + } + // Send current batch of documents and reset the list for the next topic's documents + cosmosDBProvider.upsertDocuments[Document](docs, settings.database, collection, new CountDownLatch(1)) + docs = List.empty[Document] + } + + } + catch { + case t: Throwable => + logger.error(s"There was an error inserting the records 
${t.getMessage}", t) + + } + } + + def close(): Unit = { + logger.info("Shutting down CosmosDBWriter.") + } + + def serializeValue(value: Any): String = { + var content: String = null + val om = new ObjectMapper() + + if (!value.isInstanceOf[String]){ + content = om.writeValueAsString(value) + }else { + content = value.toString + } + + if(om.readTree(content).has("payload")){ + val temp = om.readTree(content).get("payload") + if (temp.isTextual()){ // TextNodes cannot be directly converted to strings + content = temp.asText() + } else { + content = temp.toString + } + } + + return content + } + +} + diff --git a/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/ChangeFeedObserver.scala b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/ChangeFeedObserver.scala new file mode 100644 index 0000000..72aebb1 --- /dev/null +++ b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/ChangeFeedObserver.scala @@ -0,0 +1,5 @@ +package com.microsoft.azure.cosmosdb.kafka.connect.source + +trait ChangeFeedObserver { + def processChanges(documentList: List[String]) +} diff --git a/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/ChangeFeedProcessor.scala b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/ChangeFeedProcessor.scala new file mode 100644 index 0000000..e67f52e --- /dev/null +++ b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/ChangeFeedProcessor.scala @@ -0,0 +1,64 @@ +package com.microsoft.azure.cosmosdb.kafka.connect.source + +import com.microsoft.azure.cosmosdb._ +import java.util.concurrent.CountDownLatch + +import com.microsoft.azure.cosmosdb.kafka.connect.common.ErrorHandler.HandleRetriableError + +import scala.collection.JavaConversions._ + +class ChangeFeedProcessor(feedCollectionInfo: DocumentCollectionInfo, leaseCollectionInfo: DocumentCollectionInfo, changeFeedProcessorOptions: ChangeFeedProcessorOptions, changeFeedObserver: ChangeFeedObserver)extends HandleRetriableError { + + val asyncClientFeed = DocumentClientBuilder.buildAsyncDocumentClient(feedCollectionInfo.uri, feedCollectionInfo.masterKey) + val asyncClientLease = DocumentClientBuilder.buildAsyncDocumentClient(leaseCollectionInfo.uri, leaseCollectionInfo.masterKey) + + val partitionLeaseStateManager = new PartitionLeaseStateManager(asyncClientLease, leaseCollectionInfo.databaseName, leaseCollectionInfo.collectionName) + val partitionFeedReaders = createPartitionMap() + private var run = true + + private def createPartitionMap(): Map[String, PartitionFeedReader] = { + val rangeIdList = getPartitionRangeIds() + val feedReaderMap = Map(rangeIdList map { partitionKeyRangeId => (partitionKeyRangeId, new PartitionFeedReader(asyncClientFeed, feedCollectionInfo.databaseName, feedCollectionInfo.collectionName, partitionKeyRangeId, partitionLeaseStateManager, changeFeedProcessorOptions)) }: _*) + return feedReaderMap + } + + private def getPartitionRangeIds(): List[String] = { + val collectionLink = DocumentClientBuilder.getCollectionLink(feedCollectionInfo.databaseName, feedCollectionInfo.collectionName) + val changeFeedObservable = asyncClientFeed.readPartitionKeyRanges(collectionLink, null) + + var results = List[PartitionKeyRange]() + changeFeedObservable.toBlocking().forEach(x => results = results ++ x.getResults()) + + return results.map(p => p.getId) + } + + def start(): Unit = { + println("Started!") + + spawn { + do { + val countDownLatch = new CountDownLatch(partitionFeedReaders.size) + // Parallel + 
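// Each partition key range is polled by its own PartitionFeedReader; the latch waits for all of them before the next poll +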
partitionFeedReaders.par.foreach { p => p._2.readChangeFeed(changeFeedObserver.processChanges, countDownLatch) } + // Serial: + //for ((id, pfr) <- partitionFeedReaders) pfr.readChangeFeed(changeFeedObserver.processChanges, countDownLatch) + countDownLatch.await() + println("Waiting...") + Thread.sleep(changeFeedProcessorOptions.defaultFeedPollDelay) + } while (run) + } + } + + def stop(): Unit = { + run = false + println("Finished!") + } + + private def spawn(p: => Unit) { + val t = new Thread() { + override def run() = p + } + t.start() + } + +} diff --git a/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/ChangeFeedProcessorBuilder.scala b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/ChangeFeedProcessorBuilder.scala new file mode 100644 index 0000000..95bcb90 --- /dev/null +++ b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/ChangeFeedProcessorBuilder.scala @@ -0,0 +1,57 @@ +package com.microsoft.azure.cosmosdb.kafka.connect.source + +import org.apache.kafka.connect.errors.ConnectException + +import com.microsoft.azure.cosmosdb.kafka.connect.common.ErrorHandler.HandleRetriableError + +import scala.reflect._ + + +class ChangeFeedProcessorBuilder(feedCollectionInfo: DocumentCollectionInfo, leaseCollectionInfo: DocumentCollectionInfo, changeFeedProcessorOptions: ChangeFeedProcessorOptions, changeFeedObserver: ChangeFeedObserver)extends HandleRetriableError { + + def this() = this(null, null, new ChangeFeedProcessorOptions(), null) + + def withFeedCollection(newFeedCollectionInfo: DocumentCollectionInfo): ChangeFeedProcessorBuilder = { + guardAgainstNull(newFeedCollectionInfo) + return new ChangeFeedProcessorBuilder(newFeedCollectionInfo, this.leaseCollectionInfo, this.changeFeedProcessorOptions, this.changeFeedObserver) + } + + def withLeaseCollection(newLeaseCollectionInfo: DocumentCollectionInfo): ChangeFeedProcessorBuilder = { + guardAgainstNull(newLeaseCollectionInfo) + return new ChangeFeedProcessorBuilder(this.feedCollectionInfo, newLeaseCollectionInfo, this.changeFeedProcessorOptions, this.changeFeedObserver) + } + + def withProcessorOptions(newChangeFeedProcessorOptions: ChangeFeedProcessorOptions): ChangeFeedProcessorBuilder = { + guardAgainstNull(newChangeFeedProcessorOptions) + return new ChangeFeedProcessorBuilder(this.feedCollectionInfo, this.leaseCollectionInfo, newChangeFeedProcessorOptions, this.changeFeedObserver) + } + + def withObserver(newChangeFeedObserver: ChangeFeedObserver): ChangeFeedProcessorBuilder = { + guardAgainstNull(newChangeFeedObserver) + return new ChangeFeedProcessorBuilder(this.feedCollectionInfo, this.leaseCollectionInfo, this.changeFeedProcessorOptions, newChangeFeedObserver) + } + + def build(): ChangeFeedProcessor = { + guardAgainstNull(this.feedCollectionInfo) + guardAgainstNull(this.leaseCollectionInfo) + guardAgainstNull(this.changeFeedProcessorOptions) + guardAgainstNull(this.changeFeedObserver) + + return new ChangeFeedProcessor(this.feedCollectionInfo, this.leaseCollectionInfo, this.changeFeedProcessorOptions, this.changeFeedObserver) + } + + private def guardAgainstNull[T: ClassTag](objectToCheck: T): Unit = { + try{ + val className = classTag[T].runtimeClass.getSimpleName() + val messageIfNull = "%s can't be null!".format(className) + if (objectToCheck == null) throw new NullPointerException(messageIfNull) + + logger.debug("%s Object initialized".format(className)) + }catch{ + case f: Throwable => + throw new ConnectException("%s can't be 
null!".format(classTag[T].runtimeClass.getSimpleName()), f) + } + + } + +} diff --git a/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/ChangeFeedProcessorOptions.scala b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/ChangeFeedProcessorOptions.scala new file mode 100644 index 0000000..bc1e1a3 --- /dev/null +++ b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/ChangeFeedProcessorOptions.scala @@ -0,0 +1,5 @@ +package com.microsoft.azure.cosmosdb.kafka.connect.source + +class ChangeFeedProcessorOptions(val queryPartitionsMaxBatchSize: Int, val defaultFeedPollDelay: Int) { + def this() = this(100, 2000) +} diff --git a/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/CosmosDBReader.scala b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/CosmosDBReader.scala new file mode 100644 index 0000000..a9bedd5 --- /dev/null +++ b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/CosmosDBReader.scala @@ -0,0 +1,170 @@ +package com.microsoft.azure.cosmosdb.kafka.connect.source + +import java.util + +import com.google.gson.Gson +import com.microsoft.azure.cosmosdb._ +import com.microsoft.azure.cosmosdb.kafka.connect.CosmosDBProviderImpl +import com.microsoft.azure.cosmosdb.kafka.connect.common.ErrorHandler.HandleRetriableError +import com.microsoft.azure.cosmosdb.rx._ +import org.apache.kafka.connect.source.{SourceRecord, SourceTaskContext} + +import scala.collection.JavaConversions._ + +class CosmosDBReader(private val client: AsyncDocumentClient, + val setting: CosmosDBSourceSettings, + private val context: SourceTaskContext) extends HandleRetriableError { + + + private val SOURCE_PARTITION_FIELD = "partition" + private val SOURCE_OFFSET_FIELD = "changeFeedState" + + // Read the initial state from the offset storage when the CosmosDBReader is instantiated for the + // assigned partition + private val initialState : CosmosDBReaderChangeFeedState = getCosmosDBReaderChangeFeedState(setting.assignedPartition) + // Initialize the current state using the same values of the initial state + private var currentState = initialState + + // Initialize variables that control the position of the reading cursor + private var lastCursorPosition = -1 + private var currentCursorPosition = -1 + + def processChanges(): util.List[SourceRecord] = { + + + val records = new util.ArrayList[SourceRecord] + var bufferSize = 0 + + val collectionLink = CosmosDBProviderImpl.getCollectionLink(setting.database, setting.collection) + val changeFeedOptions = createChangeFeedOptions() + + try + { + + // Initial position of the reading cursor + if (initialState != null) + lastCursorPosition = initialState.lsn.toInt + else + lastCursorPosition = currentCursorPosition + + + val changeFeedObservable = client.queryDocumentChangeFeed(collectionLink, changeFeedOptions) + + changeFeedObservable + .doOnNext(feedResponse => { + + val processingStartTime = System.currentTimeMillis() + + // Return the list of documents in the FeedResponse + val documents = feedResponse.getResults() + + documents.foreach(doc => { + + // Update the reader state + currentState = new CosmosDBReaderChangeFeedState( + setting.assignedPartition, + feedResponse.getResponseHeaders.get("etag"), + doc.get("_lsn").toString + ) + + // Update the current reader cursor + currentCursorPosition = currentState.lsn.toInt + + // Check if the cursor has moved beyond the last processed position + if (currentCursorPosition > lastCursorPosition) { + + // Process new document + + 
logger.debug(s"Sending document ${doc} to the Kafka topic ${setting.topicName}") + logger.debug(s"Current State => Partition: ${currentState.partition}, " + + s"ContinuationToken: ${currentState.continuationToken}, " + + s"LSN: ${currentState.lsn}") + + records.add(new SourceRecord( + sourcePartition(setting.assignedPartition), + sourceOffset(new Gson().toJson(currentState)), + setting.topicName, + null, + doc.toJson() + )) + + // Increment the buffer + bufferSize = bufferSize + doc.toJson().getBytes().length + + // Calculate the elapsed time + val processingElapsedTime = System.currentTimeMillis() - processingStartTime + + // Returns records based on batch size, buffer size or timeout + if (records.size >= setting.batchSize || bufferSize >= setting.bufferSize || processingElapsedTime >= setting.timeout) { + return records + } + } + }) + }) + .doOnCompleted(() => {}) // signal to the consumer that there is no more data available + .doOnError((e) => { logger.error(e.getMessage()) }) // signal to the consumer that an error has occurred + .subscribe() + + changeFeedObservable.toBlocking.single + + } + catch + { + case f: Throwable => + logger.error(s"Couldn't add documents to the kafka topic: ${f.getMessage}", f) + } + + return records + } + + private def createChangeFeedOptions(): ChangeFeedOptions = { + val changeFeedOptions = new ChangeFeedOptions() + changeFeedOptions.setPartitionKeyRangeId(setting.assignedPartition) + changeFeedOptions.setMaxItemCount(setting.batchSize) + + if (currentState == null) { + changeFeedOptions.setStartFromBeginning(true) + } + else { + + // If the cursor position has not reached the end of the feed, read again + if (currentCursorPosition < currentState.continuationToken.replaceAll("^\"|\"$", "").toInt) { + if (initialState != null) + changeFeedOptions.setRequestContinuation(initialState.continuationToken) + else + changeFeedOptions.setStartFromBeginning(true) + return changeFeedOptions + } + + currentState.continuationToken match { + case null => changeFeedOptions.setStartFromBeginning(true) + case "" => changeFeedOptions.setStartFromBeginning(true) + case t => changeFeedOptions.setRequestContinuation(t) + } + } + return changeFeedOptions + } + + private def getCosmosDBReaderChangeFeedState(partition: String): CosmosDBReaderChangeFeedState = { + var state: CosmosDBReaderChangeFeedState = null + if (context != null) { + val offset = context.offsetStorageReader.offset(sourcePartition(partition)) + if (offset != null) { + state = new Gson().fromJson(offset.get(SOURCE_OFFSET_FIELD).toString(), classOf[CosmosDBReaderChangeFeedState]) + } + } + return state + } + + private def sourcePartition(partition: String): util.Map[String, String] = { + val map = new java.util.HashMap[String,String] + map.put(SOURCE_PARTITION_FIELD, partition) + return map + } + + private def sourceOffset(offset: String): util.Map[String, String] = { + val map = new java.util.HashMap[String,String] + map.put(SOURCE_OFFSET_FIELD, offset) + return map + } +} \ No newline at end of file diff --git a/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/CosmosDBReaderChangeFeedState.scala b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/CosmosDBReaderChangeFeedState.scala new file mode 100644 index 0000000..1abeaef --- /dev/null +++ b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/CosmosDBReaderChangeFeedState.scala @@ -0,0 +1,7 @@ +package com.microsoft.azure.cosmosdb.kafka.connect.source + +case class CosmosDBReaderChangeFeedState(partition: String, 
+ continuationToken: String, + lsn: String) { + +} diff --git a/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/CosmosDBSourceConnector.scala b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/CosmosDBSourceConnector.scala new file mode 100644 index 0000000..951f62a --- /dev/null +++ b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/CosmosDBSourceConnector.scala @@ -0,0 +1,83 @@ +package com.microsoft.azure.cosmosdb.kafka.connect.source + +import java.util + +import com.microsoft.azure.cosmosdb.kafka.connect.common.ErrorHandler.HandleRetriableError +import com.microsoft.azure.cosmosdb._ + +import scala.collection.JavaConversions._ +import com.microsoft.azure.cosmosdb.{ConnectionPolicy, ConsistencyLevel} +import com.microsoft.azure.cosmosdb.kafka.connect.{CosmosDBClientSettings, CosmosDBProvider, CosmosDBProviderImpl} +import com.microsoft.azure.cosmosdb.kafka.connect.config.{ConnectorConfig, CosmosDBConfig, CosmosDBConfigConstants} +import org.apache.kafka.common.config.ConfigDef +import org.apache.kafka.connect.connector.Task +import org.apache.kafka.connect.source.SourceConnector +import org.apache.kafka.connect.util.ConnectorUtils +import scala.util.{Failure, Success, Try} +import scala.collection.JavaConverters._ + +class CosmosDBSourceConnector extends SourceConnector with HandleRetriableError { + + + private var configProps: util.Map[String, String] = _ + private var numWorkers: Int = 0 + val cosmosDBProvider: CosmosDBProvider = CosmosDBProviderImpl + override def version(): String = getClass.getPackage.getImplementationVersion + + override def start(props: util.Map[String, String]): Unit = { + logger.info("Starting CosmosDBSourceConnector") + configProps = props + } + + override def taskClass(): Class[_ <: Task] = classOf[CosmosDBSourceTask] + + override def taskConfigs(maxTasks: Int): util.List[util.Map[String, String]] = { + try { + val config: CosmosDBConfig = CosmosDBConfig(ConnectorConfig.sourceConfigDef, configProps) + val database: String = config.getString(CosmosDBConfigConstants.DATABASE_CONFIG) + val collection: String = config.getString(CosmosDBConfigConstants.COLLECTION_CONFIG) + val settings: CosmosDBClientSettings = CosmosDBClientSettings( + config.getString(CosmosDBConfigConstants.CONNECTION_ENDPOINT_CONFIG), + config.getPassword(CosmosDBConfigConstants.CONNECTION_MASTERKEY_CONFIG).value(), + database, + collection, + ConnectionPolicy.GetDefault(), + ConsistencyLevel.Session + ) + logger.debug("Settings for Cosmos Db connection: ", settings) + + val client = cosmosDBProvider.getClient(settings) + + val collectionLink = CosmosDBProviderImpl.getCollectionLink(database, collection) + val changeFeedObservable = client.readPartitionKeyRanges(collectionLink, null) + var results = List[PartitionKeyRange]() + changeFeedObservable.toBlocking().forEach(x => results = results ++ x.getResults()) + val numberOfPartitions = results.map(p => p.getId) + numWorkers = Math.min(numberOfPartitions.size(), maxTasks) + logger.info(s"Setting task configurations for $numWorkers workers.") + val groups = ConnectorUtils.groupPartitions(numberOfPartitions, maxTasks) + groups + .withFilter(g => g.nonEmpty) + .map { g => + val taskConfigs = new java.util.HashMap[String, String](this.configProps) + taskConfigs.put(CosmosDBConfigConstants.ASSIGNED_PARTITIONS, g.mkString(",")) + taskConfigs + } + } + catch { + case f: Throwable => + logger.error(s"Couldn't initialize CosmosDb with settings: ${f.getMessage}", f) + HandleRetriableError(Failure(f)) + 
return null + } + } + + override def config(): ConfigDef = ConnectorConfig.sourceConfigDef + + override def stop(): Unit = { + logger.info("Stopping CosmosDBSourceConnector") + } + + def getNumberOfWorkers(): Int = numWorkers + +} diff --git a/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/CosmosDBSourceSettings.scala b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/CosmosDBSourceSettings.scala new file mode 100644 index 0000000..0d02737 --- /dev/null +++ b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/CosmosDBSourceSettings.scala @@ -0,0 +1,12 @@ +package com.microsoft.azure.cosmosdb.kafka.connect.source + +case class CosmosDBSourceSettings( + database: String, + collection: String, + assignedPartition: String, + batchSize: Int, + bufferSize: Int, + timeout: Int, + topicName: String, + ) { +} \ No newline at end of file diff --git a/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/CosmosDBSourceTask.scala b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/CosmosDBSourceTask.scala new file mode 100644 index 0000000..f63e69b --- /dev/null +++ b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/CosmosDBSourceTask.scala @@ -0,0 +1,150 @@ +package com.microsoft.azure.cosmosdb.kafka.connect.source + +import java.util + +import com.microsoft.azure.cosmosdb.kafka.connect.common.ErrorHandler.HandleRetriableError +import com.microsoft.azure.cosmosdb.kafka.connect.config.{ConnectorConfig, CosmosDBConfig, CosmosDBConfigConstants} +import com.microsoft.azure.cosmosdb.kafka.connect.{CosmosDBClientSettings, CosmosDBProvider, CosmosDBProviderImpl} +import com.microsoft.azure.cosmosdb.rx.AsyncDocumentClient +import com.microsoft.azure.cosmosdb.{ConnectionPolicy, ConsistencyLevel} +import com.typesafe.scalalogging.StrictLogging +import org.apache.kafka.connect.errors.ConnectException +import org.apache.kafka.connect.source.{SourceRecord, SourceTask} +import com.microsoft.azure.cosmosdb.kafka.connect.processor._ +import com.microsoft.azure.cosmosdb.kafka.connect.source.CosmosDBReader +import scala.collection.JavaConversions._ +import scala.collection.mutable +import scala.util.{Failure, Success, Try} + +class CosmosDBSourceTask extends SourceTask with StrictLogging with HandleRetriableError{ + + val readers = mutable.Map.empty[String, CosmosDBReader] + private var client: AsyncDocumentClient = null + private var database: String = "" + private var collection: String = "" + private var taskConfig: Option[CosmosDBConfig] = None + private var bufferSize: Option[Int] = None + private var batchSize: Option[Int] = None + private var timeout: Option[Int] = None + private var topicName: String = "" + private var postProcessors = List.empty[PostProcessor] + val cosmosDBProvider: CosmosDBProvider = CosmosDBProviderImpl + + override def start(props: util.Map[String, String]): Unit = { + logger.info("Starting CosmosDBSourceTask") + + var config: util.Map[String, String] = null + + if (context != null) { + config = if (context.configs().isEmpty) props else context.configs() + } + else { + config = props + } + + // Get Configuration for this Task + try{ + taskConfig = Some(CosmosDBConfig(ConnectorConfig.sourceConfigDef, config)) + //HandleError(Success(config)) + } + catch{ + case f: Throwable => + logger.error(s"Couldn't start Cosmos DB Source due to configuration error: ${f.getMessage}", f) + HandleRetriableError(Failure(f)) + } + + /*taskConfig = Try(CosmosDBConfig(ConnectorConfig.sourceConfigDef, config)) match { + 
case Failure(f) => throw new ConnectException("Couldn't start CosmosDBSource due to configuration error.", f) + case Success(s) => Some(s) + }*/ + + // Add configured Post-Processors if exist in configuration file + if(taskConfig.get.getString(CosmosDBConfigConstants.SOURCE_POST_PROCESSOR)!=null){ + val processorClassNames = taskConfig.get.getString(CosmosDBConfigConstants.SOURCE_POST_PROCESSOR) + postProcessors = PostProcessor.createPostProcessorList(processorClassNames, taskConfig.get) + } + + + // Get CosmosDB Connection + val endpoint: String = taskConfig.get.getString(CosmosDBConfigConstants.CONNECTION_ENDPOINT_CONFIG) + val masterKey: String = taskConfig.get.getPassword(CosmosDBConfigConstants.CONNECTION_MASTERKEY_CONFIG).value() + database = taskConfig.get.getString(CosmosDBConfigConstants.DATABASE_CONFIG) + collection = taskConfig.get.getString(CosmosDBConfigConstants.COLLECTION_CONFIG) + + // Source Collection + val clientSettings = CosmosDBClientSettings( + endpoint, + masterKey, + database, + collection, + ConnectionPolicy.GetDefault(), + ConsistencyLevel.Session + ) + + try{ + client = cosmosDBProvider.getClient(clientSettings) + logger.info("Connection to CosmosDB established.") + }catch{ + case f: Throwable => + logger.error(s"Couldn't connect to CosmosDB.: ${f.getMessage}", f) + HandleRetriableError(Failure(f)) + } + + + /*client = Try(CosmosDBProvider.getClient(clientSettings)) match { + case Success(conn) => + logger.info("Connection to CosmosDB established.") + conn + case Failure(f) => throw new ConnectException(s"Couldn't connect to CosmosDB.", f) + }*/ + + // Get bufferSize and batchSize + bufferSize = Some(taskConfig.get.getInt(CosmosDBConfigConstants.READER_BUFFER_SIZE)) + batchSize = Some(taskConfig.get.getInt(CosmosDBConfigConstants.BATCH_SIZE)) + timeout = Some(taskConfig.get.getInt(CosmosDBConfigConstants.TIMEOUT)) + + // Get Topic + topicName = taskConfig.get.getString(CosmosDBConfigConstants.TOPIC_CONFIG) + + // Get the List of Assigned Partitions + val assigned = taskConfig.get.getString(CosmosDBConfigConstants.ASSIGNED_PARTITIONS).split(",").toList + + // Set up Readers + assigned.map(partition => { + val setting = new CosmosDBSourceSettings(database, collection, partition, batchSize.get, bufferSize.get, timeout.get, topicName) + readers += partition -> new CosmosDBReader(client, setting, context) + }) + + } + + override def stop(): Unit = { + logger.info("Stopping CosmosDBSourceTask") + } + + override def poll(): util.List[SourceRecord] = { + try{ + if(postProcessors.isEmpty){ + return readers.flatten(reader => reader._2.processChanges()).toList + }else{ + return readers.flatten(reader => reader._2.processChanges()).toList.map(sr => applyPostProcessing(sr)) + } + }catch{ + case f: Exception => + logger.debug(s"Couldn't create a list of source records ${f.getMessage}", f) + HandleRetriableError(Failure(f)) + return null + } + return null + } + + override def version(): String = getClass.getPackage.getImplementationVersion + + def getReaders(): mutable.Map[String, CosmosDBReader] = readers + + private def applyPostProcessing(sourceRecord: SourceRecord): SourceRecord = + postProcessors.foldLeft(sourceRecord)((r, p) => { + //println(p.getClass.toString) + p.runPostProcess(r) + }) + +} diff --git a/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/DocumentClientBuilder.scala b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/DocumentClientBuilder.scala new file mode 100644 index 0000000..0606b89 --- /dev/null +++ 
b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/DocumentClientBuilder.scala @@ -0,0 +1,27 @@ +package com.microsoft.azure.cosmosdb.kafka.connect.source + +import com.microsoft.azure.cosmosdb.rx._; +import com.microsoft.azure.cosmosdb._; + +object DocumentClientBuilder { + + def createConnectionPolicy(): ConnectionPolicy = { + val policy = new ConnectionPolicy() + policy.setConnectionMode(ConnectionMode.Direct) + return policy + } + + def buildAsyncDocumentClient(cosmosServiceEndpoint: String, cosmosKey: String): AsyncDocumentClient = { + new AsyncDocumentClient.Builder() + .withServiceEndpoint(cosmosServiceEndpoint) + .withMasterKeyOrResourceToken(cosmosKey) + .withConnectionPolicy(createConnectionPolicy()) + .withConsistencyLevel(ConsistencyLevel.Eventual) + .build() + } + + def getCollectionLink(databaseName: String, collectionName: String) = "/dbs/%s/colls/%s".format(databaseName, collectionName) + + def getDatabaseLink(databaseName: String) = "/dbs/%s".format(databaseName) + +} diff --git a/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/DocumentCollectionInfo.scala b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/DocumentCollectionInfo.scala new file mode 100644 index 0000000..a74e5db --- /dev/null +++ b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/DocumentCollectionInfo.scala @@ -0,0 +1,5 @@ +package com.microsoft.azure.cosmosdb.kafka.connect.source + +class DocumentCollectionInfo(val uri: String, val masterKey: String, val databaseName: String, val collectionName: String) { + +} diff --git a/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/Main.scala b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/Main.scala new file mode 100644 index 0000000..c6c91d0 --- /dev/null +++ b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/Main.scala @@ -0,0 +1,65 @@ +package com.microsoft.azure.cosmosdb.kafka.connect.source + +import java.util.Properties + +import com.microsoft.azure.cosmosdb.kafka.connect.config.CosmosDBConfigConstants +import com.microsoft.azure.cosmosdb.kafka.connect.kafka.KafkaCluster +import org.apache.kafka.connect.runtime.distributed.DistributedConfig +import org.apache.kafka.connect.runtime.{ConnectorConfig, WorkerConfig} + +object Main { + + var COSMOSDB_TOPIC: String = "test_topic_issue49" + + def main(args: Array[String]): Unit = { + val workerProperties: Properties = getWorkerProperties(KafkaCluster.BrokersList.toString) + val connectorProperties: Properties = getConnectorProperties() + KafkaCluster.startEmbeddedConnect(workerProperties, List(connectorProperties)) + if (KafkaCluster.kafkaConnectEnabled) { + println("Kafka Connector Enabled") + } + } + + def getWorkerProperties(bootstrapServers: String): Properties = { + val workerProperties: Properties = new Properties() + workerProperties.put(WorkerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers) + workerProperties.put(DistributedConfig.GROUP_ID_CONFIG, "cosmosdb") + workerProperties.put(DistributedConfig.CONFIG_TOPIC_CONFIG, "cosmosdb-config") + workerProperties.put(DistributedConfig.OFFSET_STORAGE_TOPIC_CONFIG, "cosmosdb-offset") + workerProperties.put(DistributedConfig.STATUS_STORAGE_TOPIC_CONFIG, "cosmosdb-status") + workerProperties.put(WorkerConfig.KEY_CONVERTER_CLASS_CONFIG, "org.apache.kafka.connect.json.JsonConverter") + workerProperties.put(WorkerConfig.VALUE_CONVERTER_CLASS_CONFIG, "org.apache.kafka.connect.json.JsonConverter") + 
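// Commit offsets every 30s; the internal Connect topics are sized for the single-broker embedded cluster (one partition, replication factor 1) +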
workerProperties.put(WorkerConfig.OFFSET_COMMIT_INTERVAL_MS_CONFIG, "30000") + workerProperties.put(DistributedConfig.CONFIG_STORAGE_REPLICATION_FACTOR_CONFIG, "1") + workerProperties.put(DistributedConfig.OFFSET_STORAGE_PARTITIONS_CONFIG, "1") + workerProperties.put(DistributedConfig.OFFSET_STORAGE_REPLICATION_FACTOR_CONFIG, "1") + workerProperties.put(DistributedConfig.STATUS_STORAGE_PARTITIONS_CONFIG, "1") + workerProperties.put(DistributedConfig.STATUS_STORAGE_REPLICATION_FACTOR_CONFIG, "1") + return workerProperties + } + + def getConnectorProperties(): Properties = { + val connectorProperties: Properties = new Properties() + connectorProperties.put(ConnectorConfig.NAME_CONFIG, "CosmosDBSourceConnector") + connectorProperties.put(ConnectorConfig.CONNECTOR_CLASS_CONFIG , "com.microsoft.azure.cosmosdb.kafka.connect.source.CosmosDBSourceConnector") + connectorProperties.put(ConnectorConfig.TASKS_MAX_CONFIG , "1") + connectorProperties.put("connect.cosmosdb.connection.endpoint" , "https://localhost:8888") + connectorProperties.put("connect.cosmosdb.master.key", "C2y6yDjf5/R+ob0N8A7Cgv30VRDJIWEHLM+4QDU5DE2nQ9nDuVTqobD4b8mGGyPMbIZnqyMsEcaGQy67XIw/Jw==") + connectorProperties.put("connect.cosmosdb.database" , "database") + connectorProperties.put("connect.cosmosdb.collection" , "collection1") + +// connectorProperties.put("connect.cosmosdb.connection.endpoint" , "https://dmcosmos.documents.azure.com:443") +// connectorProperties.put("connect.cosmosdb.master.key", "YAopQ0edHWK9v8yV7IpCU1WzvFQkPvpHWDGmjhpXC0swlmibZgHkgqVDiTRG3abFM2PfYoWKPOVFjL7OTJOPsA==") +// connectorProperties.put("connect.cosmosdb.database" , "kafka-connector") +// connectorProperties.put("connect.cosmosdb.collection" , "source") + + connectorProperties.put("connect.cosmosdb.topic.name" , COSMOSDB_TOPIC) + connectorProperties.put(CosmosDBConfigConstants.BATCH_SIZE, "100") + connectorProperties.put(CosmosDBConfigConstants.TIMEOUT, "1") + connectorProperties.put(CosmosDBConfigConstants.SOURCE_POST_PROCESSOR, "com.microsoft.azure.cosmosdb.kafka.connect.processor.source.SelectorSourcePostProcessor") + + + + return connectorProperties + } +} \ No newline at end of file diff --git a/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/PartitionFeedReader.scala b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/PartitionFeedReader.scala new file mode 100644 index 0000000..9ab50a3 --- /dev/null +++ b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/PartitionFeedReader.scala @@ -0,0 +1,56 @@ +package com.microsoft.azure.cosmosdb.kafka.connect.source + +import java.util.concurrent.CountDownLatch + +import com.microsoft.azure.cosmosdb.rx._ +import com.microsoft.azure.cosmosdb._ + +import scala.collection.JavaConversions._ + +class PartitionFeedReader(asyncClient: AsyncDocumentClient, databaseName: String, collectionName: String, partitionKeyRangeId: String, partitionFeedStateManager: PartitionLeaseStateManager, changeFeedProcessorOptions: ChangeFeedProcessorOptions) { + + var partitionFeedState = partitionFeedStateManager.load(partitionKeyRangeId) + + private def createChangeFeedOptionsFromState(): ChangeFeedOptions = { + val changeFeedOptions = new ChangeFeedOptions() + changeFeedOptions.setPartitionKeyRangeId(partitionKeyRangeId) + changeFeedOptions.setMaxItemCount(changeFeedProcessorOptions.queryPartitionsMaxBatchSize) + + partitionFeedState.continuationToken match { + case null => changeFeedOptions.setStartFromBeginning(true) + case "" => changeFeedOptions.setStartFromBeginning(true) + 
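// A previously saved continuation token resumes the change feed from the last checkpoint rather than from the beginning +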
case t => changeFeedOptions.setRequestContinuation(t) + } + + return changeFeedOptions + } + + def readChangeFeed(documentProcessor: List[String] => Unit, completionLatch: CountDownLatch) { + val collectionLink = "/dbs/%s/colls/%s".format(databaseName, collectionName) + val changeFeedOptions = createChangeFeedOptionsFromState() + val changeFeedObservable = asyncClient.queryDocumentChangeFeed(collectionLink, changeFeedOptions) + + changeFeedObservable + // Process documents + .doOnNext(feedResponse => { + val documents = feedResponse.getResults().map(d => d.toJson()) // ready to send to Kafka + documentProcessor(documents.toList) // callback passing the list of documents + }) + // Logging + .doOnNext(feedResponse => { + println("Count: " + feedResponse.getResults().length) + println("ResponseContinuation: " + feedResponse.getResponseContinuation()) + }) + // Save state ... save offset + .flatMap(feedResponse => { + println("Saving State!") + val continuationToken = feedResponse.getResponseContinuation().replaceAll("^\"|\"$", "") + partitionFeedState = new PartitionFeedState(partitionKeyRangeId, continuationToken) + partitionFeedStateManager.save(partitionFeedState) + }) + .subscribe( + v => {}, // Every response - can have multiple documents + e => completionLatch.countDown(), // when error + () => completionLatch.countDown()) // final execution + } +} \ No newline at end of file diff --git a/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/PartitionFeedState.scala b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/PartitionFeedState.scala new file mode 100644 index 0000000..c0eaa37 --- /dev/null +++ b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/PartitionFeedState.scala @@ -0,0 +1,5 @@ +package com.microsoft.azure.cosmosdb.kafka.connect.source + +class PartitionFeedState(val id: String, val continuationToken: String) { + def this(id: String) = this(id, null) +} \ No newline at end of file diff --git a/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/PartitionLeaseStateManager.scala b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/PartitionLeaseStateManager.scala new file mode 100644 index 0000000..96d059e --- /dev/null +++ b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/PartitionLeaseStateManager.scala @@ -0,0 +1,47 @@ +package com.microsoft.azure.cosmosdb.kafka.connect.source + +import rx.{Observable, _} +import com.microsoft.azure.cosmosdb.rx._ +import com.microsoft.azure.cosmosdb._ +import com.google.gson._ + + +class PartitionLeaseStateManager(asyncClient: AsyncDocumentClient, databaseName: String, collectionName: String) { + + private val gson = new Gson() + + def save(partitionFeedState: PartitionFeedState): Observable[ResourceResponse[Document]] = { + val json = gson.toJson(partitionFeedState) + val document = new Document(json) + val collectionLink = DocumentClientBuilder.getCollectionLink(databaseName, collectionName) + + val createDocumentObservable = asyncClient.upsertDocument(collectionLink, document, null, false) + + return createDocumentObservable + } + + def load(partitionKeyRangeId: String): PartitionFeedState = { + val collectionLink = DocumentClientBuilder.getCollectionLink(databaseName, collectionName) + val querySpec = new SqlQuerySpec("SELECT * FROM " + collectionName + " where " + collectionName + ".id = @id", + new SqlParameterCollection( + new SqlParameter("@id", partitionKeyRangeId) + )) + + val queryOptions = new FeedOptions() + 
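// The lease document is looked up by id, which may not be the partition key, so cross-partition querying is enabled +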
queryOptions.setEnableCrossPartitionQuery(true) + + val queryFeedObservable = asyncClient.queryDocuments(collectionLink, querySpec, queryOptions) + + try { + val results = queryFeedObservable.toBlocking().single().getResults() + val partitionFeedState = results.iterator().next().toJson() + return gson.fromJson(partitionFeedState, classOf[PartitionFeedState]) + } + catch { + case error: Throwable => { + System.err.println("Error when getting last state from partitionKeyRangeId. Details: " + error) + return new PartitionFeedState(partitionKeyRangeId) + } + } + } +} diff --git a/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/SampleConsumer.scala b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/SampleConsumer.scala new file mode 100644 index 0000000..7ea0ac2 --- /dev/null +++ b/src/main/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/SampleConsumer.scala @@ -0,0 +1,47 @@ +package com.microsoft.azure.cosmosdb.kafka.connect.source + +import java.util +import java.util.{Collections, Properties} + +import com.microsoft.azure.cosmosdb.kafka.connect.kafka.KafkaCluster +import org.apache.kafka.clients.consumer.{ConsumerConfig, KafkaConsumer} +import org.apache.kafka.common.serialization.StringDeserializer + +object SampleConsumer { + + var COSMOSDB_TOPIC: String = "cosmosdb-source-topic" + + def main(args: Array[String]): Unit = { + + try { + + val properties = new Properties() + properties.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, KafkaCluster.BrokersList) + properties.put(ConsumerConfig.CLIENT_ID_CONFIG, "sample_debugger_consumer-01") + properties.put(ConsumerConfig.GROUP_ID_CONFIG, "debugger_consumergroup") + properties.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "true") + properties.put(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, "1000") + properties.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest") + properties.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, classOf[StringDeserializer]) + properties.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, classOf[StringDeserializer]) + + val consumer = new KafkaConsumer[String, String](properties) + + consumer.subscribe(Collections.singletonList(COSMOSDB_TOPIC)) + val documents = new util.ArrayList[String] + while (true) { + val records = consumer.poll(java.time.Duration.ofMillis(100)) + records.forEach(r => { + val document = r.value() + documents.add(document) + }) + } + } + catch { + case e: Exception => { + println(s" Exception ${e.getMessage() }") + } + } + } + +} \ No newline at end of file diff --git a/src/test/java/com/microsoft/azure/AppTest.java b/src/test/java/com/microsoft/azure/AppTest.java deleted file mode 100644 index 67a603a..0000000 --- a/src/test/java/com/microsoft/azure/AppTest.java +++ /dev/null @@ -1,20 +0,0 @@ -package com.microsoft.azure; - -import static org.junit.Assert.assertTrue; - -import org.junit.Test; - -/** - * Unit test for simple App. 
- */ -public class AppTest -{ - /** - * Rigorous Test :-) - */ - @Test - public void shouldAnswerWithTrue() - { - assertTrue( true ); - } -} diff --git a/src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/MockCosmosDBProvider.scala b/src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/MockCosmosDBProvider.scala new file mode 100644 index 0000000..4572cb8 --- /dev/null +++ b/src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/MockCosmosDBProvider.scala @@ -0,0 +1,32 @@ +package com.microsoft.azure.cosmosdb.kafka.connect +import java.util.ArrayList +import java.util.HashMap +import java.util.concurrent.CountDownLatch + +import com.microsoft.azure.cosmosdb.Document +import com.microsoft.azure.cosmosdb.rx.AsyncDocumentClient +import org.mockito.MockitoSugar.mock + + +object MockCosmosDBProvider extends CosmosDBProvider { + + var CosmosDBCollections: HashMap[String, ArrayList[Document]] = new HashMap[String, ArrayList[Document]] + + def setupCollections[T](collectionNames: List[String]): Unit ={ + collectionNames.foreach(c => CosmosDBCollections.put(c, new ArrayList[Document]())) + } + + def getDocumentsByCollection(collectionName: String): ArrayList[Document] = { + return CosmosDBCollections.get(collectionName) + } + + override def upsertDocuments[T](docs: List[T], databaseName: String, collectionName: String, completionLatch: CountDownLatch): Unit = { + if(CosmosDBCollections.containsKey(collectionName)){ + docs.foreach(d => CosmosDBCollections.get(collectionName).add(d.asInstanceOf[Document])) + } + } + + override def getClient(settings: CosmosDBClientSettings): AsyncDocumentClient = { + return mock[AsyncDocumentClient] + } +} diff --git a/src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/MockCosmosDBReader.scala b/src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/MockCosmosDBReader.scala new file mode 100644 index 0000000..4fd21ea --- /dev/null +++ b/src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/MockCosmosDBReader.scala @@ -0,0 +1,52 @@ +package com.microsoft.azure.cosmosdb.kafka.connect + +import java.util +import java.util.UUID.randomUUID + +import com.microsoft.azure.cosmosdb.kafka.connect.model.CosmosDBDocumentTest +import com.microsoft.azure.cosmosdb.kafka.connect.source.{CosmosDBReader, CosmosDBSourceSettings} +import java.util.{ArrayList, Properties, UUID} +import java.util.UUID._ + +import com.google.gson.Gson +import com.microsoft.azure.cosmosdb.rx.AsyncDocumentClient +import org.apache.kafka.connect.source.{SourceRecord, SourceTaskContext} +import org.apache.kafka.connect.storage.OffsetStorageReader +import org.mockito.MockitoSugar.mock + + +class MockCosmosDBReader (private val client: AsyncDocumentClient, + override val setting: CosmosDBSourceSettings, + private val context: SourceTaskContext) extends CosmosDBReader(client, setting,context) { + + private val SOURCE_PARTITION_FIELD = "partition" + private val SOURCE_OFFSET_FIELD = "changeFeedState" + + override def processChanges(): util.List[SourceRecord] = { + //Return a mock doc list + + /* val records = new util.ArrayList[SourceRecord] + val jsonFile = """{"id": "9","_rid": "tqZSAOCV8ekBAAAAAAAAAA==","_self": "dbs/tqZSAA==/colls/tqZSAOCV8ek=/docs/tqZSAOCV8ekBAAAAAAAAAA==/","_etag": "\"00000000-0000-0000-2bcf-cab592a001d5\"","_attachments": "attachments/","_ts": 1561519953}""" + records.add(new SourceRecord( + sourcePartition(setting.assignedPartition), + sourceOffset(new Gson().toJson(1)), + setting.topicName, + null, + jsonFile + ))*/ + return mock[util.ArrayList[SourceRecord]] 
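// Returning a Mockito mock of the result list (instead of the hand-built SourceRecord in the
// commented block above) keeps processChanges() free of any change feed access; the
// sourcePartition/sourceOffset helpers below are only needed if that sample record is
// re-enabled.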
+ + } + private def sourcePartition(partition: String): util.Map[String, String] = { + val map = new java.util.HashMap[String,String] + map.put(SOURCE_PARTITION_FIELD, partition) + return map + } + + private def sourceOffset(offset: String): util.Map[String, String] = { + val map = new java.util.HashMap[String,String] + map.put(SOURCE_OFFSET_FIELD, offset) + return map + } + +} diff --git a/src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/common/ErrorHandler/ErrorHandlerTest.scala b/src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/common/ErrorHandler/ErrorHandlerTest.scala new file mode 100644 index 0000000..b39b389 --- /dev/null +++ b/src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/common/ErrorHandler/ErrorHandlerTest.scala @@ -0,0 +1,37 @@ +package com.microsoft.azure.cosmosdb.kafka.connect.common.ErrorHandler + +import org.apache.kafka.connect.errors.{ConnectException, RetriableException} + +import scala.util.{Failure, Try} +import org.scalatest.WordSpec + + +class HandleRetriableErrorTest extends WordSpec with HandleRetriableError { + + initializeErrorHandler(10) + + "should decrement number of retries" in { + + intercept[RetriableException] { + try { + 1 / 0 + } catch { + case t: Throwable => + HandleRetriableError(Failure(t)) + } + } + } + + initializeErrorHandler(0) + "should throw ConnectException when retries = 0" in { + + intercept[ConnectException] { + try { + 1 / 0 + } catch { + case t: Throwable => + HandleRetriableError(Failure(t)) + } + } + } +} diff --git a/src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/config/CosmosDBConfigTest.scala b/src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/config/CosmosDBConfigTest.scala new file mode 100644 index 0000000..40edcf5 --- /dev/null +++ b/src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/config/CosmosDBConfigTest.scala @@ -0,0 +1,62 @@ +package com.microsoft.azure.cosmosdb.kafka.connect.config + +import org.apache.kafka.common.config.ConfigException +import org.scalatest.{Matchers, WordSpec} + +import collection.JavaConverters._ + +class CosmosDBConfigTest extends WordSpec with Matchers { + "CosmosDBConfig" should { + "throw an exception if endpoint not present" in { + val map = Map( + "foo" -> "f", + ).asJava + + val caught = intercept[ConfigException] { + CosmosDBConfig(ConnectorConfig.baseConfigDef, map) + } + + caught.getMessage should startWith(s"""Missing required configuration "${CosmosDBConfigConstants.CONNECTION_ENDPOINT_CONFIG}" """) + } + + "throw an exception if master key not present" in { + val map = Map( + CosmosDBConfigConstants.CONNECTION_ENDPOINT_CONFIG -> "f" + ).asJava + + val caught = intercept[ConfigException] { + CosmosDBConfig(ConnectorConfig.baseConfigDef, map) + } + + caught.getMessage should startWith(s"""Missing required configuration "${CosmosDBConfigConstants.CONNECTION_MASTERKEY_CONFIG}" """) + } + + "throw an exception if database not present" in { + val map = Map( + CosmosDBConfigConstants.CONNECTION_ENDPOINT_CONFIG -> "f", + CosmosDBConfigConstants.CONNECTION_MASTERKEY_CONFIG -> "f", + CosmosDBConfigConstants.COLLECTION_CONFIG -> "f", + ).asJava + + val caught = intercept[ConfigException] { + CosmosDBConfig(ConnectorConfig.baseConfigDef, map) + } + + caught.getMessage should startWith(s"""Missing required configuration "${CosmosDBConfigConstants.DATABASE_CONFIG}" """) + } + + "throw an exception if collection not present" in { + val map = Map( + CosmosDBConfigConstants.CONNECTION_ENDPOINT_CONFIG -> "f", + 
CosmosDBConfigConstants.CONNECTION_MASTERKEY_CONFIG -> "f", + CosmosDBConfigConstants.DATABASE_CONFIG -> "f", + ).asJava + + val caught = intercept[ConfigException] { + CosmosDBConfig(ConnectorConfig.baseConfigDef, map) + } + + caught.getMessage should startWith(s"""Missing required configuration "${CosmosDBConfigConstants.COLLECTION_CONFIG}" """) + } + } +} diff --git a/src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/config/TestConfigurations.scala b/src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/config/TestConfigurations.scala new file mode 100644 index 0000000..9a9df75 --- /dev/null +++ b/src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/config/TestConfigurations.scala @@ -0,0 +1,113 @@ +package com.microsoft.azure.cosmosdb.kafka.connect.config + +import java.util.Properties + +import com.google.common.base.Strings +import com.typesafe.config.ConfigFactory +import org.apache.commons.lang3.StringUtils +import org.apache.kafka.clients.producer.ProducerConfig +import org.apache.kafka.connect.runtime.WorkerConfig +import org.apache.kafka.connect.runtime.distributed.DistributedConfig + +object TestConfigurations { + + lazy private val config = ConfigFactory.load() + lazy private val CosmosDBConfig = config.getConfig("CosmosDB") + + // Replace ENDPOINT and MASTER_KEY with values from your Azure Cosmos DB account. + // The default values are credentials of the local emulator, which are not used in any production environment. + var ENDPOINT : String = StringUtils.defaultString(Strings.emptyToNull(CosmosDBConfig.getString("endpoint")), "https://localhost:8081/") + var MASTER_KEY: String = StringUtils.defaultString(Strings.emptyToNull(CosmosDBConfig.getString("masterKey")), "C2y6yDjf5/R+ob0N8A7Cgv30VRDJIWEHLM+4QDU5DE2nQ9nDuVTqobD4b8mGGyPMbIZnqyMsEcaGQy67XIw/Jw==") + var DATABASE : String = StringUtils.defaultString(Strings.emptyToNull(CosmosDBConfig.getString("database")), "database") + var SOURCE_COLLECTION : String = StringUtils.defaultString(Strings.emptyToNull(CosmosDBConfig.getString("collection")), "collection1") + var SINK_COLLECTION : String = StringUtils.defaultString(Strings.emptyToNull(CosmosDBConfig.getString("collection")), "collection2") + var TOPIC : String = StringUtils.defaultString(Strings.emptyToNull(CosmosDBConfig.getString("topic")), "topic_test") + + def getSourceWorkerProperties(bootstrapServers: String): Properties = { + val workerProperties: Properties = new Properties() + workerProperties.put(WorkerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers) + workerProperties.put(DistributedConfig.GROUP_ID_CONFIG, "cosmosdb") + workerProperties.put(DistributedConfig.CONFIG_TOPIC_CONFIG, "cosmosdb-config") + workerProperties.put(DistributedConfig.OFFSET_STORAGE_TOPIC_CONFIG, "cosmosdb-offset") + workerProperties.put(DistributedConfig.STATUS_STORAGE_TOPIC_CONFIG, "cosmosdb-status") + workerProperties.put(WorkerConfig.KEY_CONVERTER_CLASS_CONFIG, "org.apache.kafka.connect.json.JsonConverter") + workerProperties.put(WorkerConfig.VALUE_CONVERTER_CLASS_CONFIG, "org.apache.kafka.connect.json.JsonConverter") + workerProperties.put("value.converter.schemas.enable", "false") + workerProperties.put(WorkerConfig.OFFSET_COMMIT_INTERVAL_MS_CONFIG, "30000") + workerProperties.put(DistributedConfig.CONFIG_TOPIC_CONFIG, "cosmosdb-config") + workerProperties.put(DistributedConfig.CONFIG_STORAGE_REPLICATION_FACTOR_CONFIG, "1") + workerProperties.put(DistributedConfig.OFFSET_STORAGE_PARTITIONS_CONFIG, "1") + 
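// The single-partition, replication-factor-1 values above and below are sized for the
// embedded single-broker cluster these tests start; against a real multi-broker cluster a
// Connect worker would normally leave them at the broker defaults.
// Typical use from the test drivers (sketch, mirroring SourceConnectReaderTest):
//   val worker = TestConfigurations.getSourceWorkerProperties(KafkaCluster.BrokersList.toString)
//   val connector = TestConfigurations.getSourceConnectorProperties()
//   KafkaCluster.startEmbeddedConnect(worker, List(connector))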
workerProperties.put(DistributedConfig.OFFSET_STORAGE_REPLICATION_FACTOR_CONFIG, "1") + workerProperties.put(DistributedConfig.STATUS_STORAGE_PARTITIONS_CONFIG, "1") + workerProperties.put(DistributedConfig.STATUS_STORAGE_REPLICATION_FACTOR_CONFIG, "1") + return workerProperties + } + + def getSinkWorkerProperties(bootstrapServers: String): Properties = { + val workerProperties: Properties = new Properties() + workerProperties.put(WorkerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers) + workerProperties.put(DistributedConfig.GROUP_ID_CONFIG, "cosmosdb-01") + workerProperties.put(DistributedConfig.CONFIG_TOPIC_CONFIG, "cosmosdb-sink-config") + workerProperties.put(DistributedConfig.OFFSET_STORAGE_TOPIC_CONFIG, "cosmosdb-sink-offset") + workerProperties.put(DistributedConfig.STATUS_STORAGE_TOPIC_CONFIG, "cosmosdb-sink-status") + workerProperties.put(WorkerConfig.KEY_CONVERTER_CLASS_CONFIG, "org.apache.kafka.connect.json.JsonConverter") + workerProperties.put(WorkerConfig.VALUE_CONVERTER_CLASS_CONFIG, "org.apache.kafka.connect.json.JsonConverter") + workerProperties.put("value.converter.schemas.enable", "false") + workerProperties.put(WorkerConfig.OFFSET_COMMIT_INTERVAL_MS_CONFIG, "30000") + workerProperties.put(DistributedConfig.CONFIG_STORAGE_REPLICATION_FACTOR_CONFIG, "1") + workerProperties.put(DistributedConfig.OFFSET_STORAGE_PARTITIONS_CONFIG, "1") + workerProperties.put(DistributedConfig.OFFSET_STORAGE_REPLICATION_FACTOR_CONFIG, "1") + workerProperties.put(DistributedConfig.STATUS_STORAGE_PARTITIONS_CONFIG, "1") + workerProperties.put(DistributedConfig.STATUS_STORAGE_REPLICATION_FACTOR_CONFIG, "1") + return workerProperties + } + + def getSourceConnectorProperties(): Properties = { + val connectorProperties: Properties = new Properties() + connectorProperties.put(org.apache.kafka.connect.runtime.ConnectorConfig.NAME_CONFIG, "CosmosDBSourceConnector") + connectorProperties.put(org.apache.kafka.connect.runtime.ConnectorConfig.CONNECTOR_CLASS_CONFIG , "com.microsoft.azure.cosmosdb.kafka.connect.source.CosmosDBSourceConnector") + connectorProperties.put(org.apache.kafka.connect.runtime.ConnectorConfig.TASKS_MAX_CONFIG , "1") + connectorProperties.put(CosmosDBConfigConstants.CONNECTION_ENDPOINT_CONFIG, ENDPOINT) + connectorProperties.put(CosmosDBConfigConstants.CONNECTION_MASTERKEY_CONFIG, MASTER_KEY) + connectorProperties.put(CosmosDBConfigConstants.DATABASE_CONFIG, DATABASE) + connectorProperties.put(CosmosDBConfigConstants.COLLECTION_CONFIG, SOURCE_COLLECTION) + connectorProperties.put(CosmosDBConfigConstants.TOPIC_CONFIG, TOPIC) + connectorProperties.put(CosmosDBConfigConstants.BATCH_SIZE, "10") + connectorProperties.put(CosmosDBConfigConstants.READER_BUFFER_SIZE, "1000") + connectorProperties.put(CosmosDBConfigConstants.ERRORS_RETRY_TIMEOUT_CONFIG, "3") + connectorProperties.put(CosmosDBConfigConstants.SOURCE_POST_PROCESSOR, "com.microsoft.azure.cosmosdb.kafka.connect.processor.source.SelectorSourcePostProcessor") + connectorProperties.put(org.apache.kafka.connect.runtime.ConnectorConfig.ERRORS_RETRY_TIMEOUT_CONFIG, "3") + return connectorProperties + } + + def getSinkConnectorProperties(): Properties = { + val connectorProperties: Properties = new Properties() + connectorProperties.put(org.apache.kafka.connect.runtime.ConnectorConfig.NAME_CONFIG, "CosmosDBSinkConnector") + connectorProperties.put(org.apache.kafka.connect.runtime.ConnectorConfig.CONNECTOR_CLASS_CONFIG , "com.microsoft.azure.cosmosdb.kafka.connect.sink.CosmosDBSinkConnector") + 
connectorProperties.put(org.apache.kafka.connect.runtime.ConnectorConfig.TASKS_MAX_CONFIG , "1") + connectorProperties.put(CosmosDBConfigConstants.CONNECTION_ENDPOINT_CONFIG, ENDPOINT) + connectorProperties.put(CosmosDBConfigConstants.CONNECTION_MASTERKEY_CONFIG, MASTER_KEY) + connectorProperties.put(CosmosDBConfigConstants.DATABASE_CONFIG, DATABASE) + connectorProperties.put(CosmosDBConfigConstants.COLLECTION_CONFIG, SINK_COLLECTION) + connectorProperties.put(CosmosDBConfigConstants.COLLECTION_TOPIC_MAP_CONFIG, s"$SINK_COLLECTION#$TOPIC") + connectorProperties.put("topics", TOPIC) // constant required by sink connector + connectorProperties.put(CosmosDBConfigConstants.TOPIC_CONFIG, TOPIC ) + connectorProperties.put(org.apache.kafka.connect.runtime.ConnectorConfig.ERRORS_RETRY_TIMEOUT_CONFIG, "3") +// connectorProperties.put(CosmosDBConfigConstants.SINK_POST_PROCESSOR, "com.microsoft.azure.cosmosdb.kafka.connect.processor.sink.SelectorSinkPostProcessor") + return connectorProperties + } + + def getProducerProperties(bootstrapServers: String): Properties = { + val producerProperties: Properties = new Properties() + producerProperties.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers) + producerProperties.put(ProducerConfig.ACKS_CONFIG, "all") + producerProperties.put(ProducerConfig.RETRIES_CONFIG, "3") + producerProperties.put(ProducerConfig.BATCH_SIZE_CONFIG, "10") + producerProperties.put(ProducerConfig.LINGER_MS_CONFIG, "1") + producerProperties.put(ProducerConfig.BUFFER_MEMORY_CONFIG, "33554432") + producerProperties.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.connect.json.JsonSerializer") + producerProperties.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.connect.json.JsonSerializer") + return producerProperties + } + +} \ No newline at end of file diff --git a/src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/model/Address.scala b/src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/model/Address.scala new file mode 100644 index 0000000..1d7b163 --- /dev/null +++ b/src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/model/Address.scala @@ -0,0 +1,21 @@ +package com.microsoft.azure.cosmosdb.kafka.connect.model + + +class Address(var city: String, var state: String) { + + def setCity (city:String) { + this.city = city + } + + def setState (state:String) { + this.state = state + } + + def getCity () : String = { + city + } + + def getState () : String = { + state + } +} \ No newline at end of file diff --git a/src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/model/CosmosDBDocumentTest.scala b/src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/model/CosmosDBDocumentTest.scala new file mode 100644 index 0000000..001d115 --- /dev/null +++ b/src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/model/CosmosDBDocumentTest.scala @@ -0,0 +1,29 @@ +package com.microsoft.azure.cosmosdb.kafka.connect.model + +import java.util.UUID + +class CosmosDBDocumentTest(var id: String, var message: String, var testID: UUID) { + def getId(): String = { + return id + } + + def getMessage(): String = { + return message + } + + def getTestID(): UUID = { + return testID + } + + def setId(id: String) = { + this.id = id + } + + def setMessage(message: String) = { + this.message = message + } + + def setTestID(testID: UUID) = { + this.testID = testID + } +} \ No newline at end of file diff --git a/src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/model/KafkaPayloadTest.scala
b/src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/model/KafkaPayloadTest.scala new file mode 100644 index 0000000..e604baa --- /dev/null +++ b/src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/model/KafkaPayloadTest.scala @@ -0,0 +1,14 @@ +package com.microsoft.azure.cosmosdb.kafka.connect.model + +import java.util.UUID + +case class KafkaPayloadTest( + id: String, + message: String, + testID: UUID, + _rid: String, + _self: String, + _etag: String, + _attachments: String, + _ts: Long +) \ No newline at end of file diff --git a/src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/processor/DocumentIdSinkPostProcessorTest.scala b/src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/processor/DocumentIdSinkPostProcessorTest.scala new file mode 100644 index 0000000..d455c8a --- /dev/null +++ b/src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/processor/DocumentIdSinkPostProcessorTest.scala @@ -0,0 +1,76 @@ +package com.microsoft.azure.cosmosdb.kafka.connect.processor + +import com.google.gson._ +import com.microsoft.azure.cosmosdb.kafka.connect.config.{ConnectorConfig, CosmosDBConfig, TestConfigurations} +import com.microsoft.azure.cosmosdb.kafka.connect.processor.sink.DocumentIdSinkPostProcessor +import org.scalatest.{FlatSpec, GivenWhenThen} + +import scala.collection.JavaConverters._ + +class DocumentIdSinkPostProcessorTest extends FlatSpec with GivenWhenThen { + + val sourceRecord: String = + """ + |{ + | "firstName": "John", + | "lastName": "Smith" + |} + """.stripMargin + + "'id' field" should "be created or replaced with value taken from specified field" in { + + val expectedRecord = + """ + |{ + | "firstName": "John", + | "lastName": "Smith", + | "id": "John" + |} + """.stripMargin + + Given("an existing field") + val connectorProperties = TestConfigurations.getSourceConnectorProperties() + connectorProperties.put("connect.cosmosdb.sink.post-processor.documentId.field", "firstName") + val config = new CosmosDBConfig(ConnectorConfig.baseConfigDef, connectorProperties.asScala.asJava) + + When("JSON document is processed") + val jsonParser = new JsonParser() + val json: JsonObject = jsonParser.parse(sourceRecord).getAsJsonObject + val postProcessor = new DocumentIdSinkPostProcessor() + postProcessor.configure(config) + + Then("'id' is replaced with specified existing field value") + val processed = postProcessor.runJsonPostProcess(json) + val expected = jsonParser.parse(expectedRecord).getAsJsonObject + assert(processed.equals(expected)) + } + + "null 'id' field" should "be generated if requested field doesn't exists" in { + + val expectedRecord = + """ + |{ + | "firstName": "John", + | "lastName": "Smith", + | "id": null + |} + """.stripMargin + + Given("a non-existing field") + val connectorProperties = TestConfigurations.getSourceConnectorProperties() + connectorProperties.put("connect.cosmosdb.sink.post-processor.documentId.field", "notExists") + val config = new CosmosDBConfig(ConnectorConfig.baseConfigDef, connectorProperties.asScala.asJava) + + When("JSON document is processed") + val jsonParser = new JsonParser() + val json: JsonObject = jsonParser.parse(sourceRecord).getAsJsonObject + val postProcessor = new DocumentIdSinkPostProcessor() + postProcessor.configure(config) + + Then("'id' is set to null") + val processed = postProcessor.runJsonPostProcess(json) + val expected = jsonParser.parse(expectedRecord).getAsJsonObject + assert(processed.equals(expected)) + } + +} diff --git 
a/src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/processor/SelectorPostProcessorTest.scala b/src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/processor/SelectorPostProcessorTest.scala new file mode 100644 index 0000000..92f41df --- /dev/null +++ b/src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/processor/SelectorPostProcessorTest.scala @@ -0,0 +1,151 @@ +package com.microsoft.azure.cosmosdb.kafka.connect.processor + +import scala.collection.JavaConverters._ +import com.google.gson._ +import com.microsoft.azure.cosmosdb.kafka.connect.config.{ConnectorConfig, CosmosDBConfig, TestConfigurations} +import com.microsoft.azure.cosmosdb.kafka.connect.processor.source.SelectorSourcePostProcessor +import org.scalatest.{FlatSpec, GivenWhenThen} + +class SelectorPostProcessorTest extends FlatSpec with GivenWhenThen { + + val sourceRecord: String = + """ + |{ + | "firstName": "John", + | "lastName": "Smith", + | "isAlive": true, + | "age": 27, + | "address": { + | "streetAddress": "21 2nd Street", + | "city": "New York", + | "state": "NY", + | "postalCode": "10021-3100" + | }, + | "phoneNumbers": [ + | { + | "type": "home", + | "number": "212 555-1234" + | }, + | { + | "type": "office", + | "number": "646 555-4567" + | }, + | { + | "type": "mobile", + | "number": "123 456-7890" + | } + | ], + | "children": [], + | "spouse": null, + | "id": "f355b7ff-e522-6906-c169-6d53e7ab046b", + | "_rid": "tA4eAIlHRkMFAAAAAAAAAA==", + | "_self": "dbs/tA4eAA==/colls/tA4eAIlHRkM=/docs/tA4eAIlHRkMFAAAAAAAAAA==/", + | "_etag": "\"39022ddc-0000-0700-0000-5d094f610000\"", + | "_attachments": "attachments/", + | "_ts": 1560891233 + |} + """.stripMargin + + "Post Processor" should "remove configured fields" in { + + val expectedRecord = + """ + |{ + | "firstName": "John", + | "lastName": "Smith", + | "isAlive": true, + | "age": 27, + | "address": { + | "streetAddress": "21 2nd Street", + | "city": "New York", + | "state": "NY", + | "postalCode": "10021-3100" + | }, + | "phoneNumbers": [ + | { + | "type": "home", + | "number": "212 555-1234" + | }, + | { + | "type": "office", + | "number": "646 555-4567" + | }, + | { + | "type": "mobile", + | "number": "123 456-7890" + | } + | ], + | "children": [], + | "spouse": null + |} + """.stripMargin + + Given("Post Processor configuration") + val connectorProperties = TestConfigurations.getSourceConnectorProperties() + connectorProperties.put("connect.cosmosdb.source.post-processor.selector.type", "Exclude") + connectorProperties.put("connect.cosmosdb.source.post-processor.selector.fields", "id, _rid, _self, _etag, _attachments, _ts, _lsn, _metadata") + val config = new CosmosDBConfig(ConnectorConfig.baseConfigDef, connectorProperties.asScala.asJava) + + When("JSON document is processed") + val jsonParser = new JsonParser() + val json: JsonObject = jsonParser.parse(sourceRecord).getAsJsonObject + val postProcessor = new SelectorSourcePostProcessor() + postProcessor.configure(config) + + Then("specified JSON properties are removed") + val processed = postProcessor.runJsonPostProcess(json) + val expected = jsonParser.parse(expectedRecord).getAsJsonObject + assert(processed.equals(expected)) + } + + "Post Processor" should "keep only configured fields" in { + + val expectedRecord = + """ + |{ + | "firstName": "John", + | "lastName": "Smith", + | "address": { + | "streetAddress": "21 2nd Street", + | "city": "New York", + | "state": "NY", + | "postalCode": "10021-3100" + | }, + | "phoneNumbers": [ + | { + | "type": "home", + | "number": "212 555-1234" + | }, 
+ | { + | "type": "office", + | "number": "646 555-4567" + | }, + | { + | "type": "mobile", + | "number": "123 456-7890" + | } + | ], + | "children": [], + | "spouse": null + |} + """.stripMargin + + Given("Post Processor configuration") + val connectorProperties = TestConfigurations.getSourceConnectorProperties() + connectorProperties.put("connect.cosmosdb.source.post-processor.selector.type", "Include") + connectorProperties.put("connect.cosmosdb.source.post-processor.selector.fields", "firstName, lastName, address, phoneNumbers, children, spouse") + val config = new CosmosDBConfig(ConnectorConfig.baseConfigDef, connectorProperties.asScala.asJava) + + When("JSON document is processed") + val jsonParser = new JsonParser() + val json: JsonObject = jsonParser.parse(sourceRecord).getAsJsonObject + val postProcessor = new SelectorSourcePostProcessor() + postProcessor.configure(config) + + Then("only specified JSON properties are kept") + val processed = postProcessor.runJsonPostProcess(json) + val expected = jsonParser.parse(expectedRecord).getAsJsonObject + assert(processed.equals(expected)) + } + +} diff --git a/src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/processor/SinkPostProcessorTest.scala b/src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/processor/SinkPostProcessorTest.scala new file mode 100644 index 0000000..b3d8f8b --- /dev/null +++ b/src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/processor/SinkPostProcessorTest.scala @@ -0,0 +1,72 @@ +package com.microsoft.azure.cosmosdb.kafka.connect.processor + +import java.util.Properties + +import com.microsoft.azure.cosmosdb.kafka.connect.config.{CosmosDBConfigConstants, TestConfigurations} +import com.microsoft.azure.cosmosdb.kafka.connect.kafka.KafkaCluster +import org.apache.kafka.connect.runtime.WorkerConfig +import org.apache.kafka.connect.runtime.distributed.DistributedConfig + +// TODO: This should be removed from here and refactored into an Integration Test + +object SinkPostProcessorTest { + + var COSMOSDB_TOPIC: String = "cosmosdb-source-topic" + + def main(args: Array[String]): Unit = { + val workerProperties: Properties = getWorkerProperties(KafkaCluster.BrokersList.toString) + val connectorProperties: Properties = getConnectorProperties() + + // Add Sink Post Processors + val postProcessors = + "com.microsoft.azure.cosmosdb.kafka.connect.processor.sink.DocumentIdSinkPostProcessor" :: + "com.microsoft.azure.cosmosdb.kafka.connect.processor.sink.SelectorSinkPostProcessor" :: + "com.microsoft.azure.cosmosdb.kafka.connect.processor.SampleConsoleWriterPostProcessor" :: + Nil + connectorProperties.put(CosmosDBConfigConstants.SINK_POST_PROCESSOR, postProcessors.mkString(",")) + + // Configure Sink Post Processor + connectorProperties.put("connect.cosmosdb.sink.post-processor.selector.type", "Include") + connectorProperties.put("connect.cosmosdb.sink.post-processor.selector.fields", "id, firstName, lastName, age, address, children, spouse") + connectorProperties.put("connect.cosmosdb.sink.post-processor.documentId.field", "lastName") + + KafkaCluster.startEmbeddedConnect(workerProperties, List(connectorProperties)) + if (KafkaCluster.kafkaConnectEnabled) { + println("Kafka Connector Enabled") + } + } + + def getWorkerProperties(bootstrapServers: String): Properties = { + val workerProperties: Properties = new Properties() + + workerProperties.put(WorkerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers) + workerProperties.put(DistributedConfig.GROUP_ID_CONFIG, "cosmosdb-01") + 
workerProperties.put(DistributedConfig.CONFIG_TOPIC_CONFIG, "cosmosdb-sink-config") + workerProperties.put(DistributedConfig.OFFSET_STORAGE_TOPIC_CONFIG, "cosmosdb-sink-offset") + workerProperties.put(DistributedConfig.STATUS_STORAGE_TOPIC_CONFIG, "cosmosdb-sink-status") + workerProperties.put(WorkerConfig.KEY_CONVERTER_CLASS_CONFIG, "org.apache.kafka.connect.json.JsonConverter") + workerProperties.put(WorkerConfig.VALUE_CONVERTER_CLASS_CONFIG, "org.apache.kafka.connect.json.JsonConverter") + workerProperties.put(WorkerConfig.OFFSET_COMMIT_INTERVAL_MS_CONFIG, "30000") + workerProperties.put(DistributedConfig.CONFIG_STORAGE_REPLICATION_FACTOR_CONFIG, "1") + workerProperties.put(DistributedConfig.OFFSET_STORAGE_PARTITIONS_CONFIG, "1") + workerProperties.put(DistributedConfig.OFFSET_STORAGE_REPLICATION_FACTOR_CONFIG, "1") + workerProperties.put(DistributedConfig.STATUS_STORAGE_PARTITIONS_CONFIG, "1") + workerProperties.put(DistributedConfig.STATUS_STORAGE_REPLICATION_FACTOR_CONFIG, "1") + + workerProperties + } + + def getConnectorProperties(): Properties = { + val connectorProperties = TestConfigurations.getSinkConnectorProperties() + + connectorProperties.put(CosmosDBConfigConstants.COLLECTION_CONFIG, "destination") + connectorProperties.put(CosmosDBConfigConstants.TOPIC_CONFIG, COSMOSDB_TOPIC) + connectorProperties.put("topics", COSMOSDB_TOPIC) + connectorProperties.put(CosmosDBConfigConstants.ERRORS_RETRY_TIMEOUT_CONFIG, "3") + + + connectorProperties + } + + +} diff --git a/src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/processor/SourcePostProcessorTest.scala b/src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/processor/SourcePostProcessorTest.scala new file mode 100644 index 0000000..11fed8c --- /dev/null +++ b/src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/processor/SourcePostProcessorTest.scala @@ -0,0 +1,75 @@ +package com.microsoft.azure.cosmosdb.kafka.connect.processor + +import java.util.Properties + +import com.microsoft.azure.cosmosdb.kafka.connect.config.{CosmosDBConfigConstants, TestConfigurations} +import com.microsoft.azure.cosmosdb.kafka.connect.kafka.KafkaCluster +import org.apache.kafka.connect.runtime.WorkerConfig +import org.apache.kafka.connect.runtime.distributed.DistributedConfig +import org.scalatest.{FlatSpec, GivenWhenThen} + +// TODO: This should be removed from here and refactored into an Integration Test + +object SourcePostProcessorTest { + + var COSMOSDB_TOPIC: String = "cosmosdb-source-topic" + + def main(args: Array[String]): Unit = { + + val workerProperties: Properties = getWorkerProperties(KafkaCluster.BrokersList.toString) + val connectorProperties: Properties = getConnectorProperties() + + // Add Source Post Processors + val postProcessors = + "com.microsoft.azure.cosmosdb.kafka.connect.processor.source.SelectorSourcePostProcessor" :: + "com.microsoft.azure.cosmosdb.kafka.connect.processor.SampleConsoleWriterPostProcessor" :: + Nil + connectorProperties.put(CosmosDBConfigConstants.SOURCE_POST_PROCESSOR, postProcessors.mkString(",")) + + // Configure Source Post Processor + connectorProperties.put("connect.cosmosdb.source.post-processor.selector.type", "Exclude") + connectorProperties.put("connect.cosmosdb.source.post-processor.selector.fields", "id, _rid, _self, _etag, _attachments, _ts, _lsn, _metadata") + + //connectorProperties.put("connect.cosmosdb.source.post-processor.selector.type", "Include") + //connectorProperties.put("connect.cosmosdb.source.post-processor.selector.fields", "id, firstName, lastName, age") + + // 
Run Embedded Kafka Cluster + KafkaCluster.startEmbeddedConnect(workerProperties, List(connectorProperties)) + if (KafkaCluster.kafkaConnectEnabled) { + println("Kafka Connector Enabled") + } + } + + def getWorkerProperties(bootstrapServers: String): Properties = { + val workerProperties: Properties = new Properties() + + workerProperties.put(WorkerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers) + workerProperties.put(DistributedConfig.GROUP_ID_CONFIG, "cosmosdb") + workerProperties.put(DistributedConfig.CONFIG_TOPIC_CONFIG, "cosmosdb-config") + workerProperties.put(DistributedConfig.OFFSET_STORAGE_TOPIC_CONFIG, "cosmosdb-offset") + workerProperties.put(DistributedConfig.STATUS_STORAGE_TOPIC_CONFIG, "cosmosdb-status") + workerProperties.put(WorkerConfig.KEY_CONVERTER_CLASS_CONFIG, "org.apache.kafka.connect.json.JsonConverter") + workerProperties.put(WorkerConfig.VALUE_CONVERTER_CLASS_CONFIG, "org.apache.kafka.connect.json.JsonConverter") + workerProperties.put(WorkerConfig.OFFSET_COMMIT_INTERVAL_MS_CONFIG, "30000") + workerProperties.put(DistributedConfig.CONFIG_TOPIC_CONFIG, "cosmosdb-config") + workerProperties.put(DistributedConfig.CONFIG_STORAGE_REPLICATION_FACTOR_CONFIG, "1") + workerProperties.put(DistributedConfig.OFFSET_STORAGE_TOPIC_CONFIG, "cosmosdb-offset") + workerProperties.put(DistributedConfig.OFFSET_STORAGE_PARTITIONS_CONFIG, "1") + workerProperties.put(DistributedConfig.OFFSET_STORAGE_REPLICATION_FACTOR_CONFIG, "1") + workerProperties.put(DistributedConfig.STATUS_STORAGE_TOPIC_CONFIG, "cosmosdb-status") + workerProperties.put(DistributedConfig.STATUS_STORAGE_PARTITIONS_CONFIG, "1") + workerProperties.put(DistributedConfig.STATUS_STORAGE_REPLICATION_FACTOR_CONFIG, "1") + + workerProperties + } + + def getConnectorProperties(): Properties = { + val connectorProperties = TestConfigurations.getSourceConnectorProperties() + + connectorProperties.put(CosmosDBConfigConstants.COLLECTION_CONFIG, "source") + connectorProperties.put(CosmosDBConfigConstants.TOPIC_CONFIG, COSMOSDB_TOPIC) + connectorProperties.put("topics", COSMOSDB_TOPIC) + + connectorProperties + } +} \ No newline at end of file diff --git a/src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/provider/CosmosDBProviderImplTest.scala b/src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/provider/CosmosDBProviderImplTest.scala new file mode 100644 index 0000000..5593784 --- /dev/null +++ b/src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/provider/CosmosDBProviderImplTest.scala @@ -0,0 +1,43 @@ +package com.microsoft.azure.cosmosdb.kafka.connect.provider + +import java.util.concurrent.CountDownLatch + +import com.microsoft.azure.cosmosdb.kafka.connect.config.TestConfigurations +import com.microsoft.azure.cosmosdb.kafka.connect.{CosmosDBClientSettings, CosmosDBProviderImpl} +import com.microsoft.azure.cosmosdb.{ConnectionPolicy, ConsistencyLevel} +import com.typesafe.scalalogging.LazyLogging +import org.apache.kafka.connect.errors.ConnectException +import org.scalatest.{FlatSpec, GivenWhenThen} + +import scala.util.{Failure, Success, Try} + +class CosmosDBProviderImplTest extends FlatSpec with GivenWhenThen with LazyLogging { + + "CosmosDBProviderTest" should "read collection with a given name" in { + Given("A collection name") + val clientSettings = CosmosDBClientSettings( + TestConfigurations.ENDPOINT, + TestConfigurations.MASTER_KEY, + TestConfigurations.DATABASE, + TestConfigurations.SOURCE_COLLECTION, + ConnectionPolicy.GetDefault(), + ConsistencyLevel.Session + ) + val client = 
Try(CosmosDBProviderImpl.getClient(clientSettings)) match { + case Success(conn) => + logger.info("Connection to CosmosDB established.") + conn + case Failure(f) => throw new ConnectException(s"Couldn't connect to CosmosDB.", f) + } + + When("Call CosmosDB readcollection") + logger.info("readCollection in CosmosDB .") + + val docCollQry = CosmosDBProviderImpl.queryCollection(TestConfigurations.DATABASE, TestConfigurations.SOURCE_COLLECTION, new CountDownLatch(1)).toBlocking.single + logger.info(docCollQry.getResults.size.toString) + + Then(s"Verify collection of messages is equal to inserted") + assert(docCollQry.getResults.size != 0) + } + +} diff --git a/src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/sink/CosmosDBSinkConnectorTest.scala b/src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/sink/CosmosDBSinkConnectorTest.scala new file mode 100644 index 0000000..bb8509a --- /dev/null +++ b/src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/sink/CosmosDBSinkConnectorTest.scala @@ -0,0 +1,34 @@ +package com.microsoft.azure.cosmosdb.kafka.connect.sink + +import com.google.common.collect.Maps +import com.microsoft.azure.cosmosdb.kafka.connect.config.{CosmosDBConfigConstants, TestConfigurations} +import org.apache.kafka.connect.runtime.ConnectorConfig +import org.scalatest.{FlatSpec, GivenWhenThen} + + +class CosmosDBSinkConnectorTest extends FlatSpec with GivenWhenThen { + "CosmosDBSinkConnector" should "Validate all input properties and generate right set of task config properties" in { + Given("Valid set of input properties") + val props = TestConfigurations.getSinkConnectorProperties() + val connector = new CosmosDBSinkConnector + When("Start and TaskConfig are called in right order") + connector.start(Maps.fromProperties(props)) + val numTasks = props.getProperty(ConnectorConfig.TASKS_MAX_CONFIG).toInt + val taskConfigs = connector.taskConfigs(numTasks) + + Then("The TaskConfigs have all the expected properties") + assert(taskConfigs.size() == numTasks) + for (i <- 0 until numTasks) { + val taskConfig: java.util.Map[String, String] = taskConfigs.get(i) + assert(taskConfig.containsKey(ConnectorConfig.NAME_CONFIG)) + assert(taskConfig.containsKey(ConnectorConfig.CONNECTOR_CLASS_CONFIG)) + assert(taskConfig.containsKey(ConnectorConfig.TASKS_MAX_CONFIG)) + assert(taskConfig.containsKey(CosmosDBConfigConstants.CONNECTION_ENDPOINT_CONFIG)) + assert(taskConfig.containsKey(CosmosDBConfigConstants.CONNECTION_MASTERKEY_CONFIG)) + assert(taskConfig.containsKey(CosmosDBConfigConstants.DATABASE_CONFIG)) + assert(taskConfig.containsKey(CosmosDBConfigConstants.COLLECTION_TOPIC_MAP_CONFIG)) + assert(taskConfig.containsKey(CosmosDBConfigConstants.COLLECTION_CONFIG)) + assert(taskConfig.containsKey(CosmosDBConfigConstants.TOPIC_CONFIG)) + } + } +} \ No newline at end of file diff --git a/src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/sink/CosmosDBSinkConnectorWriterTest.scala b/src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/sink/CosmosDBSinkConnectorWriterTest.scala new file mode 100644 index 0000000..0e98da5 --- /dev/null +++ b/src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/sink/CosmosDBSinkConnectorWriterTest.scala @@ -0,0 +1,19 @@ +package com.microsoft.azure.cosmosdb.kafka.connect.sink + +import java.util.Properties + +import com.microsoft.azure.cosmosdb.kafka.connect.config.TestConfigurations +import com.microsoft.azure.cosmosdb.kafka.connect.config.CosmosDBConfigConstants +import com.microsoft.azure.cosmosdb.kafka.connect.kafka.KafkaCluster + 
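// Manual driver rather than a ScalaTest spec: its main() stands up the embedded Connect
// worker with the sink worker/connector properties from TestConfigurations, so anything
// produced to the configured topic should flow through the sink connector into the sink
// collection. Presumably run ad hoc while debugging, e.g. (assuming the default sbt test
// setup) sbt "test:runMain com.microsoft.azure.cosmosdb.kafka.connect.sink.SinkConnectWriterTest".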
+object SinkConnectWriterTest { + + def main(args: Array[String]): Unit = { + val workerProperties: Properties = TestConfigurations.getSinkWorkerProperties(KafkaCluster.BrokersList.toString) + val connectorProperties: Properties = TestConfigurations.getSinkConnectorProperties() + KafkaCluster.startEmbeddedConnect(workerProperties, List(connectorProperties)) + if (KafkaCluster.kafkaConnectEnabled) { + println("Kafka Connector Enabled") + } + } +} diff --git a/src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/sink/CosmosDBSinkTaskTest.scala b/src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/sink/CosmosDBSinkTaskTest.scala new file mode 100644 index 0000000..ce1c6a4 --- /dev/null +++ b/src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/sink/CosmosDBSinkTaskTest.scala @@ -0,0 +1,156 @@ +package com.microsoft.azure.cosmosdb.kafka.connect.sink + +import java.util.ArrayList + +import com.microsoft.azure.cosmosdb.kafka.connect.MockCosmosDBProvider +import com.microsoft.azure.cosmosdb.kafka.connect.config.TestConfigurations.{DATABASE, ENDPOINT, MASTER_KEY} +import com.microsoft.azure.cosmosdb.kafka.connect.config.CosmosDBConfigConstants +import org.apache.kafka.connect.data.Schema +import org.apache.kafka.connect.sink.SinkRecord +import org.scalatest.{FlatSpec, GivenWhenThen} +import scala.collection.JavaConverters._ +import scala.collection.mutable + + +class CosmosDBSinkTaskTest extends FlatSpec with GivenWhenThen { + + val PARTITION = 0 + + private val TOPIC = "topic" + private val TOPIC_2 = "topic2" + private val TOPIC_3 = "topic3" + private val TOPIC_4 = "topic4" + private val TOPIC_5 = "topic5" + + private val COLLECTION = "collection" + private val COLLECTION_2 = "collection2" + private val COLLECTION_3 = "collection3" + + + "CosmosDBSinkConnector start" should "Populate a simple collection topic map according to the configuration in settings" in { + Given("A Cosmos DB Provider and settings with a collection topic mapping") + val mockCosmosProvider = MockCosmosDBProvider + mockCosmosProvider.setupCollections(List(COLLECTION)) + + val sinkTask = new CosmosDBSinkTask { override val cosmosDBProvider = mockCosmosProvider } + val map = Map( + org.apache.kafka.connect.runtime.ConnectorConfig.NAME_CONFIG -> "CosmosDBSinkConnector", + org.apache.kafka.connect.runtime.ConnectorConfig.CONNECTOR_CLASS_CONFIG -> "com.microsoft.azure.cosmosdb.kafka.connect.sink.CosmosDBSinkConnector", + org.apache.kafka.connect.runtime.ConnectorConfig.TASKS_MAX_CONFIG -> "1", + CosmosDBConfigConstants.CONNECTION_ENDPOINT_CONFIG -> ENDPOINT, + CosmosDBConfigConstants.CONNECTION_MASTERKEY_CONFIG -> MASTER_KEY, + CosmosDBConfigConstants.DATABASE_CONFIG -> DATABASE, + CosmosDBConfigConstants.COLLECTION_CONFIG -> s"$COLLECTION", + CosmosDBConfigConstants.COLLECTION_TOPIC_MAP_CONFIG -> s"$COLLECTION#$TOPIC", + "topics" -> s"$TOPIC", + CosmosDBConfigConstants.TOPIC_CONFIG -> s"$TOPIC", + CosmosDBConfigConstants.SINK_POST_PROCESSOR -> "com.microsoft.azure.cosmosdb.kafka.connect.processor.sink.SelectorSinkPostProcessor" + ).asJava + + When("The sink task is started") + sinkTask.start(map) + + Then("The collection topic map should contain the proper mapping") + val expectedMap = mutable.HashMap[String, String](TOPIC -> COLLECTION) + assert(sinkTask.collectionTopicMap == expectedMap) + } + + + "CosmosDBSinkConnector start" should "Populate a complex collection topic map according to the configuration in settings" in { + Given("A Cosmos DB Provider and settings with a collection topic mapping") + val 
mockCosmosProvider = MockCosmosDBProvider + mockCosmosProvider.setupCollections(List(COLLECTION)) + + val sinkTask = new CosmosDBSinkTask { override val cosmosDBProvider = mockCosmosProvider } + val map = Map( + org.apache.kafka.connect.runtime.ConnectorConfig.NAME_CONFIG -> "CosmosDBSinkConnector", + org.apache.kafka.connect.runtime.ConnectorConfig.CONNECTOR_CLASS_CONFIG -> "com.microsoft.azure.cosmosdb.kafka.connect.sink.CosmosDBSinkConnector", + org.apache.kafka.connect.runtime.ConnectorConfig.TASKS_MAX_CONFIG -> "1", + CosmosDBConfigConstants.CONNECTION_ENDPOINT_CONFIG -> ENDPOINT, + CosmosDBConfigConstants.CONNECTION_MASTERKEY_CONFIG -> MASTER_KEY, + CosmosDBConfigConstants.DATABASE_CONFIG -> DATABASE, + CosmosDBConfigConstants.COLLECTION_CONFIG -> s"$COLLECTION,$COLLECTION_2,$COLLECTION_3", + CosmosDBConfigConstants.COLLECTION_TOPIC_MAP_CONFIG -> s"$COLLECTION#$TOPIC,$COLLECTION#$TOPIC_2,$COLLECTION_2#$TOPIC_3,$COLLECTION_3#$TOPIC_4,$COLLECTION_3#$TOPIC_5", + "topics" -> s"$TOPIC,$TOPIC_2,$TOPIC_3,$TOPIC_4,$TOPIC_5", + CosmosDBConfigConstants.TOPIC_CONFIG -> s"$TOPIC,$TOPIC_2,$TOPIC_3,$TOPIC_4,$TOPIC_5", + CosmosDBConfigConstants.SINK_POST_PROCESSOR -> "com.microsoft.azure.cosmosdb.kafka.connect.processor.sink.SelectorSinkPostProcessor" + ).asJava + + When("The sink task is started") + sinkTask.start(map) + + Then("The collection topic map should contain the proper mapping") + val expectedMap = mutable.HashMap[String, String](TOPIC -> COLLECTION, + TOPIC_2 -> COLLECTION, + TOPIC_3 -> COLLECTION_2, + TOPIC_4 -> COLLECTION_3, + TOPIC_5 -> COLLECTION_3) + assert(sinkTask.collectionTopicMap == expectedMap) + } + + + "CosmosDBSinkConnector start" should "Populate the collection topic map with collection name as topic name if no config is given" in { + Given("A Cosmos DB Provider and settings without a collection topic mapping") + val mockCosmosProvider = MockCosmosDBProvider + mockCosmosProvider.setupCollections(List(COLLECTION)) + + val sinkTask = new CosmosDBSinkTask { override val cosmosDBProvider = mockCosmosProvider } + val map = Map( + org.apache.kafka.connect.runtime.ConnectorConfig.NAME_CONFIG -> "CosmosDBSinkConnector", + org.apache.kafka.connect.runtime.ConnectorConfig.CONNECTOR_CLASS_CONFIG -> "com.microsoft.azure.cosmosdb.kafka.connect.sink.CosmosDBSinkConnector", + org.apache.kafka.connect.runtime.ConnectorConfig.TASKS_MAX_CONFIG -> "1", + CosmosDBConfigConstants.CONNECTION_ENDPOINT_CONFIG -> ENDPOINT, + CosmosDBConfigConstants.CONNECTION_MASTERKEY_CONFIG -> MASTER_KEY, + CosmosDBConfigConstants.DATABASE_CONFIG -> DATABASE, + CosmosDBConfigConstants.COLLECTION_CONFIG -> "", + CosmosDBConfigConstants.COLLECTION_TOPIC_MAP_CONFIG -> "", + "topics" -> s"$TOPIC,$TOPIC_2", + CosmosDBConfigConstants.TOPIC_CONFIG -> s"$TOPIC,$TOPIC_2", + CosmosDBConfigConstants.SINK_POST_PROCESSOR -> "com.microsoft.azure.cosmosdb.kafka.connect.processor.sink.SelectorSinkPostProcessor" + ).asJava + + When("The sink task is started") + sinkTask.start(map) + + Then("The collection topic map should contain the proper mapping") + val expectedMap = mutable.HashMap[String, String](TOPIC -> TOPIC, + TOPIC_2 -> TOPIC_2) + assert(sinkTask.collectionTopicMap == expectedMap) + } + + + "CosmosDBSinkConnector put" should "Write records from topics in the proper collections according to the map" in { + Given("A Cosmos DB Provider and a configured Cosmos DB Collection") + val mockCosmosProvider = MockCosmosDBProvider + mockCosmosProvider.setupCollections(List(COLLECTION)) + + val record1 = new SinkRecord(TOPIC, 
PARTITION, Schema.STRING_SCHEMA, null, Schema.STRING_SCHEMA, "{\"message\": \"message1 payload\"}", 0) + val record2 = new SinkRecord(TOPIC, PARTITION, Schema.STRING_SCHEMA, null, Schema.STRING_SCHEMA, "{\"message\": \"message2 payload\"}", 0) + val records = new ArrayList[SinkRecord] + records.add(record1) + records.add(record2) + + val sinkTask = new CosmosDBSinkTask { override val cosmosDBProvider = mockCosmosProvider } + val map = Map( + org.apache.kafka.connect.runtime.ConnectorConfig.NAME_CONFIG -> "CosmosDBSinkConnector", + org.apache.kafka.connect.runtime.ConnectorConfig.CONNECTOR_CLASS_CONFIG -> "com.microsoft.azure.cosmosdb.kafka.connect.sink.CosmosDBSinkConnector", + org.apache.kafka.connect.runtime.ConnectorConfig.TASKS_MAX_CONFIG -> "1", + CosmosDBConfigConstants.CONNECTION_ENDPOINT_CONFIG -> ENDPOINT, + CosmosDBConfigConstants.CONNECTION_MASTERKEY_CONFIG -> MASTER_KEY, + CosmosDBConfigConstants.DATABASE_CONFIG -> DATABASE, + CosmosDBConfigConstants.COLLECTION_CONFIG -> COLLECTION, + CosmosDBConfigConstants.COLLECTION_TOPIC_MAP_CONFIG -> s"$COLLECTION#$TOPIC", + "topics" -> TOPIC, + CosmosDBConfigConstants.TOPIC_CONFIG -> TOPIC, + CosmosDBConfigConstants.SINK_POST_PROCESSOR -> "com.microsoft.azure.cosmosdb.kafka.connect.processor.sink.SelectorSinkPostProcessor" + ).asJava + sinkTask.start(map) + + When("Records are passed to the put method") + sinkTask.put(records) + + Then("The Cosmos DB collection should contain all of the records") + val documents = mockCosmosProvider.getDocumentsByCollection(COLLECTION) + assert(documents.size == 2) + } +} diff --git a/src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/sink/CosmosDBWriterTest.scala b/src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/sink/CosmosDBWriterTest.scala new file mode 100644 index 0000000..bf6fc81 --- /dev/null +++ b/src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/sink/CosmosDBWriterTest.scala @@ -0,0 +1,216 @@ +package com.microsoft.azure.cosmosdb.kafka.connect.sink + +import java.util.ArrayList +import com.microsoft.azure.cosmosdb.Document +import com.microsoft.azure.cosmosdb.kafka.connect.config.TestConfigurations.{DATABASE, ENDPOINT, MASTER_KEY} +import com.microsoft.azure.cosmosdb.kafka.connect.MockCosmosDBProvider +import org.apache.kafka.connect.data.Schema +import org.apache.kafka.connect.sink.SinkRecord +import org.scalatest.{FlatSpec, GivenWhenThen} + +import java.util +import scala.collection.mutable.HashMap + +class CosmosDBWriterTest extends FlatSpec with GivenWhenThen { + + private val PARTITION = 0 + + private val TOPIC = "topic" + private val TOPIC_2 = "topic2" + private val TOPIC_3 = "topic3" + private val TOPIC_4 = "topic4" + private val TOPIC_5 = "topic5" + + private val COLLECTION = "collection" + private val COLLECTION_2 = "collection2" + private val COLLECTION_3 = "collection3" + + // NOTE: All schemas are sent as null during testing because we are not currently enforcing them. + // We simply need to validate the presence of the schema object doesn't break the writer. 
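// For reference, the two value shapes exercised by the sample records below are:
//   enveloped:    {"schema": "null", "payload": {"message": "message1 payload"}}
//   schema-less:  {"message": "message1 payload"}
// In both cases only the payload fields (e.g. "message") should end up on the Cosmos DB
// document; the "schema" element itself must never be persisted.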
+ "CosmosDBWriter write" should "Write records formatted as a raw json string with schema" in { + Given("A Cosmos DB Provider, a configured Cosmos DB Collection and sample Sink Records") + + // Instantiate the MockCosmosDBProvider and Setup the Collections + val mockCosmosProvider = MockCosmosDBProvider + mockCosmosProvider.setupCollections(List(COLLECTION)) + + // Map the Topic and Collections + val collectionTopicMap: HashMap[String, String] = HashMap[String, String]((TOPIC, COLLECTION)) + + // Set up Writer + val setting = new CosmosDBSinkSettings(ENDPOINT, MASTER_KEY, DATABASE, collectionTopicMap) + val writer = new CosmosDBWriter(setting, mockCosmosProvider) + + // Create sample SinkRecords + val record1 = new SinkRecord(TOPIC, PARTITION, Schema.STRING_SCHEMA, null, Schema.STRING_SCHEMA, "{\"schema\": \"null\", \"payload\": {\"message\": \"message1 payload\"}}", 0) + val record2 = new SinkRecord(TOPIC, PARTITION, Schema.STRING_SCHEMA, null, Schema.STRING_SCHEMA, "{\"schema\": \"null\", \"payload\": {\"message\": \"message2 payload\"}}", 0) + + When("Records are passed to the write method") + writer.write(Seq(record1, record2)) + + Then("The Cosmos DB collection should contain all of the records") + val documents: ArrayList[Document] = mockCosmosProvider.getDocumentsByCollection(COLLECTION) + assert(documents.size == 2) + + // Check the schema wasn't written with the payload + assert(documents.get(0).get("schema") == null) + assert(documents.get(1).get("schema") == null) + assert(documents.get(0).get("message") == "message1 payload") + assert(documents.get(1).get("message") == "message2 payload") + } + + + "CosmosDBWriter write" should "Write records formatted as a raw json string without schema" in { + Given("A Cosmos DB Provider, a configured Cosmos DB Collection and sample Sink Records") + + // Instantiate the MockCosmosDBProvider and Setup the Collections + val mockCosmosProvider = MockCosmosDBProvider + mockCosmosProvider.setupCollections(List(COLLECTION)) + + // Map the Topic and Collections + val collectionTopicMap: HashMap[String, String] = HashMap[String, String]((TOPIC, COLLECTION)) + + // Set up Writer + val setting = new CosmosDBSinkSettings(ENDPOINT, MASTER_KEY, DATABASE, collectionTopicMap) + val writer = new CosmosDBWriter(setting, mockCosmosProvider) + + // Create sample SinkRecords + val record1 = new SinkRecord(TOPIC, PARTITION, Schema.STRING_SCHEMA, null, Schema.STRING_SCHEMA, "{\"message\": \"message1 payload\"}", 0) + val record2 = new SinkRecord(TOPIC, PARTITION, Schema.STRING_SCHEMA, null, Schema.STRING_SCHEMA, "{\"message\": \"message2 payload\"}", 0) + + When("Records are passed to the write method") + writer.write(Seq(record1, record2)) + + Then("The Cosmos DB collection should contain all of the records") + val documents = mockCosmosProvider.getDocumentsByCollection(COLLECTION) + + assert(documents.size == 2) + assert(documents.get(0).get("message") == "message1 payload") + assert(documents.get(1).get("message") == "message2 payload") + } + + + "CosmosDBWriter write" should "Write records formatted as hash map without schema" in { + Given("A Cosmos DB Provider, a configured Cosmos DB Collection and sample Sink Records") + + // Instantiate the MockCosmosDBProvider and Setup the Collections + val mockCosmosProvider = MockCosmosDBProvider + mockCosmosProvider.setupCollections(List(COLLECTION)) + + // Map the Topic and Collections + val collectionTopicMap: HashMap[String, String] = HashMap[String, String]((TOPIC, COLLECTION)) + + // Set up Writer + val setting = 
new CosmosDBSinkSettings(ENDPOINT, MASTER_KEY, DATABASE, collectionTopicMap) + val writer = new CosmosDBWriter(setting, mockCosmosProvider) + + // Create sample SinkRecords + val payload1= new util.HashMap[String, String]() + payload1.put("message", "message1 payload") + val record1 = new SinkRecord(TOPIC, PARTITION, Schema.STRING_SCHEMA, null, null, payload1, 0) + + val payload2= new util.HashMap[String, String]() + payload2.put("message", "message2 payload") + val record2 = new SinkRecord(TOPIC, PARTITION, Schema.STRING_SCHEMA, null, null, payload2, 0) + + When("Records are passed to the write method") + writer.write(Seq(record1, record2)) + + Then("The Cosmos DB collection should contain all of the records") + val documents = mockCosmosProvider.getDocumentsByCollection(COLLECTION) + + assert(documents.size == 2) + assert(documents.get(0).get("message") == "message1 payload") + assert(documents.get(1).get("message") == "message2 payload") + } + + + "CosmosDBWriter write" should "Write records formatted as hash map with schema" in { + Given("A Cosmos DB Provider, a configured Cosmos DB Collection and sample Sink Records") + + // Instantiate the MockCosmosDBProvider and Setup the Collections + val mockCosmosProvider = MockCosmosDBProvider + mockCosmosProvider.setupCollections(List(COLLECTION)) + + // Map the Topic and Collections + val collectionTopicMap: HashMap[String, String] = HashMap[String, String]((TOPIC, COLLECTION)) + + // Set up Writer + val setting = new CosmosDBSinkSettings(ENDPOINT, MASTER_KEY, DATABASE, collectionTopicMap) + val writer = new CosmosDBWriter(setting, mockCosmosProvider) + + // Create sample SinkRecords + val payload1 = new util.HashMap[String, String]() + payload1.put("message", "message1 payload") + val map1 = new util.HashMap[String, util.HashMap[String, String]]() + map1.put("schema", null) + map1.put("payload", payload1) + val record1 = new SinkRecord(TOPIC, PARTITION, Schema.STRING_SCHEMA, null, null, map1, 0) + + val payload2 = new util.HashMap[String, String]() + payload2.put("message", "message2 payload") + val map2 = new util.HashMap[String, util.HashMap[String, String]]() + map2.put("schema", null) + map2.put("payload", payload2) + val record2 = new SinkRecord(TOPIC, PARTITION, Schema.STRING_SCHEMA, null, null, map2, 0) + + When("Records are passed to the write method") + writer.write(Seq(record1, record2)) + + Then("The Cosmos DB collection should contain all of the records") + val documents = mockCosmosProvider.getDocumentsByCollection(COLLECTION) + + assert(documents.size == 2) + + // Check the schema wasn't written with the payload + assert(documents.get(0).get("schema") == null) + assert(documents.get(1).get("schema") == null) + assert(documents.get(0).get("message") == "message1 payload") + assert(documents.get(1).get("message") == "message2 payload") + } + + + "CosmosDBWriter write" should "Write records in the proper collections according to a complex map" in { + Given("A Cosmos DB Provider, a configured Cosmos DB Collection and sample Sink Records") + + // Instantiate the MockCosmosDBProvider and Setup the Collections + val mockCosmosProvider = MockCosmosDBProvider + mockCosmosProvider.setupCollections(List(COLLECTION,COLLECTION_2,COLLECTION_3)) + + // Map the Topic and Collections + val collectionTopicMap: HashMap[String, String] = HashMap[String, String]((TOPIC, COLLECTION), + (TOPIC_2, COLLECTION), + (TOPIC_3, COLLECTION_2), + (TOPIC_4, COLLECTION_3), + (TOPIC_5, COLLECTION_3)) + + // Set up Writer + val setting = new 
CosmosDBSinkSettings(ENDPOINT, MASTER_KEY, DATABASE, collectionTopicMap) + val writer = new CosmosDBWriter(setting, mockCosmosProvider) + + // Create sample SinkRecords + val record1 = new SinkRecord(TOPIC, PARTITION, Schema.STRING_SCHEMA, null, Schema.STRING_SCHEMA, "{\"message\": \"topic payload\"}", 0) + val record2 = new SinkRecord(TOPIC, PARTITION, Schema.STRING_SCHEMA, null, Schema.STRING_SCHEMA, "{\"message\": \"topic payload\"}", 0) + val record3 = new SinkRecord(TOPIC_2, PARTITION, Schema.STRING_SCHEMA, null, Schema.STRING_SCHEMA, "{\"message\": \"topic2 payload\"}", 0) + val record4 = new SinkRecord(TOPIC_2, PARTITION, Schema.STRING_SCHEMA, null, Schema.STRING_SCHEMA, "{\"message\": \"topic2 payload\"}", 0) + val record5 = new SinkRecord(TOPIC_3, PARTITION, Schema.STRING_SCHEMA, null, Schema.STRING_SCHEMA, "{\"message\": \"topic3 payload\"}", 0) + val record6 = new SinkRecord(TOPIC_3, PARTITION, Schema.STRING_SCHEMA, null, Schema.STRING_SCHEMA, "{\"message\": \"topic3 payload\"}", 0) + val record7 = new SinkRecord(TOPIC_4, PARTITION, Schema.STRING_SCHEMA, null, Schema.STRING_SCHEMA, "{\"message\": \"topic4 payload\"}", 0) + val record8 = new SinkRecord(TOPIC_4, PARTITION, Schema.STRING_SCHEMA, null, Schema.STRING_SCHEMA, "{\"message\": \"topic4 payload\"}", 0) + val record9 = new SinkRecord(TOPIC_5, PARTITION, Schema.STRING_SCHEMA, null, Schema.STRING_SCHEMA, "{\"message\": \"topic5 payload\"}", 0) + val record10 = new SinkRecord(TOPIC_5, PARTITION, Schema.STRING_SCHEMA, null, Schema.STRING_SCHEMA, "{\"message\": \"topic5 payload\"}", 0) + val record11 = new SinkRecord(TOPIC_5, PARTITION, Schema.STRING_SCHEMA, null, Schema.STRING_SCHEMA, "{\"message\": \"topic5 payload\"}", 0) + + When("Records are passed to the write method") + writer.write(Seq(record1, record2, record3, record4, record5, record6, record7, record8, record9, record10, record11)) + + Then("The Cosmos DB collection should contain all of the records") + val documents = mockCosmosProvider.getDocumentsByCollection(COLLECTION) + val documents2 = mockCosmosProvider.getDocumentsByCollection(COLLECTION_2) + val documents3 = mockCosmosProvider.getDocumentsByCollection(COLLECTION_3) + + assert(documents.size == 4) + assert(documents2.size == 2) + assert(documents3.size == 5) + } +} diff --git a/src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/CosmosDBSourceConnectorReaderTest.scala b/src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/CosmosDBSourceConnectorReaderTest.scala new file mode 100644 index 0000000..31ae3ef --- /dev/null +++ b/src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/CosmosDBSourceConnectorReaderTest.scala @@ -0,0 +1,65 @@ +package com.microsoft.azure.cosmosdb.kafka.connect.sink + +import java.util.Properties +import java.util.UUID.randomUUID + +import com.fasterxml.jackson.databind.{JsonNode, ObjectMapper} +import com.microsoft.azure.cosmosdb.kafka.connect.config.CosmosDBConfigConstants +import com.microsoft.azure.cosmosdb.kafka.connect.kafka.KafkaCluster +import com.microsoft.azure.cosmosdb.kafka.connect.model.CosmosDBDocumentTest +import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord} +import com.fasterxml.jackson.databind.JsonNode +import com.fasterxml.jackson.databind.ObjectMapper +import com.microsoft.azure.cosmosdb.kafka.connect.config.TestConfigurations +import org.apache.kafka.connect.runtime.distributed.DistributedConfig +import org.apache.kafka.connect.runtime.{ConnectorConfig, WorkerConfig} + + +object 
SourceConnectReaderTest { + + def main(args: Array[String]): Unit = { + val workerProperties: Properties = TestConfigurations.getSourceWorkerProperties(KafkaCluster.BrokersList.toString) + val connectorProperties: Properties = TestConfigurations.getSourceConnectorProperties() + KafkaCluster.startEmbeddedConnect(workerProperties, List(connectorProperties)) + if (KafkaCluster.kafkaConnectEnabled) { + println("Kafka Connector Enabled") + } + + // Write 14 test messages to the Kafka topic to be consumed + val producerProps: Properties = TestConfigurations.getProducerProperties(KafkaCluster.BrokersList.toString) + val producer = new KafkaProducer[Nothing, JsonNode](producerProps) + val testUUID = randomUUID() + + val objectMapper: ObjectMapper = new ObjectMapper + + //schema JSON test + for (i <- 1 to 4) { + val json = scala.io.Source.fromFile(getClass.getResource(s"/test$i.json").toURI.getPath).mkString + val mapper = new ObjectMapper + val jsonNode: JsonNode = mapper.readTree(json) + producer.send(new ProducerRecord[Nothing, JsonNode](TestConfigurations.TOPIC, jsonNode)) + + } + + //schema-less JSON test + for (i <- 5 to 8) { + val json = scala.io.Source.fromFile(getClass.getResource(s"/test$i.json").toURI.getPath).mkString + val mapper = new ObjectMapper + val jsonNode: JsonNode = mapper.readTree(json) + producer.send(new ProducerRecord[Nothing, JsonNode](TestConfigurations.TOPIC, jsonNode)) + + } + + // JSON string test, no schema + for (i <- 9 until 15) { + val message = new CosmosDBDocumentTest(s"$i", s"message $i", testUUID) + val jsonNode: JsonNode = objectMapper.valueToTree(message) + + println(s"sending message: ${jsonNode.findPath("id")}") + producer.send(new ProducerRecord[Nothing, JsonNode](TestConfigurations.TOPIC, jsonNode)) + } + + producer.flush() + producer.close() + } +} \ No newline at end of file diff --git a/src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/CosmosDBSourceConnectorTaskTestMock.scala b/src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/CosmosDBSourceConnectorTaskTestMock.scala new file mode 100644 index 0000000..04c2442 --- /dev/null +++ b/src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/CosmosDBSourceConnectorTaskTestMock.scala @@ -0,0 +1,226 @@ +package com.microsoft.azure.cosmosdb.kafka.connect.source + +import java.util +import java.util.UUID.randomUUID +import java.util.concurrent.{CountDownLatch, TimeUnit} +import java.util.{ArrayList, Properties, UUID} + +import _root_.rx.Observable +import _root_.rx.lang.scala.JavaConversions._ +import com.google.common.collect.Maps +import com.google.gson.Gson +import com.microsoft.azure.cosmosdb.{ConnectionPolicy, ConsistencyLevel, Document, ResourceResponse} +import com.microsoft.azure.cosmosdb.kafka.connect.{CosmosDBClientSettings, CosmosDBProvider, CosmosDBProviderImpl, MockCosmosDBProvider, MockCosmosDBReader} +import com.microsoft.azure.cosmosdb.kafka.connect.config.TestConfigurations.{DATABASE, ENDPOINT, MASTER_KEY} +import com.microsoft.azure.cosmosdb.kafka.connect.config.{CosmosDBConfigConstants, TestConfigurations} +import com.microsoft.azure.cosmosdb.kafka.connect.model.{CosmosDBDocumentTest, KafkaPayloadTest} +import com.typesafe.scalalogging.LazyLogging +import org.apache.kafka.connect.data.Schema +import org.apache.kafka.connect.errors.ConnectException +import org.apache.kafka.connect.sink.SinkRecord +import org.scalatest.{FlatSpec, GivenWhenThen} +import org.mockito.MockitoSugar.mock + +import scala.collection.JavaConverters._ +import scala.collection.mutable
+import scala.util.{Failure, Success, Try} + +class CosmosDBSourceConnectorTaskTestMock extends FlatSpec with GivenWhenThen with LazyLogging { + + private val NUM_DOCS: Int = 20 + private val DOC_SIZE: Int = 313 + private var testUUID: UUID = null + private var batchSize = NUM_DOCS + private var bufferSize = batchSize * DOC_SIZE + + "CosmosDBSourceTask start" should "Initialize all properties" in { + Given("A list of properties for CosmosSourceTask") + val props = TestConfigurations.getSourceConnectorProperties() + // Add the assigned partitions + props.put(CosmosDBConfigConstants.ASSIGNED_PARTITIONS, "0,1") + + When("CosmosSourceTask is started") + val mockCosmosProvider = MockCosmosDBProvider + val task = new CosmosDBSourceTask { override val cosmosDBProvider = mockCosmosProvider } + task.start(Maps.fromProperties(props)) + + Then("CosmosSourceTask should properly initialize the readers") + val readers = task.getReaders() + readers.foreach(r => assert(r._1 == r._2.setting.assignedPartition)) + assert(readers.size == 2) + } + + "CosmosDBSourceTask poll" should "Return a list of SourceRecords with the right format" in { + Given("A set of SourceConnector properties") + val props: Properties = TestConfigurations.getSourceConnectorProperties() + props.setProperty(CosmosDBConfigConstants.BATCH_SIZE, NUM_DOCS.toString) + props.setProperty(CosmosDBConfigConstants.READER_BUFFER_SIZE, "10000") + props.setProperty(CosmosDBConfigConstants.TIMEOUT, "10000") + + + Then(s"Start the SourceConnector and return the taskConfigs") + // Declare a collection to store the messages from SourceRecord + val kafkaMessages = new util.ArrayList[KafkaPayloadTest] + + // Start CosmosDBSourceConnector and return the taskConfigs + val connector = new CosmosDBSourceConnector + connector.start(Maps.fromProperties(props)) + val taskConfigs = connector.taskConfigs(2) + + taskConfigs.forEach(config => { + When("CosmosSourceTask is started and poll is called") + + + val task = new CosmosDBSourceTask {override val readers = mock[mutable.Map[String, CosmosDBReader]]} + task.start(config) + + val sourceRecords = task.poll() + + Then("It returns a list of SourceRecords") + assert(sourceRecords != null) + val gson = new Gson() + sourceRecords.forEach(r => { + val message = gson.fromJson(r.value().toString, classOf[KafkaPayloadTest]) + if (message.testID == testUUID) { + kafkaMessages.add(message) + } + }) + }) + } + + "CosmosDBSourceTask poll" should "Return a list of SourceRecords based on the batchSize" in { + Given("A set of SourceConnector properties") + val props: Properties = TestConfigurations.getSourceConnectorProperties() + props.setProperty(CosmosDBConfigConstants.READER_BUFFER_SIZE, "10000") + props.setProperty(CosmosDBConfigConstants.TIMEOUT, "10000") + + Then(s"Start the SourceConnector and return the taskConfigs") + // Declare a collection to store the messages from SourceRecord + val kafkaMessages = new util.ArrayList[KafkaPayloadTest] + + // Start CosmosDBSourceConnector and return the taskConfigs + val connector = new CosmosDBSourceConnector + connector.start(Maps.fromProperties(props)) + val taskConfigs = connector.taskConfigs(2) + val numWorkers = connector.getNumberOfWorkers() + taskConfigs.forEach(config => { + When("CosmosSourceTask is started and poll is called") + val task = new CosmosDBSourceTask {override val readers = mock[mutable.Map[String, CosmosDBReader]]} + task.start(config) + batchSize = config.get(CosmosDBConfigConstants.BATCH_SIZE).toInt + val sourceRecords = task.poll() + Then("It returns a
list of SourceRecords") + assert(sourceRecords != null) + val gson = new Gson() + sourceRecords.forEach(r => { + val message = gson.fromJson(r.value().toString, classOf[KafkaPayloadTest]) + if (message.testID == testUUID) { + kafkaMessages.add(message) + } + }) + }) + + Then(s"Make sure collection of messages is equal to ${batchSize * numWorkers}") + assert(kafkaMessages.size() == batchSize * numWorkers) + + + } + + "CosmosDBSourceTask poll" should "Return a list of SourceRecords based on the bufferSize" in { + Given("A set of SourceConnector properties") + val props: Properties = TestConfigurations.getSourceConnectorProperties() + props.setProperty(CosmosDBConfigConstants.BATCH_SIZE, NUM_DOCS.toString) + props.setProperty(CosmosDBConfigConstants.TIMEOUT, "10000") + + Then(s"Start the SourceConnector and return the taskConfigs") + // Declare a collection to store the messages from SourceRecord + val kafkaMessages = new util.ArrayList[KafkaPayloadTest] + + // Start CosmosDBSourceConnector and return the taskConfigs + val connector = new CosmosDBSourceConnector + connector.start(Maps.fromProperties(props)) + val taskConfigs = connector.taskConfigs(2) + val numWorkers = connector.getNumberOfWorkers() + taskConfigs.forEach(config => { + When("CosmosSourceTask is started and poll is called") + val task = new CosmosDBSourceTask {override val readers = mock[mutable.Map[String, CosmosDBReader]]} + task.start(config) + bufferSize = config.get(CosmosDBConfigConstants.READER_BUFFER_SIZE).toInt + val sourceRecords = task.poll() + Then("It returns a list of SourceRecords") + assert(sourceRecords != null) + val gson = new Gson() + sourceRecords.forEach(r => { + val message = gson.fromJson(r.value().toString, classOf[KafkaPayloadTest]) + if (message.testID == testUUID) { + kafkaMessages.add(message) + } + }) + }) + + val minSize = (bufferSize * numWorkers) + val maxSize = ((bufferSize + DOC_SIZE) * numWorkers) + Then(s"Make sure number of bytes in the collection of messages is between ${minSize} and ${maxSize}") + assert(kafkaMessages.size() * DOC_SIZE >= minSize && kafkaMessages.size() * DOC_SIZE <= maxSize) + + } + + + private def mockDocuments(): ArrayList[CosmosDBDocumentTest] = { + val documents: ArrayList[CosmosDBDocumentTest] = new ArrayList[CosmosDBDocumentTest] + testUUID = randomUUID() + + for (i <- 1 to NUM_DOCS) { + val doc = new CosmosDBDocumentTest(i.toString, s"Message ${i}", testUUID) + documents.add(doc) + } + return documents + } + + + private def insertDocuments(cosmosDBProvider: CosmosDBProvider = CosmosDBProviderImpl) = { + + // Source Collection + val clientSettings = CosmosDBClientSettings( + TestConfigurations.ENDPOINT, + TestConfigurations.MASTER_KEY, + TestConfigurations.DATABASE, + TestConfigurations.SOURCE_COLLECTION, + ConnectionPolicy.GetDefault(), + ConsistencyLevel.Session + ) + //logger.info(""); + val client = Try(cosmosDBProvider.getClient(clientSettings)) match { + case Success(conn) => + logger.info("Connection to CosmosDB established.") + conn + case Failure(f) => throw new ConnectException(s"Couldn't connect to CosmosDB.", f) + } + + val gson: Gson = new Gson() + val upsertDocumentsOBs: util.ArrayList[Observable[ResourceResponse[Document]]] = new util.ArrayList[Observable[ResourceResponse[Document]]] + val completionLatch = new CountDownLatch(1) + val forcedScalaObservable: _root_.rx.lang.scala.Observable[ResourceResponse[Document]] = Observable.merge(upsertDocumentsOBs) + mockDocuments().forEach(record => { + val json = gson.toJson(record) + val document = new 
Document(json) + val obs = client.upsertDocument(CosmosDBProviderImpl.getCollectionLink(TestConfigurations.DATABASE, TestConfigurations.SOURCE_COLLECTION), document, null, false) + upsertDocumentsOBs.add(obs) + }) + + forcedScalaObservable + .map(r => r.getRequestCharge) + .reduce((sum, value) => sum + value) + .subscribe( + t => logger.info(s"upsertDocuments total RU charge is $t"), + e => { + logger.error(s"error upserting documents e:${e.getMessage()} stack:${e.getStackTrace().toString()}") + completionLatch.countDown() + }, + () => { + logger.info("upsertDocuments completed") + completionLatch.countDown() + } + ) + } +} diff --git a/src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/CosmosDBSourceConnectorTest.scala b/src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/CosmosDBSourceConnectorTest.scala new file mode 100644 index 0000000..add7e90 --- /dev/null +++ b/src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/CosmosDBSourceConnectorTest.scala @@ -0,0 +1,35 @@ +package com.microsoft.azure.cosmosdb.kafka.connect.source + +import com.google.common.collect.Maps +import com.microsoft.azure.cosmosdb.kafka.connect.config.{CosmosDBConfigConstants, TestConfigurations} +import org.apache.kafka.connect.runtime.ConnectorConfig +import org.scalatest.{FlatSpec, GivenWhenThen} + +class CosmosDBSourceConnectorTest extends FlatSpec with GivenWhenThen { + "CosmosDBSourceConnector" should "Validate all input properties and generate right set of task config properties" in { + Given("Valid set of input properties") + val props = TestConfigurations.getSourceConnectorProperties() + val connector = new CosmosDBSourceConnector + When("Start and TaskConfig are called in right order") + connector.start(Maps.fromProperties(props)) + val taskConfigs = connector.taskConfigs(3) + val numWorkers = connector.getNumberOfWorkers + Then("The TaskConfigs have all the expected properties") + assert(taskConfigs.size() == numWorkers) + for (i <- 0 until numWorkers) { + val taskConfig: java.util.Map[String, String] = taskConfigs.get(i) + assert(taskConfig.containsKey(ConnectorConfig.NAME_CONFIG)) + assert(taskConfig.containsKey(ConnectorConfig.CONNECTOR_CLASS_CONFIG)) + assert(taskConfig.containsKey(ConnectorConfig.TASKS_MAX_CONFIG)) + assert(taskConfig.containsKey(CosmosDBConfigConstants.CONNECTION_ENDPOINT_CONFIG)) + assert(taskConfig.containsKey(CosmosDBConfigConstants.CONNECTION_MASTERKEY_CONFIG)) + assert(taskConfig.containsKey(CosmosDBConfigConstants.DATABASE_CONFIG)) + assert(taskConfig.containsKey(CosmosDBConfigConstants.COLLECTION_CONFIG)) + assert(taskConfig.containsKey(CosmosDBConfigConstants.TOPIC_CONFIG)) + Then("Validate assigned partition") + val partition = taskConfig.get(CosmosDBConfigConstants.ASSIGNED_PARTITIONS) + assert(partition.size == 1) + assert(partition == i.toString) + } + } +} diff --git a/src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/CosmosDBSourceTaskTest.scala b/src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/CosmosDBSourceTaskTest.scala new file mode 100644 index 0000000..619e41c --- /dev/null +++ b/src/test/scala/com/microsoft/azure/cosmosdb/kafka/connect/source/CosmosDBSourceTaskTest.scala @@ -0,0 +1,230 @@ +package com.microsoft.azure.cosmosdb.kafka.connect.source + +import java.util +import java.util.UUID._ +import java.util.concurrent.{CountDownLatch, TimeUnit} +import java.util.{ArrayList, Properties, UUID} + +import com.microsoft.azure.cosmosdb.kafka.connect.{CosmosDBClientSettings, CosmosDBProvider, 
CosmosDBProviderImpl, MockCosmosDBProvider} +import com.microsoft.azure.cosmosdb.kafka.connect.config.TestConfigurations.{DATABASE, ENDPOINT, MASTER_KEY} +import org.apache.kafka.connect.data.Schema +import org.apache.kafka.connect.sink.SinkRecord +import org.scalatest.{FlatSpec, GivenWhenThen} + +import scala.collection.JavaConverters._ +import scala.collection.mutable +import _root_.rx.Observable +import _root_.rx.lang.scala.JavaConversions._ +import com.google.common.collect.Maps +import com.google.gson.Gson +import com.microsoft.azure.cosmosdb.kafka.connect.config.{CosmosDBConfigConstants, TestConfigurations} +import com.microsoft.azure.cosmosdb.kafka.connect.model.{CosmosDBDocumentTest, KafkaPayloadTest} +import com.microsoft.azure.cosmosdb.{ConnectionPolicy, ConsistencyLevel, Document, ResourceResponse} +import com.typesafe.scalalogging.LazyLogging +import org.apache.kafka.connect.errors.ConnectException +import org.mockito.MockitoSugar.mock + +import scala.util.{Failure, Success, Try} + + +class CosmosDBSourceTaskTest extends FlatSpec with GivenWhenThen with LazyLogging { + + private val NUM_DOCS: Int = 20 + private val DOC_SIZE: Int = 313 + private var testUUID: UUID = null + private var batchSize = NUM_DOCS + private var bufferSize = batchSize * DOC_SIZE + + "CosmosDBSourceTask start" should "Initialize all properties" in { + Given("A list of properties for CosmosSourceTask") + val props = TestConfigurations.getSourceConnectorProperties() + // Add the assigned partitions + props.put(CosmosDBConfigConstants.ASSIGNED_PARTITIONS, "0,1") + + When("CosmosSourceTask is started") + val mockCosmosProvider = MockCosmosDBProvider + val task = new CosmosDBSourceTask { override val cosmosDBProvider = mockCosmosProvider } + task.start(Maps.fromProperties(props)) + + Then("CosmosSourceTask should properly initialize the readers") + val readers = task.getReaders() + readers.foreach(r => assert(r._1 == r._2.setting.assignedPartition)) + assert(readers.size == 2) + } + + "CosmosDBSourceTask poll" should "Return a list of SourceRecords with the right format" in { + Given("A set of SourceConnector properties") + val props: Properties = TestConfigurations.getSourceConnectorProperties() + props.setProperty(CosmosDBConfigConstants.BATCH_SIZE, NUM_DOCS.toString) + props.setProperty(CosmosDBConfigConstants.READER_BUFFER_SIZE, "10000") + props.setProperty(CosmosDBConfigConstants.TIMEOUT, "10000") + + + Then(s"Start the SourceConnector and return the taskConfigs") + // Declare a collection to store the messages from SourceRecord + val kafkaMessages = new util.ArrayList[KafkaPayloadTest] + + // Start CosmosDBSourceConnector and return the taskConfigs + val connector = new CosmosDBSourceConnector + connector.start(Maps.fromProperties(props)) + val taskConfigs = connector.taskConfigs(2) + + taskConfigs.forEach(config => { + When("CosmosSourceTask is started and poll is called") + + + val task = new CosmosDBSourceTask {override val readers = mock[mutable.Map[String, CosmosDBReader]]} + task.start(config) + + val sourceRecords = task.poll() + + Then("It returns a list of SourceRecords") + assert(sourceRecords != null) + val gson = new Gson() + sourceRecords.forEach(r => { + val message = gson.fromJson(r.value().toString, classOf[KafkaPayloadTest]) + if (message.testID == testUUID) { + kafkaMessages.add(message) + } + }) + }) + } + + "CosmosDBSourceTask poll" should "Return a list of
SourceRecords based on the batchSize" in { + Given("A set of SourceConnector properties") + val props: Properties = TestConfigurations.getSourceConnectorProperties() + props.setProperty(CosmosDBConfigConstants.READER_BUFFER_SIZE, "10000") + props.setProperty(CosmosDBConfigConstants.TIMEOUT, "10000") + + Then(s"Start the SourceConnector and return the taskConfigs") + // Declare a collection to store the messages from SourceRecord + val kafkaMessages = new util.ArrayList[KafkaPayloadTest] + + // Start CosmosDBSourceConnector and return the taskConfigs + val connector = new CosmosDBSourceConnector + connector.start(Maps.fromProperties(props)) + val taskConfigs = connector.taskConfigs(2) + val numWorkers = connector.getNumberOfWorkers() + taskConfigs.forEach(config => { + When("CosmosSourceTask is started and poll is called") + val task = new CosmosDBSourceTask {override val readers = mock[mutable.Map[String, CosmosDBReader]]} + task.start(config) + batchSize = config.get(CosmosDBConfigConstants.BATCH_SIZE).toInt + val sourceRecords = task.poll() + Then("It returns a list of SourceRecords") + assert(sourceRecords != null) + val gson = new Gson() + sourceRecords.forEach(r => { + val message = gson.fromJson(r.value().toString, classOf[KafkaPayloadTest]) + if (message.testID == testUUID) { + kafkaMessages.add(message) + } + }) + }) + + Then(s"Make sure collection of messages is equal to ${batchSize * numWorkers}") + assert(kafkaMessages.size() == batchSize * numWorkers) + + + } + + "CosmosDBSourceTask poll" should "Return a list of SourceRecords based on the bufferSize" in { + Given("A set of SourceConnector properties") + val props: Properties = TestConfigurations.getSourceConnectorProperties() + props.setProperty(CosmosDBConfigConstants.BATCH_SIZE, NUM_DOCS.toString) + props.setProperty(CosmosDBConfigConstants.TIMEOUT, "10000") + + Then(s"Start the SourceConnector and return the taskConfigs") + // Declare a collection to store the messages from SourceRecord + val kafkaMessages = new util.ArrayList[KafkaPayloadTest] + + // Start CosmosDBSourceConnector and return the taskConfigs + val connector = new CosmosDBSourceConnector + connector.start(Maps.fromProperties(props)) + val taskConfigs = connector.taskConfigs(2) + val numWorkers = connector.getNumberOfWorkers() + taskConfigs.forEach(config => { + When("CosmosSourceTask is started and poll is called") + val task = new CosmosDBSourceTask {override val readers = mock[mutable.Map[String, CosmosDBReader]]} + task.start(config) + bufferSize = config.get(CosmosDBConfigConstants.READER_BUFFER_SIZE).toInt + val sourceRecords = task.poll() + Then("It returns a list of SourceRecords") + assert(sourceRecords != null) + val gson = new Gson() + sourceRecords.forEach(r => { + val message = gson.fromJson(r.value().toString, classOf[KafkaPayloadTest]) + if (message.testID == testUUID) { + kafkaMessages.add(message) + } + }) + }) + + val minSize = (bufferSize * numWorkers) + val maxSize = ((bufferSize + DOC_SIZE) * numWorkers) + Then(s"Make sure number of bytes in the collection of messages is between ${minSize} and ${maxSize}") + assert(kafkaMessages.size() * DOC_SIZE >= minSize && kafkaMessages.size() * DOC_SIZE <= maxSize) + + } + + + private def mockDocuments(): ArrayList[CosmosDBDocumentTest] = { + val documents: ArrayList[CosmosDBDocumentTest] = new ArrayList[CosmosDBDocumentTest] + testUUID = randomUUID() + + for (i <- 1 to NUM_DOCS) { + val doc = new CosmosDBDocumentTest(i.toString, s"Message ${i}", testUUID) + documents.add(doc) + } + return documents + 
} + + + private def insertDocuments(cosmosDBProvider: CosmosDBProvider = CosmosDBProviderImpl) = { + + // Source Collection + val clientSettings = CosmosDBClientSettings( + TestConfigurations.ENDPOINT, + TestConfigurations.MASTER_KEY, + TestConfigurations.DATABASE, + TestConfigurations.SOURCE_COLLECTION, + ConnectionPolicy.GetDefault(), + ConsistencyLevel.Session + ) + //logger.info(""); + val client = Try(cosmosDBProvider.getClient(clientSettings)) match { + case Success(conn) => + logger.info("Connection to CosmosDB established.") + conn + case Failure(f) => throw new ConnectException(s"Couldn't connect to CosmosDB.", f) + } + + val gson: Gson = new Gson() + val upsertDocumentsOBs: util.ArrayList[Observable[ResourceResponse[Document]]] = new util.ArrayList[Observable[ResourceResponse[Document]]] + val completionLatch = new CountDownLatch(1) + val forcedScalaObservable: _root_.rx.lang.scala.Observable[ResourceResponse[Document]] = Observable.merge(upsertDocumentsOBs) + mockDocuments().forEach(record => { + val json = gson.toJson(record) + val document = new Document(json) + val obs = client.upsertDocument(CosmosDBProviderImpl.getCollectionLink(TestConfigurations.DATABASE, TestConfigurations.SOURCE_COLLECTION), document, null, false) + upsertDocumentsOBs.add(obs) + }) + + forcedScalaObservable + .map(r => r.getRequestCharge) + .reduce((sum, value) => sum + value) + .subscribe( + t => logger.info(s"upsertDocuments total RU charge is $t"), + e => { + logger.error(s"error upserting documents e:${e.getMessage()} stack:${e.getStackTrace().toString()}") + completionLatch.countDown() + }, + () => { + logger.info("upsertDocuments completed") + completionLatch.countDown() + } + ) + } +} \ No newline at end of file
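
For reference, the CosmosDBWriter tests earlier in this patch drive the provider through three calls: setupCollections(List(COLLECTION, ...)) to declare the target collections, an upsert-style write performed inside writer.write for each SinkRecord, and getDocumentsByCollection(COLLECTION) for the assertions. The following is a minimal in-memory sketch of that surface, assuming the provider simply keeps one document list per collection; the names InMemoryCollectionStore and upsertTestDocument are hypothetical, and the actual MockCosmosDBProvider added by this patch may expose different signatures.

    import java.util
    import scala.collection.mutable

    // Illustrative stand-in only: shaped like the calls the writer tests make,
    // not a copy of the MockCosmosDBProvider implementation in this patch.
    object InMemoryCollectionStore {
      // One document list per configured collection name.
      private val collections =
        mutable.Map.empty[String, util.ArrayList[util.HashMap[String, AnyRef]]]

      // Mirrors setupCollections(List(COLLECTION, ...)) as used in the tests.
      def setupCollections(names: List[String]): Unit =
        names.foreach(name => collections.put(name, new util.ArrayList[util.HashMap[String, AnyRef]]()))

      // Hypothetical write path a writer-style component could call once per record.
      def upsertTestDocument(collection: String, document: util.HashMap[String, AnyRef]): Unit =
        collections(collection).add(document)

      // Mirrors getDocumentsByCollection(COLLECTION) used by the assertions.
      def getDocumentsByCollection(collection: String): util.ArrayList[util.HashMap[String, AnyRef]] =
        collections(collection)
    }

Under these assumptions, setupCollections(List("destination")) followed by two upsertTestDocument calls would make getDocumentsByCollection("destination").size return 2, which matches the count-and-content assertion pattern used by the writer tests above.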