moving old scala code to its own branch

Parent: 7fcb06805e
Commit: 67f31ecada
@@ -0,0 +1,3 @@
azure-pipelines.yaml
.git
.vscode
@@ -1,3 +1,85 @@
.idea/
/target/*
/project/*

### Intellij ###

.idea/**

### mac ###
.DS_Store
C:/

# User-specific stuff
.idea/**/workspace.xml
.idea/**/tasks.xml
.idea/**/usage.statistics.xml
.idea/**/dictionaries
.idea/**/shelf

# Generated files
.idea/**/contentModel.xml

# Sensitive or high-churn files
.idea/**/dataSources/
.idea/**/dataSources.ids
.idea/**/dataSources.local.xml
.idea/**/sqlDataSources.xml
.idea/**/dynamic.xml
.idea/**/uiDesigner.xml
.idea/**/dbnavigator.xml

# Gradle
.idea/**/gradle.xml
.idea/**/libraries

# Gradle and Maven with auto-import
# When using Gradle or Maven with auto-import, you should exclude module files,
# since they will be recreated, and may cause churn. Uncomment if using
# auto-import.
.idea/modules.xml
.idea/*.iml
.idea/modules
.idea/misc.xml
.idea/sbt.xml
.idea/vcs.xml

# CMake
cmake-build-*/

# Mongo Explorer plugin
.idea/**/mongoSettings.xml

# File-based project format
*.iws

# IntelliJ
out/

# mpeltonen/sbt-idea plugin
.idea_modules/

# JIRA plugin
atlassian-ide-plugin.xml

# Created by https://www.gitignore.io/api/sbt,scala

### SBT ###
# Simple Build Tool
# http://www.scala-sbt.org/release/docs/Getting-Started/Directories.html#configuring-version-control

dist/*
target/
*.iml
lib_managed/
src_managed/
project/boot/
project/plugins/project/
.history
.cache
.lib/

### Scala ###
*.class
*.log

# End of https://www.gitignore.io/api/sbt,scala
@@ -0,0 +1,29 @@
# ----- Base Java - Check Dependencies ----
FROM openjdk:8u212-b04-jdk-stretch AS base
# Env variables
ENV SCALA_VERSION=2.12.8
ENV SBT_VERSION=1.2.8
ENV HOME=/app
WORKDIR $HOME

# Install sbt
RUN \
  curl -L -o sbt-$SBT_VERSION.deb https://dl.bintray.com/sbt/debian/sbt-$SBT_VERSION.deb && \
  dpkg -i sbt-$SBT_VERSION.deb && \
  rm sbt-$SBT_VERSION.deb && \
  apt-get update && \
  apt-get install -y sbt

#
# ---- Build the app ----
FROM base AS build
ADD . $HOME
RUN sbt compile

#
# ---- Publish the App ----
FROM build AS release
EXPOSE 8888
CMD sbt run
README.md | 40
@@ -1,5 +1,41 @@
# Kafka Connect for Azure Cosmos DB
________________________

This connector is currently undergoing a major refresh.
Stay tuned for a new Java version targeting the Cosmos DB V4 Java SDK.

**Kafka Connect for Azure Cosmos DB** consists of two connectors:

A **Source Connector**, which pumps data from [Azure Cosmos DB](https://azure.microsoft.com/services/cosmos-db//) to [Apache Kafka](https://kafka.apache.org/) via its Change Feed.

A **Sink Connector**, which reads messages from Kafka and writes them to Cosmos DB.

## Contribute
This project welcomes contributions, feedback and suggestions.
If you would like to become a contributor to this project, please refer to our [Contribution Guide](CONTRIBUTING.MD).

## Setup

### [Source Connector](doc/README_Source.md)

### [Sink Connector](doc/README_Sink.md)


## Configuration

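As a rough, hypothetical example, registering the sink connector could look something like the following properties file; the `connect.cosmosdb.*` keys come from this connector's configuration constants, and the account, key, database, collection and topic values are placeholders only:

    name=cosmosdb-sink-example
    connector.class=com.microsoft.azure.cosmosdb.kafka.connect.sink.CosmosDBSinkConnector
    tasks.max=1
    topics=topic1
    connect.cosmosdb.connection.endpoint=https://<cosmosdb-account>.documents.azure.com:443/
    connect.cosmosdb.master.key=<master-key>
    connect.cosmosdb.database=<database-name>
    connect.cosmosdb.collections=coll1
    connect.cosmosdb.topic.name=topic1
    connect.cosmosdb.collections.topicmap=coll1#topic1
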
## References
It is worth looking through this material to get a better understanding of how Kafka Connect and these connectors work and how to use them.

[Kafka Connect](https://docs.confluent.io/current/connect/index.html)

[Kafka Connect Concepts](https://docs.confluent.io/current/connect/concepts.html)

[Installing and Configuring Kafka Connect](https://docs.confluent.io/current/connect/userguide.html)

[Tutorial: Moving Data In and Out of Kafka](https://docs.confluent.io/current/connect/quickstart.html)

It is also worth understanding how Cosmos DB and its Change Feed work.

[Cosmos DB](https://docs.microsoft.com/en-us/azure/cosmos-db/introduction)

[Cosmos DB Change feed](https://docs.microsoft.com/azure/cosmos-db/change-feed)

[Cosmos DB Change feed processor](https://docs.microsoft.com/en-us/azure/cosmos-db/change-feed-processor)
@@ -0,0 +1,38 @@
# Starter pipeline
# Start with a minimal pipeline that you can customize to build and deploy your code.
# Add steps that build, run tests, deploy, and more:
# https://aka.ms/yaml

trigger:
- master

variables: # pipeline-level
  projName: 'kafka-connect-cosmosdb'
  topicName: '$(Build.SourceBranchName)'
  releaseversion: '$(Build.BuildNumber)'
  appName: 'kafkaconnectcosmosdb'

stages:
- stage: Build_Container
  displayName: Build the App and publish it in Azure Container Registry
  jobs:
  - job: BuildJob
    pool:
      vmImage: 'ubuntu-latest'

    steps:
    - task: Bash@3
      inputs:
        targetType: 'inline'
        script: 'docker build --target=build -t $(appName)acr.azurecr.io/$(appname):canary .'

    - task: Bash@3
      inputs:
        targetType: 'inline'
        script: 'docker cp app/cosmosdbkafkaconnector.jar $(Build.ArtifactStagingDirectory)'

    - task: PublishBuildArtifacts@1
      inputs:
        PathtoPublish: '$(Build.ArtifactStagingDirectory)'
        ArtifactName: 'drop'
        publishLocation: 'Container'
@@ -0,0 +1,30 @@
name := "com.microsoft.azure.cosmosdb.kafka.connect"
organization := "com.microsoft.azure"
version := "0.0.1-preview"
scalaVersion := "2.12.8"

libraryDependencies += "com.microsoft.azure" % "azure-cosmosdb" % "2.4.4"

libraryDependencies += "javax.ws.rs" % "javax.ws.rs-api" % "2.1.1" artifacts Artifact("javax.ws.rs-api", "jar", "jar")
libraryDependencies += "com.typesafe.scala-logging" %% "scala-logging" % "3.9.2"
libraryDependencies += "ch.qos.logback" % "logback-classic" % "1.2.3"
libraryDependencies += "com.google.code.gson" % "gson" % "2.8.5"
libraryDependencies += "io.reactivex" %% "rxscala" % "0.26.5"
libraryDependencies += "org.json4s" %% "json4s-jackson" % "3.5.0"
libraryDependencies += "org.mockito" % "mockito-scala_2.12" % "1.5.11"

libraryDependencies += "org.apache.kafka" %% "kafka" % "2.2.0" % Compile classifier "test"
libraryDependencies += "org.apache.kafka" %% "kafka" % "2.2.0" % Compile
libraryDependencies += "org.apache.kafka" % "kafka-clients" % "2.2.0" % Compile classifier "test"
libraryDependencies += "org.apache.kafka" % "kafka-clients" % "2.2.0" % Compile
libraryDependencies += "org.apache.kafka" % "connect-api" % "2.2.0" % Compile
libraryDependencies += "org.apache.kafka" % "connect-runtime" % "2.2.0" % Compile

trapExit := false
fork in run := true

libraryDependencies += "org.scalactic" %% "scalactic" % "3.0.5"
libraryDependencies += "org.scalatest" %% "scalatest" % "3.0.7" % "test"
libraryDependencies += "com.typesafe" % "config" % "1.3.3" % "test"

licenses += ("MIT", url("https://github.com/Microsoft/kafka-connect-cosmosdb/blob/master/LICENSE"))
pom.xml | 87
@@ -1,87 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <groupId>com.microsoft.azure</groupId>
  <artifactId>cosmosdb.kafka.connect</artifactId>
  <version>1.0-SNAPSHOT</version>

  <name>cosmosdb.kafka.connect</name>
  <!-- FIXME change it to the project's website -->
  <url>http://www.example.com</url>

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <maven.compiler.source>1.8</maven.compiler.source>
    <maven.compiler.target>1.8</maven.compiler.target>
    <kafka.version>2.5.0</kafka.version>
  </properties>

  <dependencies>
    <dependency>
      <groupId>org.apache.kafka</groupId>
      <artifactId>connect-api</artifactId>
      <version>${kafka.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.kafka</groupId>
      <artifactId>kafka-clients</artifactId>
      <version>${kafka.version}</version>
    </dependency>
    <dependency>
      <groupId>com.microsoft.azure</groupId>
      <artifactId>azure-cosmosdb</artifactId>
      <version>2.6.6</version>
    </dependency>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.11</version>
      <scope>test</scope>
    </dependency>
  </dependencies>

  <build>
    <pluginManagement><!-- lock down plugins versions to avoid using Maven defaults (may be moved to parent pom) -->
      <plugins>
        <!-- clean lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#clean_Lifecycle -->
        <plugin>
          <artifactId>maven-clean-plugin</artifactId>
          <version>3.1.0</version>
        </plugin>
        <!-- default lifecycle, jar packaging: see https://maven.apache.org/ref/current/maven-core/default-bindings.html#Plugin_bindings_for_jar_packaging -->
        <plugin>
          <artifactId>maven-resources-plugin</artifactId>
          <version>3.0.2</version>
        </plugin>
        <plugin>
          <artifactId>maven-compiler-plugin</artifactId>
          <version>3.8.0</version>
        </plugin>
        <plugin>
          <artifactId>maven-jar-plugin</artifactId>
          <version>3.0.2</version>
        </plugin>
        <plugin>
          <artifactId>maven-install-plugin</artifactId>
          <version>2.5.2</version>
        </plugin>
        <plugin>
          <artifactId>maven-deploy-plugin</artifactId>
          <version>2.8.2</version>
        </plugin>
        <!-- site lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#site_Lifecycle -->
        <plugin>
          <artifactId>maven-site-plugin</artifactId>
          <version>3.7.1</version>
        </plugin>
        <plugin>
          <artifactId>maven-project-info-reports-plugin</artifactId>
          <version>3.0.0</version>
        </plugin>
      </plugins>
    </pluginManagement>
  </build>
</project>
@@ -1,5 +0,0 @@
package com.microsoft.azure.cosmosdb.kafka.connect;

public class CosmosDBProvider {

}
@@ -1,42 +0,0 @@
package com.microsoft.azure.cosmosdb.kafka.connect.sink;

import org.apache.kafka.common.config.ConfigDef;
import org.apache.kafka.connect.connector.Task;
import org.apache.kafka.connect.sink.SinkConnector;

import java.util.List;
import java.util.Map;

public class CosmosDBSinkConnector extends SinkConnector {

    @Override
    public void start(Map<String, String> map) {

    }

    @Override
    public Class<? extends Task> taskClass() {
        throw new IllegalStateException("Not implemented");
    }

    @Override
    public List<Map<String, String>> taskConfigs(int i) {
        throw new IllegalStateException("Not implemented");
    }

    @Override
    public void stop() {
        throw new IllegalStateException("Not implemented");
    }

    @Override
    public ConfigDef config() {
        throw new IllegalStateException("Not implemented");
    }

    @Override
    public String version() {
        return this.getClass().getPackage().getImplementationVersion();
    }

}
@@ -1,41 +0,0 @@
package com.microsoft.azure.cosmosdb.kafka.connect.source;

import org.apache.kafka.common.config.ConfigDef;
import org.apache.kafka.connect.connector.Task;
import org.apache.kafka.connect.source.SourceConnector;

import java.util.List;
import java.util.Map;

public class CosmosDBSourceConnector extends SourceConnector {

    @Override
    public void start(Map<String, String> map) {
        throw new IllegalStateException("Not implemented");
    }

    @Override
    public Class<? extends Task> taskClass() {
        throw new IllegalStateException("Not implemented");
    }

    @Override
    public List<Map<String, String>> taskConfigs(int i) {
        throw new IllegalStateException("Not implemented");
    }

    @Override
    public void stop() {
        throw new IllegalStateException("Not implemented");
    }

    @Override
    public ConfigDef config() {
        throw new IllegalStateException("Not implemented");
    }

    @Override
    public String version() {
        return this.getClass().getPackage().getImplementationVersion();
    }
}
@@ -0,0 +1,11 @@
<configuration>
    <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
        <encoder>
            <pattern>%d{HH:mm:ss} [%thread] %-5level %logger{36} - %msg%n</pattern>
        </encoder>
    </appender>

    <root level="error">
        <appender-ref ref="STDOUT" />
    </root>
</configuration>
@@ -0,0 +1,43 @@
package com.microsoft.azure.cosmosdb.kafka.connect

import com.microsoft.azure.cosmosdb.kafka.connect.config.{CosmosDBConfig, CosmosDBConfigConstants}
import com.microsoft.azure.cosmosdb.{ConnectionPolicy, ConsistencyLevel}

case class CosmosDBClientSettings(
                                   endpoint: String,
                                   masterkey: String,
                                   database: String,
                                   collection: String,
                                   connectionPolicy: ConnectionPolicy,
                                   consistencyLevel: ConsistencyLevel
                                 )

object CosmosDBClientSettings {
  def apply(config: CosmosDBConfig): CosmosDBClientSettings = {
    val endpoint: String = config.getString(CosmosDBConfigConstants.CONNECTION_ENDPOINT_CONFIG)
    require(endpoint.trim.nonEmpty, s"Invalid value for ${CosmosDBConfigConstants.CONNECTION_ENDPOINT_CONFIG}")
    require(endpoint.startsWith("https://"), s"""Invalid value for ${CosmosDBConfigConstants.CONNECTION_ENDPOINT_CONFIG} - endpoint must start with "https://"""")

    val masterKey: String = config.getPassword(CosmosDBConfigConstants.CONNECTION_MASTERKEY_CONFIG).value()
    require(masterKey.trim.nonEmpty, s"Invalid value for ${CosmosDBConfigConstants.CONNECTION_MASTERKEY_CONFIG}")

    val database: String = config.getString(CosmosDBConfigConstants.DATABASE_CONFIG)
    require(database.trim.nonEmpty, s"Invalid value for ${CosmosDBConfigConstants.DATABASE_CONFIG}")

    val collection: String = config.getString(CosmosDBConfigConstants.COLLECTION_CONFIG)
    require(collection.trim.nonEmpty, s"Invalid value for ${CosmosDBConfigConstants.COLLECTION_CONFIG}")

    //TODO: make this configurable
    val connectionPolicy = ConnectionPolicy.GetDefault()

    //TODO: make this configurable
    val consistencyLevel = ConsistencyLevel.Session

    new CosmosDBClientSettings(endpoint,
      masterKey,
      database,
      collection,
      connectionPolicy,
      consistencyLevel)
  }
}
@@ -0,0 +1,9 @@
package com.microsoft.azure.cosmosdb.kafka.connect

import java.util.concurrent.CountDownLatch
import com.microsoft.azure.cosmosdb.rx.AsyncDocumentClient

trait CosmosDBProvider {
  def getClient(settings: CosmosDBClientSettings): AsyncDocumentClient
  def upsertDocuments[T](docs: scala.List[T], databaseName: String, collectionName: String, completionLatch: CountDownLatch): Unit
}
@ -0,0 +1,249 @@
|
|||
package com.microsoft.azure.cosmosdb.kafka.connect
|
||||
|
||||
import java.util
|
||||
import java.util.List
|
||||
import java.util.concurrent.CountDownLatch
|
||||
|
||||
import _root_.rx.Observable
|
||||
import _root_.rx.lang.scala.JavaConversions._
|
||||
import com.microsoft.azure.cosmosdb._
|
||||
import com.microsoft.azure.cosmosdb.kafka.connect.common.ErrorHandler.HandleRetriableError
|
||||
import com.microsoft.azure.cosmosdb.rx.AsyncDocumentClient
|
||||
|
||||
import scala.util.{Failure, Success}
|
||||
|
||||
object CosmosDBProviderImpl extends HandleRetriableError with CosmosDBProvider {
|
||||
|
||||
private val requestOptionsInsert = new RequestOptions
|
||||
requestOptionsInsert.setConsistencyLevel(ConsistencyLevel.Session)
|
||||
|
||||
initializeErrorHandler(2)
|
||||
|
||||
var client: AsyncDocumentClient = _
|
||||
|
||||
def getClient(settings: CosmosDBClientSettings): AsyncDocumentClient = synchronized {
|
||||
if (client == null) {
|
||||
client = new AsyncDocumentClient.Builder()
|
||||
.withServiceEndpoint(settings.endpoint)
|
||||
.withMasterKeyOrResourceToken(settings.masterkey)
|
||||
.withConnectionPolicy(settings.connectionPolicy)
|
||||
.withConsistencyLevel(settings.consistencyLevel)
|
||||
.build()
|
||||
}
|
||||
|
||||
client
|
||||
}
|
||||
|
||||
def getCollectionLink(databaseName: String, collectionName: String) = "/dbs/%s/colls/%s".format(databaseName, collectionName)
|
||||
|
||||
def createDatabaseIfNotExists(databaseName: String): Unit = {
|
||||
|
||||
if (!isDatabaseExists(databaseName)) {
|
||||
val dbDefinition = new Database()
|
||||
dbDefinition.setId(databaseName)
|
||||
|
||||
logger.info(s"Creating Database $databaseName")
|
||||
|
||||
client.createDatabase(dbDefinition, null).toCompletable.await()
|
||||
}
|
||||
}
|
||||
|
||||
def createCollectionIfNotExists(databaseName: String, collectionName: String): Unit = {
|
||||
if (!isCollectionExists(databaseName, collectionName)) {
|
||||
val dbLnk = String.format("/dbs/%s", databaseName)
|
||||
val collDefinition = new DocumentCollection
|
||||
collDefinition.setId(collectionName)
|
||||
|
||||
logger.info(s"Creating Collection $collectionName")
|
||||
|
||||
client.createCollection(dbLnk, collDefinition, null).toCompletable.await()
|
||||
}
|
||||
}
|
||||
|
||||
def isDatabaseExists(databaseName: String): Boolean = {
|
||||
val databaseLink = s"/dbs/$databaseName"
|
||||
val databaseReadObs = client.readDatabase(databaseLink, null)
|
||||
var isDatabaseExists = false
|
||||
|
||||
val db = databaseReadObs
|
||||
.doOnNext((x: ResourceResponse[Database]) => {
|
||||
def foundDataBase(x: ResourceResponse[Database]): Unit = {
|
||||
logger.info(s"Database $databaseName already exists.")
|
||||
isDatabaseExists = true
|
||||
}
|
||||
|
||||
foundDataBase(x)
|
||||
})
|
||||
.onErrorResumeNext((e: Throwable) => {
|
||||
def tryCreateDatabaseOnError(e: Throwable) = {
|
||||
e match {
|
||||
case de: DocumentClientException =>
|
||||
if (de.getStatusCode == 404) {
|
||||
logger.info(s"Database $databaseName does not exist")
|
||||
isDatabaseExists = false
|
||||
}
|
||||
}
|
||||
Observable.empty()
|
||||
}
|
||||
|
||||
tryCreateDatabaseOnError(e)
|
||||
})
|
||||
|
||||
db.toCompletable.await()
|
||||
|
||||
isDatabaseExists
|
||||
}
|
||||
|
||||
def isCollectionExists(databaseName: String, collectionName: String): Boolean = {
|
||||
|
||||
var isCollectionExists = false
|
||||
val dbLnk = s"/dbs/$databaseName"
|
||||
val params = new SqlParameterCollection(new SqlParameter("@id", collectionName))
|
||||
|
||||
val qry = new SqlQuerySpec("SELECT * FROM r where r.id = @id", params)
|
||||
|
||||
client.queryCollections(dbLnk, qry, null).single.flatMap(page => {
|
||||
def foundCollection(page: FeedResponse[DocumentCollection]) = {
|
||||
isCollectionExists = !page.getResults.isEmpty
|
||||
Observable.empty
|
||||
}
|
||||
|
||||
foundCollection(page)
|
||||
}).toCompletable.await()
|
||||
|
||||
isCollectionExists
|
||||
}
|
||||
|
||||
def close(): Unit = {
|
||||
client.close()
|
||||
}
|
||||
|
||||
def readChangeFeed(databaseName: String, collectionName: String): Unit = {
|
||||
//TODO: call Allan's ChangeFeedProcessor here
|
||||
//TODO: ultimately replace Allan's ChangeFeedProcessor with the PG one
|
||||
}
|
||||
|
||||
def createDocuments[T](docs: scala.List[T], databaseName: String, collectionName: String, completionLatch: CountDownLatch): Unit = {
|
||||
val colLnk = s"/dbs/$databaseName/colls/$collectionName"
|
||||
val createDocumentsOBs: List[Observable[ResourceResponse[Document]]] = new util.ArrayList[Observable[ResourceResponse[Document]]]
|
||||
|
||||
docs.foreach(f = t => {
|
||||
val obs = client.createDocument(colLnk, t, null, false)
|
||||
createDocumentsOBs.add(obs)
|
||||
})
|
||||
|
||||
val forcedScalaObservable: _root_.rx.lang.scala.Observable[ResourceResponse[Document]] = Observable.merge(createDocumentsOBs)
|
||||
|
||||
forcedScalaObservable
|
||||
.map(r => r.getRequestCharge)
|
||||
.reduce((sum, value) => sum + value)
|
||||
.subscribe(
|
||||
t => {
|
||||
logger.debug(s"createDocuments total RU charge is $t")
|
||||
HandleRetriableError(Success())
|
||||
},
|
||||
e => {
|
||||
logger.debug(s"error creating documents e:${e.getMessage()} stack:${e.getStackTrace().toString()}")
|
||||
HandleRetriableError(Failure(e))
|
||||
completionLatch.countDown()
|
||||
},
|
||||
() => {
|
||||
logger.info("createDocuments completed")
|
||||
completionLatch.countDown()
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
def upsertDocuments[T](docs: scala.List[T], databaseName: String, collectionName: String, completionLatch: CountDownLatch): Unit = {
|
||||
val colLnk = s"/dbs/$databaseName/colls/$collectionName"
|
||||
val upsertDocumentsOBs: List[Observable[ResourceResponse[Document]]] = new util.ArrayList[Observable[ResourceResponse[Document]]]
|
||||
|
||||
docs.foreach(f = t => {
|
||||
val obs = client.upsertDocument(colLnk, t, null, false)
|
||||
upsertDocumentsOBs.add(obs)
|
||||
})
|
||||
|
||||
val forcedScalaObservable: _root_.rx.lang.scala.Observable[ResourceResponse[Document]] = Observable.merge(upsertDocumentsOBs)
|
||||
|
||||
forcedScalaObservable
|
||||
.map(r => r.getRequestCharge)
|
||||
.reduce((sum, value) => sum + value)
|
||||
.subscribe(
|
||||
t => {
|
||||
logger.debug(s"upsertDocuments total RU charge is $t")
|
||||
HandleRetriableError(Success())
|
||||
},
|
||||
e => {
|
||||
logger.debug(s"error upserting documents e:${e.getMessage()} stack:${e.getStackTrace().toString()}")
|
||||
HandleRetriableError(Failure(e))
|
||||
completionLatch.countDown()
|
||||
},
|
||||
() => {
|
||||
logger.info("upsertDocuments completed")
|
||||
completionLatch.countDown()
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
|
||||
def readCollection(databaseName: String, collectionName: String, completionLatch: CountDownLatch): _root_.rx.lang.scala.Observable[ResourceResponse[DocumentCollection]]= { // Create a Collection
|
||||
val colLnk = s"/dbs/$databaseName/colls/$collectionName"
|
||||
logger.info("reading collection " + colLnk)
|
||||
|
||||
val readDocumentsOBs = client.readCollection(colLnk, null)
|
||||
val forcedScalaObservable: _root_.rx.lang.scala.Observable[ResourceResponse[DocumentCollection]] = readDocumentsOBs
|
||||
|
||||
forcedScalaObservable
|
||||
.subscribe(
|
||||
t => {
|
||||
logger.debug(s"activityId ${t.getActivityId} id ${t.getResource.getId}")
|
||||
HandleRetriableError(Success())
|
||||
},
|
||||
e => {
|
||||
logger.debug(s"error reading document collection e:${e.getMessage()} stack:${e.getStackTrace().toString()}")
|
||||
HandleRetriableError(Failure(e))
|
||||
completionLatch.countDown()
|
||||
},
|
||||
() => {
|
||||
logger.info("readDocuments completed")
|
||||
completionLatch.countDown()
|
||||
})
|
||||
return forcedScalaObservable
|
||||
|
||||
}
|
||||
|
||||
|
||||
def queryCollection(databaseName: String, collectionName: String, completionLatch: CountDownLatch): _root_.rx.lang.scala.Observable[FeedResponse[DocumentCollection]]= { // Create a Collection
|
||||
val colLnk = s"/dbs/$databaseName/colls/$collectionName"
|
||||
val dbLink = s"/dbs/$databaseName"
|
||||
logger.info("reading collection " + colLnk)
|
||||
|
||||
//val query = "SELECT * from c"
|
||||
val query = String.format("SELECT * from c where c.id = '%s'", collectionName)
|
||||
val options = new FeedOptions
|
||||
options.setMaxItemCount(2)
|
||||
|
||||
val queryCollectionObservable = client.queryCollections(dbLink, query, options)
|
||||
|
||||
val forcedScalaObservable: _root_.rx.lang.scala.Observable[FeedResponse[DocumentCollection]] = queryCollectionObservable
|
||||
|
||||
forcedScalaObservable
|
||||
.subscribe(
|
||||
t => {
|
||||
logger.debug(s"activityId ${t.getActivityId} results ${t.getResults.toString}")
|
||||
HandleRetriableError(Success())
|
||||
},
|
||||
e => {
|
||||
logger.debug(s"error reading document collection e:${e.getMessage()} stack:${e.getStackTrace().toString()}")
|
||||
HandleRetriableError(Failure(e))
|
||||
completionLatch.countDown()
|
||||
},
|
||||
() => {
|
||||
logger.debug("readDocuments completed")
|
||||
completionLatch.countDown()
|
||||
})
|
||||
return forcedScalaObservable
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,45 @@
|
|||
package com.microsoft.azure.cosmosdb.kafka.connect
|
||||
|
||||
import java.util.concurrent.CountDownLatch
|
||||
|
||||
import com.microsoft.azure.cosmosdb._
|
||||
import com.microsoft.azure.cosmosdb.kafka.connect.config.{CosmosDBConfig, CosmosDBConfigConstants}
|
||||
|
||||
import scala.collection.JavaConverters._
|
||||
|
||||
// TODO: Please follow getter and setter model
|
||||
// Otherwise document create fails
|
||||
class SampleDoc() {
|
||||
private var name = ""
|
||||
private var age = 0
|
||||
}
|
||||
|
||||
object Runner extends App{
|
||||
|
||||
val connectionPolicy=new ConnectionPolicy();
|
||||
connectionPolicy.setConnectionMode(ConnectionMode.Direct)
|
||||
connectionPolicy.setMaxPoolSize(600)
|
||||
|
||||
val consistencyLevel = ConsistencyLevel.Session
|
||||
|
||||
val cosmosDBClientSettings=CosmosDBClientSettings(
|
||||
endpoint = "test",
|
||||
masterkey = "test",
|
||||
database = "test",
|
||||
collection = "test",
|
||||
connectionPolicy = connectionPolicy,
|
||||
consistencyLevel = consistencyLevel)
|
||||
|
||||
val client = CosmosDBProviderImpl.getClient(cosmosDBClientSettings)
|
||||
|
||||
CosmosDBProviderImpl.createDatabaseIfNotExists("test8")
|
||||
|
||||
CosmosDBProviderImpl.createCollectionIfNotExists("test8","collection")
|
||||
|
||||
val sampleDoc = new SampleDoc()
|
||||
val docs=List[SampleDoc](sampleDoc)
|
||||
|
||||
CosmosDBProviderImpl.createDocuments[SampleDoc](docs,"test8","collection", new CountDownLatch(1))
|
||||
|
||||
println("End of the Runner.")
|
||||
}
|
|
@ -0,0 +1,76 @@
|
|||
package com.microsoft.azure.cosmosdb.kafka.connect.common.ErrorHandler
|
||||
|
||||
import java.util.Date
|
||||
|
||||
import com.microsoft.azure.cosmosdb.kafka.connect.config.CosmosDBConfigConstants
|
||||
import com.typesafe.scalalogging.StrictLogging
|
||||
import org.apache.kafka.connect.errors.{ConnectException, RetriableException}
|
||||
|
||||
import scala.util.{Failure, Success, Try}
|
||||
|
||||
|
||||
case class ErrorHandlerObj(remainingRetries: Int, maxRetries: Int, errorMessage: String, lastErrorTimestamp: Date)
|
||||
|
||||
|
||||
trait HandleRetriableError extends StrictLogging{
|
||||
|
||||
var errorHandlerObj: Option[ErrorHandlerObj] = None
|
||||
private var maxRetriesDefault = CosmosDBConfigConstants.ERROR_MAX_RETRIES_DEFAULT
|
||||
|
||||
|
||||
def initializeErrorHandler(maxRetries: Int): Unit = {
|
||||
errorHandlerObj = Some(ErrorHandlerObj(maxRetries, maxRetries, "", new Date()))
|
||||
}
|
||||
|
||||
def HandleRetriableError[A](t : Try[A]) : Option[A] = {
|
||||
if(!errorHandlerObj.isDefined) {
|
||||
logger.info(s"HandleRetriableError not initialized, getting max retries value")
|
||||
maxRetriesDefault = CosmosDBConfigConstants.ERROR_MAX_RETRIES_DEFAULT
|
||||
initializeErrorHandler(maxRetriesDefault)
|
||||
}
|
||||
t
|
||||
match {
|
||||
case Success(s) => {
|
||||
//in case we had previous errors.
|
||||
if (errorHandlerObj.get.remainingRetries != errorHandlerObj.get.maxRetries) {
|
||||
logger.info(s"Message retry is successful.")
|
||||
}
|
||||
//reset ErrorHandlerObj
|
||||
resetErrorHandlerObj()
|
||||
Some(s)
|
||||
}
|
||||
case Failure(f) =>
|
||||
|
||||
//decrement the retry count
|
||||
logger.error(s"Encountered error ${f.getMessage}", f)
|
||||
this.errorHandlerObj = Some(decrementErrorHandlerRetries(errorHandlerObj.get, f.getMessage))
|
||||
//handle policy error
|
||||
handleError(f, errorHandlerObj.get.remainingRetries, errorHandlerObj.get.maxRetries)
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
def resetErrorHandlerObj() = {
|
||||
errorHandlerObj = Some(ErrorHandlerObj(errorHandlerObj.get.maxRetries, errorHandlerObj.get.maxRetries, "", new Date()))
|
||||
}
|
||||
|
||||
private def decrementErrorHandlerRetries(errorHandlerObj: ErrorHandlerObj, msg: String): ErrorHandlerObj = {
|
||||
if (errorHandlerObj.maxRetries == -1) {
|
||||
ErrorHandlerObj(errorHandlerObj.remainingRetries, errorHandlerObj.maxRetries, msg, new Date())
|
||||
} else {
|
||||
ErrorHandlerObj(errorHandlerObj.remainingRetries - 1, errorHandlerObj.maxRetries, msg, new Date())
|
||||
}
|
||||
}
|
||||
|
||||
private def handleError(error: Throwable, retryCount: Int, maxRetries: Int) = {
|
||||
|
||||
//throw connectException
|
||||
if (maxRetries > 0 && retryCount == 0) {
|
||||
throw new ConnectException(error)
|
||||
}
|
||||
else {
|
||||
logger.warn(s"Error policy set to RETRY. Remaining attempts $retryCount")
|
||||
throw new RetriableException(error)
|
||||
}
|
||||
}
|
||||
}
|
|
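To make the retry flow above concrete, here is a minimal, hypothetical sketch of a component mixing in the trait; the CosmosWriterExample class and its body are invented for illustration, and only the initializeErrorHandler and HandleRetriableError members shown above are assumed.

import scala.util.Try
import com.microsoft.azure.cosmosdb.kafka.connect.common.ErrorHandler.HandleRetriableError

// Hypothetical example component; not part of the commit above.
class CosmosWriterExample extends HandleRetriableError {

  initializeErrorHandler(3) // allow up to three retries before failing hard

  def write(doc: String): Unit = {
    // On Success the handler resets its retry state and returns Some(result).
    // On Failure it decrements the remaining retries and throws a
    // RetriableException, or a ConnectException once retries are exhausted.
    HandleRetriableError(Try {
      // ... the actual Cosmos DB call would go here ...
      doc.length
    })
  }
}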
@ -0,0 +1,83 @@
|
|||
package com.microsoft.azure.cosmosdb.kafka.connect.config
|
||||
|
||||
import java.util
|
||||
|
||||
import org.apache.kafka.common.config.ConfigDef.{Importance, Type, Width}
|
||||
import org.apache.kafka.common.config.{AbstractConfig, ConfigDef}
|
||||
|
||||
object ConnectorConfig {
|
||||
lazy val baseConfigDef: ConfigDef = new ConfigDef()
|
||||
.define(CosmosDBConfigConstants.CONNECTION_ENDPOINT_CONFIG, Type.STRING, Importance.HIGH,
|
||||
CosmosDBConfigConstants.CONNECTION_ENDPOINT_DOC, "Connection", 1, Width.LONG,
|
||||
CosmosDBConfigConstants.CONNECTION_ENDPOINT_DISPLAY)
|
||||
|
||||
.define(CosmosDBConfigConstants.CONNECTION_MASTERKEY_CONFIG, Type.PASSWORD, Importance.HIGH,
|
||||
CosmosDBConfigConstants.CONNECTION_MASTERKEY_DOC, "Connection", 2, Width.LONG,
|
||||
CosmosDBConfigConstants.CONNECTION_MASTERKEY_DISPLAY)
|
||||
|
||||
.define(CosmosDBConfigConstants.DATABASE_CONFIG, Type.STRING, Importance.HIGH,
|
||||
CosmosDBConfigConstants.DATABASE_CONFIG_DOC, "Database", 1, Width.MEDIUM,
|
||||
CosmosDBConfigConstants.DATABASE_CONFIG_DISPLAY)
|
||||
|
||||
.define(CosmosDBConfigConstants.COLLECTION_CONFIG, Type.STRING, Importance.HIGH,
|
||||
CosmosDBConfigConstants.COLLECTION_CONFIG_DOC, "Collection", 1, Width.MEDIUM,
|
||||
CosmosDBConfigConstants.COLLECTION_CONFIG_DISPLAY)
|
||||
|
||||
.define(CosmosDBConfigConstants.TOPIC_CONFIG, Type.STRING, Importance.HIGH,
|
||||
CosmosDBConfigConstants.TOPIC_CONFIG_DOC, "Topic", 1, Width.MEDIUM,
|
||||
CosmosDBConfigConstants.TOPIC_CONFIG_DISPLAY)
|
||||
|
||||
.define(CosmosDBConfigConstants.ERRORS_RETRY_TIMEOUT_CONFIG, Type.INT, CosmosDBConfigConstants.ERROR_MAX_RETRIES_DEFAULT, Importance.MEDIUM,
|
||||
CosmosDBConfigConstants.ERRORS_RETRY_TIMEOUT_DOC, "Common", 1,
|
||||
Width.MEDIUM , CosmosDBConfigConstants.ERRORS_RETRY_TIMEOUT_DISPLAY)
|
||||
|
||||
|
||||
/**
|
||||
* Holds the extra configurations for the source on top of
|
||||
* the base.
|
||||
**/
|
||||
lazy val sourceConfigDef: ConfigDef = ConnectorConfig.baseConfigDef
|
||||
.define(CosmosDBConfigConstants.ASSIGNED_PARTITIONS, Type.STRING, "", Importance.HIGH,
|
||||
CosmosDBConfigConstants.ASSIGNED_PARTITIONS_DOC, "Source", 1, Width.MEDIUM,
|
||||
CosmosDBConfigConstants.ASSIGNED_PARTITIONS_DISPLAY)
|
||||
.define(CosmosDBConfigConstants.READER_BUFFER_SIZE, Type.INT, CosmosDBConfigConstants.READER_BUFFER_SIZE_DEFAULT, Importance.MEDIUM,
|
||||
CosmosDBConfigConstants.READER_BUFFER_SIZE_DOC, "Source", 2, Width.LONG,
|
||||
CosmosDBConfigConstants.READER_BUFFER_SIZE_DISPLAY)
|
||||
.define(CosmosDBConfigConstants.BATCH_SIZE, Type.INT, CosmosDBConfigConstants.BATCH_SIZE_DEFAULT, Importance.MEDIUM,
|
||||
CosmosDBConfigConstants.BATCH_SIZE_DOC, "Source", 3, Width.LONG,
|
||||
CosmosDBConfigConstants.BATCH_SIZE_DISPLAY)
|
||||
.define(CosmosDBConfigConstants.SOURCE_POST_PROCESSOR, Type.STRING, CosmosDBConfigConstants.SOURCE_POST_PROCESSOR_DEFAULT, Importance.MEDIUM,
|
||||
CosmosDBConfigConstants.SOURCE_POST_PROCESSOR_DOC, "Source", 4, Width.LONG,
|
||||
CosmosDBConfigConstants.SOURCE_POST_PROCESSOR_DISPLAY)
|
||||
.define(CosmosDBConfigConstants.TIMEOUT, Type.INT, CosmosDBConfigConstants.TIMEOUT_DEFAULT, Importance.MEDIUM,
|
||||
CosmosDBConfigConstants.TIMEOUT_DOC, "Source", 4, Width.LONG,
|
||||
CosmosDBConfigConstants.TIMEOUT_DISPLAY)
|
||||
|
||||
/**
|
||||
* Holds the extra configurations for the sink on top of
|
||||
* the base.
|
||||
**/
|
||||
|
||||
lazy val sinkConfigDef: ConfigDef = ConnectorConfig.baseConfigDef
|
||||
.define(CosmosDBConfigConstants.COLLECTION_TOPIC_MAP_CONFIG, Type.STRING, Importance.HIGH,
|
||||
CosmosDBConfigConstants.COLLECTION_TOPIC_MAP_CONFIG_DOC, "Map", 1, Width.MEDIUM,
|
||||
CosmosDBConfigConstants.COLLECTION_TOPIC_MAP_CONFIG_DISPLAY)
|
||||
.define(CosmosDBConfigConstants.SINK_POST_PROCESSOR, Type.STRING, CosmosDBConfigConstants.SINK_POST_PROCESSOR_DEFAULT, Importance.MEDIUM,
|
||||
CosmosDBConfigConstants.SINK_POST_PROCESSOR_DOC, "Sink", 1, Width.LONG,
|
||||
CosmosDBConfigConstants.SINK_POST_PROCESSOR_DISPLAY)
|
||||
// .define(CosmosDBConfigConstants.EXTRA_SINK_CONFIG_01, Type.STRING, Importance.HIGH,
|
||||
// CosmosDBConfigConstants.EXTRA_SINK_CONFIG_01_DOC, "Sink", 1, Width.MEDIUM,
|
||||
// CosmosDBConfigConstants.EXTRA_SINK_CONFIG_01_DISPLAY)
|
||||
// .define(CosmosDBConfigConstants.EXTRA_SINK_CONFIG_02, Type.STRING, Importance.HIGH,
|
||||
// CosmosDBConfigConstants.EXTRA_SINK_CONFIG_02_DOC, "Sink", 2, Width.MEDIUM,
|
||||
// CosmosDBConfigConstants.EXTRA_SINK_CONFIG_02_DISPLAY)
|
||||
|
||||
lazy val commonConfigDef: ConfigDef = ConnectorConfig.baseConfigDef
|
||||
.define(CosmosDBConfigConstants.ERRORS_RETRY_TIMEOUT_CONFIG, Type.INT, CosmosDBConfigConstants.ERROR_MAX_RETRIES_DEFAULT, Importance.MEDIUM,
|
||||
CosmosDBConfigConstants.ERRORS_RETRY_TIMEOUT_DOC, "Common", 1,
|
||||
Width.MEDIUM , CosmosDBConfigConstants.ERRORS_RETRY_TIMEOUT_DISPLAY)
|
||||
|
||||
}
|
||||
|
||||
case class CosmosDBConfig(config: ConfigDef, props: util.Map[String, String])
|
||||
extends AbstractConfig(config, props)
|
|
@ -0,0 +1,70 @@
|
|||
package com.microsoft.azure.cosmosdb.kafka.connect.config
|
||||
|
||||
object CosmosDBConfigConstants {
|
||||
val CONNECTOR_PREFIX = "connect.cosmosdb"
|
||||
|
||||
val CONNECTION_ENDPOINT_CONFIG = s"$CONNECTOR_PREFIX.connection.endpoint"
|
||||
val CONNECTION_ENDPOINT_DOC = "The Cosmos DB endpoint."
|
||||
val CONNECTION_ENDPOINT_DISPLAY = "Endpoint"
|
||||
|
||||
val CONNECTION_MASTERKEY_CONFIG = s"$CONNECTOR_PREFIX.master.key"
|
||||
val CONNECTION_MASTERKEY_DOC = "The connection master key."
|
||||
val CONNECTION_MASTERKEY_DISPLAY = "Master Key"
|
||||
|
||||
val DATABASE_CONFIG = s"$CONNECTOR_PREFIX.database"
|
||||
val DATABASE_CONFIG_DISPLAY = "Database Name."
|
||||
val DATABASE_CONFIG_DOC = "The Cosmos DB target database."
|
||||
|
||||
val COLLECTION_CONFIG = s"$CONNECTOR_PREFIX.collections"
|
||||
val COLLECTION_CONFIG_DISPLAY = "Collection Names List."
|
||||
val COLLECTION_CONFIG_DOC = "A comma delimited list of target collection names."
|
||||
|
||||
val TOPIC_CONFIG = s"$CONNECTOR_PREFIX.topic.name"
|
||||
val TOPIC_CONFIG_DISPLAY = "Topic Names List."
|
||||
val TOPIC_CONFIG_DOC = "A comma delimited list of target Kafka Topics."
|
||||
|
||||
val COLLECTION_TOPIC_MAP_CONFIG = s"$CONNECTOR_PREFIX.collections.topicmap"
|
||||
val COLLECTION_TOPIC_MAP_CONFIG_DISPLAY = "Collection Topic Map."
|
||||
val COLLECTION_TOPIC_MAP_CONFIG_DOC = "A comma delimited list of collections mapped to their partitions. Formatted coll1#topic1,coll2#topic2."
|
||||
|
||||
//for the source task, the connector will set this for each source task
|
||||
val ASSIGNED_PARTITIONS = s"$CONNECTOR_PREFIX.assigned.partitions"
|
||||
val ASSIGNED_PARTITIONS_DOC = "The CosmosDB partitions a task has been assigned."
|
||||
val ASSIGNED_PARTITIONS_DISPLAY = "Assigned Partitions."
|
||||
|
||||
val BATCH_SIZE = s"$CONNECTOR_PREFIX.task.batch.size"
|
||||
val BATCH_SIZE_DISPLAY = "Batch Size."
|
||||
val BATCH_SIZE_DOC = "The max number of documents the source task will buffer before sending them to Kafka."
|
||||
val BATCH_SIZE_DEFAULT = 100
|
||||
|
||||
val READER_BUFFER_SIZE = s"$CONNECTOR_PREFIX.task.buffer.size"
|
||||
val READER_BUFFER_SIZE_DISPLAY = "Reader Buffer Size."
|
||||
val READER_BUFFER_SIZE_DOC = "The max size of the collection of documents the source task will buffer before sending them to Kafka."
|
||||
val READER_BUFFER_SIZE_DEFAULT = 10000
|
||||
|
||||
val SOURCE_POST_PROCESSOR = s"$CONNECTOR_PREFIX.source.post-processor"
|
||||
val SOURCE_POST_PROCESSOR_DISPLAY = "Source Post-Processor List"
|
||||
val SOURCE_POST_PROCESSOR_DOC = "Comma-separated list of Source Post-Processor class names to use for post-processing"
|
||||
val SOURCE_POST_PROCESSOR_DEFAULT = ""
|
||||
|
||||
val SINK_POST_PROCESSOR = s"$CONNECTOR_PREFIX.sink.post-processor"
|
||||
val SINK_POST_PROCESSOR_DISPLAY = "Sink Post-Processor List"
|
||||
val SINK_POST_PROCESSOR_DOC = "Comma-separated list of Sink Post-Processor class names to use for post-processing"
|
||||
val SINK_POST_PROCESSOR_DEFAULT = ""
|
||||
|
||||
val DEFAULT_POLL_INTERVAL = 1000
|
||||
|
||||
val ERRORS_RETRY_TIMEOUT_CONFIG = "errors.retry.timeout"
|
||||
val ERROR_MAX_RETRIES_DEFAULT = 3
|
||||
val ERRORS_RETRY_TIMEOUT_DISPLAY = "Retry Timeout for Errors"
|
||||
val ERRORS_RETRY_TIMEOUT_DOC = "The maximum duration in milliseconds that a failed operation " +
|
||||
"will be reattempted. The default is 0, which means no retries will be attempted. Use -1 for infinite retries.";
|
||||
|
||||
val TIMEOUT = s"$CONNECTOR_PREFIX.task.timeout"
|
||||
val TIMEOUT_DISPLAY = "Timeout."
|
||||
val TIMEOUT_DOC = "The max number of milliseconds the source task will use to read documents before sending them to Kafka."
|
||||
val TIMEOUT_DEFAULT = 5000
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,113 @@
|
|||
package com.microsoft.azure.cosmosdb.kafka.connect.kafka
|
||||
|
||||
|
||||
import org.apache.kafka.common.utils.SystemTime
|
||||
import org.apache.kafka.common.utils.Time
|
||||
import org.apache.kafka.common.utils.Utils
|
||||
import org.apache.kafka.connect.runtime.{ConnectorConfig, Herder, Worker}
|
||||
import org.apache.kafka.connect.runtime.distributed.DistributedConfig
|
||||
import org.apache.kafka.connect.runtime.distributed.DistributedHerder
|
||||
import org.apache.kafka.connect.runtime.rest.entities.ConnectorInfo
|
||||
import org.apache.kafka.connect.storage._
|
||||
import org.apache.kafka.connect.util.FutureCallback
|
||||
import java.util.Properties
|
||||
import java.util.UUID
|
||||
import java.util.concurrent.CountDownLatch
|
||||
import java.util.concurrent.ExecutionException
|
||||
import java.util.concurrent.TimeUnit
|
||||
import java.util.concurrent.TimeoutException
|
||||
import java.util.concurrent.atomic.AtomicBoolean
|
||||
import scala.collection.JavaConversions._
|
||||
import com.typesafe.scalalogging.StrictLogging
|
||||
import org.apache.kafka.connect.runtime.isolation.Plugins
|
||||
|
||||
/**
|
||||
* Embedded Kafka Connect server as per KIP-26
|
||||
*/
|
||||
case class EmbeddedConnect(workerConfig: Properties, connectorConfigs: List[Properties]) extends StrictLogging {
|
||||
|
||||
private val REQUEST_TIMEOUT_MS = 120000
|
||||
private val startLatch: CountDownLatch = new CountDownLatch(1)
|
||||
private val shutdown: AtomicBoolean = new AtomicBoolean(false)
|
||||
private val stopLatch: CountDownLatch = new CountDownLatch(1)
|
||||
|
||||
private var worker: Worker = _
|
||||
private var herder: DistributedHerder = _
|
||||
|
||||
// ConnectEmbedded - throws Exception
|
||||
val time: Time = new SystemTime()
|
||||
val config: DistributedConfig = new DistributedConfig(Utils.propsToStringMap(workerConfig))
|
||||
|
||||
val offsetBackingStore: KafkaOffsetBackingStore = new KafkaOffsetBackingStore()
|
||||
offsetBackingStore.configure(config)
|
||||
//not sure if this is going to work but because we don't have advertised url we can get at least a fairly random
|
||||
val workerId: String = UUID.randomUUID().toString
|
||||
println("---> " + config.toString)
|
||||
worker = new Worker(workerId, time, new Plugins(Map.empty[String, String]), config, offsetBackingStore)
|
||||
|
||||
val statusBackingStore: StatusBackingStore = new KafkaStatusBackingStore(time, worker.getInternalValueConverter)
|
||||
statusBackingStore.configure(config)
|
||||
|
||||
val configBackingStore: ConfigBackingStore = new KafkaConfigBackingStore(worker.getInternalValueConverter, config, worker.configTransformer())
|
||||
|
||||
//advertisedUrl = "" as we don't have the rest server - hopefully this will not break anything
|
||||
herder = new DistributedHerder(config, time, worker, "KafkaCluster1",statusBackingStore, configBackingStore, "")
|
||||
|
||||
def start(): Unit = {
|
||||
try {
|
||||
logger.info("Kafka ConnectEmbedded starting")
|
||||
|
||||
sys.ShutdownHookThread {
|
||||
logger.info("exiting")
|
||||
try {
|
||||
startLatch.await()
|
||||
EmbeddedConnect.this.stop()
|
||||
} catch {
|
||||
case e: InterruptedException => logger.error("Interrupted in shutdown hook while waiting for Kafka Connect startup to finish");
|
||||
}
|
||||
}
|
||||
worker.start()
|
||||
herder.start()
|
||||
|
||||
logger.info("Kafka ConnectEmbedded started")
|
||||
|
||||
connectorConfigs.foreach { connectorConfig: Properties =>
|
||||
val callback = new FutureCallback[Herder.Created[ConnectorInfo]]()
|
||||
val name = connectorConfig.getProperty(ConnectorConfig.NAME_CONFIG)
|
||||
herder.putConnectorConfig(name, Utils.propsToStringMap(connectorConfig), true, callback)
|
||||
callback.get(REQUEST_TIMEOUT_MS, TimeUnit.MILLISECONDS)
|
||||
}
|
||||
|
||||
} catch {
|
||||
case e: InterruptedException => logger.error("Starting interrupted ", e)
|
||||
case e: ExecutionException => logger.error("Submitting connector config failed", e.getCause)
|
||||
case e: TimeoutException => logger.error("Submitting connector config timed out", e)
|
||||
case e: Exception => logger.error("Starting failed", e)
|
||||
} finally {
|
||||
startLatch.countDown()
|
||||
}
|
||||
}
|
||||
|
||||
def stop(): Unit = {
|
||||
try {
|
||||
val wasShuttingDown = shutdown.getAndSet(true)
|
||||
if (!wasShuttingDown) {
|
||||
logger.info("Kafka ConnectEmbedded stopping")
|
||||
herder.stop()
|
||||
worker.stop()
|
||||
logger.info("Kafka ConnectEmbedded stopped")
|
||||
}
|
||||
} finally {
|
||||
stopLatch.countDown()
|
||||
}
|
||||
}
|
||||
|
||||
def awaitStop(): Unit = {
|
||||
try {
|
||||
stopLatch.await()
|
||||
} catch {
|
||||
case e: InterruptedException => logger.error("Interrupted waiting for Kafka Connect to shutdown")
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,70 @@
|
|||
package com.microsoft.azure.cosmosdb.kafka.connect.kafka
|
||||
|
||||
import java.util.Properties
|
||||
|
||||
import kafka.server.{KafkaConfig, KafkaServer}
|
||||
import kafka.utils.{CoreUtils, TestUtils}
|
||||
import kafka.zk.EmbeddedZookeeper
|
||||
import org.apache.kafka.common.security.auth.SecurityProtocol
|
||||
import org.apache.kafka.common.utils.SystemTime
|
||||
|
||||
import scala.collection.immutable.IndexedSeq
|
||||
|
||||
|
||||
object KafkaCluster extends AutoCloseable {
|
||||
|
||||
private val Zookeeper = new EmbeddedZookeeper
|
||||
val brokersNumber = 1
|
||||
val ZookeeperConnection = s"localhost:${Zookeeper.port}"
|
||||
var Connect: EmbeddedConnect = _
|
||||
var kafkaConnectEnabled: Boolean = false
|
||||
val BrokersConfig: IndexedSeq[KafkaConfig] = (1 to brokersNumber).map(i => getKafkaConfig(i))
|
||||
val Brokers: IndexedSeq[KafkaServer] = BrokersConfig.map(TestUtils.createServer(_, new SystemTime()))
|
||||
val BrokersList: String = TestUtils.getBrokerListStrFromServers(Brokers, SecurityProtocol.PLAINTEXT)
|
||||
System.setProperty("http.nonProxyHosts", "localhost|0.0.0.0|127.0.0.1")
|
||||
|
||||
def startEmbeddedConnect(workerConfig: Properties, connectorConfigs: List[Properties]): Unit = {
|
||||
kafkaConnectEnabled = true
|
||||
Connect = EmbeddedConnect(workerConfig, connectorConfigs)
|
||||
Connect.start()
|
||||
}
|
||||
|
||||
private def injectProperties(props: Properties, brokerId: Int): Unit = {
|
||||
props.setProperty("log.dir", s"C:/Temp/kafka-logs-${brokerId}")
|
||||
props.setProperty("auto.create.topics.enable", "true")
|
||||
props.setProperty("num.partitions", "1")
|
||||
}
|
||||
|
||||
private def getKafkaConfig(brokerId: Int): KafkaConfig = {
|
||||
val props: Properties = TestUtils.createBrokerConfig(
|
||||
brokerId,
|
||||
ZookeeperConnection,
|
||||
enableControlledShutdown = false,
|
||||
enableDeleteTopic = false,
|
||||
TestUtils.RandomPort,
|
||||
interBrokerSecurityProtocol = None,
|
||||
trustStoreFile = None,
|
||||
None,
|
||||
enablePlaintext = true,
|
||||
enableSaslPlaintext = false,
|
||||
TestUtils.RandomPort,
|
||||
enableSsl = false,
|
||||
TestUtils.RandomPort,
|
||||
enableSaslSsl = false,
|
||||
TestUtils.RandomPort,
|
||||
None)
|
||||
injectProperties(props, brokerId)
|
||||
KafkaConfig.fromProps(props)
|
||||
}
|
||||
|
||||
def close(): Unit = {
|
||||
if (kafkaConnectEnabled) {
|
||||
Connect.stop()
|
||||
}
|
||||
Brokers.foreach { server =>
|
||||
server.shutdown
|
||||
CoreUtils.delete(server.config.logDirs)
|
||||
}
|
||||
Zookeeper.shutdown()
|
||||
}
|
||||
}
|
|
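A minimal, hypothetical sketch of driving this embedded cluster from a test; the worker properties are standard Kafka Connect distributed-worker settings, the topic names, group id and converters are placeholders, and the property list is not necessarily complete.

import java.util.Properties
import com.microsoft.azure.cosmosdb.kafka.connect.kafka.KafkaCluster

object EmbeddedConnectExample extends App {
  // Worker settings for the embedded distributed herder (placeholder values).
  val workerProps = new Properties()
  workerProps.put("bootstrap.servers", KafkaCluster.BrokersList)
  workerProps.put("group.id", "embedded-connect-test")
  workerProps.put("key.converter", "org.apache.kafka.connect.json.JsonConverter")
  workerProps.put("value.converter", "org.apache.kafka.connect.json.JsonConverter")
  workerProps.put("offset.storage.topic", "connect-offsets")
  workerProps.put("config.storage.topic", "connect-configs")
  workerProps.put("status.storage.topic", "connect-status")

  // A single connector configuration to submit once the worker is up.
  val connectorProps = new Properties()
  connectorProps.put("name", "cosmosdb-sink-test")
  connectorProps.put("connector.class", "com.microsoft.azure.cosmosdb.kafka.connect.sink.CosmosDBSinkConnector")
  connectorProps.put("topics", "topic1")

  // Starts Zookeeper, the broker(s) and the embedded Connect worker, then
  // submits the connector config through the herder.
  KafkaCluster.startEmbeddedConnect(workerProps, List(connectorProps))
}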
@ -0,0 +1,46 @@
|
|||
package com.microsoft.azure.cosmosdb.kafka.connect.processor
|
||||
|
||||
import com.google.gson._
|
||||
import org.apache.kafka.connect.sink.SinkRecord
|
||||
import org.apache.kafka.connect.source.SourceRecord
|
||||
|
||||
abstract class JsonPostProcessor extends PostProcessor {
|
||||
|
||||
override final def runPostProcess(sourceRecord: SourceRecord): SourceRecord = {
|
||||
val jsonParser = new JsonParser()
|
||||
val json: JsonObject = jsonParser.parse(sourceRecord.value().toString).getAsJsonObject
|
||||
|
||||
val processedJson = runJsonPostProcess(json)
|
||||
|
||||
val result = new SourceRecord(
|
||||
sourceRecord.sourcePartition,
|
||||
sourceRecord.sourceOffset,
|
||||
sourceRecord.topic,
|
||||
null,
|
||||
processedJson.toString
|
||||
)
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
override def runPostProcess(sinkRecord: SinkRecord): SinkRecord = {
|
||||
val jsonParser = new JsonParser()
|
||||
val json: JsonObject = jsonParser.parse(sinkRecord.value().toString).getAsJsonObject
|
||||
|
||||
val processedJson = runJsonPostProcess(json)
|
||||
|
||||
val result = new SinkRecord(
|
||||
sinkRecord.topic,
|
||||
sinkRecord.kafkaPartition,
|
||||
sinkRecord.keySchema,
|
||||
sinkRecord.key,
|
||||
sinkRecord.valueSchema,
|
||||
processedJson.toString,
|
||||
sinkRecord.kafkaOffset
|
||||
)
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
def runJsonPostProcess(json: JsonObject): JsonObject
|
||||
}
|
|
@ -0,0 +1,32 @@
|
|||
package com.microsoft.azure.cosmosdb.kafka.connect.processor
|
||||
|
||||
import com.microsoft.azure.cosmosdb.kafka.connect.config.CosmosDBConfig
|
||||
import com.typesafe.scalalogging.LazyLogging
|
||||
import org.apache.kafka.connect.sink.SinkRecord
|
||||
import org.apache.kafka.connect.source.SourceRecord
|
||||
|
||||
abstract class PostProcessor {
|
||||
|
||||
def configure(config: CosmosDBConfig): Unit
|
||||
|
||||
def runPostProcess(sourceRecord: SourceRecord): SourceRecord
|
||||
|
||||
def runPostProcess(sinkRecord: SinkRecord): SinkRecord
|
||||
|
||||
}
|
||||
|
||||
object PostProcessor extends AnyRef with LazyLogging {
|
||||
|
||||
def createPostProcessorList(processorClassNames: String, config: CosmosDBConfig): List[PostProcessor] =
|
||||
processorClassNames.split(',').map(c => {
|
||||
logger.info(s"Instantiating ${c} as Post-Processor")
|
||||
if (c.isEmpty) {
|
||||
null
|
||||
} else {
|
||||
val postProcessor = Class.forName(c).newInstance().asInstanceOf[PostProcessor]
|
||||
postProcessor.configure(config)
|
||||
postProcessor
|
||||
}
|
||||
}).filter( e => e != null).toList
|
||||
|
||||
}
|
|
@ -0,0 +1,22 @@
|
|||
package com.microsoft.azure.cosmosdb.kafka.connect.processor
|
||||
|
||||
import com.microsoft.azure.cosmosdb.kafka.connect.config.CosmosDBConfig
|
||||
import org.apache.kafka.connect.sink.SinkRecord
|
||||
import org.apache.kafka.connect.source.SourceRecord
|
||||
|
||||
class SampleConsoleWriterPostProcessor extends PostProcessor {
|
||||
|
||||
override def configure(config: CosmosDBConfig): Unit = {
|
||||
|
||||
}
|
||||
|
||||
override def runPostProcess(sourceRecord: SourceRecord): SourceRecord = {
|
||||
println(sourceRecord.value())
|
||||
sourceRecord
|
||||
}
|
||||
|
||||
override def runPostProcess(sinkRecord: SinkRecord): SinkRecord = {
|
||||
println(sinkRecord.value())
|
||||
sinkRecord
|
||||
}
|
||||
}
|
|
@ -0,0 +1,57 @@
|
|||
package com.microsoft.azure.cosmosdb.kafka.connect.processor.sink
|
||||
|
||||
import com.google.gson._
|
||||
import com.microsoft.azure.cosmosdb.kafka.connect.config.{ConnectorConfig, CosmosDBConfig, CosmosDBConfigConstants}
|
||||
import com.microsoft.azure.cosmosdb.kafka.connect.processor.JsonPostProcessor
|
||||
import org.apache.kafka.common.config.ConfigDef.{Importance, Type, Width}
|
||||
|
||||
class DocumentIdSinkPostProcessor extends JsonPostProcessor {
|
||||
|
||||
var documentIdField: String = ""
|
||||
|
||||
override def configure(config: CosmosDBConfig): Unit = {
|
||||
|
||||
val field = getPostProcessorConfiguration(config)
|
||||
if (field.isDefined) documentIdField = field.get
|
||||
|
||||
}
|
||||
|
||||
override def runJsonPostProcess(json: JsonObject): JsonObject = {
|
||||
|
||||
if (!json.has("id")) {
|
||||
if (json.has(documentIdField))
|
||||
json.addProperty("id", json.get(documentIdField).getAsString)
|
||||
else
|
||||
json.add("id", JsonNull.INSTANCE)
|
||||
}
|
||||
|
||||
json
|
||||
}
|
||||
|
||||
private def getPostProcessorConfiguration(config: CosmosDBConfig): Option[String] =
|
||||
{
|
||||
val CONFIG = s"${CosmosDBConfigConstants.CONNECTOR_PREFIX}.sink.post-processor.documentId.field"
|
||||
val DOC = "JSON field to be used as the Cosmos DB id"
|
||||
val DISPLAY = "JSON Field Path"
|
||||
val DEFAULT = ""
|
||||
|
||||
val postProcessorConfigDef = ConnectorConfig.baseConfigDef
|
||||
|
||||
if(ConnectorConfig.baseConfigDef.configKeys().containsKey(CONFIG)) {
|
||||
ConnectorConfig.baseConfigDef.configKeys().remove(CONFIG)
|
||||
}
|
||||
|
||||
postProcessorConfigDef.define(
|
||||
CONFIG, Type.STRING, DEFAULT, Importance.MEDIUM,
|
||||
DOC, s"PostProcessor:DocumentId",
|
||||
1, Width.LONG, DISPLAY
|
||||
)
|
||||
|
||||
val postProcessorConfig: CosmosDBConfig = CosmosDBConfig(postProcessorConfigDef, config.props)
|
||||
|
||||
val field = Option(postProcessorConfig.getString(CONFIG))
|
||||
|
||||
field
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,13 @@
|
|||
package com.microsoft.azure.cosmosdb.kafka.connect.processor.sink
|
||||
|
||||
import com.google.gson._
|
||||
import com.microsoft.azure.cosmosdb.kafka.connect.processor.JsonPostProcessor
|
||||
import com.microsoft.azure.cosmosdb.kafka.connect.processor.`trait`._
|
||||
|
||||
class SelectorSinkPostProcessor extends JsonPostProcessor with Selector {
|
||||
|
||||
override def pipelineStage = "sink"
|
||||
|
||||
override def runJsonPostProcess(json: JsonObject): JsonObject = processor(json)
|
||||
|
||||
}
|
|
@ -0,0 +1,13 @@
|
|||
package com.microsoft.azure.cosmosdb.kafka.connect.processor.source
|
||||
|
||||
import com.google.gson._
|
||||
import com.microsoft.azure.cosmosdb.kafka.connect.processor.JsonPostProcessor
|
||||
import com.microsoft.azure.cosmosdb.kafka.connect.processor.`trait`._
|
||||
|
||||
class SelectorSourcePostProcessor extends JsonPostProcessor with Selector {
|
||||
|
||||
override def pipelineStage = "source"
|
||||
|
||||
override def runJsonPostProcess(json: JsonObject): JsonObject = processor(json)
|
||||
|
||||
}
|
|
@ -0,0 +1,106 @@
|
|||
package com.microsoft.azure.cosmosdb.kafka.connect.processor.`trait`
|
||||
|
||||
import com.google.gson._
|
||||
import com.microsoft.azure.cosmosdb.kafka.connect.config.{ConnectorConfig, CosmosDBConfig, CosmosDBConfigConstants}
|
||||
import com.microsoft.azure.cosmosdb.kafka.connect.processor.PostProcessor
|
||||
import org.apache.kafka.common.config.ConfigDef.{Importance, Type, Width}
|
||||
|
||||
object SelectorType extends Enumeration {
|
||||
type SelectorType = Value
|
||||
val Include, Exclude, All = Value
|
||||
|
||||
def fromString(s: String): Value = values.find(_.toString == s).getOrElse(All)
|
||||
}
|
||||
|
||||
import SelectorType._
|
||||
|
||||
trait Selector extends PostProcessor {
|
||||
|
||||
var selectorFields = Seq.empty[String]
|
||||
var selectorType: SelectorType = SelectorType.Include
|
||||
var processor: JsonObject => JsonObject = includeFields
|
||||
|
||||
def pipelineStage: String
|
||||
|
||||
override def configure(config: CosmosDBConfig): Unit = {
|
||||
|
||||
val configValues = getPostProcessorConfiguration(config)
|
||||
selectorFields = configValues._1
|
||||
selectorType = configValues._2
|
||||
|
||||
processor = selectorType match {
|
||||
case Include => includeFields
|
||||
case Exclude => excludeFields
|
||||
case _ => includeAll
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private def includeAll(json: JsonObject): JsonObject = json
|
||||
|
||||
private def includeFields(json: JsonObject): JsonObject = {
|
||||
|
||||
val toInclude = selectorFields
|
||||
|
||||
val newJson: JsonObject = new JsonObject()
|
||||
|
||||
toInclude.foreach(e => {
|
||||
val j = json.get(e)
|
||||
if (j != null) newJson.add(e, j)
|
||||
})
|
||||
|
||||
newJson
|
||||
|
||||
}
|
||||
|
||||
private def excludeFields(json: JsonObject): JsonObject = {
|
||||
|
||||
val toRemove = selectorFields
|
||||
|
||||
toRemove.foreach(e => json.remove(e))
|
||||
|
||||
json
|
||||
|
||||
}
|
||||
|
||||
private def getPostProcessorConfiguration(config: CosmosDBConfig): (Seq[String], SelectorType) =
|
||||
{
|
||||
val FIELD_CONFIG = s"${CosmosDBConfigConstants.CONNECTOR_PREFIX}.$pipelineStage.post-processor.selector.fields"
|
||||
val FIELD_DOC = "List of fields to be included or excluded in the generated JSON"
|
||||
val FIELD_DISPLAY = "List of fields"
|
||||
val FIELD_DEFAULT = ""
|
||||
|
||||
val TYPE_CONFIG = s"${CosmosDBConfigConstants.CONNECTOR_PREFIX}.$pipelineStage.post-processor.selector.type"
|
||||
val TYPE_DOC = "How the selector should behave: Include or Exclude specified fields in the processed JSON"
|
||||
val TYPE_DISPLAY = "Selector behaviour: Include or Exclude"
|
||||
val TYPE_DEFAULT = ""
|
||||
|
||||
if(ConnectorConfig.baseConfigDef.configKeys().containsKey(FIELD_CONFIG)) {
|
||||
ConnectorConfig.baseConfigDef.configKeys().remove(FIELD_CONFIG)
|
||||
}
|
||||
|
||||
if(ConnectorConfig.baseConfigDef.configKeys().containsKey(TYPE_CONFIG)) {
|
||||
ConnectorConfig.baseConfigDef.configKeys().remove(TYPE_CONFIG)
|
||||
}
|
||||
|
||||
val postProcessorConfigDef = ConnectorConfig.baseConfigDef
|
||||
.define(
|
||||
FIELD_CONFIG, Type.STRING, FIELD_DEFAULT, Importance.MEDIUM,
|
||||
FIELD_DOC, s"PostProcessor:Selector:${pipelineStage}",
|
||||
1, Width.LONG, FIELD_DISPLAY
|
||||
).define(
|
||||
TYPE_CONFIG, Type.STRING, TYPE_DEFAULT, Importance.MEDIUM,
|
||||
TYPE_DOC, s"PostProcessor:Selector:${pipelineStage}",
|
||||
2, Width.LONG, TYPE_DISPLAY
|
||||
)
|
||||
|
||||
val postProcessorConfig: CosmosDBConfig = CosmosDBConfig(postProcessorConfigDef, config.props)
|
||||
|
||||
selectorFields = postProcessorConfig.getString(FIELD_CONFIG).split(',').map(e => e.trim).toSeq
|
||||
selectorType = SelectorType.fromString(postProcessorConfig.getString(TYPE_CONFIG))
|
||||
|
||||
(selectorFields, selectorType)
|
||||
}
|
||||
|
||||
}
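
For reference, a minimal standalone sketch (object name and sample document invented; assumes Gson on the classpath, as it already is for this trait) of the include/exclude behaviour the Selector above implements:

```scala
import com.google.gson.{JsonObject, JsonParser}

object SelectorBehaviourSketch extends App {
  def parse(s: String): JsonObject = new JsonParser().parse(s).getAsJsonObject

  val fields = Seq("id", "name")

  // Include: copy only the listed fields into a new JsonObject.
  val src = parse("""{"id":"1","name":"foo","secret":"s"}""")
  val included = new JsonObject()
  fields.foreach(f => Option(src.get(f)).foreach(included.add(f, _)))

  // Exclude: remove the listed fields in place.
  val excluded = parse("""{"id":"1","name":"foo","secret":"s"}""")
  fields.foreach(excluded.remove)

  println(included) // {"id":"1","name":"foo"}
  println(excluded) // {"secret":"s"}
}
```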
|
||||
|
|
@ -0,0 +1,56 @@
|
|||
package com.microsoft.azure.cosmosdb.kafka.connect.sink
|
||||
|
||||
import java.util
|
||||
|
||||
import org.apache.kafka.connect.data.Struct
|
||||
import org.apache.kafka.connect.data.Schema._
|
||||
import sun.reflect.generics.reflectiveObjects.NotImplementedException
|
||||
|
||||
import scala.collection.JavaConversions._
|
||||
|
||||
trait ConnectCosmosConverter {
|
||||
/**
|
||||
* Converts connect data to json tuples.
|
||||
*
|
||||
* @return converted data
|
||||
*/
|
||||
def toJsonMap(value: Object): List[(String, Object)]
|
||||
}
|
||||
|
||||
/**
|
||||
* Converter of connect data with schema to json tuples.
|
||||
*/
|
||||
object SchemaConnectCosmosConverter extends ConnectCosmosConverter {
|
||||
override def toJsonMap(value: Object): List[(String, Object)] = {
|
||||
val struct = value.asInstanceOf[Struct]
|
||||
var res : Map[String,Object] = Map()
|
||||
|
||||
for (field <- struct.schema().fields()){
|
||||
val fieldName = field.name()
|
||||
val fieldType = field.schema().`type`()
|
||||
|
||||
fieldType match {
|
||||
case Type.INT8 => res += (fieldName-> struct.getInt8(fieldName))
|
||||
case Type.INT16 => res += (fieldName-> struct.getInt16(fieldName))
|
||||
case Type.INT32 => res += (fieldName-> struct.getInt32(fieldName))
|
||||
case Type.INT64 => res += (fieldName-> struct.getInt64(fieldName))
|
||||
case Type.FLOAT32 => res += (fieldName-> struct.getFloat32(fieldName))
|
||||
case Type.FLOAT64 => res += (fieldName-> struct.getFloat64(fieldName))
|
||||
case Type.BOOLEAN => res += (fieldName-> struct.getBoolean(fieldName))
|
||||
case Type.STRING => res += (fieldName-> struct.getString(fieldName))
|
||||
case _ => throw new NotImplementedException()
|
||||
}
|
||||
}
|
||||
|
||||
res.toList
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Converter of connect data without schema to json tuples.
|
||||
*/
|
||||
object NoSchemaConnectCosmosConverter extends ConnectCosmosConverter {
|
||||
override def toJsonMap(value: Object): List[(String, Object)] = {
|
||||
value.asInstanceOf[util.HashMap[String,Object]].toList
|
||||
}
|
||||
}
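
A hypothetical usage sketch of the two converters above, assuming it sits in the same package and that Kafka Connect's data API is on the classpath; the field names and values are made up:

```scala
import org.apache.kafka.connect.data.{Schema, SchemaBuilder, Struct}

object ConverterUsageSketch extends App {
  val schema = SchemaBuilder.struct()
    .field("id", Schema.STRING_SCHEMA)
    .field("count", Schema.INT32_SCHEMA)
    .build()

  // With a schema, fields are read through the typed Struct getters.
  val struct = new Struct(schema)
    .put("id", "doc-1")
    .put("count", Int.box(42))
  println(SchemaConnectCosmosConverter.toJsonMap(struct))  // List((id,doc-1), (count,42))

  // Without a schema, the record value is expected to be a java.util.HashMap.
  val raw = new java.util.HashMap[String, Object]()
  raw.put("id", "doc-1")
  println(NoSchemaConnectCosmosConverter.toJsonMap(raw))   // List((id,doc-1))
}
```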
|
|
@ -0,0 +1,56 @@
|
|||
package com.microsoft.azure.cosmosdb.kafka.connect.sink
|
||||
|
||||
import java.util
|
||||
|
||||
import com.microsoft.azure.cosmosdb.kafka.connect.common.ErrorHandler.HandleRetriableError
|
||||
import com.microsoft.azure.cosmosdb.kafka.connect.config.{ConnectorConfig, CosmosDBConfig}
|
||||
import org.apache.kafka.common.config.ConfigDef
|
||||
import org.apache.kafka.connect.connector.Task
|
||||
import org.apache.kafka.connect.sink.SinkConnector
|
||||
|
||||
import scala.collection.JavaConverters._
|
||||
import scala.util.{Failure, Success}
|
||||
|
||||
class CosmosDBSinkConnector extends SinkConnector with HandleRetriableError {
|
||||
|
||||
|
||||
private var configProps: util.Map[String, String] = _
|
||||
|
||||
|
||||
override def version(): String = getClass.getPackage.getImplementationVersion
|
||||
|
||||
override def start(props: util.Map[String, String]): Unit = {
|
||||
logger.info("Starting CosmosDBSinkConnector")
|
||||
|
||||
try {
|
||||
initializeErrorHandler(props.get(org.apache.kafka.connect.runtime.ConnectorConfig.ERRORS_RETRY_TIMEOUT_CONFIG).toInt) // TODO: test
|
||||
|
||||
val config = CosmosDBConfig(ConnectorConfig.sinkConfigDef, props)
|
||||
HandleRetriableError(Success(config))
|
||||
}
|
||||
catch{
|
||||
case f: Throwable =>
|
||||
logger.error(s"Couldn't start Cosmos DB Sink due to configuration error: ${f.getMessage}", f)
|
||||
HandleRetriableError(Failure(f))
|
||||
}
|
||||
|
||||
configProps = props
|
||||
|
||||
}
|
||||
|
||||
override def stop(): Unit = {
|
||||
logger.info("Stopping CosmosDBSinkConnector")
|
||||
}
|
||||
|
||||
override def taskClass(): Class[_ <: Task] = classOf[CosmosDBSinkTask]
|
||||
|
||||
override def taskConfigs(maxTasks: Int): util.List[util.Map[String, String]] = {
|
||||
logger.info(s"Setting task configurations for $maxTasks workers with properties $this.configProps")
|
||||
println(this.configProps)
|
||||
|
||||
(1 to maxTasks).map(_ => this.configProps).toList.asJava
|
||||
|
||||
}
|
||||
override def config(): ConfigDef = ConnectorConfig.sinkConfigDef
|
||||
|
||||
}
|
|
@ -0,0 +1,11 @@
|
|||
package com.microsoft.azure.cosmosdb.kafka.connect.sink
|
||||
|
||||
|
||||
import scala.collection.mutable.HashMap
|
||||
|
||||
|
||||
case class CosmosDBSinkSettings(endpoint: String,
|
||||
masterKey: String,
|
||||
database: String,
|
||||
collectionTopicMap: HashMap[String, String]) {
|
||||
}
|
|
@ -0,0 +1,125 @@
|
|||
package com.microsoft.azure.cosmosdb.kafka.connect.sink
|
||||
|
||||
import java.util
|
||||
|
||||
import scala.collection.mutable.HashMap
|
||||
import com.microsoft.azure.cosmosdb.kafka.connect.config.{ConnectorConfig, CosmosDBConfig, CosmosDBConfigConstants}
|
||||
import com.microsoft.azure.cosmosdb.kafka.connect.{CosmosDBClientSettings, CosmosDBProviderImpl, CosmosDBProvider}
|
||||
import com.microsoft.azure.cosmosdb.kafka.connect.processor._
|
||||
import com.microsoft.azure.cosmosdb.rx.AsyncDocumentClient
|
||||
import com.microsoft.azure.cosmosdb.{ConnectionPolicy, ConsistencyLevel}
|
||||
import com.typesafe.scalalogging.LazyLogging
|
||||
import org.apache.kafka.clients.consumer.OffsetAndMetadata
|
||||
import org.apache.kafka.common.TopicPartition
|
||||
import org.apache.kafka.connect.errors.ConnectException
|
||||
import org.apache.kafka.connect.sink.{SinkRecord, SinkTask}
|
||||
|
||||
import scala.collection.JavaConverters._
|
||||
import scala.util.{Failure, Success, Try}
|
||||
|
||||
class CosmosDBSinkTask extends SinkTask with LazyLogging {
|
||||
|
||||
private var writer: Option[CosmosDBWriter] = None
|
||||
|
||||
private var client: AsyncDocumentClient = null
|
||||
private var database: String = ""
|
||||
private var taskConfig: Option[CosmosDBConfig] = None
|
||||
private var topicNames: Array[String] = null
|
||||
private var postProcessors = List.empty[PostProcessor]
|
||||
val collectionTopicMap: HashMap[String, String] = HashMap.empty[String, String] // Public to allow for testing
|
||||
val cosmosDBProvider: CosmosDBProvider = CosmosDBProviderImpl
|
||||
|
||||
override def start(props: util.Map[String, String]): Unit = {
|
||||
logger.info("Starting CosmosDBSinkTask")
|
||||
|
||||
var config: util.Map[String, String] = null
|
||||
if (context != null) {
|
||||
config = if (context.configs().isEmpty) props else context.configs()
|
||||
}
|
||||
else {
|
||||
config = props
|
||||
}
|
||||
|
||||
// Get Configuration for this Task
|
||||
taskConfig = Try(CosmosDBConfig(ConnectorConfig.sinkConfigDef, config)) match {
|
||||
case Failure(f) => throw new ConnectException("Couldn't start CosmosDBSink due to configuration error.", f)
|
||||
case Success(s) => Some(s)
|
||||
}
|
||||
|
||||
// Add configured Post-Processors
|
||||
val processorClassNames = taskConfig.get.getString(CosmosDBConfigConstants.SINK_POST_PROCESSOR)
|
||||
postProcessors = PostProcessor.createPostProcessorList(processorClassNames, taskConfig.get)
|
||||
|
||||
// Get CosmosDB Connection
|
||||
val endpoint: String = taskConfig.get.getString(CosmosDBConfigConstants.CONNECTION_ENDPOINT_CONFIG)
|
||||
val masterKey: String = taskConfig.get.getPassword(CosmosDBConfigConstants.CONNECTION_MASTERKEY_CONFIG).value()
|
||||
database = taskConfig.get.getString(CosmosDBConfigConstants.DATABASE_CONFIG)
|
||||
|
||||
// Populate collection topic map
|
||||
// TODO: add support for many to many mapping, this only assumes each topic writes to one collection and multiple topics can write to the same collection
|
||||
val collectionTopicMapString = taskConfig.get.getString(CosmosDBConfigConstants.COLLECTION_TOPIC_MAP_CONFIG)
|
||||
if(collectionTopicMapString.contains("#")) { // There is at least one pair
|
||||
collectionTopicMapString.split(",").map(_.trim).foreach(
|
||||
m => {
|
||||
val map = m.split("#").map(_.trim)
|
||||
collectionTopicMap.put(map(1), map(0)) // topic, collection
|
||||
})
|
||||
}
|
||||
|
||||
// If there are topics with no mapping, add them to the map with topic name as collection name
|
||||
topicNames = taskConfig.get.getString(CosmosDBConfigConstants.TOPIC_CONFIG).split(",").map(_.trim)
|
||||
topicNames.foreach(
|
||||
t => {
|
||||
if (!collectionTopicMap.contains(t)) {
|
||||
collectionTopicMap.put(t, t) // topic, collection
|
||||
}
|
||||
})
|
||||
|
||||
val clientSettings = CosmosDBClientSettings(
|
||||
endpoint,
|
||||
masterKey,
|
||||
database,
|
||||
null, // Don't pass a collection because our client is potentially for multiple collections
|
||||
ConnectionPolicy.GetDefault(),
|
||||
ConsistencyLevel.Session
|
||||
)
|
||||
client = Try(cosmosDBProvider.getClient(clientSettings)) match {
|
||||
case Success(conn) =>
|
||||
logger.info("Connection to CosmosDB established.")
|
||||
conn
|
||||
case Failure(f) => throw new ConnectException(s"Couldn't connect to CosmosDB.", f)
|
||||
}
|
||||
|
||||
// Set up Writer
|
||||
val setting = new CosmosDBSinkSettings(endpoint, masterKey, database, collectionTopicMap)
|
||||
writer = Option(new CosmosDBWriter(setting, cosmosDBProvider))
|
||||
}
|
||||
|
||||
|
||||
override def put(records: util.Collection[SinkRecord]): Unit = {
|
||||
val seq = records.asScala.toList
|
||||
logger.info(s"Sending ${seq.length} records to writer to be written")
|
||||
|
||||
// Execute PostProcessing
|
||||
val postProcessed = seq.map(sr => applyPostProcessing(sr))
|
||||
|
||||
// Currently only built for messages with JSON payload without schema
|
||||
writer.foreach(w => w.write(postProcessed))
|
||||
}
|
||||
|
||||
override def stop(): Unit = {
|
||||
logger.info("Stopping CosmosDBSinkTask")
|
||||
}
|
||||
|
||||
override def flush(map: util.Map[TopicPartition, OffsetAndMetadata]): Unit = {}
|
||||
|
||||
override def version(): String = getClass.getPackage.getImplementationVersion
|
||||
|
||||
private def applyPostProcessing(sinkRecord: SinkRecord): SinkRecord =
|
||||
postProcessors.foldLeft(sinkRecord)((r, p) => {
|
||||
//println(p.getClass.toString)
|
||||
p.runPostProcess(r)
|
||||
})
|
||||
|
||||
}
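
A minimal sketch of the `<collection>#<topic>` mapping format parsed in start() above; the object name and the sample mapping string are invented for illustration:

```scala
import scala.collection.mutable

object CollectionTopicMapSketch extends App {
  // Example value only; at runtime this comes from the connector configuration.
  val collectionTopicMapString = "ordersColl#orders, eventsColl#events"
  val collectionTopicMap = mutable.HashMap.empty[String, String]

  if (collectionTopicMapString.contains("#")) {
    collectionTopicMapString.split(",").map(_.trim).foreach { pair =>
      val parts = pair.split("#").map(_.trim)
      collectionTopicMap.put(parts(1), parts(0)) // key = topic, value = collection
    }
  }

  // Entries map each topic to its target collection, e.g. orders -> ordersColl.
  println(collectionTopicMap)
}
```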
|
||||
|
|
@ -0,0 +1,87 @@
|
|||
|
||||
package com.microsoft.azure.cosmosdb.kafka.connect.sink
|
||||
|
||||
import java.util.concurrent.CountDownLatch
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper
|
||||
import com.microsoft.azure.cosmosdb._
|
||||
import com.microsoft.azure.cosmosdb.kafka.connect.CosmosDBProvider
|
||||
import com.typesafe.scalalogging.StrictLogging
|
||||
import org.apache.kafka.connect.sink.SinkRecord
|
||||
|
||||
|
||||
class CosmosDBWriter(val settings: CosmosDBSinkSettings, val cosmosDBProvider: CosmosDBProvider) extends StrictLogging
|
||||
{
|
||||
private val requestOptionsInsert = new RequestOptions
|
||||
requestOptionsInsert.setConsistencyLevel(ConsistencyLevel.Session)
|
||||
|
||||
def write(records: Seq[SinkRecord]): Unit = {
|
||||
if (records.isEmpty) {
|
||||
logger.info("No records received.")
|
||||
} else {
|
||||
logger.info(s"Received ${records.size} records.")
|
||||
insert(records)
|
||||
}
|
||||
}
|
||||
|
||||
private def insert(records: Seq[SinkRecord]) = {
|
||||
try {
|
||||
|
||||
var docs = List.empty[Document]
|
||||
var collection: String = ""
|
||||
|
||||
records.groupBy(_.topic()).foreach { case (_, groupedRecords) =>
|
||||
groupedRecords.foreach { record =>
|
||||
// Determine which collection to write to
|
||||
if (settings.collectionTopicMap.contains(record.topic))
|
||||
collection = settings.collectionTopicMap(record.topic)
|
||||
else
|
||||
throw new Exception("No sink collection specified for this topic.") // TODO: tie this in with the exception handler
|
||||
|
||||
val content: String = serializeValue(record.value())
|
||||
val document = new Document(content)
|
||||
|
||||
logger.info("Upserting Document object id " + document.get("id") + " into collection " + collection)
|
||||
docs = docs :+ document
|
||||
}
|
||||
// Send current batch of documents and reset the list for the next topic's documents
|
||||
cosmosDBProvider.upsertDocuments[Document](docs, settings.database, collection, new CountDownLatch(1))
|
||||
docs = List.empty[Document]
|
||||
}
|
||||
|
||||
}
|
||||
catch {
|
||||
case t: Throwable =>
|
||||
logger.error(s"There was an error inserting the records ${t.getMessage}", t)
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
def close(): Unit = {
|
||||
logger.info("Shutting down CosmosDBWriter.")
|
||||
}
|
||||
|
||||
def serializeValue(value: Any): String = {
|
||||
var content: String = null
|
||||
val om = new ObjectMapper()
|
||||
|
||||
if (!value.isInstanceOf[String]){
|
||||
content = om.writeValueAsString(value)
|
||||
}else {
|
||||
content = value.toString
|
||||
}
|
||||
|
||||
if(om.readTree(content).has("payload")){
|
||||
val temp = om.readTree(content).get("payload")
|
||||
if (temp.isTextual()){ // TextNodes cannot be directly converted to strings
|
||||
content = temp.asText()
|
||||
} else {
|
||||
content = temp.toString
|
||||
}
|
||||
}
|
||||
|
||||
return content
|
||||
}
|
||||
|
||||
}
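
A small standalone sketch (invented object name and record value) of the payload-unwrapping rule that serializeValue applies above, using Jackson as the writer itself does:

```scala
import com.fasterxml.jackson.databind.ObjectMapper

object SerializeValueSketch extends App {
  val om = new ObjectMapper()

  // Invented record value: a Connect JSON envelope with an embedded payload.
  val enveloped = """{"schema":null,"payload":{"id":"1","name":"foo"}}"""

  val node = om.readTree(enveloped)
  val content =
    if (node.has("payload")) {
      val payload = node.get("payload")
      // TextNodes cannot be converted to strings via toString without extra quoting.
      if (payload.isTextual) payload.asText() else payload.toString
    } else enveloped

  println(content) // {"id":"1","name":"foo"}
}
```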
|
||||
|
|
@ -0,0 +1,5 @@
|
|||
package com.microsoft.azure.cosmosdb.kafka.connect.source
|
||||
|
||||
trait ChangeFeedObserver {
|
||||
def processChanges(documentList: List[String])
|
||||
}
|
|
@ -0,0 +1,64 @@
|
|||
package com.microsoft.azure.cosmosdb.kafka.connect.source
|
||||
|
||||
import com.microsoft.azure.cosmosdb._
|
||||
import java.util.concurrent.CountDownLatch
|
||||
|
||||
import com.microsoft.azure.cosmosdb.kafka.connect.common.ErrorHandler.HandleRetriableError
|
||||
|
||||
import scala.collection.JavaConversions._
|
||||
|
||||
class ChangeFeedProcessor(feedCollectionInfo: DocumentCollectionInfo, leaseCollectionInfo: DocumentCollectionInfo, changeFeedProcessorOptions: ChangeFeedProcessorOptions, changeFeedObserver: ChangeFeedObserver) extends HandleRetriableError {
|
||||
|
||||
val asyncClientFeed = DocumentClientBuilder.buildAsyncDocumentClient(feedCollectionInfo.uri, feedCollectionInfo.masterKey)
|
||||
val asyncClientLease = DocumentClientBuilder.buildAsyncDocumentClient(leaseCollectionInfo.uri, leaseCollectionInfo.masterKey)
|
||||
|
||||
val partitionLeaseStateManager = new PartitionLeaseStateManager(asyncClientLease, leaseCollectionInfo.databaseName, leaseCollectionInfo.collectionName)
|
||||
val partitionFeedReaders = createPartitionMap()
|
||||
private var run = true
|
||||
|
||||
private def createPartitionMap(): Map[String, PartitionFeedReader] = {
|
||||
val rangeIdList = getPartitionRangeIds()
|
||||
val feedReaderMap = Map(rangeIdList map { partitionKeyRangeId => (partitionKeyRangeId, new PartitionFeedReader(asyncClientFeed, feedCollectionInfo.databaseName, feedCollectionInfo.collectionName, partitionKeyRangeId, partitionLeaseStateManager, changeFeedProcessorOptions)) }: _*)
|
||||
return feedReaderMap
|
||||
}
|
||||
|
||||
private def getPartitionRangeIds(): List[String] = {
|
||||
val collectionLink = DocumentClientBuilder.getCollectionLink(feedCollectionInfo.databaseName, feedCollectionInfo.collectionName)
|
||||
val changeFeedObservable = asyncClientFeed.readPartitionKeyRanges(collectionLink, null)
|
||||
|
||||
var results = List[PartitionKeyRange]()
|
||||
changeFeedObservable.toBlocking().forEach(x => results = results ++ x.getResults())
|
||||
|
||||
return results.map(p => p.getId)
|
||||
}
|
||||
|
||||
def start(): Unit = {
|
||||
println("Started!")
|
||||
|
||||
spawn {
|
||||
do {
|
||||
val countDownLatch = new CountDownLatch(partitionFeedReaders.size)
|
||||
// Parallel
|
||||
partitionFeedReaders.par.foreach { p => p._2.readChangeFeed(changeFeedObserver.processChanges, countDownLatch) }
|
||||
// Serial:
|
||||
//for ((id, pfr) <- partitionFeedReaders) pfr.readChangeFeed(changeFeedObserver.processChanges, countDownLatch)
|
||||
countDownLatch.await()
|
||||
println("Waiting...")
|
||||
Thread.sleep(changeFeedProcessorOptions.defaultFeedPollDelay)
|
||||
} while (run)
|
||||
}
|
||||
}
|
||||
|
||||
def stop(): Unit = {
|
||||
run = false
|
||||
println("Finished!")
|
||||
}
|
||||
|
||||
private def spawn(p: => Unit) {
|
||||
val t = new Thread() {
|
||||
override def run() = p
|
||||
}
|
||||
t.start()
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,57 @@
|
|||
package com.microsoft.azure.cosmosdb.kafka.connect.source
|
||||
|
||||
import org.apache.kafka.connect.errors.ConnectException
|
||||
|
||||
import com.microsoft.azure.cosmosdb.kafka.connect.common.ErrorHandler.HandleRetriableError
|
||||
|
||||
import scala.reflect._
|
||||
|
||||
|
||||
class ChangeFeedProcessorBuilder(feedCollectionInfo: DocumentCollectionInfo, leaseCollectionInfo: DocumentCollectionInfo, changeFeedProcessorOptions: ChangeFeedProcessorOptions, changeFeedObserver: ChangeFeedObserver) extends HandleRetriableError {
|
||||
|
||||
def this() = this(null, null, new ChangeFeedProcessorOptions(), null)
|
||||
|
||||
def withFeedCollection(newFeedCollectionInfo: DocumentCollectionInfo): ChangeFeedProcessorBuilder = {
|
||||
guardAgainstNull(newFeedCollectionInfo)
|
||||
return new ChangeFeedProcessorBuilder(newFeedCollectionInfo, this.leaseCollectionInfo, this.changeFeedProcessorOptions, this.changeFeedObserver)
|
||||
}
|
||||
|
||||
def withLeaseCollection(newLeaseCollectionInfo: DocumentCollectionInfo): ChangeFeedProcessorBuilder = {
|
||||
guardAgainstNull(newLeaseCollectionInfo)
|
||||
return new ChangeFeedProcessorBuilder(this.feedCollectionInfo, newLeaseCollectionInfo, this.changeFeedProcessorOptions, this.changeFeedObserver)
|
||||
}
|
||||
|
||||
def withProcessorOptions(newChangeFeedProcessorOptions: ChangeFeedProcessorOptions): ChangeFeedProcessorBuilder = {
|
||||
guardAgainstNull(newChangeFeedProcessorOptions)
|
||||
return new ChangeFeedProcessorBuilder(this.feedCollectionInfo, this.leaseCollectionInfo, newChangeFeedProcessorOptions, this.changeFeedObserver)
|
||||
}
|
||||
|
||||
def withObserver(newChangeFeedObserver: ChangeFeedObserver): ChangeFeedProcessorBuilder = {
|
||||
guardAgainstNull(newChangeFeedObserver)
|
||||
return new ChangeFeedProcessorBuilder(this.feedCollectionInfo, this.leaseCollectionInfo, this.changeFeedProcessorOptions, newChangeFeedObserver)
|
||||
}
|
||||
|
||||
def build(): ChangeFeedProcessor = {
|
||||
guardAgainstNull(this.feedCollectionInfo)
|
||||
guardAgainstNull(this.leaseCollectionInfo)
|
||||
guardAgainstNull(this.changeFeedProcessorOptions)
|
||||
guardAgainstNull(this.changeFeedObserver)
|
||||
|
||||
return new ChangeFeedProcessor(this.feedCollectionInfo, this.leaseCollectionInfo, this.changeFeedProcessorOptions, this.changeFeedObserver)
|
||||
}
|
||||
|
||||
private def guardAgainstNull[T: ClassTag](objectToCheck: T): Unit = {
|
||||
try{
|
||||
val className = classTag[T].runtimeClass.getSimpleName()
|
||||
val messageIfNull = "%s can't be null!".format(className)
|
||||
if (objectToCheck == null) throw new NullPointerException(messageIfNull)
|
||||
|
||||
logger.debug("%s Object initialized".format(className))
|
||||
}catch{
|
||||
case f: Throwable =>
|
||||
throw new ConnectException("%s can't be null!".format(classTag[T].runtimeClass.getSimpleName()), f)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
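
A hypothetical wiring of the builder above, assumed to live in the same package; the endpoint, key and database/collection names are placeholders, not values from this repository:

```scala
object ChangeFeedProcessorSketch extends App {
  // Placeholder connection values for illustration only.
  val feed  = new DocumentCollectionInfo("https://localhost:8081/", "<master-key>", "db", "source")
  val lease = new DocumentCollectionInfo("https://localhost:8081/", "<master-key>", "db", "leases")

  val processor = new ChangeFeedProcessorBuilder()
    .withFeedCollection(feed)
    .withLeaseCollection(lease)
    .withProcessorOptions(new ChangeFeedProcessorOptions())
    .withObserver(new ChangeFeedObserver {
      override def processChanges(documentList: List[String]): Unit =
        documentList.foreach(println) // hand each change-feed document to downstream code
    })
    .build()

  processor.start()
  // ... later: processor.stop()
}
```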
|
|
@ -0,0 +1,5 @@
|
|||
package com.microsoft.azure.cosmosdb.kafka.connect.source
|
||||
|
||||
class ChangeFeedProcessorOptions(val queryPartitionsMaxBatchSize: Int, val defaultFeedPollDelay: Int) {
|
||||
def this() = this(100, 2000)
|
||||
}
|
|
@ -0,0 +1,170 @@
|
|||
package com.microsoft.azure.cosmosdb.kafka.connect.source
|
||||
|
||||
import java.util
|
||||
|
||||
import com.google.gson.Gson
|
||||
import com.microsoft.azure.cosmosdb._
|
||||
import com.microsoft.azure.cosmosdb.kafka.connect.CosmosDBProviderImpl
|
||||
import com.microsoft.azure.cosmosdb.kafka.connect.common.ErrorHandler.HandleRetriableError
|
||||
import com.microsoft.azure.cosmosdb.rx._
|
||||
import org.apache.kafka.connect.source.{SourceRecord, SourceTaskContext}
|
||||
|
||||
import scala.collection.JavaConversions._
|
||||
|
||||
class CosmosDBReader(private val client: AsyncDocumentClient,
|
||||
val setting: CosmosDBSourceSettings,
|
||||
private val context: SourceTaskContext) extends HandleRetriableError {
|
||||
|
||||
|
||||
private val SOURCE_PARTITION_FIELD = "partition"
|
||||
private val SOURCE_OFFSET_FIELD = "changeFeedState"
|
||||
|
||||
// Read the initial state from the offset storage when the CosmosDBReader is instantiated for the
|
||||
// assigned partition
|
||||
private val initialState : CosmosDBReaderChangeFeedState = getCosmosDBReaderChangeFeedState(setting.assignedPartition)
|
||||
// Initialize the current state using the same values of the initial state
|
||||
private var currentState = initialState
|
||||
|
||||
// Initialize variables that control the position of the reading cursor
|
||||
private var lastCursorPosition = -1
|
||||
private var currentCursorPosition = -1
|
||||
|
||||
def processChanges(): util.List[SourceRecord] = {
|
||||
|
||||
|
||||
val records = new util.ArrayList[SourceRecord]
|
||||
var bufferSize = 0
|
||||
|
||||
val collectionLink = CosmosDBProviderImpl.getCollectionLink(setting.database, setting.collection)
|
||||
val changeFeedOptions = createChangeFeedOptions()
|
||||
|
||||
try
|
||||
{
|
||||
|
||||
// Initial position of the reading cursor
|
||||
if (initialState != null)
|
||||
lastCursorPosition = initialState.lsn.toInt
|
||||
else
|
||||
lastCursorPosition = currentCursorPosition
|
||||
|
||||
|
||||
val changeFeedObservable = client.queryDocumentChangeFeed(collectionLink, changeFeedOptions)
|
||||
|
||||
changeFeedObservable
|
||||
.doOnNext(feedResponse => {
|
||||
|
||||
val processingStartTime = System.currentTimeMillis()
|
||||
|
||||
// Return the list of documents in the FeedResponse
|
||||
val documents = feedResponse.getResults()
|
||||
|
||||
documents.foreach(doc => {
|
||||
|
||||
// Update the reader state
|
||||
currentState = new CosmosDBReaderChangeFeedState(
|
||||
setting.assignedPartition,
|
||||
feedResponse.getResponseHeaders.get("etag"),
|
||||
doc.get("_lsn").toString
|
||||
)
|
||||
|
||||
// Update the current reader cursor
|
||||
currentCursorPosition = currentState.lsn.toInt
|
||||
|
||||
// Check if the cursor has moved beyond the last processed position
|
||||
if (currentCursorPosition > lastCursorPosition) {
|
||||
|
||||
// Process new document
|
||||
|
||||
logger.debug(s"Sending document ${doc} to the Kafka topic ${setting.topicName}")
|
||||
logger.debug(s"Current State => Partition: ${currentState.partition}, " +
|
||||
s"ContinuationToken: ${currentState.continuationToken}, " +
|
||||
s"LSN: ${currentState.lsn}")
|
||||
|
||||
records.add(new SourceRecord(
|
||||
sourcePartition(setting.assignedPartition),
|
||||
sourceOffset(new Gson().toJson(currentState)),
|
||||
setting.topicName,
|
||||
null,
|
||||
doc.toJson()
|
||||
))
|
||||
|
||||
// Increment the buffer
|
||||
bufferSize = bufferSize + doc.toJson().getBytes().length
|
||||
|
||||
// Calculate the elapsed time
|
||||
val processingElapsedTime = System.currentTimeMillis() - processingStartTime
|
||||
|
||||
// Returns records based on batch size, buffer size or timeout
|
||||
if (records.size >= setting.batchSize || bufferSize >= setting.bufferSize || processingElapsedTime >= setting.timeout) {
|
||||
return records
|
||||
}
|
||||
}
|
||||
})
|
||||
})
|
||||
.doOnCompleted(() => {}) // signal to the consumer that there is no more data available
|
||||
.doOnError((e) => { logger.error(e.getMessage()) }) // signal to the consumer that an error has occurred
|
||||
.subscribe()
|
||||
|
||||
changeFeedObservable.toBlocking.single
|
||||
|
||||
}
|
||||
catch
|
||||
{
|
||||
case f: Throwable =>
|
||||
logger.error(s"Couldn't add documents to the kafka topic: ${f.getMessage}", f)
|
||||
}
|
||||
|
||||
return records
|
||||
}
|
||||
|
||||
private def createChangeFeedOptions(): ChangeFeedOptions = {
|
||||
val changeFeedOptions = new ChangeFeedOptions()
|
||||
changeFeedOptions.setPartitionKeyRangeId(setting.assignedPartition)
|
||||
changeFeedOptions.setMaxItemCount(setting.batchSize)
|
||||
|
||||
if (currentState == null) {
|
||||
changeFeedOptions.setStartFromBeginning(true)
|
||||
}
|
||||
else {
|
||||
|
||||
// If the cursor position has not reached the end of the feed, read again
|
||||
if (currentCursorPosition < currentState.continuationToken.replaceAll("^\"|\"$", "").toInt) {
|
||||
if (initialState != null)
|
||||
changeFeedOptions.setRequestContinuation(initialState.continuationToken)
|
||||
else
|
||||
changeFeedOptions.setStartFromBeginning(true)
|
||||
return changeFeedOptions
|
||||
}
|
||||
|
||||
currentState.continuationToken match {
|
||||
case null => changeFeedOptions.setStartFromBeginning(true)
|
||||
case "" => changeFeedOptions.setStartFromBeginning(true)
|
||||
case t => changeFeedOptions.setRequestContinuation(t)
|
||||
}
|
||||
}
|
||||
return changeFeedOptions
|
||||
}
|
||||
|
||||
private def getCosmosDBReaderChangeFeedState(partition: String): CosmosDBReaderChangeFeedState = {
|
||||
var state: CosmosDBReaderChangeFeedState = null
|
||||
if (context != null) {
|
||||
val offset = context.offsetStorageReader.offset(sourcePartition(partition))
|
||||
if (offset != null) {
|
||||
state = new Gson().fromJson(offset.get(SOURCE_OFFSET_FIELD).toString(), classOf[CosmosDBReaderChangeFeedState])
|
||||
}
|
||||
}
|
||||
return state
|
||||
}
|
||||
|
||||
private def sourcePartition(partition: String): util.Map[String, String] = {
|
||||
val map = new java.util.HashMap[String,String]
|
||||
map.put(SOURCE_PARTITION_FIELD, partition)
|
||||
return map
|
||||
}
|
||||
|
||||
private def sourceOffset(offset: String): util.Map[String, String] = {
|
||||
val map = new java.util.HashMap[String,String]
|
||||
map.put(SOURCE_OFFSET_FIELD, offset)
|
||||
return map
|
||||
}
|
||||
}
|
|
@ -0,0 +1,7 @@
|
|||
package com.microsoft.azure.cosmosdb.kafka.connect.source
|
||||
|
||||
case class CosmosDBReaderChangeFeedState(partition: String,
|
||||
continuationToken: String,
|
||||
lsn: String) {
|
||||
|
||||
}
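
A sketch of how this reader state round-trips through Gson into the Connect offset storage, mirroring the toJson/fromJson calls in CosmosDBReader above; the values shown are illustrative only:

```scala
import com.google.gson.Gson

object ReaderOffsetSketch extends App {
  val gson = new Gson()

  // Illustrative values; at runtime these come from the change feed response headers.
  val state = CosmosDBReaderChangeFeedState(partition = "0", continuationToken = "\"12\"", lsn = "12")

  val offsetJson = gson.toJson(state)
  println(offsetJson) // {"partition":"0","continuationToken":"\"12\"","lsn":"12"}

  val restored = gson.fromJson(offsetJson, classOf[CosmosDBReaderChangeFeedState])
  println(restored.lsn) // 12
}
```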
|
|
@ -0,0 +1,83 @@
|
|||
package com.microsoft.azure.cosmosdb.kafka.connect.source
|
||||
|
||||
import java.util
|
||||
|
||||
import com.microsoft.azure.cosmosdb.kafka.connect.common.ErrorHandler.HandleRetriableError
|
||||
import com.microsoft.azure.cosmosdb._
|
||||
|
||||
import scala.collection.JavaConversions._
|
||||
import com.microsoft.azure.cosmosdb.{ConnectionPolicy, ConsistencyLevel}
|
||||
import com.microsoft.azure.cosmosdb.kafka.connect.{CosmosDBClientSettings, CosmosDBProvider, CosmosDBProviderImpl}
|
||||
import com.microsoft.azure.cosmosdb.kafka.connect.config.{ConnectorConfig, CosmosDBConfig, CosmosDBConfigConstants}
|
||||
import org.apache.kafka.common.config.ConfigDef
|
||||
import org.apache.kafka.connect.connector.Task
|
||||
import org.apache.kafka.connect.source.SourceConnector
|
||||
import org.apache.kafka.connect.util.ConnectorUtils
|
||||
import scala.util.{Failure, Success, Try}
|
||||
import scala.collection.JavaConverters._
|
||||
|
||||
class CosmosDBSourceConnector extends SourceConnector with HandleRetriableError {
|
||||
|
||||
|
||||
private var configProps: util.Map[String, String] = _
|
||||
private var numWorkers: Int = 0
|
||||
val cosmosDBProvider: CosmosDBProvider = CosmosDBProviderImpl
|
||||
override def version(): String = getClass.getPackage.getImplementationVersion
|
||||
|
||||
override def start(props: util.Map[String, String]): Unit = {
|
||||
logger.info("Starting CosmosDBSourceConnector")
|
||||
configProps = props
|
||||
}
|
||||
|
||||
override def taskClass(): Class[_ <: Task] = classOf[CosmosDBSourceTask]
|
||||
|
||||
override def taskConfigs(maxTasks: Int): util.List[util.Map[String, String]] = {
|
||||
try {
|
||||
val config: CosmosDBConfig = CosmosDBConfig(ConnectorConfig.sourceConfigDef, configProps)
|
||||
val database: String = config.getString(CosmosDBConfigConstants.DATABASE_CONFIG)
|
||||
val collection: String = config.getString(CosmosDBConfigConstants.COLLECTION_CONFIG)
|
||||
val settings: CosmosDBClientSettings = CosmosDBClientSettings(
|
||||
config.getString(CosmosDBConfigConstants.CONNECTION_ENDPOINT_CONFIG),
|
||||
config.getPassword(CosmosDBConfigConstants.CONNECTION_MASTERKEY_CONFIG).value(),
|
||||
database,
|
||||
collection,
|
||||
ConnectionPolicy.GetDefault(),
|
||||
ConsistencyLevel.Session
|
||||
)
|
||||
logger.debug("Settings for Cosmos Db connection: ", settings)
|
||||
|
||||
val client = cosmosDBProvider.getClient(settings)
|
||||
|
||||
val collectionLink = CosmosDBProviderImpl.getCollectionLink(database, collection)
|
||||
val changeFeedObservable = client.readPartitionKeyRanges(collectionLink, null)
|
||||
var results = List[PartitionKeyRange]()
|
||||
changeFeedObservable.toBlocking().forEach(x => results = results ++ x.getResults())
|
||||
val numberOfPartitions = results.map(p => p.getId)
|
||||
numWorkers = Math.min(numberOfPartitions.size(), maxTasks)
|
||||
logger.info(s"Setting task configurations for $numWorkers workers.")
|
||||
val groups = ConnectorUtils.groupPartitions(numberOfPartitions, maxTasks)
|
||||
groups
|
||||
.withFilter(g => g.nonEmpty)
|
||||
.map { g =>
|
||||
val taskConfigs = new java.util.HashMap[String, String](this.configProps)
|
||||
taskConfigs.put(CosmosDBConfigConstants.ASSIGNED_PARTITIONS, g.mkString(","))
|
||||
taskConfigs
|
||||
}
|
||||
}
|
||||
catch {
|
||||
case f: Throwable =>
|
||||
logger.error(s"Couldn't initialize CosmosDb with settings: ${f.getMessage}", f)
|
||||
HandleRetriableError(Failure(f))
|
||||
return null
|
||||
}
|
||||
}
|
||||
|
||||
override def config(): ConfigDef = ConnectorConfig.sourceConfigDef
|
||||
|
||||
override def stop(): Unit = {
|
||||
logger.info("Stopping CosmosDBSourceConnector")
|
||||
}
|
||||
|
||||
def getNumberOfWorkers(): Int = numWorkers
|
||||
|
||||
}
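
A small sketch of how taskConfigs() above spreads partition-key-range ids across workers via ConnectorUtils.groupPartitions; the ids and task count are made up:

```scala
import org.apache.kafka.connect.util.ConnectorUtils

import scala.collection.JavaConverters._

object PartitionGroupingSketch extends App {
  // Made-up partition-key-range ids; the connector reads the real ones from the collection.
  val partitionIds = List("0", "1", "2", "3", "4").asJava
  val maxTasks = 2

  val groups = ConnectorUtils.groupPartitions(partitionIds, maxTasks).asScala
  groups.zipWithIndex.foreach { case (group, i) =>
    println(s"task $i -> ${group.asScala.mkString(",")}")
  }
  // Typically prints: task 0 -> 0,1,2  and  task 1 -> 3,4
}
```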
|
|
@ -0,0 +1,12 @@
|
|||
package com.microsoft.azure.cosmosdb.kafka.connect.source
|
||||
|
||||
case class CosmosDBSourceSettings(
|
||||
database: String,
|
||||
collection: String,
|
||||
assignedPartition: String,
|
||||
batchSize: Int,
|
||||
bufferSize: Int,
|
||||
timeout: Int,
|
||||
topicName: String,
|
||||
) {
|
||||
}
|
|
@ -0,0 +1,150 @@
|
|||
package com.microsoft.azure.cosmosdb.kafka.connect.source
|
||||
|
||||
import java.util
|
||||
|
||||
import com.microsoft.azure.cosmosdb.kafka.connect.common.ErrorHandler.HandleRetriableError
|
||||
import com.microsoft.azure.cosmosdb.kafka.connect.config.{ConnectorConfig, CosmosDBConfig, CosmosDBConfigConstants}
|
||||
import com.microsoft.azure.cosmosdb.kafka.connect.{CosmosDBClientSettings, CosmosDBProvider, CosmosDBProviderImpl}
|
||||
import com.microsoft.azure.cosmosdb.rx.AsyncDocumentClient
|
||||
import com.microsoft.azure.cosmosdb.{ConnectionPolicy, ConsistencyLevel}
|
||||
import com.typesafe.scalalogging.StrictLogging
|
||||
import org.apache.kafka.connect.errors.ConnectException
|
||||
import org.apache.kafka.connect.source.{SourceRecord, SourceTask}
|
||||
import com.microsoft.azure.cosmosdb.kafka.connect.processor._
|
||||
import com.microsoft.azure.cosmosdb.kafka.connect.source.CosmosDBReader
|
||||
import scala.collection.JavaConversions._
|
||||
import scala.collection.mutable
|
||||
import scala.util.{Failure, Success, Try}
|
||||
|
||||
class CosmosDBSourceTask extends SourceTask with StrictLogging with HandleRetriableError {
|
||||
|
||||
val readers = mutable.Map.empty[String, CosmosDBReader]
|
||||
private var client: AsyncDocumentClient = null
|
||||
private var database: String = ""
|
||||
private var collection: String = ""
|
||||
private var taskConfig: Option[CosmosDBConfig] = None
|
||||
private var bufferSize: Option[Int] = None
|
||||
private var batchSize: Option[Int] = None
|
||||
private var timeout: Option[Int] = None
|
||||
private var topicName: String = ""
|
||||
private var postProcessors = List.empty[PostProcessor]
|
||||
val cosmosDBProvider: CosmosDBProvider = CosmosDBProviderImpl
|
||||
|
||||
override def start(props: util.Map[String, String]): Unit = {
|
||||
logger.info("Starting CosmosDBSourceTask")
|
||||
|
||||
var config: util.Map[String, String] = null
|
||||
|
||||
if (context != null) {
|
||||
config = if (context.configs().isEmpty) props else context.configs()
|
||||
}
|
||||
else {
|
||||
config = props
|
||||
}
|
||||
|
||||
// Get Configuration for this Task
|
||||
try{
|
||||
taskConfig = Some(CosmosDBConfig(ConnectorConfig.sourceConfigDef, config))
|
||||
//HandleError(Success(config))
|
||||
}
|
||||
catch{
|
||||
case f: Throwable =>
|
||||
logger.error(s"Couldn't start Cosmos DB Source due to configuration error: ${f.getMessage}", f)
|
||||
HandleRetriableError(Failure(f))
|
||||
}
|
||||
|
||||
/*taskConfig = Try(CosmosDBConfig(ConnectorConfig.sourceConfigDef, config)) match {
|
||||
case Failure(f) => throw new ConnectException("Couldn't start CosmosDBSource due to configuration error.", f)
|
||||
case Success(s) => Some(s)
|
||||
}*/
|
||||
|
||||
// Add configured Post-Processors if they exist in the configuration file
|
||||
if(taskConfig.get.getString(CosmosDBConfigConstants.SOURCE_POST_PROCESSOR)!=null){
|
||||
val processorClassNames = taskConfig.get.getString(CosmosDBConfigConstants.SOURCE_POST_PROCESSOR)
|
||||
postProcessors = PostProcessor.createPostProcessorList(processorClassNames, taskConfig.get)
|
||||
}
|
||||
|
||||
|
||||
// Get CosmosDB Connection
|
||||
val endpoint: String = taskConfig.get.getString(CosmosDBConfigConstants.CONNECTION_ENDPOINT_CONFIG)
|
||||
val masterKey: String = taskConfig.get.getPassword(CosmosDBConfigConstants.CONNECTION_MASTERKEY_CONFIG).value()
|
||||
database = taskConfig.get.getString(CosmosDBConfigConstants.DATABASE_CONFIG)
|
||||
collection = taskConfig.get.getString(CosmosDBConfigConstants.COLLECTION_CONFIG)
|
||||
|
||||
// Source Collection
|
||||
val clientSettings = CosmosDBClientSettings(
|
||||
endpoint,
|
||||
masterKey,
|
||||
database,
|
||||
collection,
|
||||
ConnectionPolicy.GetDefault(),
|
||||
ConsistencyLevel.Session
|
||||
)
|
||||
|
||||
try{
|
||||
client = cosmosDBProvider.getClient(clientSettings)
|
||||
logger.info("Connection to CosmosDB established.")
|
||||
}catch{
|
||||
case f: Throwable =>
|
||||
logger.error(s"Couldn't connect to CosmosDB.: ${f.getMessage}", f)
|
||||
HandleRetriableError(Failure(f))
|
||||
}
|
||||
|
||||
|
||||
/*client = Try(CosmosDBProvider.getClient(clientSettings)) match {
|
||||
case Success(conn) =>
|
||||
logger.info("Connection to CosmosDB established.")
|
||||
conn
|
||||
case Failure(f) => throw new ConnectException(s"Couldn't connect to CosmosDB.", f)
|
||||
}*/
|
||||
|
||||
// Get bufferSize and batchSize
|
||||
bufferSize = Some(taskConfig.get.getInt(CosmosDBConfigConstants.READER_BUFFER_SIZE))
|
||||
batchSize = Some(taskConfig.get.getInt(CosmosDBConfigConstants.BATCH_SIZE))
|
||||
timeout = Some(taskConfig.get.getInt(CosmosDBConfigConstants.TIMEOUT))
|
||||
|
||||
// Get Topic
|
||||
topicName = taskConfig.get.getString(CosmosDBConfigConstants.TOPIC_CONFIG)
|
||||
|
||||
// Get the List of Assigned Partitions
|
||||
val assigned = taskConfig.get.getString(CosmosDBConfigConstants.ASSIGNED_PARTITIONS).split(",").toList
|
||||
|
||||
// Set up Readers
|
||||
assigned.map(partition => {
|
||||
val setting = new CosmosDBSourceSettings(database, collection, partition, batchSize.get, bufferSize.get, timeout.get, topicName)
|
||||
readers += partition -> new CosmosDBReader(client, setting, context)
|
||||
})
|
||||
|
||||
}
|
||||
|
||||
override def stop(): Unit = {
|
||||
logger.info("Stopping CosmosDBSourceTask")
|
||||
}
|
||||
|
||||
override def poll(): util.List[SourceRecord] = {
|
||||
try{
|
||||
if(postProcessors.isEmpty){
|
||||
return readers.flatten(reader => reader._2.processChanges()).toList
|
||||
}else{
|
||||
return readers.flatten(reader => reader._2.processChanges()).toList.map(sr => applyPostProcessing(sr))
|
||||
}
|
||||
}catch{
|
||||
case f: Exception =>
|
||||
logger.debug(s"Couldn't create a list of source records ${f.getMessage}", f)
|
||||
HandleRetriableError(Failure(f))
|
||||
return null
|
||||
}
|
||||
return null
|
||||
}
|
||||
|
||||
override def version(): String = getClass.getPackage.getImplementationVersion
|
||||
|
||||
def getReaders(): mutable.Map[String, CosmosDBReader] = readers
|
||||
|
||||
private def applyPostProcessing(sourceRecord: SourceRecord): SourceRecord =
|
||||
postProcessors.foldLeft(sourceRecord)((r, p) => {
|
||||
//println(p.getClass.toString)
|
||||
p.runPostProcess(r)
|
||||
})
|
||||
|
||||
}
|
|
@ -0,0 +1,27 @@
|
|||
package com.microsoft.azure.cosmosdb.kafka.connect.source
|
||||
|
||||
import com.microsoft.azure.cosmosdb.rx._;
|
||||
import com.microsoft.azure.cosmosdb._;
|
||||
|
||||
object DocumentClientBuilder {
|
||||
|
||||
def createConnectionPolicy(): ConnectionPolicy = {
|
||||
val policy = new ConnectionPolicy()
|
||||
policy.setConnectionMode(ConnectionMode.Direct)
|
||||
return policy
|
||||
}
|
||||
|
||||
def buildAsyncDocumentClient(cosmosServiceEndpoint: String, cosmosKey: String): AsyncDocumentClient = {
|
||||
new AsyncDocumentClient.Builder()
|
||||
.withServiceEndpoint(cosmosServiceEndpoint)
|
||||
.withMasterKeyOrResourceToken(cosmosKey)
|
||||
.withConnectionPolicy(createConnectionPolicy())
|
||||
.withConsistencyLevel(ConsistencyLevel.Eventual)
|
||||
.build()
|
||||
}
|
||||
|
||||
def getCollectionLink(databaseName: String, collectionName: String) = "/dbs/%s/colls/%s".format(databaseName, collectionName)
|
||||
|
||||
def getDatabaseLink(databaseName: String) = "/dbs/%s".format(databaseName)
|
||||
|
||||
}
|
|
@ -0,0 +1,5 @@
|
|||
package com.microsoft.azure.cosmosdb.kafka.connect.source
|
||||
|
||||
class DocumentCollectionInfo(val uri: String, val masterKey: String, val databaseName: String, val collectionName: String) {
|
||||
|
||||
}
|
|
@ -0,0 +1,65 @@
|
|||
package com.microsoft.azure.cosmosdb.kafka.connect.source
|
||||
|
||||
import java.util.Properties
|
||||
|
||||
import com.microsoft.azure.cosmosdb.kafka.connect.config.CosmosDBConfigConstants
|
||||
import com.microsoft.azure.cosmosdb.kafka.connect.kafka.KafkaCluster
|
||||
import org.apache.kafka.connect.runtime.distributed.DistributedConfig
|
||||
import org.apache.kafka.connect.runtime.{ConnectorConfig, WorkerConfig}
|
||||
|
||||
object Main {
|
||||
|
||||
var COSMOSDB_TOPIC: String = "test_topic_issue49"
|
||||
|
||||
def main(args: Array[String]): Unit = {
|
||||
val workerProperties: Properties = getWorkerProperties(KafkaCluster.BrokersList.toString)
|
||||
val connectorProperties: Properties = getConnectorProperties()
|
||||
KafkaCluster.startEmbeddedConnect(workerProperties, List(connectorProperties))
|
||||
if (KafkaCluster.kafkaConnectEnabled) {
|
||||
println("Kafka Connector Enabled")
|
||||
}
|
||||
}
|
||||
|
||||
def getWorkerProperties(bootstrapServers: String): Properties = {
|
||||
val workerProperties: Properties = new Properties()
|
||||
workerProperties.put(WorkerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers)
|
||||
workerProperties.put(DistributedConfig.GROUP_ID_CONFIG, "cosmosdb")
|
||||
workerProperties.put(DistributedConfig.CONFIG_TOPIC_CONFIG, "cosmosdb-config")
|
||||
workerProperties.put(DistributedConfig.OFFSET_STORAGE_TOPIC_CONFIG, "cosmosdb-offset")
|
||||
workerProperties.put(DistributedConfig.STATUS_STORAGE_TOPIC_CONFIG, "cosmosdb-status")
|
||||
workerProperties.put(WorkerConfig.KEY_CONVERTER_CLASS_CONFIG, "org.apache.kafka.connect.json.JsonConverter")
|
||||
workerProperties.put(WorkerConfig.VALUE_CONVERTER_CLASS_CONFIG, "org.apache.kafka.connect.json.JsonConverter")
|
||||
workerProperties.put(WorkerConfig.OFFSET_COMMIT_INTERVAL_MS_CONFIG, "30000")
|
||||
workerProperties.put(DistributedConfig.CONFIG_STORAGE_REPLICATION_FACTOR_CONFIG, "1")
|
||||
workerProperties.put(DistributedConfig.OFFSET_STORAGE_PARTITIONS_CONFIG, "1")
|
||||
workerProperties.put(DistributedConfig.OFFSET_STORAGE_REPLICATION_FACTOR_CONFIG, "1")
|
||||
workerProperties.put(DistributedConfig.STATUS_STORAGE_PARTITIONS_CONFIG, "1")
|
||||
workerProperties.put(DistributedConfig.STATUS_STORAGE_REPLICATION_FACTOR_CONFIG, "1")
|
||||
return workerProperties
|
||||
}
|
||||
|
||||
def getConnectorProperties(): Properties = {
|
||||
val connectorProperties: Properties = new Properties()
|
||||
connectorProperties.put(ConnectorConfig.NAME_CONFIG, "CosmosDBSourceConnector")
|
||||
connectorProperties.put(ConnectorConfig.CONNECTOR_CLASS_CONFIG , "com.microsoft.azure.cosmosdb.kafka.connect.source.CosmosDBSourceConnector")
|
||||
connectorProperties.put(ConnectorConfig.TASKS_MAX_CONFIG , "1")
|
||||
connectorProperties.put("connect.cosmosdb.connection.endpoint" , "https://localhost:8888")
|
||||
connectorProperties.put("connect.cosmosdb.master.key", "C2y6yDjf5/R+ob0N8A7Cgv30VRDJIWEHLM+4QDU5DE2nQ9nDuVTqobD4b8mGGyPMbIZnqyMsEcaGQy67XIw/Jw==")
|
||||
connectorProperties.put("connect.cosmosdb.database" , "database")
|
||||
connectorProperties.put("connect.cosmosdb.collection" , "collection1")
|
||||
|
||||
// connectorProperties.put("connect.cosmosdb.connection.endpoint" , "https://dmcosmos.documents.azure.com:443")
|
||||
// connectorProperties.put("connect.cosmosdb.master.key", "YAopQ0edHWK9v8yV7IpCU1WzvFQkPvpHWDGmjhpXC0swlmibZgHkgqVDiTRG3abFM2PfYoWKPOVFjL7OTJOPsA==")
|
||||
// connectorProperties.put("connect.cosmosdb.database" , "kafka-connector")
|
||||
// connectorProperties.put("connect.cosmosdb.collection" , "source")
|
||||
|
||||
connectorProperties.put("connect.cosmosdb.topic.name" , COSMOSDB_TOPIC)
|
||||
connectorProperties.put(CosmosDBConfigConstants.BATCH_SIZE, "100")
|
||||
connectorProperties.put(CosmosDBConfigConstants.TIMEOUT, "1")
|
||||
connectorProperties.put(CosmosDBConfigConstants.SOURCE_POST_PROCESSOR, "com.microsoft.azure.cosmosdb.kafka.connect.processor.source.SelectorSourcePostProcessor")
|
||||
|
||||
|
||||
|
||||
return connectorProperties
|
||||
}
|
||||
}
|
|
@ -0,0 +1,56 @@
|
|||
package com.microsoft.azure.cosmosdb.kafka.connect.source
|
||||
|
||||
import java.util.concurrent.CountDownLatch
|
||||
|
||||
import com.microsoft.azure.cosmosdb.rx._
|
||||
import com.microsoft.azure.cosmosdb._
|
||||
|
||||
import scala.collection.JavaConversions._
|
||||
|
||||
class PartitionFeedReader(asyncClient: AsyncDocumentClient, databaseName: String, collectionName: String, partitionKeyRangeId: String, partitionFeedStateManager: PartitionLeaseStateManager, changeFeedProcessorOptions: ChangeFeedProcessorOptions) {
|
||||
|
||||
var partitionFeedState = partitionFeedStateManager.load(partitionKeyRangeId)
|
||||
|
||||
private def createChangeFeedOptionsFromState(): ChangeFeedOptions = {
|
||||
val changeFeedOptions = new ChangeFeedOptions()
|
||||
changeFeedOptions.setPartitionKeyRangeId(partitionKeyRangeId)
|
||||
changeFeedOptions.setMaxItemCount(changeFeedProcessorOptions.queryPartitionsMaxBatchSize)
|
||||
|
||||
partitionFeedState.continuationToken match {
|
||||
case null => changeFeedOptions.setStartFromBeginning(true)
|
||||
case "" => changeFeedOptions.setStartFromBeginning(true)
|
||||
case t => changeFeedOptions.setRequestContinuation(t)
|
||||
}
|
||||
|
||||
return changeFeedOptions
|
||||
}
|
||||
|
||||
def readChangeFeed(documentProcessor: List[String] => Unit, completionLatch: CountDownLatch) {
|
||||
val collectionLink = "/dbs/%s/colls/%s".format(databaseName, collectionName)
|
||||
val changeFeedOptions = createChangeFeedOptionsFromState()
|
||||
val changeFeedObservable = asyncClient.queryDocumentChangeFeed(collectionLink, changeFeedOptions)
|
||||
|
||||
changeFeedObservable
|
||||
// Process documents
|
||||
.doOnNext(feedResponse => {
|
||||
val documents = feedResponse.getResults().map(d => d.toJson()) // ready to send to Kafka
|
||||
documentProcessor(documents.toList) // callback passing the list of documents
|
||||
})
|
||||
// Logging
|
||||
.doOnNext(feedResponse => {
|
||||
println("Count: " + feedResponse.getResults().length)
|
||||
println("ResponseContinuation: " + feedResponse.getResponseContinuation())
|
||||
})
|
||||
// Save state ... save offset
|
||||
.flatMap(feedResponse => {
|
||||
println("Saving State!")
|
||||
val continuationToken = feedResponse.getResponseContinuation().replaceAll("^\"|\"$", "")
|
||||
partitionFeedState = new PartitionFeedState(partitionKeyRangeId, continuationToken)
|
||||
partitionFeedStateManager.save(partitionFeedState)
|
||||
})
|
||||
.subscribe(
|
||||
v => {}, // Every response - can have multiple documents
|
||||
e => completionLatch.countDown(), // when error
|
||||
() => completionLatch.countDown()) // final execution
|
||||
}
|
||||
}
|
|
@ -0,0 +1,5 @@
|
|||
package com.microsoft.azure.cosmosdb.kafka.connect.source
|
||||
|
||||
class PartitionFeedState(val id: String, val continuationToken: String) {
|
||||
def this(id: String) = this(id, null)
|
||||
}
|
|
@ -0,0 +1,47 @@
|
|||
package com.microsoft.azure.cosmosdb.kafka.connect.source
|
||||
|
||||
import rx.{Observable, _}
|
||||
import com.microsoft.azure.cosmosdb.rx._
|
||||
import com.microsoft.azure.cosmosdb._
|
||||
import com.google.gson._
|
||||
|
||||
|
||||
class PartitionLeaseStateManager(asyncClient: AsyncDocumentClient, databaseName: String, collectionName: String) {
|
||||
|
||||
private val gson = new Gson()
|
||||
|
||||
def save(partitionFeedState: PartitionFeedState): Observable[ResourceResponse[Document]] = {
|
||||
val json = gson.toJson(partitionFeedState)
|
||||
val document = new Document(json)
|
||||
val collectionLink = DocumentClientBuilder.getCollectionLink(databaseName, collectionName)
|
||||
|
||||
val createDocumentObservable = asyncClient.upsertDocument(collectionLink, document, null, false)
|
||||
|
||||
return createDocumentObservable
|
||||
}
|
||||
|
||||
def load(partitionKeyRangeId: String): PartitionFeedState = {
|
||||
val collectionLink = DocumentClientBuilder.getCollectionLink(databaseName, collectionName)
|
||||
val querySpec = new SqlQuerySpec("SELECT * FROM " + collectionName + " where " + collectionName + ".id = @id",
|
||||
new SqlParameterCollection(
|
||||
new SqlParameter("@id", partitionKeyRangeId)
|
||||
))
|
||||
|
||||
val queryOptions = new FeedOptions()
|
||||
queryOptions.setEnableCrossPartitionQuery(true)
|
||||
|
||||
val queryFeedObservable = asyncClient.queryDocuments(collectionLink, querySpec, queryOptions)
|
||||
|
||||
try {
|
||||
val results = queryFeedObservable.toBlocking().single().getResults()
|
||||
val partitionFeedState = results.iterator().next().toJson()
|
||||
return gson.fromJson(partitionFeedState, classOf[PartitionFeedState])
|
||||
}
|
||||
catch {
|
||||
case error: Throwable => {
|
||||
System.err.println("Error when getting last state from partitionKeyRangeId. Details: " + error)
|
||||
return new PartitionFeedState(partitionKeyRangeId)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,47 @@
|
|||
package com.microsoft.azure.cosmosdb.kafka.connect.source
|
||||
|
||||
import java.util
|
||||
import java.util.{Collections, Properties}
|
||||
|
||||
import com.microsoft.azure.cosmosdb.kafka.connect.kafka.KafkaCluster
|
||||
import org.apache.kafka.clients.consumer.{ConsumerConfig, KafkaConsumer}
|
||||
import org.apache.kafka.common.serialization.StringDeserializer
|
||||
|
||||
object SampleConsumer {
|
||||
|
||||
var COSMOSDB_TOPIC: String = "cosmosdb-source-topic"
|
||||
|
||||
def main(args: Array[String]): Unit = {
|
||||
|
||||
try {
|
||||
|
||||
val properties = new Properties()
|
||||
properties.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, KafkaCluster.BrokersList)
|
||||
properties.put(ConsumerConfig.CLIENT_ID_CONFIG, "sample_debugger_consumer-01")
|
||||
properties.put(ConsumerConfig.GROUP_ID_CONFIG, "debugger_consumergroup")
|
||||
properties.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "true")
|
||||
properties.put(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, "1000")
|
||||
properties.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest")
|
||||
properties.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, classOf[StringDeserializer])
|
||||
properties.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, classOf[StringDeserializer])
|
||||
|
||||
val consumer = new KafkaConsumer[String, String](properties)
|
||||
|
||||
consumer.subscribe(Collections.singletonList(COSMOSDB_TOPIC))
|
||||
val documents = new util.ArrayList[String]
|
||||
while (true) {
|
||||
val records = consumer.poll(java.time.Duration.ofMillis(100))
|
||||
records.forEach(r => {
|
||||
val document = r.value()
|
||||
documents.add(document)
|
||||
})
|
||||
}
|
||||
}
|
||||
catch {
|
||||
case e: Exception => {
|
||||
println(s" Exception ${e.getMessage() }")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -1,20 +0,0 @@
|
|||
package com.microsoft.azure;
|
||||
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
import org.junit.Test;
|
||||
|
||||
/**
|
||||
* Unit test for simple App.
|
||||
*/
|
||||
public class AppTest
|
||||
{
|
||||
/**
|
||||
* Rigorous Test :-)
|
||||
*/
|
||||
@Test
|
||||
public void shouldAnswerWithTrue()
|
||||
{
|
||||
assertTrue( true );
|
||||
}
|
||||
}
|
|
@ -0,0 +1,32 @@
|
|||
package com.microsoft.azure.cosmosdb.kafka.connect
|
||||
import java.util.ArrayList
|
||||
import java.util.HashMap
|
||||
import java.util.concurrent.CountDownLatch
|
||||
|
||||
import com.microsoft.azure.cosmosdb.Document
|
||||
import com.microsoft.azure.cosmosdb.rx.AsyncDocumentClient
|
||||
import org.mockito.MockitoSugar.mock
|
||||
|
||||
|
||||
object MockCosmosDBProvider extends CosmosDBProvider {
|
||||
|
||||
var CosmosDBCollections: HashMap[String, ArrayList[Document]] = new HashMap[String, ArrayList[Document]]
|
||||
|
||||
def setupCollections[T](collectionNames: List[String]): Unit ={
|
||||
collectionNames.foreach(c => CosmosDBCollections.put(c, new ArrayList[Document]()))
|
||||
}
|
||||
|
||||
def getDocumentsByCollection(collectionName: String): ArrayList[Document] = {
|
||||
return CosmosDBCollections.get(collectionName)
|
||||
}
|
||||
|
||||
override def upsertDocuments[T](docs: List[T], databaseName: String, collectionName: String, completionLatch: CountDownLatch): Unit = {
|
||||
if(CosmosDBCollections.containsKey(collectionName)){
|
||||
docs.foreach(d => CosmosDBCollections.get(collectionName).add(d.asInstanceOf[Document]))
|
||||
}
|
||||
}
|
||||
|
||||
override def getClient(settings: CosmosDBClientSettings): AsyncDocumentClient = {
|
||||
return mock[AsyncDocumentClient]
|
||||
}
|
||||
}
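
A hypothetical test-style usage of MockCosmosDBProvider, assuming the same package and the Cosmos DB SDK's Document class on the classpath; the database, collection and document values are invented:

```scala
import java.util.concurrent.CountDownLatch

import com.microsoft.azure.cosmosdb.Document

object MockProviderSketch extends App {
  // Register an in-memory collection, then verify what a writer would have upserted.
  MockCosmosDBProvider.setupCollections(List("collection1"))

  val doc = new Document("""{"id":"1","name":"foo"}""")
  MockCosmosDBProvider.upsertDocuments(List(doc), "database", "collection1", new CountDownLatch(1))

  println(MockCosmosDBProvider.getDocumentsByCollection("collection1").size()) // 1
}
```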
|
|
@ -0,0 +1,52 @@
|
|||
package com.microsoft.azure.cosmosdb.kafka.connect
|
||||
|
||||
import java.util
|
||||
import java.util.UUID.randomUUID
|
||||
|
||||
import com.microsoft.azure.cosmosdb.kafka.connect.model.CosmosDBDocumentTest
|
||||
import com.microsoft.azure.cosmosdb.kafka.connect.source.{CosmosDBReader, CosmosDBSourceSettings}
|
||||
import java.util.{ArrayList, Properties, UUID}
|
||||
import java.util.UUID._
|
||||
|
||||
import com.google.gson.Gson
|
||||
import com.microsoft.azure.cosmosdb.rx.AsyncDocumentClient
|
||||
import org.apache.kafka.connect.source.{SourceRecord, SourceTaskContext}
|
||||
import org.apache.kafka.connect.storage.OffsetStorageReader
|
||||
import org.mockito.MockitoSugar.mock
|
||||
|
||||
|
||||
class MockCosmosDBReader (private val client: AsyncDocumentClient,
|
||||
override val setting: CosmosDBSourceSettings,
|
||||
private val context: SourceTaskContext) extends CosmosDBReader(client, setting,context) {
|
||||
|
||||
private val SOURCE_PARTITION_FIELD = "partition"
|
||||
private val SOURCE_OFFSET_FIELD = "changeFeedState"
|
||||
|
||||
override def processChanges(): util.List[SourceRecord] = {
|
||||
//Return a mock doc list
|
||||
|
||||
/* val records = new util.ArrayList[SourceRecord]
|
||||
val jsonFile = """{"id": "9","_rid": "tqZSAOCV8ekBAAAAAAAAAA==","_self": "dbs/tqZSAA==/colls/tqZSAOCV8ek=/docs/tqZSAOCV8ekBAAAAAAAAAA==/","_etag": "\"00000000-0000-0000-2bcf-cab592a001d5\"","_attachments": "attachments/","_ts": 1561519953}"""
|
||||
records.add(new SourceRecord(
|
||||
sourcePartition(setting.assignedPartition),
|
||||
sourceOffset(new Gson().toJson(1)),
|
||||
setting.topicName,
|
||||
null,
|
||||
jsonFile
|
||||
))*/
|
||||
return mock[util.ArrayList[SourceRecord]]
|
||||
|
||||
}
|
||||
private def sourcePartition(partition: String): util.Map[String, String] = {
|
||||
val map = new java.util.HashMap[String,String]
|
||||
map.put(SOURCE_PARTITION_FIELD, partition)
|
||||
return map
|
||||
}
|
||||
|
||||
private def sourceOffset(offset: String): util.Map[String, String] = {
|
||||
val map = new java.util.HashMap[String,String]
|
||||
map.put(SOURCE_OFFSET_FIELD, offset)
|
||||
return map
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,37 @@
|
|||
package com.microsoft.azure.cosmosdb.kafka.connect.common.ErrorHandler
|
||||
|
||||
import org.apache.kafka.connect.errors.{ConnectException, RetriableException}
|
||||
|
||||
import scala.util.{Failure, Try}
|
||||
import org.scalatest.WordSpec
|
||||
|
||||
|
||||
class HandleRetriableErrorTest extends WordSpec with HandleRetriableError {
|
||||
|
||||
initializeErrorHandler(10)
|
||||
|
||||
"should decrement number of retries" in {
|
||||
|
||||
intercept[RetriableException] {
|
||||
try {
|
||||
1 / 0
|
||||
} catch {
|
||||
case t: Throwable =>
|
||||
HandleRetriableError(Failure(t))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
initializeErrorHandler(0)
|
||||
"should throw ConnectException when retries = 0" in {
|
||||
|
||||
intercept[ConnectException] {
|
||||
try {
|
||||
1 / 0
|
||||
} catch {
|
||||
case t: Throwable =>
|
||||
HandleRetriableError(Failure(t))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,62 @@
|
|||
package com.microsoft.azure.cosmosdb.kafka.connect.config
|
||||
|
||||
import org.apache.kafka.common.config.ConfigException
|
||||
import org.scalatest.{Matchers, WordSpec}
|
||||
|
||||
import collection.JavaConverters._
|
||||
|
||||
class CosmosDBConfigTest extends WordSpec with Matchers {
|
||||
"CosmosDBConfig" should {
|
||||
"throw an exception if endpoint not present" in {
|
||||
val map = Map(
|
||||
"foo" -> "f",
|
||||
).asJava
|
||||
|
||||
val caught = intercept[ConfigException] {
|
||||
CosmosDBConfig(ConnectorConfig.baseConfigDef, map)
|
||||
}
|
||||
|
||||
caught.getMessage should startWith(s"""Missing required configuration "${CosmosDBConfigConstants.CONNECTION_ENDPOINT_CONFIG}" """)
|
||||
}
|
||||
|
||||
"throw an exception if master key not present" in {
|
||||
val map = Map(
|
||||
CosmosDBConfigConstants.CONNECTION_ENDPOINT_CONFIG -> "f"
|
||||
).asJava
|
||||
|
||||
val caught = intercept[ConfigException] {
|
||||
CosmosDBConfig(ConnectorConfig.baseConfigDef, map)
|
||||
}
|
||||
|
||||
caught.getMessage should startWith(s"""Missing required configuration "${CosmosDBConfigConstants.CONNECTION_MASTERKEY_CONFIG}" """)
|
||||
}
|
||||
|
||||
"throw an exception if database not present" in {
|
||||
val map = Map(
|
||||
CosmosDBConfigConstants.CONNECTION_ENDPOINT_CONFIG -> "f",
|
||||
CosmosDBConfigConstants.CONNECTION_MASTERKEY_CONFIG -> "f",
|
||||
CosmosDBConfigConstants.COLLECTION_CONFIG -> "f",
|
||||
).asJava
|
||||
|
||||
val caught = intercept[ConfigException] {
|
||||
CosmosDBConfig(ConnectorConfig.baseConfigDef, map)
|
||||
}
|
||||
|
||||
caught.getMessage should startWith(s"""Missing required configuration "${CosmosDBConfigConstants.DATABASE_CONFIG}" """)
|
||||
}
|
||||
|
||||
"throw an exception if collection not present" in {
|
||||
val map = Map(
|
||||
CosmosDBConfigConstants.CONNECTION_ENDPOINT_CONFIG -> "f",
|
||||
CosmosDBConfigConstants.CONNECTION_MASTERKEY_CONFIG -> "f",
|
||||
CosmosDBConfigConstants.DATABASE_CONFIG -> "f",
|
||||
).asJava
|
||||
|
||||
val caught = intercept[ConfigException] {
|
||||
CosmosDBConfig(ConnectorConfig.baseConfigDef, map)
|
||||
}
|
||||
|
||||
caught.getMessage should startWith(s"""Missing required configuration "${CosmosDBConfigConstants.COLLECTION_CONFIG}" """)
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,113 @@
|
|||
package com.microsoft.azure.cosmosdb.kafka.connect.config
|
||||
|
||||
import java.util.Properties
|
||||
|
||||
import com.google.common.base.Strings
|
||||
import com.typesafe.config.ConfigFactory
|
||||
import org.apache.commons.lang3.StringUtils
|
||||
import org.apache.kafka.clients.producer.ProducerConfig
|
||||
import org.apache.kafka.connect.runtime.WorkerConfig
|
||||
import org.apache.kafka.connect.runtime.distributed.DistributedConfig
|
||||
|
||||
object TestConfigurations {
|
||||
|
||||
lazy private val config = ConfigFactory.load()
|
||||
lazy private val CosmosDBConfig = config.getConfig("CosmosDB")
|
||||
|
||||
// Replace ENDPOINT and MASTER_KEY with values from your Azure Cosmos DB account.
|
||||
// The default values are the well-known local Cosmos DB emulator credentials; they are not valid for any production account.
|
||||
var ENDPOINT : String = StringUtils.defaultString(Strings.emptyToNull(CosmosDBConfig.getString("endpoint")), "https://localhost:8081/")
|
||||
var MASTER_KEY: String = StringUtils.defaultString(Strings.emptyToNull(CosmosDBConfig.getString("masterKey")), "C2y6yDjf5/R+ob0N8A7Cgv30VRDJIWEHLM+4QDU5DE2nQ9nDuVTqobD4b8mGGyPMbIZnqyMsEcaGQy67XIw/Jw==")
|
||||
var DATABASE : String = StringUtils.defaultString(Strings.emptyToNull(CosmosDBConfig.getString("database")), "database")
|
||||
var SOURCE_COLLECTION : String = StringUtils.defaultString(Strings.emptyToNull(CosmosDBConfig.getString("collection")), "collection1")
|
||||
var SINK_COLLECTION : String = StringUtils.defaultString(Strings.emptyToNull(CosmosDBConfig.getString("collection")), "collection2")
|
||||
var TOPIC : String = StringUtils.defaultString(Strings.emptyToNull(CosmosDBConfig.getString("topic")), "topic_test")
|
||||
|
||||
def getSourceWorkerProperties(bootstrapServers: String): Properties = {
|
||||
val workerProperties: Properties = new Properties()
|
||||
workerProperties.put(WorkerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers)
|
||||
workerProperties.put(DistributedConfig.GROUP_ID_CONFIG, "cosmosdb")
|
||||
workerProperties.put(DistributedConfig.CONFIG_TOPIC_CONFIG, "cosmosdb-config")
|
||||
workerProperties.put(DistributedConfig.OFFSET_STORAGE_TOPIC_CONFIG, "cosmosdb-offset")
|
||||
workerProperties.put(DistributedConfig.STATUS_STORAGE_TOPIC_CONFIG, "cosmosdb-status")
|
||||
workerProperties.put(WorkerConfig.KEY_CONVERTER_CLASS_CONFIG, "org.apache.kafka.connect.json.JsonConverter")
|
||||
workerProperties.put(WorkerConfig.VALUE_CONVERTER_CLASS_CONFIG, "org.apache.kafka.connect.json.JsonConverter")
|
||||
workerProperties.put("value.converter.schemas.enable", "false")
|
||||
workerProperties.put(WorkerConfig.OFFSET_COMMIT_INTERVAL_MS_CONFIG, "30000")
|
||||
workerProperties.put(DistributedConfig.CONFIG_STORAGE_REPLICATION_FACTOR_CONFIG, "1")
|
||||
workerProperties.put(DistributedConfig.OFFSET_STORAGE_PARTITIONS_CONFIG, "1")
|
||||
workerProperties.put(DistributedConfig.OFFSET_STORAGE_REPLICATION_FACTOR_CONFIG, "1")
|
||||
workerProperties.put(DistributedConfig.STATUS_STORAGE_PARTITIONS_CONFIG, "1")
|
||||
workerProperties.put(DistributedConfig.STATUS_STORAGE_REPLICATION_FACTOR_CONFIG, "1")
|
||||
workerProperties
|
||||
}
|
||||
|
||||
def getSinkWorkerProperties(bootstrapServers: String): Properties = {
|
||||
val workerProperties: Properties = new Properties()
|
||||
workerProperties.put(WorkerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers)
|
||||
workerProperties.put(DistributedConfig.GROUP_ID_CONFIG, "cosmosdb-01")
|
||||
workerProperties.put(DistributedConfig.CONFIG_TOPIC_CONFIG, "cosmosdb-sink-config")
|
||||
workerProperties.put(DistributedConfig.OFFSET_STORAGE_TOPIC_CONFIG, "cosmosdb-sink-offset")
|
||||
workerProperties.put(DistributedConfig.STATUS_STORAGE_TOPIC_CONFIG, "cosmosdb-sink-status")
|
||||
workerProperties.put(WorkerConfig.KEY_CONVERTER_CLASS_CONFIG, "org.apache.kafka.connect.json.JsonConverter")
|
||||
workerProperties.put(WorkerConfig.VALUE_CONVERTER_CLASS_CONFIG, "org.apache.kafka.connect.json.JsonConverter")
|
||||
workerProperties.put("value.converter.schemas.enable", "false")
|
||||
workerProperties.put(WorkerConfig.OFFSET_COMMIT_INTERVAL_MS_CONFIG, "30000")
|
||||
workerProperties.put(DistributedConfig.CONFIG_STORAGE_REPLICATION_FACTOR_CONFIG, "1")
|
||||
workerProperties.put(DistributedConfig.OFFSET_STORAGE_PARTITIONS_CONFIG, "1")
|
||||
workerProperties.put(DistributedConfig.OFFSET_STORAGE_REPLICATION_FACTOR_CONFIG, "1")
|
||||
workerProperties.put(DistributedConfig.STATUS_STORAGE_PARTITIONS_CONFIG, "1")
|
||||
workerProperties.put(DistributedConfig.STATUS_STORAGE_REPLICATION_FACTOR_CONFIG, "1")
|
||||
workerProperties
|
||||
}
|
||||
|
||||
def getSourceConnectorProperties(): Properties = {
|
||||
val connectorProperties: Properties = new Properties()
|
||||
connectorProperties.put(org.apache.kafka.connect.runtime.ConnectorConfig.NAME_CONFIG, "CosmosDBSourceConnector")
|
||||
connectorProperties.put(org.apache.kafka.connect.runtime.ConnectorConfig.CONNECTOR_CLASS_CONFIG , "com.microsoft.azure.cosmosdb.kafka.connect.source.CosmosDBSourceConnector")
|
||||
connectorProperties.put(org.apache.kafka.connect.runtime.ConnectorConfig.TASKS_MAX_CONFIG , "1")
|
||||
connectorProperties.put(CosmosDBConfigConstants.CONNECTION_ENDPOINT_CONFIG, ENDPOINT)
|
||||
connectorProperties.put(CosmosDBConfigConstants.CONNECTION_MASTERKEY_CONFIG, MASTER_KEY)
|
||||
connectorProperties.put(CosmosDBConfigConstants.DATABASE_CONFIG, DATABASE)
|
||||
connectorProperties.put(CosmosDBConfigConstants.COLLECTION_CONFIG, SOURCE_COLLECTION)
|
||||
connectorProperties.put(CosmosDBConfigConstants.TOPIC_CONFIG, TOPIC)
|
||||
connectorProperties.put(CosmosDBConfigConstants.BATCH_SIZE, "10")
|
||||
connectorProperties.put(CosmosDBConfigConstants.READER_BUFFER_SIZE, "1000")
|
||||
connectorProperties.put(CosmosDBConfigConstants.ERRORS_RETRY_TIMEOUT_CONFIG, "3")
|
||||
connectorProperties.put(CosmosDBConfigConstants.SOURCE_POST_PROCESSOR, "com.microsoft.azure.cosmosdb.kafka.connect.processor.source.SelectorSourcePostProcessor")
|
||||
connectorProperties.put(org.apache.kafka.connect.runtime.ConnectorConfig.ERRORS_RETRY_TIMEOUT_CONFIG, "3")
|
||||
connectorProperties
|
||||
}
|
||||
|
||||
def getSinkConnectorProperties(): Properties = {
|
||||
val connectorProperties: Properties = new Properties()
|
||||
connectorProperties.put(org.apache.kafka.connect.runtime.ConnectorConfig.NAME_CONFIG, "CosmosDBSinkConnector")
|
||||
connectorProperties.put(org.apache.kafka.connect.runtime.ConnectorConfig.CONNECTOR_CLASS_CONFIG , "com.microsoft.azure.cosmosdb.kafka.connect.sink.CosmosDBSinkConnector")
|
||||
connectorProperties.put(org.apache.kafka.connect.runtime.ConnectorConfig.TASKS_MAX_CONFIG , "1")
|
||||
connectorProperties.put(CosmosDBConfigConstants.CONNECTION_ENDPOINT_CONFIG, ENDPOINT)
|
||||
connectorProperties.put(CosmosDBConfigConstants.CONNECTION_MASTERKEY_CONFIG, MASTER_KEY)
|
||||
connectorProperties.put(CosmosDBConfigConstants.DATABASE_CONFIG, DATABASE)
|
||||
connectorProperties.put(CosmosDBConfigConstants.COLLECTION_CONFIG, SINK_COLLECTION)
|
||||
connectorProperties.put(CosmosDBConfigConstants.COLLECTION_TOPIC_MAP_CONFIG, s"$SINK_COLLECTION#$TOPIC")
|
||||
connectorProperties.put("topics", TOPIC) // constant required by sink connector
|
||||
connectorProperties.put(CosmosDBConfigConstants.TOPIC_CONFIG, TOPIC )
|
||||
connectorProperties.put(org.apache.kafka.connect.runtime.ConnectorConfig.ERRORS_RETRY_TIMEOUT_CONFIG, "3")
|
||||
// connectorProperties.put(CosmosDBConfigConstants.SINK_POST_PROCESSOR, "com.microsoft.azure.cosmosdb.kafka.connect.processor.sink.SelectorSinkPostProcessor")
|
||||
connectorProperties
|
||||
}
|
||||
|
||||
def getProducerProperties(bootstrapServers: String): Properties = {
|
||||
val producerProperties: Properties = new Properties()
|
||||
producerProperties.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers)
|
||||
producerProperties.put(ProducerConfig.ACKS_CONFIG, "all")
|
||||
producerProperties.put(ProducerConfig.RETRIES_CONFIG, "3")
|
||||
producerProperties.put(ProducerConfig.BATCH_SIZE_CONFIG, "10")
|
||||
producerProperties.put(ProducerConfig.LINGER_MS_CONFIG, "1")
|
||||
producerProperties.put(ProducerConfig.BUFFER_MEMORY_CONFIG, "33554432")
|
||||
producerProperties.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.connect.json.JsonSerializer")
|
||||
producerProperties.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.connect.json.JsonSerializer")
|
||||
producerProperties
|
||||
}
|
||||
|
||||
}
|
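The defaults above are only fallbacks: ConfigFactory.load() resolves a "CosmosDB" block from the classpath (conventionally src/test/resources/application.conf, an assumption about project layout). A hedged sketch of an equivalent block, built inline so it is self-contained; the keys mirror the getString calls above and the endpoint/masterKey values are placeholders:

import com.typesafe.config.ConfigFactory

val sample = ConfigFactory.parseString(
  """
    |CosmosDB {
    |  endpoint   = "https://my-account.documents.azure.com:443/"
    |  masterKey  = "<primary-key>"
    |  database   = "database"
    |  collection = "collection1"
    |  topic      = "topic_test"
    |}
  """.stripMargin)

assert(sample.getConfig("CosmosDB").getString("database") == "database")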
|
@ -0,0 +1,21 @@
|
|||
package com.microsoft.azure.cosmosdb.kafka.connect.model
|
||||
|
||||
|
||||
class Address(var city: String, var state: String) {
|
||||
|
||||
def setCity (city:String) {
|
||||
this.city = city
|
||||
}
|
||||
|
||||
def setState(state: String) {
|
||||
this.state = state
|
||||
}
|
||||
|
||||
def getCity () : String = {
|
||||
city
|
||||
}
|
||||
|
||||
def getState () : String = {
|
||||
state
|
||||
}
|
||||
}
|
|
@ -0,0 +1,29 @@
|
|||
package com.microsoft.azure.cosmosdb.kafka.connect.model
|
||||
|
||||
import java.util.UUID
|
||||
|
||||
class CosmosDBDocumentTest(var id: String, var message: String, var testID: UUID) {
|
||||
def getId(): String = {
|
||||
return id
|
||||
}
|
||||
|
||||
def getMessage(): String = {
|
||||
return message
|
||||
}
|
||||
|
||||
def getTestID(): UUID = {
|
||||
return testID
|
||||
}
|
||||
|
||||
def setId(id: String) = {
|
||||
this.id = id
|
||||
}
|
||||
|
||||
def setMessage(message: String) = {
|
||||
this.message = message
|
||||
}
|
||||
|
||||
def setTestID(testID: UUID) = {
|
||||
this.testID = testID
|
||||
}
|
||||
}
|
|
@ -0,0 +1,14 @@
|
|||
package com.microsoft.azure.cosmosdb.kafka.connect.model
|
||||
|
||||
import java.util.UUID
|
||||
|
||||
case class KafkaPayloadTest(
|
||||
id: String,
|
||||
message: String,
|
||||
testID: UUID,
|
||||
_rid: String,
|
||||
_self: String,
|
||||
_etag: String,
|
||||
_attachments: String,
|
||||
_ts: Long
|
||||
)
|
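This case class mirrors the JSON the source connector emits: the user fields plus the Cosmos DB system properties. A hedged sketch of deserializing such a payload with Gson, the same way the source-task tests later in this change do (all field values below are placeholders):

import com.google.gson.Gson

val json =
  """{"id": "1", "message": "message 1", "testID": "f355b7ff-e522-6906-c169-6d53e7ab046b",
    | "_rid": "rid", "_self": "self", "_etag": "etag", "_attachments": "attachments/", "_ts": 1560891233}""".stripMargin

val payload = new Gson().fromJson(json, classOf[KafkaPayloadTest])
assert(payload.message == "message 1" && payload._ts == 1560891233L)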
|
@ -0,0 +1,76 @@
|
|||
package com.microsoft.azure.cosmosdb.kafka.connect.processor
|
||||
|
||||
import com.google.gson._
|
||||
import com.microsoft.azure.cosmosdb.kafka.connect.config.{ConnectorConfig, CosmosDBConfig, TestConfigurations}
|
||||
import com.microsoft.azure.cosmosdb.kafka.connect.processor.sink.DocumentIdSinkPostProcessor
|
||||
import org.scalatest.{FlatSpec, GivenWhenThen}
|
||||
|
||||
import scala.collection.JavaConverters._
|
||||
|
||||
class DocumentIdSinkPostProcessorTest extends FlatSpec with GivenWhenThen {
|
||||
|
||||
val sourceRecord: String =
|
||||
"""
|
||||
|{
|
||||
| "firstName": "John",
|
||||
| "lastName": "Smith"
|
||||
|}
|
||||
""".stripMargin
|
||||
|
||||
"'id' field" should "be created or replaced with value taken from specified field" in {
|
||||
|
||||
val expectedRecord =
|
||||
"""
|
||||
|{
|
||||
| "firstName": "John",
|
||||
| "lastName": "Smith",
|
||||
| "id": "John"
|
||||
|}
|
||||
""".stripMargin
|
||||
|
||||
Given("an existing field")
|
||||
val connectorProperties = TestConfigurations.getSourceConnectorProperties()
|
||||
connectorProperties.put("connect.cosmosdb.sink.post-processor.documentId.field", "firstName")
|
||||
val config = new CosmosDBConfig(ConnectorConfig.baseConfigDef, connectorProperties.asScala.asJava)
|
||||
|
||||
When("JSON document is processed")
|
||||
val jsonParser = new JsonParser()
|
||||
val json: JsonObject = jsonParser.parse(sourceRecord).getAsJsonObject
|
||||
val postProcessor = new DocumentIdSinkPostProcessor()
|
||||
postProcessor.configure(config)
|
||||
|
||||
Then("'id' is replaced with specified existing field value")
|
||||
val processed = postProcessor.runJsonPostProcess(json)
|
||||
val expected = jsonParser.parse(expectedRecord).getAsJsonObject
|
||||
assert(processed.equals(expected))
|
||||
}
|
||||
|
||||
"null 'id' field" should "be generated if requested field doesn't exists" in {
|
||||
|
||||
val expectedRecord =
|
||||
"""
|
||||
|{
|
||||
| "firstName": "John",
|
||||
| "lastName": "Smith",
|
||||
| "id": null
|
||||
|}
|
||||
""".stripMargin
|
||||
|
||||
Given("a non-existing field")
|
||||
val connectorProperties = TestConfigurations.getSourceConnectorProperties()
|
||||
connectorProperties.put("connect.cosmosdb.sink.post-processor.documentId.field", "notExists")
|
||||
val config = new CosmosDBConfig(ConnectorConfig.baseConfigDef, connectorProperties.asScala.asJava)
|
||||
|
||||
When("JSON document is processed")
|
||||
val jsonParser = new JsonParser()
|
||||
val json: JsonObject = jsonParser.parse(sourceRecord).getAsJsonObject
|
||||
val postProcessor = new DocumentIdSinkPostProcessor()
|
||||
postProcessor.configure(config)
|
||||
|
||||
Then("'id' is set to null")
|
||||
val processed = postProcessor.runJsonPostProcess(json)
|
||||
val expected = jsonParser.parse(expectedRecord).getAsJsonObject
|
||||
assert(processed.equals(expected))
|
||||
}
|
||||
|
||||
}
|
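The behaviour these two cases pin down can be summarised in a few lines. This is a hedged sketch of the expected transformation only, not the post-processor's own implementation: copy the configured field into "id", or write a JSON null when the field is missing.

import com.google.gson.{JsonNull, JsonObject}

def assignDocumentId(json: JsonObject, idField: String): JsonObject = {
  if (json.has(idField)) json.add("id", json.get(idField))   // create or replace "id"
  else json.add("id", JsonNull.INSTANCE)                     // field missing -> "id": null
  json
}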
|
@ -0,0 +1,151 @@
|
|||
package com.microsoft.azure.cosmosdb.kafka.connect.processor
|
||||
|
||||
import scala.collection.JavaConverters._
|
||||
import com.google.gson._
|
||||
import com.microsoft.azure.cosmosdb.kafka.connect.config.{ConnectorConfig, CosmosDBConfig, TestConfigurations}
|
||||
import com.microsoft.azure.cosmosdb.kafka.connect.processor.source.SelectorSourcePostProcessor
|
||||
import org.scalatest.{FlatSpec, GivenWhenThen}
|
||||
|
||||
class SelectorPostProcessorTest extends FlatSpec with GivenWhenThen {
|
||||
|
||||
val sourceRecord: String =
|
||||
"""
|
||||
|{
|
||||
| "firstName": "John",
|
||||
| "lastName": "Smith",
|
||||
| "isAlive": true,
|
||||
| "age": 27,
|
||||
| "address": {
|
||||
| "streetAddress": "21 2nd Street",
|
||||
| "city": "New York",
|
||||
| "state": "NY",
|
||||
| "postalCode": "10021-3100"
|
||||
| },
|
||||
| "phoneNumbers": [
|
||||
| {
|
||||
| "type": "home",
|
||||
| "number": "212 555-1234"
|
||||
| },
|
||||
| {
|
||||
| "type": "office",
|
||||
| "number": "646 555-4567"
|
||||
| },
|
||||
| {
|
||||
| "type": "mobile",
|
||||
| "number": "123 456-7890"
|
||||
| }
|
||||
| ],
|
||||
| "children": [],
|
||||
| "spouse": null,
|
||||
| "id": "f355b7ff-e522-6906-c169-6d53e7ab046b",
|
||||
| "_rid": "tA4eAIlHRkMFAAAAAAAAAA==",
|
||||
| "_self": "dbs/tA4eAA==/colls/tA4eAIlHRkM=/docs/tA4eAIlHRkMFAAAAAAAAAA==/",
|
||||
| "_etag": "\"39022ddc-0000-0700-0000-5d094f610000\"",
|
||||
| "_attachments": "attachments/",
|
||||
| "_ts": 1560891233
|
||||
|}
|
||||
""".stripMargin
|
||||
|
||||
"Post Processor" should "remove configured fields" in {
|
||||
|
||||
val expectedRecord =
|
||||
"""
|
||||
|{
|
||||
| "firstName": "John",
|
||||
| "lastName": "Smith",
|
||||
| "isAlive": true,
|
||||
| "age": 27,
|
||||
| "address": {
|
||||
| "streetAddress": "21 2nd Street",
|
||||
| "city": "New York",
|
||||
| "state": "NY",
|
||||
| "postalCode": "10021-3100"
|
||||
| },
|
||||
| "phoneNumbers": [
|
||||
| {
|
||||
| "type": "home",
|
||||
| "number": "212 555-1234"
|
||||
| },
|
||||
| {
|
||||
| "type": "office",
|
||||
| "number": "646 555-4567"
|
||||
| },
|
||||
| {
|
||||
| "type": "mobile",
|
||||
| "number": "123 456-7890"
|
||||
| }
|
||||
| ],
|
||||
| "children": [],
|
||||
| "spouse": null
|
||||
|}
|
||||
""".stripMargin
|
||||
|
||||
Given("Post Processor configuration")
|
||||
val connectorProperties = TestConfigurations.getSourceConnectorProperties()
|
||||
connectorProperties.put("connect.cosmosdb.source.post-processor.selector.type", "Exclude")
|
||||
connectorProperties.put("connect.cosmosdb.source.post-processor.selector.fields", "id, _rid, _self, _etag, _attachments, _ts, _lsn, _metadata")
|
||||
val config = new CosmosDBConfig(ConnectorConfig.baseConfigDef, connectorProperties.asScala.asJava)
|
||||
|
||||
When("JSON document is processed")
|
||||
val jsonParser = new JsonParser()
|
||||
val json: JsonObject = jsonParser.parse(sourceRecord).getAsJsonObject
|
||||
val postProcessor = new SelectorSourcePostProcessor()
|
||||
postProcessor.configure(config)
|
||||
|
||||
Then("specified JSON properties are removed")
|
||||
val processed = postProcessor.runJsonPostProcess(json)
|
||||
val expected = jsonParser.parse(expectedRecord).getAsJsonObject
|
||||
assert(processed.equals(expected))
|
||||
}
|
||||
|
||||
"Post Processor" should "keep only configured fields" in {
|
||||
|
||||
val expectedRecord =
|
||||
"""
|
||||
|{
|
||||
| "firstName": "John",
|
||||
| "lastName": "Smith",
|
||||
| "address": {
|
||||
| "streetAddress": "21 2nd Street",
|
||||
| "city": "New York",
|
||||
| "state": "NY",
|
||||
| "postalCode": "10021-3100"
|
||||
| },
|
||||
| "phoneNumbers": [
|
||||
| {
|
||||
| "type": "home",
|
||||
| "number": "212 555-1234"
|
||||
| },
|
||||
| {
|
||||
| "type": "office",
|
||||
| "number": "646 555-4567"
|
||||
| },
|
||||
| {
|
||||
| "type": "mobile",
|
||||
| "number": "123 456-7890"
|
||||
| }
|
||||
| ],
|
||||
| "children": [],
|
||||
| "spouse": null
|
||||
|}
|
||||
""".stripMargin
|
||||
|
||||
Given("Post Processor configuration")
|
||||
val connectorProperties = TestConfigurations.getSourceConnectorProperties()
|
||||
connectorProperties.put("connect.cosmosdb.source.post-processor.selector.type", "Include")
|
||||
connectorProperties.put("connect.cosmosdb.source.post-processor.selector.fields", "firstName, lastName, address, phoneNumbers, children, spouse")
|
||||
val config = new CosmosDBConfig(ConnectorConfig.baseConfigDef, connectorProperties.asScala.asJava)
|
||||
|
||||
When("JSON document is processed")
|
||||
val jsonParser = new JsonParser()
|
||||
val json: JsonObject = jsonParser.parse(sourceRecord).getAsJsonObject
|
||||
val postProcessor = new SelectorSourcePostProcessor()
|
||||
postProcessor.configure(config)
|
||||
|
||||
Then("only specified JSON properties are kept")
|
||||
val processed = postProcessor.runJsonPostProcess(json)
|
||||
val expected = jsonParser.parse(expectedRecord).getAsJsonObject
|
||||
assert(processed.equals(expected))
|
||||
}
|
||||
|
||||
}
|
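Summarising what the Exclude and Include cases above assert, here is a hedged sketch of the selection rule itself (an illustration only, not the connector's SelectorSourcePostProcessor code):

import com.google.gson.JsonObject
import scala.collection.JavaConverters._

def select(json: JsonObject, fields: Set[String], selectorType: String): JsonObject = {
  val keys = json.entrySet().asScala.map(_.getKey).toList      // snapshot before mutating
  selectorType match {
    case "Exclude" => keys.filter(fields.contains).foreach(json.remove)     // drop the listed fields
    case "Include" => keys.filterNot(fields.contains).foreach(json.remove)  // keep only the listed fields
  }
  json
}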
|
@ -0,0 +1,72 @@
|
|||
package com.microsoft.azure.cosmosdb.kafka.connect.processor
|
||||
|
||||
import java.util.Properties
|
||||
|
||||
import com.microsoft.azure.cosmosdb.kafka.connect.config.{CosmosDBConfigConstants, TestConfigurations}
|
||||
import com.microsoft.azure.cosmosdb.kafka.connect.kafka.KafkaCluster
|
||||
import org.apache.kafka.connect.runtime.WorkerConfig
|
||||
import org.apache.kafka.connect.runtime.distributed.DistributedConfig
|
||||
|
||||
// TODO: This should be removed from here and refactored into an Integration Test
|
||||
|
||||
object SinkPostProcessorTest {
|
||||
|
||||
var COSMOSDB_TOPIC: String = "cosmosdb-source-topic"
|
||||
|
||||
def main(args: Array[String]): Unit = {
|
||||
val workerProperties: Properties = getWorkerProperties(KafkaCluster.BrokersList.toString)
|
||||
val connectorProperties: Properties = getConnectorProperties()
|
||||
|
||||
// Add Sink Post Processors
|
||||
val postProcessors =
|
||||
"com.microsoft.azure.cosmosdb.kafka.connect.processor.sink.DocumentIdSinkPostProcessor" ::
|
||||
"com.microsoft.azure.cosmosdb.kafka.connect.processor.sink.SelectorSinkPostProcessor" ::
|
||||
"com.microsoft.azure.cosmosdb.kafka.connect.processor.SampleConsoleWriterPostProcessor" ::
|
||||
Nil
|
||||
connectorProperties.put(CosmosDBConfigConstants.SINK_POST_PROCESSOR, postProcessors.mkString(","))
|
||||
|
||||
// Configure Sink Post Processor
|
||||
connectorProperties.put("connect.cosmosdb.sink.post-processor.selector.type", "Include")
|
||||
connectorProperties.put("connect.cosmosdb.sink.post-processor.selector.fields", "id, firstName, lastName, age, address, children, spouse")
|
||||
connectorProperties.put("connect.cosmosdb.sink.post-processor.documentId.field", "lastName")
|
||||
|
||||
KafkaCluster.startEmbeddedConnect(workerProperties, List(connectorProperties))
|
||||
if (KafkaCluster.kafkaConnectEnabled) {
|
||||
println("Kafka Connector Enabled")
|
||||
}
|
||||
}
|
||||
|
||||
def getWorkerProperties(bootstrapServers: String): Properties = {
|
||||
val workerProperties: Properties = new Properties()
|
||||
|
||||
workerProperties.put(WorkerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers)
|
||||
workerProperties.put(DistributedConfig.GROUP_ID_CONFIG, "cosmosdb-01")
|
||||
workerProperties.put(DistributedConfig.CONFIG_TOPIC_CONFIG, "cosmosdb-sink-config")
|
||||
workerProperties.put(DistributedConfig.OFFSET_STORAGE_TOPIC_CONFIG, "cosmosdb-sink-offset")
|
||||
workerProperties.put(DistributedConfig.STATUS_STORAGE_TOPIC_CONFIG, "cosmosdb-sink-status")
|
||||
workerProperties.put(WorkerConfig.KEY_CONVERTER_CLASS_CONFIG, "org.apache.kafka.connect.json.JsonConverter")
|
||||
workerProperties.put(WorkerConfig.VALUE_CONVERTER_CLASS_CONFIG, "org.apache.kafka.connect.json.JsonConverter")
|
||||
workerProperties.put(WorkerConfig.OFFSET_COMMIT_INTERVAL_MS_CONFIG, "30000")
|
||||
workerProperties.put(DistributedConfig.CONFIG_STORAGE_REPLICATION_FACTOR_CONFIG, "1")
|
||||
workerProperties.put(DistributedConfig.OFFSET_STORAGE_PARTITIONS_CONFIG, "1")
|
||||
workerProperties.put(DistributedConfig.OFFSET_STORAGE_REPLICATION_FACTOR_CONFIG, "1")
|
||||
workerProperties.put(DistributedConfig.STATUS_STORAGE_PARTITIONS_CONFIG, "1")
|
||||
workerProperties.put(DistributedConfig.STATUS_STORAGE_REPLICATION_FACTOR_CONFIG, "1")
|
||||
|
||||
workerProperties
|
||||
}
|
||||
|
||||
def getConnectorProperties(): Properties = {
|
||||
val connectorProperties = TestConfigurations.getSinkConnectorProperties()
|
||||
|
||||
connectorProperties.put(CosmosDBConfigConstants.COLLECTION_CONFIG, "destination")
|
||||
connectorProperties.put(CosmosDBConfigConstants.TOPIC_CONFIG, COSMOSDB_TOPIC)
|
||||
connectorProperties.put("topics", COSMOSDB_TOPIC)
|
||||
connectorProperties.put(CosmosDBConfigConstants.ERRORS_RETRY_TIMEOUT_CONFIG, "3")
|
||||
|
||||
|
||||
connectorProperties
|
||||
}
|
||||
|
||||
|
||||
}
|
|
@ -0,0 +1,75 @@
|
|||
package com.microsoft.azure.cosmosdb.kafka.connect.processor
|
||||
|
||||
import java.util.Properties
|
||||
|
||||
import com.microsoft.azure.cosmosdb.kafka.connect.config.{CosmosDBConfigConstants, TestConfigurations}
|
||||
import com.microsoft.azure.cosmosdb.kafka.connect.kafka.KafkaCluster
|
||||
import org.apache.kafka.connect.runtime.WorkerConfig
|
||||
import org.apache.kafka.connect.runtime.distributed.DistributedConfig
|
||||
|
||||
// TODO: This should be removed from here and refactored into an Integration Test
|
||||
|
||||
object SourcePostProcessorTest {
|
||||
|
||||
var COSMOSDB_TOPIC: String = "cosmosdb-source-topic"
|
||||
|
||||
def main(args: Array[String]): Unit = {
|
||||
|
||||
val workerProperties: Properties = getWorkerProperties(KafkaCluster.BrokersList.toString)
|
||||
val connectorProperties: Properties = getConnectorProperties()
|
||||
|
||||
// Add Source Post Processors
|
||||
val postProcessors =
|
||||
"com.microsoft.azure.cosmosdb.kafka.connect.processor.source.SelectorSourcePostProcessor" ::
|
||||
"com.microsoft.azure.cosmosdb.kafka.connect.processor.SampleConsoleWriterPostProcessor" ::
|
||||
Nil
|
||||
connectorProperties.put(CosmosDBConfigConstants.SOURCE_POST_PROCESSOR, postProcessors.mkString(","))
|
||||
|
||||
// Configure Source Post Processor
|
||||
connectorProperties.put("connect.cosmosdb.source.post-processor.selector.type", "Exclude")
|
||||
connectorProperties.put("connect.cosmosdb.source.post-processor.selector.fields", "id, _rid, _self, _etag, _attachments, _ts, _lsn, _metadata")
|
||||
|
||||
//connectorProperties.put("connect.cosmosdb.source.post-processor.selector.type", "Include")
|
||||
//connectorProperties.put("connect.cosmosdb.source.post-processor.selector.fields", "id, firstName, lastName, age")
|
||||
|
||||
// Run Embedded Kafka Cluster
|
||||
KafkaCluster.startEmbeddedConnect(workerProperties, List(connectorProperties))
|
||||
if (KafkaCluster.kafkaConnectEnabled) {
|
||||
println("Kafka Connector Enabled")
|
||||
}
|
||||
}
|
||||
|
||||
def getWorkerProperties(bootstrapServers: String): Properties = {
|
||||
val workerProperties: Properties = new Properties()
|
||||
|
||||
workerProperties.put(WorkerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers)
|
||||
workerProperties.put(DistributedConfig.GROUP_ID_CONFIG, "cosmosdb")
|
||||
workerProperties.put(DistributedConfig.CONFIG_TOPIC_CONFIG, "cosmosdb-config")
|
||||
workerProperties.put(DistributedConfig.OFFSET_STORAGE_TOPIC_CONFIG, "cosmosdb-offset")
|
||||
workerProperties.put(DistributedConfig.STATUS_STORAGE_TOPIC_CONFIG, "cosmosdb-status")
|
||||
workerProperties.put(WorkerConfig.KEY_CONVERTER_CLASS_CONFIG, "org.apache.kafka.connect.json.JsonConverter")
|
||||
workerProperties.put(WorkerConfig.VALUE_CONVERTER_CLASS_CONFIG, "org.apache.kafka.connect.json.JsonConverter")
|
||||
workerProperties.put(WorkerConfig.OFFSET_COMMIT_INTERVAL_MS_CONFIG, "30000")
|
||||
workerProperties.put(DistributedConfig.CONFIG_STORAGE_REPLICATION_FACTOR_CONFIG, "1")
|
||||
workerProperties.put(DistributedConfig.OFFSET_STORAGE_PARTITIONS_CONFIG, "1")
|
||||
workerProperties.put(DistributedConfig.OFFSET_STORAGE_REPLICATION_FACTOR_CONFIG, "1")
|
||||
workerProperties.put(DistributedConfig.STATUS_STORAGE_PARTITIONS_CONFIG, "1")
|
||||
workerProperties.put(DistributedConfig.STATUS_STORAGE_REPLICATION_FACTOR_CONFIG, "1")
|
||||
|
||||
workerProperties
|
||||
}
|
||||
|
||||
def getConnectorProperties(): Properties = {
|
||||
val connectorProperties = TestConfigurations.getSourceConnectorProperties()
|
||||
|
||||
connectorProperties.put(CosmosDBConfigConstants.COLLECTION_CONFIG, "source")
|
||||
connectorProperties.put(CosmosDBConfigConstants.TOPIC_CONFIG, COSMOSDB_TOPIC)
|
||||
connectorProperties.put("topics", COSMOSDB_TOPIC)
|
||||
|
||||
connectorProperties
|
||||
}
|
||||
}
|
|
@ -0,0 +1,43 @@
|
|||
package com.microsoft.azure.cosmosdb.kafka.connect.provider
|
||||
|
||||
import java.util.concurrent.CountDownLatch
|
||||
|
||||
import com.microsoft.azure.cosmosdb.kafka.connect.config.TestConfigurations
|
||||
import com.microsoft.azure.cosmosdb.kafka.connect.{CosmosDBClientSettings, CosmosDBProviderImpl}
|
||||
import com.microsoft.azure.cosmosdb.{ConnectionPolicy, ConsistencyLevel}
|
||||
import com.typesafe.scalalogging.LazyLogging
|
||||
import org.apache.kafka.connect.errors.ConnectException
|
||||
import org.scalatest.{FlatSpec, GivenWhenThen}
|
||||
|
||||
import scala.util.{Failure, Success, Try}
|
||||
|
||||
class CosmosDBProviderImplTest extends FlatSpec with GivenWhenThen with LazyLogging {
|
||||
|
||||
"CosmosDBProviderTest" should "read collection with a given name" in {
|
||||
Given("A collection name")
|
||||
val clientSettings = CosmosDBClientSettings(
|
||||
TestConfigurations.ENDPOINT,
|
||||
TestConfigurations.MASTER_KEY,
|
||||
TestConfigurations.DATABASE,
|
||||
TestConfigurations.SOURCE_COLLECTION,
|
||||
ConnectionPolicy.GetDefault(),
|
||||
ConsistencyLevel.Session
|
||||
)
|
||||
val client = Try(CosmosDBProviderImpl.getClient(clientSettings)) match {
|
||||
case Success(conn) =>
|
||||
logger.info("Connection to CosmosDB established.")
|
||||
conn
|
||||
case Failure(f) => throw new ConnectException(s"Couldn't connect to CosmosDB.", f)
|
||||
}
|
||||
|
||||
When("Call CosmosDB readcollection")
|
||||
logger.info("readCollection in CosmosDB .")
|
||||
|
||||
val docCollQry = CosmosDBProviderImpl.queryCollection(TestConfigurations.DATABASE, TestConfigurations.SOURCE_COLLECTION, new CountDownLatch(1)).toBlocking.single
|
||||
logger.info(docCollQry.getResults.size.toString)
|
||||
|
||||
Then(s"Verify collection of messages is equal to inserted")
|
||||
assert(docCollQry.getResults.size != 0)
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,34 @@
|
|||
package com.microsoft.azure.cosmosdb.kafka.connect.sink
|
||||
|
||||
import com.google.common.collect.Maps
|
||||
import com.microsoft.azure.cosmosdb.kafka.connect.config.{CosmosDBConfigConstants, TestConfigurations}
|
||||
import org.apache.kafka.connect.runtime.ConnectorConfig
|
||||
import org.scalatest.{FlatSpec, GivenWhenThen}
|
||||
|
||||
|
||||
class CosmosDBSinkConnectorTest extends FlatSpec with GivenWhenThen {
|
||||
"CosmosDBSinkConnector" should "Validate all input properties and generate right set of task config properties" in {
|
||||
Given("Valid set of input properties")
|
||||
val props = TestConfigurations.getSinkConnectorProperties()
|
||||
val connector = new CosmosDBSinkConnector
|
||||
When("Start and TaskConfig are called in right order")
|
||||
connector.start(Maps.fromProperties(props))
|
||||
val numTasks = props.getProperty(ConnectorConfig.TASKS_MAX_CONFIG).toInt
|
||||
val taskConfigs = connector.taskConfigs(numTasks)
|
||||
|
||||
Then("The TaskConfigs have all the expected properties")
|
||||
assert(taskConfigs.size() == numTasks)
|
||||
for (i <- 0 until numTasks) {
|
||||
val taskConfig: java.util.Map[String, String] = taskConfigs.get(i)
|
||||
assert(taskConfig.containsKey(ConnectorConfig.NAME_CONFIG))
|
||||
assert(taskConfig.containsKey(ConnectorConfig.CONNECTOR_CLASS_CONFIG))
|
||||
assert(taskConfig.containsKey(ConnectorConfig.TASKS_MAX_CONFIG))
|
||||
assert(taskConfig.containsKey(CosmosDBConfigConstants.CONNECTION_ENDPOINT_CONFIG))
|
||||
assert(taskConfig.containsKey(CosmosDBConfigConstants.CONNECTION_MASTERKEY_CONFIG))
|
||||
assert(taskConfig.containsKey(CosmosDBConfigConstants.DATABASE_CONFIG))
|
||||
assert(taskConfig.containsKey(CosmosDBConfigConstants.COLLECTION_TOPIC_MAP_CONFIG))
|
||||
assert(taskConfig.containsKey(CosmosDBConfigConstants.COLLECTION_CONFIG))
|
||||
assert(taskConfig.containsKey(CosmosDBConfigConstants.TOPIC_CONFIG))
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,19 @@
|
|||
package com.microsoft.azure.cosmosdb.kafka.connect.sink
|
||||
|
||||
import java.util.Properties
|
||||
|
||||
import com.microsoft.azure.cosmosdb.kafka.connect.config.TestConfigurations
|
||||
import com.microsoft.azure.cosmosdb.kafka.connect.config.CosmosDBConfigConstants
|
||||
import com.microsoft.azure.cosmosdb.kafka.connect.kafka.KafkaCluster
|
||||
|
||||
object SinkConnectWriterTest {
|
||||
|
||||
def main(args: Array[String]): Unit = {
|
||||
val workerProperties: Properties = TestConfigurations.getSinkWorkerProperties(KafkaCluster.BrokersList.toString)
|
||||
val connectorProperties: Properties = TestConfigurations.getSinkConnectorProperties()
|
||||
KafkaCluster.startEmbeddedConnect(workerProperties, List(connectorProperties))
|
||||
if (KafkaCluster.kafkaConnectEnabled) {
|
||||
println("Kafka Connector Enabled")
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,156 @@
|
|||
package com.microsoft.azure.cosmosdb.kafka.connect.sink
|
||||
|
||||
import java.util.ArrayList
|
||||
|
||||
import com.microsoft.azure.cosmosdb.kafka.connect.MockCosmosDBProvider
|
||||
import com.microsoft.azure.cosmosdb.kafka.connect.config.TestConfigurations.{DATABASE, ENDPOINT, MASTER_KEY}
|
||||
import com.microsoft.azure.cosmosdb.kafka.connect.config.CosmosDBConfigConstants
|
||||
import org.apache.kafka.connect.data.Schema
|
||||
import org.apache.kafka.connect.sink.SinkRecord
|
||||
import org.scalatest.{FlatSpec, GivenWhenThen}
|
||||
import scala.collection.JavaConverters._
|
||||
import scala.collection.mutable
|
||||
|
||||
|
||||
class CosmosDBSinkTaskTest extends FlatSpec with GivenWhenThen {
|
||||
|
||||
val PARTITION = 0
|
||||
|
||||
private val TOPIC = "topic"
|
||||
private val TOPIC_2 = "topic2"
|
||||
private val TOPIC_3 = "topic3"
|
||||
private val TOPIC_4 = "topic4"
|
||||
private val TOPIC_5 = "topic5"
|
||||
|
||||
private val COLLECTION = "collection"
|
||||
private val COLLECTION_2 = "collection2"
|
||||
private val COLLECTION_3 = "collection3"
|
||||
|
||||
|
||||
"CosmosDBSinkConnector start" should "Populate a simple collection topic map according to the configuration in settings" in {
|
||||
Given("A Cosmos DB Provider and settings with a collection topic mapping")
|
||||
val mockCosmosProvider = MockCosmosDBProvider
|
||||
mockCosmosProvider.setupCollections(List(COLLECTION))
|
||||
|
||||
val sinkTask = new CosmosDBSinkTask { override val cosmosDBProvider = mockCosmosProvider }
|
||||
val map = Map(
|
||||
org.apache.kafka.connect.runtime.ConnectorConfig.NAME_CONFIG -> "CosmosDBSinkConnector",
|
||||
org.apache.kafka.connect.runtime.ConnectorConfig.CONNECTOR_CLASS_CONFIG -> "com.microsoft.azure.cosmosdb.kafka.connect.sink.CosmosDBSinkConnector",
|
||||
org.apache.kafka.connect.runtime.ConnectorConfig.TASKS_MAX_CONFIG -> "1",
|
||||
CosmosDBConfigConstants.CONNECTION_ENDPOINT_CONFIG -> ENDPOINT,
|
||||
CosmosDBConfigConstants.CONNECTION_MASTERKEY_CONFIG -> MASTER_KEY,
|
||||
CosmosDBConfigConstants.DATABASE_CONFIG -> DATABASE,
|
||||
CosmosDBConfigConstants.COLLECTION_CONFIG -> s"$COLLECTION",
|
||||
CosmosDBConfigConstants.COLLECTION_TOPIC_MAP_CONFIG -> s"$COLLECTION#$TOPIC",
|
||||
"topics" -> s"$TOPIC",
|
||||
CosmosDBConfigConstants.TOPIC_CONFIG -> s"$TOPIC",
|
||||
CosmosDBConfigConstants.SINK_POST_PROCESSOR -> "com.microsoft.azure.cosmosdb.kafka.connect.processor.sink.SelectorSinkPostProcessor"
|
||||
).asJava
|
||||
|
||||
When("The sink task is started")
|
||||
sinkTask.start(map)
|
||||
|
||||
Then("The collection topic map should contain the proper mapping")
|
||||
val expectedMap = mutable.HashMap[String, String](TOPIC -> COLLECTION)
|
||||
assert(sinkTask.collectionTopicMap == expectedMap)
|
||||
}
|
||||
|
||||
|
||||
"CosmosDBSinkConnector start" should "Populate a complex collection topic map according to the configuration in settings" in {
|
||||
Given("A Cosmos DB Provider and settings with a collection topic mapping")
|
||||
val mockCosmosProvider = MockCosmosDBProvider
|
||||
mockCosmosProvider.setupCollections(List(COLLECTION))
|
||||
|
||||
val sinkTask = new CosmosDBSinkTask { override val cosmosDBProvider = mockCosmosProvider }
|
||||
val map = Map(
|
||||
org.apache.kafka.connect.runtime.ConnectorConfig.NAME_CONFIG -> "CosmosDBSinkConnector",
|
||||
org.apache.kafka.connect.runtime.ConnectorConfig.CONNECTOR_CLASS_CONFIG -> "com.microsoft.azure.cosmosdb.kafka.connect.sink.CosmosDBSinkConnector",
|
||||
org.apache.kafka.connect.runtime.ConnectorConfig.TASKS_MAX_CONFIG -> "1",
|
||||
CosmosDBConfigConstants.CONNECTION_ENDPOINT_CONFIG -> ENDPOINT,
|
||||
CosmosDBConfigConstants.CONNECTION_MASTERKEY_CONFIG -> MASTER_KEY,
|
||||
CosmosDBConfigConstants.DATABASE_CONFIG -> DATABASE,
|
||||
CosmosDBConfigConstants.COLLECTION_CONFIG -> s"$COLLECTION,$COLLECTION_2,$COLLECTION_3",
|
||||
CosmosDBConfigConstants.COLLECTION_TOPIC_MAP_CONFIG -> s"$COLLECTION#$TOPIC,$COLLECTION#$TOPIC_2,$COLLECTION_2#$TOPIC_3,$COLLECTION_3#$TOPIC_4,$COLLECTION_3#$TOPIC_5",
|
||||
"topics" -> s"$TOPIC,$TOPIC_2,$TOPIC_3,$TOPIC_4,$TOPIC_5",
|
||||
CosmosDBConfigConstants.TOPIC_CONFIG -> s"$TOPIC,$TOPIC_2,$TOPIC_3,$TOPIC_4,$TOPIC_5",
|
||||
CosmosDBConfigConstants.SINK_POST_PROCESSOR -> "com.microsoft.azure.cosmosdb.kafka.connect.processor.sink.SelectorSinkPostProcessor"
|
||||
).asJava
|
||||
|
||||
When("The sink task is started")
|
||||
sinkTask.start(map)
|
||||
|
||||
Then("The collection topic map should contain the proper mapping")
|
||||
val expectedMap = mutable.HashMap[String, String](TOPIC -> COLLECTION,
|
||||
TOPIC_2 -> COLLECTION,
|
||||
TOPIC_3 -> COLLECTION_2,
|
||||
TOPIC_4 -> COLLECTION_3,
|
||||
TOPIC_5 -> COLLECTION_3)
|
||||
assert(sinkTask.collectionTopicMap == expectedMap)
|
||||
}
|
||||
|
||||
|
||||
"CosmosDBSinkConnector start" should "Populate the collection topic map with collection name as topic name if no config is given" in {
|
||||
Given("A Cosmos DB Provider and settings without a collection topic mapping")
|
||||
val mockCosmosProvider = MockCosmosDBProvider
|
||||
mockCosmosProvider.setupCollections(List(COLLECTION))
|
||||
|
||||
val sinkTask = new CosmosDBSinkTask { override val cosmosDBProvider = mockCosmosProvider }
|
||||
val map = Map(
|
||||
org.apache.kafka.connect.runtime.ConnectorConfig.NAME_CONFIG -> "CosmosDBSinkConnector",
|
||||
org.apache.kafka.connect.runtime.ConnectorConfig.CONNECTOR_CLASS_CONFIG -> "com.microsoft.azure.cosmosdb.kafka.connect.sink.CosmosDBSinkConnector",
|
||||
org.apache.kafka.connect.runtime.ConnectorConfig.TASKS_MAX_CONFIG -> "1",
|
||||
CosmosDBConfigConstants.CONNECTION_ENDPOINT_CONFIG -> ENDPOINT,
|
||||
CosmosDBConfigConstants.CONNECTION_MASTERKEY_CONFIG -> MASTER_KEY,
|
||||
CosmosDBConfigConstants.DATABASE_CONFIG -> DATABASE,
|
||||
CosmosDBConfigConstants.COLLECTION_CONFIG -> "",
|
||||
CosmosDBConfigConstants.COLLECTION_TOPIC_MAP_CONFIG -> "",
|
||||
"topics" -> s"$TOPIC,$TOPIC_2",
|
||||
CosmosDBConfigConstants.TOPIC_CONFIG -> s"$TOPIC,$TOPIC_2",
|
||||
CosmosDBConfigConstants.SINK_POST_PROCESSOR -> "com.microsoft.azure.cosmosdb.kafka.connect.processor.sink.SelectorSinkPostProcessor"
|
||||
).asJava
|
||||
|
||||
When("The sink task is started")
|
||||
sinkTask.start(map)
|
||||
|
||||
Then("The collection topic map should contain the proper mapping")
|
||||
val expectedMap = mutable.HashMap[String, String](TOPIC -> TOPIC,
|
||||
TOPIC_2 -> TOPIC_2)
|
||||
assert(sinkTask.collectionTopicMap == expectedMap)
|
||||
}
|
||||
|
||||
|
||||
"CosmosDBSinkConnector put" should "Write records from topics in the proper collections according to the map" in {
|
||||
Given("A Cosmos DB Provider and a configured Cosmos DB Collection")
|
||||
val mockCosmosProvider = MockCosmosDBProvider
|
||||
mockCosmosProvider.setupCollections(List(COLLECTION))
|
||||
|
||||
val record1 = new SinkRecord(TOPIC, PARTITION, Schema.STRING_SCHEMA, null, Schema.STRING_SCHEMA, "{\"message\": \"message1 payload\"}", 0)
|
||||
val record2 = new SinkRecord(TOPIC, PARTITION, Schema.STRING_SCHEMA, null, Schema.STRING_SCHEMA, "{\"message\": \"message2 payload\"}", 0)
|
||||
val records = new ArrayList[SinkRecord]
|
||||
records.add(record1)
|
||||
records.add(record2)
|
||||
|
||||
val sinkTask = new CosmosDBSinkTask { override val cosmosDBProvider = mockCosmosProvider }
|
||||
val map = Map(
|
||||
org.apache.kafka.connect.runtime.ConnectorConfig.NAME_CONFIG -> "CosmosDBSinkConnector",
|
||||
org.apache.kafka.connect.runtime.ConnectorConfig.CONNECTOR_CLASS_CONFIG -> "com.microsoft.azure.cosmosdb.kafka.connect.sink.CosmosDBSinkConnector",
|
||||
org.apache.kafka.connect.runtime.ConnectorConfig.TASKS_MAX_CONFIG -> "1",
|
||||
CosmosDBConfigConstants.CONNECTION_ENDPOINT_CONFIG -> ENDPOINT,
|
||||
CosmosDBConfigConstants.CONNECTION_MASTERKEY_CONFIG -> MASTER_KEY,
|
||||
CosmosDBConfigConstants.DATABASE_CONFIG -> DATABASE,
|
||||
CosmosDBConfigConstants.COLLECTION_CONFIG -> COLLECTION,
|
||||
CosmosDBConfigConstants.COLLECTION_TOPIC_MAP_CONFIG -> s"$COLLECTION#$TOPIC",
|
||||
"topics" -> TOPIC,
|
||||
CosmosDBConfigConstants.TOPIC_CONFIG -> TOPIC,
|
||||
CosmosDBConfigConstants.SINK_POST_PROCESSOR -> "com.microsoft.azure.cosmosdb.kafka.connect.processor.sink.SelectorSinkPostProcessor"
|
||||
).asJava
|
||||
sinkTask.start(map)
|
||||
|
||||
When("Records are passed to the put method")
|
||||
sinkTask.put(records)
|
||||
|
||||
Then("The Cosmos DB collection should contain all of the records")
|
||||
val documents = mockCosmosProvider.getDocumentsByCollection(COLLECTION)
|
||||
assert(documents.size == 2)
|
||||
}
|
||||
}
|
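The mapping string these tests exercise has the form collection#topic[,collection#topic...], and an empty value falls back to using each topic name as the collection name. A hedged sketch of that parsing rule (an illustration only, not the task's own parser):

import scala.collection.mutable

def parseCollectionTopicMap(mapConfig: String, topics: Seq[String]): mutable.HashMap[String, String] = {
  val result = mutable.HashMap[String, String]()
  if (mapConfig.trim.isEmpty)
    topics.foreach(t => result.put(t, t))                // default: collection name = topic name
  else
    mapConfig.split(",").map(_.trim).foreach { pair =>
      val Array(collection, topic) = pair.split("#")
      result.put(topic, collection)                      // keyed by topic, as the tests assert
    }
  result
}

// parseCollectionTopicMap("collection#topic,collection2#topic3", Nil)
//   yields HashMap("topic" -> "collection", "topic3" -> "collection2")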
|
@ -0,0 +1,216 @@
|
|||
package com.microsoft.azure.cosmosdb.kafka.connect.sink
|
||||
|
||||
import java.util.ArrayList
|
||||
import com.microsoft.azure.cosmosdb.Document
|
||||
import com.microsoft.azure.cosmosdb.kafka.connect.config.TestConfigurations.{DATABASE, ENDPOINT, MASTER_KEY}
|
||||
import com.microsoft.azure.cosmosdb.kafka.connect.MockCosmosDBProvider
|
||||
import org.apache.kafka.connect.data.Schema
|
||||
import org.apache.kafka.connect.sink.SinkRecord
|
||||
import org.scalatest.{FlatSpec, GivenWhenThen}
|
||||
|
||||
import java.util
|
||||
import scala.collection.mutable.HashMap
|
||||
|
||||
class CosmosDBWriterTest extends FlatSpec with GivenWhenThen {
|
||||
|
||||
private val PARTITION = 0
|
||||
|
||||
private val TOPIC = "topic"
|
||||
private val TOPIC_2 = "topic2"
|
||||
private val TOPIC_3 = "topic3"
|
||||
private val TOPIC_4 = "topic4"
|
||||
private val TOPIC_5 = "topic5"
|
||||
|
||||
private val COLLECTION = "collection"
|
||||
private val COLLECTION_2 = "collection2"
|
||||
private val COLLECTION_3 = "collection3"
|
||||
|
||||
// NOTE: All schemas are sent as null during testing because we are not currently enforcing them.
|
||||
// We simply need to validate that the presence of the schema object doesn't break the writer.
|
||||
"CosmosDBWriter write" should "Write records formatted as a raw json string with schema" in {
|
||||
Given("A Cosmos DB Provider, a configured Cosmos DB Collection and sample Sink Records")
|
||||
|
||||
// Instantiate the MockCosmosDBProvider and Setup the Collections
|
||||
val mockCosmosProvider = MockCosmosDBProvider
|
||||
mockCosmosProvider.setupCollections(List(COLLECTION))
|
||||
|
||||
// Map the Topic and Collections
|
||||
val collectionTopicMap: HashMap[String, String] = HashMap[String, String]((TOPIC, COLLECTION))
|
||||
|
||||
// Set up Writer
|
||||
val setting = new CosmosDBSinkSettings(ENDPOINT, MASTER_KEY, DATABASE, collectionTopicMap)
|
||||
val writer = new CosmosDBWriter(setting, mockCosmosProvider)
|
||||
|
||||
// Create sample SinkRecords
|
||||
val record1 = new SinkRecord(TOPIC, PARTITION, Schema.STRING_SCHEMA, null, Schema.STRING_SCHEMA, "{\"schema\": \"null\", \"payload\": {\"message\": \"message1 payload\"}}", 0)
|
||||
val record2 = new SinkRecord(TOPIC, PARTITION, Schema.STRING_SCHEMA, null, Schema.STRING_SCHEMA, "{\"schema\": \"null\", \"payload\": {\"message\": \"message2 payload\"}}", 0)
|
||||
|
||||
When("Records are passed to the write method")
|
||||
writer.write(Seq(record1, record2))
|
||||
|
||||
Then("The Cosmos DB collection should contain all of the records")
|
||||
val documents: ArrayList[Document] = mockCosmosProvider.getDocumentsByCollection(COLLECTION)
|
||||
assert(documents.size == 2)
|
||||
|
||||
// Check the schema wasn't written with the payload
|
||||
assert(documents.get(0).get("schema") == null)
|
||||
assert(documents.get(1).get("schema") == null)
|
||||
assert(documents.get(0).get("message") == "message1 payload")
|
||||
assert(documents.get(1).get("message") == "message2 payload")
|
||||
}
|
||||
|
||||
|
||||
"CosmosDBWriter write" should "Write records formatted as a raw json string without schema" in {
|
||||
Given("A Cosmos DB Provider, a configured Cosmos DB Collection and sample Sink Records")
|
||||
|
||||
// Instantiate the MockCosmosDBProvider and Setup the Collections
|
||||
val mockCosmosProvider = MockCosmosDBProvider
|
||||
mockCosmosProvider.setupCollections(List(COLLECTION))
|
||||
|
||||
// Map the Topic and Collections
|
||||
val collectionTopicMap: HashMap[String, String] = HashMap[String, String]((TOPIC, COLLECTION))
|
||||
|
||||
// Set up Writer
|
||||
val setting = new CosmosDBSinkSettings(ENDPOINT, MASTER_KEY, DATABASE, collectionTopicMap)
|
||||
val writer = new CosmosDBWriter(setting, mockCosmosProvider)
|
||||
|
||||
// Create sample SinkRecords
|
||||
val record1 = new SinkRecord(TOPIC, PARTITION, Schema.STRING_SCHEMA, null, Schema.STRING_SCHEMA, "{\"message\": \"message1 payload\"}", 0)
|
||||
val record2 = new SinkRecord(TOPIC, PARTITION, Schema.STRING_SCHEMA, null, Schema.STRING_SCHEMA, "{\"message\": \"message2 payload\"}", 0)
|
||||
|
||||
When("Records are passed to the write method")
|
||||
writer.write(Seq(record1, record2))
|
||||
|
||||
Then("The Cosmos DB collection should contain all of the records")
|
||||
val documents = mockCosmosProvider.getDocumentsByCollection(COLLECTION)
|
||||
|
||||
assert(documents.size == 2)
|
||||
assert(documents.get(0).get("message") == "message1 payload")
|
||||
assert(documents.get(1).get("message") == "message2 payload")
|
||||
}
|
||||
|
||||
|
||||
"CosmosDBWriter write" should "Write records formatted as hash map without schema" in {
|
||||
Given("A Cosmos DB Provider, a configured Cosmos DB Collection and sample Sink Records")
|
||||
|
||||
// Instantiate the MockCosmosDBProvider and Setup the Collections
|
||||
val mockCosmosProvider = MockCosmosDBProvider
|
||||
mockCosmosProvider.setupCollections(List(COLLECTION))
|
||||
|
||||
// Map the Topic and Collections
|
||||
val collectionTopicMap: HashMap[String, String] = HashMap[String, String]((TOPIC, COLLECTION))
|
||||
|
||||
// Set up Writer
|
||||
val setting = new CosmosDBSinkSettings(ENDPOINT, MASTER_KEY, DATABASE, collectionTopicMap)
|
||||
val writer = new CosmosDBWriter(setting, mockCosmosProvider)
|
||||
|
||||
// Create sample SinkRecords
|
||||
val payload1 = new util.HashMap[String, String]()
|
||||
payload1.put("message", "message1 payload")
|
||||
val record1 = new SinkRecord(TOPIC, PARTITION, Schema.STRING_SCHEMA, null, null, payload1, 0)
|
||||
|
||||
val payload2 = new util.HashMap[String, String]()
|
||||
payload2.put("message", "message2 payload")
|
||||
val record2 = new SinkRecord(TOPIC, PARTITION, Schema.STRING_SCHEMA, null, null, payload2, 0)
|
||||
|
||||
When("Records are passed to the write method")
|
||||
writer.write(Seq(record1, record2))
|
||||
|
||||
Then("The Cosmos DB collection should contain all of the records")
|
||||
val documents = mockCosmosProvider.getDocumentsByCollection(COLLECTION)
|
||||
|
||||
assert(documents.size == 2)
|
||||
assert(documents.get(0).get("message") == "message1 payload")
|
||||
assert(documents.get(1).get("message") == "message2 payload")
|
||||
}
|
||||
|
||||
|
||||
"CosmosDBWriter write" should "Write records formatted as hash map with schema" in {
|
||||
Given("A Cosmos DB Provider, a configured Cosmos DB Collection and sample Sink Records")
|
||||
|
||||
// Instantiate the MockCosmosDBProvider and Setup the Collections
|
||||
val mockCosmosProvider = MockCosmosDBProvider
|
||||
mockCosmosProvider.setupCollections(List(COLLECTION))
|
||||
|
||||
// Map the Topic and Collections
|
||||
val collectionTopicMap: HashMap[String, String] = HashMap[String, String]((TOPIC, COLLECTION))
|
||||
|
||||
// Set up Writer
|
||||
val setting = new CosmosDBSinkSettings(ENDPOINT, MASTER_KEY, DATABASE, collectionTopicMap)
|
||||
val writer = new CosmosDBWriter(setting, mockCosmosProvider)
|
||||
|
||||
// Create sample SinkRecords
|
||||
val payload1 = new util.HashMap[String, String]()
|
||||
payload1.put("message", "message1 payload")
|
||||
val map1 = new util.HashMap[String, util.HashMap[String, String]]()
|
||||
map1.put("schema", null)
|
||||
map1.put("payload", payload1)
|
||||
val record1 = new SinkRecord(TOPIC, PARTITION, Schema.STRING_SCHEMA, null, null, map1, 0)
|
||||
|
||||
val payload2 = new util.HashMap[String, String]()
|
||||
payload2.put("message", "message2 payload")
|
||||
val map2 = new util.HashMap[String, util.HashMap[String, String]]()
|
||||
map2.put("schema", null)
|
||||
map2.put("payload", payload2)
|
||||
val record2 = new SinkRecord(TOPIC, PARTITION, Schema.STRING_SCHEMA, null, null, map2, 0)
|
||||
|
||||
When("Records are passed to the write method")
|
||||
writer.write(Seq(record1, record2))
|
||||
|
||||
Then("The Cosmos DB collection should contain all of the records")
|
||||
val documents = mockCosmosProvider.getDocumentsByCollection(COLLECTION)
|
||||
|
||||
assert(documents.size == 2)
|
||||
|
||||
// Check the schema wasn't written with the payload
|
||||
assert(documents.get(0).get("schema") == null)
|
||||
assert(documents.get(1).get("schema") == null)
|
||||
assert(documents.get(0).get("message") == "message1 payload")
|
||||
assert(documents.get(1).get("message") == "message2 payload")
|
||||
}
|
||||
|
||||
|
||||
"CosmosDBWriter write" should "Write records in the proper collections according to a complex map" in {
|
||||
Given("A Cosmos DB Provider, a configured Cosmos DB Collection and sample Sink Records")
|
||||
|
||||
// Instantiate the MockCosmosDBProvider and Setup the Collections
|
||||
val mockCosmosProvider = MockCosmosDBProvider
|
||||
mockCosmosProvider.setupCollections(List(COLLECTION,COLLECTION_2,COLLECTION_3))
|
||||
|
||||
// Map the Topic and Collections
|
||||
val collectionTopicMap: HashMap[String, String] = HashMap[String, String]((TOPIC, COLLECTION),
|
||||
(TOPIC_2, COLLECTION),
|
||||
(TOPIC_3, COLLECTION_2),
|
||||
(TOPIC_4, COLLECTION_3),
|
||||
(TOPIC_5, COLLECTION_3))
|
||||
|
||||
// Set up Writer
|
||||
val setting = new CosmosDBSinkSettings(ENDPOINT, MASTER_KEY, DATABASE, collectionTopicMap)
|
||||
val writer = new CosmosDBWriter(setting, mockCosmosProvider)
|
||||
|
||||
// Create sample SinkRecords
|
||||
val record1 = new SinkRecord(TOPIC, PARTITION, Schema.STRING_SCHEMA, null, Schema.STRING_SCHEMA, "{\"message\": \"topic payload\"}", 0)
|
||||
val record2 = new SinkRecord(TOPIC, PARTITION, Schema.STRING_SCHEMA, null, Schema.STRING_SCHEMA, "{\"message\": \"topic payload\"}", 0)
|
||||
val record3 = new SinkRecord(TOPIC_2, PARTITION, Schema.STRING_SCHEMA, null, Schema.STRING_SCHEMA, "{\"message\": \"topic2 payload\"}", 0)
|
||||
val record4 = new SinkRecord(TOPIC_2, PARTITION, Schema.STRING_SCHEMA, null, Schema.STRING_SCHEMA, "{\"message\": \"topic2 payload\"}", 0)
|
||||
val record5 = new SinkRecord(TOPIC_3, PARTITION, Schema.STRING_SCHEMA, null, Schema.STRING_SCHEMA, "{\"message\": \"topic3 payload\"}", 0)
|
||||
val record6 = new SinkRecord(TOPIC_3, PARTITION, Schema.STRING_SCHEMA, null, Schema.STRING_SCHEMA, "{\"message\": \"topic3 payload\"}", 0)
|
||||
val record7 = new SinkRecord(TOPIC_4, PARTITION, Schema.STRING_SCHEMA, null, Schema.STRING_SCHEMA, "{\"message\": \"topic4 payload\"}", 0)
|
||||
val record8 = new SinkRecord(TOPIC_4, PARTITION, Schema.STRING_SCHEMA, null, Schema.STRING_SCHEMA, "{\"message\": \"topic4 payload\"}", 0)
|
||||
val record9 = new SinkRecord(TOPIC_5, PARTITION, Schema.STRING_SCHEMA, null, Schema.STRING_SCHEMA, "{\"message\": \"topic5 payload\"}", 0)
|
||||
val record10 = new SinkRecord(TOPIC_5, PARTITION, Schema.STRING_SCHEMA, null, Schema.STRING_SCHEMA, "{\"message\": \"topic5 payload\"}", 0)
|
||||
val record11 = new SinkRecord(TOPIC_5, PARTITION, Schema.STRING_SCHEMA, null, Schema.STRING_SCHEMA, "{\"message\": \"topic5 payload\"}", 0)
|
||||
|
||||
When("Records are passed to the write method")
|
||||
writer.write(Seq(record1, record2, record3, record4, record5, record6, record7, record8, record9, record10, record11))
|
||||
|
||||
Then("The Cosmos DB collection should contain all of the records")
|
||||
val documents = mockCosmosProvider.getDocumentsByCollection(COLLECTION)
|
||||
val documents2 = mockCosmosProvider.getDocumentsByCollection(COLLECTION_2)
|
||||
val documents3 = mockCosmosProvider.getDocumentsByCollection(COLLECTION_3)
|
||||
|
||||
assert(documents.size == 4)
|
||||
assert(documents2.size == 2)
|
||||
assert(documents3.size == 5)
|
||||
}
|
||||
}
|
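Across these cases the writer's observable contract is: a bare JSON document (string or map) is stored as-is, while a Kafka Connect envelope of the form {"schema": ..., "payload": {...}} is stored as its payload only. A hedged sketch of that unwrapping rule for the string-valued case (an illustration only, not CosmosDBWriter's own code):

import com.google.gson.{JsonObject, JsonParser}

def unwrapEnvelope(recordValue: String): JsonObject = {
  val json = new JsonParser().parse(recordValue).getAsJsonObject
  if (json.has("schema") && json.has("payload")) json.getAsJsonObject("payload")  // drop the envelope
  else json                                                                       // already a bare document
}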
|
@ -0,0 +1,65 @@
|
|||
package com.microsoft.azure.cosmosdb.kafka.connect.sink
|
||||
|
||||
import java.util.Properties
|
||||
import java.util.UUID.randomUUID
|
||||
|
||||
import com.fasterxml.jackson.databind.{JsonNode, ObjectMapper}
|
||||
import com.microsoft.azure.cosmosdb.kafka.connect.config.CosmosDBConfigConstants
|
||||
import com.microsoft.azure.cosmosdb.kafka.connect.kafka.KafkaCluster
|
||||
import com.microsoft.azure.cosmosdb.kafka.connect.model.CosmosDBDocumentTest
|
||||
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
|
||||
import com.microsoft.azure.cosmosdb.kafka.connect.config.TestConfigurations
|
||||
import org.apache.kafka.connect.runtime.distributed.DistributedConfig
|
||||
import org.apache.kafka.connect.runtime.{ConnectorConfig, WorkerConfig}
|
||||
|
||||
|
||||
object SourceConnectReaderTest {
|
||||
|
||||
def main(args: Array[String]): Unit = {
|
||||
val workerProperties: Properties = TestConfigurations.getSourceWorkerProperties(KafkaCluster.BrokersList.toString)
|
||||
val connectorProperties: Properties = TestConfigurations.getSourceConnectorProperties()
|
||||
KafkaCluster.startEmbeddedConnect(workerProperties, List(connectorProperties))
|
||||
if (KafkaCluster.kafkaConnectEnabled) {
|
||||
println("Kafka Connector Enabled")
|
||||
}
|
||||
|
||||
// Write 14 test messages to the Kafka topic to be consumed (4 schema JSON, 4 schema-less JSON, 6 document objects)
|
||||
val producerProps: Properties = TestConfigurations.getProducerProperties(KafkaCluster.BrokersList.toString)
|
||||
val producer = new KafkaProducer[Nothing, JsonNode](producerProps)
|
||||
val testUUID = randomUUID()
|
||||
|
||||
val objectMapper: ObjectMapper = new ObjectMapper
|
||||
|
||||
//schema JSON test
|
||||
for (i <- 1 to 4) {
|
||||
val json = scala.io.Source.fromFile(getClass.getResource(s"/test$i.json").toURI.getPath).mkString
|
||||
val mapper = new ObjectMapper
|
||||
val jsonNode: JsonNode = mapper.readTree(json)
|
||||
producer.send(new ProducerRecord[Nothing, JsonNode](TestConfigurations.TOPIC, jsonNode))
|
||||
|
||||
}
|
||||
|
||||
//schema-less JSON test
|
||||
for (i <- 5 to 8) {
|
||||
val json = scala.io.Source.fromFile(getClass.getResource(s"/test$i.json").toURI.getPath).mkString
|
||||
val mapper = new ObjectMapper
|
||||
val jsonNode: JsonNode = mapper.readTree(json)
|
||||
producer.send(new ProducerRecord[Nothing, JsonNode](TestConfigurations.TOPIC, jsonNode))
|
||||
|
||||
}
|
||||
|
||||
// JSON string test no schema
|
||||
for (i <- 9 until 15) {
|
||||
val message = new CosmosDBDocumentTest(s"$i", s"message $i", testUUID)
|
||||
val jsonNode: JsonNode = objectMapper.valueToTree(message)
|
||||
|
||||
println("sending message: ", jsonNode.findPath("id"))
|
||||
producer.send(new ProducerRecord[Nothing, JsonNode](TestConfigurations.TOPIC, jsonNode))
|
||||
}
|
||||
|
||||
producer.flush()
|
||||
producer.close()
|
||||
}
|
||||
}
|
|
@ -0,0 +1,226 @@
|
|||
package com.microsoft.azure.cosmosdb.kafka.connect.source
|
||||
|
||||
import java.util
|
||||
import java.util.UUID.randomUUID
|
||||
import java.util.concurrent.{CountDownLatch, TimeUnit}
|
||||
import java.util.{ArrayList, Properties, UUID}
|
||||
|
||||
import _root_.rx.Observable
|
||||
import _root_.rx.lang.scala.JavaConversions._
|
||||
import com.google.common.collect.Maps
|
||||
import com.google.gson.Gson
|
||||
import com.microsoft.azure.cosmosdb.{ConnectionPolicy, ConsistencyLevel, Document, ResourceResponse}
|
||||
import com.microsoft.azure.cosmosdb.kafka.connect.{CosmosDBClientSettings, CosmosDBProvider, CosmosDBProviderImpl, MockCosmosDBProvider, MockCosmosDBReader}
|
||||
import com.microsoft.azure.cosmosdb.kafka.connect.config.TestConfigurations.{DATABASE, ENDPOINT, MASTER_KEY}
|
||||
import com.microsoft.azure.cosmosdb.kafka.connect.config.{CosmosDBConfigConstants, TestConfigurations}
|
||||
import com.microsoft.azure.cosmosdb.kafka.connect.model.{CosmosDBDocumentTest, KafkaPayloadTest}
|
||||
import com.typesafe.scalalogging.LazyLogging
|
||||
import org.apache.kafka.connect.data.Schema
|
||||
import org.apache.kafka.connect.errors.ConnectException
|
||||
import org.apache.kafka.connect.sink.SinkRecord
|
||||
import org.scalatest.{FlatSpec, GivenWhenThen}
|
||||
import org.mockito.MockitoSugar.mock
|
||||
|
||||
import scala.collection.JavaConverters._
|
||||
import scala.collection.mutable
|
||||
import scala.util.{Failure, Success, Try}
|
||||
|
||||
class CosmosDBSourceConnectorTaskTestMock extends FlatSpec with GivenWhenThen with LazyLogging {
|
||||
|
||||
private val NUM_DOCS: Int = 20
|
||||
private val DOC_SIZE: Int = 313
|
||||
private var testUUID: UUID = null
|
||||
private var batchSize = NUM_DOCS
|
||||
private var bufferSize = batchSize * DOC_SIZE
|
||||
|
||||
"CosmosDBSourceTask start" should "Initialize all properties" in {
|
||||
Given("A list of properties for CosmosSourceTask")
|
||||
val props = TestConfigurations.getSourceConnectorProperties()
|
||||
// Add the assigned partitions
|
||||
props.put(CosmosDBConfigConstants.ASSIGNED_PARTITIONS, "0,1")
|
||||
|
||||
When("CosmosSourceTask is started")
|
||||
val mockCosmosProvider = MockCosmosDBProvider
|
||||
val task = new CosmosDBSourceTask { override val cosmosDBProvider = mockCosmosProvider }
|
||||
task.start(Maps.fromProperties(props))
|
||||
|
||||
Then("CosmosSourceTask should properly initialized the readers")
|
||||
val readers = task.getReaders()
|
||||
readers.foreach(r => assert(r._1 == r._2.setting.assignedPartition))
|
||||
assert(readers.size == 2)
|
||||
}
|
||||
|
||||
"CosmosDBSourceTask poll" should "Return a list of SourceRecords with the right format" in {
|
||||
Given("A set of SourceConnector properties")
|
||||
val props: Properties = TestConfigurations.getSourceConnectorProperties()
|
||||
props.setProperty(CosmosDBConfigConstants.BATCH_SIZE, NUM_DOCS.toString)
|
||||
props.setProperty(CosmosDBConfigConstants.READER_BUFFER_SIZE, "10000")
|
||||
props.setProperty(CosmosDBConfigConstants.TIMEOUT, "10000")
|
||||
|
||||
|
||||
Then(s"Start the SourceConnector and return the taskConfigs")
|
||||
// Declare a collection to store the messages from SourceRecord
|
||||
val kafkaMessages = new util.ArrayList[KafkaPayloadTest]
|
||||
|
||||
// Start CosmosDBSourceConnector and return the taskConfigs
|
||||
val connector = new CosmosDBSourceConnector
|
||||
connector.start(Maps.fromProperties(props))
|
||||
val taskConfigs = connector.taskConfigs(2)
|
||||
|
||||
taskConfigs.forEach(config => {
|
||||
When("CosmosSourceTask is started and poll is called")
|
||||
|
||||
|
||||
val task = new CosmosDBSourceTask {override val readers = mock[mutable.Map[String, CosmosDBReader]]}
|
||||
task.start(config)
|
||||
|
||||
val sourceRecords = task.poll()
|
||||
|
||||
Then("It returns a list of SourceRecords")
|
||||
assert(sourceRecords != null)
|
||||
val gson = new Gson()
|
||||
sourceRecords.forEach(r => {
|
||||
val message = gson.fromJson(r.value().toString, classOf[KafkaPayloadTest])
|
||||
if (message.testID == testUUID) {
|
||||
kafkaMessages.add(message)
|
||||
}
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
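// The number of records returned by poll() across all workers should equal batchSize * numWorkers, with batchSize taken from each task's config.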
"CosmosDBSourceTask poll" should "Return a list of SourceRecords based on the batchSize" in {
|
||||
Given("A set of SourceConnector properties")
|
||||
val props: Properties = TestConfigurations.getSourceConnectorProperties()
|
||||
props.setProperty(CosmosDBConfigConstants.READER_BUFFER_SIZE, "10000")
|
||||
props.setProperty(CosmosDBConfigConstants.TIMEOUT, "10000")
|
||||
|
||||
Then(s"Start the SourceConnector and return the taskConfigs")
|
||||
// Declare a collection to store the messages from SourceRecord
|
||||
val kafkaMessages = new util.ArrayList[KafkaPayloadTest]
|
||||
|
||||
// Start CosmosDBSourceConnector and return the taskConfigs
|
||||
val connector = new CosmosDBSourceConnector
|
||||
connector.start(Maps.fromProperties(props))
|
||||
val taskConfigs = connector.taskConfigs(2)
|
||||
val numWorkers = connector.getNumberOfWorkers()
|
||||
taskConfigs.forEach(config => {
|
||||
When("CosmosSourceTask is started and poll is called")
|
||||
val task = new CosmosDBSourceTask {override val readers = mock[mutable.Map[String, CosmosDBReader]]}
|
||||
task.start(config)
|
||||
batchSize = config.get(CosmosDBConfigConstants.BATCH_SIZE).toInt
|
||||
val sourceRecords = task.poll()
|
||||
Then("It returns a list of SourceRecords")
|
||||
assert(sourceRecords != null)
|
||||
val gson = new Gson()
|
||||
sourceRecords.forEach(r => {
|
||||
val message = gson.fromJson(r.value().toString, classOf[KafkaPayloadTest])
|
||||
if (message.testID == testUUID) {
|
||||
kafkaMessages.add(message)
|
||||
}
|
||||
})
|
||||
})
|
||||
|
||||
Then(s"Make sure collection of messages is equal to ${batchSize * numWorkers}")
|
||||
assert(kafkaMessages.size() == batchSize * numWorkers)
|
||||
|
||||
|
||||
}
|
||||
|
||||
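// poll() should stop filling once the reader buffer is full: total payload bytes must land between bufferSize * numWorkers and (bufferSize + DOC_SIZE) * numWorkers.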
"CosmosDBSourceTask poll" should "Return a list of SourceRecords based on the bufferSize" in {
|
||||
Given("A set of SourceConnector properties")
|
||||
val props: Properties = TestConfigurations.getSourceConnectorProperties()
|
||||
props.setProperty(CosmosDBConfigConstants.BATCH_SIZE, NUM_DOCS.toString)
|
||||
props.setProperty(CosmosDBConfigConstants.TIMEOUT, "10000")
|
||||
|
||||
Then(s"Start the SourceConnector and return the taskConfigs")
|
||||
// Declare a collection to store the messages from SourceRecord
|
||||
val kafkaMessages = new util.ArrayList[KafkaPayloadTest]
|
||||
|
||||
// Start CosmosDBSourceConnector and return the taskConfigs
|
||||
val connector = new CosmosDBSourceConnector
|
||||
connector.start(Maps.fromProperties(props))
|
||||
val taskConfigs = connector.taskConfigs(2)
|
||||
val numWorkers = connector.getNumberOfWorkers()
|
||||
taskConfigs.forEach(config => {
|
||||
When("CosmosSourceTask is started and poll is called")
|
||||
val task = new CosmosDBSourceTask {override val readers = mock[mutable.Map[String, CosmosDBReader]]}
|
||||
task.start(config)
|
||||
bufferSize = config.get(CosmosDBConfigConstants.READER_BUFFER_SIZE).toInt
|
||||
val sourceRecords = task.poll()
|
||||
Then("It returns a list of SourceRecords")
|
||||
assert(sourceRecords != null)
|
||||
val gson = new Gson()
|
||||
sourceRecords.forEach(r => {
|
||||
val message = gson.fromJson(r.value().toString, classOf[KafkaPayloadTest])
|
||||
if (message.testID == testUUID) {
|
||||
kafkaMessages.add(message)
|
||||
}
|
||||
})
|
||||
})
|
||||
|
||||
val minSize = (bufferSize * numWorkers)
|
||||
val maxSize = ((bufferSize + DOC_SIZE) * numWorkers)
|
||||
Then(s"Make sure number of bytes in the collection of messages is between ${minSize} and ${maxSize}")
|
||||
assert(kafkaMessages.size() * DOC_SIZE >= minSize && kafkaMessages.size() * DOC_SIZE <= maxSize)
|
||||
|
||||
}
|
||||
|
||||
|
||||
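// Builds NUM_DOCS test documents tagged with a fresh testUUID so records produced by this run can be distinguished from any pre-existing data.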
private def mockDocuments(): ArrayList[CosmosDBDocumentTest] = {
|
||||
val documents: ArrayList[CosmosDBDocumentTest] = new ArrayList[CosmosDBDocumentTest]
|
||||
testUUID = randomUUID()
|
||||
|
||||
for (i <- 1 to NUM_DOCS) {
|
||||
val doc = new CosmosDBDocumentTest(i.toString, s"Message ${i}", testUUID)
|
||||
documents.add(doc)
|
||||
}
|
||||
documents
|
||||
}
|
||||
|
||||
|
||||
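// Upserts the mock documents into the source collection via the async SDK, merging the per-document observables and logging the total request-unit (RU) charge.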
private def insertDocuments(cosmosDBProvider: CosmosDBProvider = CosmosDBProviderImpl) = {
|
||||
|
||||
// Source Collection
|
||||
val clientSettings = CosmosDBClientSettings(
|
||||
TestConfigurations.ENDPOINT,
|
||||
TestConfigurations.MASTER_KEY,
|
||||
TestConfigurations.DATABASE,
|
||||
TestConfigurations.SOURCE_COLLECTION,
|
||||
ConnectionPolicy.GetDefault(),
|
||||
ConsistencyLevel.Session
|
||||
)
|
||||
val client = Try(cosmosDBProvider.getClient(clientSettings)) match {
|
||||
case Success(conn) =>
|
||||
logger.info("Connection to CosmosDB established.")
|
||||
conn
|
||||
case Failure(f) => throw new ConnectException(s"Couldn't connect to CosmosDB.", f)
|
||||
}
|
||||
|
||||
val gson: Gson = new Gson()
|
||||
val upsertDocumentsOBs: util.ArrayList[Observable[ResourceResponse[Document]]] = new util.ArrayList[Observable[ResourceResponse[Document]]]
|
||||
val completionLatch = new CountDownLatch(1)
|
||||
val forcedScalaObservable: _root_.rx.lang.scala.Observable[ResourceResponse[Document]] = Observable.merge(upsertDocumentsOBs)
|
||||
mockDocuments().forEach(record => {
|
||||
val json = gson.toJson(record)
|
||||
val document = new Document(json)
|
||||
val obs = client.upsertDocument(CosmosDBProviderImpl.getCollectionLink(TestConfigurations.DATABASE, TestConfigurations.SOURCE_COLLECTION), document, null, false)
|
||||
upsertDocumentsOBs.add(obs)
|
||||
})
|
||||
|
||||
forcedScalaObservable
|
||||
.map(r => r.getRequestCharge)
|
||||
.reduce((sum, value) => sum + value)
|
||||
.subscribe(
|
||||
t => logger.info(s"upsertDocuments total RU charge is $t"),
|
||||
e => {
|
||||
logger.error(s"error upserting documents e:${e.getMessage()} stack:${e.getStackTrace().toString()}")
|
||||
completionLatch.countDown()
|
||||
},
|
||||
() => {
|
||||
logger.info("upsertDocuments completed")
|
||||
completionLatch.countDown()
|
||||
}
|
||||
)
// Wait for the merged upsert stream to report completion (or an error) so the documents are persisted before this helper returns.
completionLatch.await(1, TimeUnit.MINUTES)
|
||||
}
|
||||
}
|
|
@ -0,0 +1,35 @@
|
|||
package com.microsoft.azure.cosmosdb.kafka.connect.source
|
||||
|
||||
import com.google.common.collect.Maps
|
||||
import com.microsoft.azure.cosmosdb.kafka.connect.config.{CosmosDBConfigConstants, TestConfigurations}
|
||||
import org.apache.kafka.connect.runtime.ConnectorConfig
|
||||
import org.scalatest.{FlatSpec, GivenWhenThen}
|
||||
|
||||
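// Verifies that CosmosDBSourceConnector validates its properties and produces one task config per worker, each carrying the connection settings and its own assigned partition.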
class CosmosDBSourceConnectorTest extends FlatSpec with GivenWhenThen {
|
||||
"CosmosDBSourceConnector" should "Validate all input properties and generate right set of task config properties" in {
|
||||
Given("Valid set of input properties")
|
||||
val props = TestConfigurations.getSourceConnectorProperties()
|
||||
val connector = new CosmosDBSourceConnector
|
||||
When("Start and TaskConfig are called in right order")
|
||||
connector.start(Maps.fromProperties(props))
|
||||
val taskConfigs = connector.taskConfigs(3)
|
||||
val numWorkers = connector.getNumberOfWorkers
|
||||
Then("The TaskConfigs have all the expected properties")
|
||||
assert(taskConfigs.size() == numWorkers)
|
||||
for (i <- 0 until numWorkers) {
|
||||
val taskConfig: java.util.Map[String, String] = taskConfigs.get(i)
|
||||
assert(taskConfig.containsKey(ConnectorConfig.NAME_CONFIG))
|
||||
assert(taskConfig.containsKey(ConnectorConfig.CONNECTOR_CLASS_CONFIG))
|
||||
assert(taskConfig.containsKey(ConnectorConfig.TASKS_MAX_CONFIG))
|
||||
assert(taskConfig.containsKey(CosmosDBConfigConstants.CONNECTION_ENDPOINT_CONFIG))
|
||||
assert(taskConfig.containsKey(CosmosDBConfigConstants.CONNECTION_MASTERKEY_CONFIG))
|
||||
assert(taskConfig.containsKey(CosmosDBConfigConstants.DATABASE_CONFIG))
|
||||
assert(taskConfig.containsKey(CosmosDBConfigConstants.COLLECTION_CONFIG))
|
||||
assert(taskConfig.containsKey(CosmosDBConfigConstants.TOPIC_CONFIG))
|
||||
Then("Validate assigned partition")
|
||||
val partition = taskConfig.get(CosmosDBConfigConstants.ASSIGNED_PARTITIONS)
|
||||
assert(partition.size == 1)
|
||||
assert(partition == i.toString)
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,230 @@
|
|||
package com.microsoft.azure.cosmosdb.kafka.connect.source
|
||||
|
||||
import java.util
|
||||
import java.util.UUID._
|
||||
import java.util.concurrent.{CountDownLatch, TimeUnit}
|
||||
import java.util.{ArrayList, Properties, UUID}
|
||||
|
||||
import com.microsoft.azure.cosmosdb.kafka.connect.{CosmosDBClientSettings, CosmosDBProvider, CosmosDBProviderImpl, MockCosmosDBProvider}
|
||||
import com.microsoft.azure.cosmosdb.kafka.connect.config.TestConfigurations.{DATABASE, ENDPOINT, MASTER_KEY}
|
||||
import org.apache.kafka.connect.data.Schema
|
||||
import org.apache.kafka.connect.sink.SinkRecord
|
||||
import org.scalatest.{FlatSpec, GivenWhenThen}
|
||||
|
||||
import scala.collection.JavaConverters._
|
||||
import scala.collection.mutable
|
||||
import _root_.rx.Observable
|
||||
import _root_.rx.lang.scala.JavaConversions._
|
||||
import com.google.common.collect.Maps
|
||||
import com.google.gson.Gson
|
||||
import com.microsoft.azure.cosmosdb.kafka.connect.config.{CosmosDBConfigConstants, TestConfigurations}
|
||||
import com.microsoft.azure.cosmosdb.kafka.connect.model.{CosmosDBDocumentTest, KafkaPayloadTest}
|
||||
import com.microsoft.azure.cosmosdb.{ConnectionPolicy, ConsistencyLevel, Document, ResourceResponse}
|
||||
import com.typesafe.scalalogging.LazyLogging
|
||||
import org.apache.kafka.connect.errors.ConnectException
|
||||
import org.mockito.MockitoSugar.mock
|
||||
|
||||
import scala.util.{Failure, Success, Try}
|
||||
|
||||
|
||||
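// Covers the same task start, batch-size and buffer-size polling scenarios as CosmosDBSourceConnectorTaskTestMock.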
class CosmosDBSourceTaskTest extends FlatSpec with GivenWhenThen with LazyLogging {
|
||||
|
||||
private val NUM_DOCS: Int = 20
|
||||
private val DOC_SIZE: Int = 313
|
||||
private var testUUID: UUID = null
|
||||
private var batchSize = NUM_DOCS
|
||||
private var bufferSize = batchSize * DOC_SIZE
|
||||
|
||||
"CosmosDBSourceTask start" should "Initialize all properties" in {
|
||||
Given("A list of properties for CosmosSourceTask")
|
||||
val props = TestConfigurations.getSourceConnectorProperties()
|
||||
// Add the assigned partitions
|
||||
props.put(CosmosDBConfigConstants.ASSIGNED_PARTITIONS, "0,1")
|
||||
|
||||
When("CosmosSourceTask is started")
|
||||
val mockCosmosProvider = MockCosmosDBProvider
|
||||
val task = new CosmosDBSourceTask { override val cosmosDBProvider = mockCosmosProvider }
|
||||
task.start(Maps.fromProperties(props))
|
||||
|
||||
Then("CosmosSourceTask should properly initialized the readers")
|
||||
val readers = task.getReaders()
|
||||
readers.foreach(r => assert(r._1 == r._2.setting.assignedPartition))
|
||||
assert(readers.size == 2)
|
||||
}
|
||||
|
||||
"CosmosDBSourceTask poll" should "Return a list of SourceRecords with the right format" in {
|
||||
Given("A set of SourceConnector properties")
|
||||
val props: Properties = TestConfigurations.getSourceConnectorProperties()
|
||||
props.setProperty(CosmosDBConfigConstants.BATCH_SIZE, NUM_DOCS.toString)
|
||||
props.setProperty(CosmosDBConfigConstants.READER_BUFFER_SIZE, "10000")
|
||||
props.setProperty(CosmosDBConfigConstants.TIMEOUT, "10000")
|
||||
|
||||
|
||||
Then(s"Start the SourceConnector and return the taskConfigs")
|
||||
// Declare a collection to store the messages from SourceRecord
|
||||
val kafkaMessages = new util.ArrayList[KafkaPayloadTest]
|
||||
|
||||
// Start CosmosDBSourceConnector and return the taskConfigs
|
||||
val connector = new CosmosDBSourceConnector
|
||||
connector.start(Maps.fromProperties(props))
|
||||
val taskConfigs = connector.taskConfigs(2)
|
||||
|
||||
taskConfigs.forEach(config => {
|
||||
When("CosmosSourceTask is started and poll is called")
|
||||
|
||||
|
||||
val task = new CosmosDBSourceTask {override val readers = mock[mutable.Map[String, CosmosDBReader]]}
|
||||
task.start(config)
|
||||
|
||||
val sourceRecords = task.poll()
|
||||
|
||||
Then("It returns a list of SourceRecords")
|
||||
assert(sourceRecords != null)
|
||||
val gson = new Gson()
|
||||
sourceRecords.forEach(r => {
|
||||
val message = gson.fromJson(r.value().toString, classOf[KafkaPayloadTest])
|
||||
if (message.testID == testUUID) {
|
||||
kafkaMessages.add(message)
|
||||
}
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
"CosmosDBSourceTask poll" should "Return a list of SourceRecords based on the batchSize" in {
|
||||
Given("A set of SourceConnector properties")
|
||||
val props: Properties = TestConfigurations.getSourceConnectorProperties()
|
||||
props.setProperty(CosmosDBConfigConstants.READER_BUFFER_SIZE, "10000")
|
||||
props.setProperty(CosmosDBConfigConstants.TIMEOUT, "10000")
|
||||
|
||||
Then(s"Start the SourceConnector and return the taskConfigs")
|
||||
// Declare a collection to store the messages from SourceRecord
|
||||
val kafkaMessages = new util.ArrayList[KafkaPayloadTest]
|
||||
|
||||
// Start CosmosDBSourceConnector and return the taskConfigs
|
||||
val connector = new CosmosDBSourceConnector
|
||||
connector.start(Maps.fromProperties(props))
|
||||
val taskConfigs = connector.taskConfigs(2)
|
||||
val numWorkers = connector.getNumberOfWorkers()
|
||||
taskConfigs.forEach(config => {
|
||||
When("CosmosSourceTask is started and poll is called")
|
||||
val task = new CosmosDBSourceTask {override val readers = mock[mutable.Map[String, CosmosDBReader]]}
|
||||
task.start(config)
|
||||
batchSize = config.get(CosmosDBConfigConstants.BATCH_SIZE).toInt
|
||||
val sourceRecords = task.poll()
|
||||
Then("It returns a list of SourceRecords")
|
||||
assert(sourceRecords != null)
|
||||
val gson = new Gson()
|
||||
sourceRecords.forEach(r => {
|
||||
val message = gson.fromJson(r.value().toString, classOf[KafkaPayloadTest])
|
||||
if (message.testID == testUUID) {
|
||||
kafkaMessages.add(message)
|
||||
}
|
||||
})
|
||||
})
|
||||
|
||||
Then(s"Make sure collection of messages is equal to ${batchSize * numWorkers}")
|
||||
assert(kafkaMessages.size() == batchSize * numWorkers)
|
||||
|
||||
|
||||
}
|
||||
|
||||
"CosmosDBSourceTask poll" should "Return a list of SourceRecords based on the bufferSize" in {
|
||||
Given("A set of SourceConnector properties")
|
||||
val props: Properties = TestConfigurations.getSourceConnectorProperties()
|
||||
props.setProperty(CosmosDBConfigConstants.BATCH_SIZE, NUM_DOCS.toString)
|
||||
props.setProperty(CosmosDBConfigConstants.TIMEOUT, "10000")
|
||||
|
||||
Then(s"Start the SourceConnector and return the taskConfigs")
|
||||
// Declare a collection to store the messages from SourceRecord
|
||||
val kafkaMessages = new util.ArrayList[KafkaPayloadTest]
|
||||
|
||||
// Start CosmosDBSourceConnector and return the taskConfigs
|
||||
val connector = new CosmosDBSourceConnector
|
||||
connector.start(Maps.fromProperties(props))
|
||||
val taskConfigs = connector.taskConfigs(2)
|
||||
val numWorkers = connector.getNumberOfWorkers()
|
||||
taskConfigs.forEach(config => {
|
||||
When("CosmosSourceTask is started and poll is called")
|
||||
val task = new CosmosDBSourceTask {override val readers = mock[mutable.Map[String, CosmosDBReader]]}
|
||||
task.start(config)
|
||||
bufferSize = config.get(CosmosDBConfigConstants.READER_BUFFER_SIZE).toInt
|
||||
val sourceRecords = task.poll()
|
||||
Then("It returns a list of SourceRecords")
|
||||
assert(sourceRecords != null)
|
||||
val gson = new Gson()
|
||||
sourceRecords.forEach(r => {
|
||||
val message = gson.fromJson(r.value().toString, classOf[KafkaPayloadTest])
|
||||
if (message.testID == testUUID) {
|
||||
kafkaMessages.add(message)
|
||||
}
|
||||
})
|
||||
})
|
||||
|
||||
val minSize = (bufferSize * numWorkers)
|
||||
val maxSize = ((bufferSize + DOC_SIZE) * numWorkers)
|
||||
Then(s"Make sure number of bytes in the collection of messages is between ${minSize} and ${maxSize}")
|
||||
assert(kafkaMessages.size() * DOC_SIZE >= minSize && kafkaMessages.size() * DOC_SIZE <= maxSize)
|
||||
|
||||
}
|
||||
|
||||
|
||||
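// Same helper as in CosmosDBSourceConnectorTaskTestMock: builds NUM_DOCS documents tagged with a fresh testUUID.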
private def mockDocuments(): ArrayList[CosmosDBDocumentTest] = {
|
||||
val documents: ArrayList[CosmosDBDocumentTest] = new ArrayList[CosmosDBDocumentTest]
|
||||
testUUID = randomUUID()
|
||||
|
||||
for (i <- 1 to NUM_DOCS) {
|
||||
val doc = new CosmosDBDocumentTest(i.toString, s"Message ${i}", testUUID)
|
||||
documents.add(doc)
|
||||
}
|
||||
documents
|
||||
}
|
||||
|
||||
|
||||
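// Same helper as in CosmosDBSourceConnectorTaskTestMock: upserts the mock documents through the async SDK and logs the total RU charge.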
private def insertDocuments(cosmosDBProvider: CosmosDBProvider = CosmosDBProviderImpl) = {
|
||||
|
||||
// Source Collection
|
||||
val clientSettings = CosmosDBClientSettings(
|
||||
TestConfigurations.ENDPOINT,
|
||||
TestConfigurations.MASTER_KEY,
|
||||
TestConfigurations.DATABASE,
|
||||
TestConfigurations.SOURCE_COLLECTION,
|
||||
ConnectionPolicy.GetDefault(),
|
||||
ConsistencyLevel.Session
|
||||
)
|
||||
val client = Try(cosmosDBProvider.getClient(clientSettings)) match {
|
||||
case Success(conn) =>
|
||||
logger.info("Connection to CosmosDB established.")
|
||||
conn
|
||||
case Failure(f) => throw new ConnectException(s"Couldn't connect to CosmosDB.", f)
|
||||
}
|
||||
|
||||
val gson: Gson = new Gson()
|
||||
val upsertDocumentsOBs: util.ArrayList[Observable[ResourceResponse[Document]]] = new util.ArrayList[Observable[ResourceResponse[Document]]]
|
||||
val completionLatch = new CountDownLatch(1)
|
||||
val forcedScalaObservable: _root_.rx.lang.scala.Observable[ResourceResponse[Document]] = Observable.merge(upsertDocumentsOBs)
|
||||
mockDocuments().forEach(record => {
|
||||
val json = gson.toJson(record)
|
||||
val document = new Document(json)
|
||||
val obs = client.upsertDocument(CosmosDBProviderImpl.getCollectionLink(TestConfigurations.DATABASE, TestConfigurations.SOURCE_COLLECTION), document, null, false)
|
||||
upsertDocumentsOBs.add(obs)
|
||||
})
|
||||
|
||||
forcedScalaObservable
|
||||
.map(r => r.getRequestCharge)
|
||||
.reduce((sum, value) => sum + value)
|
||||
.subscribe(
|
||||
t => logger.info(s"upsertDocuments total RU charge is $t"),
|
||||
e => {
|
||||
logger.error(s"error upserting documents e:${e.getMessage()} stack:${e.getStackTrace().toString()}")
|
||||
completionLatch.countDown()
|
||||
},
|
||||
() => {
|
||||
logger.info("upsertDocuments completed")
|
||||
completionLatch.countDown()
|
||||
}
|
||||
)
// Wait for the merged upsert stream to report completion (or an error) so the documents are persisted before this helper returns.
completionLatch.await(1, TimeUnit.MINUTES)
|
||||
}
|
||||
}
|