Moving old Scala code to its own branch

This commit is contained in:
Ryan CrawCour 2020-06-02 14:02:45 +12:00
Parent 7fcb06805e
Commit 67f31ecada
70 changed files: 4178 additions and 199 deletions

3
.dockerignore Normal file

@ -0,0 +1,3 @@
azure-pipelines.yaml
.git
.vscode

86
.gitignore vendored

@ -1,3 +1,85 @@
.idea/
/target/*
/project/*
### Intellij ###
.idea/**
### mac ##
.DS_Store
C:/
# User-specific stuff
.idea/**/workspace.xml
.idea/**/tasks.xml
.idea/**/usage.statistics.xml
.idea/**/dictionaries
.idea/**/shelf
# Generated files
.idea/**/contentModel.xml
# Sensitive or high-churn files
.idea/**/dataSources/
.idea/**/dataSources.ids
.idea/**/dataSources.local.xml
.idea/**/sqlDataSources.xml
.idea/**/dynamic.xml
.idea/**/uiDesigner.xml
.idea/**/dbnavigator.xml
# Gradle
.idea/**/gradle.xml
.idea/**/libraries
# Gradle and Maven with auto-import
# When using Gradle or Maven with auto-import, you should exclude module files,
# since they will be recreated, and may cause churn. Uncomment if using
# auto-import.
.idea/modules.xml
.idea/*.iml
.idea/modules
.idea/misc.xml
.idea/sbt.xml
.idea/vcs.xml
# CMake
cmake-build-*/
# Mongo Explorer plugin
.idea/**/mongoSettings.xml
# File-based project format
*.iws
# IntelliJ
out/
# mpeltonen/sbt-idea plugin
.idea_modules/
# JIRA plugin
atlassian-ide-plugin.xml
# Created by https://www.gitignore.io/api/sbt,scala
### SBT ###
# Simple Build Tool
# http://www.scala-sbt.org/release/docs/Getting-Started/Directories.html#configuring-version-control
dist/*
target/
*.iml
lib_managed/
src_managed/
project/boot/
project/plugins/project/
.history
.cache
.lib/
### Scala ###
*.class
*.log
# End of https://www.gitignore.io/api/sbt,scala

29
Dockerfile Normal file

@ -0,0 +1,29 @@
# ----- Base Java - Check Dependencies ----
FROM openjdk:8u212-b04-jdk-stretch AS base
# Env variables
ENV SCALA_VERSION=2.12.8
ENV SBT_VERSION=1.2.8
ENV HOME=/app
WORKDIR $HOME
# Install sbt
RUN \
  curl -L -o sbt-$SBT_VERSION.deb https://dl.bintray.com/sbt/debian/sbt-$SBT_VERSION.deb && \
  dpkg -i sbt-$SBT_VERSION.deb && \
  rm sbt-$SBT_VERSION.deb && \
  apt-get update && \
  apt-get install sbt
#
# ----Build the app ----
FROM base AS build
ADD . $HOME
RUN sbt compile
#
# ---- Publish the App ----
FROM build AS release
EXPOSE 8888
CMD sbt run


@ -1,5 +1,41 @@
# Kafka Connect for Azure Cosmos DB
________________________
This connector is currently undergoing a major refresh.
Stay tuned for a new Java version targeting the Cosmos DB V4 Java SDK.
**Kafka Connect for Azure Cosmos DB** consists of two connectors:
A **Source Connector**, which pumps data from [Azure Cosmos DB](https://azure.microsoft.com/services/cosmos-db//) to [Apache Kafka](https://kafka.apache.org/) via its Change Feed.
A **Sink Connector**, which reads messages from Kafka and writes them to Cosmos DB.
## Contribute
This project welcomes contributions, feedback and suggestions.
If you would like to become a contributor to this project, please refer to our [Contribution Guide](CONTRIBUTING.MD).
## Setup
### [Source Connector](doc/README_Source.md)
### [Sink Connector](doc/README_Sink.md)
## Configuration
## References
It is worth looking through this material to get a better understanding of how Kafka Connect and these connectors work and how to use them.
- [Kafka Connect](https://docs.confluent.io/current/connect/index.html)
- [Kafka Connect Concepts](https://docs.confluent.io/current/connect/concepts.html)
- [Installing and Configuring Kafka Connect](https://docs.confluent.io/current/connect/userguide.html)
- [Tutorial: Moving Data In and Out of Kafka](https://docs.confluent.io/current/connect/quickstart.html)
It is also worth understanding how Cosmos DB and its Change Feed work:
- [Cosmos DB](https://docs.microsoft.com/en-us/azure/cosmos-db/introduction)
- [Cosmos DB Change feed](https://docs.microsoft.com/azure/cosmos-db/change-feed)
- [Cosmos DB Change feed processor](https://docs.microsoft.com/en-us/azure/cosmos-db/change-feed-processor)
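To make the configuration surface concrete, here is a minimal sketch of building a sink-side CosmosDBConfig from the connect.cosmosdb.* keys this commit defines (see CosmosDBConfigConstants further down). The account endpoint, key, database, collection, and topic values are placeholders, and the object name is invented for illustration.

import java.util.{HashMap => JHashMap}
import com.microsoft.azure.cosmosdb.kafka.connect.config.{ConnectorConfig, CosmosDBConfig, CosmosDBConfigConstants}

object SinkConfigSketch extends App {
  // Placeholder values; the key names come from CosmosDBConfigConstants in this commit.
  val props = new JHashMap[String, String]()
  props.put(CosmosDBConfigConstants.CONNECTION_ENDPOINT_CONFIG, "https://<account>.documents.azure.com:443/")
  props.put(CosmosDBConfigConstants.CONNECTION_MASTERKEY_CONFIG, "<master-key>")
  props.put(CosmosDBConfigConstants.DATABASE_CONFIG, "mydatabase")
  props.put(CosmosDBConfigConstants.COLLECTION_CONFIG, "mycollection")
  props.put(CosmosDBConfigConstants.TOPIC_CONFIG, "mytopic")
  props.put(CosmosDBConfigConstants.COLLECTION_TOPIC_MAP_CONFIG, "mycollection#mytopic")

  // CosmosDBConfig wraps Kafka's AbstractConfig around the sink ConfigDef from this commit.
  val config = CosmosDBConfig(ConnectorConfig.sinkConfigDef, props)
  println(config.getString(CosmosDBConfigConstants.DATABASE_CONFIG)) // mydatabase
}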

38
azure-pipelines.yml Normal file

@ -0,0 +1,38 @@
# Starter pipeline
# Start with a minimal pipeline that you can customize to build and deploy your code.
# Add steps that build, run tests, deploy, and more:
# https://aka.ms/yaml
trigger:
- master

variables: # pipeline-level
  projName: 'kafka-connect-cosmosdb'
  topicName: '$(Build.SourceBranchName)'
  releaseversion: '$(Build.BuildNumber)'
  appName: 'kafkaconnectcosmosdb'

stages:
- stage: Build_Container
  displayName: Build the App and publish it in Azure Container Registry
  jobs:
  - job: BuildJob
    pool:
      vmImage: 'ubuntu-latest'
    steps:
    - task: Bash@3
      inputs:
        targetType: 'inline'
        script: 'docker build --target=build -t $(appName)acr.azurecr.io/$(appname):canary .'
    - task: Bash@3
      inputs:
        targetType: 'inline'
        script: 'docker cp app/cosmosdbkafkaconnector.jar $(Build.ArtifactStagingDirectory)'
    - task: PublishBuildArtifacts@1
      inputs:
        PathtoPublish: '$(Build.ArtifactStagingDirectory)'
        ArtifactName: 'drop'
        publishLocation: 'Container'

30
build.sbt Normal file

@ -0,0 +1,30 @@
name := "com.microsoft.azure.cosmosdb.kafka.connect"
organization := "com.microsoft.azure"
version := "0.0.1-preview"
scalaVersion := "2.12.8"
libraryDependencies += "com.microsoft.azure" % "azure-cosmosdb" % "2.4.4"
libraryDependencies += "javax.ws.rs" % "javax.ws.rs-api" % "2.1.1" artifacts Artifact("javax.ws.rs-api", "jar", "jar")
libraryDependencies += "com.typesafe.scala-logging" %% "scala-logging" % "3.9.2"
libraryDependencies += "ch.qos.logback" % "logback-classic" % "1.2.3"
libraryDependencies += "com.google.code.gson" % "gson" % "2.8.5"
libraryDependencies += "io.reactivex" %% "rxscala" % "0.26.5"
libraryDependencies += "org.json4s" %% "json4s-jackson" % "3.5.0"
libraryDependencies += "org.mockito" % "mockito-scala_2.12" % "1.5.11"
libraryDependencies += "org.apache.kafka" %% "kafka" % "2.2.0" % Compile classifier "test"
libraryDependencies += "org.apache.kafka" %% "kafka" % "2.2.0" % Compile
libraryDependencies += "org.apache.kafka" % "kafka-clients" % "2.2.0" % Compile classifier "test"
libraryDependencies += "org.apache.kafka" % "kafka-clients" % "2.2.0" % Compile
libraryDependencies += "org.apache.kafka" % "connect-api" % "2.2.0" % Compile
libraryDependencies += "org.apache.kafka" % "connect-runtime" % "2.2.0" % Compile
trapExit := false
fork in run := true
libraryDependencies += "org.scalactic" %% "scalactic" % "3.0.5"
libraryDependencies += "org.scalatest" %% "scalatest" % "3.0.7" % "test"
libraryDependencies += "com.typesafe" % "config" % "1.3.3" % "test"
licenses += ("MIT", url("https://github.com/Microsoft/kafka-connect-cosmosdb/blob/master/LICENSE"))

87
pom.xml

@ -1,87 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.microsoft.azure</groupId>
<artifactId>cosmosdb.kafka.connect</artifactId>
<version>1.0-SNAPSHOT</version>
<name>cosmosdb.kafka.connect</name>
<!-- FIXME change it to the project's website -->
<url>http://www.example.com</url>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>1.8</maven.compiler.target>
<kafka.version>2.5.0</kafka.version>
</properties>
<dependencies>
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>connect-api</artifactId>
<version>${kafka.version}</version>
</dependency>
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka-clients</artifactId>
<version>${kafka.version}</version>
</dependency>
<dependency>
<groupId>com.microsoft.azure</groupId>
<artifactId>azure-cosmosdb</artifactId>
<version>2.6.6</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.11</version>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<pluginManagement><!-- lock down plugins versions to avoid using Maven defaults (may be moved to parent pom) -->
<plugins>
<!-- clean lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#clean_Lifecycle -->
<plugin>
<artifactId>maven-clean-plugin</artifactId>
<version>3.1.0</version>
</plugin>
<!-- default lifecycle, jar packaging: see https://maven.apache.org/ref/current/maven-core/default-bindings.html#Plugin_bindings_for_jar_packaging -->
<plugin>
<artifactId>maven-resources-plugin</artifactId>
<version>3.0.2</version>
</plugin>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.8.0</version>
</plugin>
<plugin>
<artifactId>maven-jar-plugin</artifactId>
<version>3.0.2</version>
</plugin>
<plugin>
<artifactId>maven-install-plugin</artifactId>
<version>2.5.2</version>
</plugin>
<plugin>
<artifactId>maven-deploy-plugin</artifactId>
<version>2.8.2</version>
</plugin>
<!-- site lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#site_Lifecycle -->
<plugin>
<artifactId>maven-site-plugin</artifactId>
<version>3.7.1</version>
</plugin>
<plugin>
<artifactId>maven-project-info-reports-plugin</artifactId>
<version>3.0.0</version>
</plugin>
</plugins>
</pluginManagement>
</build>
</project>


@ -1,5 +0,0 @@
package com.microsoft.azure.cosmosdb.kafka.connect;
public class CosmosDBProvider{
}


@ -1,42 +0,0 @@
package com.microsoft.azure.cosmosdb.kafka.connect.sink;
import org.apache.kafka.common.config.ConfigDef;
import org.apache.kafka.connect.connector.Task;
import org.apache.kafka.connect.sink.SinkConnector;
import java.util.List;
import java.util.Map;
public class CosmosDBSinkConnector extends SinkConnector {
@Override
public void start(Map<String, String> map) {
}
@Override
public Class<? extends Task> taskClass() {
throw new IllegalStateException("Not implemented");
}
@Override
public List<Map<String, String>> taskConfigs(int i) {
throw new IllegalStateException("Not implemented");
}
@Override
public void stop() {
throw new IllegalStateException("Not implemented");
}
@Override
public ConfigDef config() {
throw new IllegalStateException("Not implemented");
}
@Override
public String version() {
return this.getClass().getPackage().getImplementationVersion();
}
}


@ -1,41 +0,0 @@
package com.microsoft.azure.cosmosdb.kafka.connect.source;
import org.apache.kafka.common.config.ConfigDef;
import org.apache.kafka.connect.connector.Task;
import org.apache.kafka.connect.source.SourceConnector;
import java.util.List;
import java.util.Map;
public class CosmosDBSourceConnector extends SourceConnector {
@Override
public void start(Map<String, String> map) {
throw new IllegalStateException("Not implemented");
}
@Override
public Class<? extends Task> taskClass() {
throw new IllegalStateException("Not implemented");
}
@Override
public List<Map<String, String>> taskConfigs(int i) {
throw new IllegalStateException("Not implemented");
}
@Override
public void stop() {
throw new IllegalStateException("Not implemented");
}
@Override
public ConfigDef config() {
throw new IllegalStateException("Not implemented");
}
@Override
public String version() {
return this.getClass().getPackage().getImplementationVersion();
}
}


@ -0,0 +1,11 @@
<configuration>
<appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
<encoder>
<pattern>%d{HH:mm:ss} [%thread] %-5level %logger{36} - %msg%n</pattern>
</encoder>
</appender>
<root level="error">
<appender-ref ref="STDOUT" />
</root>
</configuration>


@ -0,0 +1,43 @@
package com.microsoft.azure.cosmosdb.kafka.connect
import com.microsoft.azure.cosmosdb.kafka.connect.config.{CosmosDBConfig, CosmosDBConfigConstants}
import com.microsoft.azure.cosmosdb.{ConnectionPolicy, ConsistencyLevel}
case class CosmosDBClientSettings(
endpoint:String,
masterkey:String,
database:String,
collection:String,
connectionPolicy:ConnectionPolicy,
consistencyLevel:ConsistencyLevel
)
object CosmosDBClientSettings{
def apply(config: CosmosDBConfig): CosmosDBClientSettings = {
val endpoint:String = config.getString(CosmosDBConfigConstants.CONNECTION_ENDPOINT_CONFIG)
require(endpoint.trim.nonEmpty, s"Invalid value for ${CosmosDBConfigConstants.CONNECTION_ENDPOINT_CONFIG}")
require(endpoint.startsWith("https://"), s"""Invalid value for ${CosmosDBConfigConstants.CONNECTION_ENDPOINT_CONFIG} - endpoint must start with "https://"""")
val masterKey:String = config.getPassword(CosmosDBConfigConstants.CONNECTION_MASTERKEY_CONFIG).value()
require(masterKey.trim.nonEmpty, s"Invalid value for ${CosmosDBConfigConstants.CONNECTION_MASTERKEY_CONFIG}")
val database:String = config.getString(CosmosDBConfigConstants.DATABASE_CONFIG)
require(database.trim.nonEmpty, s"Invalid value for ${CosmosDBConfigConstants.DATABASE_CONFIG}")
val collection:String = config.getString(CosmosDBConfigConstants.COLLECTION_CONFIG)
require(collection.trim.nonEmpty, s"Invalid value for ${CosmosDBConfigConstants.COLLECTION_CONFIG}")
//TODO: make this configurable
val connectionPolicy = ConnectionPolicy.GetDefault()
//TODO: make this configurable
val consistencyLevel = ConsistencyLevel.Session
new CosmosDBClientSettings(endpoint,
masterKey,
database,
collection,
connectionPolicy,
consistencyLevel)
}
}


@ -0,0 +1,9 @@
package com.microsoft.azure.cosmosdb.kafka.connect
import java.util.concurrent.CountDownLatch
import com.microsoft.azure.cosmosdb.rx.AsyncDocumentClient
trait CosmosDBProvider {
def getClient(settings: CosmosDBClientSettings): AsyncDocumentClient
def upsertDocuments[T](docs: scala.List[T], databaseName: String, collectionName: String, completionLatch: CountDownLatch): Unit
}


@ -0,0 +1,249 @@
package com.microsoft.azure.cosmosdb.kafka.connect
import java.util
import java.util.List
import java.util.concurrent.CountDownLatch
import _root_.rx.Observable
import _root_.rx.lang.scala.JavaConversions._
import com.microsoft.azure.cosmosdb._
import com.microsoft.azure.cosmosdb.kafka.connect.common.ErrorHandler.HandleRetriableError
import com.microsoft.azure.cosmosdb.rx.AsyncDocumentClient
import scala.util.{Failure, Success}
object CosmosDBProviderImpl extends HandleRetriableError with CosmosDBProvider {
private val requestOptionsInsert = new RequestOptions
requestOptionsInsert.setConsistencyLevel(ConsistencyLevel.Session)
initializeErrorHandler(2)
var client: AsyncDocumentClient = _
def getClient(settings: CosmosDBClientSettings): AsyncDocumentClient = synchronized {
if (client == null) {
client = new AsyncDocumentClient.Builder()
.withServiceEndpoint(settings.endpoint)
.withMasterKeyOrResourceToken(settings.masterkey)
.withConnectionPolicy(settings.connectionPolicy)
.withConsistencyLevel(settings.consistencyLevel)
.build()
}
client
}
def getCollectionLink(databaseName: String, collectionName: String) = "/dbs/%s/colls/%s".format(databaseName, collectionName)
def createDatabaseIfNotExists(databaseName: String): Unit = {
if (!isDatabaseExists(databaseName)) {
val dbDefinition = new Database()
dbDefinition.setId(databaseName)
logger.info(s"Creating Database $databaseName")
client.createDatabase(dbDefinition, null).toCompletable.await()
}
}
def createCollectionIfNotExists(databaseName: String, collectionName: String): Unit = {
if (!isCollectionExists(databaseName, collectionName)) {
val dbLnk = String.format("/dbs/%s", databaseName)
val collDefinition = new DocumentCollection
collDefinition.setId(collectionName)
logger.info(s"Creating Collection $collectionName")
client.createCollection(dbLnk, collDefinition, null).toCompletable.await()
}
}
def isDatabaseExists(databaseName: String): Boolean = {
val databaseLink = s"/dbs/$databaseName"
val databaseReadObs = client.readDatabase(databaseLink, null)
var isDatabaseExists = false
val db = databaseReadObs
.doOnNext((x: ResourceResponse[Database]) => {
def foundDataBase(x: ResourceResponse[Database]): Unit = {
logger.info(s"Database $databaseName already exists.")
isDatabaseExists = true
}
foundDataBase(x)
})
.onErrorResumeNext((e: Throwable) => {
def tryCreateDatabaseOnError(e: Throwable) = {
e match {
case de: DocumentClientException =>
if (de.getStatusCode == 404) {
logger.info(s"Database $databaseName does not exist")
isDatabaseExists = false
}
}
Observable.empty()
}
tryCreateDatabaseOnError(e)
})
db.toCompletable.await()
isDatabaseExists
}
def isCollectionExists(databaseName: String, collectionName: String): Boolean = {
var isCollectionExists = false
val dbLnk = s"/dbs/$databaseName"
val params = new SqlParameterCollection(new SqlParameter("@id", collectionName))
val qry = new SqlQuerySpec("SELECT * FROM r where r.id = @id", params)
client.queryCollections(dbLnk, qry, null).single.flatMap(page => {
def foundCollection(page: FeedResponse[DocumentCollection]) = {
isCollectionExists = !page.getResults.isEmpty
Observable.empty
}
foundCollection(page)
}).toCompletable.await()
isCollectionExists
}
def close(): Unit = {
client.close()
}
def readChangeFeed(databaseName: String, collectionName: String): Unit = {
//TODO: call Allan's ChangeFeedProcessor here
//TODO: ultimately replace Allan's ChangeFeedProcessor with the PG one
}
def createDocuments[T](docs: scala.List[T], databaseName: String, collectionName: String, completionLatch: CountDownLatch): Unit = {
val colLnk = s"/dbs/$databaseName/colls/$collectionName"
val createDocumentsOBs: List[Observable[ResourceResponse[Document]]] = new util.ArrayList[Observable[ResourceResponse[Document]]]
docs.foreach(f = t => {
val obs = client.createDocument(colLnk, t, null, false)
createDocumentsOBs.add(obs)
})
val forcedScalaObservable: _root_.rx.lang.scala.Observable[ResourceResponse[Document]] = Observable.merge(createDocumentsOBs)
forcedScalaObservable
.map(r => r.getRequestCharge)
.reduce((sum, value) => sum + value)
.subscribe(
t => {
logger.debug(s"createDocuments total RU charge is $t")
HandleRetriableError(Success())
},
e => {
logger.debug(s"error creating documents e:${e.getMessage()} stack:${e.getStackTrace().toString()}")
HandleRetriableError(Failure(e))
completionLatch.countDown()
},
() => {
logger.info("createDocuments completed")
completionLatch.countDown()
})
}
def upsertDocuments[T](docs: scala.List[T], databaseName: String, collectionName: String, completionLatch: CountDownLatch): Unit = {
val colLnk = s"/dbs/$databaseName/colls/$collectionName"
val upsertDocumentsOBs: List[Observable[ResourceResponse[Document]]] = new util.ArrayList[Observable[ResourceResponse[Document]]]
docs.foreach(f = t => {
val obs = client.upsertDocument(colLnk, t, null, false)
upsertDocumentsOBs.add(obs)
})
val forcedScalaObservable: _root_.rx.lang.scala.Observable[ResourceResponse[Document]] = Observable.merge(upsertDocumentsOBs)
forcedScalaObservable
.map(r => r.getRequestCharge)
.reduce((sum, value) => sum + value)
.subscribe(
t => {
logger.debug(s"upsertDocuments total RU charge is $t")
HandleRetriableError(Success())
},
e => {
logger.debug(s"error upserting documents e:${e.getMessage()} stack:${e.getStackTrace().toString()}")
HandleRetriableError(Failure(e))
completionLatch.countDown()
},
() => {
logger.info("upsertDocuments completed")
completionLatch.countDown()
})
}
def readCollection(databaseName: String, collectionName: String, completionLatch: CountDownLatch): _root_.rx.lang.scala.Observable[ResourceResponse[DocumentCollection]]= { // Create a Collection
val colLnk = s"/dbs/$databaseName/colls/$collectionName"
logger.info("reading collection " + colLnk)
val readDocumentsOBs = client.readCollection(colLnk, null)
val forcedScalaObservable: _root_.rx.lang.scala.Observable[ResourceResponse[DocumentCollection]] = readDocumentsOBs
forcedScalaObservable
.subscribe(
t => {
logger.debug(s"activityId" + t.getActivityId + s"id" + t.getResource.getId)
HandleRetriableError(Success())
},
e => {
logger.debug(s"error reading document collection e:${e.getMessage()} stack:${e.getStackTrace().toString()}")
HandleRetriableError(Failure(e))
completionLatch.countDown()
},
() => {
logger.info("readDocuments completed")
completionLatch.countDown()
})
return forcedScalaObservable
}
def queryCollection(databaseName: String, collectionName: String, completionLatch: CountDownLatch): _root_.rx.lang.scala.Observable[FeedResponse[DocumentCollection]]= { // Create a Collection
val colLnk = s"/dbs/$databaseName/colls/$collectionName"
val dbLink = s"/dbs/$databaseName"
logger.info("reading collection " + colLnk)
//val query = "SELECT * from c"
val query = String.format("SELECT * from c where c.id = '%s'", collectionName)
val options = new FeedOptions
options.setMaxItemCount(2)
val queryCollectionObservable = client.queryCollections(dbLink, query, options)
val forcedScalaObservable: _root_.rx.lang.scala.Observable[FeedResponse[DocumentCollection]] = queryCollectionObservable
forcedScalaObservable
.subscribe(
t => {
logger.debug(s"activityId" + t.getActivityId + s"id" + t.getResults.toString)
HandleRetriableError(Success())
},
e => {
logger.debug(s"error reading document collection e:${e.getMessage()} stack:${e.getStackTrace().toString()}")
HandleRetriableError(Failure(e))
completionLatch.countDown()
},
() => {
logger.debug("readDocuments completed")
completionLatch.countDown()
})
return forcedScalaObservable
}
}


@ -0,0 +1,45 @@
package com.microsoft.azure.cosmosdb.kafka.connect
import java.util.concurrent.CountDownLatch
import com.microsoft.azure.cosmosdb._
import com.microsoft.azure.cosmosdb.kafka.connect.config.{CosmosDBConfig, CosmosDBConfigConstants}
import scala.collection.JavaConverters._
// TODO: Please follow getter and setter model
// Otherwise document create fails
class SampleDoc() {
private var name = ""
private var age = 0
}
object Runner extends App{
val connectionPolicy=new ConnectionPolicy();
connectionPolicy.setConnectionMode(ConnectionMode.Direct)
connectionPolicy.setMaxPoolSize(600)
val consistencyLevel = ConsistencyLevel.Session
val cosmosDBClientSettings=CosmosDBClientSettings(
endpoint = "test",
masterkey = "test",
database = "test",
collection = "test",
connectionPolicy = connectionPolicy,
consistencyLevel = consistencyLevel)
val client = CosmosDBProviderImpl.getClient(cosmosDBClientSettings)
CosmosDBProviderImpl.createDatabaseIfNotExists("test8")
CosmosDBProviderImpl.createCollectionIfNotExists("test8","collection")
val sampleDoc = new SampleDoc()
val docs=List[SampleDoc](sampleDoc)
CosmosDBProviderImpl.createDocuments[SampleDoc](docs,"test8","collection", new CountDownLatch(1))
println("End of the Runner.")
}


@ -0,0 +1,76 @@
package com.microsoft.azure.cosmosdb.kafka.connect.common.ErrorHandler
import java.util.Date
import com.microsoft.azure.cosmosdb.kafka.connect.config.CosmosDBConfigConstants
import com.typesafe.scalalogging.StrictLogging
import org.apache.kafka.connect.errors.{ConnectException, RetriableException}
import scala.util.{Failure, Success, Try}
case class ErrorHandlerObj(remainingRetries: Int, maxRetries: Int, errorMessage: String, lastErrorTimestamp: Date)
trait HandleRetriableError extends StrictLogging{
var errorHandlerObj: Option[ErrorHandlerObj] = None
private var maxRetriesDefault = CosmosDBConfigConstants.ERROR_MAX_RETRIES_DEFAULT
def initializeErrorHandler(maxRetries: Int): Unit = {
errorHandlerObj = Some(ErrorHandlerObj(maxRetries, maxRetries, "", new Date()))
}
def HandleRetriableError[A](t : Try[A]) : Option[A] = {
if(!errorHandlerObj.isDefined) {
logger.info(s"HandleRetriableError not initialized, getting max retries value")
maxRetriesDefault = CosmosDBConfigConstants.ERROR_MAX_RETRIES_DEFAULT
initializeErrorHandler(maxRetriesDefault)
}
t
match {
case Success(s) => {
//in case we had previous errors.
if (errorHandlerObj.get.remainingRetries != errorHandlerObj.get.maxRetries) {
logger.info(s"Message retry is successful.")
}
//reset ErrorHandlerObj
resetErrorHandlerObj()
Some(s)
}
case Failure(f) =>
//decrement the retry count
logger.error(s"Encountered error ${f.getMessage}", f)
this.errorHandlerObj = Some(decrementErrorHandlerRetries(errorHandlerObj.get, f.getMessage))
//handle policy error
handleError(f, errorHandlerObj.get.remainingRetries, errorHandlerObj.get.maxRetries)
None
}
}
def resetErrorHandlerObj() = {
errorHandlerObj = Some(ErrorHandlerObj(errorHandlerObj.get.maxRetries, errorHandlerObj.get.maxRetries, "", new Date()))
}
private def decrementErrorHandlerRetries(errorHandlerObj: ErrorHandlerObj, msg: String): ErrorHandlerObj = {
if (errorHandlerObj.maxRetries == -1) {
ErrorHandlerObj(errorHandlerObj.remainingRetries, errorHandlerObj.maxRetries, msg, new Date())
} else {
ErrorHandlerObj(errorHandlerObj.remainingRetries - 1, errorHandlerObj.maxRetries, msg, new Date())
}
}
private def handleError(error: Throwable, retryCount: Int, maxRetries: Int) = {
//throw connectException
if (maxRetries > 0 && retryCount == 0) {
throw new ConnectException(error)
}
else {
logger.warn(s"Error policy set to RETRY. Remaining attempts $retryCount")
throw new RetriableException(error)
}
}
}
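A brief usage sketch for the trait above; the RetryExample object and the doWrite callback are hypothetical. The idea is to initialize the retry budget once and route every operation's Try through HandleRetriableError, which resets the counter on success and, on failure, decrements it and throws a RetriableException (or a ConnectException once retries are exhausted).

import scala.util.Try
import com.microsoft.azure.cosmosdb.kafka.connect.common.ErrorHandler.HandleRetriableError

// Hypothetical component that tracks retries around a write operation.
object RetryExample extends HandleRetriableError {
  initializeErrorHandler(maxRetries = 3)

  def writeWithRetryTracking(doWrite: () => Unit): Unit = {
    // Success resets the retry state; Failure decrements remaining retries
    // and rethrows as RetriableException / ConnectException.
    HandleRetriableError(Try(doWrite()))
  }
}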


@ -0,0 +1,83 @@
package com.microsoft.azure.cosmosdb.kafka.connect.config
import java.util
import org.apache.kafka.common.config.ConfigDef.{Importance, Type, Width}
import org.apache.kafka.common.config.{AbstractConfig, ConfigDef}
object ConnectorConfig {
lazy val baseConfigDef: ConfigDef = new ConfigDef()
.define(CosmosDBConfigConstants.CONNECTION_ENDPOINT_CONFIG, Type.STRING, Importance.HIGH,
CosmosDBConfigConstants.CONNECTION_ENDPOINT_DOC, "Connection", 1, Width.LONG,
CosmosDBConfigConstants.CONNECTION_ENDPOINT_DISPLAY)
.define(CosmosDBConfigConstants.CONNECTION_MASTERKEY_CONFIG, Type.PASSWORD, Importance.HIGH,
CosmosDBConfigConstants.CONNECTION_MASTERKEY_DOC, "Connection", 2, Width.LONG,
CosmosDBConfigConstants.CONNECTION_MASTERKEY_DISPLAY)
.define(CosmosDBConfigConstants.DATABASE_CONFIG, Type.STRING, Importance.HIGH,
CosmosDBConfigConstants.DATABASE_CONFIG_DOC, "Database", 1, Width.MEDIUM,
CosmosDBConfigConstants.DATABASE_CONFIG_DISPLAY)
.define(CosmosDBConfigConstants.COLLECTION_CONFIG, Type.STRING, Importance.HIGH,
CosmosDBConfigConstants.COLLECTION_CONFIG_DOC, "Collection", 1, Width.MEDIUM,
CosmosDBConfigConstants.COLLECTION_CONFIG_DISPLAY)
.define(CosmosDBConfigConstants.TOPIC_CONFIG, Type.STRING, Importance.HIGH,
CosmosDBConfigConstants.TOPIC_CONFIG_DOC, "Topic", 1, Width.MEDIUM,
CosmosDBConfigConstants.TOPIC_CONFIG_DISPLAY)
.define(CosmosDBConfigConstants.ERRORS_RETRY_TIMEOUT_CONFIG, Type.INT, CosmosDBConfigConstants.ERROR_MAX_RETRIES_DEFAULT, Importance.MEDIUM,
CosmosDBConfigConstants.ERRORS_RETRY_TIMEOUT_DOC, "Common", 1,
Width.MEDIUM , CosmosDBConfigConstants.ERRORS_RETRY_TIMEOUT_DISPLAY)
/**
* Holds the extra configurations for the source on top of
* the base.
**/
lazy val sourceConfigDef: ConfigDef = ConnectorConfig.baseConfigDef
.define(CosmosDBConfigConstants.ASSIGNED_PARTITIONS, Type.STRING, "", Importance.HIGH,
CosmosDBConfigConstants.ASSIGNED_PARTITIONS_DOC, "Source", 1, Width.MEDIUM,
CosmosDBConfigConstants.ASSIGNED_PARTITIONS_DISPLAY)
.define(CosmosDBConfigConstants.READER_BUFFER_SIZE, Type.INT, CosmosDBConfigConstants.READER_BUFFER_SIZE_DEFAULT, Importance.MEDIUM,
CosmosDBConfigConstants.READER_BUFFER_SIZE_DOC, "Source", 2, Width.LONG,
CosmosDBConfigConstants.READER_BUFFER_SIZE_DISPLAY)
.define(CosmosDBConfigConstants.BATCH_SIZE, Type.INT, CosmosDBConfigConstants.BATCH_SIZE_DEFAULT, Importance.MEDIUM,
CosmosDBConfigConstants.BATCH_SIZE_DOC, "Source", 3, Width.LONG,
CosmosDBConfigConstants.BATCH_SIZE_DISPLAY)
.define(CosmosDBConfigConstants.SOURCE_POST_PROCESSOR, Type.STRING, CosmosDBConfigConstants.SOURCE_POST_PROCESSOR_DEFAULT, Importance.MEDIUM,
CosmosDBConfigConstants.SOURCE_POST_PROCESSOR_DOC, "Source", 4, Width.LONG,
CosmosDBConfigConstants.SOURCE_POST_PROCESSOR_DISPLAY)
.define(CosmosDBConfigConstants.TIMEOUT, Type.INT, CosmosDBConfigConstants.TIMEOUT_DEFAULT, Importance.MEDIUM,
CosmosDBConfigConstants.TIMEOUT_DOC, "Source", 4, Width.LONG,
CosmosDBConfigConstants.TIMEOUT_DISPLAY)
/**
* Holds the extra configurations for the sink on top of
* the base.
**/
lazy val sinkConfigDef: ConfigDef = ConnectorConfig.baseConfigDef
.define(CosmosDBConfigConstants.COLLECTION_TOPIC_MAP_CONFIG, Type.STRING, Importance.HIGH,
CosmosDBConfigConstants.COLLECTION_TOPIC_MAP_CONFIG_DOC, "Map", 1, Width.MEDIUM,
CosmosDBConfigConstants.COLLECTION_TOPIC_MAP_CONFIG_DISPLAY)
.define(CosmosDBConfigConstants.SINK_POST_PROCESSOR, Type.STRING, CosmosDBConfigConstants.SINK_POST_PROCESSOR_DEFAULT, Importance.MEDIUM,
CosmosDBConfigConstants.SINK_POST_PROCESSOR_DOC, "Sink", 1, Width.LONG,
CosmosDBConfigConstants.SINK_POST_PROCESSOR_DISPLAY)
// .define(CosmosDBConfigConstants.EXTRA_SINK_CONFIG_01, Type.STRING, Importance.HIGH,
// CosmosDBConfigConstants.EXTRA_SINK_CONFIG_01_DOC, "Sink", 1, Width.MEDIUM,
// CosmosDBConfigConstants.EXTRA_SINK_CONFIG_01_DISPLAY)
// .define(CosmosDBConfigConstants.EXTRA_SINK_CONFIG_02, Type.STRING, Importance.HIGH,
// CosmosDBConfigConstants.EXTRA_SINK_CONFIG_02_DOC, "Sink", 2, Width.MEDIUM,
// CosmosDBConfigConstants.EXTRA_SINK_CONFIG_02_DISPLAY)
lazy val commonConfigDef: ConfigDef = ConnectorConfig.baseConfigDef
.define(CosmosDBConfigConstants.ERRORS_RETRY_TIMEOUT_CONFIG, Type.INT, CosmosDBConfigConstants.ERROR_MAX_RETRIES_DEFAULT, Importance.MEDIUM,
CosmosDBConfigConstants.ERRORS_RETRY_TIMEOUT_DOC, "Common", 1,
Width.MEDIUM , CosmosDBConfigConstants.ERRORS_RETRY_TIMEOUT_DISPLAY)
}
case class CosmosDBConfig(config: ConfigDef, props: util.Map[String, String])
extends AbstractConfig(config, props)


@ -0,0 +1,70 @@
package com.microsoft.azure.cosmosdb.kafka.connect.config
object CosmosDBConfigConstants {
val CONNECTOR_PREFIX = "connect.cosmosdb"
val CONNECTION_ENDPOINT_CONFIG = s"$CONNECTOR_PREFIX.connection.endpoint"
val CONNECTION_ENDPOINT_DOC = "The Cosmos DB endpoint."
val CONNECTION_ENDPOINT_DISPLAY = "Endpoint"
val CONNECTION_MASTERKEY_CONFIG = s"$CONNECTOR_PREFIX.master.key"
val CONNECTION_MASTERKEY_DOC = "The connection master key."
val CONNECTION_MASTERKEY_DISPLAY = "Master Key"
val DATABASE_CONFIG = s"$CONNECTOR_PREFIX.database"
val DATABASE_CONFIG_DISPLAY = "Database Name."
val DATABASE_CONFIG_DOC = "The Cosmos DB target database."
val COLLECTION_CONFIG = s"$CONNECTOR_PREFIX.collections"
val COLLECTION_CONFIG_DISPLAY = "Collection Names List."
val COLLECTION_CONFIG_DOC = "A comma delimited list of target collection names."
val TOPIC_CONFIG = s"$CONNECTOR_PREFIX.topic.name"
val TOPIC_CONFIG_DISPLAY = "Topic Names List."
val TOPIC_CONFIG_DOC = "A comma delimited list of target Kafka Topics."
val COLLECTION_TOPIC_MAP_CONFIG = s"$CONNECTOR_PREFIX.collections.topicmap"
val COLLECTION_TOPIC_MAP_CONFIG_DISPLAY = "Collection Topic Map."
val COLLECTION_TOPIC_MAP_CONFIG_DOC = "A comma delimited list of collections mapped to their topics, formatted as coll1#topic1,coll2#topic2."
//for the source task, the connector will set this for the each source task
val ASSIGNED_PARTITIONS = s"$CONNECTOR_PREFIX.assigned.partitions"
val ASSIGNED_PARTITIONS_DOC = "The CosmosDB partitions a task has been assigned."
val ASSIGNED_PARTITIONS_DISPLAY = "Assigned Partitions."
val BATCH_SIZE = s"$CONNECTOR_PREFIX.task.batch.size"
val BATCH_SIZE_DISPLAY = "Batch Size."
val BATCH_SIZE_DOC = "The max number of documents the source task will buffer before sending them to Kafka."
val BATCH_SIZE_DEFAULT = 100
val READER_BUFFER_SIZE = s"$CONNECTOR_PREFIX.task.buffer.size"
val READER_BUFFER_SIZE_DISPLAY = "Reader Buffer Size."
val READER_BUFFER_SIZE_DOC = "The max size of the collection of documents the source task will buffer before sending them to Kafka."
val READER_BUFFER_SIZE_DEFAULT = 10000
val SOURCE_POST_PROCESSOR = s"$CONNECTOR_PREFIX.source.post-processor"
val SOURCE_POST_PROCESSOR_DISPLAY = "Source Post-Processor List"
val SOURCE_POST_PROCESSOR_DOC = "Comma-separated list of Source Post-Processor class names to use for post-processing"
val SOURCE_POST_PROCESSOR_DEFAULT = ""
val SINK_POST_PROCESSOR = s"$CONNECTOR_PREFIX.sink.post-processor"
val SINK_POST_PROCESSOR_DISPLAY = "Sink Post-Processor List"
val SINK_POST_PROCESSOR_DOC = "Comma-separated list of Sink Post-Processor class names to use for post-processing"
val SINK_POST_PROCESSOR_DEFAULT = ""
val DEFAULT_POLL_INTERVAL = 1000
val ERRORS_RETRY_TIMEOUT_CONFIG = "errors.retry.timeout"
val ERROR_MAX_RETRIES_DEFAULT = 3
val ERRORS_RETRY_TIMEOUT_DISPLAY = "Retry Timeout for Errors"
val ERRORS_RETRY_TIMEOUT_DOC = "The maximum duration in milliseconds that a failed operation " +
"will be reattempted. The default is 0, which means no retries will be attempted. Use -1 for infinite retries.";
val TIMEOUT = s"$CONNECTOR_PREFIX.task.timeout"
val TIMEOUT_DISPLAY = "Timeout."
val TIMEOUT_DOC = "The max number of milliseconds the source task will use to read documents before sending them to Kafka."
val TIMEOUT_DEFAULT = 5000
}
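As a small illustration of the coll#topic format documented above (collection and topic names are invented), this is how a connect.cosmosdb.collections.topicmap value breaks down into a topic-to-collection map; the sink task later in this commit parses it the same way.

import scala.collection.mutable

object TopicMapSketch extends App {
  // Value formatted as coll1#topic1,coll2#topic2
  val collectionTopicMapString = "orders#orders-topic,customers#customers-topic"

  val collectionTopicMap = mutable.HashMap.empty[String, String]
  collectionTopicMapString.split(",").map(_.trim).foreach { pair =>
    val parts = pair.split("#").map(_.trim)
    collectionTopicMap.put(parts(1), parts(0)) // keyed by topic, value is the collection
  }
  println(collectionTopicMap) // e.g. Map(orders-topic -> orders, customers-topic -> customers)
}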


@ -0,0 +1,113 @@
package com.microsoft.azure.cosmosdb.kafka.connect.kafka
import org.apache.kafka.common.utils.SystemTime
import org.apache.kafka.common.utils.Time
import org.apache.kafka.common.utils.Utils
import org.apache.kafka.connect.runtime.{ConnectorConfig, Herder, Worker}
import org.apache.kafka.connect.runtime.distributed.DistributedConfig
import org.apache.kafka.connect.runtime.distributed.DistributedHerder
import org.apache.kafka.connect.runtime.rest.entities.ConnectorInfo
import org.apache.kafka.connect.storage._
import org.apache.kafka.connect.util.FutureCallback
import java.util.Properties
import java.util.UUID
import java.util.concurrent.CountDownLatch
import java.util.concurrent.ExecutionException
import java.util.concurrent.TimeUnit
import java.util.concurrent.TimeoutException
import java.util.concurrent.atomic.AtomicBoolean
import scala.collection.JavaConversions._
import com.typesafe.scalalogging.StrictLogging
import org.apache.kafka.connect.runtime.isolation.Plugins
/**
* Embedded Kafka Connect server as per KIP-26
*/
case class EmbeddedConnect(workerConfig: Properties, connectorConfigs: List[Properties]) extends StrictLogging {
private val REQUEST_TIMEOUT_MS = 120000
private val startLatch: CountDownLatch = new CountDownLatch(1)
private val shutdown: AtomicBoolean = new AtomicBoolean(false)
private val stopLatch: CountDownLatch = new CountDownLatch(1)
private var worker: Worker = _
private var herder: DistributedHerder = _
// ConnectEmbedded - throws Exception
val time: Time = new SystemTime()
val config: DistributedConfig = new DistributedConfig(Utils.propsToStringMap(workerConfig))
val offsetBackingStore: KafkaOffsetBackingStore = new KafkaOffsetBackingStore()
offsetBackingStore.configure(config)
// not sure if this is going to work, but because we don't have an advertised URL we can at least get a fairly random worker id
val workerId: String = UUID.randomUUID().toString
println("---> " + config.toString)
worker = new Worker(workerId, time, new Plugins(Map.empty[String, String]), config, offsetBackingStore)
val statusBackingStore: StatusBackingStore = new KafkaStatusBackingStore(time, worker.getInternalValueConverter)
statusBackingStore.configure(config)
val configBackingStore: ConfigBackingStore = new KafkaConfigBackingStore(worker.getInternalValueConverter, config, worker.configTransformer())
//advertisedUrl = "" as we don't have the rest server - hopefully this will not break anything
herder = new DistributedHerder(config, time, worker, "KafkaCluster1",statusBackingStore, configBackingStore, "")
def start(): Unit = {
try {
logger.info("Kafka ConnectEmbedded starting")
sys.ShutdownHookThread {
logger.info("exiting")
try {
startLatch.await()
EmbeddedConnect.this.stop()
} catch {
case e: InterruptedException => logger.error("Interrupted in shutdown hook while waiting for Kafka Connect startup to finish");
}
}
worker.start()
herder.start()
logger.info("Kafka ConnectEmbedded started")
connectorConfigs.foreach { connectorConfig: Properties =>
val callback = new FutureCallback[Herder.Created[ConnectorInfo]]()
val name = connectorConfig.getProperty(ConnectorConfig.NAME_CONFIG)
herder.putConnectorConfig(name, Utils.propsToStringMap(connectorConfig), true, callback)
callback.get(REQUEST_TIMEOUT_MS, TimeUnit.MILLISECONDS)
}
} catch {
case e: InterruptedException => logger.error("Starting interrupted ", e)
case e: ExecutionException => logger.error("Submitting connector config failed", e.getCause)
case e: TimeoutException => logger.error("Submitting connector config timed out", e)
case e: Exception => logger.error("Starting failed", e)
} finally {
startLatch.countDown()
}
}
def stop(): Unit = {
try {
val wasShuttingDown = shutdown.getAndSet(true)
if (!wasShuttingDown) {
logger.info("Kafka ConnectEmbedded stopping")
herder.stop()
worker.stop()
logger.info("Kafka ConnectEmbedded stopped")
}
} finally {
stopLatch.countDown()
}
}
def awaitStop(): Unit = {
try {
stopLatch.await()
} catch {
case e: InterruptedException => logger.error("Interrupted waiting for Kafka Connect to shutdown")
}
}
}


@ -0,0 +1,70 @@
package com.microsoft.azure.cosmosdb.kafka.connect.kafka
import java.util.Properties
import kafka.server.{KafkaConfig, KafkaServer}
import kafka.utils.{CoreUtils, TestUtils}
import kafka.zk.EmbeddedZookeeper
import org.apache.kafka.common.security.auth.SecurityProtocol
import org.apache.kafka.common.utils.SystemTime
import scala.collection.immutable.IndexedSeq
object KafkaCluster extends AutoCloseable {
private val Zookeeper = new EmbeddedZookeeper
val brokersNumber = 1
val ZookeeperConnection = s"localhost:${Zookeeper.port}"
var Connect: EmbeddedConnect = _
var kafkaConnectEnabled: Boolean = false
val BrokersConfig: IndexedSeq[KafkaConfig] = (1 to brokersNumber).map(i => getKafkaConfig(i))
val Brokers: IndexedSeq[KafkaServer] = BrokersConfig.map(TestUtils.createServer(_, new SystemTime()))
val BrokersList: String = TestUtils.getBrokerListStrFromServers(Brokers, SecurityProtocol.PLAINTEXT)
System.setProperty("http.nonProxyHosts", "localhost|0.0.0.0|127.0.0.1")
def startEmbeddedConnect(workerConfig: Properties, connectorConfigs: List[Properties]): Unit = {
kafkaConnectEnabled = true
Connect = EmbeddedConnect(workerConfig, connectorConfigs)
Connect.start()
}
private def injectProperties(props: Properties, brokerId: Int): Unit = {
props.setProperty("log.dir", s"C:/Temp/kafka-logs-${brokerId}")
props.setProperty("auto.create.topics.enable", "true")
props.setProperty("num.partitions", "1")
}
private def getKafkaConfig(brokerId: Int): KafkaConfig = {
val props: Properties = TestUtils.createBrokerConfig(
brokerId,
ZookeeperConnection,
enableControlledShutdown = false,
enableDeleteTopic = false,
TestUtils.RandomPort,
interBrokerSecurityProtocol = None,
trustStoreFile = None,
None,
enablePlaintext = true,
enableSaslPlaintext = false,
TestUtils.RandomPort,
enableSsl = false,
TestUtils.RandomPort,
enableSaslSsl = false,
TestUtils.RandomPort,
None)
injectProperties(props, brokerId)
KafkaConfig.fromProps(props)
}
def close(): Unit = {
if (kafkaConnectEnabled) {
Connect.stop()
}
Brokers.foreach { server =>
server.shutdown
CoreUtils.delete(server.config.logDirs)
}
Zookeeper.shutdown()
}
}


@ -0,0 +1,46 @@
package com.microsoft.azure.cosmosdb.kafka.connect.processor
import com.google.gson._
import org.apache.kafka.connect.sink.SinkRecord
import org.apache.kafka.connect.source.SourceRecord
abstract class JsonPostProcessor extends PostProcessor {
override final def runPostProcess(sourceRecord: SourceRecord): SourceRecord = {
val jsonParser = new JsonParser()
val json: JsonObject = jsonParser.parse(sourceRecord.value().toString).getAsJsonObject
val processedJson = runJsonPostProcess(json)
val result = new SourceRecord(
sourceRecord.sourcePartition,
sourceRecord.sourceOffset,
sourceRecord.topic,
null,
processedJson.toString
)
result
}
override def runPostProcess(sinkRecord: SinkRecord): SinkRecord = {
val jsonParser = new JsonParser()
val json: JsonObject = jsonParser.parse(sinkRecord.value().toString).getAsJsonObject
val processedJson = runJsonPostProcess(json)
val result = new SinkRecord(
sinkRecord.topic,
sinkRecord.kafkaPartition,
sinkRecord.keySchema,
sinkRecord.key,
sinkRecord.valueSchema,
processedJson.toString,
sinkRecord.kafkaOffset
)
result
}
def runJsonPostProcess(json: JsonObject): JsonObject
}
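A hedged sketch of how a concrete post-processor hangs off the abstract class above. The AddSourceTagPostProcessor name, the field it adds, and the sample record are invented; only JsonPostProcessor, PostProcessor, and the Kafka Connect SinkRecord API come from this commit and its dependencies.

import com.google.gson.JsonObject
import com.microsoft.azure.cosmosdb.kafka.connect.config.CosmosDBConfig
import com.microsoft.azure.cosmosdb.kafka.connect.processor.JsonPostProcessor
import org.apache.kafka.connect.sink.SinkRecord

// Hypothetical post-processor: stamps a constant field on every JSON payload.
class AddSourceTagPostProcessor extends JsonPostProcessor {
  override def configure(config: CosmosDBConfig): Unit = {}
  override def runJsonPostProcess(json: JsonObject): JsonObject = {
    json.addProperty("sourceTag", "kafka")
    json
  }
}

object JsonPostProcessorSketch extends App {
  // Schemaless record with a JSON string value, as the sink path expects.
  val record = new SinkRecord("mytopic", 0, null, null, null, """{"id":"doc-1"}""", 0L)
  val processed = new AddSourceTagPostProcessor().runPostProcess(record)
  println(processed.value()) // {"id":"doc-1","sourceTag":"kafka"}
}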


@ -0,0 +1,32 @@
package com.microsoft.azure.cosmosdb.kafka.connect.processor
import com.microsoft.azure.cosmosdb.kafka.connect.config.CosmosDBConfig
import com.typesafe.scalalogging.LazyLogging
import org.apache.kafka.connect.sink.SinkRecord
import org.apache.kafka.connect.source.SourceRecord
abstract class PostProcessor {
def configure(config: CosmosDBConfig): Unit
def runPostProcess(sourceRecord: SourceRecord): SourceRecord
def runPostProcess(sinkRecord: SinkRecord): SinkRecord
}
object PostProcessor extends AnyRef with LazyLogging {
def createPostProcessorList(processorClassNames: String, config: CosmosDBConfig): List[PostProcessor] =
processorClassNames.split(',').map(c => {
logger.info(s"Instantiating ${c} as Post-Processor")
if (c.isEmpty) {
null
} else {
val postProcessor = Class.forName(c).newInstance().asInstanceOf[PostProcessor]
postProcessor.configure(config)
postProcessor
}
}).filter( e => e != null).toList
}


@ -0,0 +1,22 @@
package com.microsoft.azure.cosmosdb.kafka.connect.processor
import com.microsoft.azure.cosmosdb.kafka.connect.config.CosmosDBConfig
import org.apache.kafka.connect.sink.SinkRecord
import org.apache.kafka.connect.source.SourceRecord
class SampleConsoleWriterPostProcessor extends PostProcessor {
override def configure(config: CosmosDBConfig): Unit = {
}
override def runPostProcess(sourceRecord: SourceRecord): SourceRecord = {
println(sourceRecord.value())
sourceRecord
}
override def runPostProcess(sinkRecord: SinkRecord): SinkRecord = {
println(sinkRecord.value())
sinkRecord
}
}


@ -0,0 +1,57 @@
package com.microsoft.azure.cosmosdb.kafka.connect.processor.sink
import com.google.gson._
import com.microsoft.azure.cosmosdb.kafka.connect.config.{ConnectorConfig, CosmosDBConfig, CosmosDBConfigConstants}
import com.microsoft.azure.cosmosdb.kafka.connect.processor.JsonPostProcessor
import org.apache.kafka.common.config.ConfigDef.{Importance, Type, Width}
class DocumentIdSinkPostProcessor extends JsonPostProcessor {
var documentIdField: String = ""
override def configure(config: CosmosDBConfig): Unit = {
val field = getPostProcessorConfiguration(config)
if (field.isDefined) documentIdField = field.get
}
override def runJsonPostProcess(json: JsonObject): JsonObject = {
if (!json.has("id")) {
if (json.has(documentIdField))
json.addProperty("id", json.get(documentIdField).getAsString)
else
json.add("id", JsonNull.INSTANCE)
}
json
}
private def getPostProcessorConfiguration(config: CosmosDBConfig): Option[String] =
{
val CONFIG = s"${CosmosDBConfigConstants.CONNECTOR_PREFIX}.sink.post-processor.documentId.field"
val DOC = "JSON field to be used as the Cosmos DB id"
val DISPLAY = "JSON Field Path"
val DEFAULT = ""
val postProcessorConfigDef = ConnectorConfig.baseConfigDef
if(ConnectorConfig.baseConfigDef.configKeys().containsKey(CONFIG)) {
ConnectorConfig.baseConfigDef.configKeys().remove(CONFIG)
}
postProcessorConfigDef.define(
CONFIG, Type.STRING, DEFAULT, Importance.MEDIUM,
DOC, s"PostProcessor:DocumentId",
1, Width.LONG, DISPLAY
)
val postProcessorConfig: CosmosDBConfig = CosmosDBConfig(postProcessorConfigDef, config.props)
val field = Option(postProcessorConfig.getString(CONFIG))
field
}
}


@ -0,0 +1,13 @@
package com.microsoft.azure.cosmosdb.kafka.connect.processor.sink
import com.google.gson._
import com.microsoft.azure.cosmosdb.kafka.connect.processor.JsonPostProcessor
import com.microsoft.azure.cosmosdb.kafka.connect.processor.`trait`._
class SelectorSinkPostProcessor extends JsonPostProcessor with Selector {
override def pipelineStage = "sink"
override def runJsonPostProcess(json: JsonObject): JsonObject = processor(json)
}


@ -0,0 +1,13 @@
package com.microsoft.azure.cosmosdb.kafka.connect.processor.source
import com.google.gson._
import com.microsoft.azure.cosmosdb.kafka.connect.processor.JsonPostProcessor
import com.microsoft.azure.cosmosdb.kafka.connect.processor.`trait`._
class SelectorSourcePostProcessor extends JsonPostProcessor with Selector {
override def pipelineStage = "source"
override def runJsonPostProcess(json: JsonObject): JsonObject = processor(json)
}


@ -0,0 +1,106 @@
package com.microsoft.azure.cosmosdb.kafka.connect.processor.`trait`
import com.google.gson._
import com.microsoft.azure.cosmosdb.kafka.connect.config.{ConnectorConfig, CosmosDBConfig, CosmosDBConfigConstants}
import com.microsoft.azure.cosmosdb.kafka.connect.processor.PostProcessor
import org.apache.kafka.common.config.ConfigDef.{Importance, Type, Width}
object SelectorType extends Enumeration {
type SelectorType = Value
val Include, Exclude, All = Value
def fromString(s: String): Value = values.find(_.toString == s).getOrElse(All)
}
import SelectorType._
trait Selector extends PostProcessor {
var selectorFields = Seq.empty[String]
var selectorType: SelectorType = SelectorType.Include
var processor: JsonObject => JsonObject = includeFields
def pipelineStage: String
override def configure(config: CosmosDBConfig): Unit = {
val configValues = getPostProcessorConfiguration(config)
selectorFields = configValues._1
selectorType = configValues._2
processor = selectorType match {
case Include => includeFields
case Exclude => excludeFields
case _ => includeAll
}
}
private def includeAll(json: JsonObject): JsonObject = json
private def includeFields(json: JsonObject): JsonObject = {
val toInclude = selectorFields
val newJson: JsonObject = new JsonObject()
toInclude.foreach(e => {
val j = json.get(e)
if (j != null) newJson.add(e, j)
})
newJson
}
private def excludeFields(json: JsonObject): JsonObject = {
val toRemove = selectorFields
toRemove.foreach(e => json.remove(e))
json
}
private def getPostProcessorConfiguration(config: CosmosDBConfig): (Seq[String], SelectorType) =
{
val FIELD_CONFIG = s"${CosmosDBConfigConstants.CONNECTOR_PREFIX}.$pipelineStage.post-processor.selector.fields"
val FIELD_DOC = "List of fields to be included or excluded in the generated JSON"
val FIELD_DISPLAY = "List of fields"
val FIELD_DEFAULT = ""
val TYPE_CONFIG = s"${CosmosDBConfigConstants.CONNECTOR_PREFIX}.$pipelineStage.post-processor.selector.type"
val TYPE_DOC = "How the selector should behave: Include or Exclude specified fields in the processed JSON"
val TYPE_DISPLAY = "Selector behaviour: Include or Exclude"
val TYPE_DEFAULT = ""
if(ConnectorConfig.baseConfigDef.configKeys().containsKey(FIELD_CONFIG)) {
ConnectorConfig.baseConfigDef.configKeys().remove(FIELD_CONFIG)
}
if(ConnectorConfig.baseConfigDef.configKeys().containsKey(TYPE_CONFIG)) {
ConnectorConfig.baseConfigDef.configKeys().remove(TYPE_CONFIG)
}
val postProcessorConfigDef = ConnectorConfig.baseConfigDef
.define(
FIELD_CONFIG, Type.STRING, FIELD_DEFAULT, Importance.MEDIUM,
FIELD_DOC, s"PostProcessor:Selector:${pipelineStage}",
1, Width.LONG, FIELD_DISPLAY
).define(
TYPE_CONFIG, Type.STRING, TYPE_DEFAULT, Importance.MEDIUM,
TYPE_DOC, s"PostProcessor:Selector:${pipelineStage}",
2, Width.LONG, TYPE_DISPLAY
)
val postProcessorConfig: CosmosDBConfig = CosmosDBConfig(postProcessorConfigDef, config.props)
selectorFields = postProcessorConfig.getString(FIELD_CONFIG).split(',').map(e => e.trim).toSeq
selectorType = SelectorType.fromString(postProcessorConfig.getString(TYPE_CONFIG))
(selectorFields, selectorType)
}
}


@ -0,0 +1,56 @@
package com.microsoft.azure.cosmosdb.kafka.connect.sink
import java.util
import org.apache.kafka.connect.data.Struct
import org.apache.kafka.connect.data.Schema._
import sun.reflect.generics.reflectiveObjects.NotImplementedException
import scala.collection.JavaConversions._
trait ConnectCosmosConverter {
/**
* Converts connect data to json tuples.
*
* @return converted data
*/
def toJsonMap(value: Object): List[(String, Object)]
}
/**
* Converter of connect data with schema to json tuples.
*/
object SchemaConnectCosmosConverter extends ConnectCosmosConverter {
override def toJsonMap(value: Object): List[(String, Object)] = {
val struct = value.asInstanceOf[Struct]
var res : Map[String,Object] = Map()
for (field <- struct.schema().fields()){
val fieldName = field.name()
val fieldType = field.schema().`type`()
fieldType match {
case Type.INT8 => res += (fieldName-> struct.getInt8(fieldName))
case Type.INT16 => res += (fieldName-> struct.getInt16(fieldName))
case Type.INT32 => res += (fieldName-> struct.getInt32(fieldName))
case Type.INT64 => res += (fieldName-> struct.getInt64(fieldName))
case Type.FLOAT32 => res += (fieldName-> struct.getFloat32(fieldName))
case Type.FLOAT64 => res += (fieldName-> struct.getFloat64(fieldName))
case Type.BOOLEAN => res += (fieldName-> struct.getBoolean(fieldName))
case Type.STRING => res += (fieldName-> struct.getString(fieldName))
case _ => throw new NotImplementedException()
}
}
res.toList
}
}
/**
* Converter of connect data without schema to json tuples.
*/
object NoSchemaConnectCosmosConverter extends ConnectCosmosConverter {
override def toJsonMap(value: Object): List[(String, Object)] = {
value.asInstanceOf[util.HashMap[String,Object]].toList
}
}
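To show what toJsonMap yields, a brief sketch with an invented schema and record (field names and values are placeholders). The tuples come from an intermediate Map, so their order may differ from the schema order.

import org.apache.kafka.connect.data.{Schema, SchemaBuilder, Struct}
import com.microsoft.azure.cosmosdb.kafka.connect.sink.SchemaConnectCosmosConverter

object ConverterSketch extends App {
  // Hypothetical schema describing a record with a string id and an int32 age.
  val schema: Schema = SchemaBuilder.struct()
    .field("id", Schema.STRING_SCHEMA)
    .field("age", Schema.INT32_SCHEMA)
    .build()
  val struct = new Struct(schema).put("id", "doc-1").put("age", 42)

  // One (fieldName, value) tuple per schema field, e.g. List((id,doc-1), (age,42)).
  println(SchemaConnectCosmosConverter.toJsonMap(struct))
}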


@ -0,0 +1,56 @@
package com.microsoft.azure.cosmosdb.kafka.connect.sink
import java.util
import com.microsoft.azure.cosmosdb.kafka.connect.common.ErrorHandler.HandleRetriableError
import com.microsoft.azure.cosmosdb.kafka.connect.config.{ConnectorConfig, CosmosDBConfig}
import org.apache.kafka.common.config.ConfigDef
import org.apache.kafka.connect.connector.Task
import org.apache.kafka.connect.sink.SinkConnector
import scala.collection.JavaConverters._
import scala.util.{Failure, Success}
class CosmosDBSinkConnector extends SinkConnector with HandleRetriableError {
private var configProps: util.Map[String, String] = _
override def version(): String = getClass.getPackage.getImplementationVersion
override def start(props: util.Map[String, String]): Unit = {
logger.info("Starting CosmosDBSinkConnector")
try {
initializeErrorHandler(props.get(org.apache.kafka.connect.runtime.ConnectorConfig.ERRORS_RETRY_TIMEOUT_CONFIG).toInt) // TODO: test
val config = CosmosDBConfig(ConnectorConfig.sinkConfigDef, props)
HandleRetriableError(Success(config))
}
catch{
case f: Throwable =>
logger.error(s"Couldn't start Cosmos DB Sink due to configuration error: ${f.getMessage}", f)
HandleRetriableError(Failure(f))
}
configProps = props
}
override def stop(): Unit = {
logger.info("Stopping CosmosDBSinkConnector")
}
override def taskClass(): Class[_ <: Task] = classOf[CosmosDBSinkTask]
override def taskConfigs(maxTasks: Int): util.List[util.Map[String, String]] = {
logger.info(s"Setting task configurations for $maxTasks workers with properties $this.configProps")
println(this.configProps)
(1 to maxTasks).map(_ => this.configProps).toList.asJava
}
override def config(): ConfigDef = ConnectorConfig.sinkConfigDef
}


@ -0,0 +1,11 @@
package com.microsoft.azure.cosmosdb.kafka.connect.sink
import scala.collection.mutable.HashMap
case class CosmosDBSinkSettings(endpoint: String,
masterKey: String,
database: String,
collectionTopicMap: HashMap[String, String]) {
}


@ -0,0 +1,125 @@
package com.microsoft.azure.cosmosdb.kafka.connect.sink
import java.util
import scala.collection.mutable.HashMap
import com.microsoft.azure.cosmosdb.kafka.connect.config.{ConnectorConfig, CosmosDBConfig, CosmosDBConfigConstants}
import com.microsoft.azure.cosmosdb.kafka.connect.{CosmosDBClientSettings, CosmosDBProviderImpl, CosmosDBProvider}
import com.microsoft.azure.cosmosdb.kafka.connect.processor._
import com.microsoft.azure.cosmosdb.rx.AsyncDocumentClient
import com.microsoft.azure.cosmosdb.{ConnectionPolicy, ConsistencyLevel}
import com.typesafe.scalalogging.LazyLogging
import org.apache.kafka.clients.consumer.OffsetAndMetadata
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.connect.errors.ConnectException
import org.apache.kafka.connect.sink.{SinkRecord, SinkTask}
import scala.collection.JavaConverters._
import scala.util.{Failure, Success, Try}
class CosmosDBSinkTask extends SinkTask with LazyLogging {
private var writer: Option[CosmosDBWriter] = None
private var client: AsyncDocumentClient = null
private var database: String = ""
private var taskConfig: Option[CosmosDBConfig] = None
private var topicNames: Array[String] = null
private var postProcessors = List.empty[PostProcessor]
val collectionTopicMap: HashMap[String, String] = HashMap.empty[String, String] // Public to allow for testing
val cosmosDBProvider: CosmosDBProvider = CosmosDBProviderImpl
override def start(props: util.Map[String, String]): Unit = {
logger.info("Starting CosmosDBSinkTask")
var config: util.Map[String, String] = null
if (context != null) {
config = if (context.configs().isEmpty) props else context.configs()
}
else {
config = props
}
// Get Configuration for this Task
taskConfig = Try(CosmosDBConfig(ConnectorConfig.sinkConfigDef, config)) match {
case Failure(f) => throw new ConnectException("Couldn't start CosmosDBSink due to configuration error.", f)
case Success(s) => Some(s)
}
// Add configured Post-Processors
val processorClassNames = taskConfig.get.getString(CosmosDBConfigConstants.SINK_POST_PROCESSOR)
postProcessors = PostProcessor.createPostProcessorList(processorClassNames, taskConfig.get)
// Get CosmosDB Connection
val endpoint: String = taskConfig.get.getString(CosmosDBConfigConstants.CONNECTION_ENDPOINT_CONFIG)
val masterKey: String = taskConfig.get.getPassword(CosmosDBConfigConstants.CONNECTION_MASTERKEY_CONFIG).value()
database = taskConfig.get.getString(CosmosDBConfigConstants.DATABASE_CONFIG)
// Populate collection topic map
// TODO: add support for many to many mapping, this only assumes each topic writes to one collection and multiple topics can write to the same collection
val collectionTopicMapString = taskConfig.get.getString(CosmosDBConfigConstants.COLLECTION_TOPIC_MAP_CONFIG)
if(collectionTopicMapString.contains("#")) { // There is at least one pair
collectionTopicMapString.split(",").map(_.trim).foreach(
m => {
val map = m.split("#").map(_.trim)
collectionTopicMap.put(map(1), map(0)) // topic, collection
})
}
// If there are topics with no mapping, add them to the map with topic name as collection name
topicNames = taskConfig.get.getString(CosmosDBConfigConstants.TOPIC_CONFIG).split(",").map(_.trim)
topicNames.foreach(
t => {
if (!collectionTopicMap.contains(t)) {
collectionTopicMap.put(t, t) // topic, collection
}
})
val clientSettings = CosmosDBClientSettings(
endpoint,
masterKey,
database,
null, // Don't pass a collection because our client is potentially for multiple collections
ConnectionPolicy.GetDefault(),
ConsistencyLevel.Session
)
client = Try(cosmosDBProvider.getClient(clientSettings)) match {
case Success(conn) =>
logger.info("Connection to CosmosDB established.")
conn
case Failure(f) => throw new ConnectException(s"Couldn't connect to CosmosDB.", f)
}
// Set up Writer
val setting = new CosmosDBSinkSettings(endpoint, masterKey, database, collectionTopicMap)
writer = Option(new CosmosDBWriter(setting, cosmosDBProvider))
}
override def put(records: util.Collection[SinkRecord]): Unit = {
val seq = records.asScala.toList
logger.info(s"Sending ${seq.length} records to writer to be written")
// Execute PostProcessing
val postProcessed = seq.map(sr => applyPostProcessing(sr))
// Currently only built for messages with JSON payload without schema
writer.foreach(w => w.write(postProcessed))
}
override def stop(): Unit = {
logger.info("Stopping CosmosDBSinkTask")
}
override def flush(map: util.Map[TopicPartition, OffsetAndMetadata]): Unit = {}
override def version(): String = getClass.getPackage.getImplementationVersion
private def applyPostProcessing(sinkRecord: SinkRecord): SinkRecord =
postProcessors.foldLeft(sinkRecord)((r, p) => {
//println(p.getClass.toString)
p.runPostProcess(r)
})
}


@ -0,0 +1,87 @@
package com.microsoft.azure.cosmosdb.kafka.connect.sink
import java.util.concurrent.CountDownLatch
import com.fasterxml.jackson.databind.ObjectMapper
import com.microsoft.azure.cosmosdb._
import com.microsoft.azure.cosmosdb.kafka.connect.CosmosDBProvider
import com.typesafe.scalalogging.StrictLogging
import org.apache.kafka.connect.sink.SinkRecord
class CosmosDBWriter(val settings: CosmosDBSinkSettings, val cosmosDBProvider: CosmosDBProvider) extends StrictLogging
{
private val requestOptionsInsert = new RequestOptions
requestOptionsInsert.setConsistencyLevel(ConsistencyLevel.Session)
def write(records: Seq[SinkRecord]): Unit = {
if (records.isEmpty) {
logger.info("No records received.")
} else {
logger.info(s"Received ${records.size} records.")
insert(records)
}
}
private def insert(records: Seq[SinkRecord]) = {
try {
var docs = List.empty[Document]
var collection: String = ""
records.groupBy(_.topic()).foreach { case (_, groupedRecords) =>
groupedRecords.foreach { record =>
// Determine which collection to write to
if (settings.collectionTopicMap.contains(record.topic))
collection = settings.collectionTopicMap(record.topic)
else
throw new Exception("No sink collection specified for this topic.") // TODO: tie this in with the exception handler
val content: String = serializeValue(record.value())
val document = new Document(content)
logger.info("Upserting Document object id " + document.get("id") + " into collection " + collection)
docs = docs :+ document
}
// Send current batch of documents and reset the list for the next topic's documents
cosmosDBProvider.upsertDocuments[Document](docs, settings.database, collection, new CountDownLatch(1))
docs = List.empty[Document]
}
}
catch {
case t: Throwable =>
logger.error(s"There was an error inserting the records ${t.getMessage}", t)
}
}
def close(): Unit = {
logger.info("Shutting down CosmosDBWriter.")
}
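// Serializes a record value to a JSON string; if the JSON carries a Kafka Connect "payload" envelope, only the payload content is returned.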
def serializeValue(value: Any): String = {
var content: String = null
val om = new ObjectMapper()
if (!value.isInstanceOf[String]){
content = om.writeValueAsString(value)
}else {
content = value.toString
}
if(om.readTree(content).has("payload")){
val temp = om.readTree(content).get("payload")
if (temp.isTextual()){ // TextNodes cannot be directly converted to strings
content = temp.asText()
} else {
content = temp.toString
}
}
return content
}
}


@ -0,0 +1,5 @@
package com.microsoft.azure.cosmosdb.kafka.connect.source
trait ChangeFeedObserver {
def processChanges(documentList: List[String])
}


@ -0,0 +1,64 @@
package com.microsoft.azure.cosmosdb.kafka.connect.source
import com.microsoft.azure.cosmosdb._
import java.util.concurrent.CountDownLatch
import com.microsoft.azure.cosmosdb.kafka.connect.common.ErrorHandler.HandleRetriableError
import scala.collection.JavaConversions._
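// Coordinates change feed reading across all partition key ranges of the feed collection; per-partition continuation state is kept in the lease collection and every range is polled again after defaultFeedPollDelay.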
class ChangeFeedProcessor(feedCollectionInfo: DocumentCollectionInfo, leaseCollectionInfo: DocumentCollectionInfo, changeFeedProcessorOptions: ChangeFeedProcessorOptions, changeFeedObserver: ChangeFeedObserver)extends HandleRetriableError {
val asyncClientFeed = DocumentClientBuilder.buildAsyncDocumentClient(feedCollectionInfo.uri, feedCollectionInfo.masterKey)
val asyncClientLease = DocumentClientBuilder.buildAsyncDocumentClient(leaseCollectionInfo.uri, leaseCollectionInfo.masterKey)
val partitionLeaseStateManager = new PartitionLeaseStateManager(asyncClientLease, leaseCollectionInfo.databaseName, leaseCollectionInfo.collectionName)
val partitionFeedReaders = createPartitionMap()
private var run = true
private def createPartitionMap(): Map[String, PartitionFeedReader] = {
val rangeIdList = getPartitionRangeIds()
val feedReaderMap = Map(rangeIdList map { partitionKeyRangeId => (partitionKeyRangeId, new PartitionFeedReader(asyncClientFeed, feedCollectionInfo.databaseName, feedCollectionInfo.collectionName, partitionKeyRangeId, partitionLeaseStateManager, changeFeedProcessorOptions)) }: _*)
return feedReaderMap
}
private def getPartitionRangeIds(): List[String] = {
val collectionLink = DocumentClientBuilder.getCollectionLink(feedCollectionInfo.databaseName, feedCollectionInfo.collectionName)
val changeFeedObservable = asyncClientFeed.readPartitionKeyRanges(collectionLink, null)
var results = List[PartitionKeyRange]()
changeFeedObservable.toBlocking().forEach(x => results = results ++ x.getResults())
return results.map(p => p.getId)
}
def start(): Unit = {
println("Started!")
spawn {
do {
val countDownLatch = new CountDownLatch(partitionFeedReaders.size)
// Parallel
partitionFeedReaders.par.foreach { p => p._2.readChangeFeed(changeFeedObserver.processChanges, countDownLatch) }
// Serial:
//for ((id, pfr) <- partitionFeedReaders) pfr.readChangeFeed(changeFeedObserver.processChanges, countDownLatch)
countDownLatch.await()
println("Waiting...")
Thread.sleep(changeFeedProcessorOptions.defaultFeedPollDelay)
} while (run)
}
}
def stop(): Unit = {
run = false
println("Finished!")
}
private def spawn(p: => Unit) {
val t = new Thread() {
override def run() = p
}
t.start()
}
}


@ -0,0 +1,57 @@
package com.microsoft.azure.cosmosdb.kafka.connect.source
import org.apache.kafka.connect.errors.ConnectException
import com.microsoft.azure.cosmosdb.kafka.connect.common.ErrorHandler.HandleRetriableError
import scala.reflect._
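// Immutable builder for ChangeFeedProcessor: each with* method returns a new builder instance and build() verifies that all required parts have been supplied.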
class ChangeFeedProcessorBuilder(feedCollectionInfo: DocumentCollectionInfo, leaseCollectionInfo: DocumentCollectionInfo, changeFeedProcessorOptions: ChangeFeedProcessorOptions, changeFeedObserver: ChangeFeedObserver)extends HandleRetriableError {
def this() = this(null, null, new ChangeFeedProcessorOptions(), null)
def withFeedCollection(newFeedCollectionInfo: DocumentCollectionInfo): ChangeFeedProcessorBuilder = {
guardAgainstNull(newFeedCollectionInfo)
return new ChangeFeedProcessorBuilder(newFeedCollectionInfo, this.leaseCollectionInfo, this.changeFeedProcessorOptions, this.changeFeedObserver)
}
def withLeaseCollection(newLeaseCollectionInfo: DocumentCollectionInfo): ChangeFeedProcessorBuilder = {
guardAgainstNull(newLeaseCollectionInfo)
return new ChangeFeedProcessorBuilder(this.feedCollectionInfo, newLeaseCollectionInfo, this.changeFeedProcessorOptions, this.changeFeedObserver)
}
def withProcessorOptions(newChangeFeedProcessorOptions: ChangeFeedProcessorOptions): ChangeFeedProcessorBuilder = {
guardAgainstNull(newChangeFeedProcessorOptions)
return new ChangeFeedProcessorBuilder(this.feedCollectionInfo, this.leaseCollectionInfo, newChangeFeedProcessorOptions, this.changeFeedObserver)
}
def withObserver(newChangeFeedObserver: ChangeFeedObserver): ChangeFeedProcessorBuilder = {
guardAgainstNull(newChangeFeedObserver)
return new ChangeFeedProcessorBuilder(this.feedCollectionInfo, this.leaseCollectionInfo, this.changeFeedProcessorOptions, newChangeFeedObserver)
}
def build(): ChangeFeedProcessor = {
guardAgainstNull(this.feedCollectionInfo)
guardAgainstNull(this.leaseCollectionInfo)
guardAgainstNull(this.changeFeedProcessorOptions)
guardAgainstNull(this.changeFeedObserver)
return new ChangeFeedProcessor(this.feedCollectionInfo, this.leaseCollectionInfo, this.changeFeedProcessorOptions, this.changeFeedObserver)
}
private def guardAgainstNull[T: ClassTag](objectToCheck: T): Unit = {
try{
val className = classTag[T].runtimeClass.getSimpleName()
val messageIfNull = "%s can't be null!".format(className)
if (objectToCheck == null) throw new NullPointerException(messageIfNull)
logger.debug("%s Object initialized".format(className))
}catch{
case f: Throwable =>
throw new ConnectException("%s can't be null!".format(classTag[T].runtimeClass.getSimpleName()), f)
}
}
}


@ -0,0 +1,5 @@
package com.microsoft.azure.cosmosdb.kafka.connect.source
class ChangeFeedProcessorOptions(val queryPartitionsMaxBatchSize: Int, val defaultFeedPollDelay: Int) {
def this() = this(100, 2000)
}


@ -0,0 +1,170 @@
package com.microsoft.azure.cosmosdb.kafka.connect.source
import java.util
import com.google.gson.Gson
import com.microsoft.azure.cosmosdb._
import com.microsoft.azure.cosmosdb.kafka.connect.CosmosDBProviderImpl
import com.microsoft.azure.cosmosdb.kafka.connect.common.ErrorHandler.HandleRetriableError
import com.microsoft.azure.cosmosdb.rx._
import org.apache.kafka.connect.source.{SourceRecord, SourceTaskContext}
import scala.collection.JavaConversions._
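// Reads change feed documents for a single assigned partition key range and converts them into Kafka Connect SourceRecords, tracking the continuation token and _lsn as the source offset.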
class CosmosDBReader(private val client: AsyncDocumentClient,
val setting: CosmosDBSourceSettings,
private val context: SourceTaskContext) extends HandleRetriableError {
private val SOURCE_PARTITION_FIELD = "partition"
private val SOURCE_OFFSET_FIELD = "changeFeedState"
// Read the initial state from the offset storage when the CosmosDBReader is instantiated for the
// assigned partition
private val initialState : CosmosDBReaderChangeFeedState = getCosmosDBReaderChangeFeedState(setting.assignedPartition)
// Initialize the current state using the same values of the initial state
private var currentState = initialState
// Initialize variables that control the position of the reading cursor
private var lastCursorPosition = -1
private var currentCursorPosition = -1
def processChanges(): util.List[SourceRecord] = {
val records = new util.ArrayList[SourceRecord]
var bufferSize = 0
val collectionLink = CosmosDBProviderImpl.getCollectionLink(setting.database, setting.collection)
val changeFeedOptions = createChangeFeedOptions()
try
{
// Initial position of the reading cursor
if (initialState != null)
lastCursorPosition = initialState.lsn.toInt
else
lastCursorPosition = currentCursorPosition
val changeFeedObservable = client.queryDocumentChangeFeed(collectionLink, changeFeedOptions)
changeFeedObservable
.doOnNext(feedResponse => {
val processingStartTime = System.currentTimeMillis()
// Return the list of documents in the FeedResponse
val documents = feedResponse.getResults()
documents.foreach(doc => {
// Update the reader state
currentState = new CosmosDBReaderChangeFeedState(
setting.assignedPartition,
feedResponse.getResponseHeaders.get("etag"),
doc.get("_lsn").toString
)
// Update the current reader cursor
currentCursorPosition = currentState.lsn.toInt
// Check if the cursor has moved beyond the last processed position
if (currentCursorPosition > lastCursorPosition) {
// Process new document
logger.debug(s"Sending document ${doc} to the Kafka topic ${setting.topicName}")
logger.debug(s"Current State => Partition: ${currentState.partition}, " +
s"ContinuationToken: ${currentState.continuationToken}, " +
s"LSN: ${currentState.lsn}")
records.add(new SourceRecord(
sourcePartition(setting.assignedPartition),
sourceOffset(new Gson().toJson(currentState)),
setting.topicName,
null,
doc.toJson()
))
// Increment the buffer
bufferSize = bufferSize + doc.toJson().getBytes().length
// Calculate the elapsed time
val processingElapsedTime = System.currentTimeMillis() - processingStartTime
// Returns records based on batch size, buffer size or timeout
if (records.size >= setting.batchSize || bufferSize >= setting.bufferSize || processingElapsedTime >= setting.timeout) {
return records
}
}
})
})
.doOnCompleted(() => {}) // signal to the consumer that there is no more data available
.doOnError((e) => { logger.error(e.getMessage()) }) // signal to the consumer that an error has occurred
.subscribe()
changeFeedObservable.toBlocking.single
}
catch
{
case f: Throwable =>
logger.error(s"Couldn't add documents to the kafka topic: ${f.getMessage}", f)
}
return records
}
private def createChangeFeedOptions(): ChangeFeedOptions = {
val changeFeedOptions = new ChangeFeedOptions()
changeFeedOptions.setPartitionKeyRangeId(setting.assignedPartition)
changeFeedOptions.setMaxItemCount(setting.batchSize)
if (currentState == null) {
changeFeedOptions.setStartFromBeginning(true)
}
else {
// If the cursor position has not reached the end of the feed, read again
if (currentCursorPosition < currentState.continuationToken.replaceAll("^\"|\"$", "").toInt) {
if (initialState != null)
changeFeedOptions.setRequestContinuation(initialState.continuationToken)
else
changeFeedOptions.setStartFromBeginning(true)
return changeFeedOptions
}
currentState.continuationToken match {
case null => changeFeedOptions.setStartFromBeginning(true)
case "" => changeFeedOptions.setStartFromBeginning(true)
case t => changeFeedOptions.setRequestContinuation(t)
}
}
return changeFeedOptions
}
private def getCosmosDBReaderChangeFeedState(partition: String): CosmosDBReaderChangeFeedState = {
var state: CosmosDBReaderChangeFeedState = null
if (context != null) {
val offset = context.offsetStorageReader.offset(sourcePartition(partition))
if (offset != null) {
state = new Gson().fromJson(offset.get(SOURCE_OFFSET_FIELD).toString(), classOf[CosmosDBReaderChangeFeedState])
}
}
return state
}
private def sourcePartition(partition: String): util.Map[String, String] = {
val map = new java.util.HashMap[String,String]
map.put(SOURCE_PARTITION_FIELD, partition)
return map
}
private def sourceOffset(offset: String): util.Map[String, String] = {
val map = new java.util.HashMap[String,String]
map.put(SOURCE_OFFSET_FIELD, offset)
return map
}
}


@ -0,0 +1,7 @@
package com.microsoft.azure.cosmosdb.kafka.connect.source
case class CosmosDBReaderChangeFeedState(partition: String,
continuationToken: String,
lsn: String) {
}


@ -0,0 +1,83 @@
package com.microsoft.azure.cosmosdb.kafka.connect.source
import java.util
import com.microsoft.azure.cosmosdb.kafka.connect.common.ErrorHandler.HandleRetriableError
import com.microsoft.azure.cosmosdb._
import scala.collection.JavaConversions._
import com.microsoft.azure.cosmosdb.{ConnectionPolicy, ConsistencyLevel}
import com.microsoft.azure.cosmosdb.kafka.connect.{CosmosDBClientSettings, CosmosDBProvider, CosmosDBProviderImpl}
import com.microsoft.azure.cosmosdb.kafka.connect.config.{ConnectorConfig, CosmosDBConfig, CosmosDBConfigConstants}
import org.apache.kafka.common.config.ConfigDef
import org.apache.kafka.connect.connector.Task
import org.apache.kafka.connect.source.SourceConnector
import org.apache.kafka.connect.util.ConnectorUtils
import scala.util.{Failure, Success, Try}
import scala.collection.JavaConverters._
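// Source connector entry point: discovers the partition key ranges of the source collection and distributes them across at most maxTasks task configurations.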
class CosmosDBSourceConnector extends SourceConnector with HandleRetriableError {
private var configProps: util.Map[String, String] = _
private var numWorkers: Int = 0
val cosmosDBProvider: CosmosDBProvider = CosmosDBProviderImpl
override def version(): String = getClass.getPackage.getImplementationVersion
override def start(props: util.Map[String, String]): Unit = {
logger.info("Starting CosmosDBSourceConnector")
configProps = props
}
override def taskClass(): Class[_ <: Task] = classOf[CosmosDBSourceTask]
override def taskConfigs(maxTasks: Int): util.List[util.Map[String, String]] = {
try {
val config: CosmosDBConfig = CosmosDBConfig(ConnectorConfig.sourceConfigDef, configProps)
val database: String = config.getString(CosmosDBConfigConstants.DATABASE_CONFIG)
val collection: String = config.getString(CosmosDBConfigConstants.COLLECTION_CONFIG)
val settings: CosmosDBClientSettings = CosmosDBClientSettings(
config.getString(CosmosDBConfigConstants.CONNECTION_ENDPOINT_CONFIG),
config.getPassword(CosmosDBConfigConstants.CONNECTION_MASTERKEY_CONFIG).value(),
database,
collection,
ConnectionPolicy.GetDefault(),
ConsistencyLevel.Session
)
logger.debug("Settings for Cosmos Db connection: ", settings)
val client = cosmosDBProvider.getClient(settings)
val collectionLink = CosmosDBProviderImpl.getCollectionLink(database, collection)
val changeFeedObservable = client.readPartitionKeyRanges(collectionLink, null)
var results = List[PartitionKeyRange]()
changeFeedObservable.toBlocking().forEach(x => results = results ++ x.getResults())
val numberOfPartitions = results.map(p => p.getId)
numWorkers = Math.min(numberOfPartitions.size(), maxTasks)
logger.info(s"Setting task configurations for $numWorkers workers.")
val groups = ConnectorUtils.groupPartitions(numberOfPartitions, maxTasks)
groups
.withFilter(g => g.nonEmpty)
.map { g =>
val taskConfigs = new java.util.HashMap[String, String](this.configProps)
taskConfigs.put(CosmosDBConfigConstants.ASSIGNED_PARTITIONS, g.mkString(","))
taskConfigs
}
}
catch {
case f: Throwable =>
logger.error(s"Couldn't initialize CosmosDb with settings: ${f.getMessage}", f)
HandleRetriableError(Failure(f))
return null
}
}
override def config(): ConfigDef = ConnectorConfig.sourceConfigDef
override def stop(): Unit = {
logger.info("Stopping CosmosDBSourceConnector")
}
def getNumberOfWorkers(): Int = numWorkers
}


@ -0,0 +1,12 @@
package com.microsoft.azure.cosmosdb.kafka.connect.source
case class CosmosDBSourceSettings(
database: String,
collection: String,
assignedPartition: String,
batchSize: Int,
bufferSize: Int,
timeout: Int,
topicName: String,
) {
}


@ -0,0 +1,150 @@
package com.microsoft.azure.cosmosdb.kafka.connect.source
import java.util
import com.microsoft.azure.cosmosdb.kafka.connect.common.ErrorHandler.HandleRetriableError
import com.microsoft.azure.cosmosdb.kafka.connect.config.{ConnectorConfig, CosmosDBConfig, CosmosDBConfigConstants}
import com.microsoft.azure.cosmosdb.kafka.connect.{CosmosDBClientSettings, CosmosDBProvider, CosmosDBProviderImpl}
import com.microsoft.azure.cosmosdb.rx.AsyncDocumentClient
import com.microsoft.azure.cosmosdb.{ConnectionPolicy, ConsistencyLevel}
import com.typesafe.scalalogging.StrictLogging
import org.apache.kafka.connect.errors.ConnectException
import org.apache.kafka.connect.source.{SourceRecord, SourceTask}
import com.microsoft.azure.cosmosdb.kafka.connect.processor._
import com.microsoft.azure.cosmosdb.kafka.connect.source.CosmosDBReader
import scala.collection.JavaConversions._
import scala.collection.mutable
import scala.util.{Failure, Success, Try}
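// Source task: builds one CosmosDBReader per assigned partition key range and returns their change feed records from poll(), applying any configured post-processors.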
class CosmosDBSourceTask extends SourceTask with StrictLogging with HandleRetriableError{
val readers = mutable.Map.empty[String, CosmosDBReader]
private var client: AsyncDocumentClient = null
private var database: String = ""
private var collection: String = ""
private var taskConfig: Option[CosmosDBConfig] = None
private var bufferSize: Option[Int] = None
private var batchSize: Option[Int] = None
private var timeout: Option[Int] = None
private var topicName: String = ""
private var postProcessors = List.empty[PostProcessor]
val cosmosDBProvider: CosmosDBProvider = CosmosDBProviderImpl
override def start(props: util.Map[String, String]): Unit = {
logger.info("Starting CosmosDBSourceTask")
var config: util.Map[String, String] = null
if (context != null) {
config = if (context.configs().isEmpty) props else context.configs()
}
else {
config = props
}
// Get Configuration for this Task
try{
taskConfig = Some(CosmosDBConfig(ConnectorConfig.sourceConfigDef, config))
//HandleError(Success(config))
}
catch{
case f: Throwable =>
logger.error(s"Couldn't start Cosmos DB Source due to configuration error: ${f.getMessage}", f)
HandleRetriableError(Failure(f))
}
/*taskConfig = Try(CosmosDBConfig(ConnectorConfig.sourceConfigDef, config)) match {
case Failure(f) => throw new ConnectException("Couldn't start CosmosDBSource due to configuration error.", f)
case Success(s) => Some(s)
}*/
// Add configured Post-Processors if exist in configuration file
if(taskConfig.get.getString(CosmosDBConfigConstants.SOURCE_POST_PROCESSOR)!=null){
val processorClassNames = taskConfig.get.getString(CosmosDBConfigConstants.SOURCE_POST_PROCESSOR)
postProcessors = PostProcessor.createPostProcessorList(processorClassNames, taskConfig.get)
}
// Get CosmosDB Connection
val endpoint: String = taskConfig.get.getString(CosmosDBConfigConstants.CONNECTION_ENDPOINT_CONFIG)
val masterKey: String = taskConfig.get.getPassword(CosmosDBConfigConstants.CONNECTION_MASTERKEY_CONFIG).value()
database = taskConfig.get.getString(CosmosDBConfigConstants.DATABASE_CONFIG)
collection = taskConfig.get.getString(CosmosDBConfigConstants.COLLECTION_CONFIG)
// Source Collection
val clientSettings = CosmosDBClientSettings(
endpoint,
masterKey,
database,
collection,
ConnectionPolicy.GetDefault(),
ConsistencyLevel.Session
)
try{
client = cosmosDBProvider.getClient(clientSettings)
logger.info("Connection to CosmosDB established.")
}catch{
case f: Throwable =>
logger.error(s"Couldn't connect to CosmosDB.: ${f.getMessage}", f)
HandleRetriableError(Failure(f))
}
/*client = Try(CosmosDBProvider.getClient(clientSettings)) match {
case Success(conn) =>
logger.info("Connection to CosmosDB established.")
conn
case Failure(f) => throw new ConnectException(s"Couldn't connect to CosmosDB.", f)
}*/
// Get bufferSize and batchSize
bufferSize = Some(taskConfig.get.getInt(CosmosDBConfigConstants.READER_BUFFER_SIZE))
batchSize = Some(taskConfig.get.getInt(CosmosDBConfigConstants.BATCH_SIZE))
timeout = Some(taskConfig.get.getInt(CosmosDBConfigConstants.TIMEOUT))
// Get Topic
topicName = taskConfig.get.getString(CosmosDBConfigConstants.TOPIC_CONFIG)
// Get the List of Assigned Partitions
val assigned = taskConfig.get.getString(CosmosDBConfigConstants.ASSIGNED_PARTITIONS).split(",").toList
// Set up Readers
assigned.map(partition => {
val setting = new CosmosDBSourceSettings(database, collection, partition, batchSize.get, bufferSize.get, timeout.get, topicName)
readers += partition -> new CosmosDBReader(client, setting, context)
})
}
override def stop(): Unit = {
logger.info("Stopping CosmosDBSourceTask")
}
override def poll(): util.List[SourceRecord] = {
try{
if(postProcessors.isEmpty){
return readers.flatten(reader => reader._2.processChanges()).toList
}else{
return readers.flatten(reader => reader._2.processChanges()).toList.map(sr => applyPostProcessing(sr))
}
}catch{
case f: Exception =>
logger.debug(s"Couldn't create a list of source records ${f.getMessage}", f)
HandleRetriableError(Failure(f))
return null
}
return null
}
override def version(): String = getClass.getPackage.getImplementationVersion
def getReaders(): mutable.Map[String, CosmosDBReader] = readers
private def applyPostProcessing(sourceRecord: SourceRecord): SourceRecord =
postProcessors.foldLeft(sourceRecord)((r, p) => {
//println(p.getClass.toString)
p.runPostProcess(r)
})
}


@ -0,0 +1,27 @@
package com.microsoft.azure.cosmosdb.kafka.connect.source
import com.microsoft.azure.cosmosdb.rx._;
import com.microsoft.azure.cosmosdb._;
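// Helper for building AsyncDocumentClient instances and Cosmos DB resource links.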
object DocumentClientBuilder {
def createConnectionPolicy(): ConnectionPolicy = {
val policy = new ConnectionPolicy()
policy.setConnectionMode(ConnectionMode.Direct)
return policy
}
def buildAsyncDocumentClient(cosmosServiceEndpoint: String, cosmosKey: String): AsyncDocumentClient = {
new AsyncDocumentClient.Builder()
.withServiceEndpoint(cosmosServiceEndpoint)
.withMasterKeyOrResourceToken(cosmosKey)
.withConnectionPolicy(createConnectionPolicy())
.withConsistencyLevel(ConsistencyLevel.Eventual)
.build()
}
def getCollectionLink(databaseName: String, collectionName: String) = "/dbs/%s/colls/%s".format(databaseName, collectionName)
def getDatabaseLink(databaseName: String) = "/dbs/%s".format(databaseName)
}


@ -0,0 +1,5 @@
package com.microsoft.azure.cosmosdb.kafka.connect.source
class DocumentCollectionInfo(val uri: String, val masterKey: String, val databaseName: String, val collectionName: String) {
}


@ -0,0 +1,65 @@
package com.microsoft.azure.cosmosdb.kafka.connect.source
import java.util.Properties
import com.microsoft.azure.cosmosdb.kafka.connect.config.CosmosDBConfigConstants
import com.microsoft.azure.cosmosdb.kafka.connect.kafka.KafkaCluster
import org.apache.kafka.connect.runtime.distributed.DistributedConfig
import org.apache.kafka.connect.runtime.{ConnectorConfig, WorkerConfig}
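// Starts an embedded Kafka Connect worker configured with the Cosmos DB source connector; the defaults point at the local Cosmos DB emulator.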
object Main {
var COSMOSDB_TOPIC: String = "test_topic_issue49"
def main(args: Array[String]): Unit = {
val workerProperties: Properties = getWorkerProperties(KafkaCluster.BrokersList.toString)
val connectorProperties: Properties = getConnectorProperties()
KafkaCluster.startEmbeddedConnect(workerProperties, List(connectorProperties))
if (KafkaCluster.kafkaConnectEnabled) {
println("Kafka Connector Enabled")
}
}
def getWorkerProperties(bootstrapServers: String): Properties = {
val workerProperties: Properties = new Properties()
workerProperties.put(WorkerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers)
workerProperties.put(DistributedConfig.GROUP_ID_CONFIG, "cosmosdb")
workerProperties.put(DistributedConfig.CONFIG_TOPIC_CONFIG, "cosmosdb-config")
workerProperties.put(DistributedConfig.OFFSET_STORAGE_TOPIC_CONFIG, "cosmosdb-offset")
workerProperties.put(DistributedConfig.STATUS_STORAGE_TOPIC_CONFIG, "cosmosdb-status")
workerProperties.put(WorkerConfig.KEY_CONVERTER_CLASS_CONFIG, "org.apache.kafka.connect.json.JsonConverter")
workerProperties.put(WorkerConfig.VALUE_CONVERTER_CLASS_CONFIG, "org.apache.kafka.connect.json.JsonConverter")
workerProperties.put(WorkerConfig.OFFSET_COMMIT_INTERVAL_MS_CONFIG, "30000")
workerProperties.put(DistributedConfig.CONFIG_STORAGE_REPLICATION_FACTOR_CONFIG, "1")
workerProperties.put(DistributedConfig.OFFSET_STORAGE_PARTITIONS_CONFIG, "1")
workerProperties.put(DistributedConfig.OFFSET_STORAGE_REPLICATION_FACTOR_CONFIG, "1")
workerProperties.put(DistributedConfig.STATUS_STORAGE_PARTITIONS_CONFIG, "1")
workerProperties.put(DistributedConfig.STATUS_STORAGE_REPLICATION_FACTOR_CONFIG, "1")
return workerProperties
}
def getConnectorProperties(): Properties = {
val connectorProperties: Properties = new Properties()
connectorProperties.put(ConnectorConfig.NAME_CONFIG, "CosmosDBSourceConnector")
connectorProperties.put(ConnectorConfig.CONNECTOR_CLASS_CONFIG , "com.microsoft.azure.cosmosdb.kafka.connect.source.CosmosDBSourceConnector")
connectorProperties.put(ConnectorConfig.TASKS_MAX_CONFIG , "1")
connectorProperties.put("connect.cosmosdb.connection.endpoint" , "https://localhost:8888")
connectorProperties.put("connect.cosmosdb.master.key", "C2y6yDjf5/R+ob0N8A7Cgv30VRDJIWEHLM+4QDU5DE2nQ9nDuVTqobD4b8mGGyPMbIZnqyMsEcaGQy67XIw/Jw==")
connectorProperties.put("connect.cosmosdb.database" , "database")
connectorProperties.put("connect.cosmosdb.collection" , "collection1")
// connectorProperties.put("connect.cosmosdb.connection.endpoint" , "https://dmcosmos.documents.azure.com:443")
// connectorProperties.put("connect.cosmosdb.master.key", "YAopQ0edHWK9v8yV7IpCU1WzvFQkPvpHWDGmjhpXC0swlmibZgHkgqVDiTRG3abFM2PfYoWKPOVFjL7OTJOPsA==")
// connectorProperties.put("connect.cosmosdb.database" , "kafka-connector")
// connectorProperties.put("connect.cosmosdb.collection" , "source")
connectorProperties.put("connect.cosmosdb.topic.name" , COSMOSDB_TOPIC)
connectorProperties.put(CosmosDBConfigConstants.BATCH_SIZE, "100")
connectorProperties.put(CosmosDBConfigConstants.TIMEOUT, "1")
connectorProperties.put(CosmosDBConfigConstants.SOURCE_POST_PROCESSOR, "com.microsoft.azure.cosmosdb.kafka.connect.processor.source.SelectorSourcePostProcessor")
return connectorProperties
}
}


@ -0,0 +1,56 @@
package com.microsoft.azure.cosmosdb.kafka.connect.source
import java.util.concurrent.CountDownLatch
import com.microsoft.azure.cosmosdb.rx._
import com.microsoft.azure.cosmosdb._
import scala.collection.JavaConversions._
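// Reads the change feed for one partition key range, passes each page of documents to the supplied callback and saves the continuation token through the state manager; the latch is released on completion or error.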
class PartitionFeedReader(asyncClient: AsyncDocumentClient, databaseName: String, collectionName: String, partitionKeyRangeId: String, partitionFeedStateManager: PartitionLeaseStateManager, changeFeedProcessorOptions: ChangeFeedProcessorOptions) {
var partitionFeedState = partitionFeedStateManager.load(partitionKeyRangeId)
private def createChangeFeedOptionsFromState(): ChangeFeedOptions = {
val changeFeedOptions = new ChangeFeedOptions()
changeFeedOptions.setPartitionKeyRangeId(partitionKeyRangeId)
changeFeedOptions.setMaxItemCount(changeFeedProcessorOptions.queryPartitionsMaxBatchSize)
partitionFeedState.continuationToken match {
case null => changeFeedOptions.setStartFromBeginning(true)
case "" => changeFeedOptions.setStartFromBeginning(true)
case t => changeFeedOptions.setRequestContinuation(t)
}
return changeFeedOptions
}
def readChangeFeed(documentProcessor: List[String] => Unit, completionLatch: CountDownLatch) {
val collectionLink = "/dbs/%s/colls/%s".format(databaseName, collectionName)
val changeFeedOptions = createChangeFeedOptionsFromState()
val changeFeedObservable = asyncClient.queryDocumentChangeFeed(collectionLink, changeFeedOptions)
changeFeedObservable
// Process documents
.doOnNext(feedResponse => {
val documents = feedResponse.getResults().map(d => d.toJson()) // ready to send to Kafka
documentProcessor(documents.toList) // callback passing the list of documents
})
// Logging
.doOnNext(feedResponse => {
println("Count: " + feedResponse.getResults().length)
println("ResponseContinuation: " + feedResponse.getResponseContinuation())
})
// Save state ... save offset
.flatMap(feedResponse => {
println("Saving State!")
val continuationToken = feedResponse.getResponseContinuation().replaceAll("^\"|\"$", "")
partitionFeedState = new PartitionFeedState(partitionKeyRangeId, continuationToken)
partitionFeedStateManager.save(partitionFeedState)
})
.subscribe(
v => {}, // Every response - can have multiple documents
e => completionLatch.countDown(), // when error
() => completionLatch.countDown()) // final execution
}
}


@ -0,0 +1,5 @@
package com.microsoft.azure.cosmosdb.kafka.connect.source
class PartitionFeedState(val id: String, val continuationToken: String) {
def this(id: String) = this(id, null)
}


@ -0,0 +1,47 @@
package com.microsoft.azure.cosmosdb.kafka.connect.source
import rx.{Observable, _}
import com.microsoft.azure.cosmosdb.rx._
import com.microsoft.azure.cosmosdb._
import com.google.gson._
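// Persists and loads per-partition change feed state (continuation tokens) as documents in the lease collection; load() falls back to a fresh state when no document is found.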
class PartitionLeaseStateManager(asyncClient: AsyncDocumentClient, databaseName: String, collectionName: String) {
private val gson = new Gson()
def save(partitionFeedState: PartitionFeedState): Observable[ResourceResponse[Document]] = {
val json = gson.toJson(partitionFeedState)
val document = new Document(json)
val collectionLink = DocumentClientBuilder.getCollectionLink(databaseName, collectionName)
val createDocumentObservable = asyncClient.upsertDocument(collectionLink, document, null, false)
return createDocumentObservable
}
def load(partitionKeyRangeId: String): PartitionFeedState = {
val collectionLink = DocumentClientBuilder.getCollectionLink(databaseName, collectionName)
val querySpec = new SqlQuerySpec("SELECT * FROM " + collectionName + " where " + collectionName + ".id = @id",
new SqlParameterCollection(
new SqlParameter("@id", partitionKeyRangeId)
))
val queryOptions = new FeedOptions()
queryOptions.setEnableCrossPartitionQuery(true)
val queryFeedObservable = asyncClient.queryDocuments(collectionLink, querySpec, queryOptions)
try {
val results = queryFeedObservable.toBlocking().single().getResults()
val partitionFeedState = results.iterator().next().toJson()
return gson.fromJson(partitionFeedState, classOf[PartitionFeedState])
}
catch {
case error: Throwable => {
System.err.println("Error when getting last state from partitionKeyRangeId. Details: " + error)
return new PartitionFeedState(partitionKeyRangeId)
}
}
}
}


@ -0,0 +1,47 @@
package com.microsoft.azure.cosmosdb.kafka.connect.source
import java.util
import java.util.{Collections, Properties}
import com.microsoft.azure.cosmosdb.kafka.connect.kafka.KafkaCluster
import org.apache.kafka.clients.consumer.{ConsumerConfig, KafkaConsumer}
import org.apache.kafka.common.serialization.StringDeserializer
object SampleConsumer {
var COSMOSDB_TOPIC: String = "cosmosdb-source-topic"
def main(args: Array[String]): Unit = {
try {
val properties = new Properties()
properties.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, KafkaCluster.BrokersList)
properties.put(ConsumerConfig.CLIENT_ID_CONFIG, "sample_debugger_consumer-01")
properties.put(ConsumerConfig.GROUP_ID_CONFIG, "debugger_consumergroup")
properties.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "true")
properties.put(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, "1000")
properties.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest")
properties.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, classOf[StringDeserializer])
properties.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, classOf[StringDeserializer])
val consumer = new KafkaConsumer[String, String](properties)
consumer.subscribe(Collections.singletonList(COSMOSDB_TOPIC))
val documents = new util.ArrayList[String]
while (true) {
val records = consumer.poll(java.time.Duration.ofMillis(100))
records.forEach(r => {
val document = r.value()
documents.add(document)
})
}
}
catch {
case e: Exception => {
println(s" Exception ${e.getMessage() }")
}
}
}
}


@ -1,20 +0,0 @@
package com.microsoft.azure;
import static org.junit.Assert.assertTrue;
import org.junit.Test;
/**
* Unit test for simple App.
*/
public class AppTest
{
/**
* Rigorous Test :-)
*/
@Test
public void shouldAnswerWithTrue()
{
assertTrue( true );
}
}


@ -0,0 +1,32 @@
package com.microsoft.azure.cosmosdb.kafka.connect
import java.util.ArrayList
import java.util.HashMap
import java.util.concurrent.CountDownLatch
import com.microsoft.azure.cosmosdb.Document
import com.microsoft.azure.cosmosdb.rx.AsyncDocumentClient
import org.mockito.MockitoSugar.mock
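// In-memory CosmosDBProvider used in tests: upserted documents are stored per collection in a HashMap instead of being sent to Cosmos DB.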
object MockCosmosDBProvider extends CosmosDBProvider {
var CosmosDBCollections: HashMap[String, ArrayList[Document]] = new HashMap[String, ArrayList[Document]]
def setupCollections[T](collectionNames: List[String]): Unit ={
collectionNames.foreach(c => CosmosDBCollections.put(c, new ArrayList[Document]()))
}
def getDocumentsByCollection(collectionName: String): ArrayList[Document] = {
return CosmosDBCollections.get(collectionName)
}
override def upsertDocuments[T](docs: List[T], databaseName: String, collectionName: String, completionLatch: CountDownLatch): Unit = {
if(CosmosDBCollections.containsKey(collectionName)){
docs.foreach(d => CosmosDBCollections.get(collectionName).add(d.asInstanceOf[Document]))
}
}
override def getClient(settings: CosmosDBClientSettings): AsyncDocumentClient = {
return mock[AsyncDocumentClient]
}
}


@ -0,0 +1,52 @@
package com.microsoft.azure.cosmosdb.kafka.connect
import java.util
import java.util.UUID.randomUUID
import com.microsoft.azure.cosmosdb.kafka.connect.model.CosmosDBDocumentTest
import com.microsoft.azure.cosmosdb.kafka.connect.source.{CosmosDBReader, CosmosDBSourceSettings}
import java.util.{ArrayList, Properties, UUID}
import java.util.UUID._
import com.google.gson.Gson
import com.microsoft.azure.cosmosdb.rx.AsyncDocumentClient
import org.apache.kafka.connect.source.{SourceRecord, SourceTaskContext}
import org.apache.kafka.connect.storage.OffsetStorageReader
import org.mockito.MockitoSugar.mock
class MockCosmosDBReader (private val client: AsyncDocumentClient,
override val setting: CosmosDBSourceSettings,
private val context: SourceTaskContext) extends CosmosDBReader(client, setting,context) {
private val SOURCE_PARTITION_FIELD = "partition"
private val SOURCE_OFFSET_FIELD = "changeFeedState"
override def processChanges(): util.List[SourceRecord] = {
//Return a mock doc list
/* val records = new util.ArrayList[SourceRecord]
val jsonFile = """{"id": "9","_rid": "tqZSAOCV8ekBAAAAAAAAAA==","_self": "dbs/tqZSAA==/colls/tqZSAOCV8ek=/docs/tqZSAOCV8ekBAAAAAAAAAA==/","_etag": "\"00000000-0000-0000-2bcf-cab592a001d5\"","_attachments": "attachments/","_ts": 1561519953}"""
records.add(new SourceRecord(
sourcePartition(setting.assignedPartition),
sourceOffset(new Gson().toJson(1)),
setting.topicName,
null,
jsonFile
))*/
return mock[util.ArrayList[SourceRecord]]
}
private def sourcePartition(partition: String): util.Map[String, String] = {
val map = new java.util.HashMap[String,String]
map.put(SOURCE_PARTITION_FIELD, partition)
return map
}
private def sourceOffset(offset: String): util.Map[String, String] = {
val map = new java.util.HashMap[String,String]
map.put(SOURCE_OFFSET_FIELD, offset)
return map
}
}


@ -0,0 +1,37 @@
package com.microsoft.azure.cosmosdb.kafka.connect.common.ErrorHandler
import org.apache.kafka.connect.errors.{ConnectException, RetriableException}
import scala.util.{Failure, Try}
import org.scalatest.WordSpec
class HandleRetriableErrorTest extends WordSpec with HandleRetriableError {
initializeErrorHandler(10)
"should decrement number of retries" in {
intercept[RetriableException] {
try {
1 / 0
} catch {
case t: Throwable =>
HandleRetriableError(Failure(t))
}
}
}
initializeErrorHandler(0)
"should throw ConnectException when retries = 0" in {
intercept[ConnectException] {
try {
1 / 0
} catch {
case t: Throwable =>
HandleRetriableError(Failure(t))
}
}
}
}


@ -0,0 +1,62 @@
package com.microsoft.azure.cosmosdb.kafka.connect.config
import org.apache.kafka.common.config.ConfigException
import org.scalatest.{Matchers, WordSpec}
import collection.JavaConverters._
class CosmosDBConfigTest extends WordSpec with Matchers {
"CosmosDBConfig" should {
"throw an exception if endpoint not present" in {
val map = Map(
"foo" -> "f",
).asJava
val caught = intercept[ConfigException] {
CosmosDBConfig(ConnectorConfig.baseConfigDef, map)
}
caught.getMessage should startWith(s"""Missing required configuration "${CosmosDBConfigConstants.CONNECTION_ENDPOINT_CONFIG}" """)
}
"throw an exception if master key not present" in {
val map = Map(
CosmosDBConfigConstants.CONNECTION_ENDPOINT_CONFIG -> "f"
).asJava
val caught = intercept[ConfigException] {
CosmosDBConfig(ConnectorConfig.baseConfigDef, map)
}
caught.getMessage should startWith(s"""Missing required configuration "${CosmosDBConfigConstants.CONNECTION_MASTERKEY_CONFIG}" """)
}
"throw an exception if database not present" in {
val map = Map(
CosmosDBConfigConstants.CONNECTION_ENDPOINT_CONFIG -> "f",
CosmosDBConfigConstants.CONNECTION_MASTERKEY_CONFIG -> "f",
CosmosDBConfigConstants.COLLECTION_CONFIG -> "f",
).asJava
val caught = intercept[ConfigException] {
CosmosDBConfig(ConnectorConfig.baseConfigDef, map)
}
caught.getMessage should startWith(s"""Missing required configuration "${CosmosDBConfigConstants.DATABASE_CONFIG}" """)
}
"throw an exception if collection not present" in {
val map = Map(
CosmosDBConfigConstants.CONNECTION_ENDPOINT_CONFIG -> "f",
CosmosDBConfigConstants.CONNECTION_MASTERKEY_CONFIG -> "f",
CosmosDBConfigConstants.DATABASE_CONFIG -> "f",
).asJava
val caught = intercept[ConfigException] {
CosmosDBConfig(ConnectorConfig.baseConfigDef, map)
}
caught.getMessage should startWith(s"""Missing required configuration "${CosmosDBConfigConstants.COLLECTION_CONFIG}" """)
}
}
}


@ -0,0 +1,113 @@
package com.microsoft.azure.cosmosdb.kafka.connect.config
import java.util.Properties
import com.google.common.base.Strings
import com.typesafe.config.ConfigFactory
import org.apache.commons.lang3.StringUtils
import org.apache.kafka.clients.producer.ProducerConfig
import org.apache.kafka.connect.runtime.WorkerConfig
import org.apache.kafka.connect.runtime.distributed.DistributedConfig
object TestConfigurations {
lazy private val config = ConfigFactory.load()
lazy private val CosmosDBConfig = config.getConfig("CosmosDB")
// Replace ENDPOINT and MASTER_KEY with values from your Azure Cosmos DB account.
// The default values are credentials of the local emulator, which are not used in any production environment.
var ENDPOINT : String = StringUtils.defaultString(Strings.emptyToNull(CosmosDBConfig.getString("endpoint")), "https://localhost:8081/")
var MASTER_KEY: String = StringUtils.defaultString(Strings.emptyToNull(CosmosDBConfig.getString("masterKey")), "C2y6yDjf5/R+ob0N8A7Cgv30VRDJIWEHLM+4QDU5DE2nQ9nDuVTqobD4b8mGGyPMbIZnqyMsEcaGQy67XIw/Jw==")
var DATABASE : String = StringUtils.defaultString(Strings.emptyToNull(CosmosDBConfig.getString("database")), "database")
var SOURCE_COLLECTION : String = StringUtils.defaultString(Strings.emptyToNull(CosmosDBConfig.getString("collection")), "collection1")
var SINK_COLLECTION : String = StringUtils.defaultString(Strings.emptyToNull(CosmosDBConfig.getString("collection")), "collection2")
var TOPIC : String = StringUtils.defaultString(Strings.emptyToNull(CosmosDBConfig.getString("topic")), "topic_test")
def getSourceWorkerProperties(bootstrapServers: String): Properties = {
val workerProperties: Properties = new Properties()
workerProperties.put(WorkerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers)
workerProperties.put(DistributedConfig.GROUP_ID_CONFIG, "cosmosdb")
workerProperties.put(DistributedConfig.CONFIG_TOPIC_CONFIG, "cosmosdb-config")
workerProperties.put(DistributedConfig.OFFSET_STORAGE_TOPIC_CONFIG, "cosmosdb-offset")
workerProperties.put(DistributedConfig.STATUS_STORAGE_TOPIC_CONFIG, "cosmosdb-status")
workerProperties.put(WorkerConfig.KEY_CONVERTER_CLASS_CONFIG, "org.apache.kafka.connect.json.JsonConverter")
workerProperties.put(WorkerConfig.VALUE_CONVERTER_CLASS_CONFIG, "org.apache.kafka.connect.json.JsonConverter")
workerProperties.put("value.converter.schemas.enable", "false")
workerProperties.put(WorkerConfig.OFFSET_COMMIT_INTERVAL_MS_CONFIG, "30000")
workerProperties.put(DistributedConfig.CONFIG_TOPIC_CONFIG, "cosmosdb-config")
workerProperties.put(DistributedConfig.CONFIG_STORAGE_REPLICATION_FACTOR_CONFIG, "1")
workerProperties.put(DistributedConfig.OFFSET_STORAGE_PARTITIONS_CONFIG, "1")
workerProperties.put(DistributedConfig.OFFSET_STORAGE_REPLICATION_FACTOR_CONFIG, "1")
workerProperties.put(DistributedConfig.STATUS_STORAGE_PARTITIONS_CONFIG, "1")
workerProperties.put(DistributedConfig.STATUS_STORAGE_REPLICATION_FACTOR_CONFIG, "1")
return workerProperties
}
def getSinkWorkerProperties(bootstrapServers: String): Properties = {
val workerProperties: Properties = new Properties()
workerProperties.put(WorkerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers)
workerProperties.put(DistributedConfig.GROUP_ID_CONFIG, "cosmosdb-01")
workerProperties.put(DistributedConfig.CONFIG_TOPIC_CONFIG, "cosmosdb-sink-config")
workerProperties.put(DistributedConfig.OFFSET_STORAGE_TOPIC_CONFIG, "cosmosdb-sink-offset")
workerProperties.put(DistributedConfig.STATUS_STORAGE_TOPIC_CONFIG, "cosmosdb-sink-status")
workerProperties.put(WorkerConfig.KEY_CONVERTER_CLASS_CONFIG, "org.apache.kafka.connect.json.JsonConverter")
workerProperties.put(WorkerConfig.VALUE_CONVERTER_CLASS_CONFIG, "org.apache.kafka.connect.json.JsonConverter")
workerProperties.put("value.converter.schemas.enable", "false")
workerProperties.put(WorkerConfig.OFFSET_COMMIT_INTERVAL_MS_CONFIG, "30000")
workerProperties.put(DistributedConfig.CONFIG_STORAGE_REPLICATION_FACTOR_CONFIG, "1")
workerProperties.put(DistributedConfig.OFFSET_STORAGE_PARTITIONS_CONFIG, "1")
workerProperties.put(DistributedConfig.OFFSET_STORAGE_REPLICATION_FACTOR_CONFIG, "1")
workerProperties.put(DistributedConfig.STATUS_STORAGE_PARTITIONS_CONFIG, "1")
workerProperties.put(DistributedConfig.STATUS_STORAGE_REPLICATION_FACTOR_CONFIG, "1")
return workerProperties
}
def getSourceConnectorProperties(): Properties = {
val connectorProperties: Properties = new Properties()
connectorProperties.put(org.apache.kafka.connect.runtime.ConnectorConfig.NAME_CONFIG, "CosmosDBSourceConnector")
connectorProperties.put(org.apache.kafka.connect.runtime.ConnectorConfig.CONNECTOR_CLASS_CONFIG , "com.microsoft.azure.cosmosdb.kafka.connect.source.CosmosDBSourceConnector")
connectorProperties.put(org.apache.kafka.connect.runtime.ConnectorConfig.TASKS_MAX_CONFIG , "1")
connectorProperties.put(CosmosDBConfigConstants.CONNECTION_ENDPOINT_CONFIG, ENDPOINT)
connectorProperties.put(CosmosDBConfigConstants.CONNECTION_MASTERKEY_CONFIG, MASTER_KEY)
connectorProperties.put(CosmosDBConfigConstants.DATABASE_CONFIG, DATABASE)
connectorProperties.put(CosmosDBConfigConstants.COLLECTION_CONFIG, SOURCE_COLLECTION)
connectorProperties.put(CosmosDBConfigConstants.TOPIC_CONFIG, TOPIC)
connectorProperties.put(CosmosDBConfigConstants.BATCH_SIZE, "10")
connectorProperties.put(CosmosDBConfigConstants.READER_BUFFER_SIZE, "1000")
connectorProperties.put(CosmosDBConfigConstants.ERRORS_RETRY_TIMEOUT_CONFIG, "3")
connectorProperties.put(CosmosDBConfigConstants.SOURCE_POST_PROCESSOR, "com.microsoft.azure.cosmosdb.kafka.connect.processor.source.SelectorSourcePostProcessor")
connectorProperties.put(org.apache.kafka.connect.runtime.ConnectorConfig.ERRORS_RETRY_TIMEOUT_CONFIG, "3")
return connectorProperties
}
def getSinkConnectorProperties(): Properties = {
val connectorProperties: Properties = new Properties()
connectorProperties.put(org.apache.kafka.connect.runtime.ConnectorConfig.NAME_CONFIG, "CosmosDBSinkConnector")
connectorProperties.put(org.apache.kafka.connect.runtime.ConnectorConfig.CONNECTOR_CLASS_CONFIG , "com.microsoft.azure.cosmosdb.kafka.connect.sink.CosmosDBSinkConnector")
connectorProperties.put(org.apache.kafka.connect.runtime.ConnectorConfig.TASKS_MAX_CONFIG , "1")
connectorProperties.put(CosmosDBConfigConstants.CONNECTION_ENDPOINT_CONFIG, ENDPOINT)
connectorProperties.put(CosmosDBConfigConstants.CONNECTION_MASTERKEY_CONFIG, MASTER_KEY)
connectorProperties.put(CosmosDBConfigConstants.DATABASE_CONFIG, DATABASE)
connectorProperties.put(CosmosDBConfigConstants.COLLECTION_CONFIG, SINK_COLLECTION)
connectorProperties.put(CosmosDBConfigConstants.COLLECTION_TOPIC_MAP_CONFIG, s"$SINK_COLLECTION#$TOPIC")
connectorProperties.put("topics", TOPIC) // constant required by sink connector
connectorProperties.put(CosmosDBConfigConstants.TOPIC_CONFIG, TOPIC )
connectorProperties.put(org.apache.kafka.connect.runtime.ConnectorConfig.ERRORS_RETRY_TIMEOUT_CONFIG, "3")
// connectorProperties.put(CosmosDBConfigConstants.SINK_POST_PROCESSOR, "com.microsoft.azure.cosmosdb.kafka.connect.processor.sink.SelectorSinkPostProcessor")
return connectorProperties
}
def getProducerProperties(bootstrapServers: String): Properties = {
val producerProperties: Properties = new Properties()
producerProperties.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers)
producerProperties.put(ProducerConfig.ACKS_CONFIG, "all")
producerProperties.put(ProducerConfig.RETRIES_CONFIG, "3")
producerProperties.put(ProducerConfig.BATCH_SIZE_CONFIG, "10")
producerProperties.put(ProducerConfig.LINGER_MS_CONFIG, "1")
producerProperties.put(ProducerConfig.BUFFER_MEMORY_CONFIG, "33554432")
producerProperties.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.connect.json.JsonSerializer")
producerProperties.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.connect.json.JsonSerializer")
return producerProperties
}
}


@ -0,0 +1,21 @@
package com.microsoft.azure.cosmosdb.kafka.connect.model
class Address(var city: String, var state: String) {
def setCity (city:String) {
this.city = city
}
def setState (state:String) {
this.state = state
}
def getCity () : String = {
city
}
def getState () : String = {
state
}
}


@ -0,0 +1,29 @@
package com.microsoft.azure.cosmosdb.kafka.connect.model
import java.util.UUID
class CosmosDBDocumentTest(var id: String, var message: String, var testID: UUID) {
def getId(): String = {
return id
}
def getMessage(): String = {
return message
}
def getTestID(): UUID = {
return testID
}
def setId(id: String) = {
this.id = id
}
def setMessage(message: String) = {
this.message = message
}
def setTestID(testID: UUID) = {
this.testID = testID
}
}


@ -0,0 +1,14 @@
package com.microsoft.azure.cosmosdb.kafka.connect.model
import java.util.UUID
case class KafkaPayloadTest(
id: String,
message: String,
testID: UUID,
_rid: String,
_self: String,
_etag: String,
_attachments: String,
_ts: Long
)


@ -0,0 +1,76 @@
package com.microsoft.azure.cosmosdb.kafka.connect.processor
import com.google.gson._
import com.microsoft.azure.cosmosdb.kafka.connect.config.{ConnectorConfig, CosmosDBConfig, TestConfigurations}
import com.microsoft.azure.cosmosdb.kafka.connect.processor.sink.DocumentIdSinkPostProcessor
import org.scalatest.{FlatSpec, GivenWhenThen}
import scala.collection.JavaConverters._
class DocumentIdSinkPostProcessorTest extends FlatSpec with GivenWhenThen {
val sourceRecord: String =
"""
|{
| "firstName": "John",
| "lastName": "Smith"
|}
""".stripMargin
"'id' field" should "be created or replaced with value taken from specified field" in {
val expectedRecord =
"""
|{
| "firstName": "John",
| "lastName": "Smith",
| "id": "John"
|}
""".stripMargin
Given("an existing field")
val connectorProperties = TestConfigurations.getSourceConnectorProperties()
connectorProperties.put("connect.cosmosdb.sink.post-processor.documentId.field", "firstName")
val config = new CosmosDBConfig(ConnectorConfig.baseConfigDef, connectorProperties.asScala.asJava)
When("JSON document is processed")
val jsonParser = new JsonParser()
val json: JsonObject = jsonParser.parse(sourceRecord).getAsJsonObject
val postProcessor = new DocumentIdSinkPostProcessor()
postProcessor.configure(config)
Then("'id' is replaced with specified existing field value")
val processed = postProcessor.runJsonPostProcess(json)
val expected = jsonParser.parse(expectedRecord).getAsJsonObject
assert(processed.equals(expected))
}
"null 'id' field" should "be generated if requested field doesn't exists" in {
val expectedRecord =
"""
|{
| "firstName": "John",
| "lastName": "Smith",
| "id": null
|}
""".stripMargin
Given("a non-existing field")
val connectorProperties = TestConfigurations.getSourceConnectorProperties()
connectorProperties.put("connect.cosmosdb.sink.post-processor.documentId.field", "notExists")
val config = new CosmosDBConfig(ConnectorConfig.baseConfigDef, connectorProperties.asScala.asJava)
When("JSON document is processed")
val jsonParser = new JsonParser()
val json: JsonObject = jsonParser.parse(sourceRecord).getAsJsonObject
val postProcessor = new DocumentIdSinkPostProcessor()
postProcessor.configure(config)
Then("'id' is set to null")
val processed = postProcessor.runJsonPostProcess(json)
val expected = jsonParser.parse(expectedRecord).getAsJsonObject
assert(processed.equals(expected))
}
}


@ -0,0 +1,151 @@
package com.microsoft.azure.cosmosdb.kafka.connect.processor
import scala.collection.JavaConverters._
import com.google.gson._
import com.microsoft.azure.cosmosdb.kafka.connect.config.{ConnectorConfig, CosmosDBConfig, TestConfigurations}
import com.microsoft.azure.cosmosdb.kafka.connect.processor.source.SelectorSourcePostProcessor
import org.scalatest.{FlatSpec, GivenWhenThen}
class SelectorPostProcessorTest extends FlatSpec with GivenWhenThen {
val sourceRecord: String =
"""
|{
| "firstName": "John",
| "lastName": "Smith",
| "isAlive": true,
| "age": 27,
| "address": {
| "streetAddress": "21 2nd Street",
| "city": "New York",
| "state": "NY",
| "postalCode": "10021-3100"
| },
| "phoneNumbers": [
| {
| "type": "home",
| "number": "212 555-1234"
| },
| {
| "type": "office",
| "number": "646 555-4567"
| },
| {
| "type": "mobile",
| "number": "123 456-7890"
| }
| ],
| "children": [],
| "spouse": null,
| "id": "f355b7ff-e522-6906-c169-6d53e7ab046b",
| "_rid": "tA4eAIlHRkMFAAAAAAAAAA==",
| "_self": "dbs/tA4eAA==/colls/tA4eAIlHRkM=/docs/tA4eAIlHRkMFAAAAAAAAAA==/",
| "_etag": "\"39022ddc-0000-0700-0000-5d094f610000\"",
| "_attachments": "attachments/",
| "_ts": 1560891233
|}
""".stripMargin
"Post Processor" should "remove configured fields" in {
val expectedRecord =
"""
|{
| "firstName": "John",
| "lastName": "Smith",
| "isAlive": true,
| "age": 27,
| "address": {
| "streetAddress": "21 2nd Street",
| "city": "New York",
| "state": "NY",
| "postalCode": "10021-3100"
| },
| "phoneNumbers": [
| {
| "type": "home",
| "number": "212 555-1234"
| },
| {
| "type": "office",
| "number": "646 555-4567"
| },
| {
| "type": "mobile",
| "number": "123 456-7890"
| }
| ],
| "children": [],
| "spouse": null
|}
""".stripMargin
Given("Post Processor configuration")
val connectorProperties = TestConfigurations.getSourceConnectorProperties()
connectorProperties.put("connect.cosmosdb.source.post-processor.selector.type", "Exclude")
connectorProperties.put("connect.cosmosdb.source.post-processor.selector.fields", "id, _rid, _self, _etag, _attachments, _ts, _lsn, _metadata")
val config = new CosmosDBConfig(ConnectorConfig.baseConfigDef, connectorProperties.asScala.asJava)
When("JSON document is processed")
val jsonParser = new JsonParser()
val json: JsonObject = jsonParser.parse(sourceRecord).getAsJsonObject
val postProcessor = new SelectorSourcePostProcessor()
postProcessor.configure(config)
Then("specified JSON properties are removed")
val processed = postProcessor.runJsonPostProcess(json)
val expected = jsonParser.parse(expectedRecord).getAsJsonObject
assert(processed.equals(expected))
}
"Post Processor" should "keep only configured fields" in {
val expectedRecord =
"""
|{
| "firstName": "John",
| "lastName": "Smith",
| "address": {
| "streetAddress": "21 2nd Street",
| "city": "New York",
| "state": "NY",
| "postalCode": "10021-3100"
| },
| "phoneNumbers": [
| {
| "type": "home",
| "number": "212 555-1234"
| },
| {
| "type": "office",
| "number": "646 555-4567"
| },
| {
| "type": "mobile",
| "number": "123 456-7890"
| }
| ],
| "children": [],
| "spouse": null
|}
""".stripMargin
Given("Post Processor configuration")
val connectorProperties = TestConfigurations.getSourceConnectorProperties()
connectorProperties.put("connect.cosmosdb.source.post-processor.selector.type", "Include")
connectorProperties.put("connect.cosmosdb.source.post-processor.selector.fields", "firstName, lastName, address, phoneNumbers, children, spouse")
val config = new CosmosDBConfig(ConnectorConfig.baseConfigDef, connectorProperties.asScala.asJava)
When("JSON document is processed")
val jsonParser = new JsonParser()
val json: JsonObject = jsonParser.parse(sourceRecord).getAsJsonObject
val postProcessor = new SelectorSourcePostProcessor()
postProcessor.configure(config)
Then("only specified JSON properties are kept")
val processed = postProcessor.runJsonPostProcess(json)
val expected = jsonParser.parse(expectedRecord).getAsJsonObject
assert(processed.equals(expected))
}
}


@ -0,0 +1,72 @@
package com.microsoft.azure.cosmosdb.kafka.connect.processor
import java.util.Properties
import com.microsoft.azure.cosmosdb.kafka.connect.config.{CosmosDBConfigConstants, TestConfigurations}
import com.microsoft.azure.cosmosdb.kafka.connect.kafka.KafkaCluster
import org.apache.kafka.connect.runtime.WorkerConfig
import org.apache.kafka.connect.runtime.distributed.DistributedConfig
// TODO: This should be removed from here and refactored into an Integration Test
object SinkPostProcessorTest {
var COSMOSDB_TOPIC: String = "cosmosdb-source-topic"
def main(args: Array[String]): Unit = {
val workerProperties: Properties = getWorkerProperties(KafkaCluster.BrokersList.toString)
val connectorProperties: Properties = getConnectorProperties()
// Add Sink Post Processors
val postProcessors =
"com.microsoft.azure.cosmosdb.kafka.connect.processor.sink.DocumentIdSinkPostProcessor" ::
"com.microsoft.azure.cosmosdb.kafka.connect.processor.sink.SelectorSinkPostProcessor" ::
"com.microsoft.azure.cosmosdb.kafka.connect.processor.SampleConsoleWriterPostProcessor" ::
Nil
connectorProperties.put(CosmosDBConfigConstants.SINK_POST_PROCESSOR, postProcessors.mkString(","))
// Configure Sink Post Processor
connectorProperties.put("connect.cosmosdb.sink.post-processor.selector.type", "Include")
connectorProperties.put("connect.cosmosdb.sink.post-processor.selector.fields", "id, firstName, lastName, age, address, children, spouse")
connectorProperties.put("connect.cosmosdb.sink.post-processor.documentId.field", "lastName")
KafkaCluster.startEmbeddedConnect(workerProperties, List(connectorProperties))
if (KafkaCluster.kafkaConnectEnabled) {
println("Kafka Connector Enabled")
}
}
def getWorkerProperties(bootstrapServers: String): Properties = {
val workerProperties: Properties = new Properties()
workerProperties.put(WorkerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers)
workerProperties.put(DistributedConfig.GROUP_ID_CONFIG, "cosmosdb-01")
workerProperties.put(DistributedConfig.CONFIG_TOPIC_CONFIG, "cosmosdb-sink-config")
workerProperties.put(DistributedConfig.OFFSET_STORAGE_TOPIC_CONFIG, "cosmosdb-sink-offset")
workerProperties.put(DistributedConfig.STATUS_STORAGE_TOPIC_CONFIG, "cosmosdb-sink-status")
workerProperties.put(WorkerConfig.KEY_CONVERTER_CLASS_CONFIG, "org.apache.kafka.connect.json.JsonConverter")
workerProperties.put(WorkerConfig.VALUE_CONVERTER_CLASS_CONFIG, "org.apache.kafka.connect.json.JsonConverter")
workerProperties.put(WorkerConfig.OFFSET_COMMIT_INTERVAL_MS_CONFIG, "30000")
workerProperties.put(DistributedConfig.CONFIG_STORAGE_REPLICATION_FACTOR_CONFIG, "1")
workerProperties.put(DistributedConfig.OFFSET_STORAGE_PARTITIONS_CONFIG, "1")
workerProperties.put(DistributedConfig.OFFSET_STORAGE_REPLICATION_FACTOR_CONFIG, "1")
workerProperties.put(DistributedConfig.STATUS_STORAGE_PARTITIONS_CONFIG, "1")
workerProperties.put(DistributedConfig.STATUS_STORAGE_REPLICATION_FACTOR_CONFIG, "1")
workerProperties
}
def getConnectorProperties(): Properties = {
val connectorProperties = TestConfigurations.getSinkConnectorProperties()
connectorProperties.put(CosmosDBConfigConstants.COLLECTION_CONFIG, "destination")
connectorProperties.put(CosmosDBConfigConstants.TOPIC_CONFIG, COSMOSDB_TOPIC)
connectorProperties.put("topics", COSMOSDB_TOPIC)
connectorProperties.put(CosmosDBConfigConstants.ERRORS_RETRY_TIMEOUT_CONFIG, "3")
connectorProperties
}
}


@ -0,0 +1,75 @@
package com.microsoft.azure.cosmosdb.kafka.connect.processor
import java.util.Properties
import com.microsoft.azure.cosmosdb.kafka.connect.config.{CosmosDBConfigConstants, TestConfigurations}
import com.microsoft.azure.cosmosdb.kafka.connect.kafka.KafkaCluster
import org.apache.kafka.connect.runtime.WorkerConfig
import org.apache.kafka.connect.runtime.distributed.DistributedConfig
import org.scalatest.{FlatSpec, GivenWhenThen}
// TODO: This should be removed from here and refactored into an Integration Test
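// Manual test harness: starts an embedded Kafka Connect worker running the source connector with a chain of source post-processors (field selector, console writer).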
object SourcePostProcessorTest {
val COSMOSDB_TOPIC: String = "cosmosdb-source-topic"
def main(args: Array[String]): Unit = {
val workerProperties: Properties = getWorkerProperties(KafkaCluster.BrokersList.toString)
val connectorProperties: Properties = getConnectorProperties()
// Add Source Post Processors
val postProcessors =
"com.microsoft.azure.cosmosdb.kafka.connect.processor.source.SelectorSourcePostProcessor" ::
"com.microsoft.azure.cosmosdb.kafka.connect.processor.SampleConsoleWriterPostProcessor" ::
Nil
connectorProperties.put(CosmosDBConfigConstants.SOURCE_POST_PROCESSOR, postProcessors.mkString(","))
// Configure Source Post Processor
connectorProperties.put("connect.cosmosdb.source.post-processor.selector.type", "Exclude")
connectorProperties.put("connect.cosmosdb.source.post-processor.selector.fields", "id, _rid, _self, _etag, _attachments, _ts, _lsn, _metadata")
//connectorProperties.put("connect.cosmosdb.source.post-processor.selector.type", "Include")
//connectorProperties.put("connect.cosmosdb.source.post-processor.selector.fields", "id, firstName, lastName, age")
// Run Embedded Kafka Cluster
KafkaCluster.startEmbeddedConnect(workerProperties, List(connectorProperties))
if (KafkaCluster.kafkaConnectEnabled) {
println("Kafka Connector Enabled")
}
}
def getWorkerProperties(bootstrapServers: String): Properties = {
val workerProperties: Properties = new Properties()
workerProperties.put(WorkerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers)
workerProperties.put(DistributedConfig.GROUP_ID_CONFIG, "cosmosdb")
workerProperties.put(DistributedConfig.CONFIG_TOPIC_CONFIG, "cosmosdb-config")
workerProperties.put(DistributedConfig.OFFSET_STORAGE_TOPIC_CONFIG, "cosmosdb-offset")
workerProperties.put(DistributedConfig.STATUS_STORAGE_TOPIC_CONFIG, "cosmosdb-status")
workerProperties.put(WorkerConfig.KEY_CONVERTER_CLASS_CONFIG, "org.apache.kafka.connect.json.JsonConverter")
workerProperties.put(WorkerConfig.VALUE_CONVERTER_CLASS_CONFIG, "org.apache.kafka.connect.json.JsonConverter")
workerProperties.put(WorkerConfig.OFFSET_COMMIT_INTERVAL_MS_CONFIG, "30000")
workerProperties.put(DistributedConfig.CONFIG_STORAGE_REPLICATION_FACTOR_CONFIG, "1")
workerProperties.put(DistributedConfig.OFFSET_STORAGE_PARTITIONS_CONFIG, "1")
workerProperties.put(DistributedConfig.OFFSET_STORAGE_REPLICATION_FACTOR_CONFIG, "1")
workerProperties.put(DistributedConfig.STATUS_STORAGE_PARTITIONS_CONFIG, "1")
workerProperties.put(DistributedConfig.STATUS_STORAGE_REPLICATION_FACTOR_CONFIG, "1")
workerProperties
}
def getConnectorProperties(): Properties = {
val connectorProperties = TestConfigurations.getSourceConnectorProperties()
connectorProperties.put(CosmosDBConfigConstants.COLLECTION_CONFIG, "source")
connectorProperties.put(CosmosDBConfigConstants.TOPIC_CONFIG, COSMOSDB_TOPIC)
connectorProperties.put("topics", COSMOSDB_TOPIC)
connectorProperties
}
}


@ -0,0 +1,43 @@
package com.microsoft.azure.cosmosdb.kafka.connect.provider
import java.util.concurrent.CountDownLatch
import com.microsoft.azure.cosmosdb.kafka.connect.config.TestConfigurations
import com.microsoft.azure.cosmosdb.kafka.connect.{CosmosDBClientSettings, CosmosDBProviderImpl}
import com.microsoft.azure.cosmosdb.{ConnectionPolicy, ConsistencyLevel}
import com.typesafe.scalalogging.LazyLogging
import org.apache.kafka.connect.errors.ConnectException
import org.scalatest.{FlatSpec, GivenWhenThen}
import scala.util.{Failure, Success, Try}
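// Integration-style test: expects a reachable Cosmos DB account and collection as configured in TestConfigurations.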
class CosmosDBProviderImplTest extends FlatSpec with GivenWhenThen with LazyLogging {
"CosmosDBProviderTest" should "read collection with a given name" in {
Given("A collection name")
val clientSettings = CosmosDBClientSettings(
TestConfigurations.ENDPOINT,
TestConfigurations.MASTER_KEY,
TestConfigurations.DATABASE,
TestConfigurations.SOURCE_COLLECTION,
ConnectionPolicy.GetDefault(),
ConsistencyLevel.Session
)
val client = Try(CosmosDBProviderImpl.getClient(clientSettings)) match {
case Success(conn) =>
logger.info("Connection to CosmosDB established.")
conn
case Failure(f) => throw new ConnectException(s"Couldn't connect to CosmosDB.", f)
}
When("Call CosmosDB readcollection")
logger.info("readCollection in CosmosDB .")
val docCollQry = CosmosDBProviderImpl.queryCollection(TestConfigurations.DATABASE, TestConfigurations.SOURCE_COLLECTION, new CountDownLatch(1)).toBlocking.single
logger.info(docCollQry.getResults.size.toString)
Then(s"Verify collection of messages is equal to inserted")
assert(docCollQry.getResults.size != 0)
}
}


@ -0,0 +1,34 @@
package com.microsoft.azure.cosmosdb.kafka.connect.sink
import com.google.common.collect.Maps
import com.microsoft.azure.cosmosdb.kafka.connect.config.{CosmosDBConfigConstants, TestConfigurations}
import org.apache.kafka.connect.runtime.ConnectorConfig
import org.scalatest.{FlatSpec, GivenWhenThen}
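// Verifies that the sink connector copies every required connection and mapping property into each generated task config.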
class CosmosDBSinkConnectorTest extends FlatSpec with GivenWhenThen {
"CosmosDBSinkConnector" should "Validate all input properties and generate right set of task config properties" in {
Given("Valid set of input properties")
val props = TestConfigurations.getSinkConnectorProperties()
val connector = new CosmosDBSinkConnector
When("Start and TaskConfig are called in right order")
connector.start(Maps.fromProperties(props))
val numTasks = props.getProperty(ConnectorConfig.TASKS_MAX_CONFIG).toInt
val taskConfigs = connector.taskConfigs(numTasks)
Then("The TaskConfigs have all the expected properties")
assert(taskConfigs.size() == numTasks)
for (i <- 0 until numTasks) {
val taskConfig: java.util.Map[String, String] = taskConfigs.get(i)
assert(taskConfig.containsKey(ConnectorConfig.NAME_CONFIG))
assert(taskConfig.containsKey(ConnectorConfig.CONNECTOR_CLASS_CONFIG))
assert(taskConfig.containsKey(ConnectorConfig.TASKS_MAX_CONFIG))
assert(taskConfig.containsKey(CosmosDBConfigConstants.CONNECTION_ENDPOINT_CONFIG))
assert(taskConfig.containsKey(CosmosDBConfigConstants.CONNECTION_MASTERKEY_CONFIG))
assert(taskConfig.containsKey(CosmosDBConfigConstants.DATABASE_CONFIG))
assert(taskConfig.containsKey(CosmosDBConfigConstants.COLLECTION_TOPIC_MAP_CONFIG))
assert(taskConfig.containsKey(CosmosDBConfigConstants.COLLECTION_CONFIG))
assert(taskConfig.containsKey(CosmosDBConfigConstants.TOPIC_CONFIG))
}
}
}


@ -0,0 +1,19 @@
package com.microsoft.azure.cosmosdb.kafka.connect.sink
import java.util.Properties
import com.microsoft.azure.cosmosdb.kafka.connect.config.TestConfigurations
import com.microsoft.azure.cosmosdb.kafka.connect.config.CosmosDBConfigConstants
import com.microsoft.azure.cosmosdb.kafka.connect.kafka.KafkaCluster
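// Manual test harness: runs the sink connector on an embedded Kafka Connect worker using the default test sink configuration.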
object SinkConnectWriterTest {
def main(args: Array[String]): Unit = {
val workerProperties: Properties = TestConfigurations.getSinkWorkerProperties(KafkaCluster.BrokersList.toString)
val connectorProperties: Properties = TestConfigurations.getSinkConnectorProperties()
KafkaCluster.startEmbeddedConnect(workerProperties, List(connectorProperties))
if (KafkaCluster.kafkaConnectEnabled) {
println("Kafka Connector Enabled")
}
}
}


@ -0,0 +1,156 @@
package com.microsoft.azure.cosmosdb.kafka.connect.sink
import java.util.ArrayList
import com.microsoft.azure.cosmosdb.kafka.connect.MockCosmosDBProvider
import com.microsoft.azure.cosmosdb.kafka.connect.config.TestConfigurations.{DATABASE, ENDPOINT, MASTER_KEY}
import com.microsoft.azure.cosmosdb.kafka.connect.config.CosmosDBConfigConstants
import org.apache.kafka.connect.data.Schema
import org.apache.kafka.connect.sink.SinkRecord
import org.scalatest.{FlatSpec, GivenWhenThen}
import scala.collection.JavaConverters._
import scala.collection.mutable
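// Unit tests for CosmosDBSinkTask backed by MockCosmosDBProvider, so no live Cosmos DB account is required.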
class CosmosDBSinkTaskTest extends FlatSpec with GivenWhenThen {
val PARTITION = 0
private val TOPIC = "topic"
private val TOPIC_2 = "topic2"
private val TOPIC_3 = "topic3"
private val TOPIC_4 = "topic4"
private val TOPIC_5 = "topic5"
private val COLLECTION = "collection"
private val COLLECTION_2 = "collection2"
private val COLLECTION_3 = "collection3"
"CosmosDBSinkConnector start" should "Populate a simple collection topic map according to the configuration in settings" in {
Given("A Cosmos DB Provider and settings with a collection topic mapping")
val mockCosmosProvider = MockCosmosDBProvider
mockCosmosProvider.setupCollections(List(COLLECTION))
val sinkTask = new CosmosDBSinkTask { override val cosmosDBProvider = mockCosmosProvider }
val map = Map(
org.apache.kafka.connect.runtime.ConnectorConfig.NAME_CONFIG -> "CosmosDBSinkConnector",
org.apache.kafka.connect.runtime.ConnectorConfig.CONNECTOR_CLASS_CONFIG -> "com.microsoft.azure.cosmosdb.kafka.connect.sink.CosmosDBSinkConnector",
org.apache.kafka.connect.runtime.ConnectorConfig.TASKS_MAX_CONFIG -> "1",
CosmosDBConfigConstants.CONNECTION_ENDPOINT_CONFIG -> ENDPOINT,
CosmosDBConfigConstants.CONNECTION_MASTERKEY_CONFIG -> MASTER_KEY,
CosmosDBConfigConstants.DATABASE_CONFIG -> DATABASE,
CosmosDBConfigConstants.COLLECTION_CONFIG -> s"$COLLECTION",
CosmosDBConfigConstants.COLLECTION_TOPIC_MAP_CONFIG -> s"$COLLECTION#$TOPIC",
"topics" -> s"$TOPIC",
CosmosDBConfigConstants.TOPIC_CONFIG -> s"$TOPIC",
CosmosDBConfigConstants.SINK_POST_PROCESSOR -> "com.microsoft.azure.cosmosdb.kafka.connect.processor.sink.SelectorSinkPostProcessor"
).asJava
When("The sink task is started")
sinkTask.start(map)
Then("The collection topic map should contain the proper mapping")
val expectedMap = mutable.HashMap[String, String](TOPIC -> COLLECTION)
assert(sinkTask.collectionTopicMap == expectedMap)
}
"CosmosDBSinkConnector start" should "Populate a complex collection topic map according to the configuration in settings" in {
Given("A Cosmos DB Provider and settings with a collection topic mapping")
val mockCosmosProvider = MockCosmosDBProvider
mockCosmosProvider.setupCollections(List(COLLECTION))
val sinkTask = new CosmosDBSinkTask { override val cosmosDBProvider = mockCosmosProvider }
val map = Map(
org.apache.kafka.connect.runtime.ConnectorConfig.NAME_CONFIG -> "CosmosDBSinkConnector",
org.apache.kafka.connect.runtime.ConnectorConfig.CONNECTOR_CLASS_CONFIG -> "com.microsoft.azure.cosmosdb.kafka.connect.sink.CosmosDBSinkConnector",
org.apache.kafka.connect.runtime.ConnectorConfig.TASKS_MAX_CONFIG -> "1",
CosmosDBConfigConstants.CONNECTION_ENDPOINT_CONFIG -> ENDPOINT,
CosmosDBConfigConstants.CONNECTION_MASTERKEY_CONFIG -> MASTER_KEY,
CosmosDBConfigConstants.DATABASE_CONFIG -> DATABASE,
CosmosDBConfigConstants.COLLECTION_CONFIG -> s"$COLLECTION,$COLLECTION_2,$COLLECTION_3",
CosmosDBConfigConstants.COLLECTION_TOPIC_MAP_CONFIG -> s"$COLLECTION#$TOPIC,$COLLECTION#$TOPIC_2,$COLLECTION_2#$TOPIC_3,$COLLECTION_3#$TOPIC_4,$COLLECTION_3#$TOPIC_5",
"topics" -> s"$TOPIC,$TOPIC_2,$TOPIC_3,$TOPIC_4,$TOPIC_5",
CosmosDBConfigConstants.TOPIC_CONFIG -> s"$TOPIC,$TOPIC_2,$TOPIC_3,$TOPIC_4,$TOPIC_5",
CosmosDBConfigConstants.SINK_POST_PROCESSOR -> "com.microsoft.azure.cosmosdb.kafka.connect.processor.sink.SelectorSinkPostProcessor"
).asJava
When("The sink task is started")
sinkTask.start(map)
Then("The collection topic map should contain the proper mapping")
val expectedMap = mutable.HashMap[String, String](TOPIC -> COLLECTION,
TOPIC_2 -> COLLECTION,
TOPIC_3 -> COLLECTION_2,
TOPIC_4 -> COLLECTION_3,
TOPIC_5 -> COLLECTION_3)
assert(sinkTask.collectionTopicMap == expectedMap)
}
"CosmosDBSinkConnector start" should "Populate the collection topic map with collection name as topic name if no config is given" in {
Given("A Cosmos DB Provider and settings without a collection topic mapping")
val mockCosmosProvider = MockCosmosDBProvider
mockCosmosProvider.setupCollections(List(COLLECTION))
val sinkTask = new CosmosDBSinkTask { override val cosmosDBProvider = mockCosmosProvider }
val map = Map(
org.apache.kafka.connect.runtime.ConnectorConfig.NAME_CONFIG -> "CosmosDBSinkConnector",
org.apache.kafka.connect.runtime.ConnectorConfig.CONNECTOR_CLASS_CONFIG -> "com.microsoft.azure.cosmosdb.kafka.connect.sink.CosmosDBSinkConnector",
org.apache.kafka.connect.runtime.ConnectorConfig.TASKS_MAX_CONFIG -> "1",
CosmosDBConfigConstants.CONNECTION_ENDPOINT_CONFIG -> ENDPOINT,
CosmosDBConfigConstants.CONNECTION_MASTERKEY_CONFIG -> MASTER_KEY,
CosmosDBConfigConstants.DATABASE_CONFIG -> DATABASE,
CosmosDBConfigConstants.COLLECTION_CONFIG -> "",
CosmosDBConfigConstants.COLLECTION_TOPIC_MAP_CONFIG -> "",
"topics" -> s"$TOPIC,$TOPIC_2",
CosmosDBConfigConstants.TOPIC_CONFIG -> s"$TOPIC,$TOPIC_2",
CosmosDBConfigConstants.SINK_POST_PROCESSOR -> "com.microsoft.azure.cosmosdb.kafka.connect.processor.sink.SelectorSinkPostProcessor"
).asJava
When("The sink task is started")
sinkTask.start(map)
Then("The collection topic map should contain the proper mapping")
val expectedMap = mutable.HashMap[String, String](TOPIC -> TOPIC,
TOPIC_2 -> TOPIC_2)
assert(sinkTask.collectionTopicMap == expectedMap)
}
"CosmosDBSinkConnector put" should "Write records from topics in the proper collections according to the map" in {
Given("A Cosmos DB Provider and a configured Cosmos DB Collection")
val mockCosmosProvider = MockCosmosDBProvider
mockCosmosProvider.setupCollections(List(COLLECTION))
val record1 = new SinkRecord(TOPIC, PARTITION, Schema.STRING_SCHEMA, null, Schema.STRING_SCHEMA, "{\"message\": \"message1 payload\"}", 0)
val record2 = new SinkRecord(TOPIC, PARTITION, Schema.STRING_SCHEMA, null, Schema.STRING_SCHEMA, "{\"message\": \"message2 payload\"}", 0)
val records = new ArrayList[SinkRecord]
records.add(record1)
records.add(record2)
val sinkTask = new CosmosDBSinkTask { override val cosmosDBProvider = mockCosmosProvider }
val map = Map(
org.apache.kafka.connect.runtime.ConnectorConfig.NAME_CONFIG -> "CosmosDBSinkConnector",
org.apache.kafka.connect.runtime.ConnectorConfig.CONNECTOR_CLASS_CONFIG -> "com.microsoft.azure.cosmosdb.kafka.connect.sink.CosmosDBSinkConnector",
org.apache.kafka.connect.runtime.ConnectorConfig.TASKS_MAX_CONFIG -> "1",
CosmosDBConfigConstants.CONNECTION_ENDPOINT_CONFIG -> ENDPOINT,
CosmosDBConfigConstants.CONNECTION_MASTERKEY_CONFIG -> MASTER_KEY,
CosmosDBConfigConstants.DATABASE_CONFIG -> DATABASE,
CosmosDBConfigConstants.COLLECTION_CONFIG -> COLLECTION,
CosmosDBConfigConstants.COLLECTION_TOPIC_MAP_CONFIG -> s"$COLLECTION#$TOPIC",
"topics" -> TOPIC,
CosmosDBConfigConstants.TOPIC_CONFIG -> TOPIC,
CosmosDBConfigConstants.SINK_POST_PROCESSOR -> "com.microsoft.azure.cosmosdb.kafka.connect.processor.sink.SelectorSinkPostProcessor"
).asJava
sinkTask.start(map)
When("Records are passed to the put method")
sinkTask.put(records)
Then("The Cosmos DB collection should contain all of the records")
val documents = mockCosmosProvider.getDocumentsByCollection(COLLECTION)
assert(documents.size == 2)
}
}


@ -0,0 +1,216 @@
package com.microsoft.azure.cosmosdb.kafka.connect.sink
import java.util.ArrayList
import com.microsoft.azure.cosmosdb.Document
import com.microsoft.azure.cosmosdb.kafka.connect.config.TestConfigurations.{DATABASE, ENDPOINT, MASTER_KEY}
import com.microsoft.azure.cosmosdb.kafka.connect.MockCosmosDBProvider
import org.apache.kafka.connect.data.Schema
import org.apache.kafka.connect.sink.SinkRecord
import org.scalatest.{FlatSpec, GivenWhenThen}
import java.util
import scala.collection.mutable.HashMap
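// Unit tests for CosmosDBWriter covering raw JSON string and HashMap payloads, with and without a schema envelope, plus multi-collection routing.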
class CosmosDBWriterTest extends FlatSpec with GivenWhenThen {
private val PARTITION = 0
private val TOPIC = "topic"
private val TOPIC_2 = "topic2"
private val TOPIC_3 = "topic3"
private val TOPIC_4 = "topic4"
private val TOPIC_5 = "topic5"
private val COLLECTION = "collection"
private val COLLECTION_2 = "collection2"
private val COLLECTION_3 = "collection3"
// NOTE: All schemas are sent as null during testing because we are not currently enforcing them.
// We simply need to validate the presence of the schema object doesn't break the writer.
"CosmosDBWriter write" should "Write records formatted as a raw json string with schema" in {
Given("A Cosmos DB Provider, a configured Cosmos DB Collection and sample Sink Records")
// Instantiate the MockCosmosDBProvider and Setup the Collections
val mockCosmosProvider = MockCosmosDBProvider
mockCosmosProvider.setupCollections(List(COLLECTION))
// Map the Topic and Collections
val collectionTopicMap: HashMap[String, String] = HashMap[String, String]((TOPIC, COLLECTION))
// Set up Writer
val setting = new CosmosDBSinkSettings(ENDPOINT, MASTER_KEY, DATABASE, collectionTopicMap)
val writer = new CosmosDBWriter(setting, mockCosmosProvider)
// Create sample SinkRecords
val record1 = new SinkRecord(TOPIC, PARTITION, Schema.STRING_SCHEMA, null, Schema.STRING_SCHEMA, "{\"schema\": \"null\", \"payload\": {\"message\": \"message1 payload\"}}", 0)
val record2 = new SinkRecord(TOPIC, PARTITION, Schema.STRING_SCHEMA, null, Schema.STRING_SCHEMA, "{\"schema\": \"null\", \"payload\": {\"message\": \"message2 payload\"}}", 0)
When("Records are passed to the write method")
writer.write(Seq(record1, record2))
Then("The Cosmos DB collection should contain all of the records")
val documents: ArrayList[Document] = mockCosmosProvider.getDocumentsByCollection(COLLECTION)
assert(documents.size == 2)
// Check the schema wasn't written with the payload
assert(documents.get(0).get("schema") == null)
assert(documents.get(1).get("schema") == null)
assert(documents.get(0).get("message") == "message1 payload")
assert(documents.get(1).get("message") == "message2 payload")
}
"CosmosDBWriter write" should "Write records formatted as a raw json string without schema" in {
Given("A Cosmos DB Provider, a configured Cosmos DB Collection and sample Sink Records")
// Instantiate the MockCosmosDBProvider and Setup the Collections
val mockCosmosProvider = MockCosmosDBProvider
mockCosmosProvider.setupCollections(List(COLLECTION))
// Map the Topic and Collections
val collectionTopicMap: HashMap[String, String] = HashMap[String, String]((TOPIC, COLLECTION))
// Set up Writer
val setting = new CosmosDBSinkSettings(ENDPOINT, MASTER_KEY, DATABASE, collectionTopicMap)
val writer = new CosmosDBWriter(setting, mockCosmosProvider)
// Create sample SinkRecords
val record1 = new SinkRecord(TOPIC, PARTITION, Schema.STRING_SCHEMA, null, Schema.STRING_SCHEMA, "{\"message\": \"message1 payload\"}", 0)
val record2 = new SinkRecord(TOPIC, PARTITION, Schema.STRING_SCHEMA, null, Schema.STRING_SCHEMA, "{\"message\": \"message2 payload\"}", 0)
When("Records are passed to the write method")
writer.write(Seq(record1, record2))
Then("The Cosmos DB collection should contain all of the records")
val documents = mockCosmosProvider.getDocumentsByCollection(COLLECTION)
assert(documents.size == 2)
assert(documents.get(0).get("message") == "message1 payload")
assert(documents.get(1).get("message") == "message2 payload")
}
"CosmosDBWriter write" should "Write records formatted as hash map without schema" in {
Given("A Cosmos DB Provider, a configured Cosmos DB Collection and sample Sink Records")
// Instantiate the MockCosmosDBProvider and Setup the Collections
val mockCosmosProvider = MockCosmosDBProvider
mockCosmosProvider.setupCollections(List(COLLECTION))
// Map the Topic and Collections
val collectionTopicMap: HashMap[String, String] = HashMap[String, String]((TOPIC, COLLECTION))
// Set up Writer
val setting = new CosmosDBSinkSettings(ENDPOINT, MASTER_KEY, DATABASE, collectionTopicMap)
val writer = new CosmosDBWriter(setting, mockCosmosProvider)
// Create sample SinkRecords
val payload1 = new util.HashMap[String, String]()
payload1.put("message", "message1 payload")
val record1 = new SinkRecord(TOPIC, PARTITION, Schema.STRING_SCHEMA, null, null, payload1, 0)
val payload2 = new util.HashMap[String, String]()
payload2.put("message", "message2 payload")
val record2 = new SinkRecord(TOPIC, PARTITION, Schema.STRING_SCHEMA, null, null, payload2, 0)
When("Records are passed to the write method")
writer.write(Seq(record1, record2))
Then("The Cosmos DB collection should contain all of the records")
val documents = mockCosmosProvider.getDocumentsByCollection(COLLECTION)
assert(documents.size == 2)
assert(documents.get(0).get("message") == "message1 payload")
assert(documents.get(1).get("message") == "message2 payload")
}
"CosmosDBWriter write" should "Write records formatted as hash map with schema" in {
Given("A Cosmos DB Provider, a configured Cosmos DB Collection and sample Sink Records")
// Instantiate the MockCosmosDBProvider and Setup the Collections
val mockCosmosProvider = MockCosmosDBProvider
mockCosmosProvider.setupCollections(List(COLLECTION))
// Map the Topic and Collections
val collectionTopicMap: HashMap[String, String] = HashMap[String, String]((TOPIC, COLLECTION))
// Set up Writer
val setting = new CosmosDBSinkSettings(ENDPOINT, MASTER_KEY, DATABASE, collectionTopicMap)
val writer = new CosmosDBWriter(setting, mockCosmosProvider)
// Create sample SinkRecords
val payload1 = new util.HashMap[String, String]()
payload1.put("message", "message1 payload")
val map1 = new util.HashMap[String, util.HashMap[String, String]]()
map1.put("schema", null)
map1.put("payload", payload1)
val record1 = new SinkRecord(TOPIC, PARTITION, Schema.STRING_SCHEMA, null, null, map1, 0)
val payload2 = new util.HashMap[String, String]()
payload2.put("message", "message2 payload")
val map2 = new util.HashMap[String, util.HashMap[String, String]]()
map2.put("schema", null)
map2.put("payload", payload2)
val record2 = new SinkRecord(TOPIC, PARTITION, Schema.STRING_SCHEMA, null, null, map2, 0)
When("Records are passed to the write method")
writer.write(Seq(record1, record2))
Then("The Cosmos DB collection should contain all of the records")
val documents = mockCosmosProvider.getDocumentsByCollection(COLLECTION)
assert(documents.size == 2)
// Check the schema wasn't written with the payload
assert(documents.get(0).get("schema") == null)
assert(documents.get(1).get("schema") == null)
assert(documents.get(0).get("message") == "message1 payload")
assert(documents.get(1).get("message") == "message2 payload")
}
"CosmosDBWriter write" should "Write records in the proper collections according to a complex map" in {
Given("A Cosmos DB Provider, a configured Cosmos DB Collection and sample Sink Records")
// Instantiate the MockCosmosDBProvider and Setup the Collections
val mockCosmosProvider = MockCosmosDBProvider
mockCosmosProvider.setupCollections(List(COLLECTION,COLLECTION_2,COLLECTION_3))
// Map the Topic and Collections
val collectionTopicMap: HashMap[String, String] = HashMap[String, String]((TOPIC, COLLECTION),
(TOPIC_2, COLLECTION),
(TOPIC_3, COLLECTION_2),
(TOPIC_4, COLLECTION_3),
(TOPIC_5, COLLECTION_3))
// Set up Writer
val setting = new CosmosDBSinkSettings(ENDPOINT, MASTER_KEY, DATABASE, collectionTopicMap)
val writer = new CosmosDBWriter(setting, mockCosmosProvider)
// Create sample SinkRecords
val record1 = new SinkRecord(TOPIC, PARTITION, Schema.STRING_SCHEMA, null, Schema.STRING_SCHEMA, "{\"message\": \"topic payload\"}", 0)
val record2 = new SinkRecord(TOPIC, PARTITION, Schema.STRING_SCHEMA, null, Schema.STRING_SCHEMA, "{\"message\": \"topic payload\"}", 0)
val record3 = new SinkRecord(TOPIC_2, PARTITION, Schema.STRING_SCHEMA, null, Schema.STRING_SCHEMA, "{\"message\": \"topic2 payload\"}", 0)
val record4 = new SinkRecord(TOPIC_2, PARTITION, Schema.STRING_SCHEMA, null, Schema.STRING_SCHEMA, "{\"message\": \"topic2 payload\"}", 0)
val record5 = new SinkRecord(TOPIC_3, PARTITION, Schema.STRING_SCHEMA, null, Schema.STRING_SCHEMA, "{\"message\": \"topic3 payload\"}", 0)
val record6 = new SinkRecord(TOPIC_3, PARTITION, Schema.STRING_SCHEMA, null, Schema.STRING_SCHEMA, "{\"message\": \"topic3 payload\"}", 0)
val record7 = new SinkRecord(TOPIC_4, PARTITION, Schema.STRING_SCHEMA, null, Schema.STRING_SCHEMA, "{\"message\": \"topic4 payload\"}", 0)
val record8 = new SinkRecord(TOPIC_4, PARTITION, Schema.STRING_SCHEMA, null, Schema.STRING_SCHEMA, "{\"message\": \"topic4 payload\"}", 0)
val record9 = new SinkRecord(TOPIC_5, PARTITION, Schema.STRING_SCHEMA, null, Schema.STRING_SCHEMA, "{\"message\": \"topic5 payload\"}", 0)
val record10 = new SinkRecord(TOPIC_5, PARTITION, Schema.STRING_SCHEMA, null, Schema.STRING_SCHEMA, "{\"message\": \"topic5 payload\"}", 0)
val record11 = new SinkRecord(TOPIC_5, PARTITION, Schema.STRING_SCHEMA, null, Schema.STRING_SCHEMA, "{\"message\": \"topic5 payload\"}", 0)
When("Records are passed to the write method")
writer.write(Seq(record1, record2, record3, record4, record5, record6, record7, record8, record9, record10, record11))
Then("The Cosmos DB collection should contain all of the records")
val documents = mockCosmosProvider.getDocumentsByCollection(COLLECTION)
val documents2 = mockCosmosProvider.getDocumentsByCollection(COLLECTION_2)
val documents3 = mockCosmosProvider.getDocumentsByCollection(COLLECTION_3)
assert(documents.size == 4)
assert(documents2.size == 2)
assert(documents3.size == 5)
}
}


@ -0,0 +1,65 @@
package com.microsoft.azure.cosmosdb.kafka.connect.sink
import java.util.Properties
import java.util.UUID.randomUUID
import com.fasterxml.jackson.databind.{JsonNode, ObjectMapper}
import com.microsoft.azure.cosmosdb.kafka.connect.config.CosmosDBConfigConstants
import com.microsoft.azure.cosmosdb.kafka.connect.kafka.KafkaCluster
import com.microsoft.azure.cosmosdb.kafka.connect.model.CosmosDBDocumentTest
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
import com.microsoft.azure.cosmosdb.kafka.connect.config.TestConfigurations
import org.apache.kafka.connect.runtime.distributed.DistributedConfig
import org.apache.kafka.connect.runtime.{ConnectorConfig, WorkerConfig}
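// Manual test harness: starts the source connector on an embedded worker and produces schema, schema-less and generated JSON messages to the test topic.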
object SourceConnectReaderTest {
def main(args: Array[String]): Unit = {
val workerProperties: Properties = TestConfigurations.getSourceWorkerProperties(KafkaCluster.BrokersList.toString)
val connectorProperties: Properties = TestConfigurations.getSourceConnectorProperties()
KafkaCluster.startEmbeddedConnect(workerProperties, List(connectorProperties))
if (KafkaCluster.kafkaConnectEnabled) {
println("Kafka Connector Enabled")
}
// Write 20 messages to the kafka topic to be consumed
val producerProps: Properties = TestConfigurations.getProducerProperties(KafkaCluster.BrokersList.toString)
val producer = new KafkaProducer[Nothing, JsonNode](producerProps)
val testUUID = randomUUID()
val objectMapper: ObjectMapper = new ObjectMapper
//schema JSON test
for (i <- 1 to 4) {
val json = scala.io.Source.fromFile(getClass.getResource(s"/test$i.json").toURI.getPath).mkString
val mapper = new ObjectMapper
val jsonNode: JsonNode = mapper.readTree(json)
producer.send(new ProducerRecord[Nothing, JsonNode](TestConfigurations.TOPIC, jsonNode))
}
//schema-less JSON test
for (i <- 5 to 8) {
val json = scala.io.Source.fromFile(getClass.getResource(s"/test$i.json").toURI.getPath).mkString
val mapper = new ObjectMapper
val jsonNode: JsonNode = mapper.readTree(json)
producer.send(new ProducerRecord[Nothing, JsonNode](TestConfigurations.TOPIC, jsonNode))
}
// JSON string test no schema
for (i <- 9 until 15) {
val message = new CosmosDBDocumentTest(s"$i", s"message $i", testUUID)
val jsonNode: JsonNode = objectMapper.valueToTree(message)
println("sending message: ", jsonNode.findPath("id"))
producer.send(new ProducerRecord[Nothing, JsonNode](TestConfigurations.TOPIC, jsonNode))
}
producer.flush()
producer.close()
}
}


@ -0,0 +1,226 @@
package com.microsoft.azure.cosmosdb.kafka.connect.source
import java.util
import java.util.UUID.randomUUID
import java.util.concurrent.{CountDownLatch, TimeUnit}
import java.util.{ArrayList, Properties, UUID}
import _root_.rx.Observable
import _root_.rx.lang.scala.JavaConversions._
import com.google.common.collect.Maps
import com.google.gson.Gson
import com.microsoft.azure.cosmosdb.{ConnectionPolicy, ConsistencyLevel, Document, ResourceResponse}
import com.microsoft.azure.cosmosdb.kafka.connect.{CosmosDBClientSettings, CosmosDBProvider, CosmosDBProviderImpl, MockCosmosDBProvider, MockCosmosDBReader}
import com.microsoft.azure.cosmosdb.kafka.connect.config.TestConfigurations.{DATABASE, ENDPOINT, MASTER_KEY}
import com.microsoft.azure.cosmosdb.kafka.connect.config.{CosmosDBConfigConstants, TestConfigurations}
import com.microsoft.azure.cosmosdb.kafka.connect.model.{CosmosDBDocumentTest, KafkaPayloadTest}
import com.typesafe.scalalogging.LazyLogging
import org.apache.kafka.connect.data.Schema
import org.apache.kafka.connect.errors.ConnectException
import org.apache.kafka.connect.sink.SinkRecord
import org.scalatest.{FlatSpec, GivenWhenThen}
import org.mockito.MockitoSugar.mock
import scala.collection.JavaConverters._
import scala.collection.mutable
import scala.util.{Failure, Success, Try}
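// Source task tests: start() should create one reader per assigned partition; the poll() specs use mocked readers and validate batching against batchSize and bufferSize.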
class CosmosDBSourceConnectorTaskTestMock extends FlatSpec with GivenWhenThen with LazyLogging {
private val NUM_DOCS: Int = 20
private val DOC_SIZE: Int = 313
private var testUUID: UUID = null
private var batchSize = NUM_DOCS
private var bufferSize = batchSize * DOC_SIZE
"CosmosDBSourceTask start" should "Initialize all properties" in {
Given("A list of properties for CosmosSourceTask")
val props = TestConfigurations.getSourceConnectorProperties()
// Add the assigned partitions
props.put(CosmosDBConfigConstants.ASSIGNED_PARTITIONS, "0,1")
When("CosmosSourceTask is started")
val mockCosmosProvider = MockCosmosDBProvider
val task = new CosmosDBSourceTask { override val cosmosDBProvider = mockCosmosProvider }
task.start(Maps.fromProperties(props))
Then("CosmosSourceTask should properly initialized the readers")
val readers = task.getReaders()
readers.foreach(r => assert(r._1 == r._2.setting.assignedPartition))
assert(readers.size == 2)
}
"CosmosDBSourceTask poll" should "Return a list of SourceRecords with the right format" in {
Given("A set of SourceConnector properties")
val props: Properties = TestConfigurations.getSourceConnectorProperties()
props.setProperty(CosmosDBConfigConstants.BATCH_SIZE, NUM_DOCS.toString)
props.setProperty(CosmosDBConfigConstants.READER_BUFFER_SIZE, "10000")
props.setProperty(CosmosDBConfigConstants.TIMEOUT, "10000")
Then(s"Start the SourceConnector and return the taskConfigs")
// Declare a collection to store the messages from SourceRecord
val kafkaMessages = new util.ArrayList[KafkaPayloadTest]
// Start CosmosDBSourceConnector and return the taskConfigs
val connector = new CosmosDBSourceConnector
connector.start(Maps.fromProperties(props))
val taskConfigs = connector.taskConfigs(2)
taskConfigs.forEach(config => {
When("CosmosSourceTask is started and poll is called")
val task = new CosmosDBSourceTask {override val readers = mock[mutable.Map[String, CosmosDBReader]]}
task.start(config)
val sourceRecords = task.poll()
Then("It returns a list of SourceRecords")
assert(sourceRecords != null)
val gson = new Gson()
sourceRecords.forEach(r => {
val message = gson.fromJson(r.value().toString, classOf[KafkaPayloadTest])
if (message.testID == testUUID) {
kafkaMessages.add(message)
}
})
})
}
"CosmosDBSourceTask poll" should "Return a list of SourceRecords based on the batchSize" in {
Given("A set of SourceConnector properties")
val props: Properties = TestConfigurations.getSourceConnectorProperties()
props.setProperty(CosmosDBConfigConstants.READER_BUFFER_SIZE, "10000")
props.setProperty(CosmosDBConfigConstants.TIMEOUT, "10000")
Then(s"Start the SourceConnector and return the taskConfigs")
// Declare a collection to store the messages from SourceRecord
val kafkaMessages = new util.ArrayList[KafkaPayloadTest]
// Start CosmosDBSourceConnector and return the taskConfigs
val connector = new CosmosDBSourceConnector
connector.start(Maps.fromProperties(props))
val taskConfigs = connector.taskConfigs(2)
val numWorkers = connector.getNumberOfWorkers()
taskConfigs.forEach(config => {
When("CosmosSourceTask is started and poll is called")
val task = new CosmosDBSourceTask {override val readers = mock[mutable.Map[String, CosmosDBReader]]}
task.start(config)
batchSize = config.get(CosmosDBConfigConstants.BATCH_SIZE).toInt
val sourceRecords = task.poll()
Then("It returns a list of SourceRecords")
assert(sourceRecords != null)
val gson = new Gson()
sourceRecords.forEach(r => {
val message = gson.fromJson(r.value().toString, classOf[KafkaPayloadTest])
if (message.testID == testUUID) {
kafkaMessages.add(message)
}
})
})
Then(s"Make sure collection of messages is equal to ${batchSize * numWorkers}")
assert(kafkaMessages.size() == batchSize * numWorkers)
}
"CosmosDBSourceTask poll" should "Return a list of SourceRecords based on the bufferSize" in {
Given("A set of SourceConnector properties")
val props: Properties = TestConfigurations.getSourceConnectorProperties()
props.setProperty(CosmosDBConfigConstants.BATCH_SIZE, NUM_DOCS.toString)
props.setProperty(CosmosDBConfigConstants.TIMEOUT, "10000")
Then(s"Start the SourceConnector and return the taskConfigs")
// Declare a collection to store the messages from SourceRecord
val kafkaMessages = new util.ArrayList[KafkaPayloadTest]
// Start CosmosDBSourceConnector and return the taskConfigs
val connector = new CosmosDBSourceConnector
connector.start(Maps.fromProperties(props))
val taskConfigs = connector.taskConfigs(2)
val numWorkers = connector.getNumberOfWorkers()
taskConfigs.forEach(config => {
When("CosmosSourceTask is started and poll is called")
val task = new CosmosDBSourceTask {override val readers = mock[mutable.Map[String, CosmosDBReader]]}
task.start(config)
bufferSize = config.get(CosmosDBConfigConstants.READER_BUFFER_SIZE).toInt
val sourceRecords = task.poll()
Then("It returns a list of SourceRecords")
assert(sourceRecords != null)
val gson = new Gson()
sourceRecords.forEach(r => {
val message = gson.fromJson(r.value().toString, classOf[KafkaPayloadTest])
if (message.testID == testUUID) {
kafkaMessages.add(message)
}
})
})
val minSize = (bufferSize * numWorkers)
val maxSize = ((bufferSize + DOC_SIZE) * numWorkers)
Then(s"Make sure number of bytes in the collection of messages is between ${minSize} and ${maxSize}")
assert(kafkaMessages.size() * DOC_SIZE >= minSize && kafkaMessages.size() * DOC_SIZE <= maxSize)
}
private def mockDocuments(): ArrayList[CosmosDBDocumentTest] = {
val documents: ArrayList[CosmosDBDocumentTest] = new ArrayList[CosmosDBDocumentTest]
testUUID = randomUUID()
for (i <- 1 to NUM_DOCS) {
val doc = new CosmosDBDocumentTest(i.toString, s"Message ${i}", testUUID)
documents.add(doc)
}
documents
}
private def insertDocuments(cosmosDBProvider: CosmosDBProvider = CosmosDBProviderImpl) = {
// Source Collection
val clientSettings = CosmosDBClientSettings(
TestConfigurations.ENDPOINT,
TestConfigurations.MASTER_KEY,
TestConfigurations.DATABASE,
TestConfigurations.SOURCE_COLLECTION,
ConnectionPolicy.GetDefault(),
ConsistencyLevel.Session
)
val client = Try(cosmosDBProvider.getClient(clientSettings)) match {
case Success(conn) =>
logger.info("Connection to CosmosDB established.")
conn
case Failure(f) => throw new ConnectException(s"Couldn't connect to CosmosDB.", f)
}
val gson: Gson = new Gson()
val upsertDocumentsOBs: util.ArrayList[Observable[ResourceResponse[Document]]] = new util.ArrayList[Observable[ResourceResponse[Document]]]
val completionLatch = new CountDownLatch(1)
val forcedScalaObservable: _root_.rx.lang.scala.Observable[ResourceResponse[Document]] = Observable.merge(upsertDocumentsOBs)
mockDocuments().forEach(record => {
val json = gson.toJson(record)
val document = new Document(json)
val obs = client.upsertDocument(CosmosDBProviderImpl.getCollectionLink(TestConfigurations.DATABASE, TestConfigurations.SOURCE_COLLECTION), document, null, false)
upsertDocumentsOBs.add(obs)
})
forcedScalaObservable
.map(r => r.getRequestCharge)
.reduce((sum, value) => sum + value)
.subscribe(
t => logger.info(s"upsertDocuments total RU charge is $t"),
e => {
logger.error(s"error upserting documents e:${e.getMessage()} stack:${e.getStackTrace().toString()}")
completionLatch.countDown()
},
() => {
logger.info("upsertDocuments completed")
completionLatch.countDown()
}
)
}
}


@ -0,0 +1,35 @@
package com.microsoft.azure.cosmosdb.kafka.connect.source
import com.google.common.collect.Maps
import com.microsoft.azure.cosmosdb.kafka.connect.config.{CosmosDBConfigConstants, TestConfigurations}
import org.apache.kafka.connect.runtime.ConnectorConfig
import org.scalatest.{FlatSpec, GivenWhenThen}
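// Verifies that the source connector generates one task config per worker and assigns exactly one partition to each task.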
class CosmosDBSourceConnectorTest extends FlatSpec with GivenWhenThen {
"CosmosDBSourceConnector" should "Validate all input properties and generate right set of task config properties" in {
Given("Valid set of input properties")
val props = TestConfigurations.getSourceConnectorProperties()
val connector = new CosmosDBSourceConnector
When("Start and TaskConfig are called in right order")
connector.start(Maps.fromProperties(props))
val taskConfigs = connector.taskConfigs(3)
val numWorkers = connector.getNumberOfWorkers
Then("The TaskConfigs have all the expected properties")
assert(taskConfigs.size() == numWorkers)
for (i <- 0 until numWorkers) {
val taskConfig: java.util.Map[String, String] = taskConfigs.get(i)
assert(taskConfig.containsKey(ConnectorConfig.NAME_CONFIG))
assert(taskConfig.containsKey(ConnectorConfig.CONNECTOR_CLASS_CONFIG))
assert(taskConfig.containsKey(ConnectorConfig.TASKS_MAX_CONFIG))
assert(taskConfig.containsKey(CosmosDBConfigConstants.CONNECTION_ENDPOINT_CONFIG))
assert(taskConfig.containsKey(CosmosDBConfigConstants.CONNECTION_MASTERKEY_CONFIG))
assert(taskConfig.containsKey(CosmosDBConfigConstants.DATABASE_CONFIG))
assert(taskConfig.containsKey(CosmosDBConfigConstants.COLLECTION_CONFIG))
assert(taskConfig.containsKey(CosmosDBConfigConstants.TOPIC_CONFIG))
Then("Validate assigned partition")
val partition = taskConfig.get(CosmosDBConfigConstants.ASSIGNED_PARTITIONS)
assert(partition.size == 1)
assert(partition == i.toString)
}
}
}


@ -0,0 +1,230 @@
package com.microsoft.azure.cosmosdb.kafka.connect.source
import java.util
import java.util.UUID._
import java.util.concurrent.{CountDownLatch, TimeUnit}
import java.util.{ArrayList, Properties, UUID}
import com.microsoft.azure.cosmosdb.kafka.connect.{CosmosDBClientSettings, CosmosDBProvider, CosmosDBProviderImpl, MockCosmosDBProvider}
import com.microsoft.azure.cosmosdb.kafka.connect.config.TestConfigurations.{DATABASE, ENDPOINT, MASTER_KEY}
import org.apache.kafka.connect.data.Schema
import org.apache.kafka.connect.sink.SinkRecord
import org.scalatest.{FlatSpec, GivenWhenThen}
import scala.collection.JavaConverters._
import scala.collection.mutable
import _root_.rx.Observable
import _root_.rx.lang.scala.JavaConversions._
import com.google.common.collect.Maps
import com.google.gson.Gson
import com.microsoft.azure.cosmosdb.kafka.connect.config.{CosmosDBConfigConstants, TestConfigurations}
import com.microsoft.azure.cosmosdb.kafka.connect.model.{CosmosDBDocumentTest, KafkaPayloadTest}
import com.microsoft.azure.cosmosdb.{ConnectionPolicy, ConsistencyLevel, Document, ResourceResponse}
import com.typesafe.scalalogging.LazyLogging
import org.apache.kafka.connect.errors.ConnectException
import org.mockito.MockitoSugar.mock
import scala.util.{Failure, Success, Try}
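// Source task tests mirroring CosmosDBSourceConnectorTaskTestMock; the mockDocuments/insertDocuments helpers can seed a live source collection through CosmosDBProviderImpl.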
class CosmosDBSourceTaskTest extends FlatSpec with GivenWhenThen with LazyLogging {
private val NUM_DOCS: Int = 20
private val DOC_SIZE: Int = 313
private var testUUID: UUID = null
private var batchSize = NUM_DOCS
private var bufferSize = batchSize * DOC_SIZE
"CosmosDBSourceTask start" should "Initialize all properties" in {
Given("A list of properties for CosmosSourceTask")
val props = TestConfigurations.getSourceConnectorProperties()
// Add the assigned partitions
props.put(CosmosDBConfigConstants.ASSIGNED_PARTITIONS, "0,1")
When("CosmosSourceTask is started")
val mockCosmosProvider = MockCosmosDBProvider
val task = new CosmosDBSourceTask { override val cosmosDBProvider = mockCosmosProvider }
task.start(Maps.fromProperties(props))
Then("CosmosSourceTask should properly initialized the readers")
val readers = task.getReaders()
readers.foreach(r => assert(r._1 == r._2.setting.assignedPartition))
assert(readers.size == 2)
}
"CosmosDBSourceTask poll" should "Return a list of SourceRecords with the right format" in {
Given("A set of SourceConnector properties")
val props: Properties = TestConfigurations.getSourceConnectorProperties()
props.setProperty(CosmosDBConfigConstants.BATCH_SIZE, NUM_DOCS.toString)
props.setProperty(CosmosDBConfigConstants.READER_BUFFER_SIZE, "10000")
props.setProperty(CosmosDBConfigConstants.TIMEOUT, "10000")
Then(s"Start the SourceConnector and return the taskConfigs")
// Declare a collection to store the messages from SourceRecord
val kafkaMessages = new util.ArrayList[KafkaPayloadTest]
// Start CosmosDBSourceConnector and return the taskConfigs
val connector = new CosmosDBSourceConnector
connector.start(Maps.fromProperties(props))
val taskConfigs = connector.taskConfigs(2)
taskConfigs.forEach(config => {
When("CosmosSourceTask is started and poll is called")
val task = new CosmosDBSourceTask {override val readers = mock[mutable.Map[String, CosmosDBReader]]}
task.start(config)
val sourceRecords = task.poll()
Then("It returns a list of SourceRecords")
assert(sourceRecords != null)
val gson = new Gson()
sourceRecords.forEach(r => {
val message = gson.fromJson(r.value().toString, classOf[KafkaPayloadTest])
if (message.testID == testUUID) {
kafkaMessages.add(message)
}
})
})
}
"CosmosDBSourceTask poll" should "Return a list of SourceRecords based on the batchSize" in {
Given("A set of SourceConnector properties")
val props: Properties = TestConfigurations.getSourceConnectorProperties()
props.setProperty(CosmosDBConfigConstants.READER_BUFFER_SIZE, "10000")
props.setProperty(CosmosDBConfigConstants.TIMEOUT, "10000")
Then(s"Start the SourceConnector and return the taskConfigs")
// Declare a collection to store the messages from SourceRecord
val kafkaMessages = new util.ArrayList[KafkaPayloadTest]
// Start CosmosDBSourceConnector and return the taskConfigs
val connector = new CosmosDBSourceConnector
connector.start(Maps.fromProperties(props))
val taskConfigs = connector.taskConfigs(2)
val numWorkers = connector.getNumberOfWorkers()
taskConfigs.forEach(config => {
When("CosmosSourceTask is started and poll is called")
val task = new CosmosDBSourceTask {override val readers = mock[mutable.Map[String, CosmosDBReader]]}
task.start(config)
batchSize = config.get(CosmosDBConfigConstants.BATCH_SIZE).toInt
val sourceRecords = task.poll()
Then("It returns a list of SourceRecords")
assert(sourceRecords != null)
val gson = new Gson()
sourceRecords.forEach(r => {
val message = gson.fromJson(r.value().toString, classOf[KafkaPayloadTest])
if (message.testID == testUUID) {
kafkaMessages.add(message)
}
})
})
Then(s"Make sure collection of messages is equal to ${batchSize * numWorkers}")
assert(kafkaMessages.size() == batchSize * numWorkers)
}
"CosmosDBSourceTask poll" should "Return a list of SourceRecords based on the bufferSize" in {
Given("A set of SourceConnector properties")
val props: Properties = TestConfigurations.getSourceConnectorProperties()
props.setProperty(CosmosDBConfigConstants.BATCH_SIZE, NUM_DOCS.toString)
props.setProperty(CosmosDBConfigConstants.TIMEOUT, "10000")
Then(s"Start the SourceConnector and return the taskConfigs")
// Declare a collection to store the messages from SourceRecord
val kafkaMessages = new util.ArrayList[KafkaPayloadTest]
// Start CosmosDBSourceConnector and return the taskConfigs
val connector = new CosmosDBSourceConnector
connector.start(Maps.fromProperties(props))
val taskConfigs = connector.taskConfigs(2)
val numWorkers = connector.getNumberOfWorkers()
taskConfigs.forEach(config => {
When("CosmosSourceTask is started and poll is called")
val task = new CosmosDBSourceTask {override val readers = mock[mutable.Map[String, CosmosDBReader]]}
task.start(config)
bufferSize = config.get(CosmosDBConfigConstants.READER_BUFFER_SIZE).toInt
val sourceRecords = task.poll()
Then("It returns a list of SourceRecords")
assert(sourceRecords != null)
val gson = new Gson()
sourceRecords.forEach(r => {
val message = gson.fromJson(r.value().toString, classOf[KafkaPayloadTest])
if (message.testID == testUUID) {
kafkaMessages.add(message)
}
})
})
val minSize = (bufferSize * numWorkers)
val maxSize = ((bufferSize + DOC_SIZE) * numWorkers)
Then(s"Make sure number of bytes in the collection of messages is between ${minSize} and ${maxSize}")
assert(kafkaMessages.size() * DOC_SIZE >= minSize && kafkaMessages.size() * DOC_SIZE <= maxSize)
}
private def mockDocuments(): ArrayList[CosmosDBDocumentTest] = {
val documents: ArrayList[CosmosDBDocumentTest] = new ArrayList[CosmosDBDocumentTest]
testUUID = randomUUID()
for (i <- 1 to NUM_DOCS) {
val doc = new CosmosDBDocumentTest(i.toString, s"Message ${i}", testUUID)
documents.add(doc)
}
documents
}
private def insertDocuments(cosmosDBProvider: CosmosDBProvider = CosmosDBProviderImpl) = {
// Source Collection
val clientSettings = CosmosDBClientSettings(
TestConfigurations.ENDPOINT,
TestConfigurations.MASTER_KEY,
TestConfigurations.DATABASE,
TestConfigurations.SOURCE_COLLECTION,
ConnectionPolicy.GetDefault(),
ConsistencyLevel.Session
)
val client = Try(cosmosDBProvider.getClient(clientSettings)) match {
case Success(conn) =>
logger.info("Connection to CosmosDB established.")
conn
case Failure(f) => throw new ConnectException(s"Couldn't connect to CosmosDB.", f)
}
val gson: Gson = new Gson()
val upsertDocumentsOBs: util.ArrayList[Observable[ResourceResponse[Document]]] = new util.ArrayList[Observable[ResourceResponse[Document]]]
val completionLatch = new CountDownLatch(1)
val forcedScalaObservable: _root_.rx.lang.scala.Observable[ResourceResponse[Document]] = Observable.merge(upsertDocumentsOBs)
mockDocuments().forEach(record => {
val json = gson.toJson(record)
val document = new Document(json)
val obs = client.upsertDocument(CosmosDBProviderImpl.getCollectionLink(TestConfigurations.DATABASE, TestConfigurations.SOURCE_COLLECTION), document, null, false)
upsertDocumentsOBs.add(obs)
})
forcedScalaObservable
.map(r => r.getRequestCharge)
.reduce((sum, value) => sum + value)
.subscribe(
t => logger.info(s"upsertDocuments total RU charge is $t"),
e => {
logger.error(s"error upserting documents e:${e.getMessage()} stack:${e.getStackTrace().toString()}")
completionLatch.countDown()
},
() => {
logger.info("upsertDocuments completed")
completionLatch.countDown()
}
)
}
}