Merge pull request #40 from Azure/addOptionalEngineUrlConfig

Add kusto.engine.url as required param and rename kusto.url to kusto.ingestion.url
This commit is contained in:
Yihezkel Schoenbrun 2020-10-27 00:00:25 +02:00 коммит произвёл GitHub
Родитель 25f91b8dd3 0988d031a6
Коммит 3218b7b25c
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
10 изменённых файлов: 342 добавлений и 396 удалений

2
.gitignore поставляемый
Просмотреть файл

@ -289,7 +289,7 @@ paket-files/
# JetBrains Rider
.idea/
*.sln.iml
*.iml
# CodeRush
.cr/

Просмотреть файл

@ -57,7 +57,7 @@ Integration mode to Azure Data Explorer is batched, queued ingestion leveraging
### 3.3. Configurable retries
- The connector supports retries for transient errors with the ability to provide parameters for the same
- The connector supports retries for transient errors with the ability to provide relevant parameters
- and retries with exponential backoff
### 3.4. Serialization formats
@ -193,7 +193,7 @@ KafkaClient {
<br>
**4. Configs to add to the Docker image:**<br>
This is covered in detail further on. It is specified here for the purpose of completenes of defining what goes onto the worker config.<br>
This is covered in detail further on. It is specified here for the purpose of completeness of defining what goes onto the worker config.<br>
```
COPY krb5.conf /etc/krb5.conf
COPY hdi-esp-jaas.conf /etc/hdi-esp-jaas.conf
@ -219,39 +219,40 @@ The following is complete set of connector sink properties-
| :--- | :--- | :--- | :--- |
| 1 | connector.class | Classname of the Kusto sink | Hard code to ``` com.microsoft.azure.kusto.kafka.connect.sink.KustoSinkConnector ```<br>*Required* |
| 2 | topics | Kafka topic specification | List of topics separated by commas<br>*Required* |
| 3 | kusto.url | Kusto ingest cluster specification | Provide the ingest URI of your ADX cluster<br>Use the following construct for the private URL - https://ingest-private-[cluster].kusto.windows.net<br>*Required* |
| 4 | aad.auth.authority | Credentials for Kusto | Provide the tenant ID of your Azure Active Directory<br>*Required* |
| 5 | aad.auth.appid | Credentials for Kusto | Provide Azure Active Directory Service Principal Name<br>*Required* |
| 6 | aad.auth.appkey | Credentials for Kusto | Provide Azure Active Directory Service Principal secret<br>*Required* |
| 7 | kusto.tables.topics.mapping | Mapping of topics to tables | Provide 1..many topic-table comma-separated mappings as follows-<br>[{'topic': '\<topicName1\>','db': '\<datebaseName\>', 'table': '\<tableName\>','format': '<format-e.g.avro/csv/json>', 'mapping':'\<tableMappingName\>'}]<br>*Required* |
| 8 | key.converter | Deserialization | One of the below supported-<br>org.apache.kafka.connect.storage.StringConverter<br> org.apache.kafka.connect.json.JsonConverter<br>io.confluent.connect.avro.AvroConverter<br>io.confluent.connect.json.JsonSchemaConverter<br> org.apache.kafka.connect.converters.ByteArrayConverter<br><br>*Required* |
| 9 | value.converter | Deserialization | One of the below supported-<br>org.apache.kafka.connect.storage.StringConverter<br> org.apache.kafka.connect.json.JsonConverter<br>io.confluent.connect.avro.AvroConverter<br>io.confluent.connect.json.JsonSchemaConverter<br> org.apache.kafka.connect.converters.ByteArrayConverter<br><br>*Required* |
| 10 | value.converter.schema.registry.url | Schema validation | URI of the Kafka schema registry<br>*Optional* |
| 11 | value.converter.schemas.enable | Schema validation | Set to true if you have embedded schema with payload but are not leveraging the schema registry<br>Applicable for avro and json<br><br>*Optional* |
| 12 | tasks.max | connector parallelism | Specify the number of connector copy/sink tasks<br>*Required* |
| 13 | flush.size.bytes | Performance knob for batching | Maximum bufer byte size per topic+partition combination that in combination with flush.interval.ms (whichever is reached first) should result in sinking to Kusto<br>*Default - 1 MB*<br>*Required* |
| 14 | flush.interval.ms | Performance knob for batching | Minimum time interval per topic+partition combo that in combination with flush.size.bytes (whichever is reached first) should result in sinking to Kusto<br>*Default - 300 ms*<br>*Required* |
| 15 | tempdir.path | Local directory path on Kafka Connect worker to buffer files to before shipping to Kusto | Default is value returned by ```System.getProperty("java.io.tmpdir")``` with a GUID attached to it<br><br>*Optional* |
| 16 | behavior.on.error | Configurable behavior in response to errors encountered | Possible values - log, ignore, fail<br><br>log - log the error, send record to dead letter queue, and continue processing<br>ignore - log the error, send record to dead letter queue, proceed with processing despite errors encountered<br>fail - shut down connector task upon encountering<br><br>*Default - fail*<br>*Optional* |
| 17 | errors.retry.max.time.ms | Configurable retries for transient errors | Period of time in milliseconds to retry for transient errors<br><br>*Default - 300 ms*<br>*Optional* |
| 18 | errors.retry.backoff.time.ms | Configurable retries for transient errors | Period of time in milliseconds to backoff before retry for transient errors<br><br>*Default - 10 ms*<br>*Optional* |
| 19 | errors.deadletterqueue.bootstrap.servers | Channel to write records that failed deserialization | CSV or kafkaBroker:port <br>*Optional* |
| 20 | errors.deadletterqueue.topic.name | Channel to write records that failed deserialization | Pre-created topic name <br>*Optional* |
| 21 | errors.deadletterqueue.security.protocol | Channel to write records that failed deserialization | Securitry protocol of secure Kafka cluster <br>*Optional but when feature is used with secure cluster, is required* |
| 22 | errors.deadletterqueue.sasl.mechanism | Channel to write records that failed deserialization | SASL mechanism of secure Kafka cluster<br>*Optional but when feature is used with secure cluster, is required* |
| 23 | errors.deadletterqueue.sasl.jaas.config | Channel to write records that failed deserialization | JAAS config of secure Kafka cluster<br>*Optional but when feature is used with secure cluster, is required* |
| 24 | misc.deadletterqueue.bootstrap.servers | Channel to write records that due to reasons other than deserialization | CSV of kafkaBroker:port <br>*Optional* |
| 25 | misc.deadletterqueue.topic.name | Channel to write records that due to reasons other than deserialization | Pre-created topic name <br>*Optional* |
| 26 | misc.deadletterqueue.security.protocol | Channel to write records that due to reasons other than deserialization | Securitry protocol of secure Kafka cluster <br>*Optional but when feature is used with secure cluster, is required* |
| 27 | misc.deadletterqueue.sasl.mechanism | Channel to write records that due to reasons other than deserialization | SASL mechanism of secure Kafka cluster<br>*Optional but when feature is used with secure cluster, is required* |
| 28 | misc.deadletterqueue.sasl.jaas.config | Channel to write records that due to reasons other than deserialization | JAAS config of secure Kafka cluster<br>*Optional but when feature is used with secure cluster, is required* |
| 29 | consumer.override.bootstrap.servers | Security details explicitly required for secure Kafka clusters | Bootstrap server:port CSV of secure Kafka cluster <br>*Required for secure Kafka clusters* |
| 30 | consumer.override.security.protocol | Security details explicitly required for secure Kafka clusters | Security protocol of secure Kafka cluster <br>*Required for secure Kafka clusters* |
| 31 | consumer.override.sasl.mechanism | Security details explicitly required for secure Kafka clusters | SASL mechanism of secure Kafka cluster<br>*Required for secure Kafka clusters* |
| 32 | consumer.override.sasl.jaas.config | Security details explicitly required for secure Kafka clusters | JAAS config of secure Kafka cluster<br>*Required for secure Kafka clusters* |
| 33 | consumer.override.sasl.kerberos.service.name | Security details explicitly required for secure Kafka clusters, specifically kerberized Kafka | Kerberos service name of kerberized Kafka cluster<br>*Required for kerberized Kafka clusters* |
| 34 | consumer.override.auto.offset.reset | Configurable consuming from offset | Possible values are - earliest or latest<br>*Optional* |
| 35 | consumer.override.max.poll.interval.ms| Config to prevent duplication | Set to a value to avoid consumer leaving the group while the Connector is retrying <br>*Optional* |
| 3 | kusto.ingestion.url | Kusto ingestion endpoint URL | Provide the ingest URL of your ADX cluster<br>Use the following construct for the private URL - https://ingest-private-[cluster].kusto.windows.net<br>*Required* |
| 4 | kusto.query.url | Kusto query endpoint URL | Provide the engine URL of your ADX cluster<br>*Optional* |
| 5 | aad.auth.authority | Credentials for Kusto | Provide the tenant ID of your Azure Active Directory<br>*Required* |
| 6 | aad.auth.appid | Credentials for Kusto | Provide Azure Active Directory Service Principal Name<br>*Required* |
| 7 | aad.auth.appkey | Credentials for Kusto | Provide Azure Active Directory Service Principal secret<br>*Required* |
| 8 | kusto.tables.topics.mapping | Mapping of topics to tables | Provide 1..many topic-table comma-separated mappings as follows-<br>[{'topic': '\<topicName1\>','db': '\<datebaseName\>', 'table': '\<tableName\>','format': '<format-e.g.avro/csv/json>', 'mapping':'\<tableMappingName\>'}]<br>*Required* |
| 9 | key.converter | Deserialization | One of the below supported-<br>org.apache.kafka.connect.storage.StringConverter<br> org.apache.kafka.connect.json.JsonConverter<br>io.confluent.connect.avro.AvroConverter<br>io.confluent.connect.json.JsonSchemaConverter<br> org.apache.kafka.connect.converters.ByteArrayConverter<br><br>*Required* |
| 10 | value.converter | Deserialization | One of the below supported-<br>org.apache.kafka.connect.storage.StringConverter<br> org.apache.kafka.connect.json.JsonConverter<br>io.confluent.connect.avro.AvroConverter<br>io.confluent.connect.json.JsonSchemaConverter<br> org.apache.kafka.connect.converters.ByteArrayConverter<br><br>*Required* |
| 11 | value.converter.schema.registry.url | Schema validation | URI of the Kafka schema registry<br>*Optional* |
| 12 | value.converter.schemas.enable | Schema validation | Set to true if you have embedded schema with payload but are not leveraging the schema registry<br>Applicable for avro and json<br><br>*Optional* |
| 13 | tasks.max | connector parallelism | Specify the number of connector copy/sink tasks<br>*Required* |
| 14 | flush.size.bytes | Performance knob for batching | Maximum bufer byte size per topic+partition combination that in combination with flush.interval.ms (whichever is reached first) should result in sinking to Kusto<br>*Default - 1 MB*<br>*Required* |
| 15 | flush.interval.ms | Performance knob for batching | Minimum time interval per topic+partition combo that in combination with flush.size.bytes (whichever is reached first) should result in sinking to Kusto<br>*Default - 300 ms*<br>*Required* |
| 16 | tempdir.path | Local directory path on Kafka Connect worker to buffer files to before shipping to Kusto | Default is value returned by ```System.getProperty("java.io.tmpdir")``` with a GUID attached to it<br><br>*Optional* |
| 17 | behavior.on.error | Configurable behavior in response to errors encountered | Possible values - log, ignore, fail<br><br>log - log the error, send record to dead letter queue, and continue processing<br>ignore - log the error, send record to dead letter queue, proceed with processing despite errors encountered<br>fail - shut down connector task upon encountering<br><br>*Default - fail*<br>*Optional* |
| 18 | errors.retry.max.time.ms | Configurable retries for transient errors | Period of time in milliseconds to retry for transient errors<br><br>*Default - 300 ms*<br>*Optional* |
| 19 | errors.retry.backoff.time.ms | Configurable retries for transient errors | Period of time in milliseconds to backoff before retry for transient errors<br><br>*Default - 10 ms*<br>*Optional* |
| 20 | errors.deadletterqueue.bootstrap.servers | Channel to write records that failed deserialization | CSV or kafkaBroker:port <br>*Optional* |
| 21 | errors.deadletterqueue.topic.name | Channel to write records that failed deserialization | Pre-created topic name <br>*Optional* |
| 22 | errors.deadletterqueue.security.protocol | Channel to write records that failed deserialization | Securitry protocol of secure Kafka cluster <br>*Optional but when feature is used with secure cluster, is required* |
| 23 | errors.deadletterqueue.sasl.mechanism | Channel to write records that failed deserialization | SASL mechanism of secure Kafka cluster<br>*Optional but when feature is used with secure cluster, is required* |
| 24 | errors.deadletterqueue.sasl.jaas.config | Channel to write records that failed deserialization | JAAS config of secure Kafka cluster<br>*Optional but when feature is used with secure cluster, is required* |
| 25 | misc.deadletterqueue.bootstrap.servers | Channel to write records that due to reasons other than deserialization | CSV of kafkaBroker:port <br>*Optional* |
| 26 | misc.deadletterqueue.topic.name | Channel to write records that due to reasons other than deserialization | Pre-created topic name <br>*Optional* |
| 27 | misc.deadletterqueue.security.protocol | Channel to write records that due to reasons other than deserialization | Securitry protocol of secure Kafka cluster <br>*Optional but when feature is used with secure cluster, is required* |
| 28 | misc.deadletterqueue.sasl.mechanism | Channel to write records that due to reasons other than deserialization | SASL mechanism of secure Kafka cluster<br>*Optional but when feature is used with secure cluster, is required* |
| 29 | misc.deadletterqueue.sasl.jaas.config | Channel to write records that due to reasons other than deserialization | JAAS config of secure Kafka cluster<br>*Optional but when feature is used with secure cluster, is required* |
| 30 | consumer.override.bootstrap.servers | Security details explicitly required for secure Kafka clusters | Bootstrap server:port CSV of secure Kafka cluster <br>*Required for secure Kafka clusters* |
| 31 | consumer.override.security.protocol | Security details explicitly required for secure Kafka clusters | Security protocol of secure Kafka cluster <br>*Required for secure Kafka clusters* |
| 32 | consumer.override.sasl.mechanism | Security details explicitly required for secure Kafka clusters | SASL mechanism of secure Kafka cluster<br>*Required for secure Kafka clusters* |
| 33 | consumer.override.sasl.jaas.config | Security details explicitly required for secure Kafka clusters | JAAS config of secure Kafka cluster<br>*Required for secure Kafka clusters* |
| 34 | consumer.override.sasl.kerberos.service.name | Security details explicitly required for secure Kafka clusters, specifically kerberized Kafka | Kerberos service name of kerberized Kafka cluster<br>*Required for kerberized Kafka clusters* |
| 35 | consumer.override.auto.offset.reset | Configurable consuming from offset | Possible values are - earliest or latest<br>*Optional* |
| 36 | consumer.override.max.poll.interval.ms| Config to prevent duplication | Set to a value to avoid consumer leaving the group while the Connector is retrying <br>*Optional* |
<hr>
@ -272,12 +273,12 @@ The following is the roadmap-<br>
Kafka Connect connectors can be deployed in standalone mode (just for development) or in distributed mode (production).<br>
### 7.1. Standalone Kafka Connect deployment mode
This involves having the connector plugin jar in /usr/share/java of a Kafka Connect worker, reference to the same plugin path in connnect-standalone.properties, and launching of the connector from command line. This is not scalable, not fault tolerant, and is not recommeded for production.
This involves having the connector plugin jar in /usr/share/java of a Kafka Connect worker, reference to the same plugin path in connect-standalone.properties, and launching of the connector from command line. This is not scalable, not fault tolerant, and is not recommeded for production.
### 7.2. Distributed Kafka Connect deployment mode
Distributed Kafka Connect essentially involves creation of a KafkaConnect worker cluster as shown in the diagram below.<br>
- Azure Kubernetes Service is a great infrastructure for the connect cluster, due to its managed and scalabale nature
- Kubernetes is a great platform for the connect cluster, due to its scalabale nature and self-healing
- Kubernetes is a great platform for the connect cluster, due to its scalable nature and self-healing
- Each orange polygon is a Kafka Connect worker and each green polygon is a sink connector instance
- A Kafka Connect worker can have 1..many task instances which helps with scale
- When a Kafka Connect worker is maxed out from a resource perspective (CPU, RAM), you can scale horizontally, add more Kafka Connect workers, ands tasks within them

Просмотреть файл

@ -3,7 +3,8 @@ connector.class=com.microsoft.azure.kusto.kafka.connect.sink.KustoSinkConnector
tasks.max=1
#topics=testing1,testing2
#kusto.url=https://ingest-{cluster}.kusto.windows.net/
#kusto.ingestion.url=https://ingest-{cluster}.kusto.windows.net/
#kusto.query.url=https://{cluster}.kusto.windows.net/
#kusto.tables.topics.mapping=[{'topic': 'testing1','db': 'test_db', 'table': 'test_table_1','format': 'json', 'mapping':'JsonMapping'},{'topic': 'testing2','db': 'test_db', 'table': 'test_table_2','format': 'csv', 'mapping':'CsvMapping'}]

Просмотреть файл

@ -18,7 +18,6 @@ import java.util.Properties;
import java.util.concurrent.TimeUnit;
public class KustoSinkConfig extends AbstractConfig {
private static final Logger log = LoggerFactory.getLogger(KustoSinkConfig.class);
private static final String DLQ_PROPS_PREFIX = "misc.deadletterqueue.";
@ -38,9 +37,13 @@ public class KustoSinkConfig extends AbstractConfig {
}
// TODO: this might need to be per kusto cluster...
static final String KUSTO_URL_CONF = "kusto.url";
private static final String KUSTO_URL_DOC = "Kusto ingestion service URI.";
private static final String KUSTO_URL_DISPLAY = "Kusto cluster ingestion URI";
static final String KUSTO_INGEST_URL_CONF = "kusto.ingestion.url";
private static final String KUSTO_INGEST_URL_DOC = "Kusto ingestion endpoint URL.";
private static final String KUSTO_INGEST_URL_DISPLAY = "Kusto cluster ingestion URL";
static final String KUSTO_ENGINE_URL_CONF = "kusto.query.url";
private static final String KUSTO_ENGINE_URL_DOC = "Kusto query endpoint URL.";
private static final String KUSTO_ENGINE_URL_DISPLAY = "Kusto cluster query URL";
static final String KUSTO_AUTH_APPID_CONF = "aad.auth.appid";
private static final String KUSTO_AUTH_APPID_DOC = "Application Id for Azure Active Directory authentication.";
@ -93,7 +96,7 @@ public class KustoSinkConfig extends AbstractConfig {
static final String KUSTO_DLQ_BOOTSTRAP_SERVERS_CONF = "misc.deadletterqueue.bootstrap.servers";
private static final String KUSTO_DLQ_BOOTSTRAP_SERVERS_DOC = "Configure this list to Kafka broker's address(es) "
+ "to which the Connector should write records failed due to restrictions while writing to the file in `tempdir.path`, network interruptions or unavailability of Kusto cluster. "
+ "This list should be in the form host-1:port-1,host-2:port-2,…host-n:port-n. ";
+ "This list should be in the form host-1:port-1,host-2:port-2,…host-n:port-n.";
private static final String KUSTO_DLQ_BOOTSTRAP_SERVERS_DISPLAY = "Miscellaneous Dead-Letter Queue Bootstrap Servers";
static final String KUSTO_DLQ_TOPIC_NAME_CONF = "misc.deadletterqueue.topic.name";
@ -102,7 +105,7 @@ public class KustoSinkConfig extends AbstractConfig {
private static final String KUSTO_DLQ_TOPIC_NAME_DISPLAY = "Miscellaneous Dead-Letter Queue Topic Name";
static final String KUSTO_SINK_MAX_RETRY_TIME_MS_CONF = "errors.retry.max.time.ms";
private static final String KUSTO_SINK_MAX_RETRY_TIME_MS_DOC = "Maximum time upto which the Connector "
private static final String KUSTO_SINK_MAX_RETRY_TIME_MS_DOC = "Maximum time up to which the Connector "
+ "should retry writing records to Kusto table in case of failures.";
private static final String KUSTO_SINK_MAX_RETRY_TIME_MS_DISPLAY = "Errors Maximum Retry Time";
@ -120,18 +123,16 @@ public class KustoSinkConfig extends AbstractConfig {
}
public static ConfigDef getConfig() {
ConfigDef result = new ConfigDef();
defineConnectionConfigs(result);
defineWriteConfigs(result);
defineErrorHandlingAndRetriesConfgis(result);
defineErrorHandlingAndRetriesConfigs(result);
return result;
}
private static void defineErrorHandlingAndRetriesConfgis(ConfigDef result) {
private static void defineErrorHandlingAndRetriesConfigs(ConfigDef result) {
final String errorAndRetriesGroupName = "Error Handling and Retries";
int errorAndRetriesGroupOrder = 0;
@ -193,7 +194,6 @@ public class KustoSinkConfig extends AbstractConfig {
}
private static void defineWriteConfigs(ConfigDef result) {
final String writeGroupName = "Writes";
int writeGroupOrder = 0;
@ -243,21 +243,30 @@ public class KustoSinkConfig extends AbstractConfig {
}
private static void defineConnectionConfigs(ConfigDef result) {
final String connectionGroupName = "Connection";
int connectionGroupOrder = 0;
result
.define(
KUSTO_URL_CONF,
KUSTO_INGEST_URL_CONF,
Type.STRING,
ConfigDef.NO_DEFAULT_VALUE,
Importance.HIGH,
KUSTO_URL_DOC,
KUSTO_INGEST_URL_DOC,
connectionGroupName,
connectionGroupOrder++,
Width.MEDIUM,
KUSTO_URL_DISPLAY)
KUSTO_INGEST_URL_DISPLAY)
.define(
KUSTO_ENGINE_URL_CONF,
Type.STRING,
ConfigDef.NO_DEFAULT_VALUE,
Importance.LOW,
KUSTO_ENGINE_URL_DOC,
connectionGroupName,
connectionGroupOrder++,
Width.MEDIUM,
KUSTO_ENGINE_URL_DISPLAY)
.define(
KUSTO_AUTH_APPKEY_CONF,
Type.PASSWORD,
@ -291,7 +300,11 @@ public class KustoSinkConfig extends AbstractConfig {
}
public String getKustoUrl() {
return this.getString(KUSTO_URL_CONF);
return this.getString(KUSTO_INGEST_URL_CONF);
}
public String getKustoEngineUrl() {
return this.getString(KUSTO_ENGINE_URL_CONF);
}
public String getAuthAppid() {
@ -368,5 +381,4 @@ public class KustoSinkConfig extends AbstractConfig {
public static void main(String[] args) {
System.out.println(getConfig().toEnrichedRst());
}
}

Просмотреть файл

@ -5,9 +5,9 @@ import com.microsoft.azure.kusto.data.*;
import com.microsoft.azure.kusto.data.exceptions.DataClientException;
import com.microsoft.azure.kusto.data.exceptions.DataServiceException;
import com.microsoft.azure.kusto.ingest.IngestClient;
import com.microsoft.azure.kusto.ingest.IngestClientFactory;
import com.microsoft.azure.kusto.ingest.IngestionMapping;
import com.microsoft.azure.kusto.ingest.IngestionProperties;
import com.microsoft.azure.kusto.ingest.IngestClientFactory;
import org.apache.kafka.clients.consumer.OffsetAndMetadata;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.Producer;
@ -24,14 +24,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Properties;
import java.util.*;
/**
@ -40,26 +33,36 @@ import java.util.Properties;
* Currently only ingested files are "committed" in the sense that we can advance the offset according to it.
*/
public class KustoSinkTask extends SinkTask {
private static final Logger log = LoggerFactory.getLogger(KustoSinkTask.class);
static final String FETCH_TABLE_QUERY = "%s|count";
static final String FETCH_TABLE_MAPPING_QUERY = ".show table %s ingestion %s mapping '%s'";
static final String FETCH_PRINCIPAL_ROLES_QUERY = ".show principal access with (principal = '%s', accesstype='ingest',database='%s',table='%s')";
static final int INGESTION_ALLOWED_INDEX = 3;
public static final String FETCH_TABLE_QUERY = "%s | count";
public static final String FETCH_TABLE_MAPPING_QUERY = ".show table %s ingestion %s mapping '%s'";
public static final String FETCH_PRINCIPAL_ROLES_QUERY = ".show principal access with (principal = '%s', accesstype='ingest',database='%s',table='%s')";
public static final int INGESTION_ALLOWED_INDEX = 3;
public static final String MAPPING = "mapping";
public static final String MAPPING_FORMAT = "format";
public static final String MAPPING_TABLE = "table";
public static final String MAPPING_DB = "db";
public static final String JSON_FORMAT = "json";
public static final String SINGLEJSON_FORMAT = "singlejson";
public static final String MULTIJSON_FORMAT = "multijson";
public static final String VALIDATION_OK = "OK";
private final Set<TopicPartition> assignment;
private Map<String, TopicIngestionProperties> topicsToIngestionProps;
private KustoSinkConfig config;
IngestClient kustoIngestClient;
Map<TopicPartition, TopicPartitionWriter> writers;
protected IngestClient kustoIngestClient;
protected Map<TopicPartition, TopicPartitionWriter> writers;
private boolean isDlqEnabled;
private String dlqTopicName;
private Producer<byte[], byte[]> dlqProducer;
private static final ClientRequestProperties clientRequestProperties = new ClientRequestProperties();
public KustoSinkTask() {
assignment = new HashSet<>();
writers = new HashMap<>();
clientRequestProperties.setOption("validate_permissions", true);
}
public static IngestClient createKustoIngestClient(KustoSinkConfig config) {
@ -90,7 +93,7 @@ public class KustoSinkTask extends SinkTask {
public static Client createKustoEngineClient(KustoSinkConfig config) {
try {
String engineClientURL = config.getKustoUrl().replace("https://ingest-", "https://");
String engineClientURL = config.getKustoEngineUrl();
if (!Strings.isNullOrEmpty(config.getAuthAppid())) {
if (Strings.isNullOrEmpty(config.getAuthAppkey())) {
throw new ConfigException("Kusto authentication missing App Key.");
@ -115,43 +118,39 @@ public class KustoSinkTask extends SinkTask {
}
public static Map<String, TopicIngestionProperties> getTopicsToIngestionProps(KustoSinkConfig config) {
Map<String, TopicIngestionProperties> result = new HashMap<>();
Map<String, TopicIngestionProperties> result = new HashMap<>();
try {
JSONArray mappings = new JSONArray(config.getTopicToTableMapping());
for (int i =0; i< mappings.length(); i++) {
for (int i = 0; i < mappings.length(); i++) {
JSONObject mapping = mappings.getJSONObject(i);
String db = mapping.getString("db");
String table = mapping.getString("table");
String format = mapping.optString("format");
String db = mapping.getString(MAPPING_DB);
String table = mapping.getString(MAPPING_TABLE);
String format = mapping.optString(MAPPING_FORMAT);
IngestionProperties props = new IngestionProperties(db, table);
if (format != null && !format.isEmpty()) {
if (format.equalsIgnoreCase("json") || format.equalsIgnoreCase("singlejson") || format.equalsIgnoreCase("multijson")) {
props.setDataFormat("multijson");
if (format.equalsIgnoreCase(JSON_FORMAT) || format.equalsIgnoreCase(SINGLEJSON_FORMAT) || format.equalsIgnoreCase(MULTIJSON_FORMAT)) {
props.setDataFormat(MULTIJSON_FORMAT);
}
props.setDataFormat(format);
}
String mappingRef = mapping.optString("mapping");
if (mappingRef != null && !mappingRef.isEmpty()) {
if (format != null) {
if (format.equalsIgnoreCase("json") || format.equalsIgnoreCase("singlejson") || format.equalsIgnoreCase("multijson")) {
props.setIngestionMapping(mappingRef, IngestionMapping.IngestionMappingKind.Json);
} else if (format.equalsIgnoreCase(IngestionProperties.DATA_FORMAT.avro.toString())){
props.setIngestionMapping(mappingRef, IngestionMapping.IngestionMappingKind.Avro);
} else if (format.equalsIgnoreCase(IngestionProperties.DATA_FORMAT.apacheavro.toString())){
props.setIngestionMapping(mappingRef, IngestionMapping.IngestionMappingKind.ApacheAvro);
} else {
props.setIngestionMapping(mappingRef, IngestionMapping.IngestionMappingKind.Csv);
}
String mappingRef = mapping.optString(MAPPING);
if (mappingRef != null && !mappingRef.isEmpty() && format != null) {
if (format.equalsIgnoreCase(JSON_FORMAT) || format.equalsIgnoreCase(SINGLEJSON_FORMAT) || format.equalsIgnoreCase(MULTIJSON_FORMAT)) {
props.setIngestionMapping(mappingRef, IngestionMapping.IngestionMappingKind.Json);
} else if (format.equalsIgnoreCase(IngestionProperties.DATA_FORMAT.avro.toString())) {
props.setIngestionMapping(mappingRef, IngestionMapping.IngestionMappingKind.Avro);
} else if (format.equalsIgnoreCase(IngestionProperties.DATA_FORMAT.apacheavro.toString())) {
props.setIngestionMapping(mappingRef, IngestionMapping.IngestionMappingKind.ApacheAvro);
} else {
props.setIngestionMapping(mappingRef, IngestionMapping.IngestionMappingKind.Csv);
}
}
TopicIngestionProperties topicIngestionProperties = new TopicIngestionProperties();
@ -159,8 +158,7 @@ public class KustoSinkTask extends SinkTask {
result.put(mapping.getString("topic"), topicIngestionProperties);
}
return result;
}
catch (Exception ex) {
} catch (Exception ex) {
throw new ConfigException("Error while parsing kusto ingestion properties.", ex);
}
}
@ -176,31 +174,27 @@ public class KustoSinkTask extends SinkTask {
Client engineClient = createKustoEngineClient(config);
if (config.getTopicToTableMapping() != null) {
JSONArray mappings = new JSONArray(config.getTopicToTableMapping());
if(mappings.length() > 0) {
if(isIngestorRole(mappings.getJSONObject(0), engineClient)) {
for (int i = 0; i < mappings.length(); i++) {
JSONObject mapping = mappings.getJSONObject(i);
validateTableAccess(engineClient, mapping, config, databaseTableErrorList, accessErrorList);
}
if ((mappings.length() > 0) && (isIngestorRole(mappings.getJSONObject(0), engineClient))) {
for (int i = 0; i < mappings.length(); i++) {
JSONObject mapping = mappings.getJSONObject(i);
validateTableAccess(engineClient, mapping, config, databaseTableErrorList, accessErrorList);
}
}
}
String tableAccessErrorMessage = "";
if(!databaseTableErrorList.isEmpty())
{
if (!databaseTableErrorList.isEmpty()) {
tableAccessErrorMessage = "\n\nError occurred while trying to access the following database:table\n" +
String.join("\n",databaseTableErrorList);
String.join("\n", databaseTableErrorList);
}
if(!accessErrorList.isEmpty())
{
if (!accessErrorList.isEmpty()) {
tableAccessErrorMessage = tableAccessErrorMessage + "\n\nUser does not have appropriate permissions " +
"to sink data into the Kusto database:table combination(s). " +
"Verify your Kusto principals and roles before proceeding for the following: \n " +
String.join("\n",accessErrorList);
String.join("\n", accessErrorList);
}
if(!tableAccessErrorMessage.isEmpty()) {
if (!tableAccessErrorMessage.isEmpty()) {
throw new ConnectException(tableAccessErrorMessage);
}
} catch (JSONException e) {
@ -209,12 +203,12 @@ public class KustoSinkTask extends SinkTask {
}
private boolean isIngestorRole(JSONObject testMapping, Client engineClient) throws JSONException {
String database = testMapping.getString("db");
String table = testMapping.getString("table");
String database = testMapping.getString(MAPPING_DB);
String table = testMapping.getString(MAPPING_TABLE);
try {
KustoOperationResult rs = engineClient.execute(database, String.format(FETCH_TABLE_QUERY, table));
} catch(DataServiceException | DataClientException err){
if(err.getCause().getMessage().contains("Forbidden:")){
engineClient.execute(database, String.format(FETCH_TABLE_QUERY, table), clientRequestProperties);
} catch (DataServiceException | DataClientException err) {
if (err.getCause().getMessage().contains("Forbidden:")) {
log.warn("User might have ingestor privileges, table validation will be skipped for all table mappings ");
return false;
}
@ -223,50 +217,44 @@ public class KustoSinkTask extends SinkTask {
}
/**
* This function validates whether the user has the read and write access to the intended table
* before starting to sink records into ADX.
* This function validates whether the user has the read and write access to the intended table
* before starting to sink records into ADX.
*
* @param engineClient Client connection to run queries.
* @param mapping JSON Object containing a Table mapping.
* @param config
* @param mapping JSON Object containing a Table mapping.
* @param config Kusto Sink configuration
*/
private static void validateTableAccess(Client engineClient, JSONObject mapping, KustoSinkConfig config, List<String> databaseTableErrorList, List<String> accessErrorList) throws JSONException {
String database = mapping.getString("db");
String table = mapping.getString("table");
String format = mapping.getString("format");
String mappingName = mapping.getString("mapping");
if (format.equalsIgnoreCase("json") || format.equalsIgnoreCase("singlejson") || format.equalsIgnoreCase("multijson")) {
format = "json";
String database = mapping.getString(MAPPING_DB);
String table = mapping.getString(MAPPING_TABLE);
String format = mapping.getString(MAPPING_FORMAT);
String mappingName = mapping.getString(MAPPING);
if (format.equalsIgnoreCase(JSON_FORMAT) || format.equalsIgnoreCase(SINGLEJSON_FORMAT) || format.equalsIgnoreCase(MULTIJSON_FORMAT)) {
format = JSON_FORMAT;
}
boolean hasAccess = false;
try {
try {
KustoResultSetTable rs = engineClient.execute(database, String.format(FETCH_TABLE_QUERY, table)).getPrimaryResults();
rs.next();
if (rs.getLong(0) >= 0) {
KustoOperationResult rs = engineClient.execute(database, String.format(FETCH_TABLE_QUERY, table), clientRequestProperties);
if (VALIDATION_OK.equals(rs.getPrimaryResults().getData().get(0).get(0))) {
hasAccess = true;
}
} catch (DataServiceException e) {
databaseTableErrorList.add(String.format("Database:%s Table:%s | table not found", database, table));
}
if(hasAccess) {
if (hasAccess) {
try {
KustoOperationResult rp = engineClient.execute(database, String.format(FETCH_TABLE_MAPPING_QUERY, table, format, mappingName));
if (rp.getPrimaryResults().getData().get(0).get(0).toString().equals(mappingName)) {
hasAccess = true;
}
engineClient.execute(database, String.format(FETCH_TABLE_MAPPING_QUERY, table, format, mappingName));
} catch (DataServiceException e) {
hasAccess = false;
databaseTableErrorList.add(String.format("Database:%s Table:%s | %s mapping '%s' not found", database, table, format, mappingName));
}
}
if(hasAccess) {
if (hasAccess) {
String authenticateWith = "aadapp=" + config.getAuthAppid();
String query = String.format(FETCH_PRINCIPAL_ROLES_QUERY, authenticateWith, database, table);
try {
String authenticateWith = "aadapp=" + config.getAuthAppid();
KustoOperationResult rs = engineClient.execute(database, String.format(FETCH_PRINCIPAL_ROLES_QUERY, authenticateWith, database, table));
KustoOperationResult rs = engineClient.execute(database, query);
hasAccess = (boolean) rs.getPrimaryResults().getData().get(0).get(INGESTION_ALLOWED_INDEX);
if (hasAccess) {
log.info("User has appropriate permissions to sink data into the Kusto table={}", table);
@ -277,12 +265,11 @@ public class KustoSinkTask extends SinkTask {
} catch (DataServiceException e) {
// Logging the error so that the trace is not lost.
if (!e.getCause().toString().contains("Forbidden")){
log.error("{}", e);
log.error("Error fetching principal roles with query {}", query, e);
databaseTableErrorList.add(String.format("Database:%s Table:%s", database, table));
} else {
log.warn("Failed to check permissions, will continue the run as the principal might still be able to ingest: {}", e);
}
}
}
} catch (DataClientException e) {
@ -296,11 +283,11 @@ public class KustoSinkTask extends SinkTask {
}
@Override
public void open(Collection<TopicPartition> partitions) throws ConnectException {
public void open(Collection<TopicPartition> partitions) {
assignment.addAll(partitions);
for (TopicPartition tp : assignment) {
TopicIngestionProperties ingestionProps = getIngestionProps(tp.topic());
log.debug(String.format("Open Kusto topic: '%s' with partition: '%s'", tp.topic(), tp.partition()));
log.debug("Open Kusto topic: '{}' with partition: '{}'", tp.topic(), tp.partition());
if (ingestionProps == null) {
throw new ConnectException(String.format("Kusto Sink has no ingestion props mapped " +
"for the topic: %s. please check your configuration.", tp.topic()));
@ -320,17 +307,16 @@ public class KustoSinkTask extends SinkTask {
writers.remove(tp);
assignment.remove(tp);
} catch (ConnectException e) {
log.error("Error closing writer for {}. Error: {}", tp, e);
log.error("Error closing writer for {}.", tp, e);
}
}
}
@Override
public void start(Map<String, String> props) {
config = new KustoSinkConfig(props);
String url = config.getKustoUrl();
validateTableMappings(config);
if (config.isDlqEnabled()) {
isDlqEnabled = true;
@ -348,16 +334,15 @@ public class KustoSinkTask extends SinkTask {
isDlqEnabled = false;
dlqTopicName = null;
}
topicsToIngestionProps = getTopicsToIngestionProps(config);
// this should be read properly from settings
kustoIngestClient = createKustoIngestClient(config);
log.info(String.format("Started KustoSinkTask with target cluster: (%s), source topics: (%s)",
url, topicsToIngestionProps.keySet().toString()));
log.info("Started KustoSinkTask with target cluster: ({}), source topics: ({})", url, topicsToIngestionProps.keySet());
// Adding this check to make code testable
if(context!=null) {
if (context != null) {
open(context.assignment());
}
}
@ -369,7 +354,7 @@ public class KustoSinkTask extends SinkTask {
writer.close();
}
try {
if(kustoIngestClient != null) {
if (kustoIngestClient != null) {
kustoIngestClient.close();
}
} catch (IOException e) {
@ -378,10 +363,10 @@ public class KustoSinkTask extends SinkTask {
}
@Override
public void put(Collection<SinkRecord> records) throws ConnectException {
public void put(Collection<SinkRecord> records) {
SinkRecord lastRecord = null;
for (SinkRecord record : records) {
log.debug("record to topic:" + record.topic());
log.debug("Record to topic: {}", record.topic());
lastRecord = record;
TopicPartition tp = new TopicPartition(record.topic(), record.kafkaPartition());
@ -398,7 +383,7 @@ public class KustoSinkTask extends SinkTask {
}
if (lastRecord != null) {
log.debug("Last record offset:" + lastRecord.kafkaOffset());
log.debug("Last record offset: {}", lastRecord.kafkaOffset());
}
}
@ -410,7 +395,7 @@ public class KustoSinkTask extends SinkTask {
) {
Map<TopicPartition, OffsetAndMetadata> offsetsToCommit = new HashMap<>();
for (TopicPartition tp : assignment) {
if(writers.get(tp) == null) {
if (writers.get(tp) == null) {
throw new ConnectException("Topic Partition not configured properly. " +
"verify your `topics` and `kusto.tables.topics.mapping` configurations");
}
@ -418,7 +403,7 @@ public class KustoSinkTask extends SinkTask {
Long lastCommittedOffset = writers.get(tp).lastCommittedOffset;
if (lastCommittedOffset != null) {
Long offset = lastCommittedOffset + 1L;
long offset = lastCommittedOffset + 1L;
log.debug("Forwarding to framework request to commit offset: {} for {} while the offset is {}", offset, tp, offsets.get(tp));
offsetsToCommit.put(tp, new OffsetAndMetadata(offset));
}
@ -430,6 +415,5 @@ public class KustoSinkTask extends SinkTask {
@Override
public void flush(Map<TopicPartition, OffsetAndMetadata> offsets) {
// do nothing , rolling files can handle writing
}
}
}

Просмотреть файл

@ -1,8 +1,8 @@
package com.microsoft.azure.kusto.kafka.connect.sink;
import com.microsoft.azure.kusto.data.ConnectionStringBuilder;
import com.microsoft.azure.kusto.data.Client;
import com.microsoft.azure.kusto.data.ClientFactory;
import com.microsoft.azure.kusto.data.ConnectionStringBuilder;
import com.microsoft.azure.kusto.data.KustoResultSetTable;
import com.microsoft.azure.kusto.data.exceptions.DataClientException;
import com.microsoft.azure.kusto.data.exceptions.DataServiceException;
@ -25,23 +25,17 @@ import java.io.FileInputStream;
import java.io.IOException;
import java.net.URISyntaxException;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import java.util.Properties;
import java.util.*;
import java.util.logging.Logger;
public class E2ETest {
private static final String testPrefix = "tmpKafkaE2ETest";
private String appId = System.getProperty("appId");
private String appKey = System.getProperty("appKey");
private String authority = System.getProperty("authority");
private String cluster = System.getProperty("cluster");
private String database = System.getProperty("database");
private String tableBaseName = System.getProperty("table", testPrefix + UUID.randomUUID().toString().replace('-', '_'));
private static final String appId = System.getProperty("appId");
private static final String appKey = System.getProperty("appKey");
private static final String authority = System.getProperty("authority");
private static final String cluster = System.getProperty("cluster");
private static final String database = System.getProperty("database");
private static final String tableBaseName = System.getProperty("table", testPrefix + UUID.randomUUID().toString().replace('-', '_'));
private String basePath = Paths.get("src/test/resources/", "testE2E").toString();
private Logger log = Logger.getLogger(this.getClass().getName());
private boolean isDlqEnabled;
@ -49,14 +43,14 @@ public class E2ETest {
private Producer<byte[], byte[]> kafkaProducer;
@Before
public void setUp(){
Properties properties = new Properties();
properties.put("bootstrap.servers", "localhost:9000");
properties.put("key.serializer", "org.apache.kafka.common.serialization.ByteArraySerializer");
properties.put("value.serializer", "org.apache.kafka.common.serialization.ByteArraySerializer");
kafkaProducer = new KafkaProducer<>(properties);
isDlqEnabled = false;
dlqTopicName = null;
public void setUp() {
Properties properties = new Properties();
properties.put("bootstrap.servers", "localhost:9000");
properties.put("key.serializer", "org.apache.kafka.common.serialization.ByteArraySerializer");
properties.put("value.serializer", "org.apache.kafka.common.serialization.ByteArraySerializer");
kafkaProducer = new KafkaProducer<>(properties);
isDlqEnabled = false;
dlqTopicName = null;
}
@Test
@ -90,14 +84,15 @@ public class E2ETest {
props.ingestionProperties = ingestionProperties;
props.ingestionProperties.setDataFormat(IngestionProperties.DATA_FORMAT.csv);
props.ingestionProperties.setIngestionMapping("mappy", IngestionMapping.IngestionMappingKind.Csv);
String KustoUrl = String.format("https://ingest-%s.kusto.windows.net", cluster);
String kustoDmUrl = String.format("https://ingest-%s.kusto.windows.net", cluster);
String kustoEngineUrl = String.format("https://%s.kusto.windows.net", cluster);
String basepath = Paths.get(basePath, "csv").toString();
Map<String, String> settings = getKustoConfigs(KustoUrl, basepath, "mappy", fileThreshold, flushInterval);
KustoSinkConfig config= new KustoSinkConfig(settings);
Map<String, String> settings = getKustoConfigs(kustoDmUrl, kustoEngineUrl, basepath, "mappy", fileThreshold, flushInterval);
KustoSinkConfig config = new KustoSinkConfig(settings);
TopicPartitionWriter writer = new TopicPartitionWriter(tp, ingestClient, props, config, isDlqEnabled, dlqTopicName, kafkaProducer);
writer.open();
List<SinkRecord> records = new ArrayList<SinkRecord>();
List<SinkRecord> records = new ArrayList<>();
records.add(new SinkRecord(tp.topic(), tp.partition(), null, null, Schema.BYTES_SCHEMA, messages[0].getBytes(), 10));
records.add(new SinkRecord(tp.topic(), tp.partition(), null, null, null, messages[0].getBytes(), 10));
@ -142,15 +137,16 @@ public class E2ETest {
props2.ingestionProperties.setDataFormat(IngestionProperties.DATA_FORMAT.avro);
props2.ingestionProperties.setIngestionMapping("avroMapping", IngestionMapping.IngestionMappingKind.Avro);
TopicPartition tp2 = new TopicPartition("testPartition2", 11);
String KustoUrl = String.format("https://ingest-%s.kusto.windows.net", cluster);
String kustoDmUrl = String.format("https://ingest-%s.kusto.windows.net", cluster);
String kustoEngineUrl = String.format("https://%s.kusto.windows.net", cluster);
String basepath = Paths.get(basePath, "avro").toString();
long fileThreshold = 100;
long flushInterval = 300000;
Map<String, String> settings = getKustoConfigs(KustoUrl, basepath, "avri", fileThreshold, flushInterval);
KustoSinkConfig config= new KustoSinkConfig(settings);
Map<String, String> settings = getKustoConfigs(kustoDmUrl, kustoEngineUrl, basepath, "avri", fileThreshold, flushInterval);
KustoSinkConfig config = new KustoSinkConfig(settings);
TopicPartitionWriter writer2 = new TopicPartitionWriter(tp2, ingestClient, props2, config, isDlqEnabled, dlqTopicName, kafkaProducer);
writer2.open();
List<SinkRecord> records2 = new ArrayList<SinkRecord>();
List<SinkRecord> records2 = new ArrayList<>();
FileInputStream fs = new FileInputStream("src/test/resources/data.avro");
byte[] buffer = new byte[1184];
@ -177,10 +173,10 @@ public class E2ETest {
KustoResultSetTable res = engineClient.execute(database, query).getPrimaryResults();
res.next();
Integer timeoutMs = 60 * 6 * 1000;
Integer rowCount = res.getInt(0);
Integer timeElapsedMs = 0;
Integer sleepPeriodMs = 5 * 1000;
int timeoutMs = 60 * 6 * 1000;
int rowCount = res.getInt(0);
int timeElapsedMs = 0;
int sleepPeriodMs = 5 * 1000;
while (rowCount < expectedNumberOfRows && timeElapsedMs < timeoutMs) {
Thread.sleep(sleepPeriodMs);
@ -190,13 +186,14 @@ public class E2ETest {
timeElapsedMs += sleepPeriodMs;
}
Assertions.assertEquals(rowCount, expectedNumberOfRows);
this.log.info("Succesfully ingested " + expectedNumberOfRows + " records.");
this.log.info("Successfully ingested " + expectedNumberOfRows + " records.");
}
private Map<String, String> getKustoConfigs(String clusterUrl, String basePath,String tableMapping, long fileThreshold,
long flushInterval) {
private Map<String, String> getKustoConfigs(String clusterUrl, String engineUrl, String basePath, String tableMapping,
long fileThreshold, long flushInterval) {
Map<String, String> settings = new HashMap<>();
settings.put(KustoSinkConfig.KUSTO_URL_CONF, clusterUrl);
settings.put(KustoSinkConfig.KUSTO_INGEST_URL_CONF, clusterUrl);
settings.put(KustoSinkConfig.KUSTO_ENGINE_URL_CONF, engineUrl);
settings.put(KustoSinkConfig.KUSTO_TABLES_MAPPING_CONF, tableMapping);
settings.put(KustoSinkConfig.KUSTO_AUTH_APPID_CONF, appId);
settings.put(KustoSinkConfig.KUSTO_AUTH_APPKEY_CONF, appKey);

Просмотреть файл

@ -8,32 +8,17 @@ import org.apache.commons.io.IOUtils;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.sink.SinkRecord;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.junit.Assert;
import java.io.File;
import java.io.IOException;
import java.io.ByteArrayOutputStream;
import java.io.ByteArrayInputStream;
import java.io.FileInputStream;
import java.io.*;
import java.nio.file.Paths;
import java.time.Instant;
import java.util.HashMap;
import java.util.Objects;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.AbstractMap;
import java.util.*;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import java.util.function.Consumer;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
public class FileWriterTest {
private File currentDirectory;
@ -68,12 +53,13 @@ public class FileWriterTest {
boolean mkdirs = folder.mkdirs();
Assert.assertTrue(mkdirs);
Assert.assertEquals(Objects.requireNonNull(folder.listFiles()).length, 0);
Assert.assertEquals(0, Objects.requireNonNull(folder.listFiles()).length);
final String FILE_PATH = Paths.get(path, "ABC").toString();
final int MAX_FILE_SIZE = 128;
Consumer<SourceFile> trackFiles = (SourceFile f) -> {};
Consumer<SourceFile> trackFiles = (SourceFile f) -> {
};
Function<Long, String> generateFileName = (Long l) -> FILE_PATH;
@ -82,8 +68,8 @@ public class FileWriterTest {
SinkRecord record = new SinkRecord("topic", 1, null, null, Schema.BYTES_SCHEMA, msg.getBytes(), 10);
fileWriter.initializeRecordWriter(record);
fileWriter.openFile(null);
Assert.assertEquals(Objects.requireNonNull(folder.listFiles()).length, 1);
Assert.assertEquals(fileWriter.currentFile.rawBytes, 0);
Assert.assertEquals(1, Objects.requireNonNull(folder.listFiles()).length);
Assert.assertEquals(0, fileWriter.currentFile.rawBytes);
Assert.assertEquals(fileWriter.currentFile.path, FILE_PATH);
Assert.assertTrue(fileWriter.currentFile.file.canWrite());
@ -97,11 +83,9 @@ public class FileWriterTest {
File folder = new File(path);
boolean mkdirs = folder.mkdirs();
Assert.assertTrue(mkdirs);
Assert.assertEquals(Objects.requireNonNull(folder.listFiles()).length, 0);
Assert.assertEquals(0, Objects.requireNonNull(folder.listFiles()).length);
HashMap<String, Long> files = new HashMap<>();
final int MAX_FILE_SIZE = 100;
Consumer<SourceFile> trackFiles = (SourceFile f) -> files.put(f.path, f.rawBytes);
@ -116,21 +100,21 @@ public class FileWriterTest {
fileWriter.writeData(record1);
}
Assert.assertEquals(files.size(), 4);
Assert.assertEquals(4, files.size());
// should still have 1 open file at this point...
Assert.assertEquals(Objects.requireNonNull(folder.listFiles()).length, 1);
Assert.assertEquals(1, Objects.requireNonNull(folder.listFiles()).length);
// close current file
fileWriter.close();
Assert.assertEquals(files.size(), 5);
Assert.assertEquals(5, files.size());
List<Long> sortedFiles = new ArrayList<>(files.values());
sortedFiles.sort((Long x, Long y) -> (int) (y - x));
Assert.assertEquals(sortedFiles, Arrays.asList((long) 108, (long) 108, (long) 108, (long) 108, (long) 54));
// make sure folder is clear once done
Assert.assertEquals(Objects.requireNonNull(folder.listFiles()).length, 0);
Assert.assertEquals(0, Objects.requireNonNull(folder.listFiles()).length);
}
@Test
@ -157,9 +141,9 @@ public class FileWriterTest {
Thread.sleep(1000);
Assert.assertEquals(files.size(), 0);
Assert.assertEquals(0, files.size());
fileWriter.close();
Assert.assertEquals(files.size(), 1);
Assert.assertEquals(1, files.size());
String path2 = Paths.get(currentDirectory.getPath(), "testGzipFileWriter2_2").toString();
File folder2 = new File(path2);
@ -175,7 +159,7 @@ public class FileWriterTest {
fileWriter2.writeData(record1);
Thread.sleep(1050);
Assert.assertEquals(files.size(), 2);
Assert.assertEquals(2, files.size());
List<Long> sortedFiles = new ArrayList<>(files.values());
sortedFiles.sort((Long x, Long y) -> (int) (y - x));
@ -183,7 +167,7 @@ public class FileWriterTest {
// make sure folder is clear once done
fileWriter2.close();
Assert.assertEquals(Objects.requireNonNull(folder.listFiles()).length, 0);
Assert.assertEquals(0, Objects.requireNonNull(folder.listFiles()).length);
}
@Test
@ -212,7 +196,7 @@ public class FileWriterTest {
boolean mkdirs = folder.mkdirs();
Assert.assertTrue(mkdirs);
Function<Long, String> generateFileName = (Long offset) -> {
if(offset == null){
if (offset == null) {
offset = offsets.currentOffset;
}
return Paths.get(path, Long.toString(offset)).toString();
@ -247,16 +231,19 @@ public class FileWriterTest {
Thread.sleep(510);
// Assertions
Assert.assertEquals(files.size(), 2);
Assert.assertEquals(2, files.size());
// Make sure that the first file is from offset 1 till 2 and second is from 3 till 3
Assert.assertEquals(files.stream().map(Map.Entry::getValue).toArray(Long[]::new), new Long[]{30L, 15L});
Assert.assertEquals(files.stream().map((s)->s.getKey().substring(path.length() + 1)).toArray(String[]::new), new String[]{"1", "3"});
Assert.assertEquals(committedOffsets, new ArrayList<Long>(){{add(2L);add(3L);}});
Assert.assertEquals(new Long[]{30L, 15L}, files.stream().map(Map.Entry::getValue).toArray(Long[]::new));
Assert.assertEquals(new String[]{"1", "3"}, files.stream().map((s) -> s.getKey().substring(path.length() + 1)).toArray(String[]::new));
Assert.assertEquals(committedOffsets, new ArrayList<Long>() {{
add(2L);
add(3L);
}});
// make sure folder is clear once done
fileWriter2.close();
Assert.assertEquals(Objects.requireNonNull(folder.listFiles()).length, 0);
Assert.assertEquals(0, Objects.requireNonNull(folder.listFiles()).length);
}
static Function<SourceFile, String> getAssertFileConsumerFunction(String msg) {
@ -289,14 +276,16 @@ public class FileWriterTest {
}
protected Map<String, String> getProperties() {
Map<String, String> settings = new HashMap<>();
settings.put(KustoSinkConfig.KUSTO_URL_CONF, "xxx");
settings.put(KustoSinkConfig.KUSTO_TABLES_MAPPING_CONF, "mapping");
settings.put(KustoSinkConfig.KUSTO_AUTH_APPID_CONF, "some-appid");
settings.put(KustoSinkConfig.KUSTO_AUTH_APPKEY_CONF, "some-appkey");
settings.put(KustoSinkConfig.KUSTO_AUTH_AUTHORITY_CONF, "some-authority");
return settings;
Map<String, String> settings = new HashMap<>();
settings.put(KustoSinkConfig.KUSTO_INGEST_URL_CONF, "xxx");
settings.put(KustoSinkConfig.KUSTO_ENGINE_URL_CONF, "xxx");
settings.put(KustoSinkConfig.KUSTO_TABLES_MAPPING_CONF, "mapping");
settings.put(KustoSinkConfig.KUSTO_AUTH_APPID_CONF, "some-appid");
settings.put(KustoSinkConfig.KUSTO_AUTH_APPKEY_CONF, "some-appkey");
settings.put(KustoSinkConfig.KUSTO_AUTH_AUTHORITY_CONF, "some-authority");
return settings;
}
static Consumer<SourceFile> getAssertFileConsumer(String msg) {
return (SourceFile f) -> {
try (FileInputStream fileInputStream = new FileInputStream(f.file)) {
@ -324,4 +313,4 @@ public class FileWriterTest {
}
};
}
}
}

Просмотреть файл

@ -1,57 +1,35 @@
package com.microsoft.azure.kusto.kafka.connect.sink;
import org.apache.kafka.common.config.ConfigException;
import org.junit.Before;
import org.junit.Test;
import com.microsoft.azure.kusto.kafka.connect.sink.KustoSinkConfig.BehaviorOnError;
import org.apache.kafka.common.config.ConfigException;
import org.junit.Test;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.*;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotEquals;
public class KustoSinkConnectorConfigTest {
Map<String, String> settings;
KustoSinkConfig config;
@Before
public void before() {
settings = new HashMap<>();
config = null;
}
private static final String DM_URL = "https://ingest-cluster_name.kusto.windows.net";
private static final String ENGINE_URL = "https://cluster_name.kusto.windows.net";
@Test
public void shouldAcceptValidConfig() {
// Adding required Configuration with no default value.
settings.put("kusto.tables.topics.mapping","[{'topic': 'xxx','db': 'xxx', 'table': 'xxx','format': 'avro', 'mapping':'avri'}]");
settings.put(KustoSinkConfig.KUSTO_URL_CONF, "kusto-url");
settings.put(KustoSinkConfig.KUSTO_TABLES_MAPPING_CONF, "mapping");
settings.put(KustoSinkConfig.KUSTO_AUTH_APPID_CONF, "some-appid");
settings.put(KustoSinkConfig.KUSTO_AUTH_APPKEY_CONF, "some-appkey");
settings.put(KustoSinkConfig.KUSTO_AUTH_AUTHORITY_CONF, "some-authority");
config = new KustoSinkConfig(settings);
KustoSinkConfig config = new KustoSinkConfig(setupConfigs());
assertNotNull(config);
}
@Test
public void shouldHaveDefaultValues() {
// Adding required Configuration with no default value.
settings.put("kusto.tables.topics.mapping","[{'topic': 'xxx','db': 'xxx', 'table': 'xxx','format': 'avro', 'mapping':'avri'}]");
settings.put(KustoSinkConfig.KUSTO_URL_CONF, "kusto-url");
settings.put(KustoSinkConfig.KUSTO_TABLES_MAPPING_CONF, "mapping");
settings.put(KustoSinkConfig.KUSTO_AUTH_APPID_CONF, "some-appid");
settings.put(KustoSinkConfig.KUSTO_AUTH_APPKEY_CONF, "some-appkey");
settings.put(KustoSinkConfig.KUSTO_AUTH_AUTHORITY_CONF, "some-authority");
config = new KustoSinkConfig(settings);
KustoSinkConfig config = new KustoSinkConfig(setupConfigs());
assertNotNull(config.getKustoUrl());
assertNotNull(config.getFlushSizeBytes());
assertNotNull(config.getFlushInterval());
assertNotEquals(0, config.getFlushSizeBytes());
assertNotEquals(0, config.getFlushInterval());
assertFalse(config.isDlqEnabled());
assertEquals(BehaviorOnError.FAIL, config.getBehaviorOnError());
}
@ -59,45 +37,53 @@ public class KustoSinkConnectorConfigTest {
@Test(expected = ConfigException.class)
public void shouldThrowExceptionWhenKustoURLNotGiven() {
// Adding required Configuration with no default value.
settings.remove(KustoSinkConfig.KUSTO_URL_CONF);
settings.put(KustoSinkConfig.KUSTO_AUTH_APPID_CONF, "some-appid");
settings.put(KustoSinkConfig.KUSTO_AUTH_APPKEY_CONF, "some-appkey");
settings.put(KustoSinkConfig.KUSTO_AUTH_AUTHORITY_CONF, "some-authority");
config = new KustoSinkConfig(settings);
HashMap<String, String> settings = setupConfigs();
settings.remove(KustoSinkConfig.KUSTO_INGEST_URL_CONF);
new KustoSinkConfig(settings);
}
@Test
public void shouldUseDmUrlWhenKustoEngineUrlNotGivenAndCantGuess() {
HashMap<String, String> settings = setupConfigs();
settings.put(KustoSinkConfig.KUSTO_INGEST_URL_CONF, ENGINE_URL);
KustoSinkConfig config = new KustoSinkConfig(settings);
String kustoEngineUrl = config.getKustoEngineUrl();
assertEquals(ENGINE_URL, kustoEngineUrl);
}
@Test
public void shouldUseKustoEngineUrlWhenGiven() {
HashMap<String, String> settings = setupConfigs();
settings.put(KustoSinkConfig.KUSTO_ENGINE_URL_CONF, ENGINE_URL);
KustoSinkConfig config = new KustoSinkConfig(settings);
String kustoEngineUrl = config.getKustoEngineUrl();
assertEquals(ENGINE_URL, kustoEngineUrl);
}
@Test(expected = ConfigException.class)
public void shouldThrowExceptionWhenAppIdNotGiven() {
// Adding required Configuration with no default value.
settings.remove(KustoSinkConfig.KUSTO_URL_CONF);
settings.put(KustoSinkConfig.KUSTO_AUTH_APPKEY_CONF, "some-appkey");
settings.put(KustoSinkConfig.KUSTO_AUTH_AUTHORITY_CONF, "some-authority");
config = new KustoSinkConfig(settings);
HashMap<String, String> settings = setupConfigs();
settings.remove(KustoSinkConfig.KUSTO_AUTH_APPID_CONF);
new KustoSinkConfig(settings);
}
@Test(expected = ConfigException.class)
public void shouldFailWhenBehaviorOnErrorIsIllConfigured() {
// Adding required Configuration with no default value.
settings.put(KustoSinkConfig.KUSTO_URL_CONF, "kusto-url");
settings.put(KustoSinkConfig.KUSTO_TABLES_MAPPING_CONF, "mapping");
settings.put(KustoSinkConfig.KUSTO_AUTH_APPID_CONF, "some-appid");
settings.put(KustoSinkConfig.KUSTO_AUTH_APPKEY_CONF, "some-appkey");
settings.put(KustoSinkConfig.KUSTO_AUTH_AUTHORITY_CONF, "some-authority");
HashMap<String, String> settings = setupConfigs();
settings.remove(KustoSinkConfig.KUSTO_INGEST_URL_CONF);
settings.put(KustoSinkConfig.KUSTO_BEHAVIOR_ON_ERROR_CONF, "DummyValue");
config = new KustoSinkConfig(settings);
new KustoSinkConfig(settings);
}
@Test
public void verifyDlqSettings() {
settings.put(KustoSinkConfig.KUSTO_URL_CONF, "kusto-url");
settings.put(KustoSinkConfig.KUSTO_TABLES_MAPPING_CONF, "mapping");
settings.put(KustoSinkConfig.KUSTO_AUTH_APPID_CONF, "some-appid");
settings.put(KustoSinkConfig.KUSTO_AUTH_APPKEY_CONF, "some-appkey");
settings.put(KustoSinkConfig.KUSTO_AUTH_AUTHORITY_CONF, "some-authority");
HashMap<String, String> settings = setupConfigs();
settings.put(KustoSinkConfig.KUSTO_DLQ_BOOTSTRAP_SERVERS_CONF, "localhost:8081,localhost:8082");
settings.put(KustoSinkConfig.KUSTO_DLQ_TOPIC_NAME_CONF, "dlq-error-topic");
config = new KustoSinkConfig(settings);
KustoSinkConfig config = new KustoSinkConfig(settings);
assertTrue(config.isDlqEnabled());
assertEquals(Arrays.asList("localhost:8081", "localhost:8082"), config.getDlqBootstrapServers());
assertEquals("dlq-error-topic", config.getDlqTopicName());
@ -106,16 +92,11 @@ public class KustoSinkConnectorConfigTest {
@Test
public void shouldProcessDlqConfigsWithPrefix() {
// Adding required Configuration with no default value.
settings.put(KustoSinkConfig.KUSTO_URL_CONF, "kusto-url");
settings.put(KustoSinkConfig.KUSTO_TABLES_MAPPING_CONF, "mapping");
settings.put(KustoSinkConfig.KUSTO_AUTH_APPID_CONF, "some-appid");
settings.put(KustoSinkConfig.KUSTO_AUTH_APPKEY_CONF, "some-appkey");
settings.put(KustoSinkConfig.KUSTO_AUTH_AUTHORITY_CONF, "some-authority");
HashMap<String, String> settings = setupConfigs();
settings.put("misc.deadletterqueue.security.protocol", "SASL_PLAINTEXT");
settings.put("misc.deadletterqueue.sasl.mechanism", "PLAIN");
config = new KustoSinkConfig(settings);
KustoSinkConfig config = new KustoSinkConfig(settings);
assertNotNull(config);
@ -125,4 +106,14 @@ public class KustoSinkConnectorConfigTest {
assertEquals("PLAIN", dlqProps.get("sasl.mechanism"));
}
public static HashMap<String, String> setupConfigs() {
HashMap<String, String> configs = new HashMap<>();
configs.put(KustoSinkConfig.KUSTO_INGEST_URL_CONF, DM_URL);
configs.put(KustoSinkConfig.KUSTO_ENGINE_URL_CONF, ENGINE_URL);
configs.put(KustoSinkConfig.KUSTO_TABLES_MAPPING_CONF, "[{'topic': 'topic1','db': 'db1', 'table': 'table1','format': 'csv'},{'topic': 'topic2','db': 'db2', 'table': 'table2','format': 'json','mapping': 'Mapping'}]");
configs.put(KustoSinkConfig.KUSTO_AUTH_APPID_CONF, "some-appid");
configs.put(KustoSinkConfig.KUSTO_AUTH_APPKEY_CONF, "some-appkey");
configs.put(KustoSinkConfig.KUSTO_AUTH_AUTHORITY_CONF, "some-authority");
return configs;
}
}

Просмотреть файл

@ -17,11 +17,11 @@ import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import static org.junit.jupiter.api.Assertions.*;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.mockito.Mockito.doNothing;
import static org.mockito.Mockito.spy;
public class KustoSinkTaskTest {
File currentDirectory;
@ -40,19 +40,12 @@ public class KustoSinkTaskTest {
}
@Test
public void testSinkTaskOpen() throws Exception {
HashMap<String, String> props = new HashMap<>();
props.put(KustoSinkConfig.KUSTO_URL_CONF, "https://cluster_name.kusto.windows.net");
props.put(KustoSinkConfig.KUSTO_TABLES_MAPPING_CONF, "[{'topic': 'topic1','db': 'db1', 'table': 'table1','format': 'csv'},{'topic': 'topic2','db': 'db1', 'table': 'table1','format': 'json','mapping': 'Mapping'}]");
props.put(KustoSinkConfig.KUSTO_AUTH_APPID_CONF, "some-appid");
props.put(KustoSinkConfig.KUSTO_AUTH_APPKEY_CONF, "some-appkey");
props.put(KustoSinkConfig.KUSTO_AUTH_AUTHORITY_CONF, "some-authority");
public void testSinkTaskOpen() {
HashMap<String, String> configs = KustoSinkConnectorConfigTest.setupConfigs();
KustoSinkTask kustoSinkTask = new KustoSinkTask();
KustoSinkTask kustoSinkTaskSpy = spy(kustoSinkTask);
doNothing().when(kustoSinkTaskSpy).validateTableMappings(Mockito.<KustoSinkConfig>any());
kustoSinkTaskSpy.start(props);
kustoSinkTaskSpy.start(configs);
ArrayList<TopicPartition> tps = new ArrayList<>();
tps.add(new TopicPartition("topic1", 1));
tps.add(new TopicPartition("topic1", 2));
@ -60,23 +53,16 @@ public class KustoSinkTaskTest {
kustoSinkTaskSpy.open(tps);
assertEquals(kustoSinkTaskSpy.writers.size(), 3);
assertEquals(3, kustoSinkTaskSpy.writers.size());
}
@Test
public void testSinkTaskPutRecord() throws Exception {
HashMap<String, String> props = new HashMap<>();
props.put(KustoSinkConfig.KUSTO_URL_CONF, "https://cluster_name.kusto.windows.net");
props.put(KustoSinkConfig.KUSTO_SINK_TEMP_DIR_CONF, System.getProperty("java.io.tmpdir"));
props.put(KustoSinkConfig.KUSTO_TABLES_MAPPING_CONF, "[{'topic': 'topic1','db': 'db1', 'table': 'table1','format': 'csv'},{'topic': 'testing1','db': 'db1', 'table': 'table1','format': 'json','mapping': 'Mapping'}]");
props.put(KustoSinkConfig.KUSTO_AUTH_APPID_CONF, "some-appid");
props.put(KustoSinkConfig.KUSTO_AUTH_APPKEY_CONF, "some-appkey");
props.put(KustoSinkConfig.KUSTO_AUTH_AUTHORITY_CONF, "some-authority");
public void testSinkTaskPutRecord() {
HashMap<String, String> configs = KustoSinkConnectorConfigTest.setupConfigs();
KustoSinkTask kustoSinkTask = new KustoSinkTask();
KustoSinkTask kustoSinkTaskSpy = spy(kustoSinkTask);
doNothing().when(kustoSinkTaskSpy).validateTableMappings(Mockito.<KustoSinkConfig>any());
kustoSinkTaskSpy.start(props);
kustoSinkTaskSpy.start(configs);
ArrayList<TopicPartition> tps = new ArrayList<>();
TopicPartition tp = new TopicPartition("topic1", 1);
@ -84,68 +70,55 @@ public class KustoSinkTaskTest {
kustoSinkTaskSpy.open(tps);
List<SinkRecord> records = new ArrayList<SinkRecord>();
List<SinkRecord> records = new ArrayList<>();
records.add(new SinkRecord(tp.topic(), tp.partition(), null, null, null, "stringy message".getBytes(StandardCharsets.UTF_8), 10));
kustoSinkTaskSpy.put(records);
assertEquals(kustoSinkTaskSpy.writers.get(tp).currentOffset, 10);
assertEquals(10, kustoSinkTaskSpy.writers.get(tp).currentOffset);
}
@Test
public void testSinkTaskPutRecordMissingPartition() throws Exception {
HashMap<String, String> props = new HashMap<>();
props.put(KustoSinkConfig.KUSTO_URL_CONF, "https://cluster_name.kusto.windows.net");
props.put(KustoSinkConfig.KUSTO_SINK_TEMP_DIR_CONF, System.getProperty("java.io.tmpdir"));
props.put(KustoSinkConfig.KUSTO_TABLES_MAPPING_CONF, "[{'topic': 'topic1','db': 'db1', 'table': 'table1','format': 'csv'},{'topic': 'topic2','db': 'db1', 'table': 'table1','format': 'json','mapping': 'Mapping'}]");
props.put(KustoSinkConfig.KUSTO_AUTH_APPID_CONF, "some-appid");
props.put(KustoSinkConfig.KUSTO_AUTH_APPKEY_CONF, "some-appkey");
props.put(KustoSinkConfig.KUSTO_AUTH_AUTHORITY_CONF, "some-authority");
public void testSinkTaskPutRecordMissingPartition() {
HashMap<String, String> configs = KustoSinkConnectorConfigTest.setupConfigs();
configs.put(KustoSinkConfig.KUSTO_SINK_TEMP_DIR_CONF, System.getProperty("java.io.tmpdir"));
KustoSinkTask kustoSinkTask = new KustoSinkTask();
KustoSinkTask kustoSinkTaskSpy = spy(kustoSinkTask);
doNothing().when(kustoSinkTaskSpy).validateTableMappings(Mockito.<KustoSinkConfig>any());
kustoSinkTaskSpy.start(props);
kustoSinkTaskSpy.start(configs);
ArrayList<TopicPartition> tps = new ArrayList<>();
tps.add(new TopicPartition("topic1", 1));
kustoSinkTaskSpy.open(tps);
List<SinkRecord> records = new ArrayList<SinkRecord>();
List<SinkRecord> records = new ArrayList<>();
records.add(new SinkRecord("topic2", 1, null, null, null, "stringy message".getBytes(StandardCharsets.UTF_8), 10));
Throwable exception = assertThrows(ConnectException.class, () -> kustoSinkTaskSpy.put(records));
assertEquals(exception.getMessage(), "Received a record without a mapped writer for topic:partition(topic2:1), dropping record.");
assertEquals("Received a record without a mapped writer for topic:partition(topic2:1), dropping record.", exception.getMessage());
}
@Test
public void getTable() {
HashMap<String, String> props = new HashMap<>();
props.put(KustoSinkConfig.KUSTO_URL_CONF, "https://cluster_name.kusto.windows.net");
props.put(KustoSinkConfig.KUSTO_TABLES_MAPPING_CONF, "[{'topic': 'topic1','db': 'db1', 'table': 'table1','format': 'csv'},{'topic': 'topic2','db': 'db2', 'table': 'table2','format': 'json','mapping': 'Mapping'}]");
props.put(KustoSinkConfig.KUSTO_AUTH_APPID_CONF, "some-appid");
props.put(KustoSinkConfig.KUSTO_AUTH_APPKEY_CONF, "some-appkey");
props.put(KustoSinkConfig.KUSTO_AUTH_AUTHORITY_CONF, "some-authority");
HashMap<String, String> configs = KustoSinkConnectorConfigTest.setupConfigs();
KustoSinkTask kustoSinkTask = new KustoSinkTask();
KustoSinkTask kustoSinkTaskSpy = spy(kustoSinkTask);
doNothing().when(kustoSinkTaskSpy).validateTableMappings(Mockito.<KustoSinkConfig>any());
kustoSinkTaskSpy.start(props);
kustoSinkTaskSpy.start(configs);
{
// single table mapping should cause all topics to be mapped to a single table
Assert.assertEquals(kustoSinkTaskSpy.getIngestionProps("topic1").ingestionProperties.getDatabaseName(), "db1");
Assert.assertEquals(kustoSinkTaskSpy.getIngestionProps("topic1").ingestionProperties.getTableName(), "table1");
Assert.assertEquals(kustoSinkTaskSpy.getIngestionProps("topic1").ingestionProperties.getDataFormat(), "csv");
Assert.assertEquals(kustoSinkTaskSpy.getIngestionProps("topic2").ingestionProperties.getDatabaseName(), "db2");
Assert.assertEquals(kustoSinkTaskSpy.getIngestionProps("topic2").ingestionProperties.getTableName(), "table2");
Assert.assertEquals(kustoSinkTaskSpy.getIngestionProps("topic2").ingestionProperties.getDataFormat(), "json");
Assert.assertEquals(kustoSinkTaskSpy.getIngestionProps("topic2").ingestionProperties.getIngestionMapping().getIngestionMappingReference(), "Mapping");
Assert.assertEquals("db1", kustoSinkTaskSpy.getIngestionProps("topic1").ingestionProperties.getDatabaseName());
Assert.assertEquals("table1", kustoSinkTaskSpy.getIngestionProps("topic1").ingestionProperties.getTableName());
Assert.assertEquals("csv", kustoSinkTaskSpy.getIngestionProps("topic1").ingestionProperties.getDataFormat());
Assert.assertEquals("db2", kustoSinkTaskSpy.getIngestionProps("topic2").ingestionProperties.getDatabaseName());
Assert.assertEquals("table2", kustoSinkTaskSpy.getIngestionProps("topic2").ingestionProperties.getTableName());
Assert.assertEquals("json", kustoSinkTaskSpy.getIngestionProps("topic2").ingestionProperties.getDataFormat());
Assert.assertEquals("Mapping", kustoSinkTaskSpy.getIngestionProps("topic2").ingestionProperties.getIngestionMapping().getIngestionMappingReference());
Assert.assertNull(kustoSinkTaskSpy.getIngestionProps("topic3"));
}
}
}
}

Просмотреть файл

@ -11,10 +11,10 @@ import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.sink.SinkRecord;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.mockito.ArgumentCaptor;
import org.junit.Assert;
import java.io.ByteArrayOutputStream;
import java.io.File;
@ -22,18 +22,15 @@ import java.io.FileInputStream;
import java.io.IOException;
import java.nio.file.Paths;
import java.time.Instant;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.*;
import static org.mockito.Mockito.*;
public class TopicPartitionWriterTest {
// TODO: should probably find a better way to mock internal class (FileWriter)...
private File currentDirectory;
private static final String KUSTO_CLUSTER_URL = "https://ingest-cluster.kusto.windows.net";
private static final String KUSTO_INGEST_CLUSTER_URL = "https://ingest-cluster.kusto.windows.net";
private static final String KUSTO_CLUSTER_URL = "https://cluster.kusto.windows.net";
private static final String DATABASE = "testdb1";
private static final String TABLE = "testtable1";
private boolean isDlqEnabled;
@ -76,13 +73,13 @@ public class TopicPartitionWriterTest {
TopicIngestionProperties props = new TopicIngestionProperties();
props.ingestionProperties = ingestionProperties;
Map<String, String> settings = getKustoConfigs(basePath, fileThreshold, flushInterval);
KustoSinkConfig config= new KustoSinkConfig(settings);
KustoSinkConfig config = new KustoSinkConfig(settings);
TopicPartitionWriter writer = new TopicPartitionWriter(tp, mockedClient, props, config, isDlqEnabled, dlqTopicName, kafkaProducer);
SourceFile descriptor = new SourceFile();
descriptor.rawBytes = 1024;
descriptor.path = "somepath/somefile";
descriptor.file = new File ("C://myfile.txt");
descriptor.file = new File("C://myfile.txt");
writer.handleRollFile(descriptor);
ArgumentCaptor<FileSourceInfo> fileSourceInfoArgument = ArgumentCaptor.forClass(FileSourceInfo.class);
@ -96,7 +93,7 @@ public class TopicPartitionWriterTest {
Assert.assertEquals(fileSourceInfoArgument.getValue().getFilePath(), descriptor.path);
Assert.assertEquals(TABLE, ingestionPropertiesArgumentCaptor.getValue().getTableName());
Assert.assertEquals(DATABASE, ingestionPropertiesArgumentCaptor.getValue().getDatabaseName());
Assert.assertEquals(fileSourceInfoArgument.getValue().getRawSizeInBytes(), 1024);
Assert.assertEquals(1024, fileSourceInfoArgument.getValue().getRawSizeInBytes());
}
@Test
@ -111,7 +108,7 @@ public class TopicPartitionWriterTest {
props.ingestionProperties = new IngestionProperties(DATABASE, TABLE);
props.ingestionProperties.setDataFormat(IngestionProperties.DATA_FORMAT.csv);
Map<String, String> settings = getKustoConfigs(basePath, fileThreshold, flushInterval);
KustoSinkConfig config= new KustoSinkConfig(settings);
KustoSinkConfig config = new KustoSinkConfig(settings);
TopicPartitionWriter writer = new TopicPartitionWriter(tp, mockClient, props, config, isDlqEnabled, dlqTopicName, kafkaProducer);
Assert.assertEquals(writer.getFilePath(null), Paths.get(config.getTempDirPath(), "kafka_testTopic_11_0.csv.gz").toString());
@ -128,7 +125,7 @@ public class TopicPartitionWriterTest {
props.ingestionProperties = new IngestionProperties(DATABASE, TABLE);
props.ingestionProperties.setDataFormat(IngestionProperties.DATA_FORMAT.csv);
Map<String, String> settings = getKustoConfigs(basePath, fileThreshold, flushInterval);
KustoSinkConfig config= new KustoSinkConfig(settings);
KustoSinkConfig config = new KustoSinkConfig(settings);
TopicPartitionWriter writer = new TopicPartitionWriter(tp, mockClient, props, config, isDlqEnabled, dlqTopicName, kafkaProducer);
writer.open();
List<SinkRecord> records = new ArrayList<>();
@ -155,7 +152,7 @@ public class TopicPartitionWriterTest {
props.ingestionProperties.setDataFormat(IngestionProperties.DATA_FORMAT.csv);
Map<String, String> settings = getKustoConfigs(basePath, fileThreshold, flushInterval);
KustoSinkConfig config= new KustoSinkConfig(settings);
KustoSinkConfig config = new KustoSinkConfig(settings);
TopicPartitionWriter writer = new TopicPartitionWriter(tp, mockClient, props, config, isDlqEnabled, dlqTopicName, kafkaProducer);
writer.open();
writer.close();
@ -188,7 +185,7 @@ public class TopicPartitionWriterTest {
}
@Test
public void testWriteStringyValuesAndOffset() throws Exception {
public void testWriteStringyValuesAndOffset() {
TopicPartition tp = new TopicPartition("testTopic", 2);
IngestClient mockClient = mock(IngestClient.class);
String basePath = Paths.get(currentDirectory.getPath(), "testWriteStringyValuesAndOffset").toString();
@ -199,11 +196,11 @@ public class TopicPartitionWriterTest {
props.ingestionProperties = new IngestionProperties(DATABASE, TABLE);
props.ingestionProperties.setDataFormat(IngestionProperties.DATA_FORMAT.csv);
Map<String, String> settings = getKustoConfigs(basePath, fileThreshold, flushInterval);
KustoSinkConfig config= new KustoSinkConfig(settings);
KustoSinkConfig config = new KustoSinkConfig(settings);
TopicPartitionWriter writer = new TopicPartitionWriter(tp, mockClient, props, config, isDlqEnabled, dlqTopicName, kafkaProducer);
writer.open();
List<SinkRecord> records = new ArrayList<SinkRecord>();
List<SinkRecord> records = new ArrayList<>();
records.add(new SinkRecord(tp.topic(), tp.partition(), null, null, Schema.STRING_SCHEMA, "another,stringy,message", 3));
records.add(new SinkRecord(tp.topic(), tp.partition(), null, null, Schema.STRING_SCHEMA, "{'also':'stringy','sortof':'message'}", 4));
@ -221,7 +218,7 @@ public class TopicPartitionWriterTest {
TopicPartition tp = new TopicPartition("testPartition", 11);
IngestClient mockClient = mock(IngestClient.class);
String basePath = Paths.get(currentDirectory.getPath(), "testWriteStringyValuesAndOffset").toString();
String[] messages = new String[]{ "stringy message", "another,stringy,message", "{'also':'stringy','sortof':'message'}"};
String[] messages = new String[]{"stringy message", "another,stringy,message", "{'also':'stringy','sortof':'message'}"};
// Expect to finish file after writing forth message cause of fileThreshold
long fileThreshold = messages[0].length() + messages[1].length() + messages[2].length() + messages[2].length() - 1;
@ -230,11 +227,11 @@ public class TopicPartitionWriterTest {
props.ingestionProperties = new IngestionProperties(DATABASE, TABLE);
props.ingestionProperties.setDataFormat(IngestionProperties.DATA_FORMAT.csv);
Map<String, String> settings = getKustoConfigs(basePath, fileThreshold, flushInterval);
KustoSinkConfig config= new KustoSinkConfig(settings);
KustoSinkConfig config = new KustoSinkConfig(settings);
TopicPartitionWriter writer = new TopicPartitionWriter(tp, mockClient, props, config, isDlqEnabled, dlqTopicName, kafkaProducer);
writer.open();
List<SinkRecord> records = new ArrayList<SinkRecord>();
List<SinkRecord> records = new ArrayList<>();
records.add(new SinkRecord(tp.topic(), tp.partition(), null, null, Schema.STRING_SCHEMA, messages[0], 10));
records.add(new SinkRecord(tp.topic(), tp.partition(), null, null, Schema.STRING_SCHEMA, messages[1], 13));
records.add(new SinkRecord(tp.topic(), tp.partition(), null, null, Schema.STRING_SCHEMA, messages[2], 14));
@ -245,8 +242,8 @@ public class TopicPartitionWriterTest {
writer.writeRecord(record);
}
Assert.assertEquals((long) writer.lastCommittedOffset, (long) 15);
Assert.assertEquals(writer.currentOffset, 16);
Assert.assertEquals(15, (long) writer.lastCommittedOffset);
Assert.assertEquals(16, writer.currentOffset);
String currentFileName = writer.fileWriter.currentFile.path;
Assert.assertEquals(currentFileName, Paths.get(config.getTempDirPath(), String.format("kafka_%s_%d_%d.%s.gz", tp.topic(), tp.partition(), 15, IngestionProperties.DATA_FORMAT.csv.name())).toString());
@ -277,19 +274,19 @@ public class TopicPartitionWriterTest {
props.ingestionProperties = new IngestionProperties(DATABASE, TABLE);
props.ingestionProperties.setDataFormat(IngestionProperties.DATA_FORMAT.avro);
Map<String, String> settings = getKustoConfigs(basePath, fileThreshold, flushInterval);
KustoSinkConfig config= new KustoSinkConfig(settings);
KustoSinkConfig config = new KustoSinkConfig(settings);
TopicPartitionWriter writer = new TopicPartitionWriter(tp, mockClient, props, config, isDlqEnabled, dlqTopicName, kafkaProducer);
writer.open();
List<SinkRecord> records = new ArrayList<SinkRecord>();
List<SinkRecord> records = new ArrayList<>();
records.add(new SinkRecord(tp.topic(), tp.partition(), null, null, Schema.BYTES_SCHEMA, o.toByteArray(), 10));
for (SinkRecord record : records) {
writer.writeRecord(record);
}
Assert.assertEquals((long) writer.lastCommittedOffset, (long) 10);
Assert.assertEquals(writer.currentOffset, 10);
Assert.assertEquals(10, (long) writer.lastCommittedOffset);
Assert.assertEquals(10, writer.currentOffset);
String currentFileName = writer.fileWriter.currentFile.path;
@ -300,7 +297,8 @@ public class TopicPartitionWriterTest {
private Map<String, String> getKustoConfigs(String basePath, long fileThreshold,
long flushInterval) {
Map<String, String> settings = new HashMap<>();
settings.put(KustoSinkConfig.KUSTO_URL_CONF, KUSTO_CLUSTER_URL);
settings.put(KustoSinkConfig.KUSTO_INGEST_URL_CONF, KUSTO_INGEST_CLUSTER_URL);
settings.put(KustoSinkConfig.KUSTO_ENGINE_URL_CONF, KUSTO_CLUSTER_URL);
settings.put(KustoSinkConfig.KUSTO_TABLES_MAPPING_CONF, "mapping");
settings.put(KustoSinkConfig.KUSTO_AUTH_APPID_CONF, "some-appid");
settings.put(KustoSinkConfig.KUSTO_AUTH_APPKEY_CONF, "some-appkey");