зеркало из
1
0
Форкнуть 0

Merge branch 'master' into upgradeToJunit5

This commit is contained in:
Yihezkel Schoenbrun 2020-10-27 01:32:53 +02:00 коммит произвёл GitHub
Родитель 583836f4d6 3218b7b25c
Коммит 326eefe04b
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
9 изменённых файлов: 242 добавлений и 283 удалений

Просмотреть файл

@ -57,7 +57,7 @@ Integration mode to Azure Data Explorer is batched, queued ingestion leveraging
### 3.3. Configurable retries
- The connector supports retries for transient errors with the ability to provide parameters for the same
- The connector supports retries for transient errors with the ability to provide relevant parameters
- and retries with exponential backoff
### 3.4. Serialization formats
@ -193,7 +193,7 @@ KafkaClient {
<br>
**4. Configs to add to the Docker image:**<br>
This is covered in detail further on. It is specified here for the purpose of completenes of defining what goes onto the worker config.<br>
This is covered in detail further on. It is specified here for the purpose of completeness of defining what goes onto the worker config.<br>
```
COPY krb5.conf /etc/krb5.conf
COPY hdi-esp-jaas.conf /etc/hdi-esp-jaas.conf
@ -219,39 +219,40 @@ The following is complete set of connector sink properties-
| :--- | :--- | :--- | :--- |
| 1 | connector.class | Classname of the Kusto sink | Hard code to ``` com.microsoft.azure.kusto.kafka.connect.sink.KustoSinkConnector ```<br>*Required* |
| 2 | topics | Kafka topic specification | List of topics separated by commas<br>*Required* |
| 3 | kusto.url | Kusto ingest cluster specification | Provide the ingest URI of your ADX cluster<br>Use the following construct for the private URL - https://ingest-private-[cluster].kusto.windows.net<br>*Required* |
| 4 | aad.auth.authority | Credentials for Kusto | Provide the tenant ID of your Azure Active Directory<br>*Required* |
| 5 | aad.auth.appid | Credentials for Kusto | Provide Azure Active Directory Service Principal Name<br>*Required* |
| 6 | aad.auth.appkey | Credentials for Kusto | Provide Azure Active Directory Service Principal secret<br>*Required* |
| 7 | kusto.tables.topics.mapping | Mapping of topics to tables | Provide 1..many topic-table comma-separated mappings as follows-<br>[{'topic': '\<topicName1\>','db': '\<datebaseName\>', 'table': '\<tableName\>','format': '<format-e.g.avro/csv/json>', 'mapping':'\<tableMappingName\>'}]<br>*Required* |
| 8 | key.converter | Deserialization | One of the below supported-<br>org.apache.kafka.connect.storage.StringConverter<br> org.apache.kafka.connect.json.JsonConverter<br>io.confluent.connect.avro.AvroConverter<br>io.confluent.connect.json.JsonSchemaConverter<br> org.apache.kafka.connect.converters.ByteArrayConverter<br><br>*Required* |
| 9 | value.converter | Deserialization | One of the below supported-<br>org.apache.kafka.connect.storage.StringConverter<br> org.apache.kafka.connect.json.JsonConverter<br>io.confluent.connect.avro.AvroConverter<br>io.confluent.connect.json.JsonSchemaConverter<br> org.apache.kafka.connect.converters.ByteArrayConverter<br><br>*Required* |
| 10 | value.converter.schema.registry.url | Schema validation | URI of the Kafka schema registry<br>*Optional* |
| 11 | value.converter.schemas.enable | Schema validation | Set to true if you have embedded schema with payload but are not leveraging the schema registry<br>Applicable for avro and json<br><br>*Optional* |
| 12 | tasks.max | connector parallelism | Specify the number of connector copy/sink tasks<br>*Required* |
| 13 | flush.size.bytes | Performance knob for batching | Maximum bufer byte size per topic+partition combination that in combination with flush.interval.ms (whichever is reached first) should result in sinking to Kusto<br>*Default - 1 MB*<br>*Required* |
| 14 | flush.interval.ms | Performance knob for batching | Minimum time interval per topic+partition combo that in combination with flush.size.bytes (whichever is reached first) should result in sinking to Kusto<br>*Default - 300 ms*<br>*Required* |
| 15 | tempdir.path | Local directory path on Kafka Connect worker to buffer files to before shipping to Kusto | Default is value returned by ```System.getProperty("java.io.tmpdir")``` with a GUID attached to it<br><br>*Optional* |
| 16 | behavior.on.error | Configurable behavior in response to errors encountered | Possible values - log, ignore, fail<br><br>log - log the error, send record to dead letter queue, and continue processing<br>ignore - log the error, send record to dead letter queue, proceed with processing despite errors encountered<br>fail - shut down connector task upon encountering<br><br>*Default - fail*<br>*Optional* |
| 17 | errors.retry.max.time.ms | Configurable retries for transient errors | Period of time in milliseconds to retry for transient errors<br><br>*Default - 300 ms*<br>*Optional* |
| 18 | errors.retry.backoff.time.ms | Configurable retries for transient errors | Period of time in milliseconds to backoff before retry for transient errors<br><br>*Default - 10 ms*<br>*Optional* |
| 19 | errors.deadletterqueue.bootstrap.servers | Channel to write records that failed deserialization | CSV or kafkaBroker:port <br>*Optional* |
| 20 | errors.deadletterqueue.topic.name | Channel to write records that failed deserialization | Pre-created topic name <br>*Optional* |
| 21 | errors.deadletterqueue.security.protocol | Channel to write records that failed deserialization | Securitry protocol of secure Kafka cluster <br>*Optional but when feature is used with secure cluster, is required* |
| 22 | errors.deadletterqueue.sasl.mechanism | Channel to write records that failed deserialization | SASL mechanism of secure Kafka cluster<br>*Optional but when feature is used with secure cluster, is required* |
| 23 | errors.deadletterqueue.sasl.jaas.config | Channel to write records that failed deserialization | JAAS config of secure Kafka cluster<br>*Optional but when feature is used with secure cluster, is required* |
| 24 | misc.deadletterqueue.bootstrap.servers | Channel to write records that due to reasons other than deserialization | CSV of kafkaBroker:port <br>*Optional* |
| 25 | misc.deadletterqueue.topic.name | Channel to write records that due to reasons other than deserialization | Pre-created topic name <br>*Optional* |
| 26 | misc.deadletterqueue.security.protocol | Channel to write records that due to reasons other than deserialization | Securitry protocol of secure Kafka cluster <br>*Optional but when feature is used with secure cluster, is required* |
| 27 | misc.deadletterqueue.sasl.mechanism | Channel to write records that due to reasons other than deserialization | SASL mechanism of secure Kafka cluster<br>*Optional but when feature is used with secure cluster, is required* |
| 28 | misc.deadletterqueue.sasl.jaas.config | Channel to write records that due to reasons other than deserialization | JAAS config of secure Kafka cluster<br>*Optional but when feature is used with secure cluster, is required* |
| 29 | consumer.override.bootstrap.servers | Security details explicitly required for secure Kafka clusters | Bootstrap server:port CSV of secure Kafka cluster <br>*Required for secure Kafka clusters* |
| 30 | consumer.override.security.protocol | Security details explicitly required for secure Kafka clusters | Security protocol of secure Kafka cluster <br>*Required for secure Kafka clusters* |
| 31 | consumer.override.sasl.mechanism | Security details explicitly required for secure Kafka clusters | SASL mechanism of secure Kafka cluster<br>*Required for secure Kafka clusters* |
| 32 | consumer.override.sasl.jaas.config | Security details explicitly required for secure Kafka clusters | JAAS config of secure Kafka cluster<br>*Required for secure Kafka clusters* |
| 33 | consumer.override.sasl.kerberos.service.name | Security details explicitly required for secure Kafka clusters, specifically kerberized Kafka | Kerberos service name of kerberized Kafka cluster<br>*Required for kerberized Kafka clusters* |
| 34 | consumer.override.auto.offset.reset | Configurable consuming from offset | Possible values are - earliest or latest<br>*Optional* |
| 35 | consumer.override.max.poll.interval.ms| Config to prevent duplication | Set to a value to avoid consumer leaving the group while the Connector is retrying <br>*Optional* |
| 3 | kusto.ingestion.url | Kusto ingestion endpoint URL | Provide the ingest URL of your ADX cluster<br>Use the following construct for the private URL - https://ingest-private-[cluster].kusto.windows.net<br>*Required* |
| 4 | kusto.query.url | Kusto query endpoint URL | Provide the engine URL of your ADX cluster<br>*Optional* |
| 5 | aad.auth.authority | Credentials for Kusto | Provide the tenant ID of your Azure Active Directory<br>*Required* |
| 6 | aad.auth.appid | Credentials for Kusto | Provide Azure Active Directory Service Principal Name<br>*Required* |
| 7 | aad.auth.appkey | Credentials for Kusto | Provide Azure Active Directory Service Principal secret<br>*Required* |
| 8 | kusto.tables.topics.mapping | Mapping of topics to tables | Provide 1..many topic-table comma-separated mappings as follows-<br>[{'topic': '\<topicName1\>','db': '\<databaseName\>', 'table': '\<tableName\>','format': '<format-e.g.avro/csv/json>', 'mapping':'\<tableMappingName\>'}]<br>*Required* |
| 9 | key.converter | Deserialization | One of the below supported-<br>org.apache.kafka.connect.storage.StringConverter<br> org.apache.kafka.connect.json.JsonConverter<br>io.confluent.connect.avro.AvroConverter<br>io.confluent.connect.json.JsonSchemaConverter<br> org.apache.kafka.connect.converters.ByteArrayConverter<br><br>*Required* |
| 10 | value.converter | Deserialization | One of the below supported-<br>org.apache.kafka.connect.storage.StringConverter<br> org.apache.kafka.connect.json.JsonConverter<br>io.confluent.connect.avro.AvroConverter<br>io.confluent.connect.json.JsonSchemaConverter<br> org.apache.kafka.connect.converters.ByteArrayConverter<br><br>*Required* |
| 11 | value.converter.schema.registry.url | Schema validation | URI of the Kafka schema registry<br>*Optional* |
| 12 | value.converter.schemas.enable | Schema validation | Set to true if you have embedded schema with payload but are not leveraging the schema registry<br>Applicable for avro and json<br><br>*Optional* |
| 13 | tasks.max | connector parallelism | Specify the number of connector copy/sink tasks<br>*Required* |
| 14 | flush.size.bytes | Performance knob for batching | Maximum buffer byte size per topic+partition combination that in combination with flush.interval.ms (whichever is reached first) should result in sinking to Kusto<br>*Default - 1 MB*<br>*Required* |
| 15 | flush.interval.ms | Performance knob for batching | Minimum time interval per topic+partition combo that in combination with flush.size.bytes (whichever is reached first) should result in sinking to Kusto<br>*Default - 300 ms*<br>*Required* |
| 16 | tempdir.path | Local directory path on Kafka Connect worker to buffer files to before shipping to Kusto | Default is value returned by ```System.getProperty("java.io.tmpdir")``` with a GUID attached to it<br><br>*Optional* |
| 17 | behavior.on.error | Configurable behavior in response to errors encountered | Possible values - log, ignore, fail<br><br>log - log the error, send record to dead letter queue, and continue processing<br>ignore - log the error, send record to dead letter queue, proceed with processing despite errors encountered<br>fail - shut down connector task upon encountering an error<br><br>*Default - fail*<br>*Optional* |
| 18 | errors.retry.max.time.ms | Configurable retries for transient errors | Period of time in milliseconds to retry for transient errors<br><br>*Default - 300 ms*<br>*Optional* |
| 19 | errors.retry.backoff.time.ms | Configurable retries for transient errors | Period of time in milliseconds to backoff before retry for transient errors<br><br>*Default - 10 ms*<br>*Optional* |
| 20 | errors.deadletterqueue.bootstrap.servers | Channel to write records that failed deserialization | CSV of kafkaBroker:port <br>*Optional* |
| 21 | errors.deadletterqueue.topic.name | Channel to write records that failed deserialization | Pre-created topic name <br>*Optional* |
| 22 | errors.deadletterqueue.security.protocol | Channel to write records that failed deserialization | Security protocol of secure Kafka cluster <br>*Optional but when feature is used with secure cluster, is required* |
| 23 | errors.deadletterqueue.sasl.mechanism | Channel to write records that failed deserialization | SASL mechanism of secure Kafka cluster<br>*Optional but when feature is used with secure cluster, is required* |
| 24 | errors.deadletterqueue.sasl.jaas.config | Channel to write records that failed deserialization | JAAS config of secure Kafka cluster<br>*Optional but when feature is used with secure cluster, is required* |
| 25 | misc.deadletterqueue.bootstrap.servers | Channel to write records that failed due to reasons other than deserialization | CSV of kafkaBroker:port <br>*Optional* |
| 26 | misc.deadletterqueue.topic.name | Channel to write records that failed due to reasons other than deserialization | Pre-created topic name <br>*Optional* |
| 27 | misc.deadletterqueue.security.protocol | Channel to write records that failed due to reasons other than deserialization | Security protocol of secure Kafka cluster <br>*Optional but when feature is used with secure cluster, is required* |
| 28 | misc.deadletterqueue.sasl.mechanism | Channel to write records that failed due to reasons other than deserialization | SASL mechanism of secure Kafka cluster<br>*Optional but when feature is used with secure cluster, is required* |
| 29 | misc.deadletterqueue.sasl.jaas.config | Channel to write records that failed due to reasons other than deserialization | JAAS config of secure Kafka cluster<br>*Optional but when feature is used with secure cluster, is required* |
| 30 | consumer.override.bootstrap.servers | Security details explicitly required for secure Kafka clusters | Bootstrap server:port CSV of secure Kafka cluster <br>*Required for secure Kafka clusters* |
| 31 | consumer.override.security.protocol | Security details explicitly required for secure Kafka clusters | Security protocol of secure Kafka cluster <br>*Required for secure Kafka clusters* |
| 32 | consumer.override.sasl.mechanism | Security details explicitly required for secure Kafka clusters | SASL mechanism of secure Kafka cluster<br>*Required for secure Kafka clusters* |
| 33 | consumer.override.sasl.jaas.config | Security details explicitly required for secure Kafka clusters | JAAS config of secure Kafka cluster<br>*Required for secure Kafka clusters* |
| 34 | consumer.override.sasl.kerberos.service.name | Security details explicitly required for secure Kafka clusters, specifically kerberized Kafka | Kerberos service name of kerberized Kafka cluster<br>*Required for kerberized Kafka clusters* |
| 35 | consumer.override.auto.offset.reset | Configurable consuming from offset | Possible values are - earliest or latest<br>*Optional* |
| 36 | consumer.override.max.poll.interval.ms| Config to prevent duplication | Set to a value to avoid consumer leaving the group while the Connector is retrying <br>*Optional* |
<hr>
@ -272,12 +273,12 @@ The following is the roadmap-<br>
Kafka Connect connectors can be deployed in standalone mode (just for development) or in distributed mode (production).<br>
### 7.1. Standalone Kafka Connect deployment mode
This involves having the connector plugin jar in /usr/share/java of a Kafka Connect worker, reference to the same plugin path in connnect-standalone.properties, and launching of the connector from command line. This is not scalable, not fault tolerant, and is not recommeded for production.
This involves having the connector plugin jar in /usr/share/java of a Kafka Connect worker, reference to the same plugin path in connect-standalone.properties, and launching of the connector from command line. This is not scalable, not fault tolerant, and is not recommended for production.
### 7.2. Distributed Kafka Connect deployment mode
Distributed Kafka Connect essentially involves creation of a KafkaConnect worker cluster as shown in the diagram below.<br>
- Azure Kubernetes Service is a great infrastructure for the connect cluster, due to its managed and scalabale nature
- Kubernetes is a great platform for the connect cluster, due to its scalabale nature and self-healing
- Kubernetes is a great platform for the connect cluster, due to its scalable nature and self-healing
- Each orange polygon is a Kafka Connect worker and each green polygon is a sink connector instance
- A Kafka Connect worker can have 1..many task instances which helps with scale
- When a Kafka Connect worker is maxed out from a resource perspective (CPU, RAM), you can scale horizontally, add more Kafka Connect workers, and tasks within them

Просмотреть файл

@ -3,7 +3,8 @@ connector.class=com.microsoft.azure.kusto.kafka.connect.sink.KustoSinkConnector
tasks.max=1
#topics=testing1,testing2
#kusto.url=https://ingest-{cluster}.kusto.windows.net/
#kusto.ingestion.url=https://ingest-{cluster}.kusto.windows.net/
#kusto.query.url=https://{cluster}.kusto.windows.net/
#kusto.tables.topics.mapping=[{'topic': 'testing1','db': 'test_db', 'table': 'test_table_1','format': 'json', 'mapping':'JsonMapping'},{'topic': 'testing2','db': 'test_db', 'table': 'test_table_2','format': 'csv', 'mapping':'CsvMapping'}]

Просмотреть файл

@ -18,7 +18,6 @@ import java.util.Properties;
import java.util.concurrent.TimeUnit;
public class KustoSinkConfig extends AbstractConfig {
private static final Logger log = LoggerFactory.getLogger(KustoSinkConfig.class);
private static final String DLQ_PROPS_PREFIX = "misc.deadletterqueue.";
@ -38,9 +37,13 @@ public class KustoSinkConfig extends AbstractConfig {
}
// TODO: this might need to be per kusto cluster...
static final String KUSTO_URL_CONF = "kusto.url";
private static final String KUSTO_URL_DOC = "Kusto ingestion service URI.";
private static final String KUSTO_URL_DISPLAY = "Kusto cluster ingestion URI";
static final String KUSTO_INGEST_URL_CONF = "kusto.ingestion.url";
private static final String KUSTO_INGEST_URL_DOC = "Kusto ingestion endpoint URL.";
private static final String KUSTO_INGEST_URL_DISPLAY = "Kusto cluster ingestion URL";
static final String KUSTO_ENGINE_URL_CONF = "kusto.query.url";
private static final String KUSTO_ENGINE_URL_DOC = "Kusto query endpoint URL.";
private static final String KUSTO_ENGINE_URL_DISPLAY = "Kusto cluster query URL";
static final String KUSTO_AUTH_APPID_CONF = "aad.auth.appid";
private static final String KUSTO_AUTH_APPID_DOC = "Application Id for Azure Active Directory authentication.";
@ -93,7 +96,7 @@ public class KustoSinkConfig extends AbstractConfig {
static final String KUSTO_DLQ_BOOTSTRAP_SERVERS_CONF = "misc.deadletterqueue.bootstrap.servers";
private static final String KUSTO_DLQ_BOOTSTRAP_SERVERS_DOC = "Configure this list to Kafka broker's address(es) "
+ "to which the Connector should write records failed due to restrictions while writing to the file in `tempdir.path`, network interruptions or unavailability of Kusto cluster. "
+ "This list should be in the form host-1:port-1,host-2:port-2,…host-n:port-n. ";
+ "This list should be in the form host-1:port-1,host-2:port-2,…host-n:port-n.";
private static final String KUSTO_DLQ_BOOTSTRAP_SERVERS_DISPLAY = "Miscellaneous Dead-Letter Queue Bootstrap Servers";
static final String KUSTO_DLQ_TOPIC_NAME_CONF = "misc.deadletterqueue.topic.name";
@ -102,7 +105,7 @@ public class KustoSinkConfig extends AbstractConfig {
private static final String KUSTO_DLQ_TOPIC_NAME_DISPLAY = "Miscellaneous Dead-Letter Queue Topic Name";
static final String KUSTO_SINK_MAX_RETRY_TIME_MS_CONF = "errors.retry.max.time.ms";
private static final String KUSTO_SINK_MAX_RETRY_TIME_MS_DOC = "Maximum time upto which the Connector "
private static final String KUSTO_SINK_MAX_RETRY_TIME_MS_DOC = "Maximum time up to which the Connector "
+ "should retry writing records to Kusto table in case of failures.";
private static final String KUSTO_SINK_MAX_RETRY_TIME_MS_DISPLAY = "Errors Maximum Retry Time";
@ -120,18 +123,16 @@ public class KustoSinkConfig extends AbstractConfig {
}
public static ConfigDef getConfig() {
ConfigDef result = new ConfigDef();
defineConnectionConfigs(result);
defineWriteConfigs(result);
defineErrorHandlingAndRetriesConfgis(result);
defineErrorHandlingAndRetriesConfigs(result);
return result;
}
private static void defineErrorHandlingAndRetriesConfgis(ConfigDef result) {
private static void defineErrorHandlingAndRetriesConfigs(ConfigDef result) {
final String errorAndRetriesGroupName = "Error Handling and Retries";
int errorAndRetriesGroupOrder = 0;
@ -193,7 +194,6 @@ public class KustoSinkConfig extends AbstractConfig {
}
private static void defineWriteConfigs(ConfigDef result) {
final String writeGroupName = "Writes";
int writeGroupOrder = 0;
@ -243,21 +243,30 @@ public class KustoSinkConfig extends AbstractConfig {
}
private static void defineConnectionConfigs(ConfigDef result) {
final String connectionGroupName = "Connection";
int connectionGroupOrder = 0;
result
.define(
KUSTO_URL_CONF,
KUSTO_INGEST_URL_CONF,
Type.STRING,
ConfigDef.NO_DEFAULT_VALUE,
Importance.HIGH,
KUSTO_URL_DOC,
KUSTO_INGEST_URL_DOC,
connectionGroupName,
connectionGroupOrder++,
Width.MEDIUM,
KUSTO_URL_DISPLAY)
KUSTO_INGEST_URL_DISPLAY)
.define(
KUSTO_ENGINE_URL_CONF,
Type.STRING,
ConfigDef.NO_DEFAULT_VALUE,
Importance.LOW,
KUSTO_ENGINE_URL_DOC,
connectionGroupName,
connectionGroupOrder++,
Width.MEDIUM,
KUSTO_ENGINE_URL_DISPLAY)
.define(
KUSTO_AUTH_APPKEY_CONF,
Type.PASSWORD,
@ -291,7 +300,11 @@ public class KustoSinkConfig extends AbstractConfig {
}
public String getKustoUrl() {
return this.getString(KUSTO_URL_CONF);
return this.getString(KUSTO_INGEST_URL_CONF);
}
public String getKustoEngineUrl() {
return this.getString(KUSTO_ENGINE_URL_CONF);
}
public String getAuthAppid() {
@ -368,5 +381,4 @@ public class KustoSinkConfig extends AbstractConfig {
public static void main(String[] args) {
System.out.println(getConfig().toEnrichedRst());
}
}

Просмотреть файл

@ -5,9 +5,9 @@ import com.microsoft.azure.kusto.data.*;
import com.microsoft.azure.kusto.data.exceptions.DataClientException;
import com.microsoft.azure.kusto.data.exceptions.DataServiceException;
import com.microsoft.azure.kusto.ingest.IngestClient;
import com.microsoft.azure.kusto.ingest.IngestClientFactory;
import com.microsoft.azure.kusto.ingest.IngestionMapping;
import com.microsoft.azure.kusto.ingest.IngestionProperties;
import com.microsoft.azure.kusto.ingest.IngestClientFactory;
import org.apache.kafka.clients.consumer.OffsetAndMetadata;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.Producer;
@ -24,14 +24,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Properties;
import java.util.*;
/**
@ -40,26 +33,36 @@ import java.util.Properties;
* Currently only ingested files are "committed" in the sense that we can advance the offset according to it.
*/
public class KustoSinkTask extends SinkTask {
private static final Logger log = LoggerFactory.getLogger(KustoSinkTask.class);
static final String FETCH_TABLE_QUERY = "%s|count";
static final String FETCH_TABLE_MAPPING_QUERY = ".show table %s ingestion %s mapping '%s'";
static final String FETCH_PRINCIPAL_ROLES_QUERY = ".show principal access with (principal = '%s', accesstype='ingest',database='%s',table='%s')";
static final int INGESTION_ALLOWED_INDEX = 3;
public static final String FETCH_TABLE_QUERY = "%s | count";
public static final String FETCH_TABLE_MAPPING_QUERY = ".show table %s ingestion %s mapping '%s'";
public static final String FETCH_PRINCIPAL_ROLES_QUERY = ".show principal access with (principal = '%s', accesstype='ingest',database='%s',table='%s')";
public static final int INGESTION_ALLOWED_INDEX = 3;
public static final String MAPPING = "mapping";
public static final String MAPPING_FORMAT = "format";
public static final String MAPPING_TABLE = "table";
public static final String MAPPING_DB = "db";
public static final String JSON_FORMAT = "json";
public static final String SINGLEJSON_FORMAT = "singlejson";
public static final String MULTIJSON_FORMAT = "multijson";
public static final String VALIDATION_OK = "OK";
private final Set<TopicPartition> assignment;
private Map<String, TopicIngestionProperties> topicsToIngestionProps;
private KustoSinkConfig config;
IngestClient kustoIngestClient;
Map<TopicPartition, TopicPartitionWriter> writers;
protected IngestClient kustoIngestClient;
protected Map<TopicPartition, TopicPartitionWriter> writers;
private boolean isDlqEnabled;
private String dlqTopicName;
private Producer<byte[], byte[]> dlqProducer;
private static final ClientRequestProperties clientRequestProperties = new ClientRequestProperties();
public KustoSinkTask() {
assignment = new HashSet<>();
writers = new HashMap<>();
clientRequestProperties.setOption("validate_permissions", true);
}
public static IngestClient createKustoIngestClient(KustoSinkConfig config) {
@ -90,7 +93,7 @@ public class KustoSinkTask extends SinkTask {
public static Client createKustoEngineClient(KustoSinkConfig config) {
try {
String engineClientURL = config.getKustoUrl().replace("https://ingest-", "https://");
String engineClientURL = config.getKustoEngineUrl();
if (!Strings.isNullOrEmpty(config.getAuthAppid())) {
if (Strings.isNullOrEmpty(config.getAuthAppkey())) {
throw new ConfigException("Kusto authentication missing App Key.");
@ -115,43 +118,39 @@ public class KustoSinkTask extends SinkTask {
}
public static Map<String, TopicIngestionProperties> getTopicsToIngestionProps(KustoSinkConfig config) {
Map<String, TopicIngestionProperties> result = new HashMap<>();
Map<String, TopicIngestionProperties> result = new HashMap<>();
try {
JSONArray mappings = new JSONArray(config.getTopicToTableMapping());
for (int i =0; i< mappings.length(); i++) {
for (int i = 0; i < mappings.length(); i++) {
JSONObject mapping = mappings.getJSONObject(i);
String db = mapping.getString("db");
String table = mapping.getString("table");
String format = mapping.optString("format");
String db = mapping.getString(MAPPING_DB);
String table = mapping.getString(MAPPING_TABLE);
String format = mapping.optString(MAPPING_FORMAT);
IngestionProperties props = new IngestionProperties(db, table);
if (format != null && !format.isEmpty()) {
if (format.equalsIgnoreCase("json") || format.equalsIgnoreCase("singlejson") || format.equalsIgnoreCase("multijson")) {
props.setDataFormat("multijson");
if (format.equalsIgnoreCase(JSON_FORMAT) || format.equalsIgnoreCase(SINGLEJSON_FORMAT) || format.equalsIgnoreCase(MULTIJSON_FORMAT)) {
props.setDataFormat(MULTIJSON_FORMAT);
}
props.setDataFormat(format);
}
String mappingRef = mapping.optString("mapping");
if (mappingRef != null && !mappingRef.isEmpty()) {
if (format != null) {
if (format.equalsIgnoreCase("json") || format.equalsIgnoreCase("singlejson") || format.equalsIgnoreCase("multijson")) {
props.setIngestionMapping(mappingRef, IngestionMapping.IngestionMappingKind.Json);
} else if (format.equalsIgnoreCase(IngestionProperties.DATA_FORMAT.avro.toString())){
props.setIngestionMapping(mappingRef, IngestionMapping.IngestionMappingKind.Avro);
} else if (format.equalsIgnoreCase(IngestionProperties.DATA_FORMAT.apacheavro.toString())){
props.setIngestionMapping(mappingRef, IngestionMapping.IngestionMappingKind.ApacheAvro);
} else {
props.setIngestionMapping(mappingRef, IngestionMapping.IngestionMappingKind.Csv);
}
String mappingRef = mapping.optString(MAPPING);
if (mappingRef != null && !mappingRef.isEmpty() && format != null) {
if (format.equalsIgnoreCase(JSON_FORMAT) || format.equalsIgnoreCase(SINGLEJSON_FORMAT) || format.equalsIgnoreCase(MULTIJSON_FORMAT)) {
props.setIngestionMapping(mappingRef, IngestionMapping.IngestionMappingKind.Json);
} else if (format.equalsIgnoreCase(IngestionProperties.DATA_FORMAT.avro.toString())) {
props.setIngestionMapping(mappingRef, IngestionMapping.IngestionMappingKind.Avro);
} else if (format.equalsIgnoreCase(IngestionProperties.DATA_FORMAT.apacheavro.toString())) {
props.setIngestionMapping(mappingRef, IngestionMapping.IngestionMappingKind.ApacheAvro);
} else {
props.setIngestionMapping(mappingRef, IngestionMapping.IngestionMappingKind.Csv);
}
}
TopicIngestionProperties topicIngestionProperties = new TopicIngestionProperties();
@ -159,8 +158,7 @@ public class KustoSinkTask extends SinkTask {
result.put(mapping.getString("topic"), topicIngestionProperties);
}
return result;
}
catch (Exception ex) {
} catch (Exception ex) {
throw new ConfigException("Error while parsing kusto ingestion properties.", ex);
}
}
@ -176,31 +174,27 @@ public class KustoSinkTask extends SinkTask {
Client engineClient = createKustoEngineClient(config);
if (config.getTopicToTableMapping() != null) {
JSONArray mappings = new JSONArray(config.getTopicToTableMapping());
if(mappings.length() > 0) {
if(isIngestorRole(mappings.getJSONObject(0), engineClient)) {
for (int i = 0; i < mappings.length(); i++) {
JSONObject mapping = mappings.getJSONObject(i);
validateTableAccess(engineClient, mapping, config, databaseTableErrorList, accessErrorList);
}
if ((mappings.length() > 0) && (isIngestorRole(mappings.getJSONObject(0), engineClient))) {
for (int i = 0; i < mappings.length(); i++) {
JSONObject mapping = mappings.getJSONObject(i);
validateTableAccess(engineClient, mapping, config, databaseTableErrorList, accessErrorList);
}
}
}
String tableAccessErrorMessage = "";
if(!databaseTableErrorList.isEmpty())
{
if (!databaseTableErrorList.isEmpty()) {
tableAccessErrorMessage = "\n\nError occurred while trying to access the following database:table\n" +
String.join("\n",databaseTableErrorList);
String.join("\n", databaseTableErrorList);
}
if(!accessErrorList.isEmpty())
{
if (!accessErrorList.isEmpty()) {
tableAccessErrorMessage = tableAccessErrorMessage + "\n\nUser does not have appropriate permissions " +
"to sink data into the Kusto database:table combination(s). " +
"Verify your Kusto principals and roles before proceeding for the following: \n " +
String.join("\n",accessErrorList);
String.join("\n", accessErrorList);
}
if(!tableAccessErrorMessage.isEmpty()) {
if (!tableAccessErrorMessage.isEmpty()) {
throw new ConnectException(tableAccessErrorMessage);
}
} catch (JSONException e) {
@ -209,12 +203,12 @@ public class KustoSinkTask extends SinkTask {
}
private boolean isIngestorRole(JSONObject testMapping, Client engineClient) throws JSONException {
String database = testMapping.getString("db");
String table = testMapping.getString("table");
String database = testMapping.getString(MAPPING_DB);
String table = testMapping.getString(MAPPING_TABLE);
try {
KustoOperationResult rs = engineClient.execute(database, String.format(FETCH_TABLE_QUERY, table));
} catch(DataServiceException | DataClientException err){
if(err.getCause().getMessage().contains("Forbidden:")){
engineClient.execute(database, String.format(FETCH_TABLE_QUERY, table), clientRequestProperties);
} catch (DataServiceException | DataClientException err) {
if (err.getCause().getMessage().contains("Forbidden:")) {
log.warn("User might have ingestor privileges, table validation will be skipped for all table mappings ");
return false;
}
@ -223,50 +217,44 @@ public class KustoSinkTask extends SinkTask {
}
/**
* This function validates whether the user has the read and write access to the intended table
* before starting to sink records into ADX.
* This function validates whether the user has the read and write access to the intended table
* before starting to sink records into ADX.
*
* @param engineClient Client connection to run queries.
* @param mapping JSON Object containing a Table mapping.
* @param config
* @param mapping JSON Object containing a Table mapping.
* @param config Kusto Sink configuration
*/
private static void validateTableAccess(Client engineClient, JSONObject mapping, KustoSinkConfig config, List<String> databaseTableErrorList, List<String> accessErrorList) throws JSONException {
String database = mapping.getString("db");
String table = mapping.getString("table");
String format = mapping.getString("format");
String mappingName = mapping.getString("mapping");
if (format.equalsIgnoreCase("json") || format.equalsIgnoreCase("singlejson") || format.equalsIgnoreCase("multijson")) {
format = "json";
String database = mapping.getString(MAPPING_DB);
String table = mapping.getString(MAPPING_TABLE);
String format = mapping.getString(MAPPING_FORMAT);
String mappingName = mapping.getString(MAPPING);
if (format.equalsIgnoreCase(JSON_FORMAT) || format.equalsIgnoreCase(SINGLEJSON_FORMAT) || format.equalsIgnoreCase(MULTIJSON_FORMAT)) {
format = JSON_FORMAT;
}
boolean hasAccess = false;
try {
try {
KustoResultSetTable rs = engineClient.execute(database, String.format(FETCH_TABLE_QUERY, table)).getPrimaryResults();
rs.next();
if (rs.getLong(0) >= 0) {
KustoOperationResult rs = engineClient.execute(database, String.format(FETCH_TABLE_QUERY, table), clientRequestProperties);
if (VALIDATION_OK.equals(rs.getPrimaryResults().getData().get(0).get(0))) {
hasAccess = true;
}
} catch (DataServiceException e) {
databaseTableErrorList.add(String.format("Database:%s Table:%s | table not found", database, table));
}
if(hasAccess) {
if (hasAccess) {
try {
KustoOperationResult rp = engineClient.execute(database, String.format(FETCH_TABLE_MAPPING_QUERY, table, format, mappingName));
if (rp.getPrimaryResults().getData().get(0).get(0).toString().equals(mappingName)) {
hasAccess = true;
}
engineClient.execute(database, String.format(FETCH_TABLE_MAPPING_QUERY, table, format, mappingName));
} catch (DataServiceException e) {
hasAccess = false;
databaseTableErrorList.add(String.format("Database:%s Table:%s | %s mapping '%s' not found", database, table, format, mappingName));
}
}
if(hasAccess) {
if (hasAccess) {
String authenticateWith = "aadapp=" + config.getAuthAppid();
String query = String.format(FETCH_PRINCIPAL_ROLES_QUERY, authenticateWith, database, table);
try {
String authenticateWith = "aadapp=" + config.getAuthAppid();
KustoOperationResult rs = engineClient.execute(database, String.format(FETCH_PRINCIPAL_ROLES_QUERY, authenticateWith, database, table));
KustoOperationResult rs = engineClient.execute(database, query);
hasAccess = (boolean) rs.getPrimaryResults().getData().get(0).get(INGESTION_ALLOWED_INDEX);
if (hasAccess) {
log.info("User has appropriate permissions to sink data into the Kusto table={}", table);
@ -277,12 +265,11 @@ public class KustoSinkTask extends SinkTask {
} catch (DataServiceException e) {
// Logging the error so that the trace is not lost.
if (!e.getCause().toString().contains("Forbidden")){
log.error("{}", e);
log.error("Error fetching principal roles with query {}", query, e);
databaseTableErrorList.add(String.format("Database:%s Table:%s", database, table));
} else {
log.warn("Failed to check permissions, will continue the run as the principal might still be able to ingest: {}", e);
}
}
}
} catch (DataClientException e) {
@ -296,11 +283,11 @@ public class KustoSinkTask extends SinkTask {
}
@Override
public void open(Collection<TopicPartition> partitions) throws ConnectException {
public void open(Collection<TopicPartition> partitions) {
assignment.addAll(partitions);
for (TopicPartition tp : assignment) {
TopicIngestionProperties ingestionProps = getIngestionProps(tp.topic());
log.debug(String.format("Open Kusto topic: '%s' with partition: '%s'", tp.topic(), tp.partition()));
log.debug("Open Kusto topic: '{}' with partition: '{}'", tp.topic(), tp.partition());
if (ingestionProps == null) {
throw new ConnectException(String.format("Kusto Sink has no ingestion props mapped " +
"for the topic: %s. please check your configuration.", tp.topic()));
@ -320,17 +307,16 @@ public class KustoSinkTask extends SinkTask {
writers.remove(tp);
assignment.remove(tp);
} catch (ConnectException e) {
log.error("Error closing writer for {}. Error: {}", tp, e);
log.error("Error closing writer for {}.", tp, e);
}
}
}
@Override
public void start(Map<String, String> props) {
config = new KustoSinkConfig(props);
String url = config.getKustoUrl();
validateTableMappings(config);
if (config.isDlqEnabled()) {
isDlqEnabled = true;
@ -348,16 +334,15 @@ public class KustoSinkTask extends SinkTask {
isDlqEnabled = false;
dlqTopicName = null;
}
topicsToIngestionProps = getTopicsToIngestionProps(config);
// this should be read properly from settings
kustoIngestClient = createKustoIngestClient(config);
log.info(String.format("Started KustoSinkTask with target cluster: (%s), source topics: (%s)",
url, topicsToIngestionProps.keySet().toString()));
log.info("Started KustoSinkTask with target cluster: ({}), source topics: ({})", url, topicsToIngestionProps.keySet());
// Adding this check to make code testable
if(context!=null) {
if (context != null) {
open(context.assignment());
}
}
@ -369,7 +354,7 @@ public class KustoSinkTask extends SinkTask {
writer.close();
}
try {
if(kustoIngestClient != null) {
if (kustoIngestClient != null) {
kustoIngestClient.close();
}
} catch (IOException e) {
@ -378,10 +363,10 @@ public class KustoSinkTask extends SinkTask {
}
@Override
public void put(Collection<SinkRecord> records) throws ConnectException {
public void put(Collection<SinkRecord> records) {
SinkRecord lastRecord = null;
for (SinkRecord record : records) {
log.debug("record to topic:" + record.topic());
log.debug("Record to topic: {}", record.topic());
lastRecord = record;
TopicPartition tp = new TopicPartition(record.topic(), record.kafkaPartition());
@ -398,7 +383,7 @@ public class KustoSinkTask extends SinkTask {
}
if (lastRecord != null) {
log.debug("Last record offset:" + lastRecord.kafkaOffset());
log.debug("Last record offset: {}", lastRecord.kafkaOffset());
}
}
@ -410,7 +395,7 @@ public class KustoSinkTask extends SinkTask {
) {
Map<TopicPartition, OffsetAndMetadata> offsetsToCommit = new HashMap<>();
for (TopicPartition tp : assignment) {
if(writers.get(tp) == null) {
if (writers.get(tp) == null) {
throw new ConnectException("Topic Partition not configured properly. " +
"verify your `topics` and `kusto.tables.topics.mapping` configurations");
}
@ -418,7 +403,7 @@ public class KustoSinkTask extends SinkTask {
Long lastCommittedOffset = writers.get(tp).lastCommittedOffset;
if (lastCommittedOffset != null) {
Long offset = lastCommittedOffset + 1L;
long offset = lastCommittedOffset + 1L;
log.debug("Forwarding to framework request to commit offset: {} for {} while the offset is {}", offset, tp, offsets.get(tp));
offsetsToCommit.put(tp, new OffsetAndMetadata(offset));
}
@ -430,6 +415,5 @@ public class KustoSinkTask extends SinkTask {
/**
 * No-op override: this task performs no work when asked to flush.
 *
 * @param offsets offsets supplied by the caller; not used by this implementation
 */
@Override
public void flush(Map<TopicPartition, OffsetAndMetadata> offsets) {
// Intentionally empty: rolling files can handle writing.
}
}
}

Просмотреть файл

@ -30,12 +30,12 @@ import java.util.logging.Logger;
@Disabled("Don't want running as part of build or CI. Comment this line to test manually.")
public class E2ETest {
private static final String testPrefix = "tmpKafkaE2ETest";
private String appId = System.getProperty("appId");
private String appKey = System.getProperty("appKey");
private String authority = System.getProperty("authority");
private String cluster = System.getProperty("cluster");
private String database = System.getProperty("database");
private String tableBaseName = System.getProperty("table", testPrefix + UUID.randomUUID().toString().replace('-', '_'));
private static final String appId = System.getProperty("appId");
private static final String appKey = System.getProperty("appKey");
private static final String authority = System.getProperty("authority");
private static final String cluster = System.getProperty("cluster");
private static final String database = System.getProperty("database");
private static final String tableBaseName = System.getProperty("table", testPrefix + UUID.randomUUID().toString().replace('-', '_'));
private String basePath = Paths.get("src/test/resources/", "testE2E").toString();
private Logger log = Logger.getLogger(this.getClass().getName());
private boolean isDlqEnabled;
@ -83,9 +83,10 @@ public class E2ETest {
props.ingestionProperties = ingestionProperties;
props.ingestionProperties.setDataFormat(IngestionProperties.DATA_FORMAT.csv);
props.ingestionProperties.setIngestionMapping("mappy", IngestionMapping.IngestionMappingKind.Csv);
String KustoUrl = String.format("https://ingest-%s.kusto.windows.net", cluster);
String kustoDmUrl = String.format("https://ingest-%s.kusto.windows.net", cluster);
String kustoEngineUrl = String.format("https://%s.kusto.windows.net", cluster);
String basepath = Paths.get(basePath, "csv").toString();
Map<String, String> settings = getKustoConfigs(KustoUrl, basepath, "mappy", fileThreshold, flushInterval);
Map<String, String> settings = getKustoConfigs(kustoDmUrl, kustoEngineUrl, basepath, "mappy", fileThreshold, flushInterval);
KustoSinkConfig config = new KustoSinkConfig(settings);
TopicPartitionWriter writer = new TopicPartitionWriter(tp, ingestClient, props, config, isDlqEnabled, dlqTopicName, kafkaProducer);
writer.open();
@ -134,11 +135,12 @@ public class E2ETest {
props2.ingestionProperties.setDataFormat(IngestionProperties.DATA_FORMAT.avro);
props2.ingestionProperties.setIngestionMapping("avroMapping", IngestionMapping.IngestionMappingKind.Avro);
TopicPartition tp2 = new TopicPartition("testPartition2", 11);
String KustoUrl = String.format("https://ingest-%s.kusto.windows.net", cluster);
String kustoDmUrl = String.format("https://ingest-%s.kusto.windows.net", cluster);
String kustoEngineUrl = String.format("https://%s.kusto.windows.net", cluster);
String basepath = Paths.get(basePath, "avro").toString();
long fileThreshold = 100;
long flushInterval = 300000;
Map<String, String> settings = getKustoConfigs(KustoUrl, basepath, "avri", fileThreshold, flushInterval);
Map<String, String> settings = getKustoConfigs(kustoDmUrl, kustoEngineUrl, basepath, "avri", fileThreshold, flushInterval);
KustoSinkConfig config = new KustoSinkConfig(settings);
TopicPartitionWriter writer2 = new TopicPartitionWriter(tp2, ingestClient, props2, config, isDlqEnabled, dlqTopicName, kafkaProducer);
writer2.open();
@ -185,10 +187,11 @@ public class E2ETest {
this.log.info("Successfully ingested " + expectedNumberOfRows + " records.");
}
private Map<String, String> getKustoConfigs(String clusterUrl, String basePath, String tableMapping, long fileThreshold,
long flushInterval) {
private Map<String, String> getKustoConfigs(String clusterUrl, String engineUrl, String basePath, String tableMapping,
long fileThreshold, long flushInterval) {
Map<String, String> settings = new HashMap<>();
settings.put(KustoSinkConfig.KUSTO_URL_CONF, clusterUrl);
settings.put(KustoSinkConfig.KUSTO_INGEST_URL_CONF, clusterUrl);
settings.put(KustoSinkConfig.KUSTO_ENGINE_URL_CONF, engineUrl);
settings.put(KustoSinkConfig.KUSTO_TABLES_MAPPING_CONF, tableMapping);
settings.put(KustoSinkConfig.KUSTO_AUTH_APPID_CONF, appId);
settings.put(KustoSinkConfig.KUSTO_AUTH_APPKEY_CONF, appKey);

Просмотреть файл

@ -83,11 +83,9 @@ public class FileWriterTest {
File folder = new File(path);
boolean mkdirs = folder.mkdirs();
Assertions.assertTrue(mkdirs);
Assertions.assertEquals(0, Objects.requireNonNull(folder.listFiles()).length);
HashMap<String, Long> files = new HashMap<>();
final int MAX_FILE_SIZE = 100;
Consumer<SourceFile> trackFiles = (SourceFile f) -> files.put(f.path, f.rawBytes);
@ -281,7 +279,8 @@ public class FileWriterTest {
protected Map<String, String> getProperties() {
Map<String, String> settings = new HashMap<>();
settings.put(KustoSinkConfig.KUSTO_URL_CONF, "xxx");
settings.put(KustoSinkConfig.KUSTO_INGEST_URL_CONF, "xxx");
settings.put(KustoSinkConfig.KUSTO_ENGINE_URL_CONF, "xxx");
settings.put(KustoSinkConfig.KUSTO_TABLES_MAPPING_CONF, "mapping");
settings.put(KustoSinkConfig.KUSTO_AUTH_APPID_CONF, "some-appid");
settings.put(KustoSinkConfig.KUSTO_AUTH_APPKEY_CONF, "some-appkey");

Просмотреть файл

@ -8,95 +8,73 @@ import org.junit.jupiter.api.Test;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotEquals;
public class KustoSinkConnectorConfigTest {
Map<String, String> settings;
KustoSinkConfig config;
@BeforeEach
public void before() {
settings = new HashMap<>();
config = null;
}
private static final String DM_URL = "https://ingest-cluster_name.kusto.windows.net";
private static final String ENGINE_URL = "https://cluster_name.kusto.windows.net";
@Test
public void shouldAcceptValidConfig() {
// Adding required Configuration with no default value.
settings.put(KustoSinkConfig.KUSTO_URL_CONF, "kusto-url");
settings.put(KustoSinkConfig.KUSTO_TABLES_MAPPING_CONF, "mapping");
settings.put(KustoSinkConfig.KUSTO_AUTH_APPID_CONF, "some-appid");
settings.put(KustoSinkConfig.KUSTO_AUTH_APPKEY_CONF, "some-appkey");
settings.put(KustoSinkConfig.KUSTO_AUTH_AUTHORITY_CONF, "some-authority");
config = new KustoSinkConfig(settings);
Assertions.assertNotNull(config);
KustoSinkConfig config = new KustoSinkConfig(setupConfigs());
assertNotNull(config);
}
@Test
public void shouldHaveDefaultValues() {
// Adding required Configuration with no default value.
settings.put(KustoSinkConfig.KUSTO_URL_CONF, "kusto-url");
settings.put(KustoSinkConfig.KUSTO_TABLES_MAPPING_CONF, "mapping");
settings.put(KustoSinkConfig.KUSTO_AUTH_APPID_CONF, "some-appid");
settings.put(KustoSinkConfig.KUSTO_AUTH_APPKEY_CONF, "some-appkey");
settings.put(KustoSinkConfig.KUSTO_AUTH_AUTHORITY_CONF, "some-authority");
config = new KustoSinkConfig(settings);
Assertions.assertNotNull(config.getKustoUrl());
Assertions.assertTrue(config.getFlushSizeBytes() > 0);
Assertions.assertTrue(config.getFlushInterval() > 0);
Assertions.assertFalse(config.isDlqEnabled());
KustoSinkConfig config = new KustoSinkConfig(setupConfigs());
assertNotNull(config.getKustoUrl());
assertNotEquals(0, config.getFlushSizeBytes());
assertNotEquals(0, config.getFlushInterval());
assertFalse(config.isDlqEnabled());
assertEquals(BehaviorOnError.FAIL, config.getBehaviorOnError());
}
@Test
public void shouldThrowExceptionWhenKustoURLNotGiven() {
// Adding required Configuration with no default value.
settings.remove(KustoSinkConfig.KUSTO_URL_CONF);
settings.put(KustoSinkConfig.KUSTO_AUTH_APPID_CONF, "some-appid");
settings.put(KustoSinkConfig.KUSTO_AUTH_APPKEY_CONF, "some-appkey");
settings.put(KustoSinkConfig.KUSTO_AUTH_AUTHORITY_CONF, "some-authority");
Assertions.assertThrows(ConfigException.class, () -> {
new KustoSinkConfig(settings);
});
HashMap<String, String> settings = setupConfigs();
settings.remove(KustoSinkConfig.KUSTO_INGEST_URL_CONF);
new KustoSinkConfig(settings);
}
@Test
public void shouldUseKustoEngineUrlWhenGiven() {
    // Start from the shared baseline settings and set the engine URL explicitly.
    HashMap<String, String> props = setupConfigs();
    props.put(KustoSinkConfig.KUSTO_ENGINE_URL_CONF, ENGINE_URL);

    KustoSinkConfig sinkConfig = new KustoSinkConfig(props);

    // The configured engine URL must be returned unchanged.
    assertEquals(ENGINE_URL, sinkConfig.getKustoEngineUrl());
}
@Test(expected = ConfigException.class)
public void shouldThrowExceptionWhenAppIdNotGiven() {
settings.remove(KustoSinkConfig.KUSTO_URL_CONF);
settings.put(KustoSinkConfig.KUSTO_AUTH_APPKEY_CONF, "some-appkey");
settings.put(KustoSinkConfig.KUSTO_AUTH_AUTHORITY_CONF, "some-authority");
Assertions.assertThrows(ConfigException.class, () -> {
new KustoSinkConfig(settings);
});
// Adding required Configuration with no default value.
HashMap<String, String> settings = setupConfigs();
settings.remove(KustoSinkConfig.KUSTO_AUTH_APPID_CONF);
new KustoSinkConfig(settings);
}
@Test
@Test(expected = ConfigException.class)
public void shouldFailWhenBehaviorOnErrorIsIllConfigured() {
// Adding required Configuration with no default value.
settings.put(KustoSinkConfig.KUSTO_URL_CONF, "kusto-url");
settings.put(KustoSinkConfig.KUSTO_TABLES_MAPPING_CONF, "mapping");
settings.put(KustoSinkConfig.KUSTO_AUTH_APPID_CONF, "some-appid");
settings.put(KustoSinkConfig.KUSTO_AUTH_APPKEY_CONF, "some-appkey");
settings.put(KustoSinkConfig.KUSTO_AUTH_AUTHORITY_CONF, "some-authority");
HashMap<String, String> settings = setupConfigs();
settings.remove(KustoSinkConfig.KUSTO_INGEST_URL_CONF);
settings.put(KustoSinkConfig.KUSTO_BEHAVIOR_ON_ERROR_CONF, "DummyValue");
Assertions.assertThrows(ConfigException.class, () -> {
new KustoSinkConfig(settings);
});
new KustoSinkConfig(settings);
}
@Test
public void verifyDlqSettings() {
settings.put(KustoSinkConfig.KUSTO_URL_CONF, "kusto-url");
settings.put(KustoSinkConfig.KUSTO_TABLES_MAPPING_CONF, "mapping");
settings.put(KustoSinkConfig.KUSTO_AUTH_APPID_CONF, "some-appid");
settings.put(KustoSinkConfig.KUSTO_AUTH_APPKEY_CONF, "some-appkey");
settings.put(KustoSinkConfig.KUSTO_AUTH_AUTHORITY_CONF, "some-authority");
HashMap<String, String> settings = setupConfigs();
settings.put(KustoSinkConfig.KUSTO_DLQ_BOOTSTRAP_SERVERS_CONF, "localhost:8081,localhost:8082");
settings.put(KustoSinkConfig.KUSTO_DLQ_TOPIC_NAME_CONF, "dlq-error-topic");
config = new KustoSinkConfig(settings);
KustoSinkConfig config = new KustoSinkConfig(settings);
Assertions.assertTrue(config.isDlqEnabled());
assertEquals(Arrays.asList("localhost:8081", "localhost:8082"), config.getDlqBootstrapServers());
@ -106,16 +84,11 @@ public class KustoSinkConnectorConfigTest {
@Test
public void shouldProcessDlqConfigsWithPrefix() {
// Adding required Configuration with no default value.
settings.put(KustoSinkConfig.KUSTO_URL_CONF, "kusto-url");
settings.put(KustoSinkConfig.KUSTO_TABLES_MAPPING_CONF, "mapping");
settings.put(KustoSinkConfig.KUSTO_AUTH_APPID_CONF, "some-appid");
settings.put(KustoSinkConfig.KUSTO_AUTH_APPKEY_CONF, "some-appkey");
settings.put(KustoSinkConfig.KUSTO_AUTH_AUTHORITY_CONF, "some-authority");
HashMap<String, String> settings = setupConfigs();
settings.put("misc.deadletterqueue.security.protocol", "SASL_PLAINTEXT");
settings.put("misc.deadletterqueue.sasl.mechanism", "PLAIN");
config = new KustoSinkConfig(settings);
KustoSinkConfig config = new KustoSinkConfig(settings);
Assertions.assertNotNull(config);
@ -124,4 +97,15 @@ public class KustoSinkConnectorConfigTest {
assertEquals("SASL_PLAINTEXT", dlqProps.get("security.protocol"));
assertEquals("PLAIN", dlqProps.get("sasl.mechanism"));
}
/**
 * Builds a baseline set of connector settings for tests: ingest URL, engine URL,
 * table mappings, and the auth app id, app key and authority.
 *
 * <p>A new map is created on every call, so callers can add or remove entries
 * without affecting each other.
 *
 * @return a new mutable map holding the baseline settings
 */
public static HashMap<String, String> setupConfigs() {
    // Two mappings: topic1 -> db1/table1 as csv, topic2 -> db2/table2 as json with a named mapping.
    final String tableMappings = "[{'topic': 'topic1','db': 'db1', 'table': 'table1','format': 'csv'},{'topic': 'topic2','db': 'db2', 'table': 'table2','format': 'json','mapping': 'Mapping'}]";

    HashMap<String, String> settings = new HashMap<>();
    settings.put(KustoSinkConfig.KUSTO_INGEST_URL_CONF, DM_URL);
    settings.put(KustoSinkConfig.KUSTO_ENGINE_URL_CONF, ENGINE_URL);
    settings.put(KustoSinkConfig.KUSTO_TABLES_MAPPING_CONF, tableMappings);
    settings.put(KustoSinkConfig.KUSTO_AUTH_APPID_CONF, "some-appid");
    settings.put(KustoSinkConfig.KUSTO_AUTH_APPKEY_CONF, "some-appkey");
    settings.put(KustoSinkConfig.KUSTO_AUTH_AUTHORITY_CONF, "some-authority");
    return settings;
}
}

Просмотреть файл

@ -40,19 +40,12 @@ public class KustoSinkTaskTest {
}
@Test
public void testSinkTaskOpen() throws {
HashMap<String, String> props = new HashMap<>();
props.put(KustoSinkConfig.KUSTO_URL_CONF, "https://cluster_name.kusto.windows.net");
props.put(KustoSinkConfig.KUSTO_TABLES_MAPPING_CONF, "[{'topic': 'topic1','db': 'db1', 'table': 'table1','format': 'csv'},{'topic': 'topic2','db': 'db1', 'table': 'table1','format': 'json','mapping': 'Mapping'}]");
props.put(KustoSinkConfig.KUSTO_AUTH_APPID_CONF, "some-appid");
props.put(KustoSinkConfig.KUSTO_AUTH_APPKEY_CONF, "some-appkey");
props.put(KustoSinkConfig.KUSTO_AUTH_AUTHORITY_CONF, "some-authority");
public void testSinkTaskOpen() {
HashMap<String, String> configs = KustoSinkConnectorConfigTest.setupConfigs();
KustoSinkTask kustoSinkTask = new KustoSinkTask();
KustoSinkTask kustoSinkTaskSpy = spy(kustoSinkTask);
doNothing().when(kustoSinkTaskSpy).validateTableMappings(Mockito.<KustoSinkConfig>any());
kustoSinkTaskSpy.start(props);
kustoSinkTaskSpy.start(configs);
ArrayList<TopicPartition> tps = new ArrayList<>();
tps.add(new TopicPartition("topic1", 1));
tps.add(new TopicPartition("topic1", 2));
@ -64,19 +57,12 @@ public class KustoSinkTaskTest {
}
@Test
public void testSinkTaskPutRecord() throws {
HashMap<String, String> props = new HashMap<>();
props.put(KustoSinkConfig.KUSTO_URL_CONF, "https://cluster_name.kusto.windows.net");
props.put(KustoSinkConfig.KUSTO_SINK_TEMP_DIR_CONF, System.getProperty("java.io.tmpdir"));
props.put(KustoSinkConfig.KUSTO_TABLES_MAPPING_CONF, "[{'topic': 'topic1','db': 'db1', 'table': 'table1','format': 'csv'},{'topic': 'testing1','db': 'db1', 'table': 'table1','format': 'json','mapping': 'Mapping'}]");
props.put(KustoSinkConfig.KUSTO_AUTH_APPID_CONF, "some-appid");
props.put(KustoSinkConfig.KUSTO_AUTH_APPKEY_CONF, "some-appkey");
props.put(KustoSinkConfig.KUSTO_AUTH_AUTHORITY_CONF, "some-authority");
public void testSinkTaskPutRecord() {
HashMap<String, String> configs = KustoSinkConnectorConfigTest.setupConfigs();
KustoSinkTask kustoSinkTask = new KustoSinkTask();
KustoSinkTask kustoSinkTaskSpy = spy(kustoSinkTask);
doNothing().when(kustoSinkTaskSpy).validateTableMappings(Mockito.<KustoSinkConfig>any());
kustoSinkTaskSpy.start(props);
kustoSinkTaskSpy.start(configs);
ArrayList<TopicPartition> tps = new ArrayList<>();
TopicPartition tp = new TopicPartition("topic1", 1);
@ -95,18 +81,12 @@ public class KustoSinkTaskTest {
@Test
public void testSinkTaskPutRecordMissingPartition() {
HashMap<String, String> props = new HashMap<>();
props.put(KustoSinkConfig.KUSTO_URL_CONF, "https://cluster_name.kusto.windows.net");
props.put(KustoSinkConfig.KUSTO_SINK_TEMP_DIR_CONF, System.getProperty("java.io.tmpdir"));
props.put(KustoSinkConfig.KUSTO_TABLES_MAPPING_CONF, "[{'topic': 'topic1','db': 'db1', 'table': 'table1','format': 'csv'},{'topic': 'topic2','db': 'db1', 'table': 'table1','format': 'json','mapping': 'Mapping'}]");
props.put(KustoSinkConfig.KUSTO_AUTH_APPID_CONF, "some-appid");
props.put(KustoSinkConfig.KUSTO_AUTH_APPKEY_CONF, "some-appkey");
props.put(KustoSinkConfig.KUSTO_AUTH_AUTHORITY_CONF, "some-authority");
HashMap<String, String> configs = KustoSinkConnectorConfigTest.setupConfigs();
configs.put(KustoSinkConfig.KUSTO_SINK_TEMP_DIR_CONF, System.getProperty("java.io.tmpdir"));
KustoSinkTask kustoSinkTask = new KustoSinkTask();
KustoSinkTask kustoSinkTaskSpy = spy(kustoSinkTask);
doNothing().when(kustoSinkTaskSpy).validateTableMappings(Mockito.<KustoSinkConfig>any());
kustoSinkTaskSpy.start(props);
kustoSinkTaskSpy.start(configs);
ArrayList<TopicPartition> tps = new ArrayList<>();
tps.add(new TopicPartition("topic1", 1));
@ -120,22 +100,15 @@ public class KustoSinkTaskTest {
Throwable exception = assertThrows(ConnectException.class, () -> kustoSinkTaskSpy.put(records));
assertEquals("Received a record without a mapped writer for topic:partition(topic2:1), dropping record.", exception.getMessage());
}
@Test
public void getTable() {
HashMap<String, String> props = new HashMap<>();
props.put(KustoSinkConfig.KUSTO_URL_CONF, "https://cluster_name.kusto.windows.net");
props.put(KustoSinkConfig.KUSTO_TABLES_MAPPING_CONF, "[{'topic': 'topic1','db': 'db1', 'table': 'table1','format': 'csv'},{'topic': 'topic2','db': 'db2', 'table': 'table2','format': 'json','mapping': 'Mapping'}]");
props.put(KustoSinkConfig.KUSTO_AUTH_APPID_CONF, "some-appid");
props.put(KustoSinkConfig.KUSTO_AUTH_APPKEY_CONF, "some-appkey");
props.put(KustoSinkConfig.KUSTO_AUTH_AUTHORITY_CONF, "some-authority");
HashMap<String, String> configs = KustoSinkConnectorConfigTest.setupConfigs();
KustoSinkTask kustoSinkTask = new KustoSinkTask();
KustoSinkTask kustoSinkTaskSpy = spy(kustoSinkTask);
doNothing().when(kustoSinkTaskSpy).validateTableMappings(Mockito.<KustoSinkConfig>any());
kustoSinkTaskSpy.start(props);
kustoSinkTaskSpy.start(configs);
{
// single table mapping should cause all topics to be mapped to a single table
Assertions.assertEquals("db1", kustoSinkTaskSpy.getIngestionProps("topic1").ingestionProperties.getDatabaseName());

Просмотреть файл

@ -26,7 +26,8 @@ import static org.mockito.Mockito.*;
public class TopicPartitionWriterTest {
// TODO: should probably find a better way to mock internal class (FileWriter)...
private File currentDirectory;
private static final String KUSTO_CLUSTER_URL = "https://ingest-cluster.kusto.windows.net";
private static final String KUSTO_INGEST_CLUSTER_URL = "https://ingest-cluster.kusto.windows.net";
private static final String KUSTO_CLUSTER_URL = "https://cluster.kusto.windows.net";
private static final String DATABASE = "testdb1";
private static final String TABLE = "testtable1";
private static final String basePath = "somepath";
@ -248,7 +249,8 @@ public class TopicPartitionWriterTest {
private Map<String, String> getKustoConfigs(String basePath, long fileThreshold, long flushInterval) {
Map<String, String> settings = new HashMap<>();
settings.put(KustoSinkConfig.KUSTO_URL_CONF, KUSTO_CLUSTER_URL);
settings.put(KustoSinkConfig.KUSTO_INGEST_URL_CONF, KUSTO_INGEST_CLUSTER_URL);
settings.put(KustoSinkConfig.KUSTO_ENGINE_URL_CONF, KUSTO_CLUSTER_URL);
settings.put(KustoSinkConfig.KUSTO_TABLES_MAPPING_CONF, "mapping");
settings.put(KustoSinkConfig.KUSTO_AUTH_APPID_CONF, "some-appid");
settings.put(KustoSinkConfig.KUSTO_AUTH_APPKEY_CONF, "some-appkey");