* ARM support for databricks

* PR feedback

* Create Databricks in the existing VNet; update the HDInsight Kafka ZooKeeper VM size to Standard_A4_v2; add a port 443 rule for HDInsight
This commit is contained in:
Rohit Agrawal - MSFT 2019-08-12 12:01:11 -07:00 коммит произвёл GitHub
Родитель 206334f306
Коммит 4923294d29
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
17 изменённых файлов: 914 добавлений и 194 удалений

Просмотреть файл

@ -6,7 +6,7 @@
"icon": "/img/iot.png",
"displayName": "###REQUIRED###",
"properties": {
"inputEventHubSubscriptionId": "keyvault://${serviceKeyVaultName}/$clientSecretPrefix-subscriptionId",
"inputEventHubSubscriptionId": "$keyvaultPrefix://${serviceKeyVaultName}/$clientSecretPrefix-subscriptionId",
"inputEventHubResourceGroupName": "${eventHubResourceGroupName}"
},
"commonProcessor": {
@ -31,7 +31,7 @@
},
"process": {
"metric": {
"eventhub": "keyvault://${sparkKeyVaultName}/${metricEventHubConnectionStringKey}"
"eventhub": "$keyvaultPrefix://${sparkKeyVaultName}/${metricEventHubConnectionStringKey}"
},
"timestampColumn": "${processTimestampColumn}",
"watermark": "${processWatermark}",
@ -46,7 +46,7 @@
},
"outputs": "${outputs}"
},
"sparkJobTemplateRef": "DataXDirect",
"sparkJobTemplateRef": "$dataxJobTemplate",
"jobCommonTokens": {
"jobName": "${name}",
"sparkJobName": "DataXDirect-${name}",
@ -54,6 +54,10 @@
"sparkJobExecutorLogLevel": "INFO",
"sparkJobNumExecutors": "${guiSparkJobNumExecutors}",
"sparkJobExecutorMemory": "${guiSparkJobExecutorMemory}",
"sparkJobDatabricksMinWorkers" : "${guiSparkJobDatabricksMinWorkers}",
"sparkJobDatabricksMaxWorkers" : "${guiSparkJobDatabricksMaxWorkers}",
"sparkDatabricksToken" : "${guiSparkDatabricksToken}",
"sparkJobDatabricksAutoScale" : "${guiSparkJobDatabricksAutoScale}",
"processedSchemaPath": null
},
"jobs": [
@ -174,6 +178,82 @@
}
}
},
{
"name": "DataXDirectDatabricks",
"content": {
"name" : "${sparkJobName}",
"cluster" : "$sparkName",
"databricksToken" : "${sparkDatabricksToken}",
"options" : {
"name" : "${sparkJobName}",
"new_cluster" : {
"spark_version" : "$databricksClusterSparkVersion",
"node_type_id" : "$databricksClusterNodeType",
"autoscale" : {
"min_workers" : "${sparkJobDatabricksMinWorkers}",
"max_workers" : "${sparkJobDatabricksMaxWorkers}"
},
"num_workers" : "${sparkJobDatabricksMinWorkers}",
"enableAutoscale" : "${sparkJobDatabricksAutoScale}",
"spark_conf" : {
"spark\uff0edatabricks\uff0edelta\uff0epreview\uff0eenabled" : true,
"spark\uff0esql\uff0ehive\uff0emetastore\uff0eversion" : "1.2.1",
"spark\uff0esql\uff0ehive\uff0emetastore\uff0ejars" : "builtin"
},
"spark_env_vars" : {
"DATAX_DEFAULTVAULTNAME" : "$sparkKVName"
}
},
"libraries" : [
{
"jar" : "dbfs:/datax/applicationinsights-core-2.2.1.jar"
},
{
"jar" : "dbfs:/datax/azure-documentdb-1.16.1.jar"
},
{
"jar" : "dbfs:/datax/azure-eventhubs-1.2.1.jar"
},
{
"jar" : "dbfs:/datax/azure-eventhubs-spark_2.11-2.3.6.jar"
},
{
"jar" : "dbfs:/datax/azure-keyvault-webkey-1.1.jar"
},
{
"jar" : "dbfs:/datax/datax-core_2.4_2.11-1.2.0.jar"
},
{
"jar" : "dbfs:/datax/datax-host_2.4_2.11-1.2.0.jar"
},
{
"jar" : "dbfs:/datax/datax-keyvault_2.4_2.11-1.2.0-with-dependencies.jar"
},
{
"jar" : "dbfs:/datax/datax-udf-samples_2.4_2.11-1.2.0.jar"
},
{
"jar" : "dbfs:/datax/datax-utility_2.4_2.11-1.2.0.jar"
},
{
"jar" : "dbfs:/datax/java-uuid-generator-3.1.5.jar"
},
{
"jar" : "dbfs:/datax/proton-j-0.31.0.jar"
},
{
"jar" : "dbfs:/datax/scala-java8-compat_2.11-0.9.0.jar"
}
],
"spark_jar_task" : {
"main_class_name" : "datax.app.DirectStreamingApp",
"parameters" : [
"conf=${sparkJobConfigFilePath}"
]
}
}
}
},
{
"name" : "flattener",
"content" : {
@ -597,6 +677,88 @@
}
}
},
{
"name" : "kafkaDataXDirectDatabricks",
"content" : {
"name" : "${sparkJobName}",
"cluster" : "$sparkName",
"databricksToken" : "${sparkDatabricksToken}",
"options" : {
"name" : "${sparkJobName}",
"new_cluster" : {
"spark_version" : "$databricksClusterSparkVersion",
"node_type_id" : "$databricksClusterNodeType",
"autoscale" : {
"min_workers" : "${sparkJobDatabricksMinWorkers}",
"max_workers" : "${sparkJobDatabricksMaxWorkers}"
},
"num_workers" : "${sparkJobDatabricksMinWorkers}",
"enableAutoscale" : "${sparkJobDatabricksAutoScale}",
"spark_conf" : {
"spark\uff0edatabricks\uff0edelta\uff0epreview\uff0eenabled" : true,
"spark\uff0esql\uff0ehive\uff0emetastore\uff0eversion" : "1.2.1",
"spark\uff0esql\uff0ehive\uff0emetastore\uff0ejars" : "builtin"
},
"spark_env_vars" : {
"DATAX_DEFAULTVAULTNAME" : "$sparkKVName"
}
},
"libraries" : [
{
"jar" : "dbfs:/datax/applicationinsights-core-2.2.1.jar"
},
{
"jar" : "dbfs:/datax/azure-documentdb-1.16.1.jar"
},
{
"jar" : "dbfs:/datax/azure-eventhubs-1.2.1.jar"
},
{
"jar" : "dbfs:/datax/azure-eventhubs-spark_2.11-2.3.6.jar"
},
{
"jar" : "dbfs:/datax/azure-keyvault-webkey-1.1.jar"
},
{
"jar" : "dbfs:/datax/datax-core_2.4_2.11-1.2.0.jar"
},
{
"jar" : "dbfs:/datax/datax-host_2.4_2.11-1.2.0.jar"
},
{
"jar" : "dbfs:/datax/datax-keyvault_2.4_2.11-1.2.0-with-dependencies.jar"
},
{
"jar" : "dbfs:/datax/datax-udf-samples_2.4_2.11-1.2.0.jar"
},
{
"jar" : "dbfs:/datax/datax-utility_2.4_2.11-1.2.0.jar"
},
{
"jar" : "dbfs:/datax/java-uuid-generator-3.1.5.jar"
},
{
"jar" : "dbfs:/datax/proton-j-0.31.0.jar"
},
{
"jar" : "dbfs:/datax/scala-java8-compat_2.11-0.9.0.jar"
},
{
"jar" : "dbfs:/datax/spark-streaming-kafka-0-10_2.11-2.4.0.jar"
},
{
"jar" : "dbfs:/datax/kafka-clients-2.0.0.jar"
}
],
"spark_jar_task" : {
"main_class_name" : "datax.app.DirectKafkaStreamingApp",
"parameters" : [
"conf=${sparkJobConfigFilePath}"
]
}
}
}
},
{
"name" : "kafkaFlowConfig",
"content" : {
@ -604,7 +766,7 @@
"icon" : "/img/iot.png",
"displayName" : "###REQUIRED###",
"properties" : {
"inputEventHubSubscriptionId" : "keyvault://${serviceKeyVaultName}/$clientSecretPrefix-subscriptionId",
"inputEventHubSubscriptionId" : "$keyvaultPrefix://${serviceKeyVaultName}/$clientSecretPrefix-subscriptionId",
"inputEventHubResourceGroupName" : "${eventHubResourceGroupName}"
},
"commonProcessor" : {
@ -630,7 +792,7 @@
},
"process" : {
"metric" : {
"eventhub" : "keyvault://${sparkKeyVaultName}/${metricEventHubConnectionStringKey}"
"eventhub" : "$keyvaultPrefix://${sparkKeyVaultName}/${metricEventHubConnectionStringKey}"
},
"timestampColumn" : "${processTimestampColumn}",
"watermark" : "${processWatermark}",
@ -645,7 +807,7 @@
},
"outputs" : "${outputs}"
},
"sparkJobTemplateRef" : "kafkaDataXDirect",
"sparkJobTemplateRef" : "$dataxKafkaJobTemplate",
"jobCommonTokens" : {
"jobName" : "${name}",
"sparkJobName" : "DataXDirect-${name}",
@ -653,6 +815,10 @@
"sparkJobExecutorLogLevel" : "INFO",
"sparkJobNumExecutors" : "${guiSparkJobNumExecutors}",
"sparkJobExecutorMemory" : "${guiSparkJobExecutorMemory}",
"sparkJobDatabricksMinWorkers" : "${guiSparkJobDatabricksMinWorkers}",
"sparkJobDatabricksMaxWorkers" : "${guiSparkJobDatabricksMaxWorkers}",
"sparkDatabricksToken" : "${guiSparkDatabricksToken}",
"sparkJobDatabricksAutoScale" : "${guiSparkJobDatabricksAutoScale}",
"processedSchemaPath" : null
},
"jobs" : [
@ -725,7 +891,7 @@
"icon" : "/img/iot.png",
"displayName" : "###REQUIRED###",
"properties" : {
"inputEventHubSubscriptionId" : "keyvault://${serviceKeyVaultName}/web-subscriptionId",
"inputEventHubSubscriptionId" : "$keyvaultPrefix://${serviceKeyVaultName}/web-subscriptionId",
"inputEventHubResourceGroupName" : "${eventHubResourceGroupName}"
},
"commonProcessor" : {
@ -741,7 +907,7 @@
"process" : {
"metric" : {
"eventhub" :
"keyvault://${sparkKeyVaultName}/${metricEventHubConnectionStringKey}"
"$keyvaultPrefix://${sparkKeyVaultName}/${metricEventHubConnectionStringKey}"
},
"timestampColumn" : "${processTimestampColumn}",
"watermark" : "${processWatermark}",
@ -756,7 +922,7 @@
},
"outputs" : "${outputs}"
},
"sparkJobTemplateRef" : "DataXBatch",
"sparkJobTemplateRef" : "$dataxBatchJobTemplate",
"jobCommonTokens" : {
"jobName" : "${name}",
"sparkJobName" : "DataXBatch-${name}",
@ -764,6 +930,10 @@
"sparkJobExecutorLogLevel" : "INFO",
"sparkJobNumExecutors" : "${guiSparkJobNumExecutors}",
"sparkJobExecutorMemory" : "${guiSparkJobExecutorMemory}",
"sparkJobDatabricksMinWorkers" : "${guiSparkJobDatabricksMinWorkers}",
"sparkJobDatabricksMaxWorkers" : "${guiSparkJobDatabricksMaxWorkers}",
"sparkDatabricksToken" : "${guiSparkDatabricksToken}",
"sparkJobDatabricksAutoScale" : "${guiSparkJobDatabricksAutoScale}",
"processedSchemaPath" : null
},
"jobs" : [
@ -883,5 +1053,81 @@
}
}
}
},
{
"name" : "DataXBatchDatabricks",
"content" : {
"name" : "${sparkJobName}",
"cluster" : "$sparkName",
"databricksToken" : "${sparkDatabricksToken}",
"options" : {
"name" : "${sparkJobName}",
"new_cluster" : {
"spark_version" : "$databricksClusterSparkVersion",
"node_type_id" : "$databricksClusterNodeType",
"autoscale" : {
"min_workers" : "${sparkJobDatabricksMinWorkers}",
"max_workers" : "${sparkJobDatabricksMaxWorkers}"
},
"num_workers" : "${sparkJobDatabricksMinWorkers}",
"enableAutoscale" : "${sparkJobDatabricksAutoScale}",
"spark_conf" : {
"spark\uff0edatabricks\uff0edelta\uff0epreview\uff0eenabled" : true,
"spark\uff0esql\uff0ehive\uff0emetastore\uff0eversion" : "1.2.1",
"spark\uff0esql\uff0ehive\uff0emetastore\uff0ejars" : "builtin"
},
"spark_env_vars" : {
"DATAX_DEFAULTVAULTNAME" : "$sparkKVName"
}
},
"libraries" : [
{
"jar" : "dbfs:/datax/applicationinsights-core-2.2.1.jar"
},
{
"jar" : "dbfs:/datax/azure-documentdb-1.16.1.jar"
},
{
"jar" : "dbfs:/datax/azure-eventhubs-1.2.1.jar"
},
{
"jar" : "dbfs:/datax/azure-eventhubs-spark_2.11-2.3.6.jar"
},
{
"jar" : "dbfs:/datax/azure-keyvault-webkey-1.1.jar"
},
{
"jar" : "dbfs:/datax/datax-core_2.4_2.11-1.2.0.jar"
},
{
"jar" : "dbfs:/datax/datax-host_2.4_2.11-1.2.0.jar"
},
{
"jar" : "dbfs:/datax/datax-keyvault_2.4_2.11-1.2.0-with-dependencies.jar"
},
{
"jar" : "dbfs:/datax/datax-udf-samples_2.4_2.11-1.2.0.jar"
},
{
"jar" : "dbfs:/datax/datax-utility_2.4_2.11-1.2.0.jar"
},
{
"jar" : "dbfs:/datax/java-uuid-generator-3.1.5.jar"
},
{
"jar" : "dbfs:/datax/proton-j-0.31.0.jar"
},
{
"jar" : "dbfs:/datax/scala-java8-compat_2.11-0.9.0.jar"
}
],
"spark_jar_task" : {
"main_class_name" : "datax.app.BatchApp",
"parameters" : [
"conf=${sparkJobConfigFilePath}"
]
}
}
}
}
]

Просмотреть файл

@ -41,6 +41,8 @@
"interactiveQueryDefaultContainer" : "defaultdx",
"sparkClusterName" : "$sparkName",
"subscriptionId" : "keyvault://$servicesKVName/$clientSecretPrefix-subscriptionId",
"sparkConnectionString" : "keyvault://$servicesKVName/$serviceSecretPrefix-livyconnectionstring-$sparkName"
"sparkConnectionString" : "keyvault://$servicesKVName/$serviceSecretPrefix-livyconnectionstring-$sparkName",
"sparkRegion" : "$resourceLocation",
"sparkType" : "$sparkType"
}
]

Просмотреть файл

@ -0,0 +1,33 @@
{
"$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentParameters.json#",
"contentVersion": "1.0.0.0",
"parameters": {
"default_resource_location": {
"value": "$resourceLocation"
},
"sparkClusterName": {
"value": "$sparkName"
},
"databricksSku": {
"value": "$databricksSku"
},
"virtualNetworkName": {
"value": "VNet-$serviceFabricName"
},
"subnetdb1Name": {
"value": "subnet-db-1"
},
"subnetdb1Prefix": {
"value": "10.0.1.0/24"
},
"subnetdb2Name": {
"value": "subnet-db-2"
},
"subnetdb2Prefix": {
"value": "10.0.2.0/24"
},
"dbResourceGroupName": {
"value": "$dbResourceGroupName"
}
}
}

Просмотреть файл

@ -32,9 +32,9 @@
"vaults_fabricRDPKV_name": {
"value": "$fabricRDPKVName"
},
"sourceVaultValue": {
"value": "$sfKVName"
},
"sourceVaultValue": {
"value": "$sfKVName"
},
"redis_name": {
"value": "$redisName"
},
@ -86,23 +86,29 @@
"certType": {
"value": "$certtype"
},
"virtualNetworkName": {
"value": "VNet-$serviceFabricName"
},
"addressPrefix": {
"value": "10.0.0.0/16"
},
"computeLocation": {
"value": "$resourceLocationForServiceFabric"
},
"subnet0Name": {
"value": "Subnet-0"
},
"subnet0Prefix": {
"value": "10.0.0.0/24"
},
"clusterName": {
"value": "$serviceFabricName"
}
"virtualNetworkName": {
"value": "VNet-$serviceFabricName"
},
"addressPrefix": {
"value": "10.0.0.0/16"
},
"computeLocation": {
"value": "$resourceLocationForServiceFabric"
},
"subnet0Name": {
"value": "Subnet-0"
},
"subnet0Prefix": {
"value": "10.0.0.0/24"
},
"clusterName": {
"value": "$serviceFabricName"
},
"sparkType": {
"value": "$sparkType"
},
"storageAccounts_spark_name": {
"value": "$sparkBlobAccountName"
}
}
}

Просмотреть файл

@ -52,8 +52,8 @@
"storageAccounts_spark_name": {
"value": "$sparkBlobAccountName"
},
"sparkManagedIdentity": {
"value": "$sparkManagedIdentityName"
"sparkManagedIdentity": {
"value": "$sparkManagedIdentityName"
},
"minInstanceCountSparkHeadnode": {
"value": $minInstanceCountSparkHeadnode
@ -73,11 +73,11 @@
"userAssignedIdentitiesName": {
"value": "$sparkManagedIdentityName"
},
"virtualNetworkName": {
"value": "VNet-$serviceFabricName"
},
"subnet0Name": {
"value": "Subnet-0"
}
"virtualNetworkName": {
"value": "VNet-$serviceFabricName"
},
"subnet0Name": {
"value": "Subnet-0"
}
}
}

Просмотреть файл

@ -0,0 +1,299 @@
{
"$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#",
"contentVersion": "1.0.0.0",
"parameters": {
"default_resource_location": {
"type": "string"
},
"sparkClusterName": {
"type": "string"
},
"databricksSku": {
"type": "string"
},
"virtualNetworkName": {
"type": "string"
},
"subnetdb1Name": {
"type": "string"
},
"subnetdb1Prefix": {
"type": "string"
},
"subnetdb2Name": {
"type": "string"
},
"subnetdb2Prefix": {
"type": "string"
},
"dbResourceGroupName": {
"type": "string"
}
},
"variables": {
"azureRegionToControlPlaneIp": {
"australiacentral": "13.70.105.50/32",
"australiacentral2": "13.70.105.50/32",
"australiaeast": "13.70.105.50/32",
"australiasoutheast": "13.70.105.50/32",
"canadacentral": "40.85.223.25/32",
"canadaeast": "40.85.223.25/32",
"centralindia": "104.211.101.14/32",
"centralus": "23.101.152.95/32",
"eastasia": "52.187.0.85/32",
"eastus": "23.101.152.95/32",
"eastus2": "23.101.152.95/32",
"eastus2euap": "23.101.152.95/32",
"japaneast": "13.78.19.235/32",
"japanwest": "13.78.19.235/32",
"northcentralus": "23.101.152.95/32",
"northeurope": "23.100.0.135/32",
"southcentralus": "40.83.178.242/32",
"southeastasia": "52.187.0.85/32",
"southindia": "104.211.101.14/32",
"uksouth": "51.140.203.27/32",
"ukwest": "51.140.203.27/32",
"westcentralus": "40.83.178.242/32",
"westeurope": "23.100.0.135/32",
"westindia": "104.211.101.14/32",
"westus": "40.83.178.242/32",
"westus2": "40.83.178.242/32",
"koreacentral": "52.141.6.181/32",
"southafricanorth": "40.127.5.82/32"
},
"azureRegionToWebappIp": {
"australiacentral": "13.75.218.172/32",
"australiacentral2": "13.75.218.172/32",
"australiaeast": "13.75.218.172/32",
"australiasoutheast": "13.75.218.172/32",
"canadacentral": "13.71.184.74/32",
"canadaeast": "13.71.184.74/32",
"centralindia": "104.211.89.81/32",
"centralus": "40.70.58.221/32",
"eastasia": "52.187.145.107/32",
"eastus": "40.70.58.221/32",
"eastus2": "40.70.58.221/32",
"eastus2euap": "40.70.58.221/32",
"japaneast": "52.246.160.72/32",
"japanwest": "52.246.160.72/32",
"northcentralus": "40.70.58.221/32",
"northeurope": "52.232.19.246/32",
"southcentralus": "40.118.174.12/32",
"southeastasia": "52.187.145.107/32",
"southindia": "104.211.89.81/32",
"uksouth": "51.140.204.4/32",
"ukwest": "51.140.204.4/32",
"westcentralus": "40.118.174.12/32",
"westeurope": "52.232.19.246/32",
"westindia": "104.211.89.81/32",
"westus": "40.118.174.12/32",
"westus2": "40.118.174.12/32",
"koreacentral": "52.141.22.164/32",
"southafricanorth": "102.133.224.24/32"
},
"controlPlaneIp": "[variables('azureRegionToControlPlaneIp')[parameters('default_resource_location')]]",
"webappIp": "[variables('azureRegionToWebappIp')[parameters('default_resource_location')]]",
"nsgName": "[concat(parameters('sparkClusterName'), '-db-nsg')]",
"nsgId": "[resourceId('Microsoft.Network/networkSecurityGroups', variables('nsgName'))]",
"managedResourceGroupName": "[concat('databricks-rg-', parameters('sparkClusterName'), '-', uniqueString(parameters('sparkClusterName'), resourceGroup().id))]"
},
"resources": [
{
"apiVersion": "2018-02-01",
"type": "Microsoft.Network/networkSecurityGroups",
"location": "[parameters('default_resource_location')]",
"name": "[variables('nsgName')]",
"properties": {
"securityRules": [
{
"name": "databricks-worker-to-worker",
"properties": {
"access": "Allow",
"description": "Required for worker nodes communication within a cluster.",
"destinationAddressPrefix": "*",
"destinationPortRange": "*",
"direction": "Inbound",
"priority": 105,
"protocol": "*",
"sourceAddressPrefix": "VirtualNetwork",
"sourcePortRange": "*"
}
},
{
"name": "databricks-control-plane-ssh",
"properties": {
"access": "Allow",
"description": "Required for Databricks control plane management of worker nodes.",
"destinationAddressPrefix": "*",
"destinationPortRange": "22",
"direction": "Inbound",
"priority": 106,
"protocol": "*",
"sourceAddressPrefix": "[variables('controlPlaneIp')]",
"sourcePortRange": "*"
}
},
{
"name": "databricks-control-plane-worker-proxy",
"properties": {
"access": "Allow",
"description": "Required for Databricks control plane communication with worker nodes.",
"destinationAddressPrefix": "*",
"destinationPortRange": "5557",
"direction": "Inbound",
"priority": 107,
"protocol": "*",
"sourceAddressPrefix": "[variables('controlPlaneIp')]",
"sourcePortRange": "*"
}
},
{
"name": "databricks-worker-to-webapp",
"properties": {
"access": "Allow",
"description": "Required for workers communication with Databricks Webapp.",
"destinationAddressPrefix": "[variables('webappIp')]",
"destinationPortRange": "*",
"direction": "Outbound",
"priority": 100,
"protocol": "*",
"sourceAddressPrefix": "*",
"sourcePortRange": "*"
}
},
{
"name": "databricks-worker-to-sql",
"properties": {
"access": "Allow",
"description": "Required for workers communication with Azure SQL services.",
"destinationAddressPrefix": "Sql",
"destinationPortRange": "*",
"direction": "Outbound",
"priority": 101,
"protocol": "*",
"sourceAddressPrefix": "*",
"sourcePortRange": "*"
}
},
{
"name": "databricks-worker-to-storage",
"properties": {
"access": "Allow",
"description": "Required for workers communication with Azure Storage services.",
"destinationAddressPrefix": "Storage",
"destinationPortRange": "*",
"direction": "Outbound",
"priority": 102,
"protocol": "*",
"sourceAddressPrefix": "*",
"sourcePortRange": "*"
}
},
{
"name": "databricks-worker-to-worker-outbound",
"properties": {
"access": "Allow",
"description": "Required for worker nodes communication within a cluster.",
"destinationAddressPrefix": "VirtualNetwork",
"destinationPortRange": "*",
"direction": "Outbound",
"priority": 103,
"protocol": "*",
"sourceAddressPrefix": "*",
"sourcePortRange": "*"
}
},
{
"name": "databricks-worker-to-any",
"properties": {
"access": "Allow",
"description": "Required for worker nodes communication with any destination.",
"destinationAddressPrefix": "*",
"destinationPortRange": "*",
"direction": "Outbound",
"priority": 104,
"protocol": "*",
"sourceAddressPrefix": "*",
"sourcePortRange": "*"
}
}
]
}
},
{
"apiVersion": "2017-05-10",
"name": "nestedTemplate",
"type": "Microsoft.Resources/deployments",
"dependsOn": [
"[concat('Microsoft.Network/networkSecurityGroups/', concat(parameters('sparkClusterName'), '-db-nsg'))]"
],
"resourceGroup": "[parameters('dbResourceGroupName')]",
"properties": {
"mode": "Incremental",
"template": {
"$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#",
"contentVersion": "1.0.0.0",
"parameters": {},
"variables": {},
"resources": [
{
"apiVersion": "2018-04-01",
"type": "Microsoft.Network/virtualNetworks/subnets",
"name": "[concat(parameters('virtualNetworkName'), '/', parameters('subnetdb1Name'))]",
"location": "[parameters('default_resource_location')]",
"properties": {
"addressPrefix": "[parameters('subnetdb1Prefix')]",
"networkSecurityGroup": {
"id": "[variables('nsgId')]"
}
}
},
{
"apiVersion": "2018-04-01",
"type": "Microsoft.Network/virtualNetworks/subnets",
"name": "[concat(parameters('virtualNetworkName'), '/', parameters('subnetdb2Name'))]",
"location": "[parameters('default_resource_location')]",
"dependsOn": [
"[concat('Microsoft.Network/virtualNetworks/', parameters('virtualNetworkName'), '/subnets/', parameters('subnetdb1Name'))]"
],
"properties": {
"addressPrefix": "[parameters('subnetdb2Prefix')]",
"networkSecurityGroup": {
"id": "[variables('nsgId')]"
}
}
}
]
},
"parameters": {}
}
},
{
"type": "Microsoft.Databricks/workspaces",
"name": "[parameters('sparkClusterName')]",
"location": "[parameters('default_resource_location')]",
"apiVersion": "2018-04-01",
"dependsOn": [
"[concat('Microsoft.Network/networkSecurityGroups/', variables('nsgName'))]",
"['Microsoft.Resources/deployments/nestedTemplate']"
],
"sku": {
"name": "[parameters('databricksSku')]"
},
"properties": {
"ManagedResourceGroupId": "[concat(subscription().id, '/resourceGroups/', variables('managedResourceGroupName'))]",
"parameters": {
"customVirtualNetworkId": {
"value": "[resourceId('Microsoft.Network/virtualNetworks', parameters('virtualNetworkName'))]"
},
"customPublicSubnetName": {
"value": "[parameters('subnetdb1Name')]"
},
"customPrivateSubnetName": {
"value": "[parameters('subnetdb2Name')]"
}
}
}
}
]
}

Просмотреть файл

@ -32,9 +32,9 @@
"vaults_fabricRDPKV_name": {
"type": "String"
},
"sourceVaultValue": {
"type": "string"
},
"sourceVaultValue": {
"type": "string"
},
"redis_name": {
"type": "String"
},
@ -86,35 +86,74 @@
"certType": {
"type": "String"
},
"virtualNetworkName": {
"type": "string"
},
"addressPrefix": {
"type": "string"
},
"computeLocation": {
"type": "string"
},
"subnet0Name": {
"type": "string"
},
"subnet0Prefix": {
"type": "string"
},
"clusterName": {
"type": "string"
}
"sparkType": {
"type": "String"
},
"virtualNetworkName": {
"type": "string"
},
"addressPrefix": {
"type": "string"
},
"computeLocation": {
"type": "string"
},
"subnet0Name": {
"type": "string"
},
"subnet0Prefix": {
"type": "string"
},
"clusterName": {
"type": "string"
},
"storageAccounts_spark_name": {
"type": "String"
}
},
"variables": {
"vNetApiVersion": "2015-06-15"
"vNetApiVersion": "2015-06-15",
"storageAccounts_containerName_defaultdx": "defaultdx",
"storageAccounts_containerName_diagnostics": "diagnostics",
"storageAccounts_containerName_deployment": "deployment",
"storageAccounts_containerName_scripts": "scripts",
"storageAccounts_containerName_samples": "samples",
"storageAccounts_containerName_usercontent": "usercontent",
"storageAccounts_containerName_flow": "flows",
"storageAccounts_containerName_checkpoints": "checkpoints"
},
"resources": [
{
"type": "Microsoft.Network/networkSecurityGroups",
"apiVersion": "2018-02-01",
"name": "[concat(parameters('virtualNetworkName'),'-nsg')]",
"location": "[parameters('computeLocation')]",
"properties": {
"securityRules": [
{
"name": "port_443",
"properties": {
"protocol": "*",
"sourcePortRange": "*",
"destinationPortRange": "443",
"sourceAddressPrefix": "*",
"destinationAddressPrefix": "*",
"access": "Allow",
"priority": 100,
"direction": "Inbound"
}
}
]
}
},
{
"apiVersion": "[variables('vNetApiVersion')]",
"type": "Microsoft.Network/virtualNetworks",
"name": "[parameters('virtualNetworkName')]",
"location": "[parameters('computeLocation')]",
"dependsOn": [],
"dependsOn": [
"[concat('Microsoft.Network/networkSecurityGroups/', concat(parameters('virtualNetworkName'),'-nsg'))]"
],
"properties": {
"addressSpace": {
"addressPrefixes": [
@ -125,7 +164,10 @@
{
"name": "[parameters('subnet0Name')]",
"properties": {
"addressPrefix": "[parameters('subnet0Prefix')]"
"addressPrefix": "[parameters('subnet0Prefix')]",
"networkSecurityGroup" : {
"id": "[resourceId('Microsoft.Network/networkSecurityGroups', concat(parameters('virtualNetworkName'),'-nsg'))]"
}
}
}
]
@ -598,6 +640,10 @@
{
"name": "DATAXDEV_CERTTYPE",
"value": "[parameters('certType')]"
},
{
"name": "DATAX_SPARK_TYPE",
"value": "[parameters('sparkType')]"
}
]
},
@ -812,6 +858,81 @@
"dependsOn": [
"[resourceId('Microsoft.Web/sites', parameters('sites_web_name'))]"
]
},
{
"type": "Microsoft.Storage/storageAccounts",
"name": "[parameters('storageAccounts_spark_name')]",
"apiVersion": "2015-05-01-preview",
"location": "[parameters('default_resource_location')]",
"properties": {
"accountType": "Standard_LRS"
},
"resources": [
{
"name": "[concat('default/', variables('storageAccounts_containerName_defaultdx'))]",
"type": "blobServices/containers",
"apiVersion": "2018-03-01-preview",
"dependsOn": [
"[parameters('storageAccounts_spark_name')]"
]
},
{
"name": "[concat('default/', variables('storageAccounts_containerName_diagnostics'))]",
"type": "blobServices/containers",
"apiVersion": "2018-03-01-preview",
"dependsOn": [
"[parameters('storageAccounts_spark_name')]"
]
},
{
"name": "[concat('default/', variables('storageAccounts_containerName_deployment'))]",
"type": "blobServices/containers",
"apiVersion": "2018-03-01-preview",
"dependsOn": [
"[parameters('storageAccounts_spark_name')]"
]
},
{
"name": "[concat('default/', variables('storageAccounts_containerName_scripts'))]",
"type": "blobServices/containers",
"apiVersion": "2018-03-01-preview",
"dependsOn": [
"[parameters('storageAccounts_spark_name')]"
]
},
{
"name": "[concat('default/', variables('storageAccounts_containerName_samples'))]",
"type": "blobServices/containers",
"apiVersion": "2018-03-01-preview",
"dependsOn": [
"[parameters('storageAccounts_spark_name')]"
]
},
{
"name": "[concat('default/', variables('storageAccounts_containerName_usercontent'))]",
"type": "blobServices/containers",
"apiVersion": "2018-03-01-preview",
"dependsOn": [
"[parameters('storageAccounts_spark_name')]"
]
},
{
"name": "[concat('default/', variables('storageAccounts_containerName_flow'))]",
"type": "blobServices/containers",
"apiVersion": "2018-03-01-preview",
"dependsOn": [
"[parameters('storageAccounts_spark_name')]"
]
},
{
"name": "[concat('default/', variables('storageAccounts_containerName_checkpoints'))]",
"type": "blobServices/containers",
"apiVersion": "2018-03-01-preview",
"dependsOn": [
"[parameters('storageAccounts_spark_name')]"
]
}
]
}
]
}

Просмотреть файл

@ -55,8 +55,8 @@
"storageAccounts_spark_name": {
"type": "String"
},
"sparkManagedIdentity": {
"type": "string"
"sparkManagedIdentity": {
"type": "string"
},
"minInstanceCountSparkHeadnode": {
"type": "int"
@ -76,42 +76,35 @@
"userAssignedIdentitiesName": {
"type": "string"
},
"virtualNetworkName": {
"type": "string"
},
"subnet0Name": {
"type": "string"
}
"virtualNetworkName": {
"type": "string"
},
"subnet0Name": {
"type": "string"
}
},
"variables": {
"storageAccounts_containerName_scripts": "scripts",
"storageAccounts_containerName_deployment": "deployment",
"storageAccounts_containerName_diagnostics": "diagnostics",
"storageAccounts_containerName_samples": "samples",
"storageAccounts_containerName_usercontent": "usercontent",
"storageAccounts_containerName_flow": "flows",
"storageAccounts_containerName_checkpoints": "checkpoints",
"storageAccountsURI": "[concat(parameters('storageAccounts_spark_name'),'.blob.core.windows.net')]",
"storageAccountsResourceID": "[resourceId('Microsoft.Storage/storageAccounts',parameters('storageAccounts_spark_name'))]",
"storageAccountsResourceID": "[resourceId('Microsoft.Storage/storageAccounts',parameters('storageAccounts_spark_name'))]",
"userAssignedIdentitiesResourceID": "[resourceId('Microsoft.ManagedIdentity/userAssignedIdentities',parameters('userAssignedIdentitiesName'))]",
"vnetID": "[resourceId('Microsoft.Network/virtualNetworks',parameters('virtualNetworkName'))]",
"subnet0Ref": "[concat(variables('vnetID'),'/subnets/',parameters('subnet0Name'))]"
"vnetID": "[resourceId('Microsoft.Network/virtualNetworks',parameters('virtualNetworkName'))]",
"subnet0Ref": "[concat(variables('vnetID'),'/subnets/',parameters('subnet0Name'))]"
},
"resources": [
{
"apiVersion": "2015-08-31-preview",
"name": "[parameters('sparkManagedIdentity')]",
"location": "[parameters('default_resource_location')]",
"type": "Microsoft.ManagedIdentity/userAssignedIdentities",
"properties": {}
},
"resources": [
{
"apiVersion": "2015-08-31-preview",
"name": "[parameters('sparkManagedIdentity')]",
"location": "[parameters('default_resource_location')]",
"type": "Microsoft.ManagedIdentity/userAssignedIdentities",
"properties": {}
},
{
"apiVersion": "2015-03-01-preview",
"name": "[parameters('sparkClusterName')]",
"type": "Microsoft.HDInsight/clusters",
"location": "[parameters('default_resource_location')]",
"dependsOn": [
"[variables('storageAccountsResourceID')]"
"[variables('userAssignedIdentitiesResourceID')]"
],
"properties": {
"clusterVersion": "[parameters('sparkClusterVersion')]",
@ -190,73 +183,6 @@
"[variables('userAssignedIdentitiesResourceID')]": {}
}
}
},
{
"type": "Microsoft.Storage/storageAccounts",
"name": "[parameters('storageAccounts_spark_name')]",
"apiVersion": "2015-05-01-preview",
"location": "[parameters('default_resource_location')]",
"properties": {
"accountType": "Standard_LRS"
},
"resources": [
{
"name": "[concat('default/', variables('storageAccounts_containerName_diagnostics'))]",
"type": "blobServices/containers",
"apiVersion": "2018-03-01-preview",
"dependsOn": [
"[parameters('storageAccounts_spark_name')]"
]
},
{
"name": "[concat('default/', variables('storageAccounts_containerName_deployment'))]",
"type": "blobServices/containers",
"apiVersion": "2018-03-01-preview",
"dependsOn": [
"[parameters('storageAccounts_spark_name')]"
]
},
{
"name": "[concat('default/', variables('storageAccounts_containerName_scripts'))]",
"type": "blobServices/containers",
"apiVersion": "2018-03-01-preview",
"dependsOn": [
"[parameters('storageAccounts_spark_name')]"
]
},
{
"name": "[concat('default/', variables('storageAccounts_containerName_samples'))]",
"type": "blobServices/containers",
"apiVersion": "2018-03-01-preview",
"dependsOn": [
"[parameters('storageAccounts_spark_name')]"
]
},
{
"name": "[concat('default/', variables('storageAccounts_containerName_usercontent'))]",
"type": "blobServices/containers",
"apiVersion": "2018-03-01-preview",
"dependsOn": [
"[parameters('storageAccounts_spark_name')]"
]
},
{
"name": "[concat('default/', variables('storageAccounts_containerName_flow'))]",
"type": "blobServices/containers",
"apiVersion": "2018-03-01-preview",
"dependsOn": [
"[parameters('storageAccounts_spark_name')]"
]
},
{
"name": "[concat('default/', variables('storageAccounts_containerName_checkpoints'))]",
"type": "blobServices/containers",
"apiVersion": "2018-03-01-preview",
"dependsOn": [
"[parameters('storageAccounts_spark_name')]"
]
}
]
}
]
}

Просмотреть файл

@ -163,6 +163,28 @@ function Get-Tokens {
$tokens.Add('tenantId', $tenantId )
$tokens.Add('userId', $userId )
$sparkType = 'hdinsight'
$keyvaultPrefix = 'keyvault'
$dataxJobTemplate = 'DataXDirect'
$dataxKafkaJobTemplate = 'kafkaDataXDirect'
$dataxBatchJobTemplate = 'DataXBatch'
if ($useDatabricks -eq 'y') {
$sparkType = 'databricks'
$keyvaultPrefix = 'secretscope'
$dataxJobTemplate = 'DataXDirectDatabricks'
$dataxKafkaJobTemplate = 'kafkaDataXDirectDatabricks'
$dataxBatchJobTemplate = 'DataXBatchDatabricks'
$tokens.Add('databricksClusterSparkVersion', $databricksClusterSparkVersion)
$tokens.Add('databricksClusterNodeType', $databricksClusterNodeType)
$tokens.Add('databricksSku', $databricksSku)
$tokens.Add('dbResourceGroupName', $resourceGroupName)
}
$tokens.Add('sparkType', $sparkType)
$tokens.Add('keyvaultPrefix', $keyvaultPrefix)
$tokens.Add('dataxJobTemplate', $dataxJobTemplate)
$tokens.Add('dataxKafkaJobTemplate', $dataxKafkaJobTemplate)
$tokens.Add('dataxBatchJobTemplate', $dataxBatchJobTemplate)
# CosmosDB
$tokens.Add('blobopsconnectionString', $blobopsconnectionString )
@ -491,6 +513,9 @@ function Setup-SecretsForSpark {
$secretName = $prefix + "livyconnectionstring-" + $sparkName
$tValue = "endpoint=https://$sparkName.azurehdinsight.net/livy;username=$sparkLogin;password=$sparkPwd"
if ($useDatabricks -eq 'y') {
$tValue = "endpoint=https://$resourceGroupLocation.azuredatabricks.net/api/2.0/;dbtoken="
}
Setup-Secret -VaultName $vaultName -SecretName $secretName -Value $tValue
}
@ -618,8 +643,7 @@ function Setup-Secrets {
function Setup-KVAccess {
# Get ObjectId of web app
$servicePrincipalId = az resource show -g $resourceGroupName --name $websiteName --resource-type Microsoft.Web/sites --query identity.principalId
# Get ObjectId of sparkManagedIdentityName
$SparkManagedIdentityId = az resource show -g $resourceGroupName --name $sparkManagedIdentityName --resource-type Microsoft.ManagedIdentity/userAssignedIdentities --query properties.principalId
# Get ObjectId of the virtual machine scale set (VMSS) identity
$vmssId = az resource show -g $resourceGroupName --name $vmNodeTypeName --resource-type Microsoft.Compute/virtualMachineScaleSets --query identity.principalId
@ -633,14 +657,18 @@ function Setup-KVAccess {
}
az keyvault set-policy --name $servicesKVName --object-id $servicePrincipalId --secret-permissions get, list, set > $null 2>&1
az keyvault set-policy --name $servicesKVName --object-id $servicePrincipalConfiggenId --secret-permissions get, list, set > $null 2>&1
az keyvault set-policy --name $servicesKVName --object-id $SparkManagedIdentityId --secret-permissions get, list, set > $null 2>&1
az keyvault set-policy --name $servicesKVName --object-id $servicePrincipalConfiggenId --secret-permissions get, list, set > $null 2>&1
az keyvault set-policy --name $servicesKVName --object-id $vmssId --secret-permissions get, list, set > $null 2>&1
az keyvault set-policy --name $sparkKVName --object-id $servicePrincipalId --secret-permissions get, list, set > $null 2>&1
az keyvault set-policy --name $sparkKVName --object-id $servicePrincipalConfiggenId --secret-permissions get, list, set, delete > $null 2>&1
az keyvault set-policy --name $sparkKVName --object-id $SparkManagedIdentityId --secret-permissions get, list, set > $null 2>&1
az keyvault set-policy --name $sparkKVName --object-id $vmssId --secret-permissions get, list, set > $null 2>&1
if($useDatabricks -eq 'n') {
# Get ObjectId of the Spark user-assigned managed identity (looked up only when Databricks is not used)
$SparkManagedIdentityId = az resource show -g $resourceGroupName --name $sparkManagedIdentityName --resource-type Microsoft.ManagedIdentity/userAssignedIdentities --query properties.principalId
az keyvault set-policy --name $servicesKVName --object-id $SparkManagedIdentityId --secret-permissions get, list, set > $null 2>&1
az keyvault set-policy --name $sparkKVName --object-id $SparkManagedIdentityId --secret-permissions get, list, set > $null 2>&1
}
}
# Import SSL Cert To Service Fabric
@ -810,12 +838,18 @@ if($resourceCreation -eq 'y') {
}
if($sparkCreation -eq 'y') {
Write-Host -ForegroundColor Green "Deploying resources (2/16 steps): A HDInsight cluster will be deployed"
Write-Host -ForegroundColor Green "Estimated time to complete: 20 mins"
Write-Host -ForegroundColor Green "Deploying resources (2/16 steps): A spark cluster will be deployed"
Setup-SecretsForSpark
$tokens = Get-Tokens
Deploy-Resources -templateName "Spark-Template.json" -paramName "Spark-parameter.json" -templatePath $templatePath -tokens $tokens
if ($useDatabricks -eq 'n') {
Write-Host -ForegroundColor Green "Estimated time to complete: 20 mins"
Deploy-Resources -templateName "Spark-Template.json" -paramName "Spark-parameter.json" -templatePath $templatePath -tokens $tokens
}
else {
Write-Host -ForegroundColor Green "Estimated time to complete: 5 mins"
Deploy-Resources -templateName "Databricks-Template.json" -paramName "Databricks-Parameter.json" -templatePath $templatePath -tokens $tokens
}
}
# Preparing certs...
@ -893,9 +927,11 @@ if ($setupSecrets -eq 'y') {
# Spark
if ($sparkCreation -eq 'y') {
Write-Host -ForegroundColor Green "Setting up ScriptActions... (6/16 steps)"
Write-Host -ForegroundColor Green "Estimated time to complete: 2 mins"
Add-ScriptActions
Write-Host -ForegroundColor Green "Setting up ScriptActions... (6/16 steps)"
if ($useDatabricks -eq 'n') {
Write-Host -ForegroundColor Green "Estimated time to complete: 2 mins"
Add-ScriptActions
}
}
# cosmosDB

Просмотреть файл

@ -136,7 +136,7 @@
"name": "zookeepernode",
"targetInstanceCount": 3,
"hardwareProfile": {
"vmSize": "A5"
"vmSize": "Standard_A4_V2"
},
"osProfile": {
"linuxOperatingSystemProfile": {

Просмотреть файл

@ -24,7 +24,7 @@
},
"process" : {
"metric" : {
"eventhub" : "keyvault://${sparkKeyVaultName}/${metricEventHubConnectionStringKey}"
"eventhub" : "$keyvaultPrefix://${sparkKeyVaultName}/${metricEventHubConnectionStringKey}"
},
"timestampColumn" : "${processTimestampColumn}",
"watermark" : "${processWatermark}",
@ -48,6 +48,10 @@
"sparkJobExecutorLogLevel" : "INFO",
"sparkJobNumExecutors" : "${guiSparkJobNumExecutors}",
"sparkJobExecutorMemory" : "${guiSparkJobExecutorMemory}",
"sparkJobDatabricksMinWorkers" : "${guiSparkJobDatabricksMinWorkers}",
"sparkJobDatabricksMaxWorkers" : "${guiSparkJobDatabricksMaxWorkers}",
"sparkDatabricksToken" : "${guiSparkDatabricksToken}",
"sparkJobDatabricksAutoScale" : "${guiSparkJobDatabricksAutoScale}",
"processedSchemaPath" : null
},
"jobs" : [
@ -362,13 +366,14 @@
"gui" : {
"name" : "eventhub",
"displayName" : "EventHub",
"databricksToken" : "",
"owner" : "eventhub",
"input" : {
"mode" : "streaming",
"type" : "events",
"properties" : {
"inputEventhubName" : "",
"inputEventhubConnection" : "keyvault://$sparkKVName/eventhub-input-eventhubconnectionstring",
"inputEventhubConnection" : "$keyvaultPrefix://$sparkKVName/eventhub-input-eventhubconnectionstring",
"windowDuration" : "60",
"timestampColumn" : "eventTimeStamp",
"watermarkValue" : "60",
@ -384,7 +389,7 @@
"type" : "csv",
"typeDisplay" : "CSV/TSV File",
"properties" : {
"path" : "keyvault://$sparkKVName/eventhub-referencedata-devicesdata",
"path" : "$keyvaultPrefix://$sparkKVName/eventhub-referencedata-devicesdata",
"delimiter" : ",",
"header" : true
}
@ -400,7 +405,7 @@
"type" : "jarUDF",
"properties" : {
"name" : null,
"path" : "keyvault://$sparkKVName/eventhub-jarpath-udfsample",
"path" : "$keyvaultPrefix://$sparkKVName/eventhub-jarpath-udfsample",
"class" : "datax.sample.udf.UdfHelloWorld",
"libs" : [ ]
},
@ -412,7 +417,10 @@
],
"jobconfig" : {
"jobNumExecutors" : "4",
"jobExecutorMemory" : "1024"
"jobExecutorMemory" : "1024",
"jobDatabricksAutoScale" : true,
"jobDatabricksMinWorkers" : "1",
"jobDatabricksMaxWorkers" : "8"
}
},
"outputs" : [
@ -526,7 +534,7 @@
"subscription" : ""
},
"properties" : {
"inputEventHubSubscriptionId" : "keyvault://${serviceKeyVaultName}/$clientSecretPrefix-subscriptionId",
"inputEventHubSubscriptionId" : "$keyvaultPrefix://${serviceKeyVaultName}/$clientSecretPrefix-subscriptionId",
"inputEventHubResourceGroupName" : "${eventHubResourceGroupName}"
},
"jobNames" : [

Просмотреть файл

@ -25,7 +25,7 @@
},
"process" : {
"metric" : {
"eventhub" : "keyvault://${sparkKeyVaultName}/${metricEventHubConnectionStringKey}"
"eventhub" : "$keyvaultPrefix://${sparkKeyVaultName}/${metricEventHubConnectionStringKey}"
},
"timestampColumn" : "${processTimestampColumn}",
"watermark" : "${processWatermark}",
@ -49,6 +49,10 @@
"sparkJobExecutorLogLevel" : "INFO",
"sparkJobNumExecutors" : "${guiSparkJobNumExecutors}",
"sparkJobExecutorMemory" : "${guiSparkJobExecutorMemory}",
"sparkJobDatabricksMinWorkers" : "${guiSparkJobDatabricksMinWorkers}",
"sparkJobDatabricksMaxWorkers" : "${guiSparkJobDatabricksMaxWorkers}",
"sparkDatabricksToken" : "${guiSparkDatabricksToken}",
"sparkJobDatabricksAutoScale" : "${guiSparkJobDatabricksAutoScale}",
"processedSchemaPath" : null
},
"jobs" : [
@ -363,13 +367,14 @@
"gui" : {
"name" : "eventhubkafka",
"displayName" : "EventhubKafka",
"databricksToken" : "",
"owner" : "eventhubkafka",
"input" : {
"mode" : "streaming",
"type" : "kafkaeventhub",
"properties" : {
"inputEventhubName" : "kafka1,kafka2",
"inputEventhubConnection" : "keyvault://$sparkKVName/eventhubkafka-input-eventhubconnectionstring",
"inputEventhubConnection" : "$keyvaultPrefix://$sparkKVName/eventhubkafka-input-eventhubconnectionstring",
"windowDuration" : "60",
"timestampColumn" : "eventTimeStamp",
"watermarkValue" : "60",
@ -385,7 +390,7 @@
"type" : "csv",
"typeDisplay" : "CSV/TSV File",
"properties" : {
"path" : "keyvault://$sparkKVName/eventhubkafka-referencedata-devicesdata",
"path" : "$keyvaultPrefix://$sparkKVName/eventhubkafka-referencedata-devicesdata",
"delimiter" : ",",
"header" : true
}
@ -401,7 +406,7 @@
"type" : "jarUDF",
"properties" : {
"name" : null,
"path" : "keyvault://$sparkKVName/eventhubkafka-jarpath-udfsample",
"path" : "$keyvaultPrefix://$sparkKVName/eventhubkafka-jarpath-udfsample",
"class" : "datax.sample.udf.UdfHelloWorld",
"libs" : [ ]
},
@ -413,7 +418,10 @@
],
"jobconfig" : {
"jobNumExecutors" : "4",
"jobExecutorMemory" : "1024"
"jobExecutorMemory" : "1024",
"jobDatabricksAutoScale" : true,
"jobDatabricksMinWorkers" : "1",
"jobDatabricksMaxWorkers" : "8"
}
},
"outputs" : [
@ -527,7 +535,7 @@
"subscription" : ""
},
"properties" : {
"inputEventHubSubscriptionId" : "keyvault://${serviceKeyVaultName}/$clientSecretPrefix-subscriptionId",
"inputEventHubSubscriptionId" : "$keyvaultPrefix://${serviceKeyVaultName}/$clientSecretPrefix-subscriptionId",
"inputEventHubResourceGroupName" : "${eventHubResourceGroupName}"
},
"jobNames" : [

Просмотреть файл

@ -24,7 +24,7 @@
},
"process" : {
"metric" : {
"eventhub" : "keyvault://${sparkKeyVaultName}/${metricEventHubConnectionStringKey}"
"eventhub" : "$keyvaultPrefix://${sparkKeyVaultName}/${metricEventHubConnectionStringKey}"
},
"timestampColumn" : "${processTimestampColumn}",
"watermark" : "${processWatermark}",
@ -48,6 +48,10 @@
"sparkJobExecutorLogLevel" : "INFO",
"sparkJobNumExecutors" : "${guiSparkJobNumExecutors}",
"sparkJobExecutorMemory" : "${guiSparkJobExecutorMemory}",
"sparkJobDatabricksMinWorkers" : "${guiSparkJobDatabricksMinWorkers}",
"sparkJobDatabricksMaxWorkers" : "${guiSparkJobDatabricksMaxWorkers}",
"sparkDatabricksToken" : "${guiSparkDatabricksToken}",
"sparkJobDatabricksAutoScale" : "${guiSparkJobDatabricksAutoScale}",
"processedSchemaPath" : null
},
"jobs" : [
@ -362,13 +366,14 @@
"gui" : {
"name" : "iotsample",
"displayName" : "IoT Sample",
"databricksToken" : "",
"owner" : "iotsample",
"input" : {
"mode" : "streaming",
"type" : "iothub",
"properties" : {
"inputEventhubName" : "$iotHubName",
"inputEventhubConnection" : "keyvault://$sparkKVName/iotsample-input-eventhubconnectionstring",
"inputEventhubConnection" : "$keyvaultPrefix://$sparkKVName/iotsample-input-eventhubconnectionstring",
"windowDuration" : "60",
"timestampColumn" : "eventTimeStamp",
"watermarkValue" : "60",
@ -384,7 +389,7 @@
"type" : "csv",
"typeDisplay" : "CSV/TSV File",
"properties" : {
"path" : "keyvault://$sparkKVName/iotsample-referencedata-devicesdata",
"path" : "$keyvaultPrefix://$sparkKVName/iotsample-referencedata-devicesdata",
"delimiter" : ",",
"header" : true
}
@ -400,7 +405,7 @@
"type" : "jarUDF",
"properties" : {
"name" : null,
"path" : "keyvault://$sparkKVName/iotsample-jarpath-udfsample",
"path" : "$keyvaultPrefix://$sparkKVName/iotsample-jarpath-udfsample",
"class" : "datax.sample.udf.UdfHelloWorld",
"libs" : [ ]
},
@ -412,7 +417,10 @@
],
"jobconfig" : {
"jobNumExecutors" : "4",
"jobExecutorMemory" : "1024"
"jobExecutorMemory" : "1024",
"jobDatabricksAutoScale" : true,
"jobDatabricksMinWorkers" : "1",
"jobDatabricksMaxWorkers" : "8"
}
},
"outputs" : [
@ -526,7 +534,7 @@
"subscription" : ""
},
"properties" : {
"inputEventHubSubscriptionId" : "keyvault://${serviceKeyVaultName}/$clientSecretPrefix-subscriptionId",
"inputEventHubSubscriptionId" : "$keyvaultPrefix://${serviceKeyVaultName}/$clientSecretPrefix-subscriptionId",
"inputEventHubResourceGroupName" : "${eventHubResourceGroupName}"
},
"jobNames" : [

Просмотреть файл

@ -25,7 +25,7 @@
},
"process" : {
"metric" : {
"eventhub" : "keyvault://${sparkKeyVaultName}/${metricEventHubConnectionStringKey}"
"eventhub" : "$keyvaultPrefix://${sparkKeyVaultName}/${metricEventHubConnectionStringKey}"
},
"timestampColumn" : "${processTimestampColumn}",
"watermark" : "${processWatermark}",
@ -49,6 +49,10 @@
"sparkJobExecutorLogLevel" : "INFO",
"sparkJobNumExecutors" : "${guiSparkJobNumExecutors}",
"sparkJobExecutorMemory" : "${guiSparkJobExecutorMemory}",
"sparkJobDatabricksMinWorkers" : "${guiSparkJobDatabricksMinWorkers}",
"sparkJobDatabricksMaxWorkers" : "${guiSparkJobDatabricksMaxWorkers}",
"sparkDatabricksToken" : "${guiSparkDatabricksToken}",
"sparkJobDatabricksAutoScale" : "${guiSparkJobDatabricksAutoScale}",
"processedSchemaPath" : null
},
"jobs" : [
@ -363,13 +367,14 @@
"gui" : {
"name" : "nativekafka",
"displayName" : "NativeKafka",
"databricksToken" : "",
"owner" : "nativekafka",
"input" : {
"mode" : "streaming",
"type" : "kafka",
"properties" : {
"inputEventhubName" : "kafka1,kafka2",
"inputEventhubConnection" : "keyvault://$sparkKVName/nativekafka-input-eventhubconnectionstring",
"inputEventhubConnection" : "$keyvaultPrefix://$sparkKVName/nativekafka-input-eventhubconnectionstring",
"windowDuration" : "60",
"timestampColumn" : "eventTimeStamp",
"watermarkValue" : "60",
@ -385,7 +390,7 @@
"type" : "csv",
"typeDisplay" : "CSV/TSV File",
"properties" : {
"path" : "keyvault://$sparkKVName/nativekafka-referencedata-devicesdata",
"path" : "$keyvaultPrefix://$sparkKVName/nativekafka-referencedata-devicesdata",
"delimiter" : ",",
"header" : true
}
@ -401,7 +406,7 @@
"type" : "jarUDF",
"properties" : {
"name" : null,
"path" : "keyvault://$sparkKVName/nativekafka-jarpath-udfsample",
"path" : "$keyvaultPrefix://$sparkKVName/nativekafka-jarpath-udfsample",
"class" : "datax.sample.udf.UdfHelloWorld",
"libs" : [ ]
},
@ -413,7 +418,10 @@
],
"jobconfig" : {
"jobNumExecutors" : "4",
"jobExecutorMemory" : "1024"
"jobExecutorMemory" : "1024",
"jobDatabricksAutoScale" : true,
"jobDatabricksMinWorkers" : "1",
"jobDatabricksMaxWorkers" : "8"
}
},
"outputs" : [
@ -527,7 +535,7 @@
"subscription" : ""
},
"properties" : {
"inputEventHubSubscriptionId" : "keyvault://${serviceKeyVaultName}/$clientSecretPrefix-subscriptionId",
"inputEventHubSubscriptionId" : "$keyvaultPrefix://${serviceKeyVaultName}/$clientSecretPrefix-subscriptionId",
"inputEventHubResourceGroupName" : "${eventHubResourceGroupName}"
},
"jobNames" : [

Просмотреть файл

@ -24,7 +24,8 @@
"cosmosDBConfigDatabaseName": "keyvault://$servicesKVName/$serviceSecretPrefix-configgenconfigsdatabasename",
"cosmosDBConfigCollectionName": "configgenConfigs",
"AppInsightsIntrumentationKey": "$serviceSecretPrefix-aiInstrumentationKey",
"CACertificateLocation": "keyvault://$servicesKVName/$serviceSecretPrefix-cacertificatelocation"
"CACertificateLocation": "keyvault://$servicesKVName/$serviceSecretPrefix-cacertificatelocation",
"SparkType": "$sparkType"
}
}
}

Просмотреть файл

@ -20,6 +20,18 @@ subscriptionId=
# ResourceGroupName to generate resources
resourceGroupName=DataX
# y to run Spark jobs on Databricks, n to run them on HDInsight
useDatabricks=n
# Databricks cluster Spark version
databricksClusterSparkVersion=5.3.x-scala2.11
# Databricks cluster node type ID
databricksClusterNodeType=Standard_DS3_v2
# Databricks SKU
databricksSku=premium
# y if you want to deploy Kafka sample, n otherwise
# This will deploy the Kafka-specific samples and resources, such as HDInsight Kafka and EventHub Kafka
enableKafkaSample=y

Просмотреть файл

@ -185,6 +185,12 @@ function Get-Tokens {
$tokens.Add('kafkaName', $kafkaName)
$tokens.Add('kafkaEventHubNamespaceName', $kafkaEventHubNamespaceName)
$tokens.Add('clientSecretPrefix', $clientSecretPrefix)
$keyvaultPrefix = 'keyvault'
if ($useDatabricks -eq 'y') {
$keyvaultPrefix = 'secretscope'
}
$tokens.Add('keyvaultPrefix', $keyvaultPrefix)
$tokens
}