From b8bd32802d1c3d84e3a9967a99e96b93035b17f0 Mon Sep 17 00:00:00 2001 From: Rohit Agrawal - MSFT Date: Wed, 20 Oct 2021 11:51:49 -0700 Subject: [PATCH] Fix regex and add support for custom ambari SQL db in HDI (#195) --- .../scala/datax/input/BlobPointerInput.scala | 2 +- .../Helpers/UtilityModule.psm1 | 13 ++++ .../Resources/Parameters/Spark-Parameter.json | 8 +++ .../Resources/Templates/Spark-Template.json | 66 ++++++++++++++++++- .../Deployment.Common/deployResources.ps1 | 8 ++- .../Deployment.DataX/common.parameters.txt | 5 ++ 6 files changed, 97 insertions(+), 5 deletions(-) diff --git a/DataProcessing/datax-host/src/main/scala/datax/input/BlobPointerInput.scala b/DataProcessing/datax-host/src/main/scala/datax/input/BlobPointerInput.scala index 9d462777..dc50c247 100644 --- a/DataProcessing/datax-host/src/main/scala/datax/input/BlobPointerInput.scala +++ b/DataProcessing/datax-host/src/main/scala/datax/input/BlobPointerInput.scala @@ -39,7 +39,7 @@ object BlobPointerInput { (new StructType).add("BlobPath", StringType) } - private lazy val saRegex = s"""wasbs?://[\w-]+@([\w\d]+)\${BlobProperties.BlobHostPath}/.*""".r + private lazy val saRegex = s"""wasbs?://[\\w-]+@([\\w\\d]+)${BlobProperties.BlobHostPath}/.*""".r private def extractSourceId(blobPath: String, regex: String): String = { val r = if(regex == null) saRegex else regex.r r.findFirstMatchIn(blobPath) match { diff --git a/DeploymentCloud/Deployment.Common/Helpers/UtilityModule.psm1 b/DeploymentCloud/Deployment.Common/Helpers/UtilityModule.psm1 index 5e31123a..3458ca0d 100644 --- a/DeploymentCloud/Deployment.Common/Helpers/UtilityModule.psm1 +++ b/DeploymentCloud/Deployment.Common/Helpers/UtilityModule.psm1 @@ -34,6 +34,10 @@ param( [Parameter(Mandatory=$True)] [string] $sparkPassword, + + [Parameter(Mandatory=$True)] + [string] + $sqlPassword, [Parameter(Mandatory=$True)] [string] @@ -141,6 +145,13 @@ else { $sparkPwd = $sparkPassword } +if (!$sqlPassword) { + $sqlPwd = Get-Password +} +else { + 
$sqlPwd = $sqlPassword +} + if (!$kafkaPassword) { $kafkaPwd = Get-Password } @@ -652,6 +663,7 @@ function Get-DefaultTokens { $tokens.Add('resourceGroup', $resourceGroupName ) $tokens.Add('sparkPwd', $sparkPwd ) + $tokens.Add('sqlPwd', $sqlPwd ) $tokens.Add('sparkSshPwd', $sparkSshPwd ) $tokens.Add('sfPwd', $sfPwd ) $tokens.Add('name', $name ) @@ -690,6 +702,7 @@ Export-ModuleMember -Variable "iotHubName" Export-ModuleMember -Variable "kafkaEventHubNamespaceName" Export-ModuleMember -Variable "kafkaName" Export-ModuleMember -Variable "sparkPwd" +Export-ModuleMember -Variable "sqlPwd" Export-ModuleMember -Variable "sparkSshPwd" Export-ModuleMember -Variable "kafkaPwd" Export-ModuleMember -Variable "kafkaSshPwd" diff --git a/DeploymentCloud/Deployment.Common/Resources/Parameters/Spark-Parameter.json b/DeploymentCloud/Deployment.Common/Resources/Parameters/Spark-Parameter.json index 28f8159b..e2b66899 100644 --- a/DeploymentCloud/Deployment.Common/Resources/Parameters/Spark-Parameter.json +++ b/DeploymentCloud/Deployment.Common/Resources/Parameters/Spark-Parameter.json @@ -24,6 +24,14 @@ "secretName": "sparkclusterloginpassword" } }, + "sqlServerLoginPassword": { + "reference": { + "keyVault": { + "id": "/subscriptions/$subscriptionId/resourceGroups/$resourceGroup/providers/Microsoft.KeyVault/vaults/$sparkRDPKVName" + }, + "secretName": "sqlServerLoginPassword" + } + }, "sparkClusterVersion": { "value": "$HDInsightVersion" }, diff --git a/DeploymentCloud/Deployment.Common/Resources/Templates/Spark-Template.json b/DeploymentCloud/Deployment.Common/Resources/Templates/Spark-Template.json index 0d3d59bd..8ee9899c 100644 --- a/DeploymentCloud/Deployment.Common/Resources/Templates/Spark-Template.json +++ b/DeploymentCloud/Deployment.Common/Resources/Templates/Spark-Template.json @@ -20,7 +20,14 @@ "sparkClusterLoginPassword": { "type": "securestring", "metadata": { - "description": "The password must be at least 10 characters in length and must contain at least one digit, 
one non-alphanumeric character, and one upper or lower case letter." } + "description": "The password must be at least 10 characters in length and must contain at least one digit, one non-alphanumeric character, and one upper or lower case letter." + } + }, + "sqlServerLoginPassword": { + "type": "securestring", + "metadata": { + "description": "SQL server password. Password cannot contain semicolon or equals special characters." + } }, "sparkClusterVersion": { "type": "string", @@ -94,7 +101,9 @@ "storageAccountsResourceID": "[resourceId('Microsoft.Storage/storageAccounts',parameters('storageAccounts_spark_name'))]", "userAssignedIdentitiesResourceID": "[resourceId('Microsoft.ManagedIdentity/userAssignedIdentities',parameters('userAssignedIdentitiesName'))]", "vnetID": "[resourceId('Microsoft.Network/virtualNetworks',parameters('virtualNetworkName'))]", - "subnet0Ref": "[concat(variables('vnetID'),'/subnets/',parameters('subnet0Name'))]" + "subnet0Ref": "[concat(variables('vnetID'),'/subnets/',parameters('subnet0Name'))]", + "sqlservername": "[concat(parameters('sparkClusterName'), 'dataxsql')]", + "sqldbname": "[concat('metastoredb')]" }, "resources": [ { @@ -104,13 +113,58 @@ "type": "Microsoft.ManagedIdentity/userAssignedIdentities", "properties": {} }, + { + "type": "Microsoft.Sql/servers", + "apiVersion": "2020-02-02-preview", + "name": "[variables('sqlservername')]", + "location": "[parameters('default_resource_location')]", + "properties": { + "administratorLogin": "[parameters('sparkClusterLoginUserName')]", + "administratorLoginPassword": "[parameters('sqlServerLoginPassword')]" + }, + "resources": [ + { + "name": "AllowAllWindowsAzureIps", + "type": "firewallrules", + "location": "[parameters('default_resource_location')]", + "apiVersion": "2020-08-01-preview", + "dependsOn": [ + "[resourceId('Microsoft.Sql/servers', variables('sqlservername'))]" + ], + "properties": { + "startIpAddress": "0.0.0.0", + "endIpAddress": "0.0.0.0" + } + }, + { + "type": 
"databases", + "apiVersion": "2020-08-01-preview", + "name": "[variables('sqldbname')]", + "location": "[parameters('default_resource_location')]", + "properties": { + "collation": "SQL_Latin1_General_CP1_CI_AS", + "zoneRedundant": false, + "readScale": "Disabled", + "readReplicaCount": 0 + }, + "sku": { + "name": "S2", + "tier": "Standard" + }, + "dependsOn": [ + "[resourceId('Microsoft.Sql/servers', variables('sqlservername'))]" + ] + } + ] + }, { "apiVersion": "2018-06-01-preview", "name": "[parameters('sparkClusterName')]", "type": "Microsoft.HDInsight/clusters", "location": "[parameters('default_resource_location')]", "dependsOn": [ - "[variables('userAssignedIdentitiesResourceID')]" + "[variables('userAssignedIdentitiesResourceID')]", + "[variables('sqldbname')]" ], "properties": { "clusterVersion": "[parameters('sparkClusterVersion')]", @@ -126,6 +180,12 @@ "restAuthCredential.isEnabled": true, "restAuthCredential.username": "[parameters('sparkClusterLoginUserName')]", "restAuthCredential.password": "[parameters('sparkClusterLoginPassword')]" + }, + "ambari-conf": { + "database-server": "[reference(resourceId('Microsoft.Sql/servers',concat(parameters('sparkClusterName'), 'dataxsql')), '2020-02-02-preview').fullyQualifiedDomainName]", + "database-name": "metastoredb", + "database-user-name": "[parameters('sparkClusterLoginUserName')]", + "database-user-password": "[parameters('sqlServerLoginPassword')]" } } }, diff --git a/DeploymentCloud/Deployment.Common/deployResources.ps1 b/DeploymentCloud/Deployment.Common/deployResources.ps1 index 784894b7..47fd7244 100644 --- a/DeploymentCloud/Deployment.Common/deployResources.ps1 +++ b/DeploymentCloud/Deployment.Common/deployResources.ps1 @@ -10,6 +10,9 @@ [string] $sparkPassword, + + [string] + $sqlPassword, [string] $sparkSshPassword, @@ -116,7 +119,7 @@ if ($generateNewSelfSignedCerts -eq 'n' -and !$certPassword -and $useCertFromKV Remove-Item -path ".\cachedVariables" -Force -ErrorAction SilentlyContinue 
$rootFolderPath = $PSScriptRoot -Import-Module "..\Deployment.Common\Helpers\UtilityModule" -ArgumentList $rootFolderPath, $resourceGroupName, $productName, $sparkClusterName, $randomizeProductName, $serviceFabricClusterName, $serviceAppName, $clientAppName, $sparkPassword, $sparkSshPassword, $sfPassword, $certPassword, $redisCacheSize, $useCertFromKV, $certKVName -WarningAction SilentlyContinue +Import-Module "..\Deployment.Common\Helpers\UtilityModule" -ArgumentList $rootFolderPath, $resourceGroupName, $productName, $sparkClusterName, $randomizeProductName, $serviceFabricClusterName, $serviceAppName, $clientAppName, $sparkPassword, $sqlPassword, $sparkSshPassword, $sfPassword, $certPassword, $redisCacheSize, $useCertFromKV, $certKVName -WarningAction SilentlyContinue Set-Content -Path ".\cachedVariables" -NoNewline -Value $name function Install-Modules { @@ -531,6 +534,9 @@ function Setup-SecretsForSpark { $secretName = "sparkclusterloginpassword" Setup-Secret -VaultName $vaultName -SecretName $secretName -Value $sparkPwd + + $secretName = "sqlServerLoginPassword" + Setup-Secret -VaultName $vaultName -SecretName $secretName -Value $sqlPwd $secretName = "sparksshuser" Setup-Secret -VaultName $vaultName -SecretName $secretName -Value $sparksshuser diff --git a/DeploymentCloud/Deployment.DataX/common.parameters.txt b/DeploymentCloud/Deployment.DataX/common.parameters.txt index f920d729..8af56685 100644 --- a/DeploymentCloud/Deployment.DataX/common.parameters.txt +++ b/DeploymentCloud/Deployment.DataX/common.parameters.txt @@ -110,6 +110,11 @@ writerRole=DataXWriter # If password is empty, a new password will be generated sparkPassword= +# SQL server password used for spark HDI ambari +# Password cannot contain semicolon or equals special characters +# If password is empty, a new password will be generated +sqlPassword= + # Admin password for Spark kafkaPassword=