From 5a6debf9dcb41f5ab4bff62c984ea79a0aff0d79 Mon Sep 17 00:00:00 2001
From: Heba Elayoty <31887807+helayoty@users.noreply.github.com>
Date: Thu, 30 Jul 2020 21:05:36 -0700
Subject: [PATCH] feat: Add Azure Data Factory module (#397)

* Add ADF module

* address code review comments

* go lint fix

* format fix

* upgrade azurerm to 2.9
---
 .../providers/azure/data-factory/README.md    |  78 ++++++
 .../providers/azure/data-factory/datasets.tf  |   8 +
 .../azure/data-factory/linkedservices.tf      |   8 +
 .../providers/azure/data-factory/main.tf      |  51 ++++
 .../providers/azure/data-factory/output.tf    |  50 ++++
 .../data-factory/terraform.tfvars.template    |  12 +
 .../data-factory/tests/.env.testing.template  |   3 +
 .../data_factory_integration_test.go          |  41 +++
 .../tests/integration/datafactory_tests.go    |  82 ++++++
 .../azure/data-factory/tests/test.tfvars      |  13 +
 .../azure/data-factory/tests/tf_options.go    |  16 ++
 .../tests/unit/data_factory_unit_test.go      |  81 ++++++
 .../providers/azure/data-factory/variables.tf | 101 +++++++
 .../providers/azure/data-factory/versions.tf  |   8 +
 .../modules/azure/datafactory.go              | 253 ++++++++++++++++++
 15 files changed, 805 insertions(+)
 create mode 100644 infra/modules/providers/azure/data-factory/README.md
 create mode 100644 infra/modules/providers/azure/data-factory/datasets.tf
 create mode 100644 infra/modules/providers/azure/data-factory/linkedservices.tf
 create mode 100644 infra/modules/providers/azure/data-factory/main.tf
 create mode 100644 infra/modules/providers/azure/data-factory/output.tf
 create mode 100644 infra/modules/providers/azure/data-factory/terraform.tfvars.template
 create mode 100644 infra/modules/providers/azure/data-factory/tests/.env.testing.template
 create mode 100644 infra/modules/providers/azure/data-factory/tests/integration/data_factory_integration_test.go
 create mode 100644 infra/modules/providers/azure/data-factory/tests/integration/datafactory_tests.go
 create mode 100644 infra/modules/providers/azure/data-factory/tests/test.tfvars
 create mode 100644 infra/modules/providers/azure/data-factory/tests/tf_options.go
 create mode 100644 infra/modules/providers/azure/data-factory/tests/unit/data_factory_unit_test.go
 create mode 100644 infra/modules/providers/azure/data-factory/variables.tf
 create mode 100644 infra/modules/providers/azure/data-factory/versions.tf
 create mode 100644 test-harness/terratest-extensions/modules/azure/datafactory.go

diff --git a/infra/modules/providers/azure/data-factory/README.md b/infra/modules/providers/azure/data-factory/README.md
new file mode 100644
index 0000000..fd47441
--- /dev/null
+++ b/infra/modules/providers/azure/data-factory/README.md
@@ -0,0 +1,78 @@
+# Data Factory
+
+This Terraform-based data-factory module grants templates the ability to create a Data Factory instance along with its main components.
+
+## _More on Data Factory_
+
+Azure Data Factory is the cloud-based ETL and data integration service that allows you to create data-driven workflows for orchestrating data movement and transforming data at scale. Using Azure Data Factory, you can create and schedule data-driven workflows (called pipelines) that can ingest data from disparate data stores. You can build complex ETL processes that transform data visually with data flows or by using compute services such as Azure HDInsight Hadoop, Azure Databricks, and Azure SQL Database.
+
+Additionally, you can publish your transformed data to data stores such as Azure SQL Data Warehouse for business intelligence (BI) applications to consume.
+Ultimately, through Azure Data Factory, raw data can be organized into meaningful data stores and data lakes for better business decisions.
+
+For more information, please see the Microsoft Azure Data Factory [documentation](https://docs.microsoft.com/en-us/azure/data-factory/introduction).
+
+## Characteristics
+
+An instance of the `data-factory` module deploys the _**Data Factory**_ in order to provide templates with the following:
+
+- Ability to provision a single Data Factory instance
+- Ability to provision a configurable Pipeline
+- Ability to configure a Trigger
+- Ability to configure a SQL Server Dataset
+- Ability to configure a SQL Server Linked Service
+
+## Out Of Scope
+
+The following are not supported at this time:
+
+- Creating multiple pipelines
+- Dataset/Linked Service types other than SQL Server
+
+## Definition
+
+Terraform resources used to define the `data-factory` module include the following:
+
+- [azurerm_data_factory](https://www.terraform.io/docs/providers/azurerm/r/data_factory.html)
+- [azurerm_data_factory_integration_runtime_managed](https://www.terraform.io/docs/providers/azurerm/r/data_factory_integration_runtime_managed.html)
+- [azurerm_data_factory_pipeline](https://www.terraform.io/docs/providers/azurerm/r/data_factory_pipeline.html)
+- [azurerm_data_factory_trigger_schedule](https://www.terraform.io/docs/providers/azurerm/r/data_factory_trigger_schedule.html)
+- [azurerm_data_factory_dataset_sql_server](https://www.terraform.io/docs/providers/azurerm/r/data_factory_dataset_sql_server_table.html)
+- [azurerm_data_factory_linked_service_sql_server](https://www.terraform.io/docs/providers/azurerm/r/data_factory_linked_service_sql_server.html)
+
+## Usage
+
+Data Factory usage example:
+
+```hcl
+module "data_factory" {
+  source                                    = "../../modules/providers/azure/data-factory"
+  data_factory_name                         = "adf"
+  resource_group_name                       = "rg"
+  data_factory_runtime_name                 = "adfrt"
+  node_size                                 = "Standard_D2_v3"
+  number_of_nodes                           = 1
+  edition                                   = "Standard"
+  max_parallel_executions_per_node          = 1
+  vnet_integration = {
+    vnet_id     = "/subscriptions/resourceGroups/providers/Microsoft.Network/virtualNetworks/testvnet"
+    subnet_name = "default"
+  }
+  data_factory_pipeline_name                = "adfpipeline"
+  data_factory_trigger_name                 = "adftrigger"
+  data_factory_trigger_interval             = 1
+  data_factory_trigger_frequency            = "Minute"
+  data_factory_dataset_sql_name             = "adfsqldataset"
+  data_factory_dataset_sql_table_name       = "adfsqldatasettable"
+  data_factory_dataset_sql_folder           = ""
+  data_factory_linked_sql_name              = "adfsqllinked"
+  data_factory_linked_sql_connection_string = "Server=tcp:adfsql..."
+}
+```
+
+## Outputs
+
+The output values for this module are available in [output.tf](output.tf).
+
+## Argument Reference
+
+Supported arguments for this module are available in [variables.tf](variables.tf).
\ No newline at end of file
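For module consumers, a minimal terratest-style smoke test against the outputs listed above could look like the sketch below. The example directory path and the chosen output name are assumptions for illustration, not part of this patch.

```go
package example

import (
	"testing"

	"github.com/gruntwork-io/terratest/modules/terraform"
)

// TestDataFactoryOutputs applies a hypothetical root module that instantiates
// data-factory as in the README example, then reads one of its outputs.
func TestDataFactoryOutputs(t *testing.T) {
	options := &terraform.Options{TerraformDir: "./examples/data-factory"} // hypothetical path
	defer terraform.Destroy(t, options)
	terraform.InitAndApply(t, options)

	// data_factory_name is declared in output.tf below.
	factoryName := terraform.Output(t, options, "data_factory_name")
	if factoryName == "" {
		t.Fatal("expected a non-empty data factory name output")
	}
}
```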
diff --git a/infra/modules/providers/azure/data-factory/datasets.tf b/infra/modules/providers/azure/data-factory/datasets.tf
new file mode 100644
index 0000000..8133ddb
--- /dev/null
+++ b/infra/modules/providers/azure/data-factory/datasets.tf
@@ -0,0 +1,8 @@
+
+resource "azurerm_data_factory_dataset_sql_server_table" "main" {
+  name                = var.data_factory_dataset_sql_name
+  resource_group_name = data.azurerm_resource_group.main.name
+  data_factory_name   = azurerm_data_factory.main.name
+  linked_service_name = azurerm_data_factory_linked_service_sql_server.main.name
+  table_name          = var.data_factory_dataset_sql_table_name
+}
\ No newline at end of file
diff --git a/infra/modules/providers/azure/data-factory/linkedservices.tf b/infra/modules/providers/azure/data-factory/linkedservices.tf
new file mode 100644
index 0000000..1c4d5c6
--- /dev/null
+++ b/infra/modules/providers/azure/data-factory/linkedservices.tf
@@ -0,0 +1,8 @@
+
+resource "azurerm_data_factory_linked_service_sql_server" "main" {
+  name                     = var.data_factory_linked_sql_name
+  resource_group_name      = data.azurerm_resource_group.main.name
+  data_factory_name        = azurerm_data_factory.main.name
+  connection_string        = var.data_factory_linked_sql_connection_string
+  integration_runtime_name = azurerm_data_factory_integration_runtime_managed.main.name
+}
diff --git a/infra/modules/providers/azure/data-factory/main.tf b/infra/modules/providers/azure/data-factory/main.tf
new file mode 100644
index 0000000..c80976b
--- /dev/null
+++ b/infra/modules/providers/azure/data-factory/main.tf
@@ -0,0 +1,51 @@
+module "azure-provider" {
+  source = "../provider"
+}
+
+data "azurerm_resource_group" "main" {
+  name = var.resource_group_name
+}
+
+resource "azurerm_data_factory" "main" {
+  # required
+  name                = var.data_factory_name
+  resource_group_name = data.azurerm_resource_group.main.name
+  location            = data.azurerm_resource_group.main.location
+
+  # This will be static as "SystemAssigned" is the only identity available now
+  identity {
+    type = "SystemAssigned"
+  }
+}
+
+resource "azurerm_data_factory_integration_runtime_managed" "main" {
+  name                             = var.data_factory_runtime_name
+  data_factory_name                = azurerm_data_factory.main.name
+  resource_group_name              = data.azurerm_resource_group.main.name
+  location                         = data.azurerm_resource_group.main.location
+  node_size                        = var.node_size
+  number_of_nodes                  = var.number_of_nodes
+  edition                          = var.edition
+  max_parallel_executions_per_node = var.max_parallel_executions_per_node
+
+  vnet_integration {
+    vnet_id     = var.vnet_integration.vnet_id
+    subnet_name = var.vnet_integration.subnet_name
+  }
+}
+
+resource "azurerm_data_factory_pipeline" "main" {
+  name                = var.data_factory_pipeline_name
+  resource_group_name = data.azurerm_resource_group.main.name
+  data_factory_name   = azurerm_data_factory.main.name
+}
+
+resource "azurerm_data_factory_trigger_schedule" "main" {
+  name                = var.data_factory_trigger_name
+  data_factory_name   = azurerm_data_factory.main.name
+  resource_group_name = data.azurerm_resource_group.main.name
+  pipeline_name       = azurerm_data_factory_pipeline.main.name
+
+  interval  = var.data_factory_trigger_interval
+  frequency = var.data_factory_trigger_frequency
+}
diff --git a/infra/modules/providers/azure/data-factory/output.tf b/infra/modules/providers/azure/data-factory/output.tf
new file mode 100644
index 0000000..41183a8
--- /dev/null
+++ b/infra/modules/providers/azure/data-factory/output.tf
@@ -0,0 +1,50 @@
+output "resource_group_name" {
+  description = "The resource group name of the Data Factory."
+  value       = data.azurerm_resource_group.main.name
+}
+
+output "data_factory_name" {
+  description = "The name of the Azure Data Factory created."
+  value       = azurerm_data_factory.main.name
+}
+
+output "data_factory_id" {
+  description = "The ID of the Azure Data Factory created."
+  value       = azurerm_data_factory.main.id
+}
+
+output "identity_principal_id" {
+  description = "The ID of the principal (client) in Azure Active Directory."
+  value       = azurerm_data_factory.main.identity[0].principal_id
+}
+
+output "pipeline_name" {
+  description = "The name of the pipeline created."
+  value       = azurerm_data_factory_pipeline.main.name
+}
+
+output "trigger_interval" {
+  description = "The trigger interval time for the pipeline created."
+  value       = azurerm_data_factory_trigger_schedule.main.interval
+}
+
+output "sql_dataset_id" {
+  description = "The ID of the SQL Server dataset created."
+  value       = azurerm_data_factory_dataset_sql_server_table.main.id
+}
+
+output "sql_linked_service_id" {
+  description = "The ID of the SQL Server linked service created."
+  value       = azurerm_data_factory_linked_service_sql_server.main.id
+}
+
+output "adf_identity_principal_id" {
+  description = "The ID of the principal (client) in Azure Active Directory."
+  value       = azurerm_data_factory.main.identity[0].principal_id
+}
+
+output "adf_identity_tenant_id" {
+  description = "The Tenant ID for the Service Principal associated with the Managed Service Identity of this Data Factory."
+  value       = azurerm_data_factory.main.identity[0].tenant_id
+}
+
diff --git a/infra/modules/providers/azure/data-factory/terraform.tfvars.template b/infra/modules/providers/azure/data-factory/terraform.tfvars.template
new file mode 100644
index 0000000..8404829
--- /dev/null
+++ b/infra/modules/providers/azure/data-factory/terraform.tfvars.template
@@ -0,0 +1,12 @@
+resource_group_name                       = ""
+data_factory_name                         = ""
+data_factory_runtime_name                 = ""
+data_factory_pipeline_name                = ""
+data_factory_dataset_sql_name             = ""
+data_factory_dataset_sql_table_name       = ""
+data_factory_linked_sql_name              = ""
+data_factory_linked_sql_connection_string = ""
+vnet_integration = {
+  vnet_id     = ""
+  subnet_name = ""
+}
diff --git a/infra/modules/providers/azure/data-factory/tests/.env.testing.template b/infra/modules/providers/azure/data-factory/tests/.env.testing.template
new file mode 100644
index 0000000..b0b2823
--- /dev/null
+++ b/infra/modules/providers/azure/data-factory/tests/.env.testing.template
@@ -0,0 +1,3 @@
+RESOURCE_GROUP_NAME="..."
+STORAGE_ACCOUNT_NAME="..."
+CONTAINER_NAME="..."
diff --git a/infra/modules/providers/azure/data-factory/tests/integration/data_factory_integration_test.go b/infra/modules/providers/azure/data-factory/tests/integration/data_factory_integration_test.go
new file mode 100644
index 0000000..66c11b3
--- /dev/null
+++ b/infra/modules/providers/azure/data-factory/tests/integration/data_factory_integration_test.go
@@ -0,0 +1,41 @@
+package integration
+
+import (
+	"os"
+	"testing"
+
+	"github.com/microsoft/cobalt/infra/modules/providers/azure/data-factory/tests"
+	"github.com/microsoft/terratest-abstraction/integration"
+)
+
+var subscription = os.Getenv("ARM_SUBSCRIPTION_ID")
+
+func TestDataFactory(t *testing.T) {
+	testFixture := integration.IntegrationTestFixture{
+		GoTest:                t,
+		TfOptions:             tests.DataFactoryTFOptions,
+		ExpectedTfOutputCount: 8,
+		TfOutputAssertions: []integration.TerraformOutputValidation{
+			VerifyCreatedDataFactory(subscription,
+				"resource_group_name",
+				"data_factory_name",
+			),
+			VerifyCreatedPipeline(subscription,
+				"resource_group_name",
+				"data_factory_name",
+				"pipeline_name",
+			),
+			VerifyCreatedDataset(subscription,
+				"resource_group_name",
+				"data_factory_name",
+				"sql_dataset_id",
+			),
+			VerifyCreatedLinkedService(subscription,
+				"resource_group_name",
+				"data_factory_name",
+				"sql_linked_service_id",
+			),
+		},
+	}
+	integration.RunIntegrationTests(&testFixture)
+}
diff --git a/infra/modules/providers/azure/data-factory/tests/integration/datafactory_tests.go b/infra/modules/providers/azure/data-factory/tests/integration/datafactory_tests.go
new file mode 100644
index 0000000..373d431
--- /dev/null
+++ b/infra/modules/providers/azure/data-factory/tests/integration/datafactory_tests.go
@@ -0,0 +1,82 @@
+package integration
+
+import (
+	"testing"
+
+	"github.com/microsoft/cobalt/test-harness/terratest-extensions/modules/azure"
+	"github.com/microsoft/terratest-abstraction/integration"
+	"github.com/stretchr/testify/require"
+)
+
+// healthCheck - Asserts that the deployment was successful.
+func healthCheck(t *testing.T, provisionState *string) {
+	require.Equal(t, "Succeeded", *provisionState, "The deployment hasn't succeeded.")
+}
+
+// VerifyCreatedDataFactory - validate the created data factory
+func VerifyCreatedDataFactory(subscriptionID, resourceGroupOutputName, dataFactoryOutputName string) func(goTest *testing.T, output integration.TerraformOutput) {
+	return func(goTest *testing.T, output integration.TerraformOutput) {
+
+		dataFactory := output[dataFactoryOutputName].(string)
+		resourceGroup := output[resourceGroupOutputName].(string)
+
+		dataFactoryNameFromAzure := azure.GetDataFactoryNameByResourceGroup(
+			goTest,
+			subscriptionID,
+			resourceGroup)
+
+		require.Equal(goTest, dataFactoryNameFromAzure, dataFactory, "The data factory does not exist")
+	}
+}
+
+// VerifyCreatedPipeline - validate the pipeline name for the created data factory
+func VerifyCreatedPipeline(subscriptionID, resourceGroupOutputName, dataFactoryOutputName, pipelineOutputName string) func(goTest *testing.T, output integration.TerraformOutput) {
+	return func(goTest *testing.T, output integration.TerraformOutput) {
+		pipelineNameFromOutput := output[pipelineOutputName].(string)
+
+		dataFactory := output[dataFactoryOutputName].(string)
+		resourceGroup := output[resourceGroupOutputName].(string)
+
+		pipelineNameFromAzure := azure.GetPipeLineNameByDataFactory(
+			goTest,
+			subscriptionID,
+			resourceGroup,
+			dataFactory)
+
+		require.Equal(goTest, pipelineNameFromAzure, pipelineNameFromOutput, "The pipeline does not exist in the data factory")
+	}
+}
+
+// VerifyCreatedDataset - validate the SQL dataset for the created data factory
+func VerifyCreatedDataset(subscriptionID, resourceGroupOutputName, dataFactoryOutputName, datasetOutputID string) func(goTest *testing.T, output integration.TerraformOutput) {
+	return func(goTest *testing.T, output integration.TerraformOutput) {
+		datasetIDFromOutput := output[datasetOutputID].(string)
+
+		dataFactory := output[dataFactoryOutputName].(string)
+		resourceGroup := output[resourceGroupOutputName].(string)
+
+		datasetIDFromAzure := azure.ListDatasetIDByDataFactory(goTest,
+			subscriptionID,
+			resourceGroup,
+			dataFactory)
+
+		require.Contains(goTest, *datasetIDFromAzure, datasetIDFromOutput, "The dataset does not exist")
+	}
+}
+
+// VerifyCreatedLinkedService - validate the SQL linked service for the created data factory
+func VerifyCreatedLinkedService(subscriptionID, resourceGroupOutputName, dataFactoryOutputName, linkedServiceIDOutputName string) func(goTest *testing.T, output integration.TerraformOutput) {
+	return func(goTest *testing.T, output integration.TerraformOutput) {
+		linkedServiceIDFromOutput := output[linkedServiceIDOutputName].(string)
+
+		dataFactory := output[dataFactoryOutputName].(string)
+		resourceGroup := output[resourceGroupOutputName].(string)
+
+		linkedServiceIDFromAzure := azure.ListLinkedServicesIDByDataFactory(goTest,
+			subscriptionID,
+			resourceGroup,
+			dataFactory)
+
+		require.Contains(goTest, *linkedServiceIDFromAzure, linkedServiceIDFromOutput, "The linked service does not exist")
+	}
+}
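All of the validators above share the same closure shape, so additional checks compose naturally. The sketch below (not part of the patch) shows how a further assertion could be written in the same style; the output name and the assumption that numeric Terraform outputs decode as float64 are hypothetical.

```go
// VerifyTriggerInterval - hypothetical validator in the same closure style as
// the ones above; assumes numeric Terraform outputs surface as float64.
func VerifyTriggerInterval(expected float64, triggerIntervalOutputName string) func(goTest *testing.T, output integration.TerraformOutput) {
	return func(goTest *testing.T, output integration.TerraformOutput) {
		interval := output[triggerIntervalOutputName].(float64)
		require.Equal(goTest, expected, interval, "The trigger interval does not match")
	}
}
```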
"testtrigger" +data_factory_dataset_sql_name = "testsql" +data_factory_dataset_sql_table_name = "adfsqltableheba" +data_factory_linked_sql_name = "testlinkedsql" +data_factory_linked_sql_connection_string = "connectionstring" +vnet_integration = { + vnet_id = "/subscriptions/resourceGroups/providers/Microsoft.Network/virtualNetworks/testvnet" + subnet_name = "default" +} diff --git a/infra/modules/providers/azure/data-factory/tests/tf_options.go b/infra/modules/providers/azure/data-factory/tests/tf_options.go new file mode 100644 index 0000000..809aea1 --- /dev/null +++ b/infra/modules/providers/azure/data-factory/tests/tf_options.go @@ -0,0 +1,16 @@ +package tests + +import ( + "os" + + "github.com/gruntwork-io/terratest/modules/terraform" +) + +// ResourceGroupName - The Resource Group Name +var ResourceGroupName = os.Getenv("RESOURCE_GROUP_NAME") + +// DataFactoryTFOptions common terraform options used for unit and integration testing +var DataFactoryTFOptions = &terraform.Options{ + TerraformDir: "../../", + VarFiles: []string{"./tests/test.tfvars"}, +} diff --git a/infra/modules/providers/azure/data-factory/tests/unit/data_factory_unit_test.go b/infra/modules/providers/azure/data-factory/tests/unit/data_factory_unit_test.go new file mode 100644 index 0000000..c6dd0a0 --- /dev/null +++ b/infra/modules/providers/azure/data-factory/tests/unit/data_factory_unit_test.go @@ -0,0 +1,81 @@ +package unit + +import ( + "encoding/json" + "strings" + "testing" + + "github.com/gruntwork-io/terratest/modules/random" + tests "github.com/microsoft/cobalt/infra/modules/providers/azure/data-factory/tests" + "github.com/microsoft/terratest-abstraction/unit" +) + +// helper function to parse blocks of JSON into a generic Go map +func asMap(t *testing.T, jsonString string) map[string]interface{} { + var theMap map[string]interface{} + if err := json.Unmarshal([]byte(jsonString), &theMap); err != nil { + t.Fatal(err) + } + return theMap +} + +func TestTemplate(t *testing.T) { + + expectedDataFactory := map[string]interface{}{ + "name": "adftest", + "resource_group_name": tests.ResourceGroupName, + "identity": []interface{}{ + map[string]interface{}{"type": "SystemAssigned"}, + }, + } + + expectedDFIntRunTime := map[string]interface{}{ + "name": "adfrttest", + "node_size": "Standard_D2_v3", + "number_of_nodes": 1.0, + "edition": "Standard", + "max_parallel_executions_per_node": 1.0, + "vnet_integration": []interface{}{ + map[string]interface{}{ + "vnet_id": "/subscriptions/resourceGroups/providers/Microsoft.Network/virtualNetworks/testvnet", + "subnet_name": "default", + }, + }, + } + + expectedPipeline := map[string]interface{}{ + "name": "testpipeline", + } + + expectedTrigger := map[string]interface{}{ + "name": "testtrigger", + "interval": 1.0, + "frequency": "Minute", + } + + expectedDatasetSQL := map[string]interface{}{ + "name": "testsql", + } + + expectedLinkedSQL := map[string]interface{}{ + "name": "testlinkedsql", + "connection_string": "connectionstring", + } + + testFixture := unit.UnitTestFixture{ + GoTest: t, + TfOptions: tests.DataFactoryTFOptions, + PlanAssertions: nil, + ExpectedResourceCount: 6, + ExpectedResourceAttributeValues: unit.ResourceDescription{ + "azurerm_data_factory.main": expectedDataFactory, + "azurerm_data_factory_integration_runtime_managed.main": expectedDFIntRunTime, + "azurerm_data_factory_pipeline.main": expectedPipeline, + "azurerm_data_factory_trigger_schedule.main": expectedTrigger, + "azurerm_data_factory_dataset_sql_server_table.main": expectedDatasetSQL, + 
"azurerm_data_factory_linked_service_sql_server.main": expectedLinkedSQL, + }, + } + + unit.RunUnitTests(&testFixture) +} diff --git a/infra/modules/providers/azure/data-factory/variables.tf b/infra/modules/providers/azure/data-factory/variables.tf new file mode 100644 index 0000000..96ab067 --- /dev/null +++ b/infra/modules/providers/azure/data-factory/variables.tf @@ -0,0 +1,101 @@ +variable "resource_group_name" { + description = "(The name of the resource group in which to create the Managed Integration Runtime. Changing this forces a new resource to be created." + type = string +} + +variable "data_factory_name" { + description = "Specifies the name of the Data Factory the Managed Integration Runtime belongs to. Changing this forces a new resource to be created." + type = string +} + +variable "data_factory_runtime_name" { + description = "Specifies the name of the Managed Integration Runtime. Changing this forces a new resource to be created. Must be globally unique." + type = string + default = "" +} + +variable "node_size" { + description = "The size of the nodes on which the Managed Integration Runtime runs. Valid values are: Standard_D2_v3" + type = string + default = "Standard_D2_v3" +} + +variable "number_of_nodes" { + description = "Number of nodes for the Managed Integration Runtime. Max is 10. Defaults to 1." + type = number + default = 1 +} + +variable "edition" { + description = "The Managed Integration Runtime edition. Valid values are Standard and Enterprise. Defaults to Standard." + type = string + default = "Standard" +} + +variable "max_parallel_executions_per_node" { + description = "Defines the maximum parallel executions per node. Defaults to 1. Max is 16." + type = number + default = 1 +} + +variable "vnet_integration" { + + type = object({ + vnet_id = string #ID of the virtual network to which the nodes of the Managed Integration Runtime will be added + subnet_name = string #Name of the subnet to which the nodes of the Managed Integration Runtime will be added. + }) +} + +variable "data_factory_pipeline_name" { + description = "Specifies the name of the Data Factory Pipeline. Changing this forces a new resource to be created. Must be globally unique." + type = string + default = "" +} + +variable "data_factory_trigger_name" { + description = "Specifies the name of the Data Factory Schedule Trigger. Changing this forces a new resource to be created. Must be globally unique." + type = string + default = "" +} + +variable "data_factory_trigger_interval" { + description = "The interval for how often the trigger occurs. This defaults to 1." + type = number + default = 1 +} + +variable "data_factory_trigger_frequency" { + description = "The trigger freqency. Valid values include Minute, Hour, Day, Week, Month. Defaults to Minute." + type = string + default = "Minute" +} + +variable "data_factory_dataset_sql_name" { + description = "Specifies the name of the Data Factory Dataset SQL Server Table. Only letters, numbers and '_' are allowed." + type = string + default = "" +} + +variable "data_factory_dataset_sql_table_name" { + description = "The table name of the Data Factory Dataset SQL Server Table." + type = string + default = "" +} + +variable "data_factory_dataset_sql_folder" { + description = "The folder that this Dataset is in. If not specified, the Dataset will appear at the root level." + type = string + default = "" +} + +variable "data_factory_linked_sql_name" { + description = "Specifies the name of the Data Factory Linked Service SQL Server. 
diff --git a/infra/modules/providers/azure/data-factory/variables.tf b/infra/modules/providers/azure/data-factory/variables.tf
new file mode 100644
index 0000000..96ab067
--- /dev/null
+++ b/infra/modules/providers/azure/data-factory/variables.tf
@@ -0,0 +1,101 @@
+variable "resource_group_name" {
+  description = "The name of the resource group in which to create the Managed Integration Runtime. Changing this forces a new resource to be created."
+  type        = string
+}
+
+variable "data_factory_name" {
+  description = "Specifies the name of the Data Factory the Managed Integration Runtime belongs to. Changing this forces a new resource to be created."
+  type        = string
+}
+
+variable "data_factory_runtime_name" {
+  description = "Specifies the name of the Managed Integration Runtime. Changing this forces a new resource to be created. Must be globally unique."
+  type        = string
+  default     = ""
+}
+
+variable "node_size" {
+  description = "The size of the nodes on which the Managed Integration Runtime runs, e.g. Standard_D2_v3."
+  type        = string
+  default     = "Standard_D2_v3"
+}
+
+variable "number_of_nodes" {
+  description = "Number of nodes for the Managed Integration Runtime. Max is 10. Defaults to 1."
+  type        = number
+  default     = 1
+}
+
+variable "edition" {
+  description = "The Managed Integration Runtime edition. Valid values are Standard and Enterprise. Defaults to Standard."
+  type        = string
+  default     = "Standard"
+}
+
+variable "max_parallel_executions_per_node" {
+  description = "Defines the maximum parallel executions per node. Defaults to 1. Max is 16."
+  type        = number
+  default     = 1
+}
+
+variable "vnet_integration" {
+
+  type = object({
+    vnet_id     = string # ID of the virtual network to which the nodes of the Managed Integration Runtime will be added.
+    subnet_name = string # Name of the subnet to which the nodes of the Managed Integration Runtime will be added.
+  })
+}
+
+variable "data_factory_pipeline_name" {
+  description = "Specifies the name of the Data Factory Pipeline. Changing this forces a new resource to be created. Must be globally unique."
+  type        = string
+  default     = ""
+}
+
+variable "data_factory_trigger_name" {
+  description = "Specifies the name of the Data Factory Schedule Trigger. Changing this forces a new resource to be created. Must be globally unique."
+  type        = string
+  default     = ""
+}
+
+variable "data_factory_trigger_interval" {
+  description = "The interval for how often the trigger occurs. Defaults to 1."
+  type        = number
+  default     = 1
+}
+
+variable "data_factory_trigger_frequency" {
+  description = "The trigger frequency. Valid values include Minute, Hour, Day, Week, Month. Defaults to Minute."
+  type        = string
+  default     = "Minute"
+}
+
+variable "data_factory_dataset_sql_name" {
+  description = "Specifies the name of the Data Factory Dataset SQL Server Table. Only letters, numbers and '_' are allowed."
+  type        = string
+  default     = ""
+}
+
+variable "data_factory_dataset_sql_table_name" {
+  description = "The table name of the Data Factory Dataset SQL Server Table."
+  type        = string
+  default     = ""
+}
+
+variable "data_factory_dataset_sql_folder" {
+  description = "The folder that this Dataset is in. If not specified, the Dataset will appear at the root level."
+  type        = string
+  default     = ""
+}
+
+variable "data_factory_linked_sql_name" {
+  description = "Specifies the name of the Data Factory Linked Service SQL Server. Changing this forces a new resource to be created."
+  type        = string
+  default     = ""
+}
+
+variable "data_factory_linked_sql_connection_string" {
+  description = "The connection string used to authenticate with the SQL Server."
+  type        = string
+  default     = ""
+}
\ No newline at end of file
diff --git a/infra/modules/providers/azure/data-factory/versions.tf b/infra/modules/providers/azure/data-factory/versions.tf
new file mode 100644
index 0000000..284618f
--- /dev/null
+++ b/infra/modules/providers/azure/data-factory/versions.tf
@@ -0,0 +1,8 @@
+
+terraform {
+  required_version = ">= 0.12"
+}
+provider "azurerm" {
+  version = "~>2.9.0"
+  features {}
+}
\ No newline at end of file
diff --git a/test-harness/terratest-extensions/modules/azure/datafactory.go b/test-harness/terratest-extensions/modules/azure/datafactory.go
new file mode 100644
index 0000000..7299a2c
--- /dev/null
+++ b/test-harness/terratest-extensions/modules/azure/datafactory.go
@@ -0,0 +1,253 @@
+package azure
+
+import (
+	"context"
+	"testing"
+
+	"github.com/Azure/azure-sdk-for-go/services/datafactory/mgmt/2018-06-01/datafactory"
+)
+
+func dataFactoryClientE(subscriptionID string) (*datafactory.FactoriesClient, error) {
+	authorizer, err := DeploymentServicePrincipalAuthorizer()
+	if err != nil {
+		return nil, err
+	}
+
+	client := datafactory.NewFactoriesClient(subscriptionID)
+	client.Authorizer = authorizer
+	return &client, nil
+}
+
+func pipelineClientE(subscriptionID string) (*datafactory.PipelinesClient, error) {
+	authorizer, err := DeploymentServicePrincipalAuthorizer()
+	if err != nil {
+		return nil, err
+	}
+
+	pClient := datafactory.NewPipelinesClient(subscriptionID)
+	pClient.Authorizer = authorizer
+	return &pClient, nil
+}
+
+func datasetClientE(subscriptionID string) (*datafactory.DatasetsClient, error) {
+	authorizer, err := DeploymentServicePrincipalAuthorizer()
+	if err != nil {
+		return nil, err
+	}
+
+	dsClient := datafactory.NewDatasetsClient(subscriptionID)
+	dsClient.Authorizer = authorizer
+	return &dsClient, nil
+}
+
+func linkedServiceClientE(subscriptionID string) (*datafactory.LinkedServicesClient, error) {
+	authorizer, err := DeploymentServicePrincipalAuthorizer()
+	if err != nil {
+		return nil, err
+	}
+
+	lsClient := datafactory.NewLinkedServicesClient(subscriptionID)
+	lsClient.Authorizer = authorizer
+	return &lsClient, nil
+}
+
+func triggersClientE(subscriptionID string) (*datafactory.TriggersClient, error) {
+	authorizer, err := DeploymentServicePrincipalAuthorizer()
+	if err != nil {
+		return nil, err
+	}
+
+	tClient := datafactory.NewTriggersClient(subscriptionID)
+	tClient.Authorizer = authorizer
+	return &tClient, nil
+}
+
+// GetDataFactoryNameByResourceGroup - return the name of the first data factory found in the given resource group
+func GetDataFactoryNameByResourceGroup(t *testing.T, subscriptionID, resourceGroupName string) string {
+	dfList, err := listDataFactoryByResourceGroupE(subscriptionID, resourceGroupName)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	result := ""
+	for _, factory := range *dfList {
+		result = *factory.Name
+		break
+	}
+	return result
+}
+
+func listDataFactoryByResourceGroupE(subscriptionID, resourceGroupName string) (*[]datafactory.Factory, error) {
+	dfClient, err := dataFactoryClientE(subscriptionID)
+
+	if err != nil {
+		return nil, err
+	}
+
+	iteratorDataFactory, err := dfClient.ListByResourceGroupComplete(context.Background(), resourceGroupName)
+	if err != nil {
+		return nil, err
+	}
+
+	dataFactoryList := make([]datafactory.Factory, 0)
+
+	for iteratorDataFactory.NotDone() {
+		dataFactoryList = append(dataFactoryList, iteratorDataFactory.Value())
+		err = iteratorDataFactory.Next()
+		if err != nil {
+			return nil, err
+		}
+	}
+	return &dataFactoryList, err
+}
+
+// ListPipelinesByDataFactory - list all pipelines in the given data factory
+func ListPipelinesByDataFactory(t *testing.T, subscriptionID string, resourceGroupName string, dataFactoryName string) *[]datafactory.PipelineResource {
+	pResults, err := listPipelinesByDataFactoryE(subscriptionID, resourceGroupName, dataFactoryName)
+
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	return pResults
+}
+
+// GetPipeLineNameByDataFactory - return the name of the first pipeline found in the given data factory
+func GetPipeLineNameByDataFactory(t *testing.T, subscriptionID string, resourceGroupName string, dataFactoryName string) string {
+	pipelinesList := ListPipelinesByDataFactory(t, subscriptionID, resourceGroupName, dataFactoryName)
+
+	result := ""
+	for _, pipeline := range *pipelinesList {
+		result = *pipeline.Name
+		break
+	}
+	return result
+}
+
+func listPipelinesByDataFactoryE(subscriptionID string, resourceGroupName string, dataFactoryName string) (*[]datafactory.PipelineResource, error) {
+	pClient, err := pipelineClientE(subscriptionID)
+	if err != nil {
+		return nil, err
+	}
+
+	iteratorPipelines, err := pClient.ListByFactoryComplete(context.Background(), resourceGroupName, dataFactoryName)
+	if err != nil {
+		return nil, err
+	}
+
+	pipelinesList := make([]datafactory.PipelineResource, 0)
+
+	for iteratorPipelines.NotDone() {
+		pipelinesList = append(pipelinesList, iteratorPipelines.Value())
+		err = iteratorPipelines.Next()
+		if err != nil {
+			return nil, err
+		}
+	}
+
+	return &pipelinesList, err
+}
+
+// ListDatasetIDByDataFactory - list the IDs of all datasets in the given data factory
+func ListDatasetIDByDataFactory(t *testing.T, subscriptionID string, resourceGroupName string, dataFactoryName string) *[]string {
+	dsList, err := listDatasetIDByDataFactoryE(subscriptionID, resourceGroupName, dataFactoryName)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	return dsList
+}
+
+func listDatasetIDByDataFactoryE(subscriptionID string, resourceGroupName string, dataFactoryName string) (*[]string, error) {
+	dsList, err := listDatasetByDataFactory(subscriptionID, resourceGroupName, dataFactoryName)
+	if err != nil {
+		return nil, err
+	}
+
+	results := make([]string, 0)
+	for _, dataset := range *dsList {
+		results = append(results, *dataset.ID)
+	}
+
+	return &results, nil
+}
+
+func listDatasetByDataFactory(subscriptionID string, resourceGroupName string, dataFactoryName string) (*[]datafactory.DatasetResource, error) {
+
+	dsClient, err := datasetClientE(subscriptionID)
+	if err != nil {
+		return nil, err
+	}
+
+	iteratorDataset, err := dsClient.ListByFactoryComplete(context.Background(), resourceGroupName, dataFactoryName)
+	if err != nil {
+		return nil, err
+	}
+
+	dsList := make([]datafactory.DatasetResource, 0)
+
+	for iteratorDataset.NotDone() {
+		dsList = append(dsList, iteratorDataset.Value())
+		err = iteratorDataset.Next()
+		if err != nil {
+			return nil, err
+		}
+	}
+
+	return &dsList, err
+}
+
+// ListLinkedServicesIDByDataFactory - list the IDs of all linked services in the given data factory
+func ListLinkedServicesIDByDataFactory(t *testing.T, subscriptionID string, resourceGroupName string, dataFactoryName string) *[]string {
+	lsList, err := listLinkedServicesIDByDataFactoryE(subscriptionID, resourceGroupName, dataFactoryName)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	return lsList
+}
+
+func listLinkedServicesIDByDataFactoryE(subscriptionID string, resourceGroupName string, dataFactoryName string) (*[]string, error) {
+	lsList, err := listLinkedServiceByDataFactory(subscriptionID, resourceGroupName, dataFactoryName)
+	if err != nil {
+		return nil, err
+	}
+
+	results := make([]string, 0)
+	for _, linkedService := range *lsList {
+		results = append(results, *linkedService.ID)
+	}
+
+	return &results, nil
+}
+
+func listLinkedServiceByDataFactory(subscriptionID string, resourceGroupName string, dataFactoryName string) (*[]datafactory.LinkedServiceResource, error) {
+
+	lsClient, err := linkedServiceClientE(subscriptionID)
+	if err != nil {
+		return nil, err
+	}
+
+	iteratorLinkedService, err := lsClient.ListByFactoryComplete(context.Background(), resourceGroupName, dataFactoryName)
+	if err != nil {
+		return nil, err
+	}
+
+	lsList := make([]datafactory.LinkedServiceResource, 0)
+
+	for iteratorLinkedService.NotDone() {
+		lsList = append(lsList, iteratorLinkedService.Value())
+		err = iteratorLinkedService.Next()
+		if err != nil {
+			return nil, err
+		}
+	}
+
+	return &lsList, err
+}
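A sketch of how a test outside this harness package might combine the exported helpers above; it assumes ARM_SUBSCRIPTION_ID and RESOURCE_GROUP_NAME are set and that the service principal environment expected by the authorizer is configured.

```go
package mytests

import (
	"os"
	"testing"

	"github.com/microsoft/cobalt/test-harness/terratest-extensions/modules/azure"
)

// TestFactoryLookup - hypothetical consumer of the harness helpers: find the
// factory in a resource group, then look up its first pipeline by name.
func TestFactoryLookup(t *testing.T) {
	sub := os.Getenv("ARM_SUBSCRIPTION_ID")
	rg := os.Getenv("RESOURCE_GROUP_NAME")

	factory := azure.GetDataFactoryNameByResourceGroup(t, sub, rg)
	if factory == "" {
		t.Fatalf("no data factory found in resource group %s", rg)
	}

	pipeline := azure.GetPipeLineNameByDataFactory(t, sub, rg, factory)
	if pipeline == "" {
		t.Fatalf("no pipeline found in data factory %s", factory)
	}
}
```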