This commit is contained in:
Rashmi Shukla 2021-09-12 18:24:09 +00:00
Parent 57d1311d7b
Commit 741deee3e4
8 changed files with 0 additions and 457 deletions

View file

@@ -1,63 +0,0 @@
/***This Artifact belongs to the Data SQL Ninja Engineering Team***/
-- STEP 1: Create a master key. Only necessary if one does not already exist.
CREATE MASTER KEY ENCRYPTION BY PASSWORD = 'MyUltraSecurePassword!12345!';
GO
-- STEP 2: Create a database scoped credential
-- Azure Data Lake Credential
CREATE DATABASE SCOPED CREDENTIAL AzureCredential
WITH IDENTITY = '<AAD AppID>@https://login.microsoftonline.com/<tenant id>/oauth2/token',
SECRET = '<secret key>';
/* Blob Storage Credential
CREATE DATABASE SCOPED CREDENTIAL AzureCredential
WITH IDENTITY = 'SHARED ACCESS SIGNATURE',
SECRET = 'your key here';
*/
-- STEP 3: Create an external data source - type HADOOP for ADLS
CREATE EXTERNAL DATA SOURCE AzureStorage
WITH (TYPE = HADOOP, LOCATION = 'adl://<adls name>.azuredatalakestore.net', CREDENTIAL = AzureCredential);
GO
/* Blob Storage Data Source - wabs syntax with Hadoop type
CREATE EXTERNAL DATA SOURCE AzureStorage
WITH (TYPE = HADOOP, LOCATION = 'wasbs://container@storageacct.blob.core.windows.net',
CREDENTIAL = AzureCredential);
*/
-- STEP 4: Create an external file format
CREATE EXTERNAL FILE FORMAT TextFileFormat
WITH (FORMAT_TYPE = DELIMITEDTEXT, FORMAT_OPTIONS
(FIELD_TERMINATOR = '0x01', STRING_DELIMITER = '"', -- DATE_FORMAT = 'yyyy-MM-dd HH:mm:ss.fff',
USE_TYPE_DEFAULT = FALSE),
DATA_COMPRESSION = 'org.apache.hadoop.io.compress.GzipCodec'
);
GO
-- STEP 5: Create external table pointing to blob storage files
CREATE EXTERNAL TABLE [ext_ACCOUNT_FACT]
(
[ACCT_PK_ID] bigint NOT NULL,
[PERSON_PK_ID] bigint NOT NULL,
[SALES_PERSON_PK_ID] int NOT NULL,
[BATCH_ID] bigint NULL,
[START_TMSP] datetime NULL,
[END_TMSP] datetime NULL,
[ACCT_NAME] varchar(50) NULL,
[ACCT_FLAG] varchar(2) NULL,
[ACCT_STATUS] varchar(24) NULL,
[ACCT_STATUS_CHG_DATE] datetime NULL,
[ACCT_TYPE_CODE] varchar(30) NULL
)
WITH ( LOCATION='/data/test/', DATA_SOURCE = AzureStorage, FILE_FORMAT = TextFileFormat, REJECT_TYPE = VALUE, REJECT_VALUE = 0 );
GO
-- STEP 6: Create Table As Select (CTAS) operation - invokes Polybase to pull information out of one or more text files in ADLS into DW tables
-- note you need to split the input text files to take advantage of parallel load on the compute nodes
CREATE TABLE [POC_DM].[ACCOUNT_FACT]
WITH (DISTRIBUTION = HASH([ACCT_PK_ID]))
AS SELECT * FROM ext_ACCOUNT_FACT
OPTION (LABEL = 'CTAS : Load ACCOUNT_FACT');
GO

View file

@@ -1,165 +0,0 @@
/***This Artifact belongs to the Data SQL Ninja Engineering Team***/
declare @sourceschema varchar(128) = 'SRC_POC_DM'
declare @targetschema varchar(128) = 'TARG_POC_DM'
declare @blobstore varchar(100) = '<account>.blob.core.windows.net'
set nocount on
-- ensure your data warehouse has a master key
-- CREATE MASTER KEY;
-- Use your blob storage key to provide SQL DW access to blob storage
if not exists(select * from [sys].[database_scoped_credentials] where [name]='AzureStorageCredential')
CREATE DATABASE SCOPED CREDENTIAL AzureStorageCredential WITH IDENTITY = 'SHARED ACCESS SIGNATURE', SECRET = 'your key here...=='
-- Create the file format definition
if not exists(select * from [sys].[external_file_formats] where [name]='TextFileFormat')
CREATE EXTERNAL FILE FORMAT TextFileFormat WITH (FORMAT_TYPE = DELIMITEDTEXT, FORMAT_OPTIONS (FIELD_TERMINATOR = '0x01', --STRING_DELIMITER = '',
USE_TYPE_DEFAULT = FALSE), DATA_COMPRESSION = 'org.apache.hadoop.io.compress.GzipCodec')
declare @objectid int, @table varchar(128), @colid int, @column varchar(128), @type varchar(128), @length smallint, @precision tinyint, @scale tinyint, @nullable bit, @colstr varchar(150)
declare @cmd varchar(max), @distcol varchar(128), @disttype varchar(50)
declare @trows int = 0, @trow int = 1, @crows int = 0, @crow int = 1, @start datetime, @tblrows bigint
-- Ensure target schema exists
if not exists(select * from sys.schemas where name = @targetschema)
begin
select @cmd = 'CREATE SCHEMA ' + @targetschema
exec(@cmd)
end
-- Check external table schema exists
if not exists(select * from sys.schemas where name = 'ASB')
exec('CREATE SCHEMA ASB')
-- cleanup of any previous failed run
IF OBJECT_ID('tempdb..#tables') IS NOT NULL
DROP TABLE #tables
IF OBJECT_ID('tempdb..#columns') IS NOT NULL
DROP TABLE #columns
create table #tables
(
rowid int not null,
objectid int not null,
[table] varchar(128) not null
)
WITH ( HEAP , DISTRIBUTION = ROUND_ROBIN )
create table #columns
(
colid int,
[column] varchar(128),
[type] varchar(128),
[length] smallint,
[precision] tinyint,
[scale] tinyint,
[nullable] bit
)
WITH ( HEAP , DISTRIBUTION = ROUND_ROBIN )
-- Set up to process all tables in the defined source schema
insert into #tables
select row_number() over (order by tb.name), object_id, tb.name
from sys.tables tb join sys.schemas s on (tb.schema_id=s.schema_id)
where s.name = @sourceschema
select @trows = count(*) from #tables
--select * from #tables
-- initial cleanup of any previous run - if an external table still exists, you will have to drop it first
if exists(select * from sys.external_data_sources where name='AzureStorage')
drop external data source AzureStorage
select @objectid=objectid, @table=[table] from #tables where rowid=@trow
while (@trow <= @trows)
begin
select @start = getdate() -- save start time
print '---------------------- ' + @targetschema + '.' + @table + ' ----------------------'
-- create the external data source
select @cmd = 'CREATE EXTERNAL DATA SOURCE AzureStorage WITH (TYPE = HADOOP, LOCATION = ''wasbs://' + replace(lower(@table), '_', '-') + '@' + @blobstore + ''', CREDENTIAL = AzureStorageCredential);'
print @cmd
print ''
exec(@cmd)
-- clear all rows in columns temp table (for previous table)
truncate table #columns
-- get all the column definitions for the target table
insert into #columns
select c.column_id, c.[name], t.[name], c.max_length, c.[precision], c.scale, c.is_nullable
from sys.columns c
join sys.types t on (c.user_type_id=t.user_type_id)
where object_id = @objectid
order by c.column_id
-- build external table definition
select @cmd = 'CREATE EXTERNAL TABLE [ASB].[' + @table + '] ('
-- process each column for the target table
select @crows = count(*) from #columns
select @crow = 1
select @colid = colid, @column = [column], @type = [type], @length = [length], @precision = [precision], @scale = [scale], @nullable = [nullable] from #columns where colid=@crow
while (@crow <= @crows)
begin
if (@colid <> 1) select @cmd = @cmd + ', '
select @cmd = @cmd + '[' + @column + '] ' + @type
if @type in ('char', 'varchar', 'nchar', 'nvarchar')
select @cmd = @cmd + '(' + case when @length < 0 then 'max' when left(@type, 1) = 'n' then cast(@length/2 as varchar(4)) else cast(@length as varchar(4)) end + ')' -- sys.columns.max_length is in bytes, so halve it for nchar/nvarchar
else if @type in ('numeric', 'decimal', 'real', 'float')
select @cmd = @cmd + '(' + cast(@precision as varchar(3)) + case when @type in ('decimal', 'numeric') then ', ' + cast(@scale as varchar(3)) else '' end + ')'
else if @type = 'datetime2'
select @cmd = @cmd + '(' + cast(@scale as varchar(3)) + ')'
select @cmd = @cmd + case when @nullable = 1 then ' NULL' else ' NOT NULL' end
select @crow = @crow + 1
select @colid = colid, @column = [column], @type = [type], @length = [length], @precision = [precision], @scale = [scale], @nullable = [nullable] from #columns where colid=@crow
end
select @cmd = @cmd + ') WITH ( LOCATION=''./'', DATA_SOURCE = AzureStorage, FILE_FORMAT = TextFileFormat, REJECT_TYPE = VALUE, REJECT_VALUE = 0 );'
declare @i int = 1
while (@i < len(@cmd))
begin
print substring(@cmd, @i, 1000) -- statements can exceed the capacity of a single print
select @i = @i + 1000
end
print ''
exec(@cmd)
-- get the distribution mechanism and column for the target table
select @distcol='', @disttype=distribution_policy_desc from sys.pdw_table_distribution_properties where object_id=@objectid
select @distcol=c.[name] from sys.pdw_column_distribution_properties d join sys.columns c on (d.object_id=c.object_id and d.column_id=c.column_id) where d.[object_id]=@objectid and distribution_ordinal=1
-- remove target table if it already exists
if exists(select * from sys.tables t join sys.schemas s on (t.schema_id=s.schema_id) where s.[name] = @targetschema and t.[name]=@table)
begin
select @cmd = 'DROP TABLE [' + @targetschema + '].[' + @table + ']'
exec(@cmd)
end
-- build simple CTAS statement (since column type and nullability done in external table)
select @cmd = 'CREATE TABLE [' + @targetschema + '].[' + @table + '] WITH (DISTRIBUTION = '+ case when ISNULL(@disttype, '') = '' then 'HEAP' else @disttype end + case when @distcol != '' then '([' + @distcol + '])' else '' end +
') AS SELECT * FROM [ASB].[' + @table + '] OPTION (LABEL = ''CTAS : Load [' + @targetschema + '].[' + @table + ']'');'
print @cmd
print ''
exec(@cmd)
-- Cleanup external objects
select @cmd = 'DROP EXTERNAL TABLE [ASB].[' + @table + ']'
print @cmd
exec(@cmd)
print 'DROP EXTERNAL DATA SOURCE AzureStorage'
DROP EXTERNAL DATA SOURCE AzureStorage
-- Output row count and elapsed load time for the current table
select @cmd = 'select COUNT_BIG(*) as ''Rows in [' + @targetschema + '].[' + @table + ']'', ' + cast(datediff(s, @start, getdate())/60.0 as varchar(40)) + ' as ''Minutes to Load'' from [' + @targetschema + '].[' + @table + ']'
exec(@cmd)
-- Increment to the next table
select @trow = @trow + 1
select @objectid=objectid, @table=[table] from #tables where rowid=@trow
end
drop table #tables
drop table #columns

View file

@@ -1,195 +0,0 @@
/***This Artifact belongs to the Data SQL Ninja Engineering Team***/
declare @sourceschema varchar(128) = 'SRC_POC_DM'
declare @targetschema varchar(128) = 'TARG_POC_DM'
declare @blobstore varchar(100) = '<account>.blob.core.windows.net'
set nocount on
-- ensure your data warehouse has a master key
-- CREATE MASTER KEY;
-- Use your blob storage key to provide SQL DW access to blob storage
if not exists(select * from [sys].[database_scoped_credentials] where [name]='AzureStorageCredential')
CREATE DATABASE SCOPED CREDENTIAL AzureStorageCredential WITH IDENTITY = 'SHARED ACCESS SIGNATURE', SECRET = 'your key here...=='
-- Create the file format definition
if not exists(select * from [sys].[external_file_formats] where [name]='TextFileFormat')
CREATE EXTERNAL FILE FORMAT TextFileFormat WITH (FORMAT_TYPE = DELIMITEDTEXT, FORMAT_OPTIONS (FIELD_TERMINATOR = '0x01', --STRING_DELIMITER = '',
USE_TYPE_DEFAULT = FALSE), DATA_COMPRESSION = 'org.apache.hadoop.io.compress.GzipCodec')
declare @objectid int, @table varchar(128), @colid int, @column varchar(128), @type varchar(128), @length smallint, @precision tinyint, @scale tinyint, @nullable bit, @colstr varchar(150)
declare @cmd varchar(max), @distcol varchar(128), @disttype varchar(50)
declare @trows int = 0, @trow int = 1, @crows int = 0, @crow int = 1, @start datetime, @tblrows bigint
-- Ensure target schema exists
if not exists(select * from sys.schemas where name = @targetschema)
begin
select @cmd = 'CREATE SCHEMA ' + @targetschema
exec(@cmd)
end
-- Check external table schema exists
if not exists(select * from sys.schemas where name = 'ASB')
exec('CREATE SCHEMA ASB')
-- cleanup of any previous failed run
IF OBJECT_ID('tempdb..#tables') IS NOT NULL
DROP TABLE #tables
IF OBJECT_ID('tempdb..#columns') IS NOT NULL
DROP TABLE #columns
create table #tables
(
rowid int not null,
objectid int not null,
[table] varchar(128) not null
)
WITH ( HEAP , DISTRIBUTION = ROUND_ROBIN )
create table #columns
(
colid int,
[column] varchar(128),
[type] varchar(128),
[length] smallint,
[precision] tinyint,
[scale] tinyint,
[nullable] bit
)
WITH ( HEAP , DISTRIBUTION = ROUND_ROBIN )
-- Set up to process all tables in the defined source schema
insert into #tables
select row_number() over (order by tb.name), object_id, tb.name
from sys.tables tb join sys.schemas s on (tb.schema_id=s.schema_id)
where s.name = @sourceschema
select @trows = count(*) from #tables
--select * from #tables
-- initial cleanup of any previous run - if an external table still exists, you will have to drop it first
if exists(select * from sys.external_data_sources where name='AzureStorage')
drop external data source AzureStorage
select @objectid=objectid, @table=[table] from #tables where rowid=@trow
while (@trow <= @trows)
begin
select @start = getdate() -- save start time
print '---------------------- ' + @targetschema + '.' + @table + ' ----------------------'
-- create the external data source
select @cmd = 'CREATE EXTERNAL DATA SOURCE AzureStorage WITH (TYPE = HADOOP, LOCATION = ''wasbs://' + replace(lower(@table), '_', '-') + '@' + @blobstore + ''', CREDENTIAL = AzureStorageCredential);'
print @cmd
print ''
exec(@cmd)
-- clear all rows in columns temp table (for previous table)
truncate table #columns
-- get all the column definitions for the target table
insert into #columns
select c.column_id, c.[name], t.[name], c.max_length, c.[precision], c.scale, c.is_nullable
from sys.columns c
join sys.types t on (c.user_type_id=t.user_type_id)
where object_id = @objectid
order by c.column_id
-- build external table definition
select @cmd = 'CREATE EXTERNAL TABLE [ASB].[' + @table + '] ('
-- process each column for the target table
select @crows = count(*) from #columns
select @crow = 1
select @colid = colid, @column = [column], @type = [type], @length = [length], @precision = [precision], @scale = [scale], @nullable = [nullable] from #columns where colid=@crow
while (@crow <= @crows)
begin
if (@colid <> 1) select @cmd = @cmd + ','
select @cmd = @cmd + '[' + @column + '] ' + case when @type in ('nvarchar', 'nchar') then 'nvarchar' else 'varchar' end + '(' +
case when @type in ('decimal', 'numeric', 'bigint', 'real', 'float', 'money') then '35'
when @type in ('int', 'smallmoney') then '14'
when @type in ('bit', 'tinyint', 'smallint') then '6'
when @type in ('char', 'varchar', 'nchar', 'nvarchar', 'binary', 'varbinary') then case when @length = -1 then 'MAX' when @length < 6 then '10' when @length > 3980 and left(@type,1)='n' then '4000' when @length > 7980 then '8000' else cast(@length+20 as varchar(5)) end -- handle null and add quotes (& embedded quotes)
when @type = 'uniqueidentifier' then '38'
else '50' end + ') NULL' -- dates and times @ 50 - image, text, xml, hierarchy and spatial data types not supported on DW
select @crow = @crow + 1
select @colid = colid, @column = [column], @type = [type], @length = [length], @precision = [precision], @scale = [scale], @nullable = [nullable] from #columns where colid=@crow
end
select @cmd = @cmd + ') WITH ( LOCATION=''./'', DATA_SOURCE = AzureStorage, FILE_FORMAT = TextFileFormat, REJECT_TYPE = VALUE, REJECT_VALUE = 0 );'
declare @i int = 1
while (@i < len(@cmd))
begin
print substring(@cmd, @i, 1000) -- statements can exceed the capacity of a single print
select @i = @i + 1000
end
print ''
exec(@cmd)
-- get the distribution mechanism and column for the target table
select @distcol='', @disttype=distribution_policy_desc from sys.pdw_table_distribution_properties where object_id=@objectid
select @distcol=c.[name] from sys.pdw_column_distribution_properties d join sys.columns c on (d.object_id=c.object_id and d.column_id=c.column_id) where d.[object_id]=@objectid and distribution_ordinal=1
-- remove target table if it already exists
if exists(select * from sys.tables t join sys.schemas s on (t.schema_id=s.schema_id) where s.[name] = @targetschema and t.[name]=@table)
begin
select @cmd = 'DROP TABLE [' + @targetschema + '].[' + @table + ']'
exec(@cmd)
end
-- build CTAS statement - looping through all of the columns to do a cast to the appropriate data type
select @cmd = 'CREATE TABLE [' + @targetschema + '].[' + @table + '] WITH (DISTRIBUTION = '+ case when ISNULL(@disttype, '') = '' then 'HEAP' else @disttype end + case when @distcol != '' then '([' + @distcol + '])' else '' end + ') AS SELECT '
select @crow = 1
select @colid = colid, @column = [column], @type = [type], @length = [length], @precision = [precision], @scale = [scale], @nullable = [nullable] from #columns where colid=@crow
while (@crow <= @crows)
begin
select @colstr = 'substring(['+@column+'], 2, LEN(['+@column+'])-2)' -- remove lead and tail quotes
if (@colid <> 1) select @cmd = @cmd + ', '
if (@nullable = 0) -- if this column is not nullable we have to give SQL DW the hint to make it NOT NULL - in theory we should error if there is a text value of null in the field, but that is just more code...
select @cmd = @cmd + 'ISNULL(('
if @type in ('char', 'varchar', 'nchar', 'nvarchar') -- remove escaped quotes and replace special line end characters with line feed
select @cmd = @cmd + 'case when [' + @column + '] = ''"null"'' then null else cast(replace(replace(replace(' + @colstr + ', ''\"'', ''"''), char(31), char(10)), char(30), char(13)) as '+ @type +'('+ case when @length=-1 then 'max' else cast(@length as varchar(10)) end +')) end' + case when @nullable=0 then '), '''')' else '' end
else if @type in ('numeric', 'decimal')
select @cmd = @cmd + 'case when [' + @column + '] = ''"null"'' then null else cast(' + @colstr + ' as ' + @type + '(' + cast(@precision as varchar(3)) + ',' + cast(@scale as varchar(3)) + ')) end' + case when @nullable=0 then '), 0.)' else '' end
else if @type in ('bigint', 'real', 'float', 'money', 'int', 'smallmoney', 'bit', 'tinyint', 'smallint')
select @cmd = @cmd + 'case when [' + @column + '] = ''"null"'' then null else cast(' + @colstr + ' as ' + @type + ') end' + case when @nullable=0 then '), 0)' else '' end
else if @type in ('datetime', 'smalldatetime') -- 'date' is handled separately below (different minimum value)
select @cmd = @cmd + 'case when [' + @column + '] = ''"null"'' then null when left(['+@column+'], 5) < ''"1753'' then cast(''1753-01-01 00:00:00'' as ' + @type + ') else cast(substring(' + @colstr + ', 1, (case when CHARINDEX(''.'', [' + @column + ']) != 0 then CHARINDEX(''.'', [' + @column + ']) else len([' + @column + ']) end)-2) as ' + @type + ') end' + case when @nullable=0 then '), cast(''1753-01-01 00:00:00'' as ' + @type + '))' else '' end
else if @type = 'date'
select @cmd = @cmd + 'case when [' + @column + '] = ''"null"'' then null when left(['+@column+'], 5) < ''"0001'' then cast(''0001-01-01'' as date) else cast(' + @colstr + ' as date) end' + case when @nullable=0 then '), cast(''0001-01-01'' as date))' else '' end
else if @type = 'datetime2'
select @cmd = @cmd + 'case when [' + @column + '] = ''"null"'' then null when left(['+@column+'], 5) < ''"0001'' then cast(''0001-01-01 00:00:00'' as datetime2(' + cast(@scale as varchar(3)) + ')) else cast(' + @colstr + ' as datetime2(' + cast(@scale as varchar(3)) + ')) end' + case when @nullable=0 then '), cast(''0001-01-01 00:00:00'' as datetime2(' + cast(@scale as varchar(3)) + ')))' else '' end
else if @type = 'uniqueidentifier'
select @cmd = @cmd + 'case when [' + @column + '] = ''"null"'' then null else cast(' + @colstr + ' as ' + @type + ') end' + case when @nullable=0 then '), cast(''00000000-0000-0000-0000-000000000000'' as uniqueidentifier))' else '' end
else -- not sure any data types are left - if so, you need a null value for 'not null' instead of the 0
select @cmd = @cmd + 'case when [' + @column + '] = ''"null"'' then null else cast(' + @colstr + ' as ' + @type + ') end' + case when @nullable=0 then '), 0)' else '' end
select @cmd = @cmd + ' as ''' + @column + '''' -- add column name
select @crow = @crow + 1
select @colid = colid, @column = [column], @type = [type], @length = [length], @precision = [precision], @scale = [scale], @nullable = [nullable] from #columns where colid=@crow
end
select @cmd = @cmd + ' FROM [ASB].[' + @table + '] OPTION (LABEL = ''CTAS : Load [' + @targetschema + '].[' + @table + ']'');'
select @i = 1
while (@i < len(@cmd))
begin
print substring(@cmd, @i, 1000)
select @i = @i + 1000
end
print ''
exec(@cmd)
-- Cleanup external objects
select @cmd = 'DROP EXTERNAL TABLE [ASB].[' + @table + ']'
print @cmd
exec(@cmd)
print 'DROP EXTERNAL DATA SOURCE AzureStorage'
DROP EXTERNAL DATA SOURCE AzureStorage
-- Output row count and elapsed load time for the current table
select @cmd = 'select COUNT_BIG(*) as ''Rows in [' + @targetschema + '].[' + @table + ']'', ' + cast(datediff(s, @start, getdate())/60.0 as varchar(40)) + ' as ''Minutes to Load'' from [' + @targetschema + '].[' + @table + ']'
exec(@cmd)
-- Increment to the next table
select @trow = @trow + 1
select @objectid=objectid, @table=[table] from #tables where rowid=@trow
end
drop table #tables
drop table #columns

Binary file not shown.

View file

@@ -1,33 +0,0 @@
# /***This Artifact belongs to the Data SQL Ninja Engineering Team***/
# Auto-Generate Azure SQL DW Load – T-SQL Scripts
Script Author: Mitch van Huuksloot, Solution Architect, Data SQL Ninja Engineering Team
These scripts were developed to help with a large Azure SQL DW POC with one of the DMJ customers. Some of the assumptions made in the scripts are very specific to the scenario encountered at the customer. Feel free to adapt these scripts to your data loading scenario.
Each of the scripts creates the objects needed for a high-performance load from text files in blob storage into Azure SQL DW using Polybase. You can easily change the external data source to use Azure Data Lake as the source instead. How well the load parallelizes depends on the number of files provided. Our customer had many gzipped files that distributed well across the DW compute nodes (a few large gzipped files would not have distributed as well).
From a workflow perspective, we used SSMA for Oracle to generate the DW schema and then manually tweaked the data types in the script (the customer used the "number" data type indiscriminately, without scale or precision). The SSMA default conversion of number to float(53) needed to be adjusted, since that turns a precise type into an approximate one. Part of the manual schema editing was also deciding on distribution hash keys, which was an initial guess, since we didn't have a list of typical queries. If we had no idea what to hash on, we left the table round robin. For dimension tables, this would also be the appropriate time to make them replicated tables; we did not have row counts or sizes at that point, so we couldn't make that determination (the guidance is a maximum size of 2 GB).
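As a rough illustration of those distribution choices (the table and column names here are hypothetical, not from the POC), the three CTAS variants look like this:

```sql
-- Fact table with a guessed hash key
CREATE TABLE POC_DM.SALES_FACT
WITH (DISTRIBUTION = HASH(ACCT_PK_ID), CLUSTERED COLUMNSTORE INDEX)
AS SELECT * FROM ASB.SALES_FACT
OPTION (LABEL = 'CTAS : Load POC_DM.SALES_FACT');

-- No obvious hash key: leave the table round robin
CREATE TABLE POC_DM.EVENT_LOG
WITH (DISTRIBUTION = ROUND_ROBIN, CLUSTERED COLUMNSTORE INDEX)
AS SELECT * FROM ASB.EVENT_LOG;

-- Small dimension table (guidance: under ~2 GB): replicate it
CREATE TABLE POC_DM.DATE_DIM
WITH (DISTRIBUTION = REPLICATE, CLUSTERED COLUMNSTORE INDEX)
AS SELECT * FROM ASB.DATE_DIM;
```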
Before running the schema script on the DW, we put all the tables into "source" schemas (SRC_XXX). The source schema is the template the script uses to create the final tables in the target schema (including column types and distribution; note that no indexes other than the default clustered columnstore index are created). Once a table has been processed, just remove it from the source schema. We did not add this step to the script in case an error occurred that did not stop the script, but the drop could easily be added, as sketched below.
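A minimal sketch of that optional cleanup, using the script's own variables and added at the end of the per-table loop (this step is not in the shipped scripts):

```sql
-- Once a table has loaded successfully, drop its template from the source
-- schema so a re-run of the script skips it.
select @cmd = 'DROP TABLE [' + @sourceschema + '].[' + @table + ']'
print @cmd
exec(@cmd)
```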
In the POC, we had 72 tables split across 4 schemas, so we could load each schema separately with the script (note: to load multiple schemas in parallel with this script, you would have to give the AzureStorage object a unique name in each copy of the script).
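For example (hypothetical names), a copy of the script dedicated to one schema would use its own data source name everywhere the original uses AzureStorage:

```sql
-- Hypothetical: the copy loading POC_DM creates and drops its own data source
-- name so it does not collide with other copies running in parallel.
CREATE EXTERNAL DATA SOURCE AzureStorage_POC_DM
WITH (TYPE = HADOOP,
      LOCATION = 'wasbs://account-fact@<account>.blob.core.windows.net',
      CREDENTIAL = AzureStorageCredential);
```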
During the POC, there were two locations where the customer put the text files to be loaded in Azure Blob Storage. Initially, they put each table's files in its own container under the same storage account. Many of the tables had underscores in their names, but the corresponding container names used dashes instead of underscores, so the script replaces underscores with dashes automatically. Later in the POC, the customer started moving newly extracted tables into a single container with a subfolder naming scheme (note: blob storage does not actually have subfolders, just a prefix naming convention). There are therefore two script versions, one for each case. In the first case we need to create a separate external data source for each container (we couldn't get the root-container syntax to work), while in the second case we can use the same external data source for every table and specify the subfolder in the external table creation command.
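A minimal sketch of the second case, assuming a single container (here called extracts) and the ASB external schema the scripts already use; the column list is abbreviated:

```sql
-- One shared data source pointing at the single container...
CREATE EXTERNAL DATA SOURCE AzureStorageShared
WITH (TYPE = HADOOP,
      LOCATION = 'wasbs://extracts@<account>.blob.core.windows.net',
      CREDENTIAL = AzureStorageCredential);

-- ...and the per-table "subfolder" (really just a blob name prefix) supplied
-- in the external table LOCATION instead of in the data source.
CREATE EXTERNAL TABLE ASB.ACCOUNT_FACT
(
    ACCT_PK_ID varchar(35) NULL,
    ACCT_NAME  varchar(70) NULL   -- remaining columns omitted for brevity
)
WITH (LOCATION = '/account_fact/',
      DATA_SOURCE = AzureStorageShared,
      FILE_FORMAT = TextFileFormat,
      REJECT_TYPE = VALUE, REJECT_VALUE = 0);
```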
The V1 script generates a set of "classic" load statements, where the external table definition has the data types expected in each column, so the CTAS statement is just a simple "create table ... as select * ...". Also included is a single-table CTAS load script that documents every step required to load one or more text files into a table in Azure SQL DW.
During the POC, we started with the V1 script but quickly determined that it failed to load many of the customer tables for a variety of reasons: every column, even numeric ones, was quoted (which Polybase might have handled, except that null values were written as "null"); timestamps had more precision than Polybase would accept; embedded escaped quotes (\") were not handled by Polybase; embedded newline characters caused Polybase to error; and so on.
A strategy was developed to address some of the loading issues, which in some cases required the files to be re-extracted from the source system (hence the change during the POC of where the files were stored). The customer had originally selected the ASCII 01 (SOH) character as the field separator. This seemed like a good choice: being an infrequently used control character, it should not appear in any of the text fields. The hope was that Polybase would not have an issue with it, and it didn't, once we specified the value in the script in hex.
Given the relatively exotic column separator, there really wasn't a need for quotes around strings, but they were already in the generated files. We ended up removing the quote specification from the external file format definition, because embedded quotes in columns were causing Polybase errors. In the process we switched to reading everything from the text files as character columns (or Unicode where required) to reduce the number of Polybase issues we were seeing. Note that since everything is read into character columns and there may be embedded quotes in the strings, the character column lengths used are arbitrarily longer than the actual source column lengths (we settled on adding 20 characters).
Instead of staging the raw file in a temp table, we came up with a strategy of casting the columns to the appropriate data types inside the CTAS statement (later we discovered the ISNULL trick to preserve NOT NULL columns). The complexity of the CTAS statement grew over time to handle the many issues we ran into. For some tables the statements were longer than a single print command could handle, so they are printed in 1000-character chunks with no concept of an appropriate line break location.
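A simplified, single-column version of the expression the V2 script generates, using the ACCT_PK_ID column from the single-table example, shows the pattern: strip the lead and tail quotes, map the literal "null" to NULL, cast to the target type, and wrap the result in ISNULL so CTAS infers NOT NULL where the source column requires it:

```sql
SELECT ISNULL(
         (CASE WHEN [ACCT_PK_ID] = '"null"' THEN NULL
               ELSE CAST(SUBSTRING([ACCT_PK_ID], 2, LEN([ACCT_PK_ID]) - 2) AS bigint)
          END), 0) AS ACCT_PK_ID
FROM ASB.ACCOUNT_FACT;
```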
One issue that was very problematic was newlines embedded in column text. We had the customer regenerate the files substituting ASCII 31 (US) for ASCII 10 (LF) and ASCII 30 (RS) for ASCII 13 (CR), and we added code to reverse the replacement on load. They initially missed some columns that used both CR and LF as newlines, but eventually they got them all. One table had a SOH character that showed up in one of the columns, which was very problematic to hunt down: the error was a string truncation reported for column 85, but the problem column was actually column 32. We had the customer regenerate the files for that table with a different field separator, ASCII 28 (FS); that was a temporary script change that is not included in this package but would be easy to replicate.
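The reversal the V2 script builds for character columns looks like this when written out for a single column (again borrowing ACCT_NAME from the single-table example): unescape embedded quotes, then turn the substituted control characters back into real line breaks:

```sql
SELECT CAST(REPLACE(REPLACE(REPLACE(
           SUBSTRING([ACCT_NAME], 2, LEN([ACCT_NAME]) - 2),  -- strip lead/tail quotes
           '\"', '"'),                                       -- unescape embedded quotes
           CHAR(31), CHAR(10)),                              -- ASCII 31 (US) back to LF
           CHAR(30), CHAR(13))                               -- ASCII 30 (RS) back to CR
       AS varchar(50)) AS ACCT_NAME
FROM ASB.ACCOUNT_FACT;
```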
If the script fails, you will need to manually remove the external table definition it created (drop external table ...). We could remove these tables automatically in the script, but there is no way to know whether multiple copies of the script are running in parallel.
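For reference, the manual cleanup after a failure is just the following, substituting whichever table the script was processing:

```sql
DROP EXTERNAL TABLE [ASB].[ACCOUNT_FACT];   -- the table the failed run was on
DROP EXTERNAL DATA SOURCE AzureStorage;     -- only if the failed run left it behind
```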

Binary file not shown.

Binary data
Azure DW Load Scripts/READ ME.docx

Binary file not shown.

View file

@@ -1 +0,0 @@
/***This Artifact belongs to the Data SQL Ninja Engineering Team***/