add hash check for downloaded files

This commit is contained in:
Eqbal Zaffar 2017-06-15 15:33:00 -07:00
Родитель ce23961214
Коммит aa298291e1
12 изменённых файлов: 39 добавлений и 17 удалений

Просмотреть файл

@ -48,6 +48,7 @@ $dataSize = ""
$scriptPath = Get-Location
$filePath = $scriptPath.Path+ "\"
$dataFilePath = $dataPath + "\"
##########################################################################
# Script level variables
@ -147,11 +148,11 @@ if ($uninterrupted -eq 'y' -or $uninterrupted -eq 'Y')
# upload csv files into SQL tables
foreach ($dataFile in $dataList)
{
$destination = $dataPath + $dataFile + $table_suffix + ".csv"
$error_file = $dataPath + $dataFile + $table_suffix + ".error"
Write-Host -ForeGroundColor 'magenta'(" Populate SQL table: {0}..." -f $dataFile)
$destination = $dataFilePath + $dataFile + $table_suffix + ".csv"
$error_file = $dataFilePath + $dataFile + $table_suffix + ".error"
Write-Host -ForeGroundColor 'magenta'(" Populate SQL table: {0}... from {1}" -f $dataFile, $destination)
$tableName = $DBName + ".dbo." + $dataFile + $table_suffix
$tableSchema = $dataPath + $dataFile + $table_suffix + ".xml"
$tableSchema = $dataFilePath + $dataFile + $table_suffix + ".xml"
bcp $tableName format nul -c -x -f $tableSchema -U $username -S $ServerName -P $password -t ','
Write-Host -ForeGroundColor 'magenta'(" Loading {0} to SQL table..." -f $dataFile)
bcp $tableName in $destination -t ',' -S $ServerName -f $tableSchema -F 2 -C "RAW" -b 100000 -U $username -P $password -e $error_file
@ -236,11 +237,11 @@ if ($ans -eq 'y' -or $ans -eq 'Y')
# upload csv files into SQL tables
foreach ($dataFile in $dataList)
{
$destination = $dataPath + $dataFile + $table_suffix + ".csv"
$error_file = $dataPath + $dataFile + $table_suffix + ".error"
Write-Host -ForeGroundColor 'magenta'(" Populate SQL table: {0}..." -f $dataFile)
$destination = $dataFilePath + $dataFile + $table_suffix + ".csv"
$error_file = $dataFilePath + $dataFile + $table_suffix + ".error"
Write-Host -ForeGroundColor 'magenta'(" Populate SQL table: {0} from {1}..." -f $dataFile, $destination)
$tableName = $DBName + ".dbo." + $dataFile + $table_suffix
$tableSchema = $dataPath + $dataFile + $table_suffix + ".xml"
$tableSchema = $dataFilePath + $dataFile + $table_suffix + ".xml"
bcp $tableName format nul -c -x -f $tableSchema -U $username -S $ServerName -P $password -t ','
Write-Host -ForeGroundColor 'magenta'(" Loading {0} to SQL table..." -f $dataFile)
bcp $tableName in $destination -t ',' -S $ServerName -f $tableSchema -F 2 -C "RAW" -b 100000 -U $username -P $password -e $error_file

Просмотреть файл

@ -10,6 +10,7 @@
################################################################################################
param([string]$serverName,[string]$baseurl,[string]$username,[string]$password)
$originalLocation = Get-Location
# This is the directory for the data/code download
$solutionTemplateSetupDir = "LoanChargeOffSolution"
$solutionTemplateSetupPath = "D:\" + $solutionTemplateSetupDir
@ -27,27 +28,45 @@ cd $dataDirPath
$helpShortCutFile = "LoanChargeOffHelp.url"
# List of files to be downloaded
$dataList = "loan_info_10k.csv", "member_info_10k.csv", "payments_info_10k.csv", "loan_info_100k.csv", "member_info_100k.csv", "payments_info_100k.csv", "loan_info_1m.csv", "member_info_1m.csv", "payments_info_1m.csv"
$dataList = "loan_info_10k", "member_info_10k", "payments_info_10k", "loan_info_100k", "member_info_100k", "payments_info_100k", "loan_info_1m", "member_info_1m", "payments_info_1m"
$dataExtn = ".csv"
$hashExtn = ".hash"
foreach ($dataFile in $dataList)
{
$down = $baseurl + '/' + $dataFile
$down = $baseurl + '/' + $dataFile + $dataExtn
Write-Host $down
Start-BitsTransfer -Source $down
}
# making sure that the data files conform to windows style of line ending.
foreach ($dataFile in $dataList)
{
unix2dos $dataFile
}
#checkout setup scripts/code from github
cd $solutionTemplateSetupPath
Remove-Item $checkoutDir -Force -Recurse
git clone -n https://github.com/Microsoft/r-server-loan-chargeoff $checkoutDir
cd $checkoutDir
git config core.sparsecheckout true
echo "/*`r`n!HDI" | out-file -encoding ascii .git/info/sparse-checkout
git checkout master
$sqlsolutionCodePath = $solutionTemplateSetupPath + "\" + $checkoutDir + "\SQL"
cd $sqlsolutionCodePath
# Verify the integrity of every downloaded data file before it is used.
# For each base name in $dataList, the path $dataDirPath\<name>$dataExtn is piped
# to Get-Hash (SHA512) and the result's .Hash property is compared with the
# content of <name>$hashExtn, which is read relative to the current location.
# NOTE(review): Get-Hash is not defined in this view; presumably it hashes the
# file at the piped path rather than the path string itself -- confirm.
foreach ($dataFile in $dataList)
{
$dataFileHash = $dataDirPath + "\" + $dataFile + $dataExtn | Get-Hash -Algorithm SHA512
# assumes the .hash file holds a single line containing only the hash text -- TODO confirm
$storedHash = $dataFile + $hashExtn | Get-Content
if ($dataFileHash.Hash -ne $storedHash)
{
Write-Host -ForeGroundColor 'Red' "Data file has been corrupted. Please try again."
# A bare throw raises a terminating error, so the rest of the script does not
# run unless a caller catches it.
throw
}
}
# Make sure the data files use Windows-style line endings.
# The full file name is built in a variable first so that unix2dos receives it
# as ONE argument; written inline as `unix2dos $dataFile + $dataExtn`,
# PowerShell passes the name, a literal "+", and the extension as three
# separate arguments. The path is rooted at $dataDirPath (the same location the
# hash check reads the data files from) because the current location at this
# point is the SQL code directory, not the data directory.
foreach ($dataFile in $dataList)
{
    $dataFileToConvert = $dataDirPath + "\" + $dataFile + $dataExtn
    unix2dos $dataFileToConvert
}
# Start the script for DB creation. Due to privilege issues with SYSTEM user (the user that runs the
# extension script), we use ps-remoting to log in as an admin user and run the DB creation scripts
@ -57,9 +76,10 @@ $command1 = "runDB.ps1"
$command2 ="setupHelp.ps1"
Enable-PSRemoting -Force
Invoke-Command -Credential $credential -ComputerName $serverName -FilePath $command1 -ArgumentList $dataDirPath
Invoke-Command -Credential $credential -ComputerName $serverName -FilePath $command1 -ArgumentList $dataDirPath, $sqlsolutionCodePath
Invoke-Command -Credential $credential -ComputerName $serverName -FilePath $command2 -ArgumentList $helpShortCutFile
Disable-PSRemoting -Force
cd $originalLocation.Path
Stop-Transcript

Двоичные данные
SQL/loan_info_100k.hash Normal file

Двоичный файл не отображается.

Двоичные данные
SQL/loan_info_10k.hash Normal file

Двоичный файл не отображается.

Двоичные данные
SQL/loan_info_1m.hash Normal file

Двоичный файл не отображается.

Двоичные данные
SQL/member_info_100k.hash Normal file

Двоичный файл не отображается.

Двоичные данные
SQL/member_info_10k.hash Normal file

Двоичный файл не отображается.

Двоичные данные
SQL/member_info_1m.hash Normal file

Двоичный файл не отображается.

Двоичные данные
SQL/payments_info_100k.hash Normal file

Двоичный файл не отображается.

Двоичные данные
SQL/payments_info_10k.hash Normal file

Двоичный файл не отображается.

Двоичные данные
SQL/payments_info_1m.hash Normal file

Двоичный файл не отображается.

Просмотреть файл

@ -12,7 +12,8 @@
# datadir - directory where raw csv data has been downloaded
# datasize - size of the data to train on (10k, 100k, 1m)
##############################################################################################
Param([string]$dbuser, [string]$dbpass, [bool]$createuser = $true, [string]$datadir, [ValidateSet("10k", "100k", "1m")][string]$datasize=10k)
Param([string]$datadir, [string]$scriptdir, [string]$dbuser, [string]$dbpass, [bool]$createuser = $true, [ValidateSet("10k", "100k", "1m")][string]$datasize="10k")
cd $scriptdir
# Function to generate a temporary password for SQL Server
Function Get-TempPassword()
{