move scripts/code from ciqs to github

This commit is contained in:
Eqbal Zaffar 2017-06-15 12:49:50 -07:00
Родитель c2d705e4ea
Коммит 6f3b81f044
17 изменённых файлов: 1569 добавлений и 0 удалений

Просмотреть файл

@ -0,0 +1,2 @@
[InternetShortcut]
URL=https://microsoft.github.io/r-server-campaign-optimization/CIG_Workflow.html

370
SQL/Loan_ChargeOff.ps1 Normal file
Просмотреть файл

@ -0,0 +1,370 @@
<#
.SYNOPSIS
Script to provide Loan ChargeOff predictions, using SQL & MRS.
.DESCRIPTION
This script will show the E2E work flow of loan chargeoff prediction machine learning
templates with Microsoft SQL Server 2016 and Microsoft R services.
For the detailed description, please read README.md.
#>
[CmdletBinding()]
param(
# SQL server address
[parameter(Mandatory=$true,ParameterSetName = "CM")]
[ValidateNotNullOrEmpty()]
[String]
$ServerName = "",
# SQL server database name
[parameter(Mandatory=$true,ParameterSetName = "CM")]
[ValidateNotNullOrEmpty()]
[String]
$DBName = "",
[parameter(Mandatory=$true,ParameterSetName = "CM")]
[ValidateNotNullOrEmpty()]
[String]
$username ="",
[parameter(Mandatory=$true,ParameterSetName = "CM")]
[ValidateNotNullOrEmpty()]
[String]
$password ="",
[parameter(Mandatory=$true,ParameterSetName = "CM")]
[ValidateNotNullOrEmpty()]
[String]
$uninterrupted="",
[parameter(Mandatory=$false,ParameterSetName = "CM")]
[ValidateNotNullOrEmpty()]
[String]
$dataPath = "",
[parameter(Mandatory=$false,ParameterSetName = "CM")]
[ValidateSet("l", "L", "s", "S", "")]
[String]
$dataSize = ""
)
$scriptPath = Get-Location
$filePath = $scriptPath.Path+ "\"
##########################################################################
# Script level variables
##########################################################################
$table_suffix = "_10k"
if ($dataSize -eq 'l' -or $dataSize -eq 'L')
{
$table_suffix = "_1m"
}
$trainingTable = "loan_chargeoff_train" + $table_suffix
$testTable = "loan_chargeoff_test" + $table_suffix
$evalScoreTable = "loan_chargeoff_eval_score" + $table_suffix
$scoreTable = "loan_chargeoff_score" + $table_suffix
$modelTable = "loan_chargeoff_models" + $table_suffix
$predictionTable = "loan_chargeoff_prediction" + $table_suffix
$selectedFeaturesTable = "selected_features" + $table_suffix
##########################################################################
# Function wrapper to invoke SQL command
##########################################################################
function ExecuteSQL
{
param(
[String]
$sqlscript
)
Invoke-Sqlcmd -ServerInstance $ServerName -Database $DBName -Username $username -Password $password -InputFile $sqlscript -QueryTimeout 200000
}
##########################################################################
# Function wrapper to invoke SQL query
##########################################################################
function ExecuteSQLQuery
{
param(
[String]
$sqlquery
)
Invoke-Sqlcmd -ServerInstance $ServerName -Database $DBName -Username $username -Password $password -Query $sqlquery -QueryTimeout 200000
}
##########################################################################
# Get connection string
##########################################################################
function GetConnectionString
{
$connectionString = "Driver=SQL Server;Server=$ServerName;Database=$DBName;UID=$username;PWD=$password"
$connectionString
}
$ServerName2="localhost"
function GetConnectionString2
{
$connectionString2 = "Driver=SQL Server;Server=$ServerName2;Database=$DBName;UID=$username;PWD=$password"
$connectionString2
}
##########################################################################
# Construct the SQL connection strings
##########################################################################
$connectionString = GetConnectionString
$connectionString2 = GetConnectionString2
##########################################################################
# Check if the SQL server or database exists
##########################################################################
$query = "IF NOT EXISTS(SELECT * FROM sys.databases WHERE NAME = '$DBName') CREATE DATABASE $DBName"
Invoke-Sqlcmd -ServerInstance $ServerName -Username $username -Password $password -Query $query -ErrorAction SilentlyContinue
if ($? -eq $false)
{
Write-Host -ForegroundColor Red "Failed the test to connect to SQL server: $ServerName database: $DBName !"
Write-Host -ForegroundColor Red "Please make sure: `n`t 1. SQL Server: $ServerName exists;
`n`t 2. SQL database: $DBName exists;
`n`t 3. SQL user: $username has the right credential for SQL server access."
exit
}
$query = "USE $DBName;"
Invoke-Sqlcmd -ServerInstance $ServerName -Username $username -Password $password -Query $query
##########################################################################
# Running without interruption
##########################################################################
$startTime= Get-Date
Write-Host "Start time is:" $startTime
if ($uninterrupted -eq 'y' -or $uninterrupted -eq 'Y')
{
try
{
# create training and test tables
Write-Host -ForeGroundColor 'green' ("Create SQL tables: member_info, loan_info, payments_info")
$script = $filePath + "step1_create_tables" + $table_suffix + ".sql"
ExecuteSQL $script
Write-Host -ForeGroundColor 'green' ("Populate SQL tables: member_info, loan_info, payments_info")
$dataList = "member_info", "loan_info", "payments_info"
# upload csv files into SQL tables
foreach ($dataFile in $dataList)
{
$destination = $dataPath + $dataFile + $table_suffix + ".csv"
$error_file = $dataPath + $dataFile + $table_suffix + ".error"
Write-Host -ForeGroundColor 'magenta'(" Populate SQL table: {0}..." -f $dataFile)
$tableName = $DBName + ".dbo." + $dataFile + $table_suffix
$tableSchema = $dataPath + $dataFile + $table_suffix + ".xml"
bcp $tableName format nul -c -x -f $tableSchema -U $username -S $ServerName -P $password -t ','
Write-Host -ForeGroundColor 'magenta'(" Loading {0} to SQL table..." -f $dataFile)
bcp $tableName in $destination -t ',' -S $ServerName -f $tableSchema -F 2 -C "RAW" -b 100000 -U $username -P $password -e $error_file
Write-Host -ForeGroundColor 'magenta'(" Done...Loading {0} to SQL table..." -f $dataFile)
}
# create the views for features and label with training, test and scoring split
Write-Host -ForeGroundColor 'magenta'(" Creating features label view and persisting...")
$script = $filepath + "step2_features_label_view" + $table_suffix + ".sql"
ExecuteSQL $script
Write-Host -ForeGroundColor 'magenta'(" Done creating features label view and persisting...")
# create the stored procedure for training
$script = $filepath + "step3_train_test_model.sql"
ExecuteSQL $script
Write-Host -ForeGroundColor 'magenta'(" Done creating training and eval stored proc...")
# execute the training
Write-Host -ForeGroundColor 'magenta'(" Starting training and evaluation of models...")
$modelNames = 'logistic_reg','fast_linear','fast_trees','fast_forest','neural_net'
foreach ($modelName in $modelNames)
{
Write-Host -ForeGroundColor 'Cyan' (" Training $modelName...")
$query = "EXEC train_model $trainingTable, $testTable, $evalScoreTable, $modelTable, $modelName, '$connectionString2'"
ExecuteSQLQuery $query
}
Write-Host -ForeGroundColor 'Cyan' (" Done with training and evaluation of models. Evaluation stats stored in $modelTable...")
# create the stored procedure for recommendations
$script = $filepath + "step4_chargeoff_batch_prediction.sql"
ExecuteSQL $script
Write-Host -ForeGroundColor 'magenta'(" Done creating batch scoring stored proc...")
#score on the data
Write-Host -ForeGroundColor 'Cyan' ("Scoring based on best performing model score table = $scoreTable, prediction table = $predictionTable...")
$scoring_query = "EXEC predict_chargeoff $scoreTable, $predictionTable, $modelTable, '$connectionString2'"
ExecuteSQLQuery $scoring_query
# create the stored procedure for recommendations
$script = $filepath + "step4a_chargeoff_ondemand_prediction.sql"
ExecuteSQL $script
Write-Host -ForeGroundColor 'magenta'(" Done creating on demand scoring stored proc [predict_chargeoff_ondemand]...")
}
catch
{
Write-Host -ForegroundColor DarkYellow "Exception in populating database tables:"
Write-Host -ForegroundColor Red $Error[0].Exception
throw
}
Write-Host -foregroundcolor 'green'("Loan ChargeOff Workflow Finished Successfully!")
}
if ($uninterrupted -eq 'n' -or $uninterrupted -eq 'N')
{
##########################################################################
# Create input tables and populate with data from csv files.
##########################################################################
Write-Host -foregroundcolor 'green' ("Step 0: Create and populate tables in Database" -f $dbname)
$ans = Read-Host 'Continue [y|Y], Exit [e|E], Skip [s|S]?'
if ($ans -eq 'E' -or $ans -eq 'e')
{
return
}
if ($ans -eq 'y' -or $ans -eq 'Y')
{
try
{
# create training and test tables
Write-Host -ForeGroundColor 'green' ("Create SQL tables: member_info, loan_info, payments_info")
$script = $filePath + "step1_create_tables" + $table_suffix + ".sql"
ExecuteSQL $script
Write-Host -ForeGroundColor 'green' ("Populate SQL tables: member_info, loan_info, payments_info")
$dataList = "member_info", "loan_info", "payments_info"
# upload csv files into SQL tables
foreach ($dataFile in $dataList)
{
$destination = $dataPath + $dataFile + $table_suffix + ".csv"
$error_file = $dataPath + $dataFile + $table_suffix + ".error"
Write-Host -ForeGroundColor 'magenta'(" Populate SQL table: {0}..." -f $dataFile)
$tableName = $DBName + ".dbo." + $dataFile + $table_suffix
$tableSchema = $dataPath + $dataFile + $table_suffix + ".xml"
bcp $tableName format nul -c -x -f $tableSchema -U $username -S $ServerName -P $password -t ','
Write-Host -ForeGroundColor 'magenta'(" Loading {0} to SQL table..." -f $dataFile)
bcp $tableName in $destination -t ',' -S $ServerName -f $tableSchema -F 2 -C "RAW" -b 100000 -U $username -P $password -e $error_file
Write-Host -ForeGroundColor 'magenta'(" Done...Loading {0} to SQL table..." -f $dataFile)
}
}
catch
{
Write-Host -ForegroundColor DarkYellow "Exception in populating database tables:"
Write-Host -ForegroundColor Red $Error[0].Exception
throw
}
}
##########################################################################
# Create and execute the scripts for data processing
##########################################################################
Write-Host -foregroundcolor 'green' ("Step 1: Data Processing/Create feature and label views and tables")
$ans = Read-Host 'Continue [y|Y], Exit [e|E], Skip [s|S]?'
if ($ans -eq 'E' -or $ans -eq 'e')
{
return
}
if ($ans -eq 'y' -or $ans -eq 'Y')
{
# create features, labels view
Write-Host -ForeGroundColor 'Cyan' (" Creating feature/label views...")
$script = $filepath + "step2_features_label_view" + $table_suffix + ".sql"
ExecuteSQL $script
}
##########################################################################
# Create and execute the stored procedure for feature selection (optional)
##########################################################################
Write-Host -foregroundcolor 'green' ("Step 2: Feature Engineering (for demo purpose only, training step does it's own feature selection)")
$ans = Read-Host 'Continue [y|Y], Exit [e|E], Skip [s|S]?'
if ($ans -eq 'E' -or $ans -eq 'e')
{
return
}
if ($ans -eq 'y' -or $ans -eq 'Y')
{
# create the stored procedure for feature engineering
$script = $filepath + "step2a_optional_feature_selection.sql"
ExecuteSQL $script
# execute the feature engineering
Write-Host -ForeGroundColor 'Cyan' (" selecting features using MicrosoftML selectFeatures mlTransform with Logistic Regression...")
$query = "EXEC select_features $trainingTable, $testTable, $selectedFeaturesTable, '$connectionString2'"
ExecuteSQLQuery $query
}
##########################################################################
# Create and execute the stored procedure for Training and evaluation
##########################################################################
Write-Host -foregroundcolor 'green' ("Step 3: Models Training and Evaluation")
$ans = Read-Host 'Continue [y|Y], Exit [e|E], Skip [s|S]?'
if ($ans -eq 'E' -or $ans -eq 'e')
{
return
}
if ($ans -eq 'y' -or $ans -eq 'Y')
{
# create the stored procedure for training
$script = $filepath + "step3_train_test_model.sql"
ExecuteSQL $script
Write-Host -ForeGroundColor 'magenta'(" Starting training and evaluation of models...")
$modelNames = 'logistic_reg','fast_linear','fast_trees','fast_forest','neural_net'
foreach ($modelName in $modelNames)
{
Write-Host -ForeGroundColor 'Cyan' (" Training $modelName...")
$query = "EXEC train_model $trainingTable, $testTable, $evalScoreTable, $modelTable, $modelName, '$connectionString2'"
ExecuteSQLQuery $query
}
}
##########################################################################
# Create and execute the stored procedure for charge_off predictions
##########################################################################
Write-Host -foregroundcolor 'green' ("Step 4: ChargeOff predictions")
$ans = Read-Host 'Continue [y|Y], Exit [e|E], Skip [s|S]?'
if ($ans -eq 'E' -or $ans -eq 'e')
{
return
}
if ($ans -eq 'y' -or $ans -eq 'Y')
{
# create the stored procedure for recommendations
$script = $filepath + "step4_chargeoff_batch_prediction.sql"
ExecuteSQL $script
# compute loan chargeoff predictions
Write-Host -ForeGroundColor 'Cyan' ("Scoring based on best performing model score table = $scoreTable, prediction table = $predictionTable...")
$query = "EXEC predict_chargeoff $scoreTable, $predictionTable, $modelTable, '$connectionString2'"
ExecuteSQLQuery $query
}
Write-Host -foregroundcolor 'green' ("Step 4a: Create on demand ChargeOff prediction stored proc")
$ans = Read-Host 'Continue [y|Y], Exit [e|E], Skip [s|S]?'
if ($ans -eq 'E' -or $ans -eq 'e')
{
return
}
if ($ans -eq 'y' -or $ans -eq 'Y')
{
# create the stored procedure for recommendations
$script = $filepath + "step4a_chargeoff_ondemand_prediction.sql"
ExecuteSQL $script
Write-Host -ForeGroundColor 'Cyan' ("Done creating on demand chargeoff prediction stored proc [predict_chargeoff_ondemand]...")
}
Write-Host -foregroundcolor 'green'("Loan Chargeoff Prediction Workflow Finished Successfully!")
}
$endTime =Get-Date
$totalTime = ($endTime-$startTime).ToString()
Write-Host "Finished running at:" $endTime
Write-Host "Total time used: " -foregroundcolor 'green' $totalTime.ToString()

65
SQL/Setup.ps1 Normal file
Просмотреть файл

@ -0,0 +1,65 @@
################################################################################################
# Powershell script for setting up the solution template. This script checks out the solution
# from github and deploys it to SQL Server on the local Data Science VM (DSVM).
#
# Parameters:
# serverName - Name of the server with SQL Server with R Services (this is the DSVM server)
# baseurl =
# username = login username for the server
# password = login password for the server
################################################################################################
param([string]$serverName,[string]$baseurl,[string]$username,[string]$password)
# This is the directory for the data/code download
$solutionTemplateSetupDir = "D:\SolutionTemplateSetup"
$dataDir = $solutionTemplateSetupDir + "Data"
$checkoutDir = "LoanChargeOff"
New-Item -Name $solutionTemplateSetupDir -ItemType directory
$setupLog = $solutionTemplateSetupDir + "setup_log.txt"
Start-Transcript -Path $setupLog -Append
cd $dataDir
$helpShortCutFile = "LoanChargeOffHelp.url"
# List of files to be downloaded
$dataList = "loan_info_10k.csv", "member_info_10k.csv", "payments_info_10k.csv", "loan_info_1m.csv", "member_info_1m.csv", "payments_info_1m.csv","step1_create_tables_10k.sql","step1_create_tables_1m.sql","step2_features_label_view_10k.sql","step2_features_label_view_1m.sql","step2a_optional_feature_selection.sql","step3_train_test_model.sql","step4_chargeoff_batch_prediction.sql","step4a_chargeoff_ondemand_prediction.sql","createuser.sql","Loan_ChargeOff.ps1","runDB.ps1","runDB_LargeDataSet.ps1","setupHelp.ps1",$helpShortCutFile
foreach ($dataFile in $dataList)
{
$down = $baseurl + '/' + $dataFile
Write-Host $down
Start-BitsTransfer -Source $down
}
cd $solutionTemplateSetupDir
git clone -n https://github.com/Microsoft/r-server-loan-chargeoff $checkoutDir
cd $checkoutDir
git config core.sparsecheckout true
echo "/*`r`n!HDI" | out-file -encoding ascii .git/info/sparse-checkout
git checkout master
# making sure that the data files conform to windows style of line ending.
$dataList = "loan_info_10k.csv", "member_info_10k.csv", "payments_info_10k.csv", "loan_info_1m.csv", "member_info_1m.csv", "payments_info_1m.csv"
foreach ($dataFile in $dataList)
{
unix2dos $dataDir + "/" + $dataFile
}
# Start the script for DB creation. Due to privilege issues with SYSTEM user (the user that runs the
# extension script), we use ps-remoting to login as admin use and run the DB creation scripts
$passwords = $password | ConvertTo-SecureString -AsPlainText -Force
$credential = New-Object System.Management.Automation.PSCredential("$serverName\$username", $passwords)
$command1 = "C:\Windows\Temp\runDB.ps1"
$command2 ="C:\Windows\Temp\setupHelp.ps1"
Enable-PSRemoting -Force
Invoke-Command -Credential $credential -ComputerName $serverName -FilePath $command1 -ArgumentList $dataDir
Invoke-Command -Credential $credential -ComputerName $serverName -FilePath $command2 -ArgumentList $helpShortCutFile
Disable-PSRemoting -Force
Stop-Transcript

36
SQL/createuser.sql Normal file
Просмотреть файл

@ -0,0 +1,36 @@
--
-- remove old $(username) user and login from master
--
USE [master]
GO
IF EXISTS (SELECT name FROM sys.database_principals WHERE name = '$(username)')
BEGIN
PRINT 'Deleting old $(username) user from master'
DROP USER [$(username)]
END
GO
IF EXISTS (SELECT name FROM master.sys.server_principals WHERE name = '$(username)')
BEGIN
PRINT 'Deleting old $(username) login from master'
DROP LOGIN [$(username)]
END
GO
--
-- create new $(username) login in master
--
USE [master]
GO
PRINT 'Creating $(username) login in master'
CREATE LOGIN [$(username)] WITH PASSWORD=N'$(password)', CHECK_EXPIRATION=OFF, CHECK_POLICY=OFF;
CREATE USER [$(username)] FOR LOGIN [$(username)]
--ALTER ROLE [db_rrerole] ADD MEMBER [$(username)]
ALTER ROLE [db_owner] ADD MEMBER [$(username)]
GO
exec sp_addrolemember 'db_owner', '$(username)'
exec sp_addrolemember 'db_ddladmin', '$(username)'
exec sp_addrolemember 'db_accessadmin', '$(username)'
exec sp_addrolemember 'db_datareader', '$(username)'
exec sp_addrolemember 'db_datawriter', '$(username)'
exec sp_addsrvrolemember @loginame= '$(username)', @rolename = 'sysadmin'
GO

Просмотреть файл

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ca302200f994e693bcfe6f772a5d1b2e912a8408add58ea33cbed97ace9e082f
size 6230802

Просмотреть файл

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9f510f322422b15200b87358c9b07b02aadca714ca777b892ec2db91b9bbe53a
size 6233895

77
SQL/runDB.ps1 Normal file
Просмотреть файл

@ -0,0 +1,77 @@
##############################################################################################
# Script to invoke the LoanChargeOff data science workflow with a smaller dataset of 10,000
# loans.
# It also creates a SQL Server user and stores the password in 'ExporedSqlPassword.txt'.
# Users can retrieve the password from the file and decrypt using ConvertTo-SecureString
# commandlet in PowerShell.
#
# Parameters:
# dbuser - (Optional) username for database LoanChargeOff
# dbpass - (Optional) database password
# createuser - (Optional) whethere to create a database user
##############################################################################################
Param([string]$dbuser, [string]$dbpass, [bool]$createuser = $true, [string]$datadir)
# Function to generate a temporary password for SQL Server
Function Get-TempPassword()
{
Param
(
[int]$length=10,
[string[]]$sourcedata
)
For ($loop=1; $loop -le $length; $loop++)
{
$TempPassword += ($sourcedata | Get-Random)
}
return $TempPassword
}
$passwordSource=$NULL
$dbpassword = ""
$dbusername = "rdemo"
$passwordFile = "ExportedSqlPassword.txt"
For ($a=33;$a -le 126; $a++)
{
$passwordSource += ,[char][byte]$a
}
if ($dbuser)
{
$dbusername = $dbuser
}
if (!$createuser)
{
if (!$dbpass)
{
if (Test-Path $passwordFile)
{
$secureTxtFromFile = Get-Content $passwordFile
$securePasswordObj = $secureTxtFromFile | ConvertTo-SecureString
#get back the original unencrypted password
$PasswordBSTR = [System.Runtime.InteropServices.Marshal]::SecureStringToBSTR($securePasswordObj)
$dbpassword = [System.Runtime.InteropServices.Marshal]::PtrToStringAuto($PasswordBSTR)
}
else
{
Write-Host -ForegroundColor DarkYellow "Either ExportedSqlPassword.txt must exist with encrypted database password or must provide password using dbpass parameter."
throw
}
}
else
{
$dbpassword = $dbpass
}
}
else
{
Write-Host -ForegroundColor Cyan "Creating database user"
$dbpassword = Get-TempPassword -length 15 -sourcedata $passwordSource
$securePassword = $dbpassword | ConvertTo-SecureString -AsPlainText -Force
$secureTxt = $securePassword | ConvertFrom-SecureString
Set-Content $passwordFile $secureTxt
sqlcmd -S $env:COMPUTERNAME -v username="$dbusername" -v password="$dbpassword" -i .\createuser.sql
}
.\Loan_ChargeOff.ps1 -ServerName $env:COMPUTERNAME -DBName LoanChargeOff -username $dbusername -password $dbpassword -uninterrupted y -dataPath $datadir

Просмотреть файл

@ -0,0 +1,77 @@
##############################################################################################
# Script to invoke the LoanChargeOff data science workflow with a larger dataset of 1,000,000
# loans.
# It can also optionally creates a SQL Server user and stores the password in
# 'ExporedSqlPassword.txt'. Users can retrieve the password from the file and decrypt using
# ConvertTo-SecureString commandlet in PowerShell.
#
# Parameters:
# dbuser - (Optional) username for database LoanChargeOff
# dbpass - (Optional) database password
# createuser - (Optional) whethere to create a database user
##############################################################################################
Param([string]$dbuser, [string]$dbpass, [bool]$createuser = $true, [string]$datadir)
# Function to generate a temporary password for SQL Server
Function Get-TempPassword()
{
Param
(
[int]$length=10,
[string[]]$sourcedata
)
For ($loop=1; $loop -le $length; $loop++)
{
$TempPassword += ($sourcedata | Get-Random)
}
return $TempPassword
}
$passwordSource=$NULL
$dbpassword = ""
$dbusername = "rdemo"
$passwordFile = "ExportedSqlPassword.txt"
For ($a=33;$a -le 126; $a++)
{
$passwordSource += ,[char][byte]$a
}
if ($dbuser)
{
$dbusername = $dbuser
}
if (!$createuser)
{
if (!$dbpass)
{
if (Test-Path $passwordFile)
{
$secureTxtFromFile = Get-Content $passwordFile
$securePasswordObj = $secureTxtFromFile | ConvertTo-SecureString
#get back the original unencrypted password
$PasswordBSTR = [System.Runtime.InteropServices.Marshal]::SecureStringToBSTR($securePasswordObj)
$dbpassword = [System.Runtime.InteropServices.Marshal]::PtrToStringAuto($PasswordBSTR)
}
else
{
Write-Host -ForegroundColor DarkYellow "Either ExportedSqlPassword.txt must exist with encrypted database password or must provide password using dbpass parameter."
throw
}
}
else
{
$dbpassword = $dbpass
}
}
else
{
Write-Host -ForegroundColor Cyan "Creating database user"
$dbpassword = Get-TempPassword -length 15 -sourcedata $passwordSource
$securePassword = $dbpassword | ConvertTo-SecureString -AsPlainText -Force
$secureTxt = $securePassword | ConvertFrom-SecureString
Set-Content $passwordFile $secureTxt
sqlcmd -S $env:COMPUTERNAME -v username="$dbusername" -v password="$dbpassword" -i .\createuser.sql
}
.\Loan_ChargeOff.ps1 -ServerName $env:COMPUTERNAME -DBName LoanChargeOff -username $dbusername -password $dbpassword -uninterrupted y -dataPath $datadir -dataSize L

23
SQL/setupHelp.ps1 Normal file
Просмотреть файл

@ -0,0 +1,23 @@
param( [string]$helpfile)
#git clone
$desktop = [Environment]::GetFolderPath("Desktop")
$desktop = $desktop + '\'
#create the help link in startup program
$startmenu = [Environment]::GetFolderPath("StartMenu")
$startupfolder = $startmenu + '\Programs\Startup\'
# We create this since the user startup folder is only created after first login
# Alternative is to add is to all user startup
mkdir $startupfolder
#copy
$down = $helpfile
Write-Host $down
Write-Host $startmenu
ls $startmenu
Write-Host $startupfolder
ls $startupfolder
cp -Verbose $down $startupfolder
cp -Verbose $down $desktop

Просмотреть файл

@ -0,0 +1,104 @@
SET ansi_nulls on
GO
SET quoted_identifier on
GO
/* Create the member_info Table. */
DROP TABLE IF EXISTS member_info_10k
CREATE TABLE [member_info_10k](
[memberId] [int],
[residentialState] [nvarchar](4),
[annualIncome] [real],
[yearsEmployment] [nvarchar](11),
[homeOwnership] [nvarchar](10),
[incomeVerified] [bit],
[creditScore] [int],
[dtiRatio] [real],
[revolvingBalance] [real],
[revolvingUtilizationRate] [real],
[numDelinquency2Years] [int],
[numDerogatoryRec] [int],
[numInquiries6Mon] [int],
[lengthCreditHistory] [int],
[numOpenCreditLines] [int],
[numTotalCreditLines] [int],
[numChargeoff1year] [int]
);
CREATE CLUSTERED COLUMNSTORE INDEX member_info_10k_cci ON member_info_10k WITH (DROP_EXISTING = OFF);
GO
/* Create the loan_info Table. */
DROP TABLE IF EXISTS loan_info_10k
CREATE TABLE [loan_info_10k](
[loanId] [int],
[loan_open_date] [datetime],
[memberId] [int],
[loanAmount] [real],
[interestRate] [real],
[grade] [int],
[term] [int],
[installment] [real],
[isJointApplication] [bit],
[purpose] [nvarchar](255)
);
CREATE CLUSTERED COLUMNSTORE INDEX loan_info_10k_cci ON loan_info_10k WITH (DROP_EXISTING = OFF);
GO
/* Create the payments_info Table*/
DROP TABLE IF EXISTS payments_info_10k
CREATE TABLE [payments_info_10k](
[loanId] [int],
[payment_date] [datetime],
[payment] [real],
[past_due] [real],
[remain_balance] [real],
[closed] [bit],
[charged_off] [bit]
);
CREATE CLUSTERED COLUMNSTORE INDEX payments_info_10k_cci ON payments_info_10k WITH (DROP_EXISTING = OFF);
GO
DROP TABLE IF EXISTS [loan_chargeoff_models_10k];
CREATE TABLE [loan_chargeoff_models_10k]
(
[model_name] varchar(30) not null default('default model') primary key,
[model] varbinary(max) not null,
[auc] real,
[accuracy] real,
[precision] real,
[recall] real,
[f1score] real,
[training_ts] datetime default(GETDATE())
);
GO
DROP TABLE IF EXISTS [selected_features_10k];
CREATE TABLE [selected_features_10k](
[feature_id] [int] IDENTITY(1,1) NOT NULL,
[feature_name] [nvarchar](500) NOT NULL
);
GO
DROP TABLE IF EXISTS [loan_chargeoff_prediction_10k]
CREATE TABLE [loan_chargeoff_prediction_10k](
[memberId] [int],
[loanId] [int],
[payment_date] [date],
[prediction_date] [date] default(GETDATE()),
[PredictedLabel] [nvarchar](255),
[Score.1] [float],
[Probability.1] [float]
);
GO

Просмотреть файл

@ -0,0 +1,103 @@
SET ansi_nulls on
GO
SET quoted_identifier on
GO
/* Create the member_info Table. */
/* Large DataSets */
DROP TABLE IF EXISTS member_info_1m
CREATE TABLE [member_info_1m](
[memberId] [int],
[residentialState] [nvarchar](4),
[annualIncome] [real],
[yearsEmployment] [nvarchar](11),
[homeOwnership] [nvarchar](10),
[incomeVerified] [bit],
[creditScore] [int],
[dtiRatio] [real],
[revolvingBalance] [real],
[revolvingUtilizationRate] [real],
[numDelinquency2Years] [int],
[numDerogatoryRec] [int],
[numInquiries6Mon] [int],
[lengthCreditHistory] [int],
[numOpenCreditLines] [int],
[numTotalCreditLines] [int],
[numChargeoff1year] [int]
);
CREATE CLUSTERED COLUMNSTORE INDEX member_info_1m_cci ON member_info_1m WITH (DROP_EXISTING = OFF);
GO
/* Create the loan_info Table. */
DROP TABLE IF EXISTS loan_info_1m
CREATE TABLE [loan_info_1m](
[loanId] [int],
[loan_open_date] [datetime],
[memberId] [int],
[loanAmount] [real],
[interestRate] [real],
[grade] [int],
[term] [int],
[installment] [real],
[isJointApplication] [bit],
[purpose] [nvarchar](255)
);
CREATE CLUSTERED COLUMNSTORE INDEX loan_info_1m_cci ON loan_info_1m WITH (DROP_EXISTING = OFF);
GO
/* Create the payments_info Table*/
DROP TABLE IF EXISTS payments_info_1m
CREATE TABLE [payments_info_1m](
[loanId] [int],
[payment_date] [date],
[payment] [real],
[past_due] [real],
[remain_balance] [real],
[closed] [bit],
[charged_off] [bit]
);
CREATE CLUSTERED COLUMNSTORE INDEX payments_info_1m_cci ON payments_info_1m WITH (DROP_EXISTING = OFF);
GO
DROP TABLE IF EXISTS [loan_chargeoff_models_1m];
CREATE TABLE [loan_chargeoff_models_1m]
(
[model_name] varchar(30) not null default('default model') primary key,
[model] varbinary(max) not null,
[auc] real,
[accuracy] real,
[precision] real,
[recall] real,
[f1score] real,
[training_ts] datetime default(GETDATE())
);
GO
DROP TABLE IF EXISTS selected_features_1m;
CREATE TABLE [selected_features_1m](
[feature_id] [int] IDENTITY(1,1) NOT NULL,
[feature_name] [nvarchar](500) NOT NULL
);
GO
DROP TABLE IF EXISTS [loan_chargeoff_prediction_1m]
CREATE TABLE [loan_chargeoff_prediction_1m](
[memberId] [int],
[loanId] [int],
[payment_date] [date],
[prediction_date] [date] default(GETDATE()),
[PredictedLabel] [nvarchar](255),
[Score.1] [float],
[Probability.1] [float]
);
GO

Просмотреть файл

@ -0,0 +1,137 @@
-- View over the underlying table for features and labels required
drop view if exists vw_loan_chargeoff_train_10k
go
create view vw_loan_chargeoff_train_10k
as
select t.loanId, t.payment_date, t.payment, t.past_due, t.remain_balance,
l.loan_open_date, l.loanAmount,l.interestRate,l.grade,l.term,l.installment,l.isJointApplication,l.purpose,
m.memberId,m.residentialState,m.annualIncome,m.yearsEmployment,m.homeOwnership,m.incomeVerified,m.creditScore,m.dtiRatio,m.revolvingBalance,m.revolvingUtilizationRate,m.numDelinquency2Years,m.numDerogatoryRec,m.numInquiries6Mon,m.lengthCreditHistory,m.numOpenCreditLines,m.numTotalCreditLines,m.numChargeoff1year,
ISNULL(t.payment_1, 0) payment_1,ISNULL(t.payment_2, 0) payment_2,ISNULL(t.payment_3, 0) payment_3,ISNULL(t.payment_4, 0) payment_4,ISNULL(t.payment_5, 0) payment_5,
ISNULL(t.past_due_1, 0) past_due_1,ISNULL(t.past_due_2, 0) past_due_2,ISNULL(t.past_due_3, 0) past_due_3,ISNULL(t.past_due_4, 0) past_due_4,ISNULL(t.past_due_5, 0) past_due_5,
ISNULL(t.remain_balance_1, 0) remain_balance_1,ISNULL(t.remain_balance_2, 0) remain_balance_2,ISNULL(t.remain_balance_3, 0) remain_balance_3,ISNULL(t.remain_balance_4, 0) remain_balance_4,ISNULL(t.remain_balance_5, 0) remain_balance_5, t.charge_off
from
(
select *,
(select top 1 payment from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) payment_1,
(select top 1 payment from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) payment_2,
(select top 1 payment from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) payment_3,
(select top 1 payment from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) payment_4,
(select top 1 payment from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) payment_5,
(select top 1 past_due from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) past_due_1,
(select top 1 past_due from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) past_due_2,
(select top 1 past_due from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) past_due_3,
(select top 1 past_due from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) past_due_4,
(select top 1 past_due from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) past_due_5,
(select top 1 remain_balance from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) remain_balance_1,
(select top 1 remain_balance from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) remain_balance_2,
(select top 1 remain_balance from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) remain_balance_3,
(select top 1 remain_balance from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) remain_balance_4,
(select top 1 remain_balance from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) remain_balance_5,
(select MAX(charged_off+0) from payments_info_10k p2 where DATEDIFF(month, p1.payment_date,p2.payment_date) IN (1,2,3) AND p1.loanId = p2.loanId) charge_off
from payments_info_10k p1 ) AS t inner join loan_info_10k l ON t.loanId = l.loanId inner join member_info_10k m ON l.memberId = m.memberId
where t.charge_off IS NOT NULL
and ((payment_date between '2016-09-12' and '2016-12-12' and charge_off = 1) or (payment_date = '2017-01-12'));
go
drop view if exists vw_loan_chargeoff_test_10k
go
create view vw_loan_chargeoff_test_10k
as
select t.loanId, t.payment_date, t.payment, t.past_due, t.remain_balance,
l.loan_open_date, l.loanAmount,l.interestRate,l.grade,l.term,l.installment,l.isJointApplication,l.purpose,
m.memberId,m.residentialState,m.annualIncome,m.yearsEmployment,m.homeOwnership,m.incomeVerified,m.creditScore,m.dtiRatio,m.revolvingBalance,m.revolvingUtilizationRate,m.numDelinquency2Years,m.numDerogatoryRec,m.numInquiries6Mon,m.lengthCreditHistory,m.numOpenCreditLines,m.numTotalCreditLines,m.numChargeoff1year,
ISNULL(t.payment_1, 0) payment_1,ISNULL(t.payment_2, 0) payment_2,ISNULL(t.payment_3, 0) payment_3,ISNULL(t.payment_4, 0) payment_4,ISNULL(t.payment_5, 0) payment_5,
ISNULL(t.past_due_1, 0) past_due_1,ISNULL(t.past_due_2, 0) past_due_2,ISNULL(t.past_due_3, 0) past_due_3,ISNULL(t.past_due_4, 0) past_due_4,ISNULL(t.past_due_5, 0) past_due_5,
ISNULL(t.remain_balance_1, 0) remain_balance_1,ISNULL(t.remain_balance_2, 0) remain_balance_2,ISNULL(t.remain_balance_3, 0) remain_balance_3,ISNULL(t.remain_balance_4, 0) remain_balance_4,ISNULL(t.remain_balance_5, 0) remain_balance_5, t.charge_off
from
(
select *,
(select top 1 payment from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) payment_1,
(select top 1 payment from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) payment_2,
(select top 1 payment from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) payment_3,
(select top 1 payment from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) payment_4,
(select top 1 payment from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) payment_5,
(select top 1 past_due from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) past_due_1,
(select top 1 past_due from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) past_due_2,
(select top 1 past_due from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) past_due_3,
(select top 1 past_due from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) past_due_4,
(select top 1 past_due from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) past_due_5,
(select top 1 remain_balance from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) remain_balance_1,
(select top 1 remain_balance from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) remain_balance_2,
(select top 1 remain_balance from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) remain_balance_3,
(select top 1 remain_balance from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) remain_balance_4,
(select top 1 remain_balance from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) remain_balance_5,
(select MAX(charged_off+0) from payments_info_10k p2 where DATEDIFF(month, p1.payment_date,p2.payment_date) IN (1,2,3) AND p1.loanId = p2.loanId) charge_off
from payments_info_10k p1 ) AS t inner join loan_info_10k l ON t.loanId = l.loanId inner join member_info_10k m ON l.memberId = m.memberId
where t.charge_off IS NOT NULL
and payment_date = '2017-02-12';
go
drop view if exists vw_loan_chargeoff_score_10k
go
create view vw_loan_chargeoff_score_10k
as
select t.loanId, t.payment_date, t.payment, t.past_due, t.remain_balance,
l.loan_open_date, l.loanAmount,l.interestRate,l.grade,l.term,l.installment,l.isJointApplication,l.purpose,
m.memberId,m.residentialState,m.annualIncome,m.yearsEmployment,m.homeOwnership,m.incomeVerified,m.creditScore,m.dtiRatio,m.revolvingBalance,m.revolvingUtilizationRate,m.numDelinquency2Years,m.numDerogatoryRec,m.numInquiries6Mon,m.lengthCreditHistory,m.numOpenCreditLines,m.numTotalCreditLines,m.numChargeoff1year,
ISNULL(t.payment_1, 0) payment_1,ISNULL(t.payment_2, 0) payment_2,ISNULL(t.payment_3, 0) payment_3,ISNULL(t.payment_4, 0) payment_4,ISNULL(t.payment_5, 0) payment_5,
ISNULL(t.past_due_1, 0) past_due_1,ISNULL(t.past_due_2, 0) past_due_2,ISNULL(t.past_due_3, 0) past_due_3,ISNULL(t.past_due_4, 0) past_due_4,ISNULL(t.past_due_5, 0) past_due_5,
ISNULL(t.remain_balance_1, 0) remain_balance_1,ISNULL(t.remain_balance_2, 0) remain_balance_2,ISNULL(t.remain_balance_3, 0) remain_balance_3,ISNULL(t.remain_balance_4, 0) remain_balance_4,ISNULL(t.remain_balance_5, 0) remain_balance_5, t.charge_off
from
(
select *,
(select top 1 payment from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) payment_1,
(select top 1 payment from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) payment_2,
(select top 1 payment from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) payment_3,
(select top 1 payment from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) payment_4,
(select top 1 payment from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) payment_5,
(select top 1 past_due from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) past_due_1,
(select top 1 past_due from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) past_due_2,
(select top 1 past_due from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) past_due_3,
(select top 1 past_due from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) past_due_4,
(select top 1 past_due from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) past_due_5,
(select top 1 remain_balance from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) remain_balance_1,
(select top 1 remain_balance from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) remain_balance_2,
(select top 1 remain_balance from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) remain_balance_3,
(select top 1 remain_balance from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) remain_balance_4,
(select top 1 remain_balance from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) remain_balance_5,
(select MAX(charged_off+0) from payments_info_10k p2 where DATEDIFF(month, p1.payment_date,p2.payment_date) IN (1,2,3) AND p1.loanId = p2.loanId) charge_off
from payments_info_10k p1 ) AS t inner join loan_info_10k l ON t.loanId = l.loanId inner join member_info_10k m ON l.memberId = m.memberId
where t.charge_off IS NOT NULL
and payment_date > '2017-02-12';
go
-- persist the view in case of large dataset in order to get faster results
drop table if exists [loan_chargeoff_train_10k]
go
select *
into [loan_chargeoff_train_10k]
from [vw_loan_chargeoff_train_10k]
go
create clustered columnstore index [cci_loan_chargeoff_train_10k] on [loan_chargeoff_train_10k]
go
drop table if exists [loan_chargeoff_test_10k]
go
select *
into [loan_chargeoff_test_10k]
from [vw_loan_chargeoff_test_10k]
go
create clustered columnstore index [cci_loan_chargeoff_test_10k] on [loan_chargeoff_test_10k]
go
drop table if exists [loan_chargeoff_score_10k]
go
select *
into [loan_chargeoff_score_10k]
from [vw_loan_chargeoff_score_10k]
go
create clustered columnstore index [cci_loan_chargeoff_score_10k] on [loan_chargeoff_score_10k]
go

Просмотреть файл

@ -0,0 +1,140 @@
-- View over the underlying table for features and labels required
/* Large DataSets */
drop view if exists [dbo].[vw_loan_chargeoff_1m]
go
create view [dbo].[vw_loan_chargeoff_1m]
as
select t.loanId, t.payment_date, t.payment, t.past_due, t.remain_balance,
l.loan_open_date, l.loanAmount,l.interestRate,l.grade,l.term,l.installment,l.isJointApplication,l.purpose,
m.memberId,m.residentialState,m.annualIncome,m.yearsEmployment,m.homeOwnership,m.incomeVerified,m.creditScore,m.dtiRatio,m.revolvingBalance,m.revolvingUtilizationRate,m.numDelinquency2Years,m.numDerogatoryRec,m.numInquiries6Mon,m.lengthCreditHistory,m.numOpenCreditLines,m.numTotalCreditLines,m.numChargeoff1year,
ISNULL(t.payment_1, 0) payment_1,ISNULL(t.payment_2, 0) payment_2,ISNULL(t.payment_3, 0) payment_3,ISNULL(t.payment_4, 0) payment_4,ISNULL(t.payment_5, 0) payment_5,
ISNULL(t.past_due_1, 0) past_due_1,ISNULL(t.past_due_2, 0) past_due_2,ISNULL(t.past_due_3, 0) past_due_3,ISNULL(t.past_due_4, 0) past_due_4,ISNULL(t.past_due_5, 0) past_due_5,
ISNULL(t.remain_balance_1, 0) remain_balance_1,ISNULL(t.remain_balance_2, 0) remain_balance_2,ISNULL(t.remain_balance_3, 0) remain_balance_3,ISNULL(t.remain_balance_4, 0) remain_balance_4,ISNULL(t.remain_balance_5, 0) remain_balance_5, t.charge_off
from
(
select *,
(select top 1 payment from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) payment_1,
(select top 1 payment from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) payment_2,
(select top 1 payment from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) payment_3,
(select top 1 payment from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) payment_4,
(select top 1 payment from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) payment_5,
(select top 1 past_due from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) past_due_1,
(select top 1 past_due from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) past_due_2,
(select top 1 past_due from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) past_due_3,
(select top 1 past_due from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) past_due_4,
(select top 1 past_due from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) past_due_5,
(select top 1 remain_balance from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) remain_balance_1,
(select top 1 remain_balance from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) remain_balance_2,
(select top 1 remain_balance from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) remain_balance_3,
(select top 1 remain_balance from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) remain_balance_4,
(select top 1 remain_balance from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) remain_balance_5,
(select MAX(charged_off+0) from payments_info_1m p2 where DATEDIFF(month, p1.payment_date,p2.payment_date) IN (1,2,3) AND p1.loanId = p2.loanId) charge_off
from payments_info_1m p1 ) AS t inner join loan_info_1m l ON t.loanId = l.loanId inner join member_info_1m m ON l.memberId = m.memberId
where t.charge_off IS NOT NULL
and ((payment_date between '2016-09-12' and '2016-12-12' and charge_off = 1) or (payment_date = '2017-01-12'))
GO
drop view if exists [dbo].[vw_loan_chargeoff_test_1m]
go
create view [dbo].[vw_loan_chargeoff_test_1m]
as
select t.loanId, t.payment_date, t.payment, t.past_due, t.remain_balance,
l.loan_open_date, l.loanAmount,l.interestRate,l.grade,l.term,l.installment,l.isJointApplication,l.purpose,
m.memberId,m.residentialState,m.annualIncome,m.yearsEmployment,m.homeOwnership,m.incomeVerified,m.creditScore,m.dtiRatio,m.revolvingBalance,m.revolvingUtilizationRate,m.numDelinquency2Years,m.numDerogatoryRec,m.numInquiries6Mon,m.lengthCreditHistory,m.numOpenCreditLines,m.numTotalCreditLines,m.numChargeoff1year,
ISNULL(t.payment_1, 0) payment_1,ISNULL(t.payment_2, 0) payment_2,ISNULL(t.payment_3, 0) payment_3,ISNULL(t.payment_4, 0) payment_4,ISNULL(t.payment_5, 0) payment_5,
ISNULL(t.past_due_1, 0) past_due_1,ISNULL(t.past_due_2, 0) past_due_2,ISNULL(t.past_due_3, 0) past_due_3,ISNULL(t.past_due_4, 0) past_due_4,ISNULL(t.past_due_5, 0) past_due_5,
ISNULL(t.remain_balance_1, 0) remain_balance_1,ISNULL(t.remain_balance_2, 0) remain_balance_2,ISNULL(t.remain_balance_3, 0) remain_balance_3,ISNULL(t.remain_balance_4, 0) remain_balance_4,ISNULL(t.remain_balance_5, 0) remain_balance_5, t.charge_off
from
(
select *,
(select top 1 payment from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) payment_1,
(select top 1 payment from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) payment_2,
(select top 1 payment from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) payment_3,
(select top 1 payment from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) payment_4,
(select top 1 payment from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) payment_5,
(select top 1 past_due from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) past_due_1,
(select top 1 past_due from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) past_due_2,
(select top 1 past_due from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) past_due_3,
(select top 1 past_due from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) past_due_4,
(select top 1 past_due from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) past_due_5,
(select top 1 remain_balance from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) remain_balance_1,
(select top 1 remain_balance from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) remain_balance_2,
(select top 1 remain_balance from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) remain_balance_3,
(select top 1 remain_balance from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) remain_balance_4,
(select top 1 remain_balance from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) remain_balance_5,
(select MAX(charged_off+0) from payments_info_1m p2 where DATEDIFF(month, p1.payment_date,p2.payment_date) IN (1,2,3) AND p1.loanId = p2.loanId) charge_off
from payments_info_1m p1 ) AS t inner join loan_info_1m l ON t.loanId = l.loanId inner join member_info_1m m ON l.memberId = m.memberId
where t.charge_off IS NOT NULL
and payment_date = '2017-02-12'
GO
drop view if exists [dbo].[vw_loan_chargeoff_score_1m]
go
create view [dbo].[vw_loan_chargeoff_score_1m]
as
select t.loanId, t.payment_date, t.payment, t.past_due, t.remain_balance,
l.loan_open_date, l.loanAmount,l.interestRate,l.grade,l.term,l.installment,l.isJointApplication,l.purpose,
m.memberId,m.residentialState,m.annualIncome,m.yearsEmployment,m.homeOwnership,m.incomeVerified,m.creditScore,m.dtiRatio,m.revolvingBalance,m.revolvingUtilizationRate,m.numDelinquency2Years,m.numDerogatoryRec,m.numInquiries6Mon,m.lengthCreditHistory,m.numOpenCreditLines,m.numTotalCreditLines,m.numChargeoff1year,
ISNULL(t.payment_1, 0) payment_1,ISNULL(t.payment_2, 0) payment_2,ISNULL(t.payment_3, 0) payment_3,ISNULL(t.payment_4, 0) payment_4,ISNULL(t.payment_5, 0) payment_5,
ISNULL(t.past_due_1, 0) past_due_1,ISNULL(t.past_due_2, 0) past_due_2,ISNULL(t.past_due_3, 0) past_due_3,ISNULL(t.past_due_4, 0) past_due_4,ISNULL(t.past_due_5, 0) past_due_5,
ISNULL(t.remain_balance_1, 0) remain_balance_1,ISNULL(t.remain_balance_2, 0) remain_balance_2,ISNULL(t.remain_balance_3, 0) remain_balance_3,ISNULL(t.remain_balance_4, 0) remain_balance_4,ISNULL(t.remain_balance_5, 0) remain_balance_5, t.charge_off
from
(
select *,
(select top 1 payment from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) payment_1,
(select top 1 payment from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) payment_2,
(select top 1 payment from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) payment_3,
(select top 1 payment from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) payment_4,
(select top 1 payment from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) payment_5,
(select top 1 past_due from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) past_due_1,
(select top 1 past_due from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) past_due_2,
(select top 1 past_due from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) past_due_3,
(select top 1 past_due from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) past_due_4,
(select top 1 past_due from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) past_due_5,
(select top 1 remain_balance from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) remain_balance_1,
(select top 1 remain_balance from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) remain_balance_2,
(select top 1 remain_balance from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) remain_balance_3,
(select top 1 remain_balance from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) remain_balance_4,
(select top 1 remain_balance from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) remain_balance_5,
(select MAX(charged_off+0) from payments_info_1m p2 where DATEDIFF(month, p1.payment_date,p2.payment_date) IN (1,2,3) AND p1.loanId = p2.loanId) charge_off
from payments_info_1m p1 ) AS t inner join loan_info_1m l ON t.loanId = l.loanId inner join member_info_1m m ON l.memberId = m.memberId
where t.charge_off IS NOT NULL
and payment_date > '2017-02-12'
GO
-- persist the view in case of large dataset in order to get faster results
/* Large dataset */
drop table if exists [loan_chargeoff_train_1m]
go
select *
into [loan_chargeoff_train_1m]
from [vw_loan_chargeoff_1m]
go
create clustered columnstore index [cci_loan_chargeoff_train_1m] on [loan_chargeoff_train_1m]
go
drop table if exists [loan_chargeoff_test_1m]
go
select *
into [loan_chargeoff_test_1m]
from [vw_loan_chargeoff_test_1m]
go
create clustered columnstore index [cci_loan_chargeoff_test_1m] on [loan_chargeoff_test_1m]
go
drop table if exists [loan_chargeoff_score_1m]
go
select *
into [loan_chargeoff_score_1m]
from [vw_loan_chargeoff_score_1m]
go
create clustered columnstore index [cci_loan_chargeoff_score_1m] on [loan_chargeoff_score_1m]
go

Просмотреть файл

@ -0,0 +1,53 @@
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
DROP PROCEDURE IF EXISTS [dbo].[select_features];
GO
CREATE PROCEDURE [select_features] @training_set_table varchar(100), @test_set_table varchar(100), @selected_features_table varchar(100), @connectionString varchar(300)
AS
BEGIN
DECLARE @testing_set_query nvarchar(400), @del_cmd nvarchar(100), @ins_cmd nvarchar(max)
/* select features using MicrosotML */
SET @del_cmd = 'DELETE FROM ' + @selected_features_table
EXEC sp_executesql @del_cmd
SET @ins_cmd = 'INSERT INTO ' + @selected_features_table + ' (feature_name)
EXECUTE sp_execute_external_script @language = N''R'',
@script = N''
library(RevoScaleR)
library(MicrosoftML)
##########################################################################################################################################
## Set the compute context to SQL for faster training
##########################################################################################################################################
testing_set <- RxSqlServerData(table=test_set, connectionString = connection_string)
training_set <- RxSqlServerData(table=train_set, connectionString = connection_string)
features <- rxGetVarNames(testing_set)
variables_to_remove <- c("memberId", "loanId", "payment_date", "loan_open_date", "charge_off")
feature_names <- features[!(features %in% variables_to_remove)]
model_formula <- as.formula(paste(paste("charge_off~"), paste(feature_names, collapse = "+")))
selected_count <- 0
features_to_remove <- c("(Bias)")
ml_trans <- list(categorical(vars = c("purpose", "residentialState", "homeOwnership", "yearsEmployment")),
selectFeatures(model_formula, mode = mutualInformation(numFeaturesToKeep = 41)))
candidate_model <- rxLogisticRegression(model_formula, data = training_set, mlTransforms = ml_trans)
predicted_score <- rxPredict(candidate_model, testing_set, extraVarsToWrite = c("charge_off"))
predicted_roc <- rxRoc("charge_off", grep("Probability", names(predicted_score), value = T), predicted_score)
auc <- rxAuc(predicted_roc)
selected_features <- rxGetVarInfo(summary(candidate_model)$summary)
selected_feature_names <- names(selected_features)
selected_feature_filtered <- selected_feature_names[!(selected_feature_names %in% features_to_remove)]
selected_features_final <- data.frame(selected_feature_filtered)''
, @output_data_1_name = N''selected_features_final''
, @params = N''@connection_string varchar(300), @test_set varchar(100), @train_set varchar(100)''
, @connection_string = ''' + @connectionString + '''' +
', @train_set = ''' + @training_set_table + '''' +
', @test_set = ''' + @test_set_table + ''';'
EXEC sp_executesql @ins_cmd
END
GO

Просмотреть файл

@ -0,0 +1,123 @@
/****** Stored Procedure to train models ******/
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
DROP PROCEDURE IF EXISTS [dbo].[train_model];
GO
CREATE PROCEDURE [train_model] @training_set_table varchar(100), @test_set_table varchar(100), @scored_table varchar(100), @model_table varchar(100), @model_name_param varchar(50), @connectionString varchar(300)
AS
BEGIN
DECLARE @payload varbinary(max), @auc real, @accuracy real, @precision real, @recall real, @f1score real;
DECLARE @del_cmd nvarchar(300), @ins_cmd nvarchar(300), @param_def nvarchar(300);
EXECUTE sp_execute_external_script @language = N'R',
@script = N'
library(RevoScaleR)
library(MicrosoftML)
# model evaluation functions
model_eval_stats <- function(scored_data, label="charge_off", predicted_prob="Probability", predicted_label="PredictedLabel")
{
roc <- rxRoc(label, grep(predicted_prob, names(scored_data), value=T), scored_data)
auc <- rxAuc(roc)
crosstab_formula <- as.formula(paste("~as.factor(", label, "):as.factor(", predicted_label, ")"))
cross_tab <- rxCrossTabs(crosstab_formula, scored_data)
conf_matrix <- cross_tab$counts[[1]]
tn <- conf_matrix[1,1]
fp <- conf_matrix[1,2]
fn <- conf_matrix[2,1]
tp <- conf_matrix[2,2]
accuracy <- (tp + tn) / (tp + fn + fp + tn)
precision <- tp/(tp+fp)
recall <- tp / (tp+fn)
f1score <- 2 * (precision * recall) / (precision + recall)
return(list(auc=auc, accuracy=accuracy, precision = precision, recall=recall, f1score=f1score))
}
cc <- RxInSqlServer(connectionString = connection_string)
rxSetComputeContext(cc)
training_set <- RxSqlServerData(table=train_set, connectionString = connection_string)
testing_set <- RxSqlServerData(table=test_set, connectionString = connection_string)
scoring_set <- RxSqlServerData(table=score_set, connectionString = connection_string, overwrite=TRUE)
##########################################################################################################################################
## Training and evaluating model based on model selection
##########################################################################################################################################
features <- rxGetVarNames(training_set)
vars_to_remove <- c("memberId", "loanId", "payment_date", "loan_open_date", "charge_off")
feature_names <- features[!(features %in% vars_to_remove)]
model_formula <- as.formula(paste(paste("charge_off~"), paste(feature_names, collapse = "+")))
ml_trans <- list(categorical(vars = c("purpose", "residentialState", "homeOwnership", "yearsEmployment")),
selectFeatures(model_formula, mode = mutualInformation(numFeaturesToKeep = 41)))
if (model_name == "logistic_reg") {
model <- rxLogisticRegression(formula = model_formula,
data = training_set,
mlTransforms = ml_trans)
} else if (model_name == "fast_trees") {
model <- rxFastTrees(formula = model_formula,
data = training_set,
mlTransforms = ml_trans)
} else if (model_name == "fast_forest") {
model <- rxFastForest(formula = model_formula,
data = training_set,
mlTransforms = ml_trans)
} else if (model_name == "fast_linear") {
model <- rxFastLinear(formula = model_formula,
data = training_set,
mlTransforms = ml_trans)
} else if (model_name == "neural_net") {
model <- rxNeuralNet(formula = model_formula,
data = training_set,
numIterations = 42,
optimizer = adaDeltaSgd(),
mlTransforms = ml_trans)
}
print("Done training.")
# evaluate model
rxPredict(model, testing_set, outData = scoring_set, extraVarsToWrite = c("memberId", "loanId", "charge_off"), overwrite=TRUE)
print("Done writing predictions for evaluation of model.")
model_stats <- model_eval_stats(scoring_set)
print(model_stats)
modelbin <- serialize(model, connection=NULL)
stat_auc <- model_stats[[1]]
stat_accuracy <- model_stats[[2]]
stat_precision <- model_stats[[3]]
stat_recall <- model_stats[[4]]
stat_f1score <- model_stats[[5]]
'
, @params = N'@model_name varchar(20), @connection_string varchar(300), @train_set varchar(100), @test_set varchar(100), @score_set varchar(100),
@modelbin varbinary(max) OUTPUT, @stat_auc real OUTPUT, @stat_accuracy real OUTPUT, @stat_precision real OUTPUT, @stat_recall real OUTPUT, @stat_f1score real OUTPUT'
, @model_name = @model_name_param
, @connection_string = @connectionString
, @train_set = @training_set_table
, @test_set = @test_set_table
, @score_set = @scored_table
, @modelbin = @payload OUTPUT
, @stat_auc = @auc OUTPUT
, @stat_accuracy = @accuracy OUTPUT
, @stat_precision = @precision OUTPUT
, @stat_recall = @recall OUTPUT
, @stat_f1score = @f1score OUTPUT;
SET @del_cmd = N'DELETE FROM ' + @model_table + N' WHERE model_name = ''' + @model_name_param + ''''
EXEC sp_executesql @del_cmd;
SET @ins_cmd = N'INSERT INTO ' + @model_table + N' (model_name, model, auc, accuracy, precision, recall, f1score) VALUES (''' + @model_name_param + ''', @p_payload, @p_auc, @p_accuracy, @p_precision, @p_recall, @p_f1score)'
SET @param_def = N'@p_payload varbinary(max),
@p_auc real,
@p_accuracy real,
@p_precision real,
@p_recall real,
@p_f1score real'
EXEC sp_executesql @ins_cmd, @param_def,
@p_payload=@payload,
@p_auc=@auc,
@p_accuracy=@accuracy,
@p_precision=@precision,
@p_recall=@recall,
@p_f1score=@f1score;
;
END
GO

Просмотреть файл

@ -0,0 +1,38 @@
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
DROP PROCEDURE IF EXISTS [dbo].[predict_chargeoff]
GO
CREATE PROCEDURE [predict_chargeoff] @score_table varchar(100), @score_prediction_table varchar(100), @models_table varchar(100), @connectionString varchar(300)
AS
BEGIN
DECLARE @best_model_query nvarchar(300), @param_def nvarchar(100)
DECLARE @bestmodel varbinary(max)
SET @best_model_query = 'select top 1 @p_best_model = model from ' + @models_table + ' where f1score in (select max(f1score) from ' + @models_table + ')'
SET @param_def = N'@p_best_model varbinary(max) OUTPUT';
EXEC sp_executesql @best_model_query, @param_def, @p_best_model=@bestmodel OUTPUT;
EXEC sp_execute_external_script @language = N'R',
@script = N'
library(RevoScaleR)
library(MicrosoftML)
# Get best_model.
best_model <- unserialize(best_model)
scoring_set <- RxSqlServerData(table=score_set, connectionString = connection_string)
scored_output <- RxSqlServerData(table=score_prediction, connectionString = connection_string, overwrite=TRUE)
print(summary(best_model))
rxPredict(best_model, scoring_set, outData = scored_output, extraVarsToWrite = c("memberId", "loanId", "payment_date"), overwrite=TRUE)
'
, @params = N'@best_model varbinary(max), @score_set varchar(100), @score_prediction varchar(100), @connection_string varchar(300)'
, @best_model = @bestmodel
, @score_set = @score_table
, @score_prediction = @score_prediction_table
, @connection_string = @connectionString
;
END
GO

Просмотреть файл

@ -0,0 +1,215 @@
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
DROP PROCEDURE IF EXISTS [dbo].[predict_chargeoff_ondemand]
GO
CREATE PROCEDURE [predict_chargeoff_ondemand]
@models_table varchar(100),
@loanId int,
@payment_date datetime,
@payment real,
@past_due real,
@remain_balance real,
@loan_open_date datetime,
@loanAmount real,
@interestRate real,
@grade int,
@term int,
@installment real,
@isJointApplication bit,
@purpose nvarchar(255),
@memberId int,
@residentialState nvarchar(4),
@annualIncome real,
@yearsEmployment nvarchar(11),
@homeOwnership nvarchar(10),
@incomeVerified bit,
@creditScore int,
@dtiRatio real,
@revolvingBalance real,
@revolvingUtilizationRate real,
@numDelinquency2Years int,
@numDerogatoryRec int,
@numInquiries6Mon int,
@lengthCreditHistory int,
@numOpenCreditLines int,
@numTotalCreditLines int,
@numChargeoff1year int,
@payment_1 real,
@payment_2 real,
@payment_3 real,
@payment_4 real,
@payment_5 real,
@past_due_1 real,
@past_due_2 real,
@past_due_3 real,
@past_due_4 real,
@past_due_5 real,
@remain_balance_1 real,
@remain_balance_2 real,
@remain_balance_3 real,
@remain_balance_4 real,
@remain_balance_5 real
AS
BEGIN
DECLARE @best_model_query nvarchar(300), @param_def nvarchar(100)
DECLARE @bestmodel varbinary(max)
SET @best_model_query = 'select top 1 @p_best_model = model from ' + @models_table + ' where f1score in (select max(f1score) from ' + @models_table + ')'
SET @param_def = N'@p_best_model varbinary(max) OUTPUT';
EXEC sp_executesql @best_model_query, @param_def, @p_best_model=@bestmodel OUTPUT;
DECLARE @inquery nvarchar(max) = N'SELECT @p_loanId
,@p_payment_date
,@p_payment
,@p_past_due
,@p_remain_balance
,@p_loan_open_date
,@p_loanAmount
,@p_interestRate
,@p_grade
,@p_term
,@p_installment
,@p_isJointApplication
,@p_purpose
,@p_memberId
,@p_residentialState
,@p_annualIncome
,@p_yearsEmployment
,@p_homeOwnership
,@p_incomeVerified
,@p_creditScore
,@p_dtiRatio
,@p_revolvingBalance
,@p_revolvingUtilizationRate
,@p_numDelinquency2Years
,@p_numDerogatoryRec
,@p_numInquiries6Mon
,@p_lengthCreditHistory
,@p_numOpenCreditLines
,@p_numTotalCreditLines
,@p_numChargeoff1year
,@p_payment_1
,@p_payment_2
,@p_payment_3
,@p_payment_4
,@p_payment_5
,@p_past_due_1
,@p_past_due_2
,@p_past_due_3
,@p_past_due_4
,@p_past_due_5
,@p_remain_balance_1
,@p_remain_balance_2
,@p_remain_balance_3
,@p_remain_balance_4
,@p_remain_balance_5'
EXEC sp_execute_external_script @language = N'R',
@script = N'
library(RevoScaleR)
library(MicrosoftML)
# Get best_model.
best_model <- unserialize(best_model)
OutputDataSet <- rxPredict(best_model, InputDataSet, outData = NULL, extraVarsToWrite = c("memberId", "loanId", "payment_date"))
'
, @input_data_1 = @inquery
, @params = N'@best_model varbinary(max),
@p_loanId int,
@p_payment_date datetime,
@p_payment real,
@p_past_due real,
@p_remain_balance real,
@p_loan_open_date datetime,
@p_loanAmount real,
@p_interestRate real,
@p_grade int,
@p_term int,
@p_installment real,
@p_isJointApplication bit,
@p_purpose nvarchar(255),
@p_memberId int,
@p_residentialState nvarchar(4),
@p_annualIncome real,
@p_yearsEmployment nvarchar(11),
@p_homeOwnership nvarchar(10),
@p_incomeVerified bit,
@p_creditScore int,
@p_dtiRatio real,
@p_revolvingBalance real,
@p_revolvingUtilizationRate real,
@p_numDelinquency2Years int,
@p_numDerogatoryRec int,
@p_numInquiries6Mon int,
@p_lengthCreditHistory int,
@p_numOpenCreditLines int,
@p_numTotalCreditLines int,
@p_numChargeoff1year int,
@p_payment_1 real,
@p_payment_2 real,
@p_payment_3 real,
@p_payment_4 real,
@p_payment_5 real,
@p_past_due_1 real,
@p_past_due_2 real,
@p_past_due_3 real,
@p_past_due_4 real,
@p_past_due_5 real,
@p_remain_balance_1 real,
@p_remain_balance_2 real,
@p_remain_balance_3 real,
@p_remain_balance_4 real,
@p_remain_balance_5 real'
, @p_loanId=@loanId
, @p_payment_date=@payment_date
, @p_payment=@payment
, @p_past_due=@past_due
, @p_remain_balance=@remain_balance
, @p_loan_open_date=@loan_open_date
, @p_loanAmount=@loanAmount
, @p_interestRate=@interestRate
, @p_grade=@grade
, @p_term=@term
, @p_installment=@installment
, @p_isJointApplication=@isJointApplication
, @p_purpose=@purpose
, @p_memberId=@memberId
, @p_residentialState=@residentialState
, @p_annualIncome=@annualIncome
, @p_yearsEmployment=@yearsEmployment
, @p_homeOwnership=@homeOwnership
, @p_incomeVerified=@incomeVerified
, @p_creditScore=@creditScore
, @p_dtiRatio=@dtiRatio
, @p_revolvingBalance=@revolvingBalance
, @p_revolvingUtilizationRate=@revolvingUtilizationRate
, @p_numDelinquency2Years=@numDelinquency2Years
, @p_numDerogatoryRec=@numDerogatoryRec
, @p_numInquiries6Mon=@numInquiries6Mon
, @p_lengthCreditHistory=@lengthCreditHistory
, @p_numOpenCreditLines=@numOpenCreditLines
, @p_numTotalCreditLines=@numTotalCreditLines
, @p_numChargeoff1year=@numChargeoff1year
, @p_payment_1=@payment_1
, @p_payment_2=@payment_2
, @p_payment_3=@payment_3
, @p_payment_4=@payment_4
, @p_payment_5=@payment_5
, @p_past_due_1=@past_due_1
, @p_past_due_2=@past_due_2
, @p_past_due_3=@past_due_3
, @p_past_due_4=@past_due_4
, @p_past_due_5=@past_due_5
, @p_remain_balance_1=@remain_balance_1
, @p_remain_balance_2=@remain_balance_2
, @p_remain_balance_3=@remain_balance_3
, @p_remain_balance_4=@remain_balance_4
, @p_remain_balance_5=@remain_balance_5
, @best_model = @bestmodel
;
END
GO