move scripts/code from ciqs to github
This commit is contained in:
Родитель
c2d705e4ea
Коммит
6f3b81f044
|
@ -0,0 +1,2 @@
|
|||
[InternetShortcut]
|
||||
URL=https://microsoft.github.io/r-server-campaign-optimization/CIG_Workflow.html
|
|
@ -0,0 +1,370 @@
|
|||
<#
|
||||
.SYNOPSIS
|
||||
Script to provide Loan ChargeOff predictions, using SQL & MRS.
|
||||
.DESCRIPTION
|
||||
This script will show the E2E work flow of loan chargeoff prediction machine learning
|
||||
templates with Microsoft SQL Server 2016 and Microsoft R services.
|
||||
For the detailed description, please read README.md.
|
||||
#>
|
||||
[CmdletBinding()]
|
||||
param(
|
||||
# SQL server address
|
||||
[parameter(Mandatory=$true,ParameterSetName = "CM")]
|
||||
[ValidateNotNullOrEmpty()]
|
||||
[String]
|
||||
$ServerName = "",
|
||||
|
||||
# SQL server database name
|
||||
[parameter(Mandatory=$true,ParameterSetName = "CM")]
|
||||
[ValidateNotNullOrEmpty()]
|
||||
[String]
|
||||
$DBName = "",
|
||||
|
||||
[parameter(Mandatory=$true,ParameterSetName = "CM")]
|
||||
[ValidateNotNullOrEmpty()]
|
||||
[String]
|
||||
$username ="",
|
||||
|
||||
[parameter(Mandatory=$true,ParameterSetName = "CM")]
|
||||
[ValidateNotNullOrEmpty()]
|
||||
[String]
|
||||
$password ="",
|
||||
|
||||
[parameter(Mandatory=$true,ParameterSetName = "CM")]
|
||||
[ValidateNotNullOrEmpty()]
|
||||
[String]
|
||||
$uninterrupted="",
|
||||
|
||||
[parameter(Mandatory=$false,ParameterSetName = "CM")]
|
||||
[ValidateNotNullOrEmpty()]
|
||||
[String]
|
||||
$dataPath = "",
|
||||
|
||||
[parameter(Mandatory=$false,ParameterSetName = "CM")]
|
||||
[ValidateSet("l", "L", "s", "S", "")]
|
||||
[String]
|
||||
$dataSize = ""
|
||||
)
|
||||
|
||||
$scriptPath = Get-Location
|
||||
$filePath = $scriptPath.Path+ "\"
|
||||
|
||||
##########################################################################
|
||||
# Script level variables
|
||||
##########################################################################
|
||||
|
||||
$table_suffix = "_10k"
|
||||
if ($dataSize -eq 'l' -or $dataSize -eq 'L')
|
||||
{
|
||||
$table_suffix = "_1m"
|
||||
}
|
||||
|
||||
$trainingTable = "loan_chargeoff_train" + $table_suffix
|
||||
$testTable = "loan_chargeoff_test" + $table_suffix
|
||||
$evalScoreTable = "loan_chargeoff_eval_score" + $table_suffix
|
||||
$scoreTable = "loan_chargeoff_score" + $table_suffix
|
||||
$modelTable = "loan_chargeoff_models" + $table_suffix
|
||||
$predictionTable = "loan_chargeoff_prediction" + $table_suffix
|
||||
$selectedFeaturesTable = "selected_features" + $table_suffix
|
||||
##########################################################################
|
||||
# Function wrapper to invoke SQL command
|
||||
##########################################################################
|
||||
function ExecuteSQL
|
||||
{
|
||||
param(
|
||||
[String]
|
||||
$sqlscript
|
||||
)
|
||||
Invoke-Sqlcmd -ServerInstance $ServerName -Database $DBName -Username $username -Password $password -InputFile $sqlscript -QueryTimeout 200000
|
||||
}
|
||||
##########################################################################
|
||||
# Function wrapper to invoke SQL query
|
||||
##########################################################################
|
||||
function ExecuteSQLQuery
|
||||
{
|
||||
param(
|
||||
[String]
|
||||
$sqlquery
|
||||
)
|
||||
Invoke-Sqlcmd -ServerInstance $ServerName -Database $DBName -Username $username -Password $password -Query $sqlquery -QueryTimeout 200000
|
||||
}
|
||||
|
||||
##########################################################################
|
||||
# Get connection string
|
||||
##########################################################################
|
||||
function GetConnectionString
|
||||
{
|
||||
$connectionString = "Driver=SQL Server;Server=$ServerName;Database=$DBName;UID=$username;PWD=$password"
|
||||
$connectionString
|
||||
}
|
||||
|
||||
$ServerName2="localhost"
|
||||
|
||||
function GetConnectionString2
|
||||
{
|
||||
$connectionString2 = "Driver=SQL Server;Server=$ServerName2;Database=$DBName;UID=$username;PWD=$password"
|
||||
$connectionString2
|
||||
}
|
||||
|
||||
##########################################################################
|
||||
# Construct the SQL connection strings
|
||||
##########################################################################
|
||||
$connectionString = GetConnectionString
|
||||
$connectionString2 = GetConnectionString2
|
||||
|
||||
##########################################################################
|
||||
# Check if the SQL server or database exists
|
||||
##########################################################################
|
||||
$query = "IF NOT EXISTS(SELECT * FROM sys.databases WHERE NAME = '$DBName') CREATE DATABASE $DBName"
|
||||
Invoke-Sqlcmd -ServerInstance $ServerName -Username $username -Password $password -Query $query -ErrorAction SilentlyContinue
|
||||
if ($? -eq $false)
|
||||
{
|
||||
Write-Host -ForegroundColor Red "Failed the test to connect to SQL server: $ServerName database: $DBName !"
|
||||
Write-Host -ForegroundColor Red "Please make sure: `n`t 1. SQL Server: $ServerName exists;
|
||||
`n`t 2. SQL database: $DBName exists;
|
||||
`n`t 3. SQL user: $username has the right credential for SQL server access."
|
||||
exit
|
||||
}
|
||||
|
||||
$query = "USE $DBName;"
|
||||
Invoke-Sqlcmd -ServerInstance $ServerName -Username $username -Password $password -Query $query
|
||||
|
||||
|
||||
##########################################################################
|
||||
# Running without interruption
|
||||
##########################################################################
|
||||
$startTime= Get-Date
|
||||
Write-Host "Start time is:" $startTime
|
||||
|
||||
if ($uninterrupted -eq 'y' -or $uninterrupted -eq 'Y')
|
||||
{
|
||||
try
|
||||
{
|
||||
# create training and test tables
|
||||
Write-Host -ForeGroundColor 'green' ("Create SQL tables: member_info, loan_info, payments_info")
|
||||
$script = $filePath + "step1_create_tables" + $table_suffix + ".sql"
|
||||
ExecuteSQL $script
|
||||
|
||||
Write-Host -ForeGroundColor 'green' ("Populate SQL tables: member_info, loan_info, payments_info")
|
||||
$dataList = "member_info", "loan_info", "payments_info"
|
||||
|
||||
# upload csv files into SQL tables
|
||||
foreach ($dataFile in $dataList)
|
||||
{
|
||||
$destination = $dataPath + $dataFile + $table_suffix + ".csv"
|
||||
$error_file = $dataPath + $dataFile + $table_suffix + ".error"
|
||||
Write-Host -ForeGroundColor 'magenta'(" Populate SQL table: {0}..." -f $dataFile)
|
||||
$tableName = $DBName + ".dbo." + $dataFile + $table_suffix
|
||||
$tableSchema = $dataPath + $dataFile + $table_suffix + ".xml"
|
||||
bcp $tableName format nul -c -x -f $tableSchema -U $username -S $ServerName -P $password -t ','
|
||||
Write-Host -ForeGroundColor 'magenta'(" Loading {0} to SQL table..." -f $dataFile)
|
||||
bcp $tableName in $destination -t ',' -S $ServerName -f $tableSchema -F 2 -C "RAW" -b 100000 -U $username -P $password -e $error_file
|
||||
Write-Host -ForeGroundColor 'magenta'(" Done...Loading {0} to SQL table..." -f $dataFile)
|
||||
}
|
||||
|
||||
|
||||
|
||||
# create the views for features and label with training, test and scoring split
|
||||
Write-Host -ForeGroundColor 'magenta'(" Creating features label view and persisting...")
|
||||
$script = $filepath + "step2_features_label_view" + $table_suffix + ".sql"
|
||||
ExecuteSQL $script
|
||||
Write-Host -ForeGroundColor 'magenta'(" Done creating features label view and persisting...")
|
||||
|
||||
# create the stored procedure for training
|
||||
$script = $filepath + "step3_train_test_model.sql"
|
||||
ExecuteSQL $script
|
||||
Write-Host -ForeGroundColor 'magenta'(" Done creating training and eval stored proc...")
|
||||
|
||||
# execute the training
|
||||
Write-Host -ForeGroundColor 'magenta'(" Starting training and evaluation of models...")
|
||||
$modelNames = 'logistic_reg','fast_linear','fast_trees','fast_forest','neural_net'
|
||||
foreach ($modelName in $modelNames)
|
||||
{
|
||||
Write-Host -ForeGroundColor 'Cyan' (" Training $modelName...")
|
||||
$query = "EXEC train_model $trainingTable, $testTable, $evalScoreTable, $modelTable, $modelName, '$connectionString2'"
|
||||
ExecuteSQLQuery $query
|
||||
}
|
||||
|
||||
Write-Host -ForeGroundColor 'Cyan' (" Done with training and evaluation of models. Evaluation stats stored in $modelTable...")
|
||||
|
||||
# create the stored procedure for recommendations
|
||||
$script = $filepath + "step4_chargeoff_batch_prediction.sql"
|
||||
ExecuteSQL $script
|
||||
Write-Host -ForeGroundColor 'magenta'(" Done creating batch scoring stored proc...")
|
||||
|
||||
#score on the data
|
||||
Write-Host -ForeGroundColor 'Cyan' ("Scoring based on best performing model score table = $scoreTable, prediction table = $predictionTable...")
|
||||
$scoring_query = "EXEC predict_chargeoff $scoreTable, $predictionTable, $modelTable, '$connectionString2'"
|
||||
ExecuteSQLQuery $scoring_query
|
||||
|
||||
# create the stored procedure for recommendations
|
||||
$script = $filepath + "step4a_chargeoff_ondemand_prediction.sql"
|
||||
ExecuteSQL $script
|
||||
Write-Host -ForeGroundColor 'magenta'(" Done creating on demand scoring stored proc [predict_chargeoff_ondemand]...")
|
||||
|
||||
}
|
||||
catch
|
||||
{
|
||||
Write-Host -ForegroundColor DarkYellow "Exception in populating database tables:"
|
||||
Write-Host -ForegroundColor Red $Error[0].Exception
|
||||
throw
|
||||
}
|
||||
|
||||
Write-Host -foregroundcolor 'green'("Loan ChargeOff Workflow Finished Successfully!")
|
||||
}
|
||||
|
||||
if ($uninterrupted -eq 'n' -or $uninterrupted -eq 'N')
|
||||
{
|
||||
|
||||
##########################################################################
|
||||
# Create input tables and populate with data from csv files.
|
||||
##########################################################################
|
||||
Write-Host -foregroundcolor 'green' ("Step 0: Create and populate tables in Database" -f $dbname)
|
||||
$ans = Read-Host 'Continue [y|Y], Exit [e|E], Skip [s|S]?'
|
||||
if ($ans -eq 'E' -or $ans -eq 'e')
|
||||
{
|
||||
return
|
||||
}
|
||||
if ($ans -eq 'y' -or $ans -eq 'Y')
|
||||
{
|
||||
try
|
||||
{
|
||||
# create training and test tables
|
||||
Write-Host -ForeGroundColor 'green' ("Create SQL tables: member_info, loan_info, payments_info")
|
||||
$script = $filePath + "step1_create_tables" + $table_suffix + ".sql"
|
||||
ExecuteSQL $script
|
||||
|
||||
Write-Host -ForeGroundColor 'green' ("Populate SQL tables: member_info, loan_info, payments_info")
|
||||
$dataList = "member_info", "loan_info", "payments_info"
|
||||
|
||||
# upload csv files into SQL tables
|
||||
foreach ($dataFile in $dataList)
|
||||
{
|
||||
$destination = $dataPath + $dataFile + $table_suffix + ".csv"
|
||||
$error_file = $dataPath + $dataFile + $table_suffix + ".error"
|
||||
Write-Host -ForeGroundColor 'magenta'(" Populate SQL table: {0}..." -f $dataFile)
|
||||
$tableName = $DBName + ".dbo." + $dataFile + $table_suffix
|
||||
$tableSchema = $dataPath + $dataFile + $table_suffix + ".xml"
|
||||
bcp $tableName format nul -c -x -f $tableSchema -U $username -S $ServerName -P $password -t ','
|
||||
Write-Host -ForeGroundColor 'magenta'(" Loading {0} to SQL table..." -f $dataFile)
|
||||
bcp $tableName in $destination -t ',' -S $ServerName -f $tableSchema -F 2 -C "RAW" -b 100000 -U $username -P $password -e $error_file
|
||||
Write-Host -ForeGroundColor 'magenta'(" Done...Loading {0} to SQL table..." -f $dataFile)
|
||||
}
|
||||
}
|
||||
catch
|
||||
{
|
||||
Write-Host -ForegroundColor DarkYellow "Exception in populating database tables:"
|
||||
Write-Host -ForegroundColor Red $Error[0].Exception
|
||||
throw
|
||||
}
|
||||
}
|
||||
|
||||
##########################################################################
|
||||
# Create and execute the scripts for data processing
|
||||
##########################################################################
|
||||
Write-Host -foregroundcolor 'green' ("Step 1: Data Processing/Create feature and label views and tables")
|
||||
$ans = Read-Host 'Continue [y|Y], Exit [e|E], Skip [s|S]?'
|
||||
if ($ans -eq 'E' -or $ans -eq 'e')
|
||||
{
|
||||
return
|
||||
}
|
||||
if ($ans -eq 'y' -or $ans -eq 'Y')
|
||||
{
|
||||
# create features, labels view
|
||||
Write-Host -ForeGroundColor 'Cyan' (" Creating feature/label views...")
|
||||
$script = $filepath + "step2_features_label_view" + $table_suffix + ".sql"
|
||||
ExecuteSQL $script
|
||||
}
|
||||
|
||||
##########################################################################
|
||||
# Create and execute the stored procedure for feature selection (optional)
|
||||
##########################################################################
|
||||
Write-Host -foregroundcolor 'green' ("Step 2: Feature Engineering (for demo purpose only, training step does it's own feature selection)")
|
||||
$ans = Read-Host 'Continue [y|Y], Exit [e|E], Skip [s|S]?'
|
||||
if ($ans -eq 'E' -or $ans -eq 'e')
|
||||
{
|
||||
return
|
||||
}
|
||||
if ($ans -eq 'y' -or $ans -eq 'Y')
|
||||
{
|
||||
# create the stored procedure for feature engineering
|
||||
$script = $filepath + "step2a_optional_feature_selection.sql"
|
||||
ExecuteSQL $script
|
||||
|
||||
# execute the feature engineering
|
||||
Write-Host -ForeGroundColor 'Cyan' (" selecting features using MicrosoftML selectFeatures mlTransform with Logistic Regression...")
|
||||
$query = "EXEC select_features $trainingTable, $testTable, $selectedFeaturesTable, '$connectionString2'"
|
||||
ExecuteSQLQuery $query
|
||||
}
|
||||
|
||||
##########################################################################
|
||||
# Create and execute the stored procedure for Training and evaluation
|
||||
##########################################################################
|
||||
|
||||
Write-Host -foregroundcolor 'green' ("Step 3: Models Training and Evaluation")
|
||||
$ans = Read-Host 'Continue [y|Y], Exit [e|E], Skip [s|S]?'
|
||||
if ($ans -eq 'E' -or $ans -eq 'e')
|
||||
{
|
||||
return
|
||||
}
|
||||
if ($ans -eq 'y' -or $ans -eq 'Y')
|
||||
{
|
||||
# create the stored procedure for training
|
||||
$script = $filepath + "step3_train_test_model.sql"
|
||||
ExecuteSQL $script
|
||||
|
||||
Write-Host -ForeGroundColor 'magenta'(" Starting training and evaluation of models...")
|
||||
$modelNames = 'logistic_reg','fast_linear','fast_trees','fast_forest','neural_net'
|
||||
foreach ($modelName in $modelNames)
|
||||
{
|
||||
Write-Host -ForeGroundColor 'Cyan' (" Training $modelName...")
|
||||
$query = "EXEC train_model $trainingTable, $testTable, $evalScoreTable, $modelTable, $modelName, '$connectionString2'"
|
||||
ExecuteSQLQuery $query
|
||||
}
|
||||
}
|
||||
|
||||
##########################################################################
|
||||
# Create and execute the stored procedure for charge_off predictions
|
||||
##########################################################################
|
||||
|
||||
Write-Host -foregroundcolor 'green' ("Step 4: ChargeOff predictions")
|
||||
$ans = Read-Host 'Continue [y|Y], Exit [e|E], Skip [s|S]?'
|
||||
if ($ans -eq 'E' -or $ans -eq 'e')
|
||||
{
|
||||
return
|
||||
}
|
||||
if ($ans -eq 'y' -or $ans -eq 'Y')
|
||||
{
|
||||
# create the stored procedure for recommendations
|
||||
$script = $filepath + "step4_chargeoff_batch_prediction.sql"
|
||||
ExecuteSQL $script
|
||||
|
||||
# compute loan chargeoff predictions
|
||||
Write-Host -ForeGroundColor 'Cyan' ("Scoring based on best performing model score table = $scoreTable, prediction table = $predictionTable...")
|
||||
$query = "EXEC predict_chargeoff $scoreTable, $predictionTable, $modelTable, '$connectionString2'"
|
||||
ExecuteSQLQuery $query
|
||||
}
|
||||
|
||||
Write-Host -foregroundcolor 'green' ("Step 4a: Create on demand ChargeOff prediction stored proc")
|
||||
$ans = Read-Host 'Continue [y|Y], Exit [e|E], Skip [s|S]?'
|
||||
if ($ans -eq 'E' -or $ans -eq 'e')
|
||||
{
|
||||
return
|
||||
}
|
||||
if ($ans -eq 'y' -or $ans -eq 'Y')
|
||||
{
|
||||
# create the stored procedure for recommendations
|
||||
$script = $filepath + "step4a_chargeoff_ondemand_prediction.sql"
|
||||
ExecuteSQL $script
|
||||
|
||||
Write-Host -ForeGroundColor 'Cyan' ("Done creating on demand chargeoff prediction stored proc [predict_chargeoff_ondemand]...")
|
||||
}
|
||||
|
||||
|
||||
Write-Host -foregroundcolor 'green'("Loan Chargeoff Prediction Workflow Finished Successfully!")
|
||||
}
|
||||
|
||||
$endTime =Get-Date
|
||||
$totalTime = ($endTime-$startTime).ToString()
|
||||
Write-Host "Finished running at:" $endTime
|
||||
Write-Host "Total time used: " -foregroundcolor 'green' $totalTime.ToString()
|
|
@ -0,0 +1,65 @@
|
|||
################################################################################################
|
||||
# Powershell script for setting up the solution template. This script checks out the solution
|
||||
# from github and deploys it to SQL Server on the local Data Science VM (DSVM).
|
||||
#
|
||||
# Parameters:
|
||||
# serverName - Name of the server with SQL Server with R Services (this is the DSVM server)
|
||||
# baseurl =
|
||||
# username = login username for the server
|
||||
# password = login password for the server
|
||||
################################################################################################
|
||||
param([string]$serverName,[string]$baseurl,[string]$username,[string]$password)
|
||||
|
||||
# This is the directory for the data/code download
|
||||
$solutionTemplateSetupDir = "D:\SolutionTemplateSetup"
|
||||
$dataDir = $solutionTemplateSetupDir + "Data"
|
||||
$checkoutDir = "LoanChargeOff"
|
||||
New-Item -Name $solutionTemplateSetupDir -ItemType directory
|
||||
|
||||
$setupLog = $solutionTemplateSetupDir + "setup_log.txt"
|
||||
Start-Transcript -Path $setupLog -Append
|
||||
|
||||
cd $dataDir
|
||||
|
||||
$helpShortCutFile = "LoanChargeOffHelp.url"
|
||||
|
||||
# List of files to be downloaded
|
||||
$dataList = "loan_info_10k.csv", "member_info_10k.csv", "payments_info_10k.csv", "loan_info_1m.csv", "member_info_1m.csv", "payments_info_1m.csv","step1_create_tables_10k.sql","step1_create_tables_1m.sql","step2_features_label_view_10k.sql","step2_features_label_view_1m.sql","step2a_optional_feature_selection.sql","step3_train_test_model.sql","step4_chargeoff_batch_prediction.sql","step4a_chargeoff_ondemand_prediction.sql","createuser.sql","Loan_ChargeOff.ps1","runDB.ps1","runDB_LargeDataSet.ps1","setupHelp.ps1",$helpShortCutFile
|
||||
foreach ($dataFile in $dataList)
|
||||
{
|
||||
$down = $baseurl + '/' + $dataFile
|
||||
Write-Host $down
|
||||
Start-BitsTransfer -Source $down
|
||||
}
|
||||
|
||||
cd $solutionTemplateSetupDir
|
||||
git clone -n https://github.com/Microsoft/r-server-loan-chargeoff $checkoutDir
|
||||
cd $checkoutDir
|
||||
git config core.sparsecheckout true
|
||||
echo "/*`r`n!HDI" | out-file -encoding ascii .git/info/sparse-checkout
|
||||
git checkout master
|
||||
|
||||
# making sure that the data files conform to windows style of line ending.
|
||||
|
||||
$dataList = "loan_info_10k.csv", "member_info_10k.csv", "payments_info_10k.csv", "loan_info_1m.csv", "member_info_1m.csv", "payments_info_1m.csv"
|
||||
foreach ($dataFile in $dataList)
|
||||
{
|
||||
unix2dos $dataDir + "/" + $dataFile
|
||||
}
|
||||
|
||||
|
||||
# Start the script for DB creation. Due to privilege issues with SYSTEM user (the user that runs the
|
||||
# extension script), we use ps-remoting to login as admin use and run the DB creation scripts
|
||||
|
||||
$passwords = $password | ConvertTo-SecureString -AsPlainText -Force
|
||||
$credential = New-Object System.Management.Automation.PSCredential("$serverName\$username", $passwords)
|
||||
$command1 = "C:\Windows\Temp\runDB.ps1"
|
||||
$command2 ="C:\Windows\Temp\setupHelp.ps1"
|
||||
|
||||
Enable-PSRemoting -Force
|
||||
Invoke-Command -Credential $credential -ComputerName $serverName -FilePath $command1 -ArgumentList $dataDir
|
||||
Invoke-Command -Credential $credential -ComputerName $serverName -FilePath $command2 -ArgumentList $helpShortCutFile
|
||||
Disable-PSRemoting -Force
|
||||
|
||||
Stop-Transcript
|
||||
|
|
@ -0,0 +1,36 @@
|
|||
--
|
||||
-- remove old $(username) user and login from master
|
||||
--
|
||||
USE [master]
|
||||
GO
|
||||
IF EXISTS (SELECT name FROM sys.database_principals WHERE name = '$(username)')
|
||||
BEGIN
|
||||
PRINT 'Deleting old $(username) user from master'
|
||||
DROP USER [$(username)]
|
||||
END
|
||||
GO
|
||||
IF EXISTS (SELECT name FROM master.sys.server_principals WHERE name = '$(username)')
|
||||
BEGIN
|
||||
PRINT 'Deleting old $(username) login from master'
|
||||
DROP LOGIN [$(username)]
|
||||
END
|
||||
GO
|
||||
--
|
||||
-- create new $(username) login in master
|
||||
--
|
||||
USE [master]
|
||||
GO
|
||||
PRINT 'Creating $(username) login in master'
|
||||
CREATE LOGIN [$(username)] WITH PASSWORD=N'$(password)', CHECK_EXPIRATION=OFF, CHECK_POLICY=OFF;
|
||||
CREATE USER [$(username)] FOR LOGIN [$(username)]
|
||||
--ALTER ROLE [db_rrerole] ADD MEMBER [$(username)]
|
||||
ALTER ROLE [db_owner] ADD MEMBER [$(username)]
|
||||
GO
|
||||
|
||||
exec sp_addrolemember 'db_owner', '$(username)'
|
||||
exec sp_addrolemember 'db_ddladmin', '$(username)'
|
||||
exec sp_addrolemember 'db_accessadmin', '$(username)'
|
||||
exec sp_addrolemember 'db_datareader', '$(username)'
|
||||
exec sp_addrolemember 'db_datawriter', '$(username)'
|
||||
exec sp_addsrvrolemember @loginame= '$(username)', @rolename = 'sysadmin'
|
||||
GO
|
|
@ -0,0 +1,3 @@
|
|||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:ca302200f994e693bcfe6f772a5d1b2e912a8408add58ea33cbed97ace9e082f
|
||||
size 6230802
|
|
@ -0,0 +1,3 @@
|
|||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:9f510f322422b15200b87358c9b07b02aadca714ca777b892ec2db91b9bbe53a
|
||||
size 6233895
|
|
@ -0,0 +1,77 @@
|
|||
##############################################################################################
|
||||
# Script to invoke the LoanChargeOff data science workflow with a smaller dataset of 10,000
|
||||
# loans.
|
||||
# It also creates a SQL Server user and stores the password in 'ExporedSqlPassword.txt'.
|
||||
# Users can retrieve the password from the file and decrypt using ConvertTo-SecureString
|
||||
# commandlet in PowerShell.
|
||||
#
|
||||
# Parameters:
|
||||
# dbuser - (Optional) username for database LoanChargeOff
|
||||
# dbpass - (Optional) database password
|
||||
# createuser - (Optional) whethere to create a database user
|
||||
##############################################################################################
|
||||
Param([string]$dbuser, [string]$dbpass, [bool]$createuser = $true, [string]$datadir)
|
||||
# Function to generate a temporary password for SQL Server
|
||||
Function Get-TempPassword()
|
||||
{
|
||||
Param
|
||||
(
|
||||
[int]$length=10,
|
||||
[string[]]$sourcedata
|
||||
)
|
||||
|
||||
For ($loop=1; $loop -le $length; $loop++)
|
||||
{
|
||||
$TempPassword += ($sourcedata | Get-Random)
|
||||
}
|
||||
return $TempPassword
|
||||
}
|
||||
|
||||
$passwordSource=$NULL
|
||||
$dbpassword = ""
|
||||
$dbusername = "rdemo"
|
||||
$passwordFile = "ExportedSqlPassword.txt"
|
||||
For ($a=33;$a -le 126; $a++)
|
||||
{
|
||||
$passwordSource += ,[char][byte]$a
|
||||
}
|
||||
|
||||
if ($dbuser)
|
||||
{
|
||||
$dbusername = $dbuser
|
||||
}
|
||||
if (!$createuser)
|
||||
{
|
||||
if (!$dbpass)
|
||||
{
|
||||
if (Test-Path $passwordFile)
|
||||
{
|
||||
$secureTxtFromFile = Get-Content $passwordFile
|
||||
$securePasswordObj = $secureTxtFromFile | ConvertTo-SecureString
|
||||
#get back the original unencrypted password
|
||||
$PasswordBSTR = [System.Runtime.InteropServices.Marshal]::SecureStringToBSTR($securePasswordObj)
|
||||
$dbpassword = [System.Runtime.InteropServices.Marshal]::PtrToStringAuto($PasswordBSTR)
|
||||
}
|
||||
else
|
||||
{
|
||||
Write-Host -ForegroundColor DarkYellow "Either ExportedSqlPassword.txt must exist with encrypted database password or must provide password using dbpass parameter."
|
||||
throw
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
$dbpassword = $dbpass
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
Write-Host -ForegroundColor Cyan "Creating database user"
|
||||
$dbpassword = Get-TempPassword -length 15 -sourcedata $passwordSource
|
||||
$securePassword = $dbpassword | ConvertTo-SecureString -AsPlainText -Force
|
||||
$secureTxt = $securePassword | ConvertFrom-SecureString
|
||||
Set-Content $passwordFile $secureTxt
|
||||
|
||||
sqlcmd -S $env:COMPUTERNAME -v username="$dbusername" -v password="$dbpassword" -i .\createuser.sql
|
||||
}
|
||||
|
||||
.\Loan_ChargeOff.ps1 -ServerName $env:COMPUTERNAME -DBName LoanChargeOff -username $dbusername -password $dbpassword -uninterrupted y -dataPath $datadir
|
|
@ -0,0 +1,77 @@
|
|||
##############################################################################################
|
||||
# Script to invoke the LoanChargeOff data science workflow with a larger dataset of 1,000,000
|
||||
# loans.
|
||||
# It can also optionally creates a SQL Server user and stores the password in
|
||||
# 'ExporedSqlPassword.txt'. Users can retrieve the password from the file and decrypt using
|
||||
# ConvertTo-SecureString commandlet in PowerShell.
|
||||
#
|
||||
# Parameters:
|
||||
# dbuser - (Optional) username for database LoanChargeOff
|
||||
# dbpass - (Optional) database password
|
||||
# createuser - (Optional) whethere to create a database user
|
||||
##############################################################################################
|
||||
Param([string]$dbuser, [string]$dbpass, [bool]$createuser = $true, [string]$datadir)
|
||||
# Function to generate a temporary password for SQL Server
|
||||
Function Get-TempPassword()
|
||||
{
|
||||
Param
|
||||
(
|
||||
[int]$length=10,
|
||||
[string[]]$sourcedata
|
||||
)
|
||||
|
||||
For ($loop=1; $loop -le $length; $loop++)
|
||||
{
|
||||
$TempPassword += ($sourcedata | Get-Random)
|
||||
}
|
||||
return $TempPassword
|
||||
}
|
||||
|
||||
$passwordSource=$NULL
|
||||
$dbpassword = ""
|
||||
$dbusername = "rdemo"
|
||||
$passwordFile = "ExportedSqlPassword.txt"
|
||||
For ($a=33;$a -le 126; $a++)
|
||||
{
|
||||
$passwordSource += ,[char][byte]$a
|
||||
}
|
||||
|
||||
if ($dbuser)
|
||||
{
|
||||
$dbusername = $dbuser
|
||||
}
|
||||
if (!$createuser)
|
||||
{
|
||||
if (!$dbpass)
|
||||
{
|
||||
if (Test-Path $passwordFile)
|
||||
{
|
||||
$secureTxtFromFile = Get-Content $passwordFile
|
||||
$securePasswordObj = $secureTxtFromFile | ConvertTo-SecureString
|
||||
#get back the original unencrypted password
|
||||
$PasswordBSTR = [System.Runtime.InteropServices.Marshal]::SecureStringToBSTR($securePasswordObj)
|
||||
$dbpassword = [System.Runtime.InteropServices.Marshal]::PtrToStringAuto($PasswordBSTR)
|
||||
}
|
||||
else
|
||||
{
|
||||
Write-Host -ForegroundColor DarkYellow "Either ExportedSqlPassword.txt must exist with encrypted database password or must provide password using dbpass parameter."
|
||||
throw
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
$dbpassword = $dbpass
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
Write-Host -ForegroundColor Cyan "Creating database user"
|
||||
$dbpassword = Get-TempPassword -length 15 -sourcedata $passwordSource
|
||||
$securePassword = $dbpassword | ConvertTo-SecureString -AsPlainText -Force
|
||||
$secureTxt = $securePassword | ConvertFrom-SecureString
|
||||
Set-Content $passwordFile $secureTxt
|
||||
|
||||
sqlcmd -S $env:COMPUTERNAME -v username="$dbusername" -v password="$dbpassword" -i .\createuser.sql
|
||||
}
|
||||
|
||||
.\Loan_ChargeOff.ps1 -ServerName $env:COMPUTERNAME -DBName LoanChargeOff -username $dbusername -password $dbpassword -uninterrupted y -dataPath $datadir -dataSize L
|
|
@ -0,0 +1,23 @@
|
|||
param( [string]$helpfile)
|
||||
|
||||
#git clone
|
||||
$desktop = [Environment]::GetFolderPath("Desktop")
|
||||
|
||||
$desktop = $desktop + '\'
|
||||
|
||||
#create the help link in startup program
|
||||
|
||||
$startmenu = [Environment]::GetFolderPath("StartMenu")
|
||||
$startupfolder = $startmenu + '\Programs\Startup\'
|
||||
# We create this since the user startup folder is only created after first login
|
||||
# Alternative is to add is to all user startup
|
||||
mkdir $startupfolder
|
||||
#copy
|
||||
$down = $helpfile
|
||||
Write-Host $down
|
||||
Write-Host $startmenu
|
||||
ls $startmenu
|
||||
Write-Host $startupfolder
|
||||
ls $startupfolder
|
||||
cp -Verbose $down $startupfolder
|
||||
cp -Verbose $down $desktop
|
|
@ -0,0 +1,104 @@
|
|||
SET ansi_nulls on
|
||||
GO
|
||||
SET quoted_identifier on
|
||||
GO
|
||||
|
||||
/* Create the member_info Table. */
|
||||
|
||||
DROP TABLE IF EXISTS member_info_10k
|
||||
|
||||
CREATE TABLE [member_info_10k](
|
||||
[memberId] [int],
|
||||
[residentialState] [nvarchar](4),
|
||||
[annualIncome] [real],
|
||||
[yearsEmployment] [nvarchar](11),
|
||||
[homeOwnership] [nvarchar](10),
|
||||
[incomeVerified] [bit],
|
||||
[creditScore] [int],
|
||||
[dtiRatio] [real],
|
||||
[revolvingBalance] [real],
|
||||
[revolvingUtilizationRate] [real],
|
||||
[numDelinquency2Years] [int],
|
||||
[numDerogatoryRec] [int],
|
||||
[numInquiries6Mon] [int],
|
||||
[lengthCreditHistory] [int],
|
||||
[numOpenCreditLines] [int],
|
||||
[numTotalCreditLines] [int],
|
||||
[numChargeoff1year] [int]
|
||||
);
|
||||
|
||||
CREATE CLUSTERED COLUMNSTORE INDEX member_info_10k_cci ON member_info_10k WITH (DROP_EXISTING = OFF);
|
||||
GO
|
||||
/* Create the loan_info Table. */
|
||||
|
||||
DROP TABLE IF EXISTS loan_info_10k
|
||||
|
||||
CREATE TABLE [loan_info_10k](
|
||||
[loanId] [int],
|
||||
[loan_open_date] [datetime],
|
||||
[memberId] [int],
|
||||
[loanAmount] [real],
|
||||
[interestRate] [real],
|
||||
[grade] [int],
|
||||
[term] [int],
|
||||
[installment] [real],
|
||||
[isJointApplication] [bit],
|
||||
[purpose] [nvarchar](255)
|
||||
);
|
||||
|
||||
CREATE CLUSTERED COLUMNSTORE INDEX loan_info_10k_cci ON loan_info_10k WITH (DROP_EXISTING = OFF);
|
||||
GO
|
||||
/* Create the payments_info Table*/
|
||||
|
||||
DROP TABLE IF EXISTS payments_info_10k
|
||||
|
||||
CREATE TABLE [payments_info_10k](
|
||||
[loanId] [int],
|
||||
[payment_date] [datetime],
|
||||
[payment] [real],
|
||||
[past_due] [real],
|
||||
[remain_balance] [real],
|
||||
[closed] [bit],
|
||||
[charged_off] [bit]
|
||||
);
|
||||
|
||||
CREATE CLUSTERED COLUMNSTORE INDEX payments_info_10k_cci ON payments_info_10k WITH (DROP_EXISTING = OFF);
|
||||
GO
|
||||
|
||||
DROP TABLE IF EXISTS [loan_chargeoff_models_10k];
|
||||
|
||||
CREATE TABLE [loan_chargeoff_models_10k]
|
||||
(
|
||||
[model_name] varchar(30) not null default('default model') primary key,
|
||||
[model] varbinary(max) not null,
|
||||
[auc] real,
|
||||
[accuracy] real,
|
||||
[precision] real,
|
||||
[recall] real,
|
||||
[f1score] real,
|
||||
[training_ts] datetime default(GETDATE())
|
||||
);
|
||||
GO
|
||||
|
||||
DROP TABLE IF EXISTS [selected_features_10k];
|
||||
|
||||
CREATE TABLE [selected_features_10k](
|
||||
[feature_id] [int] IDENTITY(1,1) NOT NULL,
|
||||
[feature_name] [nvarchar](500) NOT NULL
|
||||
);
|
||||
GO
|
||||
|
||||
DROP TABLE IF EXISTS [loan_chargeoff_prediction_10k]
|
||||
|
||||
CREATE TABLE [loan_chargeoff_prediction_10k](
|
||||
[memberId] [int],
|
||||
[loanId] [int],
|
||||
[payment_date] [date],
|
||||
[prediction_date] [date] default(GETDATE()),
|
||||
[PredictedLabel] [nvarchar](255),
|
||||
[Score.1] [float],
|
||||
[Probability.1] [float]
|
||||
);
|
||||
|
||||
GO
|
||||
|
|
@ -0,0 +1,103 @@
|
|||
SET ansi_nulls on
|
||||
GO
|
||||
SET quoted_identifier on
|
||||
GO
|
||||
|
||||
/* Create the member_info Table. */
|
||||
/* Large DataSets */
|
||||
DROP TABLE IF EXISTS member_info_1m
|
||||
|
||||
CREATE TABLE [member_info_1m](
|
||||
[memberId] [int],
|
||||
[residentialState] [nvarchar](4),
|
||||
[annualIncome] [real],
|
||||
[yearsEmployment] [nvarchar](11),
|
||||
[homeOwnership] [nvarchar](10),
|
||||
[incomeVerified] [bit],
|
||||
[creditScore] [int],
|
||||
[dtiRatio] [real],
|
||||
[revolvingBalance] [real],
|
||||
[revolvingUtilizationRate] [real],
|
||||
[numDelinquency2Years] [int],
|
||||
[numDerogatoryRec] [int],
|
||||
[numInquiries6Mon] [int],
|
||||
[lengthCreditHistory] [int],
|
||||
[numOpenCreditLines] [int],
|
||||
[numTotalCreditLines] [int],
|
||||
[numChargeoff1year] [int]
|
||||
);
|
||||
|
||||
CREATE CLUSTERED COLUMNSTORE INDEX member_info_1m_cci ON member_info_1m WITH (DROP_EXISTING = OFF);
|
||||
GO
|
||||
/* Create the loan_info Table. */
|
||||
|
||||
DROP TABLE IF EXISTS loan_info_1m
|
||||
|
||||
CREATE TABLE [loan_info_1m](
|
||||
[loanId] [int],
|
||||
[loan_open_date] [datetime],
|
||||
[memberId] [int],
|
||||
[loanAmount] [real],
|
||||
[interestRate] [real],
|
||||
[grade] [int],
|
||||
[term] [int],
|
||||
[installment] [real],
|
||||
[isJointApplication] [bit],
|
||||
[purpose] [nvarchar](255)
|
||||
);
|
||||
|
||||
CREATE CLUSTERED COLUMNSTORE INDEX loan_info_1m_cci ON loan_info_1m WITH (DROP_EXISTING = OFF);
|
||||
GO
|
||||
/* Create the payments_info Table*/
|
||||
|
||||
DROP TABLE IF EXISTS payments_info_1m
|
||||
|
||||
CREATE TABLE [payments_info_1m](
|
||||
[loanId] [int],
|
||||
[payment_date] [date],
|
||||
[payment] [real],
|
||||
[past_due] [real],
|
||||
[remain_balance] [real],
|
||||
[closed] [bit],
|
||||
[charged_off] [bit]
|
||||
);
|
||||
|
||||
CREATE CLUSTERED COLUMNSTORE INDEX payments_info_1m_cci ON payments_info_1m WITH (DROP_EXISTING = OFF);
|
||||
GO
|
||||
|
||||
DROP TABLE IF EXISTS [loan_chargeoff_models_1m];
|
||||
|
||||
CREATE TABLE [loan_chargeoff_models_1m]
|
||||
(
|
||||
[model_name] varchar(30) not null default('default model') primary key,
|
||||
[model] varbinary(max) not null,
|
||||
[auc] real,
|
||||
[accuracy] real,
|
||||
[precision] real,
|
||||
[recall] real,
|
||||
[f1score] real,
|
||||
[training_ts] datetime default(GETDATE())
|
||||
);
|
||||
GO
|
||||
|
||||
DROP TABLE IF EXISTS selected_features_1m;
|
||||
|
||||
CREATE TABLE [selected_features_1m](
|
||||
[feature_id] [int] IDENTITY(1,1) NOT NULL,
|
||||
[feature_name] [nvarchar](500) NOT NULL
|
||||
);
|
||||
GO
|
||||
|
||||
DROP TABLE IF EXISTS [loan_chargeoff_prediction_1m]
|
||||
|
||||
CREATE TABLE [loan_chargeoff_prediction_1m](
|
||||
[memberId] [int],
|
||||
[loanId] [int],
|
||||
[payment_date] [date],
|
||||
[prediction_date] [date] default(GETDATE()),
|
||||
[PredictedLabel] [nvarchar](255),
|
||||
[Score.1] [float],
|
||||
[Probability.1] [float]
|
||||
);
|
||||
|
||||
GO
|
|
@ -0,0 +1,137 @@
|
|||
-- View over the underlying table for features and labels required
|
||||
drop view if exists vw_loan_chargeoff_train_10k
|
||||
go
|
||||
create view vw_loan_chargeoff_train_10k
|
||||
as
|
||||
select t.loanId, t.payment_date, t.payment, t.past_due, t.remain_balance,
|
||||
l.loan_open_date, l.loanAmount,l.interestRate,l.grade,l.term,l.installment,l.isJointApplication,l.purpose,
|
||||
m.memberId,m.residentialState,m.annualIncome,m.yearsEmployment,m.homeOwnership,m.incomeVerified,m.creditScore,m.dtiRatio,m.revolvingBalance,m.revolvingUtilizationRate,m.numDelinquency2Years,m.numDerogatoryRec,m.numInquiries6Mon,m.lengthCreditHistory,m.numOpenCreditLines,m.numTotalCreditLines,m.numChargeoff1year,
|
||||
ISNULL(t.payment_1, 0) payment_1,ISNULL(t.payment_2, 0) payment_2,ISNULL(t.payment_3, 0) payment_3,ISNULL(t.payment_4, 0) payment_4,ISNULL(t.payment_5, 0) payment_5,
|
||||
ISNULL(t.past_due_1, 0) past_due_1,ISNULL(t.past_due_2, 0) past_due_2,ISNULL(t.past_due_3, 0) past_due_3,ISNULL(t.past_due_4, 0) past_due_4,ISNULL(t.past_due_5, 0) past_due_5,
|
||||
ISNULL(t.remain_balance_1, 0) remain_balance_1,ISNULL(t.remain_balance_2, 0) remain_balance_2,ISNULL(t.remain_balance_3, 0) remain_balance_3,ISNULL(t.remain_balance_4, 0) remain_balance_4,ISNULL(t.remain_balance_5, 0) remain_balance_5, t.charge_off
|
||||
from
|
||||
(
|
||||
select *,
|
||||
(select top 1 payment from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) payment_1,
|
||||
(select top 1 payment from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) payment_2,
|
||||
(select top 1 payment from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) payment_3,
|
||||
(select top 1 payment from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) payment_4,
|
||||
(select top 1 payment from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) payment_5,
|
||||
(select top 1 past_due from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) past_due_1,
|
||||
(select top 1 past_due from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) past_due_2,
|
||||
(select top 1 past_due from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) past_due_3,
|
||||
(select top 1 past_due from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) past_due_4,
|
||||
(select top 1 past_due from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) past_due_5,
|
||||
(select top 1 remain_balance from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) remain_balance_1,
|
||||
(select top 1 remain_balance from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) remain_balance_2,
|
||||
(select top 1 remain_balance from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) remain_balance_3,
|
||||
(select top 1 remain_balance from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) remain_balance_4,
|
||||
(select top 1 remain_balance from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) remain_balance_5,
|
||||
(select MAX(charged_off+0) from payments_info_10k p2 where DATEDIFF(month, p1.payment_date,p2.payment_date) IN (1,2,3) AND p1.loanId = p2.loanId) charge_off
|
||||
from payments_info_10k p1 ) AS t inner join loan_info_10k l ON t.loanId = l.loanId inner join member_info_10k m ON l.memberId = m.memberId
|
||||
where t.charge_off IS NOT NULL
|
||||
and ((payment_date between '2016-09-12' and '2016-12-12' and charge_off = 1) or (payment_date = '2017-01-12'));
|
||||
go
|
||||
|
||||
drop view if exists vw_loan_chargeoff_test_10k
|
||||
go
|
||||
create view vw_loan_chargeoff_test_10k
|
||||
as
|
||||
select t.loanId, t.payment_date, t.payment, t.past_due, t.remain_balance,
|
||||
l.loan_open_date, l.loanAmount,l.interestRate,l.grade,l.term,l.installment,l.isJointApplication,l.purpose,
|
||||
m.memberId,m.residentialState,m.annualIncome,m.yearsEmployment,m.homeOwnership,m.incomeVerified,m.creditScore,m.dtiRatio,m.revolvingBalance,m.revolvingUtilizationRate,m.numDelinquency2Years,m.numDerogatoryRec,m.numInquiries6Mon,m.lengthCreditHistory,m.numOpenCreditLines,m.numTotalCreditLines,m.numChargeoff1year,
|
||||
ISNULL(t.payment_1, 0) payment_1,ISNULL(t.payment_2, 0) payment_2,ISNULL(t.payment_3, 0) payment_3,ISNULL(t.payment_4, 0) payment_4,ISNULL(t.payment_5, 0) payment_5,
|
||||
ISNULL(t.past_due_1, 0) past_due_1,ISNULL(t.past_due_2, 0) past_due_2,ISNULL(t.past_due_3, 0) past_due_3,ISNULL(t.past_due_4, 0) past_due_4,ISNULL(t.past_due_5, 0) past_due_5,
|
||||
ISNULL(t.remain_balance_1, 0) remain_balance_1,ISNULL(t.remain_balance_2, 0) remain_balance_2,ISNULL(t.remain_balance_3, 0) remain_balance_3,ISNULL(t.remain_balance_4, 0) remain_balance_4,ISNULL(t.remain_balance_5, 0) remain_balance_5, t.charge_off
|
||||
from
|
||||
(
|
||||
select *,
|
||||
(select top 1 payment from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) payment_1,
|
||||
(select top 1 payment from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) payment_2,
|
||||
(select top 1 payment from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) payment_3,
|
||||
(select top 1 payment from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) payment_4,
|
||||
(select top 1 payment from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) payment_5,
|
||||
(select top 1 past_due from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) past_due_1,
|
||||
(select top 1 past_due from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) past_due_2,
|
||||
(select top 1 past_due from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) past_due_3,
|
||||
(select top 1 past_due from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) past_due_4,
|
||||
(select top 1 past_due from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) past_due_5,
|
||||
(select top 1 remain_balance from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) remain_balance_1,
|
||||
(select top 1 remain_balance from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) remain_balance_2,
|
||||
(select top 1 remain_balance from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) remain_balance_3,
|
||||
(select top 1 remain_balance from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) remain_balance_4,
|
||||
(select top 1 remain_balance from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) remain_balance_5,
|
||||
(select MAX(charged_off+0) from payments_info_10k p2 where DATEDIFF(month, p1.payment_date,p2.payment_date) IN (1,2,3) AND p1.loanId = p2.loanId) charge_off
|
||||
from payments_info_10k p1 ) AS t inner join loan_info_10k l ON t.loanId = l.loanId inner join member_info_10k m ON l.memberId = m.memberId
|
||||
where t.charge_off IS NOT NULL
|
||||
and payment_date = '2017-02-12';
|
||||
go
|
||||
|
||||
drop view if exists vw_loan_chargeoff_score_10k
|
||||
go
|
||||
create view vw_loan_chargeoff_score_10k
|
||||
as
|
||||
select t.loanId, t.payment_date, t.payment, t.past_due, t.remain_balance,
|
||||
l.loan_open_date, l.loanAmount,l.interestRate,l.grade,l.term,l.installment,l.isJointApplication,l.purpose,
|
||||
m.memberId,m.residentialState,m.annualIncome,m.yearsEmployment,m.homeOwnership,m.incomeVerified,m.creditScore,m.dtiRatio,m.revolvingBalance,m.revolvingUtilizationRate,m.numDelinquency2Years,m.numDerogatoryRec,m.numInquiries6Mon,m.lengthCreditHistory,m.numOpenCreditLines,m.numTotalCreditLines,m.numChargeoff1year,
|
||||
ISNULL(t.payment_1, 0) payment_1,ISNULL(t.payment_2, 0) payment_2,ISNULL(t.payment_3, 0) payment_3,ISNULL(t.payment_4, 0) payment_4,ISNULL(t.payment_5, 0) payment_5,
|
||||
ISNULL(t.past_due_1, 0) past_due_1,ISNULL(t.past_due_2, 0) past_due_2,ISNULL(t.past_due_3, 0) past_due_3,ISNULL(t.past_due_4, 0) past_due_4,ISNULL(t.past_due_5, 0) past_due_5,
|
||||
ISNULL(t.remain_balance_1, 0) remain_balance_1,ISNULL(t.remain_balance_2, 0) remain_balance_2,ISNULL(t.remain_balance_3, 0) remain_balance_3,ISNULL(t.remain_balance_4, 0) remain_balance_4,ISNULL(t.remain_balance_5, 0) remain_balance_5, t.charge_off
|
||||
from
|
||||
(
|
||||
select *,
|
||||
(select top 1 payment from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) payment_1,
|
||||
(select top 1 payment from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) payment_2,
|
||||
(select top 1 payment from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) payment_3,
|
||||
(select top 1 payment from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) payment_4,
|
||||
(select top 1 payment from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) payment_5,
|
||||
(select top 1 past_due from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) past_due_1,
|
||||
(select top 1 past_due from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) past_due_2,
|
||||
(select top 1 past_due from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) past_due_3,
|
||||
(select top 1 past_due from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) past_due_4,
|
||||
(select top 1 past_due from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) past_due_5,
|
||||
(select top 1 remain_balance from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) remain_balance_1,
|
||||
(select top 1 remain_balance from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) remain_balance_2,
|
||||
(select top 1 remain_balance from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) remain_balance_3,
|
||||
(select top 1 remain_balance from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) remain_balance_4,
|
||||
(select top 1 remain_balance from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) remain_balance_5,
|
||||
(select MAX(charged_off+0) from payments_info_10k p2 where DATEDIFF(month, p1.payment_date,p2.payment_date) IN (1,2,3) AND p1.loanId = p2.loanId) charge_off
|
||||
from payments_info_10k p1 ) AS t inner join loan_info_10k l ON t.loanId = l.loanId inner join member_info_10k m ON l.memberId = m.memberId
|
||||
where t.charge_off IS NOT NULL
|
||||
and payment_date > '2017-02-12';
|
||||
go
|
||||
|
||||
|
||||
-- persist the view in case of large dataset in order to get faster results
|
||||
drop table if exists [loan_chargeoff_train_10k]
|
||||
go
|
||||
|
||||
select *
|
||||
into [loan_chargeoff_train_10k]
|
||||
from [vw_loan_chargeoff_train_10k]
|
||||
go
|
||||
|
||||
create clustered columnstore index [cci_loan_chargeoff_train_10k] on [loan_chargeoff_train_10k]
|
||||
go
|
||||
|
||||
drop table if exists [loan_chargeoff_test_10k]
|
||||
go
|
||||
|
||||
select *
|
||||
into [loan_chargeoff_test_10k]
|
||||
from [vw_loan_chargeoff_test_10k]
|
||||
go
|
||||
|
||||
create clustered columnstore index [cci_loan_chargeoff_test_10k] on [loan_chargeoff_test_10k]
|
||||
go
|
||||
|
||||
drop table if exists [loan_chargeoff_score_10k]
|
||||
go
|
||||
|
||||
select *
|
||||
into [loan_chargeoff_score_10k]
|
||||
from [vw_loan_chargeoff_score_10k]
|
||||
go
|
||||
|
||||
create clustered columnstore index [cci_loan_chargeoff_score_10k] on [loan_chargeoff_score_10k]
|
||||
go
|
|
@ -0,0 +1,140 @@
|
|||
-- View over the underlying table for features and labels required
|
||||
/* Large DataSets */
|
||||
drop view if exists [dbo].[vw_loan_chargeoff_1m]
|
||||
go
|
||||
create view [dbo].[vw_loan_chargeoff_1m]
|
||||
as
|
||||
select t.loanId, t.payment_date, t.payment, t.past_due, t.remain_balance,
|
||||
l.loan_open_date, l.loanAmount,l.interestRate,l.grade,l.term,l.installment,l.isJointApplication,l.purpose,
|
||||
m.memberId,m.residentialState,m.annualIncome,m.yearsEmployment,m.homeOwnership,m.incomeVerified,m.creditScore,m.dtiRatio,m.revolvingBalance,m.revolvingUtilizationRate,m.numDelinquency2Years,m.numDerogatoryRec,m.numInquiries6Mon,m.lengthCreditHistory,m.numOpenCreditLines,m.numTotalCreditLines,m.numChargeoff1year,
|
||||
ISNULL(t.payment_1, 0) payment_1,ISNULL(t.payment_2, 0) payment_2,ISNULL(t.payment_3, 0) payment_3,ISNULL(t.payment_4, 0) payment_4,ISNULL(t.payment_5, 0) payment_5,
|
||||
ISNULL(t.past_due_1, 0) past_due_1,ISNULL(t.past_due_2, 0) past_due_2,ISNULL(t.past_due_3, 0) past_due_3,ISNULL(t.past_due_4, 0) past_due_4,ISNULL(t.past_due_5, 0) past_due_5,
|
||||
ISNULL(t.remain_balance_1, 0) remain_balance_1,ISNULL(t.remain_balance_2, 0) remain_balance_2,ISNULL(t.remain_balance_3, 0) remain_balance_3,ISNULL(t.remain_balance_4, 0) remain_balance_4,ISNULL(t.remain_balance_5, 0) remain_balance_5, t.charge_off
|
||||
from
|
||||
(
|
||||
select *,
|
||||
(select top 1 payment from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) payment_1,
|
||||
(select top 1 payment from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) payment_2,
|
||||
(select top 1 payment from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) payment_3,
|
||||
(select top 1 payment from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) payment_4,
|
||||
(select top 1 payment from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) payment_5,
|
||||
(select top 1 past_due from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) past_due_1,
|
||||
(select top 1 past_due from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) past_due_2,
|
||||
(select top 1 past_due from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) past_due_3,
|
||||
(select top 1 past_due from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) past_due_4,
|
||||
(select top 1 past_due from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) past_due_5,
|
||||
(select top 1 remain_balance from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) remain_balance_1,
|
||||
(select top 1 remain_balance from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) remain_balance_2,
|
||||
(select top 1 remain_balance from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) remain_balance_3,
|
||||
(select top 1 remain_balance from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) remain_balance_4,
|
||||
(select top 1 remain_balance from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) remain_balance_5,
|
||||
(select MAX(charged_off+0) from payments_info_1m p2 where DATEDIFF(month, p1.payment_date,p2.payment_date) IN (1,2,3) AND p1.loanId = p2.loanId) charge_off
|
||||
from payments_info_1m p1 ) AS t inner join loan_info_1m l ON t.loanId = l.loanId inner join member_info_1m m ON l.memberId = m.memberId
|
||||
where t.charge_off IS NOT NULL
|
||||
and ((payment_date between '2016-09-12' and '2016-12-12' and charge_off = 1) or (payment_date = '2017-01-12'))
|
||||
|
||||
GO
|
||||
|
||||
drop view if exists [dbo].[vw_loan_chargeoff_test_1m]
|
||||
go
|
||||
create view [dbo].[vw_loan_chargeoff_test_1m]
|
||||
as
|
||||
select t.loanId, t.payment_date, t.payment, t.past_due, t.remain_balance,
|
||||
l.loan_open_date, l.loanAmount,l.interestRate,l.grade,l.term,l.installment,l.isJointApplication,l.purpose,
|
||||
m.memberId,m.residentialState,m.annualIncome,m.yearsEmployment,m.homeOwnership,m.incomeVerified,m.creditScore,m.dtiRatio,m.revolvingBalance,m.revolvingUtilizationRate,m.numDelinquency2Years,m.numDerogatoryRec,m.numInquiries6Mon,m.lengthCreditHistory,m.numOpenCreditLines,m.numTotalCreditLines,m.numChargeoff1year,
|
||||
ISNULL(t.payment_1, 0) payment_1,ISNULL(t.payment_2, 0) payment_2,ISNULL(t.payment_3, 0) payment_3,ISNULL(t.payment_4, 0) payment_4,ISNULL(t.payment_5, 0) payment_5,
|
||||
ISNULL(t.past_due_1, 0) past_due_1,ISNULL(t.past_due_2, 0) past_due_2,ISNULL(t.past_due_3, 0) past_due_3,ISNULL(t.past_due_4, 0) past_due_4,ISNULL(t.past_due_5, 0) past_due_5,
|
||||
ISNULL(t.remain_balance_1, 0) remain_balance_1,ISNULL(t.remain_balance_2, 0) remain_balance_2,ISNULL(t.remain_balance_3, 0) remain_balance_3,ISNULL(t.remain_balance_4, 0) remain_balance_4,ISNULL(t.remain_balance_5, 0) remain_balance_5, t.charge_off
|
||||
from
|
||||
(
|
||||
select *,
|
||||
(select top 1 payment from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) payment_1,
|
||||
(select top 1 payment from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) payment_2,
|
||||
(select top 1 payment from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) payment_3,
|
||||
(select top 1 payment from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) payment_4,
|
||||
(select top 1 payment from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) payment_5,
|
||||
(select top 1 past_due from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) past_due_1,
|
||||
(select top 1 past_due from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) past_due_2,
|
||||
(select top 1 past_due from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) past_due_3,
|
||||
(select top 1 past_due from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) past_due_4,
|
||||
(select top 1 past_due from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) past_due_5,
|
||||
(select top 1 remain_balance from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) remain_balance_1,
|
||||
(select top 1 remain_balance from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) remain_balance_2,
|
||||
(select top 1 remain_balance from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) remain_balance_3,
|
||||
(select top 1 remain_balance from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) remain_balance_4,
|
||||
(select top 1 remain_balance from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) remain_balance_5,
|
||||
(select MAX(charged_off+0) from payments_info_1m p2 where DATEDIFF(month, p1.payment_date,p2.payment_date) IN (1,2,3) AND p1.loanId = p2.loanId) charge_off
|
||||
from payments_info_1m p1 ) AS t inner join loan_info_1m l ON t.loanId = l.loanId inner join member_info_1m m ON l.memberId = m.memberId
|
||||
where t.charge_off IS NOT NULL
|
||||
and payment_date = '2017-02-12'
|
||||
GO
|
||||
|
||||
drop view if exists [dbo].[vw_loan_chargeoff_score_1m]
|
||||
go
|
||||
create view [dbo].[vw_loan_chargeoff_score_1m]
|
||||
as
|
||||
select t.loanId, t.payment_date, t.payment, t.past_due, t.remain_balance,
|
||||
l.loan_open_date, l.loanAmount,l.interestRate,l.grade,l.term,l.installment,l.isJointApplication,l.purpose,
|
||||
m.memberId,m.residentialState,m.annualIncome,m.yearsEmployment,m.homeOwnership,m.incomeVerified,m.creditScore,m.dtiRatio,m.revolvingBalance,m.revolvingUtilizationRate,m.numDelinquency2Years,m.numDerogatoryRec,m.numInquiries6Mon,m.lengthCreditHistory,m.numOpenCreditLines,m.numTotalCreditLines,m.numChargeoff1year,
|
||||
ISNULL(t.payment_1, 0) payment_1,ISNULL(t.payment_2, 0) payment_2,ISNULL(t.payment_3, 0) payment_3,ISNULL(t.payment_4, 0) payment_4,ISNULL(t.payment_5, 0) payment_5,
|
||||
ISNULL(t.past_due_1, 0) past_due_1,ISNULL(t.past_due_2, 0) past_due_2,ISNULL(t.past_due_3, 0) past_due_3,ISNULL(t.past_due_4, 0) past_due_4,ISNULL(t.past_due_5, 0) past_due_5,
|
||||
ISNULL(t.remain_balance_1, 0) remain_balance_1,ISNULL(t.remain_balance_2, 0) remain_balance_2,ISNULL(t.remain_balance_3, 0) remain_balance_3,ISNULL(t.remain_balance_4, 0) remain_balance_4,ISNULL(t.remain_balance_5, 0) remain_balance_5, t.charge_off
|
||||
from
|
||||
(
|
||||
select *,
|
||||
(select top 1 payment from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) payment_1,
|
||||
(select top 1 payment from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) payment_2,
|
||||
(select top 1 payment from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) payment_3,
|
||||
(select top 1 payment from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) payment_4,
|
||||
(select top 1 payment from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) payment_5,
|
||||
(select top 1 past_due from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) past_due_1,
|
||||
(select top 1 past_due from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) past_due_2,
|
||||
(select top 1 past_due from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) past_due_3,
|
||||
(select top 1 past_due from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) past_due_4,
|
||||
(select top 1 past_due from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) past_due_5,
|
||||
(select top 1 remain_balance from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) remain_balance_1,
|
||||
(select top 1 remain_balance from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) remain_balance_2,
|
||||
(select top 1 remain_balance from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) remain_balance_3,
|
||||
(select top 1 remain_balance from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) remain_balance_4,
|
||||
(select top 1 remain_balance from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) remain_balance_5,
|
||||
(select MAX(charged_off+0) from payments_info_1m p2 where DATEDIFF(month, p1.payment_date,p2.payment_date) IN (1,2,3) AND p1.loanId = p2.loanId) charge_off
|
||||
from payments_info_1m p1 ) AS t inner join loan_info_1m l ON t.loanId = l.loanId inner join member_info_1m m ON l.memberId = m.memberId
|
||||
where t.charge_off IS NOT NULL
|
||||
and payment_date > '2017-02-12'
|
||||
GO
|
||||
|
||||
-- persist the view in case of large dataset in order to get faster results
|
||||
|
||||
/* Large dataset */
|
||||
drop table if exists [loan_chargeoff_train_1m]
|
||||
go
|
||||
|
||||
select *
|
||||
into [loan_chargeoff_train_1m]
|
||||
from [vw_loan_chargeoff_1m]
|
||||
go
|
||||
|
||||
create clustered columnstore index [cci_loan_chargeoff_train_1m] on [loan_chargeoff_train_1m]
|
||||
go
|
||||
|
||||
drop table if exists [loan_chargeoff_test_1m]
|
||||
go
|
||||
|
||||
select *
|
||||
into [loan_chargeoff_test_1m]
|
||||
from [vw_loan_chargeoff_test_1m]
|
||||
go
|
||||
|
||||
create clustered columnstore index [cci_loan_chargeoff_test_1m] on [loan_chargeoff_test_1m]
|
||||
go
|
||||
|
||||
drop table if exists [loan_chargeoff_score_1m]
|
||||
go
|
||||
|
||||
select *
|
||||
into [loan_chargeoff_score_1m]
|
||||
from [vw_loan_chargeoff_score_1m]
|
||||
go
|
||||
|
||||
create clustered columnstore index [cci_loan_chargeoff_score_1m] on [loan_chargeoff_score_1m]
|
||||
go
|
|
@ -0,0 +1,53 @@
|
|||
SET ANSI_NULLS ON
|
||||
GO
|
||||
SET QUOTED_IDENTIFIER ON
|
||||
GO
|
||||
|
||||
DROP PROCEDURE IF EXISTS [dbo].[select_features];
|
||||
GO
|
||||
|
||||
CREATE PROCEDURE [select_features] @training_set_table varchar(100), @test_set_table varchar(100), @selected_features_table varchar(100), @connectionString varchar(300)
|
||||
AS
|
||||
BEGIN
|
||||
DECLARE @testing_set_query nvarchar(400), @del_cmd nvarchar(100), @ins_cmd nvarchar(max)
|
||||
/* select features using MicrosotML */
|
||||
SET @del_cmd = 'DELETE FROM ' + @selected_features_table
|
||||
EXEC sp_executesql @del_cmd
|
||||
SET @ins_cmd = 'INSERT INTO ' + @selected_features_table + ' (feature_name)
|
||||
EXECUTE sp_execute_external_script @language = N''R'',
|
||||
@script = N''
|
||||
library(RevoScaleR)
|
||||
library(MicrosoftML)
|
||||
##########################################################################################################################################
|
||||
## Set the compute context to SQL for faster training
|
||||
##########################################################################################################################################
|
||||
testing_set <- RxSqlServerData(table=test_set, connectionString = connection_string)
|
||||
training_set <- RxSqlServerData(table=train_set, connectionString = connection_string)
|
||||
|
||||
features <- rxGetVarNames(testing_set)
|
||||
variables_to_remove <- c("memberId", "loanId", "payment_date", "loan_open_date", "charge_off")
|
||||
feature_names <- features[!(features %in% variables_to_remove)]
|
||||
model_formula <- as.formula(paste(paste("charge_off~"), paste(feature_names, collapse = "+")))
|
||||
selected_count <- 0
|
||||
features_to_remove <- c("(Bias)")
|
||||
ml_trans <- list(categorical(vars = c("purpose", "residentialState", "homeOwnership", "yearsEmployment")),
|
||||
selectFeatures(model_formula, mode = mutualInformation(numFeaturesToKeep = 41)))
|
||||
candidate_model <- rxLogisticRegression(model_formula, data = training_set, mlTransforms = ml_trans)
|
||||
predicted_score <- rxPredict(candidate_model, testing_set, extraVarsToWrite = c("charge_off"))
|
||||
predicted_roc <- rxRoc("charge_off", grep("Probability", names(predicted_score), value = T), predicted_score)
|
||||
auc <- rxAuc(predicted_roc)
|
||||
|
||||
selected_features <- rxGetVarInfo(summary(candidate_model)$summary)
|
||||
selected_feature_names <- names(selected_features)
|
||||
selected_feature_filtered <- selected_feature_names[!(selected_feature_names %in% features_to_remove)]
|
||||
|
||||
selected_features_final <- data.frame(selected_feature_filtered)''
|
||||
, @output_data_1_name = N''selected_features_final''
|
||||
, @params = N''@connection_string varchar(300), @test_set varchar(100), @train_set varchar(100)''
|
||||
, @connection_string = ''' + @connectionString + '''' +
|
||||
', @train_set = ''' + @training_set_table + '''' +
|
||||
', @test_set = ''' + @test_set_table + ''';'
|
||||
|
||||
EXEC sp_executesql @ins_cmd
|
||||
END
|
||||
GO
|
|
@ -0,0 +1,123 @@
|
|||
/****** Stored Procedure to train models ******/
|
||||
SET ANSI_NULLS ON
|
||||
GO
|
||||
SET QUOTED_IDENTIFIER ON
|
||||
GO
|
||||
|
||||
DROP PROCEDURE IF EXISTS [dbo].[train_model];
|
||||
GO
|
||||
|
||||
CREATE PROCEDURE [train_model] @training_set_table varchar(100), @test_set_table varchar(100), @scored_table varchar(100), @model_table varchar(100), @model_name_param varchar(50), @connectionString varchar(300)
|
||||
AS
|
||||
BEGIN
|
||||
|
||||
DECLARE @payload varbinary(max), @auc real, @accuracy real, @precision real, @recall real, @f1score real;
|
||||
DECLARE @del_cmd nvarchar(300), @ins_cmd nvarchar(300), @param_def nvarchar(300);
|
||||
EXECUTE sp_execute_external_script @language = N'R',
|
||||
@script = N'
|
||||
library(RevoScaleR)
|
||||
library(MicrosoftML)
|
||||
# model evaluation functions
|
||||
model_eval_stats <- function(scored_data, label="charge_off", predicted_prob="Probability", predicted_label="PredictedLabel")
|
||||
{
|
||||
roc <- rxRoc(label, grep(predicted_prob, names(scored_data), value=T), scored_data)
|
||||
auc <- rxAuc(roc)
|
||||
crosstab_formula <- as.formula(paste("~as.factor(", label, "):as.factor(", predicted_label, ")"))
|
||||
cross_tab <- rxCrossTabs(crosstab_formula, scored_data)
|
||||
conf_matrix <- cross_tab$counts[[1]]
|
||||
tn <- conf_matrix[1,1]
|
||||
fp <- conf_matrix[1,2]
|
||||
fn <- conf_matrix[2,1]
|
||||
tp <- conf_matrix[2,2]
|
||||
accuracy <- (tp + tn) / (tp + fn + fp + tn)
|
||||
precision <- tp/(tp+fp)
|
||||
recall <- tp / (tp+fn)
|
||||
f1score <- 2 * (precision * recall) / (precision + recall)
|
||||
return(list(auc=auc, accuracy=accuracy, precision = precision, recall=recall, f1score=f1score))
|
||||
}
|
||||
cc <- RxInSqlServer(connectionString = connection_string)
|
||||
rxSetComputeContext(cc)
|
||||
training_set <- RxSqlServerData(table=train_set, connectionString = connection_string)
|
||||
testing_set <- RxSqlServerData(table=test_set, connectionString = connection_string)
|
||||
scoring_set <- RxSqlServerData(table=score_set, connectionString = connection_string, overwrite=TRUE)
|
||||
##########################################################################################################################################
|
||||
## Training and evaluating model based on model selection
|
||||
##########################################################################################################################################
|
||||
features <- rxGetVarNames(training_set)
|
||||
vars_to_remove <- c("memberId", "loanId", "payment_date", "loan_open_date", "charge_off")
|
||||
feature_names <- features[!(features %in% vars_to_remove)]
|
||||
model_formula <- as.formula(paste(paste("charge_off~"), paste(feature_names, collapse = "+")))
|
||||
ml_trans <- list(categorical(vars = c("purpose", "residentialState", "homeOwnership", "yearsEmployment")),
|
||||
selectFeatures(model_formula, mode = mutualInformation(numFeaturesToKeep = 41)))
|
||||
|
||||
if (model_name == "logistic_reg") {
|
||||
model <- rxLogisticRegression(formula = model_formula,
|
||||
data = training_set,
|
||||
mlTransforms = ml_trans)
|
||||
} else if (model_name == "fast_trees") {
|
||||
model <- rxFastTrees(formula = model_formula,
|
||||
data = training_set,
|
||||
mlTransforms = ml_trans)
|
||||
} else if (model_name == "fast_forest") {
|
||||
model <- rxFastForest(formula = model_formula,
|
||||
data = training_set,
|
||||
mlTransforms = ml_trans)
|
||||
} else if (model_name == "fast_linear") {
|
||||
model <- rxFastLinear(formula = model_formula,
|
||||
data = training_set,
|
||||
mlTransforms = ml_trans)
|
||||
} else if (model_name == "neural_net") {
|
||||
model <- rxNeuralNet(formula = model_formula,
|
||||
data = training_set,
|
||||
numIterations = 42,
|
||||
optimizer = adaDeltaSgd(),
|
||||
mlTransforms = ml_trans)
|
||||
}
|
||||
print("Done training.")
|
||||
# evaluate model
|
||||
rxPredict(model, testing_set, outData = scoring_set, extraVarsToWrite = c("memberId", "loanId", "charge_off"), overwrite=TRUE)
|
||||
print("Done writing predictions for evaluation of model.")
|
||||
model_stats <- model_eval_stats(scoring_set)
|
||||
print(model_stats)
|
||||
modelbin <- serialize(model, connection=NULL)
|
||||
stat_auc <- model_stats[[1]]
|
||||
|
||||
stat_accuracy <- model_stats[[2]]
|
||||
stat_precision <- model_stats[[3]]
|
||||
stat_recall <- model_stats[[4]]
|
||||
stat_f1score <- model_stats[[5]]
|
||||
'
|
||||
, @params = N'@model_name varchar(20), @connection_string varchar(300), @train_set varchar(100), @test_set varchar(100), @score_set varchar(100),
|
||||
@modelbin varbinary(max) OUTPUT, @stat_auc real OUTPUT, @stat_accuracy real OUTPUT, @stat_precision real OUTPUT, @stat_recall real OUTPUT, @stat_f1score real OUTPUT'
|
||||
, @model_name = @model_name_param
|
||||
, @connection_string = @connectionString
|
||||
, @train_set = @training_set_table
|
||||
, @test_set = @test_set_table
|
||||
, @score_set = @scored_table
|
||||
, @modelbin = @payload OUTPUT
|
||||
, @stat_auc = @auc OUTPUT
|
||||
, @stat_accuracy = @accuracy OUTPUT
|
||||
, @stat_precision = @precision OUTPUT
|
||||
, @stat_recall = @recall OUTPUT
|
||||
, @stat_f1score = @f1score OUTPUT;
|
||||
|
||||
SET @del_cmd = N'DELETE FROM ' + @model_table + N' WHERE model_name = ''' + @model_name_param + ''''
|
||||
EXEC sp_executesql @del_cmd;
|
||||
SET @ins_cmd = N'INSERT INTO ' + @model_table + N' (model_name, model, auc, accuracy, precision, recall, f1score) VALUES (''' + @model_name_param + ''', @p_payload, @p_auc, @p_accuracy, @p_precision, @p_recall, @p_f1score)'
|
||||
SET @param_def = N'@p_payload varbinary(max),
|
||||
@p_auc real,
|
||||
@p_accuracy real,
|
||||
@p_precision real,
|
||||
@p_recall real,
|
||||
@p_f1score real'
|
||||
EXEC sp_executesql @ins_cmd, @param_def,
|
||||
@p_payload=@payload,
|
||||
@p_auc=@auc,
|
||||
@p_accuracy=@accuracy,
|
||||
@p_precision=@precision,
|
||||
@p_recall=@recall,
|
||||
@p_f1score=@f1score;
|
||||
|
||||
;
|
||||
END
|
||||
GO
|
|
@ -0,0 +1,38 @@
|
|||
|
||||
SET ANSI_NULLS ON
|
||||
GO
|
||||
SET QUOTED_IDENTIFIER ON
|
||||
GO
|
||||
|
||||
DROP PROCEDURE IF EXISTS [dbo].[predict_chargeoff]
|
||||
GO
|
||||
|
||||
CREATE PROCEDURE [predict_chargeoff] @score_table varchar(100), @score_prediction_table varchar(100), @models_table varchar(100), @connectionString varchar(300)
|
||||
|
||||
AS
|
||||
BEGIN
|
||||
|
||||
DECLARE @best_model_query nvarchar(300), @param_def nvarchar(100)
|
||||
DECLARE @bestmodel varbinary(max)
|
||||
SET @best_model_query = 'select top 1 @p_best_model = model from ' + @models_table + ' where f1score in (select max(f1score) from ' + @models_table + ')'
|
||||
SET @param_def = N'@p_best_model varbinary(max) OUTPUT';
|
||||
EXEC sp_executesql @best_model_query, @param_def, @p_best_model=@bestmodel OUTPUT;
|
||||
EXEC sp_execute_external_script @language = N'R',
|
||||
@script = N'
|
||||
library(RevoScaleR)
|
||||
library(MicrosoftML)
|
||||
# Get best_model.
|
||||
best_model <- unserialize(best_model)
|
||||
scoring_set <- RxSqlServerData(table=score_set, connectionString = connection_string)
|
||||
scored_output <- RxSqlServerData(table=score_prediction, connectionString = connection_string, overwrite=TRUE)
|
||||
print(summary(best_model))
|
||||
rxPredict(best_model, scoring_set, outData = scored_output, extraVarsToWrite = c("memberId", "loanId", "payment_date"), overwrite=TRUE)
|
||||
'
|
||||
, @params = N'@best_model varbinary(max), @score_set varchar(100), @score_prediction varchar(100), @connection_string varchar(300)'
|
||||
, @best_model = @bestmodel
|
||||
, @score_set = @score_table
|
||||
, @score_prediction = @score_prediction_table
|
||||
, @connection_string = @connectionString
|
||||
;
|
||||
END
|
||||
GO
|
|
@ -0,0 +1,215 @@
|
|||
|
||||
SET ANSI_NULLS ON
|
||||
GO
|
||||
SET QUOTED_IDENTIFIER ON
|
||||
GO
|
||||
|
||||
DROP PROCEDURE IF EXISTS [dbo].[predict_chargeoff_ondemand]
|
||||
GO
|
||||
|
||||
CREATE PROCEDURE [predict_chargeoff_ondemand]
|
||||
@models_table varchar(100),
|
||||
@loanId int,
|
||||
@payment_date datetime,
|
||||
@payment real,
|
||||
@past_due real,
|
||||
@remain_balance real,
|
||||
@loan_open_date datetime,
|
||||
@loanAmount real,
|
||||
@interestRate real,
|
||||
@grade int,
|
||||
@term int,
|
||||
@installment real,
|
||||
@isJointApplication bit,
|
||||
@purpose nvarchar(255),
|
||||
@memberId int,
|
||||
@residentialState nvarchar(4),
|
||||
@annualIncome real,
|
||||
@yearsEmployment nvarchar(11),
|
||||
@homeOwnership nvarchar(10),
|
||||
@incomeVerified bit,
|
||||
@creditScore int,
|
||||
@dtiRatio real,
|
||||
@revolvingBalance real,
|
||||
@revolvingUtilizationRate real,
|
||||
@numDelinquency2Years int,
|
||||
@numDerogatoryRec int,
|
||||
@numInquiries6Mon int,
|
||||
@lengthCreditHistory int,
|
||||
@numOpenCreditLines int,
|
||||
@numTotalCreditLines int,
|
||||
@numChargeoff1year int,
|
||||
@payment_1 real,
|
||||
@payment_2 real,
|
||||
@payment_3 real,
|
||||
@payment_4 real,
|
||||
@payment_5 real,
|
||||
@past_due_1 real,
|
||||
@past_due_2 real,
|
||||
@past_due_3 real,
|
||||
@past_due_4 real,
|
||||
@past_due_5 real,
|
||||
@remain_balance_1 real,
|
||||
@remain_balance_2 real,
|
||||
@remain_balance_3 real,
|
||||
@remain_balance_4 real,
|
||||
@remain_balance_5 real
|
||||
|
||||
AS
|
||||
BEGIN
|
||||
|
||||
DECLARE @best_model_query nvarchar(300), @param_def nvarchar(100)
|
||||
DECLARE @bestmodel varbinary(max)
|
||||
SET @best_model_query = 'select top 1 @p_best_model = model from ' + @models_table + ' where f1score in (select max(f1score) from ' + @models_table + ')'
|
||||
SET @param_def = N'@p_best_model varbinary(max) OUTPUT';
|
||||
EXEC sp_executesql @best_model_query, @param_def, @p_best_model=@bestmodel OUTPUT;
|
||||
DECLARE @inquery nvarchar(max) = N'SELECT @p_loanId
|
||||
,@p_payment_date
|
||||
,@p_payment
|
||||
,@p_past_due
|
||||
,@p_remain_balance
|
||||
,@p_loan_open_date
|
||||
,@p_loanAmount
|
||||
,@p_interestRate
|
||||
,@p_grade
|
||||
,@p_term
|
||||
,@p_installment
|
||||
,@p_isJointApplication
|
||||
,@p_purpose
|
||||
,@p_memberId
|
||||
,@p_residentialState
|
||||
,@p_annualIncome
|
||||
,@p_yearsEmployment
|
||||
,@p_homeOwnership
|
||||
,@p_incomeVerified
|
||||
,@p_creditScore
|
||||
,@p_dtiRatio
|
||||
,@p_revolvingBalance
|
||||
,@p_revolvingUtilizationRate
|
||||
,@p_numDelinquency2Years
|
||||
,@p_numDerogatoryRec
|
||||
,@p_numInquiries6Mon
|
||||
,@p_lengthCreditHistory
|
||||
,@p_numOpenCreditLines
|
||||
,@p_numTotalCreditLines
|
||||
,@p_numChargeoff1year
|
||||
,@p_payment_1
|
||||
,@p_payment_2
|
||||
,@p_payment_3
|
||||
,@p_payment_4
|
||||
,@p_payment_5
|
||||
,@p_past_due_1
|
||||
,@p_past_due_2
|
||||
,@p_past_due_3
|
||||
,@p_past_due_4
|
||||
,@p_past_due_5
|
||||
,@p_remain_balance_1
|
||||
,@p_remain_balance_2
|
||||
,@p_remain_balance_3
|
||||
,@p_remain_balance_4
|
||||
,@p_remain_balance_5'
|
||||
|
||||
EXEC sp_execute_external_script @language = N'R',
|
||||
@script = N'
|
||||
library(RevoScaleR)
|
||||
library(MicrosoftML)
|
||||
# Get best_model.
|
||||
best_model <- unserialize(best_model)
|
||||
OutputDataSet <- rxPredict(best_model, InputDataSet, outData = NULL, extraVarsToWrite = c("memberId", "loanId", "payment_date"))
|
||||
'
|
||||
, @input_data_1 = @inquery
|
||||
, @params = N'@best_model varbinary(max),
|
||||
@p_loanId int,
|
||||
@p_payment_date datetime,
|
||||
@p_payment real,
|
||||
@p_past_due real,
|
||||
@p_remain_balance real,
|
||||
@p_loan_open_date datetime,
|
||||
@p_loanAmount real,
|
||||
@p_interestRate real,
|
||||
@p_grade int,
|
||||
@p_term int,
|
||||
@p_installment real,
|
||||
@p_isJointApplication bit,
|
||||
@p_purpose nvarchar(255),
|
||||
@p_memberId int,
|
||||
@p_residentialState nvarchar(4),
|
||||
@p_annualIncome real,
|
||||
@p_yearsEmployment nvarchar(11),
|
||||
@p_homeOwnership nvarchar(10),
|
||||
@p_incomeVerified bit,
|
||||
@p_creditScore int,
|
||||
@p_dtiRatio real,
|
||||
@p_revolvingBalance real,
|
||||
@p_revolvingUtilizationRate real,
|
||||
@p_numDelinquency2Years int,
|
||||
@p_numDerogatoryRec int,
|
||||
@p_numInquiries6Mon int,
|
||||
@p_lengthCreditHistory int,
|
||||
@p_numOpenCreditLines int,
|
||||
@p_numTotalCreditLines int,
|
||||
@p_numChargeoff1year int,
|
||||
@p_payment_1 real,
|
||||
@p_payment_2 real,
|
||||
@p_payment_3 real,
|
||||
@p_payment_4 real,
|
||||
@p_payment_5 real,
|
||||
@p_past_due_1 real,
|
||||
@p_past_due_2 real,
|
||||
@p_past_due_3 real,
|
||||
@p_past_due_4 real,
|
||||
@p_past_due_5 real,
|
||||
@p_remain_balance_1 real,
|
||||
@p_remain_balance_2 real,
|
||||
@p_remain_balance_3 real,
|
||||
@p_remain_balance_4 real,
|
||||
@p_remain_balance_5 real'
|
||||
, @p_loanId=@loanId
|
||||
, @p_payment_date=@payment_date
|
||||
, @p_payment=@payment
|
||||
, @p_past_due=@past_due
|
||||
, @p_remain_balance=@remain_balance
|
||||
, @p_loan_open_date=@loan_open_date
|
||||
, @p_loanAmount=@loanAmount
|
||||
, @p_interestRate=@interestRate
|
||||
, @p_grade=@grade
|
||||
, @p_term=@term
|
||||
, @p_installment=@installment
|
||||
, @p_isJointApplication=@isJointApplication
|
||||
, @p_purpose=@purpose
|
||||
, @p_memberId=@memberId
|
||||
, @p_residentialState=@residentialState
|
||||
, @p_annualIncome=@annualIncome
|
||||
, @p_yearsEmployment=@yearsEmployment
|
||||
, @p_homeOwnership=@homeOwnership
|
||||
, @p_incomeVerified=@incomeVerified
|
||||
, @p_creditScore=@creditScore
|
||||
, @p_dtiRatio=@dtiRatio
|
||||
, @p_revolvingBalance=@revolvingBalance
|
||||
, @p_revolvingUtilizationRate=@revolvingUtilizationRate
|
||||
, @p_numDelinquency2Years=@numDelinquency2Years
|
||||
, @p_numDerogatoryRec=@numDerogatoryRec
|
||||
, @p_numInquiries6Mon=@numInquiries6Mon
|
||||
, @p_lengthCreditHistory=@lengthCreditHistory
|
||||
, @p_numOpenCreditLines=@numOpenCreditLines
|
||||
, @p_numTotalCreditLines=@numTotalCreditLines
|
||||
, @p_numChargeoff1year=@numChargeoff1year
|
||||
, @p_payment_1=@payment_1
|
||||
, @p_payment_2=@payment_2
|
||||
, @p_payment_3=@payment_3
|
||||
, @p_payment_4=@payment_4
|
||||
, @p_payment_5=@payment_5
|
||||
, @p_past_due_1=@past_due_1
|
||||
, @p_past_due_2=@past_due_2
|
||||
, @p_past_due_3=@past_due_3
|
||||
, @p_past_due_4=@past_due_4
|
||||
, @p_past_due_5=@past_due_5
|
||||
, @p_remain_balance_1=@remain_balance_1
|
||||
, @p_remain_balance_2=@remain_balance_2
|
||||
, @p_remain_balance_3=@remain_balance_3
|
||||
, @p_remain_balance_4=@remain_balance_4
|
||||
, @p_remain_balance_5=@remain_balance_5
|
||||
, @best_model = @bestmodel
|
||||
;
|
||||
END
|
||||
GO
|
Загрузка…
Ссылка в новой задаче