solve conflict
This commit is contained in:
Коммит
b49f7e121d
|
@ -39,9 +39,9 @@ loan_prediction <- function(LocalWorkDir,
|
|||
# Look up the trained model that corresponds to the name of the best model.
#
# bestModelName: name of the selected model; coerced with as.character(), so a
#   factor is accepted as well as a string. Recognised values are "forest",
#   "linear", "logistic", "tree" and "NN".
#
# Returns (invisibly) the matching element of `model_obj`, which is resolved
# from the enclosing scope. An unrecognised name yields NULL.
importedModel <- function(bestModelName) {
  # Each model name is listed exactly once: switch() only evaluates the first
  # arm with a given name, so a repeated arm can never run.
  invisible(
    switch(as.character(bestModelName),
      forest = model_obj$forest_model,
      linear = model_obj$linear_model,
      logistic = model_obj$logistic_model,
      tree = model_obj$tree_model,
      NN = model_obj$NN_model
    )
  )
}
|
||||
|
||||
|
@ -81,4 +81,4 @@ loan_prediction <- function(LocalWorkDir,
|
|||
print("The prediction results are also stored in hive table loanchargeoff_predictions")
|
||||
|
||||
return(finalResult)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1 +0,0 @@
|
|||
Code for Tiger Team Solution Templates
|
|
@ -1,80 +0,0 @@
|
|||
##############################################################################################
|
||||
# Script to invoke the LoanChargeOff data science workflow with a smaller dataset of 10,000
|
||||
# loans.
|
||||
# It also creates a SQL Server user and stores the password in 'ExportedSqlPassword.txt'.
|
||||
# Users can retrieve the password from the file and decrypt using ConvertTo-SecureString
|
||||
# cmdlet in PowerShell.
|
||||
#
|
||||
# Parameters:
|
||||
# dbuser - (Optional) username for database LoanChargeOff
|
||||
# dbpass - (Optional) database password
|
||||
# createuser - (Optional) whether to create a database user
|
||||
# datadir - directory where raw csv data has been downloaded
|
||||
# datasize - size of the data to train on (10k, 100k, 1m)
|
||||
##############################################################################################
|
||||
Param
(
    [string]$datadir,
    [string]$scriptdir,
    [string]$dbuser,
    [string]$dbpass,
    [bool]$createuser = $true,
    [ValidateSet("10k", "100k", "1m")]
    [string]$datasize = "10k"
)

# The helper files used later are referenced with relative paths (.\...),
# so switch to the script directory first.
Set-Location $scriptdir
|
||||
# Function to generate a temporary password for SQL Server
|
||||
# Builds a random string by drawing $length items (with replacement) from
# $sourcedata and concatenating them.
#
# Parameters:
#   length     - number of items to draw (default 10)
#   sourcedata - pool of candidate characters, one per array element
# Returns the generated string; an empty string when length is zero or negative.
Function Get-TempPassword()
{
    Param
    (
        [int]$length=10,
        [string[]]$sourcedata
    )

    # Initialise explicitly: "+=" on an unset variable fails under Set-StrictMode
    # and would otherwise make the function return $null when the loop never runs.
    $TempPassword = ""
    For ($loop=1; $loop -le $length; $loop++)
    {
        $TempPassword += ($sourcedata | Get-Random)
    }
    return $TempPassword
}
|
||||
|
||||
# Resolve the database credentials, optionally create the SQL Server user,
# then launch the LoanChargeOff workflow.
$dbpassword = ""
$dbusername = "rdemo"
$passwordFile = "ExportedSqlPassword.txt"

# Candidate password characters: character codes 33 through 126.
# NOTE(review): this range includes quote characters; confirm that createuser.sql
# and the sqlcmd -v substitution handle them.
$passwordSource=$NULL
For ($a=33;$a -le 126; $a++)
{
    $passwordSource += ,[char][byte]$a
}

# An explicitly supplied user name overrides the default
if ($dbuser)
{
    $dbusername = $dbuser
}

if (!$createuser)
{
    # Re-use an existing user: take the password from -dbpass, else from the password file
    if ($dbpass)
    {
        $dbpassword = $dbpass
    }
    elseif (Test-Path $passwordFile)
    {
        $secureTxtFromFile = Get-Content $passwordFile
        $securePasswordObj = $secureTxtFromFile | ConvertTo-SecureString
        # Get back the original unencrypted password. SecureStringToBSTR allocates an
        # unmanaged plain-text copy, which must be zeroed and freed once it has been read.
        $PasswordBSTR = [System.Runtime.InteropServices.Marshal]::SecureStringToBSTR($securePasswordObj)
        try
        {
            $dbpassword = [System.Runtime.InteropServices.Marshal]::PtrToStringBSTR($PasswordBSTR)
        }
        finally
        {
            [System.Runtime.InteropServices.Marshal]::ZeroFreeBSTR($PasswordBSTR)
        }
    }
    else
    {
        Write-Host -ForegroundColor DarkYellow "Either ExportedSqlPassword.txt must exist with encrypted database password or must provide password using dbpass parameter."
        throw "No database password available: -dbpass was not supplied and $passwordFile was not found."
    }
}
else
{
    Write-Host -ForegroundColor Cyan "Creating database user"
    $dbpassword = Get-TempPassword -length 15 -sourcedata $passwordSource

    # Store the generated password in encrypted form so that later runs can read it back
    $securePassword = $dbpassword | ConvertTo-SecureString -AsPlainText -Force
    $secureTxt = $securePassword | ConvertFrom-SecureString
    Set-Content $passwordFile $secureTxt

    sqlcmd -S $env:COMPUTERNAME -v username="$dbusername" -v password="$dbpassword" -i .\createuser.sql
}

.\Loan_ChargeOff.ps1 -ServerName $env:COMPUTERNAME -DBName LoanChargeOff -username $dbusername -password $dbpassword -uninterrupted y -dataPath $datadir -dataSize $datasize
|
|
@ -1,77 +0,0 @@
|
|||
##############################################################################################
|
||||
# Script to invoke the LoanChargeOff data science workflow with a larger dataset of 1,000,000
|
||||
# loans.
|
||||
# It can also optionally create a SQL Server user and store the password in
|
||||
# 'ExportedSqlPassword.txt'. Users can retrieve the password from the file and decrypt using
|
||||
# ConvertTo-SecureString cmdlet in PowerShell.
|
||||
#
|
||||
# Parameters:
|
||||
# dbuser - (Optional) username for database LoanChargeOff
|
||||
# dbpass - (Optional) database password
|
||||
# createuser - (Optional) whether to create a database user
|
||||
##############################################################################################
|
||||
Param
(
    [string]$dbuser,
    [string]$dbpass,
    [bool]$createuser = $true,
    [string]$datadir
)
|
||||
# Function to generate a temporary password for SQL Server
|
||||
# Builds a random string by drawing $length items (with replacement) from
# $sourcedata and concatenating them.
#
# Parameters:
#   length     - number of items to draw (default 10)
#   sourcedata - pool of candidate characters, one per array element
# Returns the generated string; an empty string when length is zero or negative.
Function Get-TempPassword()
{
    Param
    (
        [int]$length=10,
        [string[]]$sourcedata
    )

    # Initialise explicitly: "+=" on an unset variable fails under Set-StrictMode
    # and would otherwise make the function return $null when the loop never runs.
    $TempPassword = ""
    For ($loop=1; $loop -le $length; $loop++)
    {
        $TempPassword += ($sourcedata | Get-Random)
    }
    return $TempPassword
}
|
||||
|
||||
# Resolve the database credentials, optionally create the SQL Server user,
# then launch the LoanChargeOff workflow.
$dbpassword = ""
$dbusername = "rdemo"
$passwordFile = "ExportedSqlPassword.txt"

# Candidate password characters: character codes 33 through 126.
# NOTE(review): this range includes quote characters; confirm that createuser.sql
# and the sqlcmd -v substitution handle them.
$passwordSource=$NULL
For ($a=33;$a -le 126; $a++)
{
    $passwordSource += ,[char][byte]$a
}

# An explicitly supplied user name overrides the default
if ($dbuser)
{
    $dbusername = $dbuser
}

if (!$createuser)
{
    # Re-use an existing user: take the password from -dbpass, else from the password file
    if ($dbpass)
    {
        $dbpassword = $dbpass
    }
    elseif (Test-Path $passwordFile)
    {
        $secureTxtFromFile = Get-Content $passwordFile
        $securePasswordObj = $secureTxtFromFile | ConvertTo-SecureString
        # Get back the original unencrypted password. SecureStringToBSTR allocates an
        # unmanaged plain-text copy, which must be zeroed and freed once it has been read.
        $PasswordBSTR = [System.Runtime.InteropServices.Marshal]::SecureStringToBSTR($securePasswordObj)
        try
        {
            $dbpassword = [System.Runtime.InteropServices.Marshal]::PtrToStringBSTR($PasswordBSTR)
        }
        finally
        {
            [System.Runtime.InteropServices.Marshal]::ZeroFreeBSTR($PasswordBSTR)
        }
    }
    else
    {
        Write-Host -ForegroundColor DarkYellow "Either ExportedSqlPassword.txt must exist with encrypted database password or must provide password using dbpass parameter."
        throw "No database password available: -dbpass was not supplied and $passwordFile was not found."
    }
}
else
{
    Write-Host -ForegroundColor Cyan "Creating database user"
    $dbpassword = Get-TempPassword -length 15 -sourcedata $passwordSource

    # Store the generated password in encrypted form so that later runs can read it back
    $securePassword = $dbpassword | ConvertTo-SecureString -AsPlainText -Force
    $secureTxt = $securePassword | ConvertFrom-SecureString
    Set-Content $passwordFile $secureTxt

    sqlcmd -S $env:COMPUTERNAME -v username="$dbusername" -v password="$dbpassword" -i .\createuser.sql
}

# NOTE(review): confirm that 'L' is a value Loan_ChargeOff.ps1 accepts for -dataSize.
.\Loan_ChargeOff.ps1 -ServerName $env:COMPUTERNAME -DBName LoanChargeOff -username $dbusername -password $dbpassword -uninterrupted y -dataPath $datadir -dataSize L
|
|
@ -1,23 +0,0 @@
|
|||
# Copies the given help file into the current user's Startup folder and onto
# the current user's Desktop.
#
# Parameters:
#   helpfile - path of the file to copy
param( [string]$helpfile)

# Desktop folder of the current user, with a trailing separator
$desktop = [Environment]::GetFolderPath("Desktop")
$desktop = $desktop + '\'

# Startup folder underneath the current user's Start Menu
$startmenu = [Environment]::GetFolderPath("StartMenu")
$startupfolder = $startmenu + '\Programs\Startup\'

# We create this since the user startup folder is only created after first login.
# -Force turns the call into a no-op (rather than an error) when the folder already exists.
# An alternative is to add the file to the all-users startup folder.
New-Item -ItemType Directory -Path $startupfolder -Force

# Diagnostic output: source file and the contents of the target folders
Write-Host $helpfile
Write-Host $startmenu
Get-ChildItem $startmenu
Write-Host $startupfolder
Get-ChildItem $startupfolder

# Copy the help file to both destinations
Copy-Item -Verbose $helpfile $startupfolder
Copy-Item -Verbose $helpfile $desktop
|
|
@ -1,103 +0,0 @@
|
|||
SET ansi_nulls on
|
||||
GO
|
||||
SET quoted_identifier on
|
||||
GO
|
||||
|
||||
/* Create the member_info Table. */
|
||||
/* Large DataSets */
|
||||
DROP TABLE IF EXISTS member_info_1m
|
||||
|
||||
CREATE TABLE [member_info_1m](
|
||||
[memberId] [int],
|
||||
[residentialState] [nvarchar](4),
|
||||
[annualIncome] [real],
|
||||
[yearsEmployment] [nvarchar](11),
|
||||
[homeOwnership] [nvarchar](10),
|
||||
[incomeVerified] [bit],
|
||||
[creditScore] [int],
|
||||
[dtiRatio] [real],
|
||||
[revolvingBalance] [real],
|
||||
[revolvingUtilizationRate] [real],
|
||||
[numDelinquency2Years] [int],
|
||||
[numDerogatoryRec] [int],
|
||||
[numInquiries6Mon] [int],
|
||||
[lengthCreditHistory] [int],
|
||||
[numOpenCreditLines] [int],
|
||||
[numTotalCreditLines] [int],
|
||||
[numChargeoff1year] [int]
|
||||
);
|
||||
|
||||
CREATE CLUSTERED COLUMNSTORE INDEX member_info_1m_cci ON member_info_1m WITH (DROP_EXISTING = OFF);
|
||||
GO
|
||||
/* Create the loan_info Table. */
|
||||
|
||||
DROP TABLE IF EXISTS loan_info_1m
|
||||
|
||||
CREATE TABLE [loan_info_1m](
|
||||
[loanId] [int],
|
||||
[loan_open_date] [datetime],
|
||||
[memberId] [int],
|
||||
[loanAmount] [real],
|
||||
[interestRate] [real],
|
||||
[grade] [int],
|
||||
[term] [int],
|
||||
[installment] [real],
|
||||
[isJointApplication] [bit],
|
||||
[purpose] [nvarchar](255)
|
||||
);
|
||||
|
||||
CREATE CLUSTERED COLUMNSTORE INDEX loan_info_1m_cci ON loan_info_1m WITH (DROP_EXISTING = OFF);
|
||||
GO
|
||||
/* Create the payments_info Table*/
|
||||
|
||||
DROP TABLE IF EXISTS payments_info_1m
|
||||
|
||||
CREATE TABLE [payments_info_1m](
|
||||
[loanId] [int],
|
||||
[payment_date] [date],
|
||||
[payment] [real],
|
||||
[past_due] [real],
|
||||
[remain_balance] [real],
|
||||
[closed] [bit],
|
||||
[charged_off] [bit]
|
||||
);
|
||||
|
||||
CREATE CLUSTERED COLUMNSTORE INDEX payments_info_1m_cci ON payments_info_1m WITH (DROP_EXISTING = OFF);
|
||||
GO
|
||||
|
||||
DROP TABLE IF EXISTS [loan_chargeoff_models_1m];
|
||||
|
||||
CREATE TABLE [loan_chargeoff_models_1m]
|
||||
(
|
||||
[model_name] varchar(30) not null default('default model') primary key,
|
||||
[model] varbinary(max) not null,
|
||||
[auc] real,
|
||||
[accuracy] real,
|
||||
[precision] real,
|
||||
[recall] real,
|
||||
[f1score] real,
|
||||
[training_ts] datetime default(GETDATE())
|
||||
);
|
||||
GO
|
||||
|
||||
DROP TABLE IF EXISTS selected_features_1m;
|
||||
|
||||
CREATE TABLE [selected_features_1m](
|
||||
[feature_id] [int] IDENTITY(1,1) NOT NULL,
|
||||
[feature_name] [nvarchar](500) NOT NULL
|
||||
);
|
||||
GO
|
||||
|
||||
DROP TABLE IF EXISTS [loan_chargeoff_prediction_1m]
|
||||
|
||||
CREATE TABLE [loan_chargeoff_prediction_1m](
|
||||
[memberId] [int],
|
||||
[loanId] [int],
|
||||
[payment_date] [date],
|
||||
[prediction_date] [date] default(GETDATE()),
|
||||
[PredictedLabel] [nvarchar](255),
|
||||
[Score.1] [float],
|
||||
[Probability.1] [float]
|
||||
);
|
||||
|
||||
GO
|
|
@ -1,137 +0,0 @@
|
|||
-- View over the underlying table for features and labels required
|
||||
drop view if exists vw_loan_chargeoff_train_10k
|
||||
go
|
||||
create view vw_loan_chargeoff_train_10k
|
||||
as
|
||||
select t.loanId, t.payment_date, t.payment, t.past_due, t.remain_balance,
|
||||
l.loan_open_date, l.loanAmount,l.interestRate,l.grade,l.term,l.installment,l.isJointApplication,l.purpose,
|
||||
m.memberId,m.residentialState,m.annualIncome,m.yearsEmployment,m.homeOwnership,m.incomeVerified,m.creditScore,m.dtiRatio,m.revolvingBalance,m.revolvingUtilizationRate,m.numDelinquency2Years,m.numDerogatoryRec,m.numInquiries6Mon,m.lengthCreditHistory,m.numOpenCreditLines,m.numTotalCreditLines,m.numChargeoff1year,
|
||||
ISNULL(t.payment_1, 0) payment_1,ISNULL(t.payment_2, 0) payment_2,ISNULL(t.payment_3, 0) payment_3,ISNULL(t.payment_4, 0) payment_4,ISNULL(t.payment_5, 0) payment_5,
|
||||
ISNULL(t.past_due_1, 0) past_due_1,ISNULL(t.past_due_2, 0) past_due_2,ISNULL(t.past_due_3, 0) past_due_3,ISNULL(t.past_due_4, 0) past_due_4,ISNULL(t.past_due_5, 0) past_due_5,
|
||||
ISNULL(t.remain_balance_1, 0) remain_balance_1,ISNULL(t.remain_balance_2, 0) remain_balance_2,ISNULL(t.remain_balance_3, 0) remain_balance_3,ISNULL(t.remain_balance_4, 0) remain_balance_4,ISNULL(t.remain_balance_5, 0) remain_balance_5, t.charge_off
|
||||
from
|
||||
(
|
||||
select *,
|
||||
(select top 1 payment from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) payment_1,
|
||||
(select top 1 payment from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) payment_2,
|
||||
(select top 1 payment from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) payment_3,
|
||||
(select top 1 payment from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) payment_4,
|
||||
(select top 1 payment from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) payment_5,
|
||||
(select top 1 past_due from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) past_due_1,
|
||||
(select top 1 past_due from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) past_due_2,
|
||||
(select top 1 past_due from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) past_due_3,
|
||||
(select top 1 past_due from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) past_due_4,
|
||||
(select top 1 past_due from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) past_due_5,
|
||||
(select top 1 remain_balance from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) remain_balance_1,
|
||||
(select top 1 remain_balance from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) remain_balance_2,
|
||||
(select top 1 remain_balance from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) remain_balance_3,
|
||||
(select top 1 remain_balance from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) remain_balance_4,
|
||||
(select top 1 remain_balance from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) remain_balance_5,
|
||||
(select MAX(charged_off+0) from payments_info_10k p2 where DATEDIFF(month, p1.payment_date,p2.payment_date) IN (1,2,3) AND p1.loanId = p2.loanId) charge_off
|
||||
from payments_info_10k p1 ) AS t inner join loan_info_10k l ON t.loanId = l.loanId inner join member_info_10k m ON l.memberId = m.memberId
|
||||
where t.charge_off IS NOT NULL
|
||||
and ((payment_date between '2016-09-12' and '2016-12-12' and charge_off = 1) or (payment_date = '2017-01-12'));
|
||||
go
|
||||
|
||||
drop view if exists vw_loan_chargeoff_test_10k
|
||||
go
|
||||
create view vw_loan_chargeoff_test_10k
|
||||
as
|
||||
select t.loanId, t.payment_date, t.payment, t.past_due, t.remain_balance,
|
||||
l.loan_open_date, l.loanAmount,l.interestRate,l.grade,l.term,l.installment,l.isJointApplication,l.purpose,
|
||||
m.memberId,m.residentialState,m.annualIncome,m.yearsEmployment,m.homeOwnership,m.incomeVerified,m.creditScore,m.dtiRatio,m.revolvingBalance,m.revolvingUtilizationRate,m.numDelinquency2Years,m.numDerogatoryRec,m.numInquiries6Mon,m.lengthCreditHistory,m.numOpenCreditLines,m.numTotalCreditLines,m.numChargeoff1year,
|
||||
ISNULL(t.payment_1, 0) payment_1,ISNULL(t.payment_2, 0) payment_2,ISNULL(t.payment_3, 0) payment_3,ISNULL(t.payment_4, 0) payment_4,ISNULL(t.payment_5, 0) payment_5,
|
||||
ISNULL(t.past_due_1, 0) past_due_1,ISNULL(t.past_due_2, 0) past_due_2,ISNULL(t.past_due_3, 0) past_due_3,ISNULL(t.past_due_4, 0) past_due_4,ISNULL(t.past_due_5, 0) past_due_5,
|
||||
ISNULL(t.remain_balance_1, 0) remain_balance_1,ISNULL(t.remain_balance_2, 0) remain_balance_2,ISNULL(t.remain_balance_3, 0) remain_balance_3,ISNULL(t.remain_balance_4, 0) remain_balance_4,ISNULL(t.remain_balance_5, 0) remain_balance_5, t.charge_off
|
||||
from
|
||||
(
|
||||
select *,
|
||||
(select top 1 payment from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) payment_1,
|
||||
(select top 1 payment from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) payment_2,
|
||||
(select top 1 payment from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) payment_3,
|
||||
(select top 1 payment from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) payment_4,
|
||||
(select top 1 payment from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) payment_5,
|
||||
(select top 1 past_due from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) past_due_1,
|
||||
(select top 1 past_due from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) past_due_2,
|
||||
(select top 1 past_due from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) past_due_3,
|
||||
(select top 1 past_due from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) past_due_4,
|
||||
(select top 1 past_due from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) past_due_5,
|
||||
(select top 1 remain_balance from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) remain_balance_1,
|
||||
(select top 1 remain_balance from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) remain_balance_2,
|
||||
(select top 1 remain_balance from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) remain_balance_3,
|
||||
(select top 1 remain_balance from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) remain_balance_4,
|
||||
(select top 1 remain_balance from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) remain_balance_5,
|
||||
(select MAX(charged_off+0) from payments_info_10k p2 where DATEDIFF(month, p1.payment_date,p2.payment_date) IN (1,2,3) AND p1.loanId = p2.loanId) charge_off
|
||||
from payments_info_10k p1 ) AS t inner join loan_info_10k l ON t.loanId = l.loanId inner join member_info_10k m ON l.memberId = m.memberId
|
||||
where t.charge_off IS NOT NULL
|
||||
and payment_date = '2017-02-12';
|
||||
go
|
||||
|
||||
drop view if exists vw_loan_chargeoff_score_10k
|
||||
go
|
||||
create view vw_loan_chargeoff_score_10k
|
||||
as
|
||||
select t.loanId, t.payment_date, t.payment, t.past_due, t.remain_balance,
|
||||
l.loan_open_date, l.loanAmount,l.interestRate,l.grade,l.term,l.installment,l.isJointApplication,l.purpose,
|
||||
m.memberId,m.residentialState,m.annualIncome,m.yearsEmployment,m.homeOwnership,m.incomeVerified,m.creditScore,m.dtiRatio,m.revolvingBalance,m.revolvingUtilizationRate,m.numDelinquency2Years,m.numDerogatoryRec,m.numInquiries6Mon,m.lengthCreditHistory,m.numOpenCreditLines,m.numTotalCreditLines,m.numChargeoff1year,
|
||||
ISNULL(t.payment_1, 0) payment_1,ISNULL(t.payment_2, 0) payment_2,ISNULL(t.payment_3, 0) payment_3,ISNULL(t.payment_4, 0) payment_4,ISNULL(t.payment_5, 0) payment_5,
|
||||
ISNULL(t.past_due_1, 0) past_due_1,ISNULL(t.past_due_2, 0) past_due_2,ISNULL(t.past_due_3, 0) past_due_3,ISNULL(t.past_due_4, 0) past_due_4,ISNULL(t.past_due_5, 0) past_due_5,
|
||||
ISNULL(t.remain_balance_1, 0) remain_balance_1,ISNULL(t.remain_balance_2, 0) remain_balance_2,ISNULL(t.remain_balance_3, 0) remain_balance_3,ISNULL(t.remain_balance_4, 0) remain_balance_4,ISNULL(t.remain_balance_5, 0) remain_balance_5, t.charge_off
|
||||
from
|
||||
(
|
||||
select *,
|
||||
(select top 1 payment from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) payment_1,
|
||||
(select top 1 payment from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) payment_2,
|
||||
(select top 1 payment from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) payment_3,
|
||||
(select top 1 payment from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) payment_4,
|
||||
(select top 1 payment from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) payment_5,
|
||||
(select top 1 past_due from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) past_due_1,
|
||||
(select top 1 past_due from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) past_due_2,
|
||||
(select top 1 past_due from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) past_due_3,
|
||||
(select top 1 past_due from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) past_due_4,
|
||||
(select top 1 past_due from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) past_due_5,
|
||||
(select top 1 remain_balance from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) remain_balance_1,
|
||||
(select top 1 remain_balance from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) remain_balance_2,
|
||||
(select top 1 remain_balance from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) remain_balance_3,
|
||||
(select top 1 remain_balance from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) remain_balance_4,
|
||||
(select top 1 remain_balance from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) remain_balance_5,
|
||||
(select MAX(charged_off+0) from payments_info_10k p2 where DATEDIFF(month, p1.payment_date,p2.payment_date) IN (1,2,3) AND p1.loanId = p2.loanId) charge_off
|
||||
from payments_info_10k p1 ) AS t inner join loan_info_10k l ON t.loanId = l.loanId inner join member_info_10k m ON l.memberId = m.memberId
|
||||
where t.charge_off IS NOT NULL
|
||||
and payment_date > '2017-02-12';
|
||||
go
|
||||
|
||||
|
||||
-- persist the view in case of large dataset in order to get faster results
|
||||
drop table if exists [loan_chargeoff_train_10k]
|
||||
go
|
||||
|
||||
select *
|
||||
into [loan_chargeoff_train_10k]
|
||||
from [vw_loan_chargeoff_train_10k]
|
||||
go
|
||||
|
||||
create clustered columnstore index [cci_loan_chargeoff_train_10k] on [loan_chargeoff_train_10k]
|
||||
go
|
||||
|
||||
drop table if exists [loan_chargeoff_test_10k]
|
||||
go
|
||||
|
||||
select *
|
||||
into [loan_chargeoff_test_10k]
|
||||
from [vw_loan_chargeoff_test_10k]
|
||||
go
|
||||
|
||||
create clustered columnstore index [cci_loan_chargeoff_test_10k] on [loan_chargeoff_test_10k]
|
||||
go
|
||||
|
||||
drop table if exists [loan_chargeoff_score_10k]
|
||||
go
|
||||
|
||||
select *
|
||||
into [loan_chargeoff_score_10k]
|
||||
from [vw_loan_chargeoff_score_10k]
|
||||
go
|
||||
|
||||
create clustered columnstore index [cci_loan_chargeoff_score_10k] on [loan_chargeoff_score_10k]
|
||||
go
|
|
@ -1,140 +0,0 @@
|
|||
-- View over the underlying table for features and labels required
|
||||
/* Large DataSets */
|
||||
drop view if exists [dbo].[vw_loan_chargeoff_1m]
|
||||
go
|
||||
create view [dbo].[vw_loan_chargeoff_1m]
|
||||
as
|
||||
select t.loanId, t.payment_date, t.payment, t.past_due, t.remain_balance,
|
||||
l.loan_open_date, l.loanAmount,l.interestRate,l.grade,l.term,l.installment,l.isJointApplication,l.purpose,
|
||||
m.memberId,m.residentialState,m.annualIncome,m.yearsEmployment,m.homeOwnership,m.incomeVerified,m.creditScore,m.dtiRatio,m.revolvingBalance,m.revolvingUtilizationRate,m.numDelinquency2Years,m.numDerogatoryRec,m.numInquiries6Mon,m.lengthCreditHistory,m.numOpenCreditLines,m.numTotalCreditLines,m.numChargeoff1year,
|
||||
ISNULL(t.payment_1, 0) payment_1,ISNULL(t.payment_2, 0) payment_2,ISNULL(t.payment_3, 0) payment_3,ISNULL(t.payment_4, 0) payment_4,ISNULL(t.payment_5, 0) payment_5,
|
||||
ISNULL(t.past_due_1, 0) past_due_1,ISNULL(t.past_due_2, 0) past_due_2,ISNULL(t.past_due_3, 0) past_due_3,ISNULL(t.past_due_4, 0) past_due_4,ISNULL(t.past_due_5, 0) past_due_5,
|
||||
ISNULL(t.remain_balance_1, 0) remain_balance_1,ISNULL(t.remain_balance_2, 0) remain_balance_2,ISNULL(t.remain_balance_3, 0) remain_balance_3,ISNULL(t.remain_balance_4, 0) remain_balance_4,ISNULL(t.remain_balance_5, 0) remain_balance_5, t.charge_off
|
||||
from
|
||||
(
|
||||
select *,
|
||||
(select top 1 payment from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) payment_1,
|
||||
(select top 1 payment from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) payment_2,
|
||||
(select top 1 payment from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) payment_3,
|
||||
(select top 1 payment from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) payment_4,
|
||||
(select top 1 payment from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) payment_5,
|
||||
(select top 1 past_due from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) past_due_1,
|
||||
(select top 1 past_due from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) past_due_2,
|
||||
(select top 1 past_due from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) past_due_3,
|
||||
(select top 1 past_due from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) past_due_4,
|
||||
(select top 1 past_due from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) past_due_5,
|
||||
(select top 1 remain_balance from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) remain_balance_1,
|
||||
(select top 1 remain_balance from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) remain_balance_2,
|
||||
(select top 1 remain_balance from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) remain_balance_3,
|
||||
(select top 1 remain_balance from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) remain_balance_4,
|
||||
(select top 1 remain_balance from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) remain_balance_5,
|
||||
(select MAX(charged_off+0) from payments_info_1m p2 where DATEDIFF(month, p1.payment_date,p2.payment_date) IN (1,2,3) AND p1.loanId = p2.loanId) charge_off
|
||||
from payments_info_1m p1 ) AS t inner join loan_info_1m l ON t.loanId = l.loanId inner join member_info_1m m ON l.memberId = m.memberId
|
||||
where t.charge_off IS NOT NULL
|
||||
and ((payment_date between '2016-09-12' and '2016-12-12' and charge_off = 1) or (payment_date = '2017-01-12'))
|
||||
|
||||
GO
|
||||
|
||||
drop view if exists [dbo].[vw_loan_chargeoff_test_1m]
|
||||
go
|
||||
create view [dbo].[vw_loan_chargeoff_test_1m]
|
||||
as
|
||||
select t.loanId, t.payment_date, t.payment, t.past_due, t.remain_balance,
|
||||
l.loan_open_date, l.loanAmount,l.interestRate,l.grade,l.term,l.installment,l.isJointApplication,l.purpose,
|
||||
m.memberId,m.residentialState,m.annualIncome,m.yearsEmployment,m.homeOwnership,m.incomeVerified,m.creditScore,m.dtiRatio,m.revolvingBalance,m.revolvingUtilizationRate,m.numDelinquency2Years,m.numDerogatoryRec,m.numInquiries6Mon,m.lengthCreditHistory,m.numOpenCreditLines,m.numTotalCreditLines,m.numChargeoff1year,
|
||||
ISNULL(t.payment_1, 0) payment_1,ISNULL(t.payment_2, 0) payment_2,ISNULL(t.payment_3, 0) payment_3,ISNULL(t.payment_4, 0) payment_4,ISNULL(t.payment_5, 0) payment_5,
|
||||
ISNULL(t.past_due_1, 0) past_due_1,ISNULL(t.past_due_2, 0) past_due_2,ISNULL(t.past_due_3, 0) past_due_3,ISNULL(t.past_due_4, 0) past_due_4,ISNULL(t.past_due_5, 0) past_due_5,
|
||||
ISNULL(t.remain_balance_1, 0) remain_balance_1,ISNULL(t.remain_balance_2, 0) remain_balance_2,ISNULL(t.remain_balance_3, 0) remain_balance_3,ISNULL(t.remain_balance_4, 0) remain_balance_4,ISNULL(t.remain_balance_5, 0) remain_balance_5, t.charge_off
|
||||
from
|
||||
(
|
||||
select *,
|
||||
(select top 1 payment from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) payment_1,
|
||||
(select top 1 payment from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) payment_2,
|
||||
(select top 1 payment from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) payment_3,
|
||||
(select top 1 payment from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) payment_4,
|
||||
(select top 1 payment from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) payment_5,
|
||||
(select top 1 past_due from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) past_due_1,
|
||||
(select top 1 past_due from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) past_due_2,
|
||||
(select top 1 past_due from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) past_due_3,
|
||||
(select top 1 past_due from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) past_due_4,
|
||||
(select top 1 past_due from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) past_due_5,
|
||||
(select top 1 remain_balance from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) remain_balance_1,
|
||||
(select top 1 remain_balance from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) remain_balance_2,
|
||||
(select top 1 remain_balance from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) remain_balance_3,
|
||||
(select top 1 remain_balance from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) remain_balance_4,
|
||||
(select top 1 remain_balance from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) remain_balance_5,
|
||||
(select MAX(charged_off+0) from payments_info_1m p2 where DATEDIFF(month, p1.payment_date,p2.payment_date) IN (1,2,3) AND p1.loanId = p2.loanId) charge_off
|
||||
from payments_info_1m p1 ) AS t inner join loan_info_1m l ON t.loanId = l.loanId inner join member_info_1m m ON l.memberId = m.memberId
|
||||
where t.charge_off IS NOT NULL
|
||||
and payment_date = '2017-02-12'
|
||||
GO
|
||||
|
||||
drop view if exists [dbo].[vw_loan_chargeoff_score_1m]
|
||||
go
|
||||
create view [dbo].[vw_loan_chargeoff_score_1m]
|
||||
as
|
||||
select t.loanId, t.payment_date, t.payment, t.past_due, t.remain_balance,
|
||||
l.loan_open_date, l.loanAmount,l.interestRate,l.grade,l.term,l.installment,l.isJointApplication,l.purpose,
|
||||
m.memberId,m.residentialState,m.annualIncome,m.yearsEmployment,m.homeOwnership,m.incomeVerified,m.creditScore,m.dtiRatio,m.revolvingBalance,m.revolvingUtilizationRate,m.numDelinquency2Years,m.numDerogatoryRec,m.numInquiries6Mon,m.lengthCreditHistory,m.numOpenCreditLines,m.numTotalCreditLines,m.numChargeoff1year,
|
||||
ISNULL(t.payment_1, 0) payment_1,ISNULL(t.payment_2, 0) payment_2,ISNULL(t.payment_3, 0) payment_3,ISNULL(t.payment_4, 0) payment_4,ISNULL(t.payment_5, 0) payment_5,
|
||||
ISNULL(t.past_due_1, 0) past_due_1,ISNULL(t.past_due_2, 0) past_due_2,ISNULL(t.past_due_3, 0) past_due_3,ISNULL(t.past_due_4, 0) past_due_4,ISNULL(t.past_due_5, 0) past_due_5,
|
||||
ISNULL(t.remain_balance_1, 0) remain_balance_1,ISNULL(t.remain_balance_2, 0) remain_balance_2,ISNULL(t.remain_balance_3, 0) remain_balance_3,ISNULL(t.remain_balance_4, 0) remain_balance_4,ISNULL(t.remain_balance_5, 0) remain_balance_5, t.charge_off
|
||||
from
|
||||
(
|
||||
select *,
|
||||
(select top 1 payment from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) payment_1,
|
||||
(select top 1 payment from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) payment_2,
|
||||
(select top 1 payment from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) payment_3,
|
||||
(select top 1 payment from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) payment_4,
|
||||
(select top 1 payment from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) payment_5,
|
||||
(select top 1 past_due from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) past_due_1,
|
||||
(select top 1 past_due from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) past_due_2,
|
||||
(select top 1 past_due from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) past_due_3,
|
||||
(select top 1 past_due from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) past_due_4,
|
||||
(select top 1 past_due from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) past_due_5,
|
||||
(select top 1 remain_balance from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) remain_balance_1,
|
||||
(select top 1 remain_balance from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) remain_balance_2,
|
||||
(select top 1 remain_balance from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) remain_balance_3,
|
||||
(select top 1 remain_balance from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) remain_balance_4,
|
||||
(select top 1 remain_balance from payments_info_1m p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) remain_balance_5,
|
||||
(select MAX(charged_off+0) from payments_info_1m p2 where DATEDIFF(month, p1.payment_date,p2.payment_date) IN (1,2,3) AND p1.loanId = p2.loanId) charge_off
|
||||
from payments_info_1m p1 ) AS t inner join loan_info_1m l ON t.loanId = l.loanId inner join member_info_1m m ON l.memberId = m.memberId
|
||||
where t.charge_off IS NOT NULL
|
||||
and payment_date > '2017-02-12'
|
||||
GO
|
||||
|
||||
-- persist the view in case of large dataset in order to get faster results
|
||||
|
||||
/* Large dataset */
|
||||
drop table if exists [loan_chargeoff_train_1m]
|
||||
go
|
||||
|
||||
select *
|
||||
into [loan_chargeoff_train_1m]
|
||||
from [vw_loan_chargeoff_1m]
|
||||
go
|
||||
|
||||
create clustered columnstore index [cci_loan_chargeoff_train_1m] on [loan_chargeoff_train_1m]
|
||||
go
|
||||
|
||||
drop table if exists [loan_chargeoff_test_1m]
|
||||
go
|
||||
|
||||
select *
|
||||
into [loan_chargeoff_test_1m]
|
||||
from [vw_loan_chargeoff_test_1m]
|
||||
go
|
||||
|
||||
create clustered columnstore index [cci_loan_chargeoff_test_1m] on [loan_chargeoff_test_1m]
|
||||
go
|
||||
|
||||
drop table if exists [loan_chargeoff_score_1m]
|
||||
go
|
||||
|
||||
select *
|
||||
into [loan_chargeoff_score_1m]
|
||||
from [vw_loan_chargeoff_score_1m]
|
||||
go
|
||||
|
||||
create clustered columnstore index [cci_loan_chargeoff_score_1m] on [loan_chargeoff_score_1m]
|
||||
go
|
|
@ -0,0 +1,15 @@
|
|||
##############################################################################
|
||||
# Helper script to retrieve the password for 'rdemo' user if needed. During
|
||||
# deployment of the solution template a new user is created with a random
|
||||
# password which is stored in encrypted form in a text file.
|
||||
#
|
||||
# Must be run as the same user as the Data Science VM user supplied during
|
||||
# deployment.
|
||||
##############################################################################
|
||||
# Name of the file holding the encrypted password text; resolved relative to
# the current directory.
$passwordFile = "ExportedSqlPassword.txt"

# Read the stored text and turn it back into a SecureString. No -Key/-SecureKey
# is passed to ConvertTo-SecureString, so decryption relies on the default
# per-user protection -- this is why the script must be run as the same user
# that created the file.
$secureTxtFromFile = Get-Content $passwordFile
$securePasswordObj = $secureTxtFromFile | ConvertTo-SecureString

# Get back the original unencrypted password.
# SecureStringToBSTR allocates an unmanaged BSTR containing the plaintext, so it
# is decoded with the matching PtrToStringBSTR and then zeroed and released in
# 'finally' to avoid leaving the password behind in unmanaged memory.
$PasswordBSTR = [System.Runtime.InteropServices.Marshal]::SecureStringToBSTR($securePasswordObj)
try
{
    # The decoded string is emitted as the script's output.
    [System.Runtime.InteropServices.Marshal]::PtrToStringBSTR($PasswordBSTR)
}
finally
{
    [System.Runtime.InteropServices.Marshal]::ZeroFreeBSTR($PasswordBSTR)
}
|
|
@ -70,9 +70,11 @@ function ExecuteSQL
|
|||
{
|
||||
param(
|
||||
[String]
|
||||
$sqlscript
|
||||
$sqlscript,
|
||||
[String]
|
||||
$VariableArray=""
|
||||
)
|
||||
Invoke-Sqlcmd -ServerInstance $ServerName -Database $DBName -Username $username -Password $password -InputFile $sqlscript -QueryTimeout 200000
|
||||
Invoke-Sqlcmd -ServerInstance $ServerName -Database $DBName -Username $username -Password "$password" -InputFile $sqlscript -Variable $VariableArray -QueryTimeout 200000
|
||||
}
|
||||
##########################################################################
|
||||
# Function wrapper to invoke SQL query
|
||||
|
@ -83,7 +85,7 @@ param(
|
|||
[String]
|
||||
$sqlquery
|
||||
)
|
||||
Invoke-Sqlcmd -ServerInstance $ServerName -Database $DBName -Username $username -Password $password -Query $sqlquery -QueryTimeout 200000
|
||||
Invoke-Sqlcmd -ServerInstance $ServerName -Database $DBName -Username $username -Password "$password" -Query $sqlquery -QueryTimeout 200000
|
||||
}
|
||||
|
||||
##########################################################################
|
||||
|
@ -113,7 +115,7 @@ $connectionString2 = GetConnectionString2
|
|||
# Check if the SQL server or database exists
|
||||
##########################################################################
|
||||
$query = "IF NOT EXISTS(SELECT * FROM sys.databases WHERE NAME = '$DBName') CREATE DATABASE $DBName"
|
||||
Invoke-Sqlcmd -ServerInstance $ServerName -Username $username -Password $password -Query $query -ErrorAction SilentlyContinue
|
||||
Invoke-Sqlcmd -ServerInstance $ServerName -Username $username -Password "$password" -Query $query -ErrorAction SilentlyContinue
|
||||
if ($? -eq $false)
|
||||
{
|
||||
Write-Host -ForegroundColor Red "Failed the test to connect to SQL server: $ServerName database: $DBName !"
|
||||
|
@ -124,7 +126,7 @@ if ($? -eq $false)
|
|||
}
|
||||
|
||||
$query = "USE $DBName;"
|
||||
Invoke-Sqlcmd -ServerInstance $ServerName -Username $username -Password $password -Query $query
|
||||
Invoke-Sqlcmd -ServerInstance $ServerName -Username $username -Password "$password" -Query $query
|
||||
|
||||
|
||||
##########################################################################
|
||||
|
@ -139,8 +141,8 @@ if ($uninterrupted -eq 'y' -or $uninterrupted -eq 'Y')
|
|||
{
|
||||
# create training and test tables
|
||||
Write-Host -ForeGroundColor 'green' ("Create SQL tables: member_info, loan_info, payments_info")
|
||||
$script = $filePath + "step1_create_tables" + $table_suffix + ".sql"
|
||||
ExecuteSQL $script
|
||||
$script = $filePath + "step1_create_tables.sql"
|
||||
ExecuteSQL $script "datasize = $dataSize"
|
||||
|
||||
Write-Host -ForeGroundColor 'green' ("Populate SQL tables: member_info, loan_info, payments_info")
|
||||
$dataList = "member_info", "loan_info", "payments_info"
|
||||
|
@ -153,23 +155,23 @@ if ($uninterrupted -eq 'y' -or $uninterrupted -eq 'Y')
|
|||
Write-Host -ForeGroundColor 'magenta'(" Populate SQL table: {0}... from {1}" -f $dataFile, $destination)
|
||||
$tableName = $DBName + ".dbo." + $dataFile + $table_suffix
|
||||
$tableSchema = $dataFilePath + $dataFile + $table_suffix + ".xml"
|
||||
bcp $tableName format nul -c -x -f $tableSchema -U $username -S $ServerName -P $password -t ','
|
||||
bcp $tableName format nul -c -x -f $tableSchema -U $username -S $ServerName -P "$password" -t ','
|
||||
Write-Host -ForeGroundColor 'magenta'(" Loading {0} to SQL table..." -f $dataFile)
|
||||
bcp $tableName in $destination -t ',' -S $ServerName -f $tableSchema -F 2 -C "RAW" -b 100000 -U $username -P $password -e $error_file
|
||||
Write-Host -ForeGroundColor 'magenta'(" Done...Loading {0} to SQL table..." -f $dataFile)
|
||||
bcp $tableName in $destination -t ',' -S $ServerName -f $tableSchema -F 2 -C "RAW" -b 100000 -U $username -P "$password" -e $error_file
|
||||
Write-Host -ForeGroundColor 'magenta'(" Done...Loading {0} to SQL table {1}..." -f $dataFile, $tableName)
|
||||
}
|
||||
|
||||
|
||||
|
||||
# create the views for features and label with training, test and scoring split
|
||||
Write-Host -ForeGroundColor 'magenta'(" Creating features label view and persisting...")
|
||||
$script = $filepath + "step2_features_label_view" + $table_suffix + ".sql"
|
||||
ExecuteSQL $script
|
||||
$script = $filepath + "step2_features_label_view.sql"
|
||||
ExecuteSQL $script "datasize=$dataSize"
|
||||
Write-Host -ForeGroundColor 'magenta'(" Done creating features label view and persisting...")
|
||||
|
||||
# create the stored procedure for training
|
||||
$script = $filepath + "step3_train_test_model.sql"
|
||||
ExecuteSQL $script
|
||||
ExecuteSQL $script "datasize=$dataSize"
|
||||
Write-Host -ForeGroundColor 'magenta'(" Done creating training and eval stored proc...")
|
||||
|
||||
# execute the training
|
||||
|
@ -186,7 +188,7 @@ if ($uninterrupted -eq 'y' -or $uninterrupted -eq 'Y')
|
|||
|
||||
# create the stored procedure for recommendations
|
||||
$script = $filepath + "step4_chargeoff_batch_prediction.sql"
|
||||
ExecuteSQL $script
|
||||
ExecuteSQL $script "datasize=$dataSize"
|
||||
Write-Host -ForeGroundColor 'magenta'(" Done creating batch scoring stored proc...")
|
||||
|
||||
#score on the data
|
||||
|
@ -196,13 +198,13 @@ if ($uninterrupted -eq 'y' -or $uninterrupted -eq 'Y')
|
|||
|
||||
# create the stored procedure for recommendations
|
||||
$script = $filepath + "step4a_chargeoff_ondemand_prediction.sql"
|
||||
ExecuteSQL $script
|
||||
ExecuteSQL $script "datasize=$dataSize"
|
||||
Write-Host -ForeGroundColor 'magenta'(" Done creating on demand scoring stored proc [predict_chargeoff_ondemand]...")
|
||||
|
||||
}
|
||||
catch
|
||||
{
|
||||
Write-Host -ForegroundColor DarkYellow "Exception in populating database tables:"
|
||||
Write-Host -ForegroundColor Yellow "Exception executing Data Science pipeline..."
|
||||
Write-Host -ForegroundColor Red $Error[0].Exception
|
||||
throw
|
||||
}
|
||||
|
@ -228,8 +230,8 @@ if ($ans -eq 'y' -or $ans -eq 'Y')
|
|||
{
|
||||
# create training and test tables
|
||||
Write-Host -ForeGroundColor 'green' ("Create SQL tables: member_info, loan_info, payments_info")
|
||||
$script = $filePath + "step1_create_tables" + $table_suffix + ".sql"
|
||||
ExecuteSQL $script
|
||||
$script = $filePath + "step1_create_tables.sql"
|
||||
ExecuteSQL $script "datasize = $dataSize"
|
||||
|
||||
Write-Host -ForeGroundColor 'green' ("Populate SQL tables: member_info, loan_info, payments_info")
|
||||
$dataList = "member_info", "loan_info", "payments_info"
|
||||
|
@ -242,10 +244,10 @@ if ($ans -eq 'y' -or $ans -eq 'Y')
|
|||
Write-Host -ForeGroundColor 'magenta'(" Populate SQL table: {0} from {1}..." -f $dataFile, $destination)
|
||||
$tableName = $DBName + ".dbo." + $dataFile + $table_suffix
|
||||
$tableSchema = $dataFilePath + $dataFile + $table_suffix + ".xml"
|
||||
bcp $tableName format nul -c -x -f $tableSchema -U $username -S $ServerName -P $password -t ','
|
||||
bcp $tableName format nul -c -x -f $tableSchema -U $username -S $ServerName -P "$password" -t ','
|
||||
Write-Host -ForeGroundColor 'magenta'(" Loading {0} to SQL table..." -f $dataFile)
|
||||
bcp $tableName in $destination -t ',' -S $ServerName -f $tableSchema -F 2 -C "RAW" -b 100000 -U $username -P $password -e $error_file
|
||||
Write-Host -ForeGroundColor 'magenta'(" Done...Loading {0} to SQL table..." -f $dataFile)
|
||||
bcp $tableName in $destination -t ',' -S $ServerName -f $tableSchema -F 2 -C "RAW" -b 100000 -U $username -P "$password" -e $error_file
|
||||
Write-Host -ForeGroundColor 'magenta'(" Done...Loading {0} to SQL table {1}..." -f $dataFile, $tableName)
|
||||
}
|
||||
}
|
||||
catch
|
||||
|
@ -269,8 +271,8 @@ if ($ans -eq 'y' -or $ans -eq 'Y')
|
|||
{
|
||||
# create features, labels view
|
||||
Write-Host -ForeGroundColor 'Cyan' (" Creating feature/label views...")
|
||||
$script = $filepath + "step2_features_label_view" + $table_suffix + ".sql"
|
||||
ExecuteSQL $script
|
||||
$script = $filepath + "step2_features_label_view.sql"
|
||||
ExecuteSQL $script "datasize = $dataSize"
|
||||
}
|
||||
|
||||
##########################################################################
|
||||
|
@ -286,7 +288,7 @@ if ($ans -eq 'y' -or $ans -eq 'Y')
|
|||
{
|
||||
# create the stored procedure for feature engineering
|
||||
$script = $filepath + "step2a_optional_feature_selection.sql"
|
||||
ExecuteSQL $script
|
||||
ExecuteSQL $script "datasize=$dataSize"
|
||||
|
||||
# execute the feature engineering
|
||||
Write-Host -ForeGroundColor 'Cyan' (" selecting features using MicrosoftML selectFeatures mlTransform with Logistic Regression...")
|
||||
|
@ -308,7 +310,7 @@ if ($ans -eq 'y' -or $ans -eq 'Y')
|
|||
{
|
||||
# create the stored procedure for training
|
||||
$script = $filepath + "step3_train_test_model.sql"
|
||||
ExecuteSQL $script
|
||||
ExecuteSQL $script "datasize=$dataSize"
|
||||
|
||||
Write-Host -ForeGroundColor 'magenta'(" Starting training and evaluation of models...")
|
||||
$modelNames = 'logistic_reg','fast_linear','fast_trees','fast_forest','neural_net'
|
||||
|
@ -334,7 +336,7 @@ if ($ans -eq 'y' -or $ans -eq 'Y')
|
|||
{
|
||||
# create the stored procedure for recommendations
|
||||
$script = $filepath + "step4_chargeoff_batch_prediction.sql"
|
||||
ExecuteSQL $script
|
||||
ExecuteSQL $script "datasize=$dataSize"
|
||||
|
||||
# compute loan chargeoff predictions
|
||||
Write-Host -ForeGroundColor 'Cyan' ("Scoring based on best performing model score table = $scoreTable, prediction table = $predictionTable...")
|
||||
|
@ -352,7 +354,7 @@ if ($ans -eq 'y' -or $ans -eq 'Y')
|
|||
{
|
||||
# create the stored procedure for recommendations
|
||||
$script = $filepath + "step4a_chargeoff_ondemand_prediction.sql"
|
||||
ExecuteSQL $script
|
||||
ExecuteSQL $script "datasize=$dataSize"
|
||||
|
||||
Write-Host -ForeGroundColor 'Cyan' ("Done creating on demand chargeoff prediction stored proc [predict_chargeoff_ondemand]...")
|
||||
}
|
||||
|
@ -363,5 +365,5 @@ Write-Host -foregroundcolor 'green'("Loan Chargeoff Prediction Workflow Finished
|
|||
|
||||
$endTime =Get-Date
|
||||
$totalTime = ($endTime-$startTime).ToString()
|
||||
Write-Host "Finished running at:" $endTime
|
||||
Write-Host "Finished running Loan_ChargeOff.ps1 at:" $endTime
|
||||
Write-Host "Total time used: " -foregroundcolor 'green' $totalTime.ToString()
|
|
@ -10,13 +10,15 @@
|
|||
################################################################################################
|
||||
param([string]$serverName,[string]$baseurl,[string]$username,[string]$password)
|
||||
|
||||
$startTime= Get-Date
|
||||
Write-Host "Start time for setup is:" $startTime
|
||||
$originalLocation = Get-Location
|
||||
# This is the directory for the data/code download
|
||||
$solutionTemplateSetupDir = "LoanChargeOffSolution"
|
||||
$solutionTemplateSetupPath = "D:\" + $solutionTemplateSetupDir
|
||||
$dataDir = "Data"
|
||||
$dataDirPath = $solutionTemplateSetupPath + "\" + $dataDir
|
||||
$checkoutDir = "Code"
|
||||
$checkoutDir = "Source"
|
||||
New-Item -Path "D:\" -Name $solutionTemplateSetupDir -ItemType directory -force
|
||||
New-Item -Path $solutionTemplateSetupPath -Name $dataDir -ItemType directory -force
|
||||
|
||||
|
@ -25,8 +27,6 @@ Start-Transcript -Path $setupLog -Append
|
|||
|
||||
cd $dataDirPath
|
||||
|
||||
$helpShortCutFilePath = $solutionTemplateSetupPath + "\LoanChargeOffHelp.url"
|
||||
|
||||
# List of files to be downloaded
|
||||
$dataList = "loan_info_10k", "member_info_10k", "payments_info_10k", "loan_info_100k", "member_info_100k", "payments_info_100k", "loan_info_1m", "member_info_1m", "payments_info_1m"
|
||||
$dataExtn = ".csv"
|
||||
|
@ -34,41 +34,50 @@ $hashExtn = ".hash"
|
|||
foreach ($dataFile in $dataList)
|
||||
{
|
||||
$down = $baseurl + '/' + $dataFile + $dataExtn
|
||||
Write-Host $down
|
||||
Write-Host -ForeGroundColor 'magenta' "Downloading file $down..."
|
||||
Start-BitsTransfer -Source $down
|
||||
}
|
||||
|
||||
#checkout setup scripts/code from github
|
||||
cd $solutionTemplateSetupPath
|
||||
Remove-Item $checkoutDir -Force -Recurse
|
||||
|
||||
if (Test-Path $checkoutDir)
|
||||
{
|
||||
Remove-Item $checkoutDir -Force -Recurse
|
||||
}
|
||||
|
||||
git clone -n https://github.com/Microsoft/r-server-loan-chargeoff $checkoutDir
|
||||
cd $checkoutDir
|
||||
git config core.sparsecheckout true
|
||||
echo "/*`r`n!HDI" | out-file -encoding ascii .git/info/sparse-checkout
|
||||
git checkout master
|
||||
|
||||
$sqlsolutionCodePath = $solutionTemplateSetupPath + "\" + $checkoutDir + "\SQL"
|
||||
$sqlsolutionCodePath = $solutionTemplateSetupPath + "\" + $checkoutDir + "\SQLR"
|
||||
$helpShortCutFilePath = $sqlsolutionCodePath + "\LoanChargeOffHelp.url"
|
||||
cd $sqlsolutionCodePath
|
||||
|
||||
# make sure the hashes match for data files
|
||||
Write-Host -ForeGroundColor 'magenta' "Checking integrity of downloaded files..."
|
||||
foreach ($dataFile in $dataList)
|
||||
{
|
||||
$dataFileHash = Get-FileHash ($dataDirPath + "\" + $dataFile + $dataExtn) -Algorithm SHA512
|
||||
$storedHash = Get-Content ($dataFile + $hashExtn)
|
||||
if ($dataFileHash.Hash -ne $storedHash)
|
||||
{
|
||||
Write-Host -ForeGroundColor 'Red' "Data file has been corrupted. Please try again."
|
||||
Write-Error "Data file has been corrupted. Please try again."
|
||||
throw
|
||||
}
|
||||
}
|
||||
Write-Host -ForeGroundColor 'magenta' "File integrity check successful."
|
||||
|
||||
# making sure that the data files conform to windows style of line ending.
|
||||
Write-Host -ForeGroundColor 'Cyan' "Converting data files from unix2dos"
|
||||
Write-Host -ForeGroundColor 'magenta' "Converting data files from unix2dos..."
|
||||
foreach ($dataFile in $dataList)
|
||||
{
|
||||
$csvfile = $dataDirPath + "\" + $dataFile + $dataExtn
|
||||
unix2dos $csvfile
|
||||
}
|
||||
|
||||
Write-Host -ForeGroundColor 'magenta' "Done with unix2dos conversion."
|
||||
# Start the script for DB creation. Due to privilege issues with SYSTEM user (the user that runs the
|
||||
# extension script), we use ps-remoting to login as admin user and run the DB creation scripts
|
||||
|
||||
|
@ -79,9 +88,13 @@ $command2 ="setupHelp.ps1"
|
|||
|
||||
Enable-PSRemoting -Force
|
||||
Invoke-Command -Credential $credential -ComputerName $serverName -FilePath $command1 -ArgumentList $dataDirPath, $sqlsolutionCodePath
|
||||
Invoke-Command -Credential $credential -ComputerName $serverName -FilePath $command2 -ArgumentList $helpShortCutFilePath
|
||||
Invoke-Command -Credential $credential -ComputerName $serverName -FilePath $command2 -ArgumentList $helpShortCutFilePath, $solutionTemplateSetupPath
|
||||
Disable-PSRemoting -Force
|
||||
|
||||
cd $originalLocation.Path
|
||||
$endTime= Get-Date
|
||||
$totalTime = $endTime - $startTime
|
||||
Write-Host "Finished running setup at " $endTime
|
||||
Write-Host "Total time for setup:" $totalTime
|
||||
Stop-Transcript
|
||||
|
|
@ -1,5 +1,8 @@
|
|||
:on error exit
|
||||
--
|
||||
-- remove old $(username) user and login from master
|
||||
-- remove old $(username) user and login from master.
|
||||
-- $(username) and $(password) are substituted by Invoke-SqlCmd
|
||||
-- through environment variables.
|
||||
--
|
||||
USE [master]
|
||||
GO
|
|
@ -0,0 +1,101 @@
|
|||
##############################################################################################
|
||||
# Script to invoke the LoanChargeOff data science workflow with a smaller dataset of 10,000
|
||||
# loans.
|
||||
# It also creates a SQL Server user and stores the password in 'ExportedSqlPassword.txt'.
|
||||
# Users can retrieve the password from the file and decrypt using ConvertTo-SecureString
|
||||
# commandlet in PowerShell.
|
||||
#
|
||||
# Parameters:
|
||||
# datadir - directory where raw csv data has been downloaded
|
||||
# scriptdir - directory where scripts are checked out from github
|
||||
# dbuser - (Optional) username for database LoanChargeOff
|
||||
# dbpass - (Optional) database password
|
||||
# createuser - (Optional) whether to create a database user
|
||||
# datasize - size of the dataset (10k, 100k, 1m)
|
||||
##############################################################################################
|
||||
Param([string]$datadir, [string]$scriptdir, [string]$dbuser, [string]$dbpass, [bool]$createuser = $true, [ValidateSet("10k", "100k", "1m")][string]$datasize="10k")
|
||||
cd $scriptdir
|
||||
|
||||
$dbpassword = ""
|
||||
$dbusername = "rdemo"
|
||||
$passwordFile = "ExportedSqlPassword.txt"
|
||||
|
||||
# Reads an encrypted password from a text file and returns it as plain text.
#
# Parameters:
#   file - path of the file containing the text produced by
#          ConvertFrom-SecureString (defaults to $passwordFile).
#
# Returns the decrypted password string. ConvertTo-SecureString is called
# without a key, so the file can only be decrypted under the same protection
# context that was used when it was written.
function Retrieve-FilePassword([string]$file=$passwordFile)
{
    $secureTxtFromFile = Get-Content $file
    $securePasswordObj = $secureTxtFromFile | ConvertTo-SecureString

    # Get back the original unencrypted password.
    # SecureStringToBSTR allocates an unmanaged BSTR holding the plaintext; decode
    # it with the matching PtrToStringBSTR, then zero and free the buffer so the
    # password does not linger in unmanaged memory.
    $PasswordBSTR = [System.Runtime.InteropServices.Marshal]::SecureStringToBSTR($securePasswordObj)
    try
    {
        [System.Runtime.InteropServices.Marshal]::PtrToStringBSTR($PasswordBSTR)
    }
    finally
    {
        [System.Runtime.InteropServices.Marshal]::ZeroFreeBSTR($PasswordBSTR)
    }
}
|
||||
|
||||
# Use the caller-supplied database user name when one was given; otherwise the
# default assigned to $dbusername earlier in the script is kept.
if ($dbuser)
{
    $dbusername = $dbuser
}

if (!$createuser)
{
    # No user creation requested: the password comes from -dbpass or, failing
    # that, from the encrypted password file.
    if (!$dbpass)
    {
        if (Test-Path $passwordFile)
        {
            $dbpassword = Retrieve-FilePassword($passwordFile)
        }
        else
        {
            Write-Host -ForegroundColor Yellow "Either ExportedSqlPassword.txt must exist with encrypted database password or must provide password using dbpass parameter."
            throw
        }
    }
    else
    {
        $dbpassword = $dbpass
    }
}
else
{
    Write-Host -ForegroundColor 'Cyan' "Creating database user"
    # System.Web provides Membership.GeneratePassword, used for the random password.
    [Reflection.Assembly]::LoadWithPartialName("System.Web")
    $dbpassword = [System.Web.Security.Membership]::GeneratePassword(15,0)

    # Variables to pass to createuser.sql script
    # Cannot use -v option as sqlcmd does not like special characters which maybe part of the randomly generated password.
    $sqlcmdvars = @{"username" = "$dbusername"; "password" = "$dbpassword"}
    $old_env = @{}

    foreach ($var in $sqlcmdvars.GetEnumerator()) {
        # Save Environment.
        # Look the variable up by its NAME and in the same (process) scope that
        # SetEnvironmentVariable writes to below. Looking it up by value, or in
        # the User scope, would record $null and make the restore step delete a
        # pre-existing variable -- notably USERNAME, since Windows environment
        # variable names are case-insensitive.
        $old_env.Add($var.Name, [Environment]::GetEnvironmentVariable($var.Name))
        [Environment]::SetEnvironmentVariable($var.Name, $var.Value)
    }
    try {
        #sqlcmd -S $env:COMPUTERNAME -b -i .\createuser.sql
        Invoke-Sqlcmd -ServerInstance $env:COMPUTERNAME -InputFile .\createuser.sql
        # save password securely for later retrieval
        $securePassword = $dbpassword | ConvertTo-SecureString -AsPlainText -Force
        $secureTxt = $securePassword | ConvertFrom-SecureString
        Set-Content $passwordFile $secureTxt
    } catch {
        Write-Host -ForegroundColor 'Yellow' "Error creating database user, see error message output"
        Write-Host -ForegroundColor 'Red' $Error[0].Exception
        # Try to read password from stored file
        if (Test-Path $passwordFile)
        {
            Write-Host -ForegroundColor 'Yellow' "Retrieving password from stored file."
            $dbpassword = Retrieve-FilePassword($passwordFile)
        }
        else
        {
            Write-Host -ForegroundColor DarkYellow "Either ExportedSqlPassword.txt must exist with encrypted database password or must provide password using dbpass parameter."
            throw
        }
    } finally {
        # Restore Environment: put back the values captured above (a variable
        # that did not exist before is removed again).
        foreach ($var in $old_env.GetEnumerator()) {
            [Environment]::SetEnvironmentVariable($var.Name, $var.Value)
        }
    }
    Write-Host -ForegroundColor 'Cyan' "Done creating database user"
}

# Run the full workflow non-interactively against the local SQL Server instance.
.\Loan_ChargeOff.ps1 -ServerName $env:COMPUTERNAME -DBName LoanChargeOff -username $dbusername -password "$dbpassword" -uninterrupted y -dataPath $datadir -dataSize $datasize
|
|
@ -0,0 +1,37 @@
|
|||
#######################################################################
|
||||
# Script to create help short cut and solution folder shortcut.
|
||||
#
|
||||
# Parameters:
|
||||
# helpfile - path to the help url file.
|
||||
# solutionPath - path to the solution folder with data and
|
||||
# source.
|
||||
#######################################################################
|
||||
param( [string]$helpfile, [string]$solutionPath)
|
||||
|
||||
# Resolve the current user's desktop folder, with a trailing separator so file
# names can be appended directly.
$desktop = [Environment]::GetFolderPath("Desktop")

$desktop = $desktop + '\'

# Create the help link in the startup programs folder.

$startmenu = [Environment]::GetFolderPath("StartMenu")
$startupfolder = $startmenu + '\Programs\Startup\'
# We create this since the user startup folder is only created after first login.
# An alternative is to add the link to the all-users startup folder.
# -Force returns the existing folder instead of raising an error when the
# folder is already there (for example when setup is run a second time).
New-Item -ItemType Directory -Path $startupfolder -Force

# Copy the help file to the startup folder and to the desktop; the paths and
# folder listings are written out along the way for diagnostics.
$down = $helpfile
Write-Host $down
Write-Host $startmenu
ls $startmenu
Write-Host $startupfolder
ls $startupfolder
cp -Verbose $down $startupfolder
cp -Verbose $down $desktop

# Create a shortcut to the solution folder on the desktop.
$WsShell = New-Object -ComObject WScript.Shell
$shortcut = $WsShell.CreateShortcut($desktop + "LoanChargeOff.lnk")
$shortcut.TargetPath = $solutionPath
$shortcut.Save()
|
|
@ -1,3 +1,12 @@
|
|||
/*
|
||||
* SQL Script to create tables required for training, testing and scoring
|
||||
* of models.
|
||||
* It creates tables for member_info, loan_info and payments_info tables.
|
||||
* It also creates tables to store models with evaluation stats, selected features
|
||||
* and prediction tables.
|
||||
* $(datasize) is substituted through Invoke-SqlCmd's Variable option
|
||||
* (in powershell).
|
||||
*/
|
||||
SET ansi_nulls on
|
||||
GO
|
||||
SET quoted_identifier on
|
||||
|
@ -5,9 +14,9 @@ GO
|
|||
|
||||
/* Create the member_info Table. */
|
||||
|
||||
DROP TABLE IF EXISTS member_info_10k
|
||||
DROP TABLE IF EXISTS member_info_$(datasize)
|
||||
|
||||
CREATE TABLE [member_info_10k](
|
||||
CREATE TABLE [member_info_$(datasize)](
|
||||
[memberId] [int],
|
||||
[residentialState] [nvarchar](4),
|
||||
[annualIncome] [real],
|
||||
|
@ -27,13 +36,13 @@ CREATE TABLE [member_info_10k](
|
|||
[numChargeoff1year] [int]
|
||||
);
|
||||
|
||||
CREATE CLUSTERED COLUMNSTORE INDEX member_info_10k_cci ON member_info_10k WITH (DROP_EXISTING = OFF);
|
||||
CREATE CLUSTERED COLUMNSTORE INDEX member_info_$(datasize)_cci ON member_info_$(datasize) WITH (DROP_EXISTING = OFF);
|
||||
GO
|
||||
/* Create the loan_info Table. */
|
||||
|
||||
DROP TABLE IF EXISTS loan_info_10k
|
||||
DROP TABLE IF EXISTS loan_info_$(datasize)
|
||||
|
||||
CREATE TABLE [loan_info_10k](
|
||||
CREATE TABLE [loan_info_$(datasize)](
|
||||
[loanId] [int],
|
||||
[loan_open_date] [datetime],
|
||||
[memberId] [int],
|
||||
|
@ -46,13 +55,13 @@ CREATE TABLE [loan_info_10k](
|
|||
[purpose] [nvarchar](255)
|
||||
);
|
||||
|
||||
CREATE CLUSTERED COLUMNSTORE INDEX loan_info_10k_cci ON loan_info_10k WITH (DROP_EXISTING = OFF);
|
||||
CREATE CLUSTERED COLUMNSTORE INDEX loan_info_$(datasize)_cci ON loan_info_$(datasize) WITH (DROP_EXISTING = OFF);
|
||||
GO
|
||||
/* Create the payments_info Table*/
|
||||
|
||||
DROP TABLE IF EXISTS payments_info_10k
|
||||
DROP TABLE IF EXISTS payments_info_$(datasize)
|
||||
|
||||
CREATE TABLE [payments_info_10k](
|
||||
CREATE TABLE [payments_info_$(datasize)](
|
||||
[loanId] [int],
|
||||
[payment_date] [datetime],
|
||||
[payment] [real],
|
||||
|
@ -62,12 +71,12 @@ CREATE TABLE [payments_info_10k](
|
|||
[charged_off] [bit]
|
||||
);
|
||||
|
||||
CREATE CLUSTERED COLUMNSTORE INDEX payments_info_10k_cci ON payments_info_10k WITH (DROP_EXISTING = OFF);
|
||||
CREATE CLUSTERED COLUMNSTORE INDEX payments_info_$(datasize)_cci ON payments_info_$(datasize) WITH (DROP_EXISTING = OFF);
|
||||
GO
|
||||
|
||||
DROP TABLE IF EXISTS [loan_chargeoff_models_10k];
|
||||
DROP TABLE IF EXISTS [loan_chargeoff_models_$(datasize)];
|
||||
|
||||
CREATE TABLE [loan_chargeoff_models_10k]
|
||||
CREATE TABLE [loan_chargeoff_models_$(datasize)]
|
||||
(
|
||||
[model_name] varchar(30) not null default('default model') primary key,
|
||||
[model] varbinary(max) not null,
|
||||
|
@ -80,17 +89,17 @@ CREATE TABLE [loan_chargeoff_models_10k]
|
|||
);
|
||||
GO
|
||||
|
||||
DROP TABLE IF EXISTS [selected_features_10k];
|
||||
DROP TABLE IF EXISTS [selected_features_$(datasize)];
|
||||
|
||||
CREATE TABLE [selected_features_10k](
|
||||
CREATE TABLE [selected_features_$(datasize)](
|
||||
[feature_id] [int] IDENTITY(1,1) NOT NULL,
|
||||
[feature_name] [nvarchar](500) NOT NULL
|
||||
);
|
||||
GO
|
||||
|
||||
DROP TABLE IF EXISTS [loan_chargeoff_prediction_10k]
|
||||
DROP TABLE IF EXISTS [loan_chargeoff_prediction_$(datasize)]
|
||||
|
||||
CREATE TABLE [loan_chargeoff_prediction_10k](
|
||||
CREATE TABLE [loan_chargeoff_prediction_$(datasize)](
|
||||
[memberId] [int],
|
||||
[loanId] [int],
|
||||
[payment_date] [date],
|
|
@ -0,0 +1,144 @@
|
|||
/*
|
||||
* SQL script to create views with feature and label columns for training, testing and prediction.
|
||||
* We also persist these views to physical tables for faster training/scoring times.
|
||||
* If there is not much data these views can be used directly.
|
||||
* $(datasize) is substituted through Invoke-SqlCmd's Variable option
|
||||
* (in powershell).
|
||||
*/
|
||||
-- View over the underlying table for features and labels required
-- Training view: each payments_info row (p1) joined to its loan and member attributes,
-- extended with lagged payment features and a forward-looking charge_off label.
|
||||
drop view if exists vw_loan_chargeoff_train_$(datasize)
|
||||
go
|
||||
create view vw_loan_chargeoff_train_$(datasize)
|
||||
as
|
||||
select t.loanId, t.payment_date, t.payment, t.past_due, t.remain_balance,
|
||||
l.loan_open_date, l.loanAmount,l.interestRate,l.grade,l.term,l.installment,l.isJointApplication,l.purpose,
|
||||
m.memberId,m.residentialState,m.annualIncome,m.yearsEmployment,m.homeOwnership,m.incomeVerified,m.creditScore,m.dtiRatio,m.revolvingBalance,m.revolvingUtilizationRate,m.numDelinquency2Years,m.numDerogatoryRec,m.numInquiries6Mon,m.lengthCreditHistory,m.numOpenCreditLines,m.numTotalCreditLines,m.numChargeoff1year,
|
||||
-- Lag features: when no earlier row is found the subquery yields NULL, which is replaced by 0 here.
ISNULL(t.payment_1, 0) payment_1,ISNULL(t.payment_2, 0) payment_2,ISNULL(t.payment_3, 0) payment_3,ISNULL(t.payment_4, 0) payment_4,ISNULL(t.payment_5, 0) payment_5,
|
||||
ISNULL(t.past_due_1, 0) past_due_1,ISNULL(t.past_due_2, 0) past_due_2,ISNULL(t.past_due_3, 0) past_due_3,ISNULL(t.past_due_4, 0) past_due_4,ISNULL(t.past_due_5, 0) past_due_5,
|
||||
ISNULL(t.remain_balance_1, 0) remain_balance_1,ISNULL(t.remain_balance_2, 0) remain_balance_2,ISNULL(t.remain_balance_3, 0) remain_balance_3,ISNULL(t.remain_balance_4, 0) remain_balance_4,ISNULL(t.remain_balance_5, 0) remain_balance_5, t.charge_off
|
||||
from
|
||||
(
|
||||
-- For every payment row p1, <column>_N is the value from the same loan's row (p2) whose
-- payment_date lies N month boundaries before p1 (N = 1..5), as measured by DATEDIFF(month, ...).
-- NOTE(review): "top 1" has no ORDER BY, so if a loan has more than one row in the same
-- calendar month the row picked is arbitrary; confirm there is one payment row per loan per month.
select *,
|
||||
(select top 1 payment from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) payment_1,
|
||||
(select top 1 payment from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) payment_2,
|
||||
(select top 1 payment from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) payment_3,
|
||||
(select top 1 payment from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) payment_4,
|
||||
(select top 1 payment from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) payment_5,
|
||||
(select top 1 past_due from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) past_due_1,
|
||||
(select top 1 past_due from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) past_due_2,
|
||||
(select top 1 past_due from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) past_due_3,
|
||||
(select top 1 past_due from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) past_due_4,
|
||||
(select top 1 past_due from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) past_due_5,
|
||||
(select top 1 remain_balance from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) remain_balance_1,
|
||||
(select top 1 remain_balance from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) remain_balance_2,
|
||||
(select top 1 remain_balance from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) remain_balance_3,
|
||||
(select top 1 remain_balance from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) remain_balance_4,
|
||||
(select top 1 remain_balance from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) remain_balance_5,
|
||||
-- Label: largest charged_off value among the same loan's rows dated 1, 2 or 3 months after p1.
-- "+0" turns the bit column into an integer so that MAX can be applied to it.
-- The result is NULL when no such later rows exist.
(select MAX(charged_off+0) from payments_info_$(datasize) p2 where DATEDIFF(month, p1.payment_date,p2.payment_date) IN (1,2,3) AND p1.loanId = p2.loanId) charge_off
|
||||
from payments_info_$(datasize) p1 ) AS t inner join loan_info_$(datasize) l ON t.loanId = l.loanId inner join member_info_$(datasize) m ON l.memberId = m.memberId
|
||||
-- Drop rows for which no label could be computed.
where t.charge_off IS NOT NULL
|
||||
-- Training sample: charged-off rows dated 2016-09-12 through 2016-12-12, plus every labelled row dated 2017-01-12.
and ((payment_date between '2016-09-12' and '2016-12-12' and charge_off = 1) or (payment_date = '2017-01-12'));
|
||||
go
|
||||
|
||||
-- Test view: same columns and lag/label construction as the training view,
-- restricted to labelled rows whose payment_date is exactly 2017-02-12.
drop view if exists vw_loan_chargeoff_test_$(datasize)
|
||||
go
|
||||
create view vw_loan_chargeoff_test_$(datasize)
|
||||
as
|
||||
select t.loanId, t.payment_date, t.payment, t.past_due, t.remain_balance,
|
||||
l.loan_open_date, l.loanAmount,l.interestRate,l.grade,l.term,l.installment,l.isJointApplication,l.purpose,
|
||||
m.memberId,m.residentialState,m.annualIncome,m.yearsEmployment,m.homeOwnership,m.incomeVerified,m.creditScore,m.dtiRatio,m.revolvingBalance,m.revolvingUtilizationRate,m.numDelinquency2Years,m.numDerogatoryRec,m.numInquiries6Mon,m.lengthCreditHistory,m.numOpenCreditLines,m.numTotalCreditLines,m.numChargeoff1year,
|
||||
-- Lag features: when no earlier row is found the subquery yields NULL, which is replaced by 0 here.
ISNULL(t.payment_1, 0) payment_1,ISNULL(t.payment_2, 0) payment_2,ISNULL(t.payment_3, 0) payment_3,ISNULL(t.payment_4, 0) payment_4,ISNULL(t.payment_5, 0) payment_5,
|
||||
ISNULL(t.past_due_1, 0) past_due_1,ISNULL(t.past_due_2, 0) past_due_2,ISNULL(t.past_due_3, 0) past_due_3,ISNULL(t.past_due_4, 0) past_due_4,ISNULL(t.past_due_5, 0) past_due_5,
|
||||
ISNULL(t.remain_balance_1, 0) remain_balance_1,ISNULL(t.remain_balance_2, 0) remain_balance_2,ISNULL(t.remain_balance_3, 0) remain_balance_3,ISNULL(t.remain_balance_4, 0) remain_balance_4,ISNULL(t.remain_balance_5, 0) remain_balance_5, t.charge_off
|
||||
from
|
||||
(
|
||||
-- For every payment row p1, <column>_N is the value from the same loan's row (p2) whose
-- payment_date lies N month boundaries before p1 (N = 1..5), as measured by DATEDIFF(month, ...).
-- NOTE(review): "top 1" has no ORDER BY, so if a loan has more than one row in the same
-- calendar month the row picked is arbitrary; confirm there is one payment row per loan per month.
select *,
|
||||
(select top 1 payment from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) payment_1,
|
||||
(select top 1 payment from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) payment_2,
|
||||
(select top 1 payment from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) payment_3,
|
||||
(select top 1 payment from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) payment_4,
|
||||
(select top 1 payment from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) payment_5,
|
||||
(select top 1 past_due from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) past_due_1,
|
||||
(select top 1 past_due from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) past_due_2,
|
||||
(select top 1 past_due from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) past_due_3,
|
||||
(select top 1 past_due from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) past_due_4,
|
||||
(select top 1 past_due from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) past_due_5,
|
||||
(select top 1 remain_balance from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) remain_balance_1,
|
||||
(select top 1 remain_balance from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) remain_balance_2,
|
||||
(select top 1 remain_balance from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) remain_balance_3,
|
||||
(select top 1 remain_balance from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) remain_balance_4,
|
||||
(select top 1 remain_balance from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) remain_balance_5,
|
||||
-- Label: largest charged_off value among the same loan's rows dated 1, 2 or 3 months after p1.
-- "+0" turns the bit column into an integer so that MAX can be applied to it.
-- The result is NULL when no such later rows exist.
(select MAX(charged_off+0) from payments_info_$(datasize) p2 where DATEDIFF(month, p1.payment_date,p2.payment_date) IN (1,2,3) AND p1.loanId = p2.loanId) charge_off
|
||||
from payments_info_$(datasize) p1 ) AS t inner join loan_info_$(datasize) l ON t.loanId = l.loanId inner join member_info_$(datasize) m ON l.memberId = m.memberId
|
||||
-- Drop rows for which no label could be computed.
where t.charge_off IS NOT NULL
|
||||
-- Test sample: only rows dated 2017-02-12.
and payment_date = '2017-02-12';
|
||||
go
|
||||
|
||||
-- Scoring view: same columns and lag/label construction as the training and test views,
-- restricted to rows whose payment_date is later than 2017-02-12.
drop view if exists vw_loan_chargeoff_score_$(datasize)
|
||||
go
|
||||
create view vw_loan_chargeoff_score_$(datasize)
|
||||
as
|
||||
select t.loanId, t.payment_date, t.payment, t.past_due, t.remain_balance,
|
||||
l.loan_open_date, l.loanAmount,l.interestRate,l.grade,l.term,l.installment,l.isJointApplication,l.purpose,
|
||||
m.memberId,m.residentialState,m.annualIncome,m.yearsEmployment,m.homeOwnership,m.incomeVerified,m.creditScore,m.dtiRatio,m.revolvingBalance,m.revolvingUtilizationRate,m.numDelinquency2Years,m.numDerogatoryRec,m.numInquiries6Mon,m.lengthCreditHistory,m.numOpenCreditLines,m.numTotalCreditLines,m.numChargeoff1year,
|
||||
-- Lag features: when no earlier row is found the subquery yields NULL, which is replaced by 0 here.
ISNULL(t.payment_1, 0) payment_1,ISNULL(t.payment_2, 0) payment_2,ISNULL(t.payment_3, 0) payment_3,ISNULL(t.payment_4, 0) payment_4,ISNULL(t.payment_5, 0) payment_5,
|
||||
ISNULL(t.past_due_1, 0) past_due_1,ISNULL(t.past_due_2, 0) past_due_2,ISNULL(t.past_due_3, 0) past_due_3,ISNULL(t.past_due_4, 0) past_due_4,ISNULL(t.past_due_5, 0) past_due_5,
|
||||
ISNULL(t.remain_balance_1, 0) remain_balance_1,ISNULL(t.remain_balance_2, 0) remain_balance_2,ISNULL(t.remain_balance_3, 0) remain_balance_3,ISNULL(t.remain_balance_4, 0) remain_balance_4,ISNULL(t.remain_balance_5, 0) remain_balance_5, t.charge_off
|
||||
from
|
||||
(
|
||||
-- For every payment row p1, <column>_N is the value from the same loan's row (p2) whose
-- payment_date lies N month boundaries before p1 (N = 1..5), as measured by DATEDIFF(month, ...).
-- NOTE(review): "top 1" has no ORDER BY, so if a loan has more than one row in the same
-- calendar month the row picked is arbitrary; confirm there is one payment row per loan per month.
select *,
|
||||
(select top 1 payment from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) payment_1,
|
||||
(select top 1 payment from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) payment_2,
|
||||
(select top 1 payment from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) payment_3,
|
||||
(select top 1 payment from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) payment_4,
|
||||
(select top 1 payment from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) payment_5,
|
||||
(select top 1 past_due from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) past_due_1,
|
||||
(select top 1 past_due from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) past_due_2,
|
||||
(select top 1 past_due from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) past_due_3,
|
||||
(select top 1 past_due from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) past_due_4,
|
||||
(select top 1 past_due from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) past_due_5,
|
||||
(select top 1 remain_balance from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) remain_balance_1,
|
||||
(select top 1 remain_balance from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) remain_balance_2,
|
||||
(select top 1 remain_balance from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) remain_balance_3,
|
||||
(select top 1 remain_balance from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) remain_balance_4,
|
||||
(select top 1 remain_balance from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) remain_balance_5,
|
||||
-- Label: largest charged_off value among the same loan's rows dated 1, 2 or 3 months after p1.
-- "+0" turns the bit column into an integer so that MAX can be applied to it.
-- The result is NULL when no such later rows exist.
(select MAX(charged_off+0) from payments_info_$(datasize) p2 where DATEDIFF(month, p1.payment_date,p2.payment_date) IN (1,2,3) AND p1.loanId = p2.loanId) charge_off
|
||||
from payments_info_$(datasize) p1 ) AS t inner join loan_info_$(datasize) l ON t.loanId = l.loanId inner join member_info_$(datasize) m ON l.memberId = m.memberId
|
||||
-- NOTE(review): rows to be scored are still required to have a non-NULL charge_off, i.e. at least
-- one payment row for the loan 1-3 months later; confirm this is intended for scoring data.
where t.charge_off IS NOT NULL
|
||||
-- Scoring sample: rows dated after 2017-02-12.
and payment_date > '2017-02-12';
|
||||
go
|
||||
|
||||
|
||||
-- persist the view in case of large dataset in order to get faster results
-- Each of the train, test and score views is copied with SELECT ... INTO into a table of the
-- same name without the vw_ prefix. The table is dropped first if it already exists, and a
-- clustered columnstore index is created on it afterwards.
|
||||
-- Training table
drop table if exists [loan_chargeoff_train_$(datasize)]
|
||||
go
|
||||
|
||||
select *
|
||||
into [loan_chargeoff_train_$(datasize)]
|
||||
from [vw_loan_chargeoff_train_$(datasize)]
|
||||
go
|
||||
|
||||
create clustered columnstore index [cci_loan_chargeoff_train_$(datasize)] on [loan_chargeoff_train_$(datasize)]
|
||||
go
|
||||
|
||||
-- Test table
drop table if exists [loan_chargeoff_test_$(datasize)]
|
||||
go
|
||||
|
||||
select *
|
||||
into [loan_chargeoff_test_$(datasize)]
|
||||
from [vw_loan_chargeoff_test_$(datasize)]
|
||||
go
|
||||
|
||||
create clustered columnstore index [cci_loan_chargeoff_test_$(datasize)] on [loan_chargeoff_test_$(datasize)]
|
||||
go
|
||||
|
||||
-- Scoring table
drop table if exists [loan_chargeoff_score_$(datasize)]
|
||||
go
|
||||
|
||||
select *
|
||||
into [loan_chargeoff_score_$(datasize)]
|
||||
from [vw_loan_chargeoff_score_$(datasize)]
|
||||
go
|
||||
|
||||
create clustered columnstore index [cci_loan_chargeoff_score_$(datasize)] on [loan_chargeoff_score_$(datasize)]
|
||||
go
|
|
@ -1,3 +1,9 @@
|
|||
/*
|
||||
* SQLR script to demonstrate feature selection available in the MicrosoftML package.
|
||||
* We use this same mechanism during training so this step is optional to run, but
|
||||
* serves as an example of an approach for feature selection, i.e., preselect features
|
||||
* and store in database table for later use in training of models.
|
||||
*/
|
||||
SET ANSI_NULLS ON
|
||||
GO
|
||||
SET QUOTED_IDENTIFIER ON
|
||||
|
@ -6,6 +12,14 @@ GO
|
|||
DROP PROCEDURE IF EXISTS [dbo].[select_features];
|
||||
GO
|
||||
|
||||
/*
|
||||
* Stored procedure for feature selection.
|
||||
* Parameters:
|
||||
* @training_set_table - table with training data
|
||||
* @test_set_table - table with test data
|
||||
* @selected_features_table - table to store selected features in
|
||||
* @connectionString - connection string to connect to the database for use in the R script
|
||||
*/
|
||||
CREATE PROCEDURE [select_features] @training_set_table varchar(100), @test_set_table varchar(100), @selected_features_table varchar(100), @connectionString varchar(300)
|
||||
AS
|
||||
BEGIN
|
|
@ -1,4 +1,6 @@
|
|||
/****** Stored Procedure to train models ******/
|
||||
/*
|
||||
* SQLR script to create stored procedure for training.
|
||||
*/
|
||||
SET ANSI_NULLS ON
|
||||
GO
|
||||
SET QUOTED_IDENTIFIER ON
|
||||
|
@ -7,7 +9,20 @@ GO
|
|||
DROP PROCEDURE IF EXISTS [dbo].[train_model];
|
||||
GO
|
||||
|
||||
CREATE PROCEDURE [train_model] @training_set_table varchar(100), @test_set_table varchar(100), @scored_table varchar(100), @model_table varchar(100), @model_name_param varchar(50), @connectionString varchar(300)
|
||||
/*
|
||||
* Stored Procedure for training of models using MicrosoftML algorithms. This also evaluates the models and stores
|
||||
* the following stats along with the serialized model binary: accuracy, auc, precision, recall, f1score.
|
||||
* The parameters can be tuned for various algorithms based on performance on your data.
|
||||
* Parameters:
|
||||
* @training_set_table - training data table name
|
||||
* @test_set_table - test data table name for model evaluation
|
||||
* @scored_table - table to store scores in when doing model evaluation
|
||||
* @model_table - table to store model in serialized binary format along with evaluation stats
|
||||
* @model_name_param - the algorithm to use for training the model.
|
||||
* Can be one of 'logistic_reg', 'fast_trees', 'fast_forest', 'fast_linear', 'neural_net'
|
||||
* @connectionString - connection string to connect to the database for use in the R script
|
||||
*/
|
||||
CREATE PROCEDURE [train_model] @training_set_table varchar(100), @test_set_table varchar(100), @scored_table varchar(100), @model_table varchar(100), @model_alg varchar(50), @connectionString varchar(300)
|
||||
AS
|
||||
BEGIN
|
||||
|
||||
|
@ -89,7 +104,7 @@ stat_f1score <- model_stats[[5]]
|
|||
'
|
||||
, @params = N'@model_name varchar(20), @connection_string varchar(300), @train_set varchar(100), @test_set varchar(100), @score_set varchar(100),
|
||||
@modelbin varbinary(max) OUTPUT, @stat_auc real OUTPUT, @stat_accuracy real OUTPUT, @stat_precision real OUTPUT, @stat_recall real OUTPUT, @stat_f1score real OUTPUT'
|
||||
, @model_name = @model_name_param
|
||||
, @model_name = @model_alg
|
||||
, @connection_string = @connectionString
|
||||
, @train_set = @training_set_table
|
||||
, @test_set = @test_set_table
|
||||
|
@ -101,9 +116,9 @@ stat_f1score <- model_stats[[5]]
|
|||
, @stat_recall = @recall OUTPUT
|
||||
, @stat_f1score = @f1score OUTPUT;
|
||||
|
||||
SET @del_cmd = N'DELETE FROM ' + @model_table + N' WHERE model_name = ''' + @model_name_param + ''''
|
||||
SET @del_cmd = N'DELETE FROM ' + @model_table + N' WHERE model_name = ''' + @model_alg + ''''
|
||||
EXEC sp_executesql @del_cmd;
|
||||
SET @ins_cmd = N'INSERT INTO ' + @model_table + N' (model_name, model, auc, accuracy, precision, recall, f1score) VALUES (''' + @model_name_param + ''', @p_payload, @p_auc, @p_accuracy, @p_precision, @p_recall, @p_f1score)'
|
||||
SET @ins_cmd = N'INSERT INTO ' + @model_table + N' (model_name, model, auc, accuracy, precision, recall, f1score) VALUES (''' + @model_alg + ''', @p_payload, @p_auc, @p_accuracy, @p_precision, @p_recall, @p_f1score)'
|
||||
SET @param_def = N'@p_payload varbinary(max),
|
||||
@p_auc real,
|
||||
@p_accuracy real,
|
|
@ -1,4 +1,6 @@
|
|||
|
||||
/*
|
||||
* SQLR script to do batch scoring.
|
||||
*/
|
||||
SET ANSI_NULLS ON
|
||||
GO
|
||||
SET QUOTED_IDENTIFIER ON
|
||||
|
@ -7,6 +9,14 @@ GO
|
|||
DROP PROCEDURE IF EXISTS [dbo].[predict_chargeoff]
|
||||
GO
|
||||
|
||||
/*
|
||||
* Stored Procedure to do batch scoring using the 'best model' based on f1score.
|
||||
* Parameters:
|
||||
* @score_table - Table with data to score/make prediction on
|
||||
* @score_prediction_table - Table to store predictions
|
||||
* @models_table - Table which has serialized binary models stored along with evaluation stats (during training step)
|
||||
* @connectionString - connection string to connect to the database for use in the R script
|
||||
*/
|
||||
CREATE PROCEDURE [predict_chargeoff] @score_table varchar(100), @score_prediction_table varchar(100), @models_table varchar(100), @connectionString varchar(300)
|
||||
|
||||
AS
|
|
@ -1,9 +1,16 @@
|
|||
|
||||
/*
|
||||
* SQLR script to do on-demand scoring/prediction of one record.
|
||||
*/
|
||||
SET ANSI_NULLS ON
|
||||
GO
|
||||
SET QUOTED_IDENTIFIER ON
|
||||
GO
|
||||
|
||||
/*
|
||||
* Stored Procedure for on-demand scoring/prediction using the 'best model' based on f1score.
|
||||
* Parameters:
|
||||
* @models_table - Table which has serialized binary models stored along with evaluation stats (during training step)
|
||||
* The rest of the parameters are the features used during training.
|
||||
*/
|
||||
DROP PROCEDURE IF EXISTS [dbo].[predict_chargeoff_ondemand]
|
||||
GO
|
||||
|
Загрузка…
Ссылка в новой задаче