This commit is contained in:
Eqbal Zaffar 2017-06-15 20:06:06 -07:00
Родитель 7cd4e6aea0
Коммит d2dd81eb34
6 изменённых файлов: 164 добавлений и 405 удалений

Просмотреть файл

@ -70,9 +70,11 @@ function ExecuteSQL
{
param(
[String]
$sqlscript
$sqlscript,
[String]
$VariableArray=""
)
Invoke-Sqlcmd -ServerInstance $ServerName -Database $DBName -Username $username -Password $password -InputFile $sqlscript -QueryTimeout 200000
Invoke-Sqlcmd -ServerInstance $ServerName -Database $DBName -Username $username -Password $password -InputFile $sqlscript -Variable $VariableArray -QueryTimeout 200000
}
##########################################################################
# Function wrapper to invoke SQL query
@ -139,8 +141,8 @@ if ($uninterrupted -eq 'y' -or $uninterrupted -eq 'Y')
{
# create training and test tables
Write-Host -ForeGroundColor 'green' ("Create SQL tables: member_info, loan_info, payments_info")
$script = $filePath + "step1_create_tables" + $table_suffix + ".sql"
ExecuteSQL $script
$script = $filePath + "step1_create_tables.sql"
ExecuteSQL $script "datasize = $dataSize"
Write-Host -ForeGroundColor 'green' ("Populate SQL tables: member_info, loan_info, payments_info")
$dataList = "member_info", "loan_info", "payments_info"
@ -163,8 +165,8 @@ if ($uninterrupted -eq 'y' -or $uninterrupted -eq 'Y')
# create the views for features and label with training, test and scoring split
Write-Host -ForeGroundColor 'magenta'(" Creating features label view and persisting...")
$script = $filepath + "step2_features_label_view" + $table_suffix + ".sql"
ExecuteSQL $script
$script = $filepath + "step2_features_label_view.sql"
ExecuteSQL $script "datasize = $dataSize"
Write-Host -ForeGroundColor 'magenta'(" Done creating features label view and persisting...")
# create the stored procedure for training
@ -228,8 +230,8 @@ if ($ans -eq 'y' -or $ans -eq 'Y')
{
# create training and test tables
Write-Host -ForeGroundColor 'green' ("Create SQL tables: member_info, loan_info, payments_info")
$script = $filePath + "step1_create_tables" + $table_suffix + ".sql"
ExecuteSQL $script
$script = $filePath + "step1_create_tables.sql"
ExecuteSQL $script "datasize = $dataSize"
Write-Host -ForeGroundColor 'green' ("Populate SQL tables: member_info, loan_info, payments_info")
$dataList = "member_info", "loan_info", "payments_info"
@ -269,8 +271,8 @@ if ($ans -eq 'y' -or $ans -eq 'Y')
{
# create features, labels view
Write-Host -ForeGroundColor 'Cyan' (" Creating feature/label views...")
$script = $filepath + "step2_features_label_view" + $table_suffix + ".sql"
ExecuteSQL $script
$script = $filepath + "step2_features_label_view.sql"
ExecuteSQL $script "datasize = $dataSize"
}
##########################################################################

Просмотреть файл

@ -5,9 +5,9 @@ GO
/* Create the member_info Table. */
DROP TABLE IF EXISTS member_info_10k
DROP TABLE IF EXISTS member_info_$(datasize)
CREATE TABLE [member_info_10k](
CREATE TABLE [member_info_$(datasize)](
[memberId] [int],
[residentialState] [nvarchar](4),
[annualIncome] [real],
@ -27,13 +27,13 @@ CREATE TABLE [member_info_10k](
[numChargeoff1year] [int]
);
CREATE CLUSTERED COLUMNSTORE INDEX member_info_10k_cci ON member_info_10k WITH (DROP_EXISTING = OFF);
CREATE CLUSTERED COLUMNSTORE INDEX member_info_$(datasize)_cci ON member_info_$(datasize) WITH (DROP_EXISTING = OFF);
GO
/* Create the loan_info Table. */
DROP TABLE IF EXISTS loan_info_10k
DROP TABLE IF EXISTS loan_info_$(datasize)
CREATE TABLE [loan_info_10k](
CREATE TABLE [loan_info_$(datasize)](
[loanId] [int],
[loan_open_date] [datetime],
[memberId] [int],
@ -46,13 +46,13 @@ CREATE TABLE [loan_info_10k](
[purpose] [nvarchar](255)
);
CREATE CLUSTERED COLUMNSTORE INDEX loan_info_10k_cci ON loan_info_10k WITH (DROP_EXISTING = OFF);
CREATE CLUSTERED COLUMNSTORE INDEX loan_info_$(datasize)_cci ON loan_info_$(datasize) WITH (DROP_EXISTING = OFF);
GO
/* Create the payments_info Table*/
DROP TABLE IF EXISTS payments_info_10k
DROP TABLE IF EXISTS payments_info_$(datasize)
CREATE TABLE [payments_info_10k](
CREATE TABLE [payments_info_$(datasize)](
[loanId] [int],
[payment_date] [datetime],
[payment] [real],
@ -62,12 +62,12 @@ CREATE TABLE [payments_info_10k](
[charged_off] [bit]
);
CREATE CLUSTERED COLUMNSTORE INDEX payments_info_10k_cci ON payments_info_10k WITH (DROP_EXISTING = OFF);
CREATE CLUSTERED COLUMNSTORE INDEX payments_info_$(datasize)_cci ON payments_info_$(datasize) WITH (DROP_EXISTING = OFF);
GO
DROP TABLE IF EXISTS [loan_chargeoff_models_10k];
DROP TABLE IF EXISTS [loan_chargeoff_models_$(datasize)];
CREATE TABLE [loan_chargeoff_models_10k]
CREATE TABLE [loan_chargeoff_models_$(datasize)]
(
[model_name] varchar(30) not null default('default model') primary key,
[model] varbinary(max) not null,
@ -80,17 +80,17 @@ CREATE TABLE [loan_chargeoff_models_10k]
);
GO
DROP TABLE IF EXISTS [selected_features_10k];
DROP TABLE IF EXISTS [selected_features_$(datasize)];
CREATE TABLE [selected_features_10k](
CREATE TABLE [selected_features_$(datasize)](
[feature_id] [int] IDENTITY(1,1) NOT NULL,
[feature_name] [nvarchar](500) NOT NULL
);
GO
DROP TABLE IF EXISTS [loan_chargeoff_prediction_10k]
DROP TABLE IF EXISTS [loan_chargeoff_prediction_$(datasize)]
CREATE TABLE [loan_chargeoff_prediction_10k](
CREATE TABLE [loan_chargeoff_prediction_$(datasize)](
[memberId] [int],
[loanId] [int],
[payment_date] [date],

Просмотреть файл

@ -1,103 +0,0 @@
-- Session options applied before any object in this script is created:
-- ANSI NULL comparison semantics and double-quoted identifiers are switched on.
SET ansi_nulls on
GO
SET quoted_identifier on
GO
/* Create the member_info table (large data set variant, suffix _1m). */
/* Any existing table of this name is dropped first, so previously loaded rows are lost. */
DROP TABLE IF EXISTS member_info_1m
CREATE TABLE [member_info_1m](
[memberId] [int],
[residentialState] [nvarchar](4),
[annualIncome] [real],
[yearsEmployment] [nvarchar](11),
[homeOwnership] [nvarchar](10),
[incomeVerified] [bit],
[creditScore] [int],
[dtiRatio] [real],
[revolvingBalance] [real],
[revolvingUtilizationRate] [real],
[numDelinquency2Years] [int],
[numDerogatoryRec] [int],
[numInquiries6Mon] [int],
[lengthCreditHistory] [int],
[numOpenCreditLines] [int],
[numTotalCreditLines] [int],
[numChargeoff1year] [int]
);
/* DROP_EXISTING = OFF: the index is created new; this is safe because the table was just recreated. */
CREATE CLUSTERED COLUMNSTORE INDEX member_info_1m_cci ON member_info_1m WITH (DROP_EXISTING = OFF);
GO
/* Create the loan_info table (suffix _1m); an existing table of this name is dropped first. */
/* NOTE(review): memberId presumably refers to member_info_1m.memberId; no foreign key is declared - confirm. */
DROP TABLE IF EXISTS loan_info_1m
CREATE TABLE [loan_info_1m](
[loanId] [int],
[loan_open_date] [datetime],
[memberId] [int],
[loanAmount] [real],
[interestRate] [real],
[grade] [int],
[term] [int],
[installment] [real],
[isJointApplication] [bit],
[purpose] [nvarchar](255)
);
/* DROP_EXISTING = OFF: the index is created new; this is safe because the table was just recreated. */
CREATE CLUSTERED COLUMNSTORE INDEX loan_info_1m_cci ON loan_info_1m WITH (DROP_EXISTING = OFF);
GO
/* Create the payments_info table (suffix _1m); an existing table of this name is dropped first. */
/* NOTE(review): payment_date is declared [date] here, whereas the parameterized create-tables script
   declares the same column as [datetime] - confirm which type is intended. */
DROP TABLE IF EXISTS payments_info_1m
CREATE TABLE [payments_info_1m](
[loanId] [int],
[payment_date] [date],
[payment] [real],
[past_due] [real],
[remain_balance] [real],
[closed] [bit],
[charged_off] [bit]
);
/* DROP_EXISTING = OFF: the index is created new; this is safe because the table was just recreated. */
CREATE CLUSTERED COLUMNSTORE INDEX payments_info_1m_cci ON payments_info_1m WITH (DROP_EXISTING = OFF);
GO
/* Create the loan_chargeoff_models table (suffix _1m); an existing table of this name is dropped first. */
/* One row per model_name (primary key, defaulting to 'default model'), holding a binary model
   payload plus evaluation metric columns. training_ts defaults to the insert time. */
/* NOTE(review): [model] presumably stores a serialized trained model - confirm against the training procedure. */
DROP TABLE IF EXISTS [loan_chargeoff_models_1m];
CREATE TABLE [loan_chargeoff_models_1m]
(
[model_name] varchar(30) not null default('default model') primary key,
[model] varbinary(max) not null,
[auc] real,
[accuracy] real,
[precision] real,
[recall] real,
[f1score] real,
[training_ts] datetime default(GETDATE())
);
GO
/* Create the selected_features table (suffix _1m); an existing table of this name is dropped first. */
/* feature_id is an auto-incrementing identity starting at 1; feature_name is mandatory. */
DROP TABLE IF EXISTS selected_features_1m;
CREATE TABLE [selected_features_1m](
[feature_id] [int] IDENTITY(1,1) NOT NULL,
[feature_name] [nvarchar](500) NOT NULL
);
GO
/* Create the loan_chargeoff_prediction table (suffix _1m); an existing table of this name is dropped first. */
/* prediction_date defaults to the insert date. The column names [Score.1] and [Probability.1]
   contain a dot, so they must always be written in brackets. */
DROP TABLE IF EXISTS [loan_chargeoff_prediction_1m]
CREATE TABLE [loan_chargeoff_prediction_1m](
[memberId] [int],
[loanId] [int],
[payment_date] [date],
[prediction_date] [date] default(GETDATE()),
[PredictedLabel] [nvarchar](255),
[Score.1] [float],
[Probability.1] [float]
);
GO

Просмотреть файл

@ -0,0 +1,137 @@
-- Training view over the payments, loan and member tables: one row per payment
-- record with its loan and member attributes, lagged payment-history features
-- and a look-ahead charge-off label.
-- The object-name suffix comes from the datasize scripting variable, which the
-- caller must define when running this script.
drop view if exists vw_loan_chargeoff_train_$(datasize)
go
create view vw_loan_chargeoff_train_$(datasize)
as
select t.loanId, t.payment_date, t.payment, t.past_due, t.remain_balance,
l.loan_open_date, l.loanAmount,l.interestRate,l.grade,l.term,l.installment,l.isJointApplication,l.purpose,
m.memberId,m.residentialState,m.annualIncome,m.yearsEmployment,m.homeOwnership,m.incomeVerified,m.creditScore,m.dtiRatio,m.revolvingBalance,m.revolvingUtilizationRate,m.numDelinquency2Years,m.numDerogatoryRec,m.numInquiries6Mon,m.lengthCreditHistory,m.numOpenCreditLines,m.numTotalCreditLines,m.numChargeoff1year,
-- Lag features fall back to 0 when no payment row exists for that earlier month.
ISNULL(t.payment_1, 0) payment_1,ISNULL(t.payment_2, 0) payment_2,ISNULL(t.payment_3, 0) payment_3,ISNULL(t.payment_4, 0) payment_4,ISNULL(t.payment_5, 0) payment_5,
ISNULL(t.past_due_1, 0) past_due_1,ISNULL(t.past_due_2, 0) past_due_2,ISNULL(t.past_due_3, 0) past_due_3,ISNULL(t.past_due_4, 0) past_due_4,ISNULL(t.past_due_5, 0) past_due_5,
ISNULL(t.remain_balance_1, 0) remain_balance_1,ISNULL(t.remain_balance_2, 0) remain_balance_2,ISNULL(t.remain_balance_3, 0) remain_balance_3,ISNULL(t.remain_balance_4, 0) remain_balance_4,ISNULL(t.remain_balance_5, 0) remain_balance_5, t.charge_off
from
(
select *,
-- payment_N / past_due_N / remain_balance_N: value taken from a row of the same loan whose
-- payment_date lies N month boundaries before the current row (DATEDIFF counts boundaries crossed).
-- NOTE(review): "top 1" has no ORDER BY, so if several rows match, which one is returned is not defined.
(select top 1 payment from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) payment_1,
(select top 1 payment from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) payment_2,
(select top 1 payment from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) payment_3,
(select top 1 payment from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) payment_4,
(select top 1 payment from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) payment_5,
(select top 1 past_due from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) past_due_1,
(select top 1 past_due from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) past_due_2,
(select top 1 past_due from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) past_due_3,
(select top 1 past_due from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) past_due_4,
(select top 1 past_due from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) past_due_5,
(select top 1 remain_balance from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) remain_balance_1,
(select top 1 remain_balance from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) remain_balance_2,
(select top 1 remain_balance from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) remain_balance_3,
(select top 1 remain_balance from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) remain_balance_4,
(select top 1 remain_balance from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) remain_balance_5,
-- Label: highest charged_off flag among rows of the same loan dated 1 to 3 months after the current row.
-- "+0" converts the bit column to an integer because MAX does not accept bit. NULL when no later row exists.
(select MAX(charged_off+0) from payments_info_$(datasize) p2 where DATEDIFF(month, p1.payment_date,p2.payment_date) IN (1,2,3) AND p1.loanId = p2.loanId) charge_off
from payments_info_$(datasize) p1 ) AS t inner join loan_info_$(datasize) l ON t.loanId = l.loanId inner join member_info_$(datasize) m ON l.memberId = m.memberId
-- Rows for which no label could be computed are excluded.
where t.charge_off IS NOT NULL
-- Training split: only charged-off rows from 2016-09-12 through 2016-12-12, plus every labelled row dated 2017-01-12.
and ((payment_date between '2016-09-12' and '2016-12-12' and charge_off = 1) or (payment_date = '2017-01-12'));
go
-- Test view: same columns, lag features and label as the training view, but
-- restricted to payment rows dated 2017-02-12.
-- The object-name suffix comes from the datasize scripting variable.
drop view if exists vw_loan_chargeoff_test_$(datasize)
go
create view vw_loan_chargeoff_test_$(datasize)
as
select t.loanId, t.payment_date, t.payment, t.past_due, t.remain_balance,
l.loan_open_date, l.loanAmount,l.interestRate,l.grade,l.term,l.installment,l.isJointApplication,l.purpose,
m.memberId,m.residentialState,m.annualIncome,m.yearsEmployment,m.homeOwnership,m.incomeVerified,m.creditScore,m.dtiRatio,m.revolvingBalance,m.revolvingUtilizationRate,m.numDelinquency2Years,m.numDerogatoryRec,m.numInquiries6Mon,m.lengthCreditHistory,m.numOpenCreditLines,m.numTotalCreditLines,m.numChargeoff1year,
-- Lag features fall back to 0 when no payment row exists for that earlier month.
ISNULL(t.payment_1, 0) payment_1,ISNULL(t.payment_2, 0) payment_2,ISNULL(t.payment_3, 0) payment_3,ISNULL(t.payment_4, 0) payment_4,ISNULL(t.payment_5, 0) payment_5,
ISNULL(t.past_due_1, 0) past_due_1,ISNULL(t.past_due_2, 0) past_due_2,ISNULL(t.past_due_3, 0) past_due_3,ISNULL(t.past_due_4, 0) past_due_4,ISNULL(t.past_due_5, 0) past_due_5,
ISNULL(t.remain_balance_1, 0) remain_balance_1,ISNULL(t.remain_balance_2, 0) remain_balance_2,ISNULL(t.remain_balance_3, 0) remain_balance_3,ISNULL(t.remain_balance_4, 0) remain_balance_4,ISNULL(t.remain_balance_5, 0) remain_balance_5, t.charge_off
from
(
select *,
-- payment_N / past_due_N / remain_balance_N: value taken from a row of the same loan whose
-- payment_date lies N month boundaries before the current row (DATEDIFF counts boundaries crossed).
-- NOTE(review): "top 1" has no ORDER BY, so if several rows match, which one is returned is not defined.
(select top 1 payment from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) payment_1,
(select top 1 payment from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) payment_2,
(select top 1 payment from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) payment_3,
(select top 1 payment from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) payment_4,
(select top 1 payment from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) payment_5,
(select top 1 past_due from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) past_due_1,
(select top 1 past_due from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) past_due_2,
(select top 1 past_due from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) past_due_3,
(select top 1 past_due from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) past_due_4,
(select top 1 past_due from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) past_due_5,
(select top 1 remain_balance from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) remain_balance_1,
(select top 1 remain_balance from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) remain_balance_2,
(select top 1 remain_balance from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) remain_balance_3,
(select top 1 remain_balance from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) remain_balance_4,
(select top 1 remain_balance from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) remain_balance_5,
-- Label: highest charged_off flag among rows of the same loan dated 1 to 3 months after the current row.
-- "+0" converts the bit column to an integer because MAX does not accept bit. NULL when no later row exists.
(select MAX(charged_off+0) from payments_info_$(datasize) p2 where DATEDIFF(month, p1.payment_date,p2.payment_date) IN (1,2,3) AND p1.loanId = p2.loanId) charge_off
from payments_info_$(datasize) p1 ) AS t inner join loan_info_$(datasize) l ON t.loanId = l.loanId inner join member_info_$(datasize) m ON l.memberId = m.memberId
-- Rows for which no label could be computed are excluded.
where t.charge_off IS NOT NULL
-- Test split: labelled rows dated exactly 2017-02-12.
and payment_date = '2017-02-12';
go
-- Scoring view: same columns, lag features and label as the training view, but
-- restricted to payment rows dated after 2017-02-12.
-- The object-name suffix comes from the datasize scripting variable.
drop view if exists vw_loan_chargeoff_score_$(datasize)
go
create view vw_loan_chargeoff_score_$(datasize)
as
select t.loanId, t.payment_date, t.payment, t.past_due, t.remain_balance,
l.loan_open_date, l.loanAmount,l.interestRate,l.grade,l.term,l.installment,l.isJointApplication,l.purpose,
m.memberId,m.residentialState,m.annualIncome,m.yearsEmployment,m.homeOwnership,m.incomeVerified,m.creditScore,m.dtiRatio,m.revolvingBalance,m.revolvingUtilizationRate,m.numDelinquency2Years,m.numDerogatoryRec,m.numInquiries6Mon,m.lengthCreditHistory,m.numOpenCreditLines,m.numTotalCreditLines,m.numChargeoff1year,
-- Lag features fall back to 0 when no payment row exists for that earlier month.
ISNULL(t.payment_1, 0) payment_1,ISNULL(t.payment_2, 0) payment_2,ISNULL(t.payment_3, 0) payment_3,ISNULL(t.payment_4, 0) payment_4,ISNULL(t.payment_5, 0) payment_5,
ISNULL(t.past_due_1, 0) past_due_1,ISNULL(t.past_due_2, 0) past_due_2,ISNULL(t.past_due_3, 0) past_due_3,ISNULL(t.past_due_4, 0) past_due_4,ISNULL(t.past_due_5, 0) past_due_5,
ISNULL(t.remain_balance_1, 0) remain_balance_1,ISNULL(t.remain_balance_2, 0) remain_balance_2,ISNULL(t.remain_balance_3, 0) remain_balance_3,ISNULL(t.remain_balance_4, 0) remain_balance_4,ISNULL(t.remain_balance_5, 0) remain_balance_5, t.charge_off
from
(
select *,
-- payment_N / past_due_N / remain_balance_N: value taken from a row of the same loan whose
-- payment_date lies N month boundaries before the current row (DATEDIFF counts boundaries crossed).
-- NOTE(review): "top 1" has no ORDER BY, so if several rows match, which one is returned is not defined.
(select top 1 payment from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) payment_1,
(select top 1 payment from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) payment_2,
(select top 1 payment from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) payment_3,
(select top 1 payment from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) payment_4,
(select top 1 payment from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) payment_5,
(select top 1 past_due from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) past_due_1,
(select top 1 past_due from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) past_due_2,
(select top 1 past_due from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) past_due_3,
(select top 1 past_due from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) past_due_4,
(select top 1 past_due from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) past_due_5,
(select top 1 remain_balance from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) remain_balance_1,
(select top 1 remain_balance from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) remain_balance_2,
(select top 1 remain_balance from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) remain_balance_3,
(select top 1 remain_balance from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) remain_balance_4,
(select top 1 remain_balance from payments_info_$(datasize) p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) remain_balance_5,
-- Label: highest charged_off flag among rows of the same loan dated 1 to 3 months after the current row.
-- "+0" converts the bit column to an integer because MAX does not accept bit. NULL when no later row exists.
(select MAX(charged_off+0) from payments_info_$(datasize) p2 where DATEDIFF(month, p1.payment_date,p2.payment_date) IN (1,2,3) AND p1.loanId = p2.loanId) charge_off
from payments_info_$(datasize) p1 ) AS t inner join loan_info_$(datasize) l ON t.loanId = l.loanId inner join member_info_$(datasize) m ON l.memberId = m.memberId
-- NOTE(review): this keeps only rows that already have a later payment row (non-NULL label), so the
-- most recent payments of each loan may be left out of the scoring set - confirm this is intended.
where t.charge_off IS NOT NULL
-- Scoring split: labelled rows dated after 2017-02-12.
and payment_date > '2017-02-12';
go
-- Materialize the training view as a table (rebuilt from scratch on every run)
-- so that large datasets return results faster than querying the view directly.
DROP TABLE IF EXISTS [loan_chargeoff_train_$(datasize)];
GO
SELECT *
  INTO [loan_chargeoff_train_$(datasize)]
  FROM [vw_loan_chargeoff_train_$(datasize)];
GO
CREATE CLUSTERED COLUMNSTORE INDEX [cci_loan_chargeoff_train_$(datasize)]
    ON [loan_chargeoff_train_$(datasize)];
GO
-- Materialize the test view as a table (rebuilt from scratch on every run)
-- and give it a clustered columnstore index.
DROP TABLE IF EXISTS [loan_chargeoff_test_$(datasize)];
GO
SELECT *
  INTO [loan_chargeoff_test_$(datasize)]
  FROM [vw_loan_chargeoff_test_$(datasize)];
GO
CREATE CLUSTERED COLUMNSTORE INDEX [cci_loan_chargeoff_test_$(datasize)]
    ON [loan_chargeoff_test_$(datasize)];
GO
-- Materialize the scoring view as a table (rebuilt from scratch on every run)
-- and give it a clustered columnstore index.
DROP TABLE IF EXISTS [loan_chargeoff_score_$(datasize)];
GO
SELECT *
  INTO [loan_chargeoff_score_$(datasize)]
  FROM [vw_loan_chargeoff_score_$(datasize)];
GO
CREATE CLUSTERED COLUMNSTORE INDEX [cci_loan_chargeoff_score_$(datasize)]
    ON [loan_chargeoff_score_$(datasize)];
GO

Просмотреть файл

@ -1,137 +0,0 @@
-- Training view over the 10k payments, loan and member tables: one row per
-- payment record with its loan and member attributes, lagged payment-history
-- features and a look-ahead charge-off label.
drop view if exists vw_loan_chargeoff_train_10k
go
create view vw_loan_chargeoff_train_10k
as
select t.loanId, t.payment_date, t.payment, t.past_due, t.remain_balance,
l.loan_open_date, l.loanAmount,l.interestRate,l.grade,l.term,l.installment,l.isJointApplication,l.purpose,
m.memberId,m.residentialState,m.annualIncome,m.yearsEmployment,m.homeOwnership,m.incomeVerified,m.creditScore,m.dtiRatio,m.revolvingBalance,m.revolvingUtilizationRate,m.numDelinquency2Years,m.numDerogatoryRec,m.numInquiries6Mon,m.lengthCreditHistory,m.numOpenCreditLines,m.numTotalCreditLines,m.numChargeoff1year,
-- Lag features fall back to 0 when no payment row exists for that earlier month.
ISNULL(t.payment_1, 0) payment_1,ISNULL(t.payment_2, 0) payment_2,ISNULL(t.payment_3, 0) payment_3,ISNULL(t.payment_4, 0) payment_4,ISNULL(t.payment_5, 0) payment_5,
ISNULL(t.past_due_1, 0) past_due_1,ISNULL(t.past_due_2, 0) past_due_2,ISNULL(t.past_due_3, 0) past_due_3,ISNULL(t.past_due_4, 0) past_due_4,ISNULL(t.past_due_5, 0) past_due_5,
ISNULL(t.remain_balance_1, 0) remain_balance_1,ISNULL(t.remain_balance_2, 0) remain_balance_2,ISNULL(t.remain_balance_3, 0) remain_balance_3,ISNULL(t.remain_balance_4, 0) remain_balance_4,ISNULL(t.remain_balance_5, 0) remain_balance_5, t.charge_off
from
(
select *,
-- payment_N / past_due_N / remain_balance_N: value taken from a row of the same loan whose
-- payment_date lies N month boundaries before the current row (DATEDIFF counts boundaries crossed).
-- NOTE(review): "top 1" has no ORDER BY, so if several rows match, which one is returned is not defined.
(select top 1 payment from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) payment_1,
(select top 1 payment from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) payment_2,
(select top 1 payment from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) payment_3,
(select top 1 payment from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) payment_4,
(select top 1 payment from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) payment_5,
(select top 1 past_due from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) past_due_1,
(select top 1 past_due from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) past_due_2,
(select top 1 past_due from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) past_due_3,
(select top 1 past_due from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) past_due_4,
(select top 1 past_due from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) past_due_5,
(select top 1 remain_balance from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) remain_balance_1,
(select top 1 remain_balance from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) remain_balance_2,
(select top 1 remain_balance from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) remain_balance_3,
(select top 1 remain_balance from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) remain_balance_4,
(select top 1 remain_balance from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) remain_balance_5,
-- Label: highest charged_off flag among rows of the same loan dated 1 to 3 months after the current row.
-- "+0" converts the bit column to an integer because MAX does not accept bit. NULL when no later row exists.
(select MAX(charged_off+0) from payments_info_10k p2 where DATEDIFF(month, p1.payment_date,p2.payment_date) IN (1,2,3) AND p1.loanId = p2.loanId) charge_off
from payments_info_10k p1 ) AS t inner join loan_info_10k l ON t.loanId = l.loanId inner join member_info_10k m ON l.memberId = m.memberId
-- Rows for which no label could be computed are excluded.
where t.charge_off IS NOT NULL
-- Training split: only charged-off rows from 2016-09-12 through 2016-12-12, plus every labelled row dated 2017-01-12.
and ((payment_date between '2016-09-12' and '2016-12-12' and charge_off = 1) or (payment_date = '2017-01-12'));
go
-- Test view over the 10k tables: same columns, lag features and label as the
-- training view, but restricted to payment rows dated 2017-02-12.
drop view if exists vw_loan_chargeoff_test_10k
go
create view vw_loan_chargeoff_test_10k
as
select t.loanId, t.payment_date, t.payment, t.past_due, t.remain_balance,
l.loan_open_date, l.loanAmount,l.interestRate,l.grade,l.term,l.installment,l.isJointApplication,l.purpose,
m.memberId,m.residentialState,m.annualIncome,m.yearsEmployment,m.homeOwnership,m.incomeVerified,m.creditScore,m.dtiRatio,m.revolvingBalance,m.revolvingUtilizationRate,m.numDelinquency2Years,m.numDerogatoryRec,m.numInquiries6Mon,m.lengthCreditHistory,m.numOpenCreditLines,m.numTotalCreditLines,m.numChargeoff1year,
-- Lag features fall back to 0 when no payment row exists for that earlier month.
ISNULL(t.payment_1, 0) payment_1,ISNULL(t.payment_2, 0) payment_2,ISNULL(t.payment_3, 0) payment_3,ISNULL(t.payment_4, 0) payment_4,ISNULL(t.payment_5, 0) payment_5,
ISNULL(t.past_due_1, 0) past_due_1,ISNULL(t.past_due_2, 0) past_due_2,ISNULL(t.past_due_3, 0) past_due_3,ISNULL(t.past_due_4, 0) past_due_4,ISNULL(t.past_due_5, 0) past_due_5,
ISNULL(t.remain_balance_1, 0) remain_balance_1,ISNULL(t.remain_balance_2, 0) remain_balance_2,ISNULL(t.remain_balance_3, 0) remain_balance_3,ISNULL(t.remain_balance_4, 0) remain_balance_4,ISNULL(t.remain_balance_5, 0) remain_balance_5, t.charge_off
from
(
select *,
-- payment_N / past_due_N / remain_balance_N: value taken from a row of the same loan whose
-- payment_date lies N month boundaries before the current row (DATEDIFF counts boundaries crossed).
-- NOTE(review): "top 1" has no ORDER BY, so if several rows match, which one is returned is not defined.
(select top 1 payment from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) payment_1,
(select top 1 payment from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) payment_2,
(select top 1 payment from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) payment_3,
(select top 1 payment from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) payment_4,
(select top 1 payment from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) payment_5,
(select top 1 past_due from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) past_due_1,
(select top 1 past_due from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) past_due_2,
(select top 1 past_due from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) past_due_3,
(select top 1 past_due from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) past_due_4,
(select top 1 past_due from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) past_due_5,
(select top 1 remain_balance from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) remain_balance_1,
(select top 1 remain_balance from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) remain_balance_2,
(select top 1 remain_balance from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) remain_balance_3,
(select top 1 remain_balance from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) remain_balance_4,
(select top 1 remain_balance from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) remain_balance_5,
-- Label: highest charged_off flag among rows of the same loan dated 1 to 3 months after the current row.
-- "+0" converts the bit column to an integer because MAX does not accept bit. NULL when no later row exists.
(select MAX(charged_off+0) from payments_info_10k p2 where DATEDIFF(month, p1.payment_date,p2.payment_date) IN (1,2,3) AND p1.loanId = p2.loanId) charge_off
from payments_info_10k p1 ) AS t inner join loan_info_10k l ON t.loanId = l.loanId inner join member_info_10k m ON l.memberId = m.memberId
-- Rows for which no label could be computed are excluded.
where t.charge_off IS NOT NULL
-- Test split: labelled rows dated exactly 2017-02-12.
and payment_date = '2017-02-12';
go
-- Scoring view over the 10k tables: same columns, lag features and label as the
-- training view, but restricted to payment rows dated after 2017-02-12.
drop view if exists vw_loan_chargeoff_score_10k
go
create view vw_loan_chargeoff_score_10k
as
select t.loanId, t.payment_date, t.payment, t.past_due, t.remain_balance,
l.loan_open_date, l.loanAmount,l.interestRate,l.grade,l.term,l.installment,l.isJointApplication,l.purpose,
m.memberId,m.residentialState,m.annualIncome,m.yearsEmployment,m.homeOwnership,m.incomeVerified,m.creditScore,m.dtiRatio,m.revolvingBalance,m.revolvingUtilizationRate,m.numDelinquency2Years,m.numDerogatoryRec,m.numInquiries6Mon,m.lengthCreditHistory,m.numOpenCreditLines,m.numTotalCreditLines,m.numChargeoff1year,
-- Lag features fall back to 0 when no payment row exists for that earlier month.
ISNULL(t.payment_1, 0) payment_1,ISNULL(t.payment_2, 0) payment_2,ISNULL(t.payment_3, 0) payment_3,ISNULL(t.payment_4, 0) payment_4,ISNULL(t.payment_5, 0) payment_5,
ISNULL(t.past_due_1, 0) past_due_1,ISNULL(t.past_due_2, 0) past_due_2,ISNULL(t.past_due_3, 0) past_due_3,ISNULL(t.past_due_4, 0) past_due_4,ISNULL(t.past_due_5, 0) past_due_5,
ISNULL(t.remain_balance_1, 0) remain_balance_1,ISNULL(t.remain_balance_2, 0) remain_balance_2,ISNULL(t.remain_balance_3, 0) remain_balance_3,ISNULL(t.remain_balance_4, 0) remain_balance_4,ISNULL(t.remain_balance_5, 0) remain_balance_5, t.charge_off
from
(
select *,
-- payment_N / past_due_N / remain_balance_N: value taken from a row of the same loan whose
-- payment_date lies N month boundaries before the current row (DATEDIFF counts boundaries crossed).
-- NOTE(review): "top 1" has no ORDER BY, so if several rows match, which one is returned is not defined.
(select top 1 payment from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) payment_1,
(select top 1 payment from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) payment_2,
(select top 1 payment from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) payment_3,
(select top 1 payment from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) payment_4,
(select top 1 payment from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) payment_5,
(select top 1 past_due from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) past_due_1,
(select top 1 past_due from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) past_due_2,
(select top 1 past_due from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) past_due_3,
(select top 1 past_due from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) past_due_4,
(select top 1 past_due from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) past_due_5,
(select top 1 remain_balance from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 1 AND p1.loanId = p2.loanId) remain_balance_1,
(select top 1 remain_balance from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 2 AND p1.loanId = p2.loanId) remain_balance_2,
(select top 1 remain_balance from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 3 AND p1.loanId = p2.loanId) remain_balance_3,
(select top 1 remain_balance from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 4 AND p1.loanId = p2.loanId) remain_balance_4,
(select top 1 remain_balance from payments_info_10k p2 where DATEDIFF(month, p2.payment_date,p1.payment_date) = 5 AND p1.loanId = p2.loanId) remain_balance_5,
-- Label: highest charged_off flag among rows of the same loan dated 1 to 3 months after the current row.
-- "+0" converts the bit column to an integer because MAX does not accept bit. NULL when no later row exists.
(select MAX(charged_off+0) from payments_info_10k p2 where DATEDIFF(month, p1.payment_date,p2.payment_date) IN (1,2,3) AND p1.loanId = p2.loanId) charge_off
from payments_info_10k p1 ) AS t inner join loan_info_10k l ON t.loanId = l.loanId inner join member_info_10k m ON l.memberId = m.memberId
-- NOTE(review): this keeps only rows that already have a later payment row (non-NULL label), so the
-- most recent payments of each loan may be left out of the scoring set - confirm this is intended.
where t.charge_off IS NOT NULL
-- Scoring split: labelled rows dated after 2017-02-12.
and payment_date > '2017-02-12';
go
-- Materialize each 10k view into a table of the same shape, so that later steps
-- read stored rows instead of re-evaluating the view (matters for large datasets).
-- Every table is dropped and rebuilt from scratch, then given a clustered
-- columnstore index.

-- training split
DROP TABLE IF EXISTS [loan_chargeoff_train_10k];
GO
SELECT src.*
INTO [loan_chargeoff_train_10k]
FROM [vw_loan_chargeoff_train_10k] AS src;
GO
CREATE CLUSTERED COLUMNSTORE INDEX [cci_loan_chargeoff_train_10k]
    ON [loan_chargeoff_train_10k];
GO

-- test split
DROP TABLE IF EXISTS [loan_chargeoff_test_10k];
GO
SELECT src.*
INTO [loan_chargeoff_test_10k]
FROM [vw_loan_chargeoff_test_10k] AS src;
GO
CREATE CLUSTERED COLUMNSTORE INDEX [cci_loan_chargeoff_test_10k]
    ON [loan_chargeoff_test_10k];
GO

-- scoring split
DROP TABLE IF EXISTS [loan_chargeoff_score_10k];
GO
SELECT src.*
INTO [loan_chargeoff_score_10k]
FROM [vw_loan_chargeoff_score_10k] AS src;
GO
CREATE CLUSTERED COLUMNSTORE INDEX [cci_loan_chargeoff_score_10k]
    ON [loan_chargeoff_score_10k];
GO

Просмотреть файл

@ -1,140 +0,0 @@
-- Training view over the large (1m) tables.
-- One row per payment record (payments_info_1m), joined to its loan and member
-- attributes, with:
--   * payment_k / past_due_k / remain_balance_k : values from the payment made
--     k calendar months earlier on the same loan (k = 1..5), 0 when there is none;
--   * charge_off : MAX(charged_off) over the payments 1 to 3 months later on the
--     same loan (the label).
-- Rows without any payment in the following 1-3 months (NULL label) are dropped.
-- NOTE: this view has no "_train" suffix in its name; the persist step further
-- down reads it under exactly this name.
/* Large DataSets */
DROP VIEW IF EXISTS [dbo].[vw_loan_chargeoff_1m]
GO
CREATE VIEW [dbo].[vw_loan_chargeoff_1m]
AS
SELECT
    t.loanId, t.payment_date, t.payment, t.past_due, t.remain_balance,
    l.loan_open_date, l.loanAmount, l.interestRate, l.grade, l.term, l.installment, l.isJointApplication, l.purpose,
    m.memberId, m.residentialState, m.annualIncome, m.yearsEmployment, m.homeOwnership, m.incomeVerified,
    m.creditScore, m.dtiRatio, m.revolvingBalance, m.revolvingUtilizationRate, m.numDelinquency2Years,
    m.numDerogatoryRec, m.numInquiries6Mon, m.lengthCreditHistory, m.numOpenCreditLines, m.numTotalCreditLines,
    m.numChargeoff1year,
    ISNULL(t.payment_1, 0) AS payment_1,
    ISNULL(t.payment_2, 0) AS payment_2,
    ISNULL(t.payment_3, 0) AS payment_3,
    ISNULL(t.payment_4, 0) AS payment_4,
    ISNULL(t.payment_5, 0) AS payment_5,
    ISNULL(t.past_due_1, 0) AS past_due_1,
    ISNULL(t.past_due_2, 0) AS past_due_2,
    ISNULL(t.past_due_3, 0) AS past_due_3,
    ISNULL(t.past_due_4, 0) AS past_due_4,
    ISNULL(t.past_due_5, 0) AS past_due_5,
    ISNULL(t.remain_balance_1, 0) AS remain_balance_1,
    ISNULL(t.remain_balance_2, 0) AS remain_balance_2,
    ISNULL(t.remain_balance_3, 0) AS remain_balance_3,
    ISNULL(t.remain_balance_4, 0) AS remain_balance_4,
    ISNULL(t.remain_balance_5, 0) AS remain_balance_5,
    t.charge_off
FROM
(
    SELECT
        p1.*,
        prev1.payment AS payment_1,
        prev2.payment AS payment_2,
        prev3.payment AS payment_3,
        prev4.payment AS payment_4,
        prev5.payment AS payment_5,
        prev1.past_due AS past_due_1,
        prev2.past_due AS past_due_2,
        prev3.past_due AS past_due_3,
        prev4.past_due AS past_due_4,
        prev5.past_due AS past_due_5,
        prev1.remain_balance AS remain_balance_1,
        prev2.remain_balance AS remain_balance_2,
        prev3.remain_balance AS remain_balance_3,
        prev4.remain_balance AS remain_balance_4,
        prev5.remain_balance AS remain_balance_5,
        -- Label: was the loan charged off in any of the next three months?
        -- "+ 0" presumably widens a bit column so MAX can aggregate it -- TODO confirm column type.
        (SELECT MAX(p2.charged_off + 0)
           FROM payments_info_1m p2
          WHERE DATEDIFF(month, p1.payment_date, p2.payment_date) IN (1, 2, 3)
            AND p1.loanId = p2.loanId) AS charge_off
    FROM payments_info_1m p1
    -- One lookup per lag month instead of three: all three lagged values for a
    -- given k now come from the same prior row. ORDER BY makes TOP 1 pick the
    -- latest payment of that month instead of an arbitrary row; OUTER APPLY
    -- yields NULLs when no such payment exists (turned into 0 above).
    OUTER APPLY (SELECT TOP 1 p2.payment, p2.past_due, p2.remain_balance
                   FROM payments_info_1m p2
                  WHERE p2.loanId = p1.loanId
                    AND DATEDIFF(month, p2.payment_date, p1.payment_date) = 1
                  ORDER BY p2.payment_date DESC) prev1
    OUTER APPLY (SELECT TOP 1 p2.payment, p2.past_due, p2.remain_balance
                   FROM payments_info_1m p2
                  WHERE p2.loanId = p1.loanId
                    AND DATEDIFF(month, p2.payment_date, p1.payment_date) = 2
                  ORDER BY p2.payment_date DESC) prev2
    OUTER APPLY (SELECT TOP 1 p2.payment, p2.past_due, p2.remain_balance
                   FROM payments_info_1m p2
                  WHERE p2.loanId = p1.loanId
                    AND DATEDIFF(month, p2.payment_date, p1.payment_date) = 3
                  ORDER BY p2.payment_date DESC) prev3
    OUTER APPLY (SELECT TOP 1 p2.payment, p2.past_due, p2.remain_balance
                   FROM payments_info_1m p2
                  WHERE p2.loanId = p1.loanId
                    AND DATEDIFF(month, p2.payment_date, p1.payment_date) = 4
                  ORDER BY p2.payment_date DESC) prev4
    OUTER APPLY (SELECT TOP 1 p2.payment, p2.past_due, p2.remain_balance
                   FROM payments_info_1m p2
                  WHERE p2.loanId = p1.loanId
                    AND DATEDIFF(month, p2.payment_date, p1.payment_date) = 5
                  ORDER BY p2.payment_date DESC) prev5
) AS t
INNER JOIN loan_info_1m l ON t.loanId = l.loanId
INNER JOIN member_info_1m m ON l.memberId = m.memberId
WHERE t.charge_off IS NOT NULL
  -- Training split: only charged-off rows from 2016-09-12..2016-12-12, plus
  -- every labelled row dated 2017-01-12.
  AND ((t.payment_date BETWEEN '2016-09-12' AND '2016-12-12' AND t.charge_off = 1)
       OR (t.payment_date = '2017-01-12'))
GO
-- Test view over the large (1m) tables.
-- Same shape as the training view: one row per payment record joined to loan and
-- member attributes, lagged payment / past_due / remain_balance values for the
-- previous 1..5 calendar months (0 when absent), and charge_off = MAX(charged_off)
-- over the payments 1 to 3 months later on the same loan.
-- Keeps only labelled rows whose payment_date is exactly 2017-02-12.
DROP VIEW IF EXISTS [dbo].[vw_loan_chargeoff_test_1m]
GO
CREATE VIEW [dbo].[vw_loan_chargeoff_test_1m]
AS
SELECT
    t.loanId, t.payment_date, t.payment, t.past_due, t.remain_balance,
    l.loan_open_date, l.loanAmount, l.interestRate, l.grade, l.term, l.installment, l.isJointApplication, l.purpose,
    m.memberId, m.residentialState, m.annualIncome, m.yearsEmployment, m.homeOwnership, m.incomeVerified,
    m.creditScore, m.dtiRatio, m.revolvingBalance, m.revolvingUtilizationRate, m.numDelinquency2Years,
    m.numDerogatoryRec, m.numInquiries6Mon, m.lengthCreditHistory, m.numOpenCreditLines, m.numTotalCreditLines,
    m.numChargeoff1year,
    ISNULL(t.payment_1, 0) AS payment_1,
    ISNULL(t.payment_2, 0) AS payment_2,
    ISNULL(t.payment_3, 0) AS payment_3,
    ISNULL(t.payment_4, 0) AS payment_4,
    ISNULL(t.payment_5, 0) AS payment_5,
    ISNULL(t.past_due_1, 0) AS past_due_1,
    ISNULL(t.past_due_2, 0) AS past_due_2,
    ISNULL(t.past_due_3, 0) AS past_due_3,
    ISNULL(t.past_due_4, 0) AS past_due_4,
    ISNULL(t.past_due_5, 0) AS past_due_5,
    ISNULL(t.remain_balance_1, 0) AS remain_balance_1,
    ISNULL(t.remain_balance_2, 0) AS remain_balance_2,
    ISNULL(t.remain_balance_3, 0) AS remain_balance_3,
    ISNULL(t.remain_balance_4, 0) AS remain_balance_4,
    ISNULL(t.remain_balance_5, 0) AS remain_balance_5,
    t.charge_off
FROM
(
    SELECT
        p1.*,
        prev1.payment AS payment_1,
        prev2.payment AS payment_2,
        prev3.payment AS payment_3,
        prev4.payment AS payment_4,
        prev5.payment AS payment_5,
        prev1.past_due AS past_due_1,
        prev2.past_due AS past_due_2,
        prev3.past_due AS past_due_3,
        prev4.past_due AS past_due_4,
        prev5.past_due AS past_due_5,
        prev1.remain_balance AS remain_balance_1,
        prev2.remain_balance AS remain_balance_2,
        prev3.remain_balance AS remain_balance_3,
        prev4.remain_balance AS remain_balance_4,
        prev5.remain_balance AS remain_balance_5,
        -- Label: was the loan charged off in any of the next three months?
        -- "+ 0" presumably widens a bit column so MAX can aggregate it -- TODO confirm column type.
        (SELECT MAX(p2.charged_off + 0)
           FROM payments_info_1m p2
          WHERE DATEDIFF(month, p1.payment_date, p2.payment_date) IN (1, 2, 3)
            AND p1.loanId = p2.loanId) AS charge_off
    FROM payments_info_1m p1
    -- One lookup per lag month instead of three: all three lagged values for a
    -- given k now come from the same prior row. ORDER BY makes TOP 1 pick the
    -- latest payment of that month instead of an arbitrary row; OUTER APPLY
    -- yields NULLs when no such payment exists (turned into 0 above).
    OUTER APPLY (SELECT TOP 1 p2.payment, p2.past_due, p2.remain_balance
                   FROM payments_info_1m p2
                  WHERE p2.loanId = p1.loanId
                    AND DATEDIFF(month, p2.payment_date, p1.payment_date) = 1
                  ORDER BY p2.payment_date DESC) prev1
    OUTER APPLY (SELECT TOP 1 p2.payment, p2.past_due, p2.remain_balance
                   FROM payments_info_1m p2
                  WHERE p2.loanId = p1.loanId
                    AND DATEDIFF(month, p2.payment_date, p1.payment_date) = 2
                  ORDER BY p2.payment_date DESC) prev2
    OUTER APPLY (SELECT TOP 1 p2.payment, p2.past_due, p2.remain_balance
                   FROM payments_info_1m p2
                  WHERE p2.loanId = p1.loanId
                    AND DATEDIFF(month, p2.payment_date, p1.payment_date) = 3
                  ORDER BY p2.payment_date DESC) prev3
    OUTER APPLY (SELECT TOP 1 p2.payment, p2.past_due, p2.remain_balance
                   FROM payments_info_1m p2
                  WHERE p2.loanId = p1.loanId
                    AND DATEDIFF(month, p2.payment_date, p1.payment_date) = 4
                  ORDER BY p2.payment_date DESC) prev4
    OUTER APPLY (SELECT TOP 1 p2.payment, p2.past_due, p2.remain_balance
                   FROM payments_info_1m p2
                  WHERE p2.loanId = p1.loanId
                    AND DATEDIFF(month, p2.payment_date, p1.payment_date) = 5
                  ORDER BY p2.payment_date DESC) prev5
) AS t
INNER JOIN loan_info_1m l ON t.loanId = l.loanId
INNER JOIN member_info_1m m ON l.memberId = m.memberId
WHERE t.charge_off IS NOT NULL
  -- Test split: the single payment date 2017-02-12.
  AND t.payment_date = '2017-02-12'
GO
-- Scoring view over the large (1m) tables.
-- Same shape as the training and test views: one row per payment record joined to
-- loan and member attributes, lagged payment / past_due / remain_balance values for
-- the previous 1..5 calendar months (0 when absent), and charge_off =
-- MAX(charged_off) over the payments 1 to 3 months later on the same loan.
-- Keeps only rows dated after 2017-02-12 that still have a non-NULL charge_off.
DROP VIEW IF EXISTS [dbo].[vw_loan_chargeoff_score_1m]
GO
CREATE VIEW [dbo].[vw_loan_chargeoff_score_1m]
AS
SELECT
    t.loanId, t.payment_date, t.payment, t.past_due, t.remain_balance,
    l.loan_open_date, l.loanAmount, l.interestRate, l.grade, l.term, l.installment, l.isJointApplication, l.purpose,
    m.memberId, m.residentialState, m.annualIncome, m.yearsEmployment, m.homeOwnership, m.incomeVerified,
    m.creditScore, m.dtiRatio, m.revolvingBalance, m.revolvingUtilizationRate, m.numDelinquency2Years,
    m.numDerogatoryRec, m.numInquiries6Mon, m.lengthCreditHistory, m.numOpenCreditLines, m.numTotalCreditLines,
    m.numChargeoff1year,
    ISNULL(t.payment_1, 0) AS payment_1,
    ISNULL(t.payment_2, 0) AS payment_2,
    ISNULL(t.payment_3, 0) AS payment_3,
    ISNULL(t.payment_4, 0) AS payment_4,
    ISNULL(t.payment_5, 0) AS payment_5,
    ISNULL(t.past_due_1, 0) AS past_due_1,
    ISNULL(t.past_due_2, 0) AS past_due_2,
    ISNULL(t.past_due_3, 0) AS past_due_3,
    ISNULL(t.past_due_4, 0) AS past_due_4,
    ISNULL(t.past_due_5, 0) AS past_due_5,
    ISNULL(t.remain_balance_1, 0) AS remain_balance_1,
    ISNULL(t.remain_balance_2, 0) AS remain_balance_2,
    ISNULL(t.remain_balance_3, 0) AS remain_balance_3,
    ISNULL(t.remain_balance_4, 0) AS remain_balance_4,
    ISNULL(t.remain_balance_5, 0) AS remain_balance_5,
    t.charge_off
FROM
(
    SELECT
        p1.*,
        prev1.payment AS payment_1,
        prev2.payment AS payment_2,
        prev3.payment AS payment_3,
        prev4.payment AS payment_4,
        prev5.payment AS payment_5,
        prev1.past_due AS past_due_1,
        prev2.past_due AS past_due_2,
        prev3.past_due AS past_due_3,
        prev4.past_due AS past_due_4,
        prev5.past_due AS past_due_5,
        prev1.remain_balance AS remain_balance_1,
        prev2.remain_balance AS remain_balance_2,
        prev3.remain_balance AS remain_balance_3,
        prev4.remain_balance AS remain_balance_4,
        prev5.remain_balance AS remain_balance_5,
        -- Label: was the loan charged off in any of the next three months?
        -- "+ 0" presumably widens a bit column so MAX can aggregate it -- TODO confirm column type.
        (SELECT MAX(p2.charged_off + 0)
           FROM payments_info_1m p2
          WHERE DATEDIFF(month, p1.payment_date, p2.payment_date) IN (1, 2, 3)
            AND p1.loanId = p2.loanId) AS charge_off
    FROM payments_info_1m p1
    -- One lookup per lag month instead of three: all three lagged values for a
    -- given k now come from the same prior row. ORDER BY makes TOP 1 pick the
    -- latest payment of that month instead of an arbitrary row; OUTER APPLY
    -- yields NULLs when no such payment exists (turned into 0 above).
    OUTER APPLY (SELECT TOP 1 p2.payment, p2.past_due, p2.remain_balance
                   FROM payments_info_1m p2
                  WHERE p2.loanId = p1.loanId
                    AND DATEDIFF(month, p2.payment_date, p1.payment_date) = 1
                  ORDER BY p2.payment_date DESC) prev1
    OUTER APPLY (SELECT TOP 1 p2.payment, p2.past_due, p2.remain_balance
                   FROM payments_info_1m p2
                  WHERE p2.loanId = p1.loanId
                    AND DATEDIFF(month, p2.payment_date, p1.payment_date) = 2
                  ORDER BY p2.payment_date DESC) prev2
    OUTER APPLY (SELECT TOP 1 p2.payment, p2.past_due, p2.remain_balance
                   FROM payments_info_1m p2
                  WHERE p2.loanId = p1.loanId
                    AND DATEDIFF(month, p2.payment_date, p1.payment_date) = 3
                  ORDER BY p2.payment_date DESC) prev3
    OUTER APPLY (SELECT TOP 1 p2.payment, p2.past_due, p2.remain_balance
                   FROM payments_info_1m p2
                  WHERE p2.loanId = p1.loanId
                    AND DATEDIFF(month, p2.payment_date, p1.payment_date) = 4
                  ORDER BY p2.payment_date DESC) prev4
    OUTER APPLY (SELECT TOP 1 p2.payment, p2.past_due, p2.remain_balance
                   FROM payments_info_1m p2
                  WHERE p2.loanId = p1.loanId
                    AND DATEDIFF(month, p2.payment_date, p1.payment_date) = 5
                  ORDER BY p2.payment_date DESC) prev5
) AS t
INNER JOIN loan_info_1m l ON t.loanId = l.loanId
INNER JOIN member_info_1m m ON l.memberId = m.memberId
WHERE t.charge_off IS NOT NULL
  -- Scoring split: everything dated after the test date.
  AND t.payment_date > '2017-02-12'
GO
-- Materialize each 1m view into a table of the same shape, so that later steps
-- read stored rows instead of re-evaluating the view (matters for large datasets).
-- Every table is dropped and rebuilt from scratch, then given a clustered
-- columnstore index.
/* Large dataset */

-- training split (the source view carries no "_train" in its name)
DROP TABLE IF EXISTS [loan_chargeoff_train_1m];
GO
SELECT src.*
INTO [loan_chargeoff_train_1m]
FROM [vw_loan_chargeoff_1m] AS src;
GO
CREATE CLUSTERED COLUMNSTORE INDEX [cci_loan_chargeoff_train_1m]
    ON [loan_chargeoff_train_1m];
GO

-- test split
DROP TABLE IF EXISTS [loan_chargeoff_test_1m];
GO
SELECT src.*
INTO [loan_chargeoff_test_1m]
FROM [vw_loan_chargeoff_test_1m] AS src;
GO
CREATE CLUSTERED COLUMNSTORE INDEX [cci_loan_chargeoff_test_1m]
    ON [loan_chargeoff_test_1m];
GO

-- scoring split
DROP TABLE IF EXISTS [loan_chargeoff_score_1m];
GO
SELECT src.*
INTO [loan_chargeoff_score_1m]
FROM [vw_loan_chargeoff_score_1m] AS src;
GO
CREATE CLUSTERED COLUMNSTORE INDEX [cci_loan_chargeoff_score_1m]
    ON [loan_chargeoff_score_1m];
GO