This commit is contained in:
Eqbal Zaffar 2017-06-28 18:47:38 -07:00
Родитель 9f482c4c97
Коммит 26450c2eab
1 изменённых файлов: 30 добавлений и 170 удалений

Просмотреть файл

@ -118,7 +118,7 @@
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"Elapsed time: 00:00:00.6056614\n" "Elapsed time: 00:00:00.6347310\n"
] ]
}, },
{ {
@ -349,200 +349,60 @@
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"[1] \"Starting to train with logistic_reg\"\n", "[1] \"Starting to train with logistic_reg\"\n",
"Elapsed time: 00:00:00.0261225\n", "Elapsed time: 00:00:00.0377952\n",
"[1] \"Done training.\"\n" "[1] \"Done training.\"\n",
] "[1] \"Done writing predictions for evaluation of model.\"\n",
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Warning message in `[<-`(`*tmp*`, \"outputDataSource\", value = <S4 object of class structure(\"RxSqlServerData\", package = \"RevoScaleR\")>):\n",
"\"implicit list embedding of S4 objects is deprecated\""
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[1] \"Done writing predictions for evaluation of model.\"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Warning message in `[<-`(`*tmp*`, \"outputDataSource\", value = <S4 object of class structure(\"RxSqlServerData\", package = \"RevoScaleR\")>):\n",
"\"implicit list embedding of S4 objects is deprecated\""
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Rows Read: 101, Total Rows Processed: 101, Total Chunk Time: 0.007 seconds \n", "Rows Read: 101, Total Rows Processed: 101, Total Chunk Time: 0.007 seconds \n",
"[1] \"Starting to train with fast_trees\"\n", "[1] \"Starting to train with fast_trees\"\n",
"[1] \"Done training.\"\n", "[1] \"Done training.\"\n",
"Elapsed time: 00:00:00.0465263\n" "Elapsed time: 00:00:00.0766733\n",
] "[1] \"Done writing predictions for evaluation of model.\"\n",
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Warning message in `[<-`(`*tmp*`, \"outputDataSource\", value = <S4 object of class structure(\"RxSqlServerData\", package = \"RevoScaleR\")>):\n",
"\"implicit list embedding of S4 objects is deprecated\""
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[1] \"Done writing predictions for evaluation of model.\"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Warning message in `[<-`(`*tmp*`, \"outputDataSource\", value = <S4 object of class structure(\"RxSqlServerData\", package = \"RevoScaleR\")>):\n",
"\"implicit list embedding of S4 objects is deprecated\""
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Rows Read: 101, Total Rows Processed: 101, Total Chunk Time: 0.007 seconds \n", "Rows Read: 101, Total Rows Processed: 101, Total Chunk Time: 0.007 seconds \n",
"[1] \"Starting to train with fast_forest\"\n", "[1] \"Starting to train with fast_forest\"\n",
"[1] \"Done training.\"\n", "[1] \"Done training.\"\n",
"Elapsed time: 00:00:00.0237266\n" "Elapsed time: 00:00:00.0455352\n",
] "[1] \"Done writing predictions for evaluation of model.\"\n",
}, "Rows Read: 101, Total Rows Processed: 101, Total Chunk Time: 0.007 seconds \n",
{
"name": "stderr",
"output_type": "stream",
"text": [
"Warning message in `[<-`(`*tmp*`, \"outputDataSource\", value = <S4 object of class structure(\"RxSqlServerData\", package = \"RevoScaleR\")>):\n",
"\"implicit list embedding of S4 objects is deprecated\""
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[1] \"Done writing predictions for evaluation of model.\"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Warning message in `[<-`(`*tmp*`, \"outputDataSource\", value = <S4 object of class structure(\"RxSqlServerData\", package = \"RevoScaleR\")>):\n",
"\"implicit list embedding of S4 objects is deprecated\""
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Rows Read: 101, Total Rows Processed: 101, Total Chunk Time: 0.006 seconds \n",
"[1] \"Starting to train with fast_linear\"\n", "[1] \"Starting to train with fast_linear\"\n",
"Elapsed time: 00:00:00.0715138\n", "Elapsed time: 00:00:00.0402667\n",
"[1] \"Done training.\"\n" "[1] \"Done training.\"\n",
] "[1] \"Done writing predictions for evaluation of model.\"\n",
}, "Rows Read: 101, Total Rows Processed: 101, Total Chunk Time: 0.007 seconds \n",
{
"name": "stderr",
"output_type": "stream",
"text": [
"Warning message in `[<-`(`*tmp*`, \"outputDataSource\", value = <S4 object of class structure(\"RxSqlServerData\", package = \"RevoScaleR\")>):\n",
"\"implicit list embedding of S4 objects is deprecated\""
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[1] \"Done writing predictions for evaluation of model.\"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Warning message in `[<-`(`*tmp*`, \"outputDataSource\", value = <S4 object of class structure(\"RxSqlServerData\", package = \"RevoScaleR\")>):\n",
"\"implicit list embedding of S4 objects is deprecated\""
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Rows Read: 101, Total Rows Processed: 101, Total Chunk Time: 0.006 seconds \n",
"[1] \"Starting to train with neural_net\"\n", "[1] \"Starting to train with neural_net\"\n",
"[1] \"Done training.\"\n", "[1] \"Done training.\"\n",
"Elapsed time: 00:00:00.0719790\n" "Elapsed time: 00:00:00.0938715\n",
] "[1] \"Done writing predictions for evaluation of model.\"\n",
}, "Rows Read: 101, Total Rows Processed: 101, Total Chunk Time: 0.010 seconds \n"
{
"name": "stderr",
"output_type": "stream",
"text": [
"Warning message in `[<-`(`*tmp*`, \"outputDataSource\", value = <S4 object of class structure(\"RxSqlServerData\", package = \"RevoScaleR\")>):\n",
"\"implicit list embedding of S4 objects is deprecated\""
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[1] \"Done writing predictions for evaluation of model.\"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Warning message in `[<-`(`*tmp*`, \"outputDataSource\", value = <S4 object of class structure(\"RxSqlServerData\", package = \"RevoScaleR\")>):\n",
"\"implicit list embedding of S4 objects is deprecated\""
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Rows Read: 101, Total Rows Processed: 101, Total Chunk Time: 0.006 seconds \n"
] ]
}, },
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"$model_name\n", "$model_name\n",
"[1] \"fast_trees\"\n", "[1] \"fast_linear\"\n",
"\n", "\n",
"$model\n", "$model\n",
"Call:\n", "Call:\n",
"rxFastTrees(formula = model_formula, data = training_set, mlTransforms = ml_trans)\n", "rxFastLinear(formula = model_formula, data = training_set, mlTransforms = ml_trans)\n",
"\n", "\n",
"FastTreeBinaryClassification (BinaryClassifierTrainer) for: charge_off~payment+past_due+remain_balance+loanAmount+interestRate+grade+term+installment+isJointApplication+purpose+residentialState+branch+annualIncome+yearsEmployment+homeOwnership+incomeVerified+creditScore+dtiRatio+revolvingBalance+revolvingUtilizationRate+numDelinquency2Years+numDerogatoryRec+numInquiries6Mon+lengthCreditHistory+numOpenCreditLines+numTotalCreditLines+numChargeoff1year+payment_1+payment_2+payment_3+payment_4+payment_5+past_due_1+past_due_2+past_due_3+past_due_4+past_due_5+remain_balance_1+remain_balance_2+remain_balance_3+remain_balance_4+remain_balance_5\n", "SDCA (BinaryClassifierTrainer) for: charge_off~payment+past_due+remain_balance+loanAmount+interestRate+grade+term+installment+isJointApplication+purpose+residentialState+branch+annualIncome+yearsEmployment+homeOwnership+incomeVerified+creditScore+dtiRatio+revolvingBalance+revolvingUtilizationRate+numDelinquency2Years+numDerogatoryRec+numInquiries6Mon+lengthCreditHistory+numOpenCreditLines+numTotalCreditLines+numChargeoff1year+payment_1+payment_2+payment_3+payment_4+payment_5+past_due_1+past_due_2+past_due_3+past_due_4+past_due_5+remain_balance_1+remain_balance_2+remain_balance_3+remain_balance_4+remain_balance_5\n",
"Data: training_set (RxSqlServerData Data Source) \n", "Data: training_set (RxSqlServerData Data Source) \n",
"\n", "\n",
"$stats\n", "$stats\n",
"$stats$auc\n", "$stats$auc\n",
"[1] 0.9928972\n", "[1] 0.9975022\n",
"\n", "\n",
"$stats$accuracy\n", "$stats$accuracy\n",
"[1] 0.9936091\n", "[1] 0.993602\n",
"\n", "\n",
"$stats$precision\n", "$stats$precision\n",
"[1] 0.8670886\n", "[1] 0.8452381\n",
"\n", "\n",
"$stats$recall\n", "$stats$recall\n",
"[1] 0.7919075\n", "[1] 0.8208092\n",
"\n", "\n",
"$stats$f1score\n", "$stats$f1score\n",
"[1] 0.8277946\n", "[1] 0.8328446\n",
"\n" "\n"
] ]
}, },
@ -552,7 +412,6 @@
], ],
"source": [ "source": [
"# train on MicrosoftML algorithms\n", "# train on MicrosoftML algorithms\n",
"# Warning message about \"implicit list embedding of S4 objects is deprecated\" is expected\n",
"ml_algs <- c(\"logistic_reg\", \"fast_trees\", \"fast_forest\", \"fast_linear\", \"neural_net\")\n", "ml_algs <- c(\"logistic_reg\", \"fast_trees\", \"fast_forest\", \"fast_linear\", \"neural_net\")\n",
"model_stats <- lapply(ml_algs, train_model)\n", "model_stats <- lapply(ml_algs, train_model)\n",
"\n", "\n",
@ -611,6 +470,7 @@
" \n", " \n",
" # Warning: this will drop and recreate the prediction table\n", " # Warning: this will drop and recreate the prediction table\n",
" rxPredict(best_model$model, scoring_data, outData = prediction_data, extraVarsToWrite = c(\"loanId\", \"payment_date\"), overwrite=TRUE)\n", " rxPredict(best_model$model, scoring_data, outData = prediction_data, extraVarsToWrite = c(\"loanId\", \"payment_date\"), overwrite=TRUE)\n",
" print(\"Completed batch scoring.\")\n",
"}" "}"
] ]
}, },
@ -622,21 +482,21 @@
}, },
"outputs": [ "outputs": [
{ {
"name": "stderr", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"Warning message in `[<-`(`*tmp*`, \"outputDataSource\", value = <S4 object of class structure(\"RxSqlServerData\", package = \"RevoScaleR\")>):\n", "[1] \"Completed batch scoring.\"\n"
"\"implicit list embedding of S4 objects is deprecated\""
] ]
} }
], ],
"source": [ "source": [
"# Warning message \"implicit list embedding of S4 objects is deprecated\" is expected.\n", "# call batch scoring function\n",
"batch_score()" "batch_score()"
] ]
} }
], ],
"metadata": { "metadata": {
"anaconda-cloud": {},
"kernelspec": { "kernelspec": {
"display_name": "R", "display_name": "R",
"language": "R", "language": "R",
@ -648,7 +508,7 @@
"mimetype": "text/x-r-source", "mimetype": "text/x-r-source",
"name": "R", "name": "R",
"pygments_lexer": "r", "pygments_lexer": "r",
"version": "3.3.2" "version": "3.3.3"
} }
}, },
"nbformat": 4, "nbformat": 4,