1
0
Форкнуть 0
This commit is contained in:
Matt Savarino 2021-07-20 12:24:44 -07:00
Родитель 2bcc1e851a
Коммит e493818b8d
3 изменённых файлов: 49 добавлений и 36 удалений

Просмотреть файл

@ -16,6 +16,16 @@
} }
}, },
"cells": [ "cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Copyright (c) Microsoft Corporation.\n",
"# Licensed under the MIT License."
]
},
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
@ -27,8 +37,7 @@
"2. Load the datasets\n", "2. Load the datasets\n",
"3. Convert string to list and remove punctuations from text\n", "3. Convert string to list and remove punctuations from text\n",
"4. Store datasets\n", "4. Store datasets\n",
"\n", "\n"
""
] ]
}, },
{ {
@ -49,8 +58,7 @@
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
"source": [ "source": [
"### Define variables (input folder, datasets, ....)\n", "### Define variables (input folder, datasets, ....)\n"
""
] ]
}, },
{ {
@ -72,8 +80,7 @@
"\n", "\n",
"# Azure Storage path\n", "# Azure Storage path\n",
"adls_path = \"abfss://%s@%s.dfs.core.windows.net/MicrosoftNewsDataset/\" % (container, account_name)\n", "adls_path = \"abfss://%s@%s.dfs.core.windows.net/MicrosoftNewsDataset/\" % (container, account_name)\n",
"\n", "\n"
""
] ]
}, },
{ {
@ -253,16 +260,14 @@
" df_preprocess = df_preprocess.dropna()\n", " df_preprocess = df_preprocess.dropna()\n",
"\n", "\n",
" # Keep results in dictionary\n", " # Keep results in dictionary\n",
" results[key] = df_preprocess\n", " results[key] = df_preprocess\n"
""
] ]
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
"source": [ "source": [
"## Store datasets\n", "## Store datasets\n"
""
] ]
}, },
{ {
@ -274,9 +279,8 @@
"# Write dataset to spark table\n", "# Write dataset to spark table\n",
"results['train'].write.mode('overwrite').saveAsTable('default.ActivityTrain')\n", "results['train'].write.mode('overwrite').saveAsTable('default.ActivityTrain')\n",
"results['test'].write.mode('overwrite').saveAsTable('default.ActivityTest')\n", "results['test'].write.mode('overwrite').saveAsTable('default.ActivityTest')\n",
"results['dev'].write.mode('overwrite').saveAsTable('default.ActivityDev')\n", "results['dev'].write.mode('overwrite').saveAsTable('default.ActivityDev')\n"
""
] ]
} }
] ]
} }

Просмотреть файл

@ -16,6 +16,16 @@
} }
}, },
"cells": [ "cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Copyright (c) Microsoft Corporation.\n",
"# Licensed under the MIT License."
]
},
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
@ -29,8 +39,7 @@
"4. Train model\n", "4. Train model\n",
"5. Test model\n", "5. Test model\n",
"6. Store model and transformer\n", "6. Store model and transformer\n",
"\n", "\n"
""
] ]
}, },
{ {
@ -58,8 +67,7 @@
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
"source": [ "source": [
"## Define variables (input folder, datasets, ....)\n", "## Define variables (input folder, datasets, ....)\n"
""
] ]
}, },
{ {
@ -187,8 +195,7 @@
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
"source": [ "source": [
"## Define and Train the model\n", "## Define and Train the model\n"
""
] ]
}, },
{ {
@ -277,16 +284,14 @@
"f1, weightedPrecision, weightedRecall, auc = evaluate_model(model,df_dev_feature)\n", "f1, weightedPrecision, weightedRecall, auc = evaluate_model(model,df_dev_feature)\n",
"print('DEV AUC:', auc)\n", "print('DEV AUC:', auc)\n",
"print('DEV F1:', f1)\n", "print('DEV F1:', f1)\n",
"print('DEV Precision:', weightedPrecision, 'DEV Recall:',weightedRecall)\n", "print('DEV Precision:', weightedPrecision, 'DEV Recall:',weightedRecall)\n"
""
] ]
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
"source": [ "source": [
"## Store fitted feature processor and model\n", "## Store fitted feature processor and model\n"
""
] ]
}, },
{ {
@ -297,8 +302,7 @@
"source": [ "source": [
"# store model\n", "# store model\n",
"fitted_processor.write().overwrite().save(feature_processor_name)\n", "fitted_processor.write().overwrite().save(feature_processor_name)\n",
"model.write().overwrite().save(model_name)\n", "model.write().overwrite().save(model_name)\n"
""
] ]
} }
] ]

Просмотреть файл

@ -16,6 +16,16 @@
} }
}, },
"cells": [ "cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Copyright (c) Microsoft Corporation.\n",
"# Licensed under the MIT License."
]
},
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
@ -29,8 +39,7 @@
"3. Apply model to dataset\n", "3. Apply model to dataset\n",
"4. Cleanup results and store model\n", "4. Cleanup results and store model\n",
"5. Sample queries\n", "5. Sample queries\n",
"\n", "\n"
""
] ]
}, },
{ {
@ -49,8 +58,7 @@
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
"source": [ "source": [
"## Define variables \n", "## Define variables \n"
""
] ]
}, },
{ {
@ -65,16 +73,14 @@
"feature_processor_name = 'feature_proprecssor.mml'\n", "feature_processor_name = 'feature_proprecssor.mml'\n",
"col_user = 'User_ID'\n", "col_user = 'User_ID'\n",
"col_item = 'Article_ID'\n", "col_item = 'Article_ID'\n",
"dataset_test = 'default.activitytest'\n", "dataset_test = 'default.activitytest'\n"
""
] ]
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
"source": [ "source": [
"## Load dataset, feature processor and model\n", "## Load dataset, feature processor and model\n"
""
] ]
}, },
{ {
@ -104,8 +110,7 @@
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
"source": [ "source": [
"## Apply feature processor and model to dataset\n", "## Apply feature processor and model to dataset\n"
""
] ]
}, },
{ {
@ -249,4 +254,4 @@
] ]
} }
] ]
} }