added copyright & license
This commit is contained in:
Родитель
2bcc1e851a
Коммит
e493818b8d
|
@ -16,6 +16,16 @@
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"cells": [
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Copyright (c) Microsoft Corporation.\n",
|
||||||
|
"# Licensed under the MIT License."
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
|
@ -27,8 +37,7 @@
|
||||||
"2. Load the datasets\n",
|
"2. Load the datasets\n",
|
||||||
"3. Convert string to list and remove punctuations from text\n",
|
"3. Convert string to list and remove punctuations from text\n",
|
||||||
"4. Store datasets\n",
|
"4. Store datasets\n",
|
||||||
"\n",
|
"\n"
|
||||||
""
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -49,8 +58,7 @@
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"### Define variables (input folder, datasets, ....)\n",
|
"### Define variables (input folder, datasets, ....)\n"
|
||||||
""
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -72,8 +80,7 @@
|
||||||
"\n",
|
"\n",
|
||||||
"# Azure Storage path\n",
|
"# Azure Storage path\n",
|
||||||
"adls_path = \"abfss://%s@%s.dfs.core.windows.net/MicrosoftNewsDataset/\" % (container, account_name)\n",
|
"adls_path = \"abfss://%s@%s.dfs.core.windows.net/MicrosoftNewsDataset/\" % (container, account_name)\n",
|
||||||
"\n",
|
"\n"
|
||||||
""
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -253,16 +260,14 @@
|
||||||
" df_preprocess = df_preprocess.dropna()\n",
|
" df_preprocess = df_preprocess.dropna()\n",
|
||||||
"\n",
|
"\n",
|
||||||
" # Keep results in dictionary\n",
|
" # Keep results in dictionary\n",
|
||||||
" results[key] = df_preprocess\n",
|
" results[key] = df_preprocess\n"
|
||||||
""
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"## Store datasets\n",
|
"## Store datasets\n"
|
||||||
""
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -274,9 +279,8 @@
|
||||||
"# Write dataset to spark table\n",
|
"# Write dataset to spark table\n",
|
||||||
"results['train'].write.mode('overwrite').saveAsTable('default.ActivityTrain')\n",
|
"results['train'].write.mode('overwrite').saveAsTable('default.ActivityTrain')\n",
|
||||||
"results['test'].write.mode('overwrite').saveAsTable('default.ActivityTest')\n",
|
"results['test'].write.mode('overwrite').saveAsTable('default.ActivityTest')\n",
|
||||||
"results['dev'].write.mode('overwrite').saveAsTable('default.ActivityDev')\n",
|
"results['dev'].write.mode('overwrite').saveAsTable('default.ActivityDev')\n"
|
||||||
""
|
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
|
@ -16,6 +16,16 @@
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"cells": [
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Copyright (c) Microsoft Corporation.\n",
|
||||||
|
"# Licensed under the MIT License."
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
|
@ -29,8 +39,7 @@
|
||||||
"4. Train model\n",
|
"4. Train model\n",
|
||||||
"5. Test model\n",
|
"5. Test model\n",
|
||||||
"6. Store model and transformer\n",
|
"6. Store model and transformer\n",
|
||||||
"\n",
|
"\n"
|
||||||
""
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -58,8 +67,7 @@
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"## Define variables (input folder, datasets, ....)\n",
|
"## Define variables (input folder, datasets, ....)\n"
|
||||||
""
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -187,8 +195,7 @@
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"## Define and Train the model\n",
|
"## Define and Train the model\n"
|
||||||
""
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -277,16 +284,14 @@
|
||||||
"f1, weightedPrecision, weightedRecall, auc = evaluate_model(model,df_dev_feature)\n",
|
"f1, weightedPrecision, weightedRecall, auc = evaluate_model(model,df_dev_feature)\n",
|
||||||
"print('DEV AUC:', auc)\n",
|
"print('DEV AUC:', auc)\n",
|
||||||
"print('DEV F1:', f1)\n",
|
"print('DEV F1:', f1)\n",
|
||||||
"print('DEV Precision:', weightedPrecision, 'DEV Recall:',weightedRecall)\n",
|
"print('DEV Precision:', weightedPrecision, 'DEV Recall:',weightedRecall)\n"
|
||||||
""
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"## Store fitted feature processor and model\n",
|
"## Store fitted feature processor and model\n"
|
||||||
""
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -297,8 +302,7 @@
|
||||||
"source": [
|
"source": [
|
||||||
"# store model\n",
|
"# store model\n",
|
||||||
"fitted_processor.write().overwrite().save(feature_processor_name)\n",
|
"fitted_processor.write().overwrite().save(feature_processor_name)\n",
|
||||||
"model.write().overwrite().save(model_name)\n",
|
"model.write().overwrite().save(model_name)\n"
|
||||||
""
|
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
|
@ -16,6 +16,16 @@
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"cells": [
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Copyright (c) Microsoft Corporation.\n",
|
||||||
|
"# Licensed under the MIT License."
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
|
@ -29,8 +39,7 @@
|
||||||
"3. Apply model to dataset\n",
|
"3. Apply model to dataset\n",
|
||||||
"4. Cleanup results and store model\n",
|
"4. Cleanup results and store model\n",
|
||||||
"5. Sample queries\n",
|
"5. Sample queries\n",
|
||||||
"\n",
|
"\n"
|
||||||
""
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -49,8 +58,7 @@
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"## Define variables \n",
|
"## Define variables \n"
|
||||||
""
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -65,16 +73,14 @@
|
||||||
"feature_processor_name = 'feature_proprecssor.mml'\n",
|
"feature_processor_name = 'feature_proprecssor.mml'\n",
|
||||||
"col_user = 'User_ID'\n",
|
"col_user = 'User_ID'\n",
|
||||||
"col_item = 'Article_ID'\n",
|
"col_item = 'Article_ID'\n",
|
||||||
"dataset_test = 'default.activitytest'\n",
|
"dataset_test = 'default.activitytest'\n"
|
||||||
""
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"## Load dataset, feature processor and model\n",
|
"## Load dataset, feature processor and model\n"
|
||||||
""
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -104,8 +110,7 @@
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"## Apply feature processor and model to dataset\n",
|
"## Apply feature processor and model to dataset\n"
|
||||||
""
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -249,4 +254,4 @@
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
Загрузка…
Ссылка в новой задаче