diff --git a/src/01-Load-Data.ipynb b/src/01-Load-Data.ipynb index 3ed3cee..feb8817 100644 --- a/src/01-Load-Data.ipynb +++ b/src/01-Load-Data.ipynb @@ -16,6 +16,16 @@ } }, "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Copyright (c) Microsoft Corporation.\n", + "# Licensed under the MIT License." + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -27,8 +37,7 @@ "2. Load the datasets\n", "3. Convert string to list and remove punctuations from text\n", "4. Store datasets\n", - "\n", - "" + "\n" ] }, { @@ -49,8 +58,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Define variables (input folder, datasets, ....)\n", - "" + "### Define variables (input folder, datasets, ....)\n" ] }, { @@ -72,8 +80,7 @@ "\n", "# Azure Storage path\n", "adls_path = \"abfss://%s@%s.dfs.core.windows.net/MicrosoftNewsDataset/\" % (container, account_name)\n", - "\n", - "" + "\n" ] }, { @@ -253,16 +260,14 @@ " df_preprocess = df_preprocess.dropna()\n", "\n", " # Keep results in dictionary\n", - " results[key] = df_preprocess\n", - "" + " results[key] = df_preprocess\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Store datasets\n", - "" + "## Store datasets\n" ] }, { @@ -274,9 +279,8 @@ "# Write dataset to spark table\n", "results['train'].write.mode('overwrite').saveAsTable('default.ActivityTrain')\n", "results['test'].write.mode('overwrite').saveAsTable('default.ActivityTest')\n", - "results['dev'].write.mode('overwrite').saveAsTable('default.ActivityDev')\n", - "" + "results['dev'].write.mode('overwrite').saveAsTable('default.ActivityDev')\n" ] } ] -} +} \ No newline at end of file diff --git a/src/02-Train-Model.ipynb b/src/02-Train-Model.ipynb index ab1a81b..acd8582 100644 --- a/src/02-Train-Model.ipynb +++ b/src/02-Train-Model.ipynb @@ -16,6 +16,16 @@ } }, "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Copyright (c) Microsoft Corporation.\n", + "# Licensed under the MIT License." + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -29,8 +39,7 @@ "4. Train model\n", "5. Test model\n", "6. Store model and transformer\n", - "\n", - "" + "\n" ] }, { @@ -58,8 +67,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Define variables (input folder, datasets, ....)\n", - "" + "## Define variables (input folder, datasets, ....)\n" ] }, { @@ -187,8 +195,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Define and Train the model\n", - "" + "## Define and Train the model\n" ] }, { @@ -277,16 +284,14 @@ "f1, weightedPrecision, weightedRecall, auc = evaluate_model(model,df_dev_feature)\n", "print('DEV AUC:', auc)\n", "print('DEV F1:', f1)\n", - "print('DEV Precision:', weightedPrecision, 'DEV Recall:',weightedRecall)\n", - "" + "print('DEV Precision:', weightedPrecision, 'DEV Recall:',weightedRecall)\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Store fitted feature processor and model\n", - "" + "## Store fitted feature processor and model\n" ] }, { @@ -297,8 +302,7 @@ "source": [ "# store model\n", "fitted_processor.write().overwrite().save(feature_processor_name)\n", - "model.write().overwrite().save(model_name)\n", - "" + "model.write().overwrite().save(model_name)\n" ] } ] diff --git a/src/03-Recommendations.ipynb b/src/03-Recommendations.ipynb index c5bfe25..b72eab6 100644 --- a/src/03-Recommendations.ipynb +++ b/src/03-Recommendations.ipynb @@ -16,6 +16,16 @@ } }, "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Copyright (c) Microsoft Corporation.\n", + "# Licensed under the MIT License." + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -29,8 +39,7 @@ "3. Apply model to dataset\n", "4. Cleanup results and store model\n", "5. Sample queries\n", - "\n", - "" + "\n" ] }, { @@ -49,8 +58,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Define variables \n", - "" + "## Define variables \n" ] }, { @@ -65,16 +73,14 @@ "feature_processor_name = 'feature_proprecssor.mml'\n", "col_user = 'User_ID'\n", "col_item = 'Article_ID'\n", - "dataset_test = 'default.activitytest'\n", - "" + "dataset_test = 'default.activitytest'\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Load dataset, feature processor and model\n", - "" + "## Load dataset, feature processor and model\n" ] }, { @@ -104,8 +110,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Apply feature processor and model to dataset\n", - "" + "## Apply feature processor and model to dataset\n" ] }, { @@ -249,4 +254,4 @@ ] } ] -} +} \ No newline at end of file