Custom Heirarchical Time series forecasting with Azure Automl (#233)
* Custom Heirarchical Time series forecasting with Azure Automl
* Custom hts refactored
* Custom_Hierarchical_Time_Series_Forecasting_Solution
* Update Readme.md
* AzureML_Custom_Hierarchical_Time_Series_Forecasting 📈
* Heirarchical time series forecasting refactored
* Heirarchical time series forecasting refactored
* Heirarchical time series forecasting refactored
This commit is contained in:
Родитель
c4ba9dbdb3
Коммит
d782106860
|
@ -0,0 +1,560 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "RFQ17USVXKsN"
|
||||
},
|
||||
"source": [
|
||||
"# Custom Hierarchical Time Series Forecasting\n",
|
||||
"\n",
|
||||
"Dynamically slicing the dataset based on the hierarchy variables the user passes for which they require the forecast for,then using the sliced dataset the model is trained and generates forecasts for the same. There is an additional caching feature to avoid retraining of models on the same heirarchy variables.\n",
|
||||
"\n",
|
||||
"This is benificial in 2 ways:\n",
|
||||
"\n",
|
||||
"- We can work with the standard compute resource and don't need to acquire more expensive compute resources\n",
|
||||
"- Traning time reduced significantly, as we train only on what's needed at this point in time.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "p6X0v-aQXT_i"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#Imports\n",
|
||||
"import numpy as np\n",
|
||||
"import pickle\n",
|
||||
"import pandas as pd\n",
|
||||
"import logging\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"gather": {
|
||||
"logged": 1637063996274
|
||||
},
|
||||
"id": "DSus9oMzXKsi"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"from azureml.core.workspace import Workspace\n",
|
||||
"from azureml.core.experiment import Experiment\n",
|
||||
"from azureml.train.automl import AutoMLConfig\n",
|
||||
"from azureml.automl.core.forecasting_parameters import ForecastingParameters\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"class Runner:\n",
|
||||
"\n",
|
||||
" def __init__(\n",
|
||||
" self,\n",
|
||||
" train_df_path,\n",
|
||||
" date_var,\n",
|
||||
" hr_vars,\n",
|
||||
" freq,\n",
|
||||
" holiday_feature,\n",
|
||||
" target_var,\n",
|
||||
" ):\n",
|
||||
" # Storing all the configuration parameters\n",
|
||||
" self.df = pd.read_csv(train_df_path)\n",
|
||||
" self.freq = freq\n",
|
||||
" self.target_var = target_var\n",
|
||||
" self.date_time_var = date_var\n",
|
||||
" self.df[date_var] = pd.to_datetime(self.df[date_var])\n",
|
||||
" self.hr_vars = hr_vars\n",
|
||||
" self.holiday = holiday_feature\n",
|
||||
" self.suggestion = {}\n",
|
||||
" # generate a list of unique values in the hierarchy list passed and store it as a dictionary\n",
|
||||
" for x in self.hr_vars:\n",
|
||||
" self.suggestion[x] = list(self.df[x].unique())\n",
|
||||
"\n",
|
||||
" self.job_cache = {}\n",
|
||||
"\n",
|
||||
" def _get_suggestions(self):\n",
|
||||
" \n",
|
||||
" return self.suggestion\n",
|
||||
"\n",
|
||||
" # Creating the Training Job\n",
|
||||
" def _create_job(self, config_list, test_df_path):\n",
|
||||
" self.config_list = config_list\n",
|
||||
" \"\"\"Generating a Unique key name based on the hierarchy combination passed. \n",
|
||||
" The key would look like this __state_WA_store_id_1_product_category_B_SKU_B2.csv\"\"\"\n",
|
||||
" key_val = '_'\n",
|
||||
" for x in config_list:\n",
|
||||
" key_val = key_val + '_' + x[0] + '_' + str(x[1])\n",
|
||||
"\n",
|
||||
" print ('Check if Key Exists in Job Cache')\n",
|
||||
" # Measure to avouid re-training the model on the same heirarchy combination\n",
|
||||
" if key_val in self.job_cache.keys():\n",
|
||||
"\n",
|
||||
" return key_val\n",
|
||||
" else:\n",
|
||||
"\n",
|
||||
" # Perform Slicing of Dataset based on the hierarchy combination passed\n",
|
||||
"\n",
|
||||
" final_df = self.df\n",
|
||||
" for x in config_list:\n",
|
||||
" final_df = final_df[final_df[x[0]] == x[1]]\n",
|
||||
" self.final_df = final_df\n",
|
||||
" path = key_val + '.csv'\n",
|
||||
" final_df.to_csv(path)\n",
|
||||
"\n",
|
||||
" train_data = pd.read_csv(path)\n",
|
||||
" # Train_data is the sliced dataframe we pass for training\n",
|
||||
" # Setting up Automl Config\n",
|
||||
" forecasting_parameters = ForecastingParameters(\n",
|
||||
" time_column_name=self.date_time_var,\n",
|
||||
" forecast_horizon=50,\n",
|
||||
" country_or_region_for_holidays='US',\n",
|
||||
" freq=self.freq,\n",
|
||||
" target_lags='auto',\n",
|
||||
" target_rolling_window_size=10,\n",
|
||||
" )\n",
|
||||
" \n",
|
||||
" automl_config = AutoMLConfig(\n",
|
||||
" task='forecasting',\n",
|
||||
" primary_metric='normalized_root_mean_squared_error',\n",
|
||||
" experiment_timeout_minutes=15,\n",
|
||||
" enable_early_stopping=True,\n",
|
||||
" training_data=train_data,\n",
|
||||
" label_column_name=self.target_var,\n",
|
||||
" n_cross_validations=5,\n",
|
||||
" enable_ensembling=False,\n",
|
||||
" verbosity=logging.INFO,\n",
|
||||
" forecasting_parameters=forecasting_parameters,\n",
|
||||
" )\n",
|
||||
" ws = Workspace.from_config()\n",
|
||||
" experiment = Experiment(ws, 'local-Delta')\n",
|
||||
" local_run = experiment.submit(automl_config,\n",
|
||||
" show_output=True)\n",
|
||||
" print ('Training Job Complete')\n",
|
||||
" (best_run, fitted_model) = local_run.get_output()\n",
|
||||
" print ('Making Predictions')\n",
|
||||
" # saving the model in cache with key generated from it's hierarchy combination\n",
|
||||
" self.job_cache[key_val] = fitted_model\n",
|
||||
" print ('Finish')\n",
|
||||
" return key_val\n",
|
||||
"\n",
|
||||
" def _predict(self, test_df_path, key_val):\n",
|
||||
" \n",
|
||||
" test_df = pd.read_csv(test_df_path)\n",
|
||||
" fitted_model = self.job_cache[key_val]\n",
|
||||
" # slicing the target data basis the hierarchy combination to predict output fot that combination\n",
|
||||
" print ('Slicing Test Data')\n",
|
||||
" for x in self.config_list:\n",
|
||||
" test_df = test_df[test_df[x[0]] == x[1]]\n",
|
||||
"\n",
|
||||
" final_test_df = test_df\n",
|
||||
" test_path = key_val + 'test_df' + '.csv'\n",
|
||||
" final_test_df.to_csv(test_path)\n",
|
||||
" print ('Test Data Slicing Finish')\n",
|
||||
" test_data = pd.read_csv(test_path)\n",
|
||||
" \n",
|
||||
" \n",
|
||||
" \n",
|
||||
" print ('Creating Query')\n",
|
||||
" \n",
|
||||
" fitted_model.quantiles = [0.05, 0.5, 0.9, 0.75]\n",
|
||||
" result = fitted_model.forecast_quantiles(test_data,\n",
|
||||
" ignore_data_errors=True)\n",
|
||||
" print ('Finish')\n",
|
||||
" return result\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "Z0NHEUQBXKsu"
|
||||
},
|
||||
"source": [
|
||||
"Intializing the class and passing in the prarameters\n",
|
||||
"- train_df_path: path to the training dataset\n",
|
||||
"- test_df_path: path to the test dataset\n",
|
||||
"- hr_vars: list of hierarchy variables\n",
|
||||
"- date_var : date variable\n",
|
||||
"- target_var : target variable (The variable for which you want your forecasts in this case it is the quantity)\n",
|
||||
"- holiday_feature:Bool, if you want to include the holiday feature in the model\n",
|
||||
"- freq: frequency of the time series data"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"gather": {
|
||||
"logged": 1637063997756
|
||||
},
|
||||
"id": "rJAHrS9SXKsx",
|
||||
"jupyter": {
|
||||
"outputs_hidden": false,
|
||||
"source_hidden": false
|
||||
},
|
||||
"nteract": {
|
||||
"transient": {
|
||||
"deleting": false
|
||||
}
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"r = Runner(train_df_path='data/hts-sample-train.csv',date_var='date',target_var='quantity',holiday_feature=True,hr_vars=[\"state\",'store_id','product_category','SKU'],freq='D')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "CHmcYBqBXKsy"
|
||||
},
|
||||
"source": [
|
||||
"Having a look at the unique values of the hierarchy columns"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"gather": {
|
||||
"logged": 1637063998841
|
||||
},
|
||||
"id": "IvXhndUiXKsz",
|
||||
"jupyter": {
|
||||
"outputs_hidden": false,
|
||||
"source_hidden": false
|
||||
},
|
||||
"nteract": {
|
||||
"transient": {
|
||||
"deleting": false
|
||||
}
|
||||
},
|
||||
"outputId": "5edd3d9d-a015-44c3-ecda-d6a0658e7ecf"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'state': ['CA', 'FL', 'WA'], 'store_id': [1, 2, 3], 'product_category': ['A', 'B'], 'SKU': ['A1', 'A2', 'A3', 'B1', 'B2']}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"suggestion = r._get_suggestions()\n",
|
||||
"print(suggestion)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "rBh1fcjCXKs3"
|
||||
},
|
||||
"source": [
|
||||
"Creating the training Job\n",
|
||||
"- test_df_path:path to the test dataset\n",
|
||||
"- Config List : List consisting of Tuple of (heirarchy_column_name,heirarchy_value)\n",
|
||||
"\n",
|
||||
"Consider we want predictions for state=WA,store_id=1,product_category=B,SKU=B2,hence the config list to be passed will appear something like this\n",
|
||||
"``` \n",
|
||||
"[(\"state\",\"WA\"),('store_id',1),('product_category','B'),(\"SKU\",\"B2\")]\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"The class method returns a key that uniquely identifies the model saved in Job cache,the key is then used for predictions\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"gather": {
|
||||
"logged": 1637065014177
|
||||
},
|
||||
"id": "Jh34DcEXXKs7",
|
||||
"jupyter": {
|
||||
"outputs_hidden": false,
|
||||
"source_hidden": false
|
||||
},
|
||||
"nteract": {
|
||||
"transient": {
|
||||
"deleting": false
|
||||
}
|
||||
},
|
||||
"outputId": "b5a72f3f-91a2-438e-acb7-a2f7635b5d04"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Check if Key Exists in Job Cache\n",
|
||||
"No run_configuration provided, running on local with default configuration\n",
|
||||
"Running in the active local environment.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<table style=\"width:100%\"><tr><th>Experiment</th><th>Id</th><th>Type</th><th>Status</th><th>Details Page</th><th>Docs Page</th></tr><tr><td>local-Delta</td><td>AutoML_bd470986-202e-4ca5-a11a-6d221626d946</td><td>automl</td><td>Preparing</td><td><a href=\"https://ml.azure.com/runs/AutoML_bd470986-202e-4ca5-a11a-6d221626d946?wsid=/subscriptions/c8204c65-7397-4888-a397-a21bc631464e/resourcegroups/ml-resource-grp/workspaces/ml-studio&tid=638c2807-f86c-4538-9f10-19623d30686b\" target=\"_blank\" rel=\"noopener\">Link to Azure Machine Learning studio</a></td><td><a href=\"https://docs.microsoft.com/en-us/python/api/overview/azure/ml/intro?view=azure-ml-py\" target=\"_blank\" rel=\"noopener\">Link to Documentation</a></td></tr></table>"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.HTML object>"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Current status: DatasetFeaturization. Beginning to featurize the dataset.\n",
|
||||
"Current status: DatasetFeaturizationCompleted. Completed featurizing the dataset.\n",
|
||||
"Heuristic parameters: Target_Lag = '[0]'.\n",
|
||||
"Current status: DatasetCrossValidationSplit. Generating individually featurized CV splits.\n",
|
||||
"Current status: DatasetFeaturization. Beginning to featurize the CV split.\n",
|
||||
"Current status: DatasetFeaturizationCompleted. Completed featurizing the CV split.\n",
|
||||
"Current status: DatasetFeaturization. Beginning to featurize the CV split.\n",
|
||||
"Current status: DatasetFeaturizationCompleted. Completed featurizing the CV split.\n",
|
||||
"Current status: DatasetFeaturization. Beginning to featurize the CV split.\n",
|
||||
"Current status: DatasetFeaturizationCompleted. Completed featurizing the CV split.\n",
|
||||
"Current status: DatasetFeaturization. Beginning to featurize the CV split.\n",
|
||||
"Current status: DatasetFeaturizationCompleted. Completed featurizing the CV split.\n",
|
||||
"Current status: DatasetFeaturization. Beginning to featurize the CV split.\n",
|
||||
"Current status: DatasetFeaturizationCompleted. Completed featurizing the CV split.\n",
|
||||
"\n",
|
||||
"****************************************************************************************************\n",
|
||||
"DATA GUARDRAILS: \n",
|
||||
"\n",
|
||||
"TYPE: Short series handling\n",
|
||||
"STATUS: PASSED\n",
|
||||
"DESCRIPTION: Automated ML detected enough data points for each series in the input data to continue with training.\n",
|
||||
" \n",
|
||||
"\n",
|
||||
"****************************************************************************************************\n",
|
||||
"\n",
|
||||
"TYPE: Frequency detection\n",
|
||||
"STATUS: PASSED\n",
|
||||
"DESCRIPTION: The time series was analyzed, all data points are aligned with detected frequency.\n",
|
||||
" \n",
|
||||
"\n",
|
||||
"****************************************************************************************************\n",
|
||||
"\n",
|
||||
"TYPE: Missing feature values imputation\n",
|
||||
"STATUS: PASSED\n",
|
||||
"DESCRIPTION: No feature missing values were detected in the training data.\n",
|
||||
" Learn more about missing value imputation: https://aka.ms/AutomatedMLFeaturization\n",
|
||||
"\n",
|
||||
"****************************************************************************************************\n",
|
||||
"\n",
|
||||
"TYPE: Memory Issues Detection\n",
|
||||
"STATUS: PASSED\n",
|
||||
"DESCRIPTION: The selected horizon, lag and rolling window values were analyzed, and no potential memory issues were detected.\n",
|
||||
" Learn more about time-series forecasting configurations: https://aka.ms/AutomatedMLForecastingConfiguration\n",
|
||||
"\n",
|
||||
"****************************************************************************************************\n",
|
||||
"Current status: ModelSelection. Beginning model selection.\n",
|
||||
"\n",
|
||||
"****************************************************************************************************\n",
|
||||
"ITERATION: The iteration being evaluated.\n",
|
||||
"PIPELINE: A summary description of the pipeline being evaluated.\n",
|
||||
"DURATION: Time taken for the current iteration.\n",
|
||||
"METRIC: The result of computing score on the fitted pipeline.\n",
|
||||
"BEST: The best observed score thus far.\n",
|
||||
"****************************************************************************************************\n",
|
||||
"\n",
|
||||
" ITERATION PIPELINE DURATION METRIC BEST\n",
|
||||
" 0 Naive 0:00:32 0.3364 0.3364\n",
|
||||
" 1 SeasonalNaive 0:00:32 0.3103 0.3103\n",
|
||||
" 2 Average 0:00:32 0.2303 0.2303\n",
|
||||
" 3 SeasonalAverage 0:00:33 0.2350 0.2303\n",
|
||||
" 4 ExponentialSmoothing 0:00:50 0.2331 0.2303\n",
|
||||
" 5 Arimax 0:00:51 1.0000 0.2303\n",
|
||||
" 6 ProphetModel 0:00:43 0.2541 0.2303\n",
|
||||
" 7 StandardScalerWrapper LightGBM 0:00:30 0.2659 0.2303\n",
|
||||
" 8 StandardScalerWrapper XGBoostRegressor 0:00:56 0.2677 0.2303\n",
|
||||
" 9 MaxAbsScaler ElasticNet 0:00:27 0.2295 0.2295\n",
|
||||
" 10 RobustScaler ElasticNet 0:00:35 0.2263 0.2263\n",
|
||||
" 11 MinMaxScaler ElasticNet 0:00:25 0.2205 0.2205\n",
|
||||
" 12 StandardScalerWrapper ElasticNet 0:00:31 0.2362 0.2205\n",
|
||||
" 13 MinMaxScaler RandomForest 0:00:25 0.2275 0.2205\n",
|
||||
" 14 MaxAbsScaler ElasticNet 0:00:24 0.2280 0.2205\n",
|
||||
" 15 StandardScalerWrapper ElasticNet 0:00:25 0.2203 0.2203\n",
|
||||
" 16 MaxAbsScaler ElasticNet 0:00:25 0.2253 0.2203\n",
|
||||
" 17 StandardScalerWrapper ElasticNet 0:00:28 0.2225 0.2203\n",
|
||||
" 18 MinMaxScaler ExtremeRandomTrees 0:00:27 0.2392 0.2203\n",
|
||||
" 19 MinMaxScaler ExtremeRandomTrees 0:00:33 0.2311 0.2203\n",
|
||||
" 21 MinMaxScaler ExtremeRandomTrees 0:00:27 0.2286 0.2203\n",
|
||||
" 22 MaxAbsScaler ExtremeRandomTrees 0:00:27 0.2404 0.2203\n",
|
||||
" 23 MinMaxScaler DecisionTree 0:00:23 0.2320 0.2203\n",
|
||||
" 24 RobustScaler ExtremeRandomTrees 0:00:25 0.2382 0.2203\n",
|
||||
" 25 StandardScalerWrapper ElasticNet 0:00:27 0.2149 0.2149\n",
|
||||
" 26 RobustScaler DecisionTree 0:00:26 0.2590 0.2149\n",
|
||||
"Stopping criteria reached at iteration 27. Ending experiment.\n",
|
||||
"****************************************************************************************************\n",
|
||||
"Current status: BestRunExplainModel. Best run model explanations started\n",
|
||||
"Current status: ModelExplanationDataSetSetup. Model explanations data setup completed\n",
|
||||
"Current status: PickSurrogateModel. Choosing LightGBM as the surrogate model for explanations\n",
|
||||
"Current status: EngineeredFeatureExplanations. Computation of engineered features started\n",
|
||||
"Current status: EngineeredFeatureExplanations. Computation of engineered features completed\n",
|
||||
"Current status: RawFeaturesExplanations. Computation of raw features started\n",
|
||||
"Current status: RawFeaturesExplanations. Computation of raw features completed\n",
|
||||
"Current status: BestRunExplainModel. Best run model explanations completed\n",
|
||||
"****************************************************************************************************\n",
|
||||
"Training Job Complete\n",
|
||||
"Making Predictions\n",
|
||||
"Finish\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"INFO:interpret_community.common.explanation_utils:Using default datastore for uploads\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"k = r._create_job(config_list=[(\"state\",\"WA\"),('store_id',1),('product_category','B'),(\"SKU\",\"B2\")],test_df_path='hts-sample-test.csv')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "IR2ISW74XKs9"
|
||||
},
|
||||
"source": [
|
||||
"Fetching Results \n",
|
||||
"- key_val: key returned by the training job\n",
|
||||
"\n",
|
||||
"Based on the key passed,the model is fetched from the cache and the predictions are generated.\n",
|
||||
"By default predictions are generated for the 5th percentile range,50th percentile range and 75th percentile range and 90th percentile range"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"gather": {
|
||||
"logged": 1637065133812
|
||||
},
|
||||
"id": "YwlMq-jYXKs_",
|
||||
"jupyter": {
|
||||
"outputs_hidden": false,
|
||||
"source_hidden": false
|
||||
},
|
||||
"nteract": {
|
||||
"transient": {
|
||||
"deleting": false
|
||||
}
|
||||
},
|
||||
"outputId": "d163b562-fcff-4153-d055-f62f4a454bda"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Slicing Test Data\n",
|
||||
"Test Data Slicing Finish\n",
|
||||
"Creating Query\n",
|
||||
"Finish\n",
|
||||
" date 0.05 0.5 0.9 0.75\n",
|
||||
"0 2016-07-28 6.67 7.55 8.23 7.91\n",
|
||||
"1 2016-07-29 6.95 7.82 8.50 8.17\n",
|
||||
"2 2016-07-30 7.10 7.88 8.50 8.21\n",
|
||||
"3 2016-07-31 7.34 8.03 8.56 8.31\n",
|
||||
"4 2016-08-01 6.95 7.65 8.20 7.94\n",
|
||||
"5 2016-08-02 7.08 7.72 8.22 7.98\n",
|
||||
"6 2016-08-03 6.96 7.67 8.21 7.95\n",
|
||||
"7 2016-08-04 6.83 7.69 8.35 8.04\n",
|
||||
"8 2016-08-05 7.12 7.82 8.36 8.10\n",
|
||||
"9 2016-08-06 7.12 7.84 8.40 8.13\n",
|
||||
"10 2016-08-07 7.21 7.92 8.47 8.21\n",
|
||||
"11 2016-08-08 6.88 7.56 8.10 7.84\n",
|
||||
"12 2016-08-09 7.29 7.77 8.14 7.96\n",
|
||||
"13 2016-08-10 6.86 7.57 8.13 7.87\n",
|
||||
"14 2016-08-11 6.88 7.56 8.10 7.84\n",
|
||||
"15 2016-08-12 6.99 7.68 8.21 7.96\n",
|
||||
"16 2016-08-13 7.13 7.78 8.28 8.04\n",
|
||||
"17 2016-08-14 7.04 7.69 8.19 7.96\n",
|
||||
"18 2016-08-15 7.03 7.69 8.19 7.95\n",
|
||||
"19 2016-08-16 6.86 7.48 7.95 7.73\n",
|
||||
"20 2016-08-17 7.05 7.73 8.25 8.01\n",
|
||||
"21 2016-08-18 6.97 7.61 8.11 7.87\n",
|
||||
"22 2016-08-19 7.00 7.72 8.29 8.02\n",
|
||||
"23 2016-08-20 6.96 7.75 8.36 8.07\n",
|
||||
"24 2016-08-21 7.01 7.77 8.35 8.08\n",
|
||||
"25 2016-08-22 6.48 7.38 8.08 7.75\n",
|
||||
"26 2016-08-23 6.69 7.61 8.34 7.99\n",
|
||||
"27 2016-08-24 6.85 7.68 8.33 8.02\n",
|
||||
"28 2016-08-25 6.93 7.72 8.33 8.04\n",
|
||||
"29 2016-08-26 7.14 7.91 8.51 8.22\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"final_res = r._predict(test_df_path='data/hts-sample-test.csv',key_val=k)\n",
|
||||
"print(final_res)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "0YJ1456DXKtC",
|
||||
"jupyter": {
|
||||
"outputs_hidden": false,
|
||||
"source_hidden": false
|
||||
},
|
||||
"nteract": {
|
||||
"transient": {
|
||||
"deleting": false
|
||||
}
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"name": "Custom Heirarchical Time Series Forecasting.ipynb",
|
||||
"provenance": []
|
||||
},
|
||||
"kernel_info": {
|
||||
"name": "python3-azureml"
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.6 - AzureML",
|
||||
"language": "python",
|
||||
"name": "python3-azureml"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.9"
|
||||
},
|
||||
"microsoft": {
|
||||
"host": {
|
||||
"AzureML": {
|
||||
"notebookHasBeenCompleted": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"nteract": {
|
||||
"version": "nteract-front-end@1.0.0"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0
|
||||
}
|
|
@ -0,0 +1,47 @@
|
|||
# Azure AutoML Hierarchical Time Series Forecasting
|
||||
|
||||
## Hierarchical Time Series Forecasting<br>
|
||||
|
||||
This type of forecasting is used when there are multiple variable categories forming an hierarchy.
|
||||
|
||||
The Data under consideration is as follows:<br>
|
||||
![Data](./pics/data.png)
|
||||
|
||||
Hierarchy Order for the Data is as follows: <br>
|
||||
![Hierarchy Image](./pics/hierarchy.png)
|
||||
|
||||
We will be predicting the Quantity for this combination of Data on daily basis for a time period of 30 days
|
||||
|
||||
```
|
||||
state=WA,store_id=1,product_category=B, SKU=B2
|
||||
```
|
||||
|
||||
The Dataset used for training is named as :
|
||||
hts-sample-train.csv
|
||||
|
||||
The Dataset used for testing is named as :
|
||||
hts-sample-test.csv
|
||||
|
||||
## Problem with conventional Hierarchical Time Series Forecasting :
|
||||
|
||||
Using a standard compute resource (STANDARD_DS11_V2) and training the model over the dataset provided in the Data folder took more than 3 hours to train the model. As per the [documentation](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-auto-train-forecast), I assume that the model is trained on all hierarchy combinations at once.<br><br>
|
||||
|
||||
The issue is that I wanted forecasts for a very niche set of query, i.e. state=WA,store_id=1,product_category=B, SKU=B2, but I had to wait for the whole traning to get over hence I have come up with this class-based solution to address this issue by natively using Azure Automl <br><br>
|
||||
|
||||
## Solution Proposal<br>
|
||||
|
||||
I dynamically slice the dataset based on the hierarchy variables the user passes for which it requires the forecast for, then using the sliced dataset, I train the model and generate forecasts for the same. There is an additional caching feature to avoid retraining models on the same hierarchy variables.
|
||||
|
||||
This is beneficial in 2 ways:<br>
|
||||
|
||||
- We can work with the standard compute resource and don't need to acquire more expensive compute resources
|
||||
- Traning time reduced significantly, as we train only on what's needed at this point.
|
||||
|
||||
## Evaluation Metrics<br>
|
||||
|
||||
I have used normalized_root_mean_squared_error metric for evaluation.<br>
|
||||
Natively Azure Automl tries to fit different forecasting models and evaulates them on the specified metric.(in this case normalized_root_mean_squared_error)
|
||||
|
||||
The forecasting model with the least error is then chosen for the prediction job
|
||||
|
||||
![Results](./pics/result.png)
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
Двоичные данные
contrib/AzureML Custom Hierarchical Time Series Forecasting/pics/data.png
Normal file
Двоичные данные
contrib/AzureML Custom Hierarchical Time Series Forecasting/pics/data.png
Normal file
Двоичный файл не отображается.
После Ширина: | Высота: | Размер: 36 KiB |
Двоичные данные
contrib/AzureML Custom Hierarchical Time Series Forecasting/pics/hierarchy.png
Normal file
Двоичные данные
contrib/AzureML Custom Hierarchical Time Series Forecasting/pics/hierarchy.png
Normal file
Двоичный файл не отображается.
После Ширина: | Высота: | Размер: 16 KiB |
Двоичные данные
contrib/AzureML Custom Hierarchical Time Series Forecasting/pics/result.png
Normal file
Двоичные данные
contrib/AzureML Custom Hierarchical Time Series Forecasting/pics/result.png
Normal file
Двоичный файл не отображается.
После Ширина: | Высота: | Размер: 87 KiB |
|
@ -4,8 +4,8 @@ The contrib directory contains code which is not part of the main Forecasting re
|
|||
|
||||
Each project should live in its own subdirectory `/contrib/<project>` and contain a README.md file with detailed description what the project does and how to use it. In addition, when adding a new project, a brief description should be added to the table below.
|
||||
|
||||
|
||||
## Projects
|
||||
| Directory | Project description |
|
||||
|---|---|
|
||||
| [\<project name and link\>] | \<short project description\> |
|
||||
|
||||
| Directory | Project description |
|
||||
| --------------------------------------------------- | --------------------------------------------------------------------------------------------------------------- |
|
||||
| AzureML Custom Hierarchical Time Series Forecasting | An approach to generate hierarchical time series forecasts even with low computational resources within 20 mins |
|
||||
|
|
Загрузка…
Ссылка в новой задаче