From f747d1c5c316a17be17a4f081e013e1f42dd9795 Mon Sep 17 00:00:00 2001 From: Chenhui Hu Date: Fri, 17 Jan 2020 10:00:59 -0500 Subject: [PATCH] Chenhui/cpu unit test pipeline (#38) * address review comments * added full conda path * minor change * added conda to PATH * added build status in README Former-commit-id: 8caaa3c662a4f293efe7e72623909b456362925b --- README.md | 8 +- .../dataset/retail/download_data.r | 2 +- .../tests}/source_entire.R | 0 .../tests}/test_datasets.R | 0 forecasting_lib/tests/test_datasets.py | 66 +++++++++++++++++ tests/ci/cpu_unit_tests_linux.yml | 63 ++++++++++++++++ tests/unit/test_datasets.py | 73 ------------------- 7 files changed, 137 insertions(+), 75 deletions(-) rename {tests/unit => forecasting_lib/tests}/source_entire.R (100%) rename {tests/unit => forecasting_lib/tests}/test_datasets.R (100%) create mode 100644 forecasting_lib/tests/test_datasets.py create mode 100644 tests/ci/cpu_unit_tests_linux.yml delete mode 100644 tests/unit/test_datasets.py diff --git a/README.md b/README.md index f65d301d..180ab60b 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,9 @@ # Forecasting Best Practices -This repository contains examples and best practices for building Forecasting solutions and systems, provided as [Jupyter notebooks](examples) and [a library of utility functions](forecasting_lib). The focus of the repository is on state-of-the-art methods and common scenarios that are popular among researchers and practitioners working on forecasting problems. \ No newline at end of file +This repository contains examples and best practices for building Forecasting solutions and systems, provided as [Jupyter notebooks](examples) and [a library of utility functions](forecasting_lib). The focus of the repository is on state-of-the-art methods and common scenarios that are popular among researchers and practitioners working on forecasting problems. + +## Build Status +| Build | Branch | Status | +| --- | --- | --- | +| **Linux CPU** | master | [![Build Status](https://dev.azure.com/best-practices/forecasting/_apis/build/status/cpu_unit_tests_linux?branchName=master)](https://dev.azure.com/best-practices/forecasting/_build/latest?definitionId=128&branchName=master) | +| **Linux CPU** | staging | [![Build Status](https://dev.azure.com/best-practices/forecasting/_apis/build/status/cpu_unit_tests_linux?branchName=staging)](https://dev.azure.com/best-practices/forecasting/_build/latest?definitionId=128&branchName=staging) | \ No newline at end of file diff --git a/forecasting_lib/forecasting_lib/dataset/retail/download_data.r b/forecasting_lib/forecasting_lib/dataset/retail/download_data.r index 23f2e7c7..2827cd10 100644 --- a/forecasting_lib/forecasting_lib/dataset/retail/download_data.r +++ b/forecasting_lib/forecasting_lib/dataset/retail/download_data.r @@ -13,7 +13,7 @@ data("orangeJuice") yx <- orangeJuice[[1]] storedemo <- orangeJuice[[2]] -fpath <- file.path("retail_sales", "OrangeJuice_Pt_3Weeks_Weekly", "data") +fpath <- file.path("contrib", "tsperf", "OrangeJuice_Pt_3Weeks_Weekly", "data") write.csv(yx, file = file.path(fpath, "yx.csv"), quote = FALSE, na = " ", row.names = FALSE) write.csv(storedemo, file = file.path(fpath, "storedemo.csv"), quote = FALSE, na = " ", row.names = FALSE) diff --git a/tests/unit/source_entire.R b/forecasting_lib/tests/source_entire.R similarity index 100% rename from tests/unit/source_entire.R rename to forecasting_lib/tests/source_entire.R diff --git a/tests/unit/test_datasets.R b/forecasting_lib/tests/test_datasets.R similarity index 100% rename from tests/unit/test_datasets.R rename to forecasting_lib/tests/test_datasets.R diff --git a/forecasting_lib/tests/test_datasets.py b/forecasting_lib/tests/test_datasets.py new file mode 100644 index 00000000..496fa48d --- /dev/null +++ b/forecasting_lib/tests/test_datasets.py @@ -0,0 +1,66 @@ +import os +import subprocess +import pandas as pd + + +def test_download_retail_data(): + RETAIL_DIR = os.path.join(".", "forecasting_lib", "forecasting_lib", "dataset", "retail") + DATA_DIR = os.path.join(".", "contrib", "tsperf", "OrangeJuice_Pt_3Weeks_Weekly", "data") + SCRIPT_PATH = os.path.join(RETAIL_DIR, "download_data.r") + DATA_FILE_LIST = ["yx.csv", "storedemo.csv"] + # Remove data files if they are existed + for f in DATA_FILE_LIST: + file_path = os.path.join(DATA_DIR, f) + if os.path.exists(file_path): + os.remove(file_path) + assert not os.path.exists(file_path) + # Call data download script + try: + subprocess.call(["sudo", "Rscript", SCRIPT_PATH]) + except subprocess.CalledProcessError as e: + print(e.output) + # Check downloaded data + DATA_DIM_LIST = [(106139, 19), (83, 12)] + COLUMN_NAME_LIST = [ + [ + "store", + "brand", + "week", + "logmove", + "constant", + "price1", + "price2", + "price3", + "price4", + "price5", + "price6", + "price7", + "price8", + "price9", + "price10", + "price11", + "deal", + "feat", + "profit", + ], + [ + "STORE", + "AGE60", + "EDUC", + "ETHNIC", + "INCOME", + "HHLARGE", + "WORKWOM", + "HVAL150", + "SSTRDIST", + "SSTRVOL", + "CPDIST5", + "CPWVOL5", + ], + ] + for idx, f in enumerate(DATA_FILE_LIST): + file_path = os.path.join(DATA_DIR, f) + assert os.path.exists(file_path) + df = pd.read_csv(file_path, index_col=None) + assert df.shape == DATA_DIM_LIST[idx] + assert list(df) == COLUMN_NAME_LIST[idx] diff --git a/tests/ci/cpu_unit_tests_linux.yml b/tests/ci/cpu_unit_tests_linux.yml new file mode 100644 index 00000000..63bb5bb1 --- /dev/null +++ b/tests/ci/cpu_unit_tests_linux.yml @@ -0,0 +1,63 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + +# Pull request against these branches will trigger this build +pr: +- master +- staging + +# Any commit to these branches will trigger the build. +trigger: +- staging +- master + + +jobs: +- job: cpu_unit_tests_linux + timeoutInMinutes: 10 # how long to run the job before automatically cancelling + pool: + # vmImage: 'ubuntu-16.04' # hosted machine + name: ForecastingAgents + + steps: + +# Uncomment if hosted machine +# - task: UsePythonVersion@0 +# inputs: +# versionSpec: '3.6.8' +# architecture: 'x64' +# addToPath: true +# displayName: 'Use Python 3.6.8' + + - bash: | + echo "##vso[task.prependpath]/data/anaconda/bin" + export PATH="/data/anaconda/bin:$PATH" + conda env list + displayName: Add Conda to PATH + +# Uncomment if needed +# Conda creation can take around 10 min +# - bash: | +# python tools/generate_conda_file.py +# conda env create -n forecasting_cpu -f forecasting_cpu.yaml +# displayName: 'Creating Conda Environment with dependencies' + + - bash: | + eval "$(conda shell.bash hook)" && conda activate forecast_cpu + pytest --durations=0 forecasting_lib/tests -m "not notebooks and not gpu and not azureml" --junitxml=junit/test-unitttest.xml + displayName: 'Run Unit tests' + +# Uncomment if needed +# - bash: | +# echo Remove Conda Environment +# conda remove -n forecasting_cpu --all -q --force -y +# echo Done Cleanup +# displayName: 'Cleanup Task' +# condition: always() + + - task: PublishTestResults@2 + inputs: + testResultsFiles: '**/test-unitttest.xml' + testRunTitle: 'Test results for PyTest' + + diff --git a/tests/unit/test_datasets.py b/tests/unit/test_datasets.py deleted file mode 100644 index 5d123887..00000000 --- a/tests/unit/test_datasets.py +++ /dev/null @@ -1,73 +0,0 @@ -import os -import sys -import pytest -import subprocess -import pandas as pd - -def test_download_retail_data(): - BENCHMARK_DIR = os.path.join(".", "retail_sales", "OrangeJuice_Pt_3Weeks_Weekly") - DATA_DIR = os.path.join(BENCHMARK_DIR, "data") - SCRIPT_PATH = os.path.join(BENCHMARK_DIR, "common", "download_data.r") - DATA_FILE_LIST = ["yx.csv", "storedemo.csv"] - # Remove data files if they are existed - for f in DATA_FILE_LIST: - file_path = os.path.join(DATA_DIR, f) - if os.path.exists(file_path): - os.remove(file_path) - assert not os.path.exists(file_path) - # Call data download script - try: - subprocess.call(["Rscript", SCRIPT_PATH]) - except subprocess.CalledProcessError as e: - print(e.output) - # Check downloaded data - DATA_DIM_LIST = [(106139, 19), (83, 12)] - COLUMN_NAME_LIST = [["store", "brand", "week", "logmove", "constant", - "price1", "price2", "price3", "price4", "price5", - "price6", "price7", "price8", "price9", "price10", - "price11", "deal", "feat", "profit"], - ["STORE", "AGE60", "EDUC", "ETHNIC", "INCOME", - "HHLARGE", "WORKWOM","HVAL150","SSTRDIST", - "SSTRVOL", "CPDIST5", "CPWVOL5"] - ] - for idx, f in enumerate(DATA_FILE_LIST): - file_path = os.path.join(DATA_DIR, f) - assert os.path.exists(file_path) - df = pd.read_csv(file_path, index_col=None) - assert df.shape == DATA_DIM_LIST[idx] - assert list(df) == COLUMN_NAME_LIST[idx] - -def test_download_energy_data(): - BENCHMARK_DIR = os.path.join(".", "energy_load", "GEFCom2017_D_Prob_MT_hourly") - DATA_DIR = os.path.join(BENCHMARK_DIR, "data") - SCRIPT_PATH = os.path.join(BENCHMARK_DIR, "common", "download_data.py") - DATA_FILE_LIST = ["2011_smd_hourly.xls", "2012_smd_hourly.xls", - "2013_smd_hourly.xls", "2014_smd_hourly.xls", - "2015_smd_hourly.xls", "2016_smd_hourly.xls", - "2017_smd_hourly.xlsx"] - DATA_DIM_LIST = [[(57, 5), (8760, 16)] + [(8760, 14)]*8, - [(57, 5), (8784, 16)] + [(8784, 14)]*8, - [(59, 5), (8760, 16)] + [(8760, 14)]*8, - [(59, 5), (8760, 16)] + [(8760, 14)]*8 + [(0,1)], - [(57, 5), (8760, 16)] + [(8760, 14)]*8, - [(47, 10), (8784, 17)] + [(8784, 14)]*8, - [(51, 13), (8760, 21)] + [(8760, 14)]*8] - # Remove data files if they are existed - for f in DATA_FILE_LIST: - file_path = os.path.join(DATA_DIR, f) - if os.path.exists(file_path): - os.remove(file_path) - assert not os.path.exists(file_path) - # Call data download script - try: - subprocess.check_output(["python", SCRIPT_PATH]) - except subprocess.CalledProcessError as e: - print(e.output) - # Check downloaded data (only check dimensions since download_data.py checks column names) - for file_idx, f in enumerate(DATA_FILE_LIST): - file_path = os.path.join(DATA_DIR, f) - assert os.path.exists(file_path) - xls = pd.ExcelFile(file_path) - for sheet_idx, s in enumerate(xls.sheet_names): - assert xls.parse(s).shape == DATA_DIM_LIST[file_idx][sheet_idx] -