Chenhui/cpu unit test pipeline (#38)

* address review comments

* added full conda path

* minor change

* added conda to PATH

* added build status in README


Former-commit-id: 8caaa3c662
Chenhui Hu 2020-01-17 10:00:59 -05:00 committed by GitHub
Parent a11d74efc8
Commit f747d1c5c3
7 changed files: 137 additions and 75 deletions

View file

@@ -1,3 +1,9 @@
# Forecasting Best Practices
This repository contains examples and best practices for building forecasting solutions and systems, provided as [Jupyter notebooks](examples) and [a library of utility functions](forecasting_lib). The focus of the repository is on state-of-the-art methods and common scenarios that are popular among researchers and practitioners working on forecasting problems.
## Build Status
| Build | Branch | Status |
| --- | --- | --- |
| **Linux CPU** | master | [![Build Status](https://dev.azure.com/best-practices/forecasting/_apis/build/status/cpu_unit_tests_linux?branchName=master)](https://dev.azure.com/best-practices/forecasting/_build/latest?definitionId=128&branchName=master) |
| **Linux CPU** | staging | [![Build Status](https://dev.azure.com/best-practices/forecasting/_apis/build/status/cpu_unit_tests_linux?branchName=staging)](https://dev.azure.com/best-practices/forecasting/_build/latest?definitionId=128&branchName=staging) |

View file

@@ -13,7 +13,7 @@ data("orangeJuice")
yx <- orangeJuice[[1]]
storedemo <- orangeJuice[[2]]
fpath <- file.path("retail_sales", "OrangeJuice_Pt_3Weeks_Weekly", "data")
fpath <- file.path("contrib", "tsperf", "OrangeJuice_Pt_3Weeks_Weekly", "data")
write.csv(yx, file = file.path(fpath, "yx.csv"), quote = FALSE, na = " ", row.names = FALSE)
write.csv(storedemo, file = file.path(fpath, "storedemo.csv"), quote = FALSE, na = " ", row.names = FALSE)

View file

View file

View file

@@ -0,0 +1,66 @@
import os
import subprocess

import pandas as pd


def test_download_retail_data():
    RETAIL_DIR = os.path.join(".", "forecasting_lib", "forecasting_lib", "dataset", "retail")
    DATA_DIR = os.path.join(".", "contrib", "tsperf", "OrangeJuice_Pt_3Weeks_Weekly", "data")
    SCRIPT_PATH = os.path.join(RETAIL_DIR, "download_data.r")
    DATA_FILE_LIST = ["yx.csv", "storedemo.csv"]
    # Remove data files if they exist
    for f in DATA_FILE_LIST:
        file_path = os.path.join(DATA_DIR, f)
        if os.path.exists(file_path):
            os.remove(file_path)
        assert not os.path.exists(file_path)
    # Call the data download script (check_output raises CalledProcessError on failure)
    try:
        subprocess.check_output(["sudo", "Rscript", SCRIPT_PATH])
    except subprocess.CalledProcessError as e:
        print(e.output)
    # Check the downloaded data
    DATA_DIM_LIST = [(106139, 19), (83, 12)]
    COLUMN_NAME_LIST = [
        [
            "store",
            "brand",
            "week",
            "logmove",
            "constant",
            "price1",
            "price2",
            "price3",
            "price4",
            "price5",
            "price6",
            "price7",
            "price8",
            "price9",
            "price10",
            "price11",
            "deal",
            "feat",
            "profit",
        ],
        [
            "STORE",
            "AGE60",
            "EDUC",
            "ETHNIC",
            "INCOME",
            "HHLARGE",
            "WORKWOM",
            "HVAL150",
            "SSTRDIST",
            "SSTRVOL",
            "CPDIST5",
            "CPWVOL5",
        ],
    ]
    for idx, f in enumerate(DATA_FILE_LIST):
        file_path = os.path.join(DATA_DIR, f)
        assert os.path.exists(file_path)
        df = pd.read_csv(file_path, index_col=None)
        assert df.shape == DATA_DIM_LIST[idx]
        assert list(df) == COLUMN_NAME_LIST[idx]
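As a side note on the test above: the shape and column checks for both files run inside a single test, so a failure on the first file hides the result for the second. A parametrized variant would report each file separately. Below is a minimal sketch of that alternative, a hypothetical refactor that is not part of this commit; it reuses the same data directory and expected dimensions.

import os

import pandas as pd
import pytest

# Hypothetical refactor: one pytest case per downloaded file, so each
# file's shape is asserted and reported independently.
DATA_DIR = os.path.join(".", "contrib", "tsperf", "OrangeJuice_Pt_3Weeks_Weekly", "data")


@pytest.mark.parametrize(
    "file_name, expected_shape",
    [("yx.csv", (106139, 19)), ("storedemo.csv", (83, 12))],
)
def test_downloaded_file_shape(file_name, expected_shape):
    file_path = os.path.join(DATA_DIR, file_name)
    assert os.path.exists(file_path)
    df = pd.read_csv(file_path, index_col=None)
    assert df.shape == expected_shape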

View file

@@ -0,0 +1,63 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

# Pull requests against these branches will trigger this build
pr:
- master
- staging

# Any commit to these branches will trigger the build
trigger:
- staging
- master

jobs:
- job: cpu_unit_tests_linux
  timeoutInMinutes: 10 # how long to run the job before automatically cancelling
  pool:
    # vmImage: 'ubuntu-16.04' # hosted machine
    name: ForecastingAgents
  steps:
  # Uncomment if using a hosted machine
  # - task: UsePythonVersion@0
  #   inputs:
  #     versionSpec: '3.6.8'
  #     architecture: 'x64'
  #     addToPath: true
  #   displayName: 'Use Python 3.6.8'
  # The ##vso[task.prependpath] logging command updates PATH for all
  # subsequent steps; the export only affects the current script.
  - bash: |
      echo "##vso[task.prependpath]/data/anaconda/bin"
      export PATH="/data/anaconda/bin:$PATH"
      conda env list
    displayName: Add Conda to PATH
  # Uncomment if needed
  # Conda environment creation can take around 10 min
  # - bash: |
  #     python tools/generate_conda_file.py
  #     conda env create -n forecasting_cpu -f forecasting_cpu.yaml
  #   displayName: 'Create Conda environment with dependencies'
  - bash: |
      eval "$(conda shell.bash hook)" && conda activate forecast_cpu
      pytest --durations=0 forecasting_lib/tests -m "not notebooks and not gpu and not azureml" --junitxml=junit/test-unittest.xml
    displayName: 'Run unit tests'
  # Uncomment if needed
  # - bash: |
  #     echo Remove Conda environment
  #     conda remove -n forecasting_cpu --all -q --force -y
  #     echo Done cleanup
  #   displayName: 'Cleanup task'
  #   condition: always()
  - task: PublishTestResults@2
    inputs:
      testResultsFiles: '**/test-unittest.xml'
      testRunTitle: 'Test results for PyTest'
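For reference, the pytest step above can be reproduced locally once the conda environment is active. A minimal sketch using pytest's Python entry point, assuming pytest is installed and the junit output directory is writable; the arguments mirror the pipeline invocation:

import sys

import pytest

# Mirror the CI step: skip notebook, GPU, and AzureML tests and emit the
# same junit report the PublishTestResults task consumes.
exit_code = pytest.main([
    "--durations=0",
    "forecasting_lib/tests",
    "-m", "not notebooks and not gpu and not azureml",
    "--junitxml=junit/test-unittest.xml",
])
sys.exit(exit_code)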

View file

@@ -1,73 +0,0 @@
import os
import sys
import pytest
import subprocess
import pandas as pd


def test_download_retail_data():
    BENCHMARK_DIR = os.path.join(".", "retail_sales", "OrangeJuice_Pt_3Weeks_Weekly")
    DATA_DIR = os.path.join(BENCHMARK_DIR, "data")
    SCRIPT_PATH = os.path.join(BENCHMARK_DIR, "common", "download_data.r")
    DATA_FILE_LIST = ["yx.csv", "storedemo.csv"]
    # Remove data files if they exist
    for f in DATA_FILE_LIST:
        file_path = os.path.join(DATA_DIR, f)
        if os.path.exists(file_path):
            os.remove(file_path)
        assert not os.path.exists(file_path)
    # Call the data download script (check_output raises CalledProcessError on failure)
    try:
        subprocess.check_output(["Rscript", SCRIPT_PATH])
    except subprocess.CalledProcessError as e:
        print(e.output)
    # Check the downloaded data
    DATA_DIM_LIST = [(106139, 19), (83, 12)]
    COLUMN_NAME_LIST = [["store", "brand", "week", "logmove", "constant",
                         "price1", "price2", "price3", "price4", "price5",
                         "price6", "price7", "price8", "price9", "price10",
                         "price11", "deal", "feat", "profit"],
                        ["STORE", "AGE60", "EDUC", "ETHNIC", "INCOME",
                         "HHLARGE", "WORKWOM", "HVAL150", "SSTRDIST",
                         "SSTRVOL", "CPDIST5", "CPWVOL5"]
                        ]
    for idx, f in enumerate(DATA_FILE_LIST):
        file_path = os.path.join(DATA_DIR, f)
        assert os.path.exists(file_path)
        df = pd.read_csv(file_path, index_col=None)
        assert df.shape == DATA_DIM_LIST[idx]
        assert list(df) == COLUMN_NAME_LIST[idx]


def test_download_energy_data():
    BENCHMARK_DIR = os.path.join(".", "energy_load", "GEFCom2017_D_Prob_MT_hourly")
    DATA_DIR = os.path.join(BENCHMARK_DIR, "data")
    SCRIPT_PATH = os.path.join(BENCHMARK_DIR, "common", "download_data.py")
    DATA_FILE_LIST = ["2011_smd_hourly.xls", "2012_smd_hourly.xls",
                      "2013_smd_hourly.xls", "2014_smd_hourly.xls",
                      "2015_smd_hourly.xls", "2016_smd_hourly.xls",
                      "2017_smd_hourly.xlsx"]
    DATA_DIM_LIST = [[(57, 5), (8760, 16)] + [(8760, 14)] * 8,
                     [(57, 5), (8784, 16)] + [(8784, 14)] * 8,
                     [(59, 5), (8760, 16)] + [(8760, 14)] * 8,
                     [(59, 5), (8760, 16)] + [(8760, 14)] * 8 + [(0, 1)],
                     [(57, 5), (8760, 16)] + [(8760, 14)] * 8,
                     [(47, 10), (8784, 17)] + [(8784, 14)] * 8,
                     [(51, 13), (8760, 21)] + [(8760, 14)] * 8]
    # Remove data files if they exist
    for f in DATA_FILE_LIST:
        file_path = os.path.join(DATA_DIR, f)
        if os.path.exists(file_path):
            os.remove(file_path)
        assert not os.path.exists(file_path)
    # Call the data download script
    try:
        subprocess.check_output(["python", SCRIPT_PATH])
    except subprocess.CalledProcessError as e:
        print(e.output)
    # Check the downloaded data (only check dimensions since download_data.py checks column names)
    for file_idx, f in enumerate(DATA_FILE_LIST):
        file_path = os.path.join(DATA_DIR, f)
        assert os.path.exists(file_path)
        xls = pd.ExcelFile(file_path)
        for sheet_idx, s in enumerate(xls.sheet_names):
            assert xls.parse(s).shape == DATA_DIM_LIST[file_idx][sheet_idx]