Chenhui/cpu unit test pipeline (#38)
* address review comments
* added full conda path
* minor change
* added conda to PATH
* added build status in README
Former-commit-id: 8caaa3c662
Parent: a11d74efc8
Commit: f747d1c5c3
@@ -1,3 +1,9 @@
 # Forecasting Best Practices
 
 This repository contains examples and best practices for building Forecasting solutions and systems, provided as [Jupyter notebooks](examples) and [a library of utility functions](forecasting_lib). The focus of the repository is on state-of-the-art methods and common scenarios that are popular among researchers and practitioners working on forecasting problems.
+
+## Build Status
+
+| Build | Branch | Status |
+| --- | --- | --- |
+| **Linux CPU** | master | [![Build Status](https://dev.azure.com/best-practices/forecasting/_apis/build/status/cpu_unit_tests_linux?branchName=master)](https://dev.azure.com/best-practices/forecasting/_build/latest?definitionId=128&branchName=master) |
+| **Linux CPU** | staging | [![Build Status](https://dev.azure.com/best-practices/forecasting/_apis/build/status/cpu_unit_tests_linux?branchName=staging)](https://dev.azure.com/best-practices/forecasting/_build/latest?definitionId=128&branchName=staging) |
@@ -13,7 +13,7 @@ data("orangeJuice")
 yx <- orangeJuice[[1]]
 storedemo <- orangeJuice[[2]]
 
-fpath <- file.path("retail_sales", "OrangeJuice_Pt_3Weeks_Weekly", "data")
+fpath <- file.path("contrib", "tsperf", "OrangeJuice_Pt_3Weeks_Weekly", "data")
 
 write.csv(yx, file = file.path(fpath, "yx.csv"), quote = FALSE, na = " ", row.names = FALSE)
 write.csv(storedemo, file = file.path(fpath, "storedemo.csv"), quote = FALSE, na = " ", row.names = FALSE)
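Note that the R script writes missing values as a single space (`na = " "`). When the CSVs are read back in Python, pandas should be told to treat that token as missing. A minimal sketch, assuming the download script has already been run so the files exist at the new `contrib/tsperf` location:

```python
import os
import pandas as pd

# New data location introduced by this commit
fpath = os.path.join("contrib", "tsperf", "OrangeJuice_Pt_3Weeks_Weekly", "data")

# download_data.r writes NA as a single space (na = " "),
# so map " " back to NaN when loading.
yx = pd.read_csv(os.path.join(fpath, "yx.csv"), na_values=" ")
storedemo = pd.read_csv(os.path.join(fpath, "storedemo.csv"), na_values=" ")

print(yx.shape)         # expected (106139, 19)
print(storedemo.shape)  # expected (83, 12)
```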
@@ -0,0 +1,66 @@
+import os
+import subprocess
+import pandas as pd
+
+
+def test_download_retail_data():
+    RETAIL_DIR = os.path.join(".", "forecasting_lib", "forecasting_lib", "dataset", "retail")
+    DATA_DIR = os.path.join(".", "contrib", "tsperf", "OrangeJuice_Pt_3Weeks_Weekly", "data")
+    SCRIPT_PATH = os.path.join(RETAIL_DIR, "download_data.r")
+    DATA_FILE_LIST = ["yx.csv", "storedemo.csv"]
+    # Remove data files if they exist
+    for f in DATA_FILE_LIST:
+        file_path = os.path.join(DATA_DIR, f)
+        if os.path.exists(file_path):
+            os.remove(file_path)
+        assert not os.path.exists(file_path)
+    # Call data download script
+    try:
+        # check_output raises CalledProcessError on a nonzero exit code;
+        # subprocess.call only returns the status and would never reach the except branch
+        subprocess.check_output(["sudo", "Rscript", SCRIPT_PATH])
+    except subprocess.CalledProcessError as e:
+        print(e.output)
+    # Check downloaded data
+    DATA_DIM_LIST = [(106139, 19), (83, 12)]
+    COLUMN_NAME_LIST = [
+        [
+            "store",
+            "brand",
+            "week",
+            "logmove",
+            "constant",
+            "price1",
+            "price2",
+            "price3",
+            "price4",
+            "price5",
+            "price6",
+            "price7",
+            "price8",
+            "price9",
+            "price10",
+            "price11",
+            "deal",
+            "feat",
+            "profit",
+        ],
+        [
+            "STORE",
+            "AGE60",
+            "EDUC",
+            "ETHNIC",
+            "INCOME",
+            "HHLARGE",
+            "WORKWOM",
+            "HVAL150",
+            "SSTRDIST",
+            "SSTRVOL",
+            "CPDIST5",
+            "CPWVOL5",
+        ],
+    ]
+    for idx, f in enumerate(DATA_FILE_LIST):
+        file_path = os.path.join(DATA_DIR, f)
+        assert os.path.exists(file_path)
+        df = pd.read_csv(file_path, index_col=None)
+        assert df.shape == DATA_DIM_LIST[idx]
+        assert list(df) == COLUMN_NAME_LIST[idx]
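For local debugging it can help to invoke the R download script outside of pytest and capture its output directly. A minimal sketch, assuming `Rscript` is on `PATH` and the repository root is the working directory; the `run_download` helper is hypothetical, not part of this commit:

```python
import subprocess

def run_download(script_path: str) -> str:
    """Invoke an R script and return its combined stdout/stderr.

    Unlike subprocess.call, check_output raises CalledProcessError
    on a nonzero exit code, so failures are not silently ignored.
    """
    return subprocess.check_output(
        ["Rscript", script_path],
        stderr=subprocess.STDOUT,
        text=True,
    )

if __name__ == "__main__":
    print(run_download("forecasting_lib/forecasting_lib/dataset/retail/download_data.r"))
```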
@@ -0,0 +1,63 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+# Pull requests against these branches will trigger this build
+pr:
+- master
+- staging
+
+# Any commit to these branches will trigger the build.
+trigger:
+- staging
+- master
+
+
+jobs:
+- job: cpu_unit_tests_linux
+  timeoutInMinutes: 10 # how long to run the job before automatically cancelling
+  pool:
+    # vmImage: 'ubuntu-16.04' # hosted machine
+    name: ForecastingAgents
+
+  steps:
+
+  # Uncomment if hosted machine
+  # - task: UsePythonVersion@0
+  #   inputs:
+  #     versionSpec: '3.6.8'
+  #     architecture: 'x64'
+  #     addToPath: true
+  #   displayName: 'Use Python 3.6.8'
+
+  - bash: |
+      echo "##vso[task.prependpath]/data/anaconda/bin"
+      export PATH="/data/anaconda/bin:$PATH"
+      conda env list
+    displayName: Add Conda to PATH
+
+  # Uncomment if needed
+  # Conda creation can take around 10 min
+  # - bash: |
+  #     python tools/generate_conda_file.py
+  #     conda env create -n forecasting_cpu -f forecasting_cpu.yaml
+  #   displayName: 'Creating Conda Environment with dependencies'
+
+  - bash: |
+      eval "$(conda shell.bash hook)" && conda activate forecast_cpu
+      pytest --durations=0 forecasting_lib/tests -m "not notebooks and not gpu and not azureml" --junitxml=junit/test-unittest.xml
+    displayName: 'Run Unit tests'
+
+  # Uncomment if needed
+  # - bash: |
+  #     echo Remove Conda Environment
+  #     conda remove -n forecasting_cpu --all -q --force -y
+  #     echo Done Cleanup
+  #   displayName: 'Cleanup Task'
+  #   condition: always()
+
+  - task: PublishTestResults@2
+    inputs:
+      testResultsFiles: '**/test-unittest.xml'
+      testRunTitle: 'Test results for PyTest'
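The pipeline deselects tests via `-m "not notebooks and not gpu and not azureml"`. For that filter to run cleanly without `PytestUnknownMarkWarning` noise, the markers should be registered with pytest. A minimal `conftest.py` sketch, assuming only the marker names used in the command above (the description strings are illustrative):

```python
# conftest.py -- register the custom markers referenced by the CI filter.
def pytest_configure(config):
    for marker in (
        "notebooks: tests that execute example notebooks",
        "gpu: tests that require a GPU",
        "azureml: tests that depend on AzureML resources",
    ):
        config.addinivalue_line("markers", marker)
```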
@@ -1,73 +0,0 @@
-import os
-import sys
-import pytest
-import subprocess
-import pandas as pd
-
-def test_download_retail_data():
-    BENCHMARK_DIR = os.path.join(".", "retail_sales", "OrangeJuice_Pt_3Weeks_Weekly")
-    DATA_DIR = os.path.join(BENCHMARK_DIR, "data")
-    SCRIPT_PATH = os.path.join(BENCHMARK_DIR, "common", "download_data.r")
-    DATA_FILE_LIST = ["yx.csv", "storedemo.csv"]
-    # Remove data files if they are existed
-    for f in DATA_FILE_LIST:
-        file_path = os.path.join(DATA_DIR, f)
-        if os.path.exists(file_path):
-            os.remove(file_path)
-        assert not os.path.exists(file_path)
-    # Call data download script
-    try:
-        subprocess.call(["Rscript", SCRIPT_PATH])
-    except subprocess.CalledProcessError as e:
-        print(e.output)
-    # Check downloaded data
-    DATA_DIM_LIST = [(106139, 19), (83, 12)]
-    COLUMN_NAME_LIST = [["store", "brand", "week", "logmove", "constant",
-                         "price1", "price2", "price3", "price4", "price5",
-                         "price6", "price7", "price8", "price9", "price10",
-                         "price11", "deal", "feat", "profit"],
-                        ["STORE", "AGE60", "EDUC", "ETHNIC", "INCOME",
-                         "HHLARGE", "WORKWOM", "HVAL150", "SSTRDIST",
-                         "SSTRVOL", "CPDIST5", "CPWVOL5"]
-                        ]
-    for idx, f in enumerate(DATA_FILE_LIST):
-        file_path = os.path.join(DATA_DIR, f)
-        assert os.path.exists(file_path)
-        df = pd.read_csv(file_path, index_col=None)
-        assert df.shape == DATA_DIM_LIST[idx]
-        assert list(df) == COLUMN_NAME_LIST[idx]
-
-def test_download_energy_data():
-    BENCHMARK_DIR = os.path.join(".", "energy_load", "GEFCom2017_D_Prob_MT_hourly")
-    DATA_DIR = os.path.join(BENCHMARK_DIR, "data")
-    SCRIPT_PATH = os.path.join(BENCHMARK_DIR, "common", "download_data.py")
-    DATA_FILE_LIST = ["2011_smd_hourly.xls", "2012_smd_hourly.xls",
-                      "2013_smd_hourly.xls", "2014_smd_hourly.xls",
-                      "2015_smd_hourly.xls", "2016_smd_hourly.xls",
-                      "2017_smd_hourly.xlsx"]
-    DATA_DIM_LIST = [[(57, 5), (8760, 16)] + [(8760, 14)]*8,
-                     [(57, 5), (8784, 16)] + [(8784, 14)]*8,
-                     [(59, 5), (8760, 16)] + [(8760, 14)]*8,
-                     [(59, 5), (8760, 16)] + [(8760, 14)]*8 + [(0,1)],
-                     [(57, 5), (8760, 16)] + [(8760, 14)]*8,
-                     [(47, 10), (8784, 17)] + [(8784, 14)]*8,
-                     [(51, 13), (8760, 21)] + [(8760, 14)]*8]
-    # Remove data files if they are existed
-    for f in DATA_FILE_LIST:
-        file_path = os.path.join(DATA_DIR, f)
-        if os.path.exists(file_path):
-            os.remove(file_path)
-        assert not os.path.exists(file_path)
-    # Call data download script
-    try:
-        subprocess.check_output(["python", SCRIPT_PATH])
-    except subprocess.CalledProcessError as e:
-        print(e.output)
-    # Check downloaded data (only check dimensions since download_data.py checks column names)
-    for file_idx, f in enumerate(DATA_FILE_LIST):
-        file_path = os.path.join(DATA_DIR, f)
-        assert os.path.exists(file_path)
-        xls = pd.ExcelFile(file_path)
-        for sheet_idx, s in enumerate(xls.sheet_names):
-            assert xls.parse(s).shape == DATA_DIM_LIST[file_idx][sheet_idx]