Mirror of https://github.com/mozilla/docker-etl.git
added more tests to prophet_forecast (#264)
* refactored base_forecast and prophet_forecast to enable easier testing
* Apply suggestions from code review: change signatures of `fit` and `predict` to take arguments that default to attributes
  Co-authored-by: Brad Ochocki Szasz <bochocki@mozilla.com>
* add test for fit
* revert signatures
* made timezone-aware stamps naive
* finished base_forecast tests
* added tests for prophet class
* linting
* fixed divide by zero
* linting again
* adding tests to funnel_forecast
* added tests for funnel_forecast
* feat(workday): remove unwanted fields (#249)
  Co-authored-by: Julio Cezar Moscon <jcmoscon@gmail.com>
* fix(exit): Added sys.exit() call (#250)
  Co-authored-by: Julio Cezar Moscon <jcmoscon@gmail.com>
* fix issue with call to _get_crossvalidation_metric
* fixed type check
* added string case to aggregate_to_period and added tests
* revert file
* added more tests to prophet_forecast
* Update jobs/kpi-forecasting/kpi_forecasting/models/base_forecast.py
  Co-authored-by: Brad Ochocki Szasz <bochocki@mozilla.com>
* Brad easy fixes
* remove magic year
* feat(code): increasing the max_limit from 10 to 40. (#259)
  Co-authored-by: Julio Cezar Moscon <jcmoscon@gmail.com>
* typo
* revert bugfix in _add_regressors
* update tests to reflect reversion

---------

Co-authored-by: Brad Ochocki Szasz <bochocki@mozilla.com>
Co-authored-by: JCMOSCON1976 <167822375+JCMOSCON1976@users.noreply.github.com>
Co-authored-by: Julio Cezar Moscon <jcmoscon@gmail.com>
Co-authored-by: m-d-bowerman <mbowerman@mozilla.com>
Parent: bae0202d0b
Commit: 9a2bc3a34e
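The enabling refactor is visible in the prophet_forecast.py hunk below: prophet.Prophet construction moves out of `_fit` into a `_build_model` hook that tests can replace. A minimal sketch of the idea — `StubModel` and `Forecast` here are illustrative stand-ins, not the repository's classes:

import pandas as pd


class StubModel:
    """Stands in for prophet.Prophet; records the dataframe passed to fit."""

    def __init__(self, **kwargs):
        self.history = None

    def fit(self, df):
        self.history = df


class Forecast:
    parameters = {}

    def _build_model(self, parameter_dict):
        # production code would return prophet.Prophet(**parameter_dict)
        raise NotImplementedError

    def _fit(self, observed_df):
        self.model = self._build_model(self.parameters)
        self.model.fit(observed_df.rename(columns={"submission_date": "ds"}))


forecast = Forecast()
# patch the constructor hook, as the tests below do with mocker.patch.object
forecast._build_model = lambda params: StubModel(**params)
forecast._fit(pd.DataFrame({"submission_date": ["2024-01-01"], "value": [1]}))
assert forecast.model.history is not None  # fit ran against the stub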
jobs/kpi-forecasting/kpi_forecasting/models/base_forecast.py

@@ -189,7 +189,7 @@ class BaseForecast(abc.ABC):
         Returns:
             pd.DataFrame: metric dataframe for all metrics and aggregations
         """
-        self.summary_df = pd.concat(
+        summary_df = pd.concat(
             [
                 self._summarize(
                     self.forecast_df,
@@ -202,4 +202,21 @@ class BaseForecast(abc.ABC):
             ]
         )

+        # add Metric Hub metadata columns
+        summary_df["metric_alias"] = self.metric_hub.alias.lower()
+        summary_df["metric_hub_app_name"] = self.metric_hub.app_name.lower()
+        summary_df["metric_hub_slug"] = self.metric_hub.slug.lower()
+        summary_df["metric_start_date"] = pd.to_datetime(self.metric_hub.min_date)
+        summary_df["metric_end_date"] = pd.to_datetime(self.metric_hub.max_date)
+        summary_df["metric_collected_at"] = self.collected_at
+
+        # add forecast model metadata columns
+        summary_df["forecast_start_date"] = self.start_date
+        summary_df["forecast_end_date"] = self.end_date
+        summary_df["forecast_trained_at"] = self.trained_at
+        summary_df["forecast_predicted_at"] = self.predicted_at
+        summary_df["forecast_parameters"] = self.metadata_params
+
+        self.summary_df = summary_df
+
         return self.summary_df
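With this change, `summarize` concatenates the per-period summaries and stamps the Metric Hub and model metadata on once, in the base class. A rough sketch of that flow with a made-up metric-hub stand-in (all field values here are invented):

import collections

import pandas as pd

MetricHub = collections.namedtuple(
    "MetricHub", ["alias", "app_name", "slug", "min_date", "max_date"]
)
hub = MetricHub("DAU", "Firefox", "dau", "2024-01-01", "2024-01-01")

# concatenate one summary frame per aggregation period...
summary_df = pd.concat(
    [pd.DataFrame({"aggregation_period": [p], "value": [0.0]}) for p in ("day", "month")]
)
# ...then stamp metadata columns onto the combined frame once
summary_df["metric_alias"] = hub.alias.lower()
summary_df["metric_start_date"] = pd.to_datetime(hub.min_date)
print(summary_df)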
jobs/kpi-forecasting/kpi_forecasting/models/prophet_forecast.py

@@ -20,15 +20,20 @@ class ProphetForecast(BaseForecast):
     def column_names_map(self) -> Dict[str, str]:
         return {"submission_date": "ds", "value": "y"}

-    def _fit(self, observed_df) -> None:
-        self.model = prophet.Prophet(
-            **self.parameters,
+    def _build_model(self, parameter_dict):
+        model = prophet.Prophet(
+            **parameter_dict,
             uncertainty_samples=self.number_of_simulations,
             mcmc_samples=0,
         )

         if self.use_holidays:
-            self.model.add_country_holidays(country_name="US")
+            model.add_country_holidays(country_name="US")
+
+        return model
+
+    def _fit(self, observed_df) -> None:
+        self.model = self._build_model(self.parameters)

         # Modify observed data to have column names that Prophet expects, and fit
         # the model
@@ -235,24 +240,6 @@ class ProphetForecast(BaseForecast):
         # add summary metadata columns
         df["aggregation_period"] = period.lower()

-        # reorder columns to make interpretation easier
-        df = df[["submission_date", "aggregation_period", "source", "measure", "value"]]
-
-        # add Metric Hub metadata columns
-        df["metric_alias"] = self.metric_hub.alias.lower()
-        df["metric_hub_app_name"] = self.metric_hub.app_name.lower()
-        df["metric_hub_slug"] = self.metric_hub.slug.lower()
-        df["metric_start_date"] = pd.to_datetime(self.metric_hub.min_date)
-        df["metric_end_date"] = pd.to_datetime(self.metric_hub.max_date)
-        df["metric_collected_at"] = self.collected_at
-
-        # add forecast model metadata columns
-        df["forecast_start_date"] = self.start_date
-        df["forecast_end_date"] = self.end_date
-        df["forecast_trained_at"] = self.trained_at
-        df["forecast_predicted_at"] = self.predicted_at
-        df["forecast_parameters"] = self.metadata_params
-
         return df

     def _summarize_legacy(self) -> pd.DataFrame:
test_base_forecast.py

@@ -1,14 +1,25 @@
 from typing import List
+import collections
+from datetime import date, datetime
+from dateutil.relativedelta import relativedelta

 import pytest
 import pandas as pd
 from dotmap import DotMap
 import numpy as np
-from datetime import datetime, timedelta, timezone
+from datetime import timedelta, timezone


 from kpi_forecasting.models.base_forecast import BaseForecast

+# Arbitrarily choose some date to use for the tests
+TEST_DATE = date(2024, 1, 1)
+TEST_DATE_STR = TEST_DATE.strftime("%Y-%m-%d")
+TEST_DATE_NEXT_DAY = date(2024, 1, 2)
+TEST_DATE_NEXT_DAY_STR = TEST_DATE_NEXT_DAY.strftime("%Y-%m-%d")
+TEST_PREDICT_END = TEST_DATE + relativedelta(months=2)
+TEST_PREDICT_END_STR = TEST_PREDICT_END.strftime("%Y-%m-%d")
+

 class BadClass(BaseForecast):
     pass
@@ -29,8 +40,9 @@ def good_class():
             self.observed_df = pd.DataFrame(
                 {
                     "submission_date": [
-                        pd.to_datetime("2020-01-01"),
-                        pd.to_datetime("1990-01-01"),
+                        TEST_DATE,
+                        TEST_DATE
+                        - relativedelta(years=1),  # just an arbitrary date in the past
                     ]
                 }
             )
@@ -76,8 +88,8 @@ def test_not_implemented():


 def test_post_init(good_class):
-    start_date = "2124-01-01"
-    end_date = "2124-02-02"
+    start_date = TEST_DATE_STR
+    end_date = TEST_PREDICT_END_STR
     good_class = good_class(
         model_type="test",
         parameters=DotMap(),
@@ -108,7 +120,7 @@ def test_post_init_default_dates(good_class):
     )
     # this is the max date of the self.observed_data['submission_date'] plus one day
     # from the object definion
-    start_date = pd.to_datetime("2020-01-02")
+    start_date = TEST_DATE_NEXT_DAY
     end_date = (
         datetime.now(timezone.utc).replace(tzinfo=None) + timedelta(weeks=78)
     ).date()
@@ -123,15 +135,15 @@ def test_fit(good_class):
         model_type="test",
         parameters=DotMap(),
         use_holidays=None,
-        start_date="2124-01-01",
-        end_date="2124-02-02",
+        start_date=TEST_DATE_STR,
+        end_date=TEST_PREDICT_END_STR,
         metric_hub=None,
     )
     good_class.fit()
     assert good_class.model

-    #
-    assert good_class.model.is_fit == pd.to_datetime("2020-01-01")
+    # model sets is_fit to the largest day in the observed data
+    assert good_class.model.is_fit == TEST_DATE


 def test_predict_and_validate(good_class):
@@ -139,8 +151,8 @@ def test_predict_and_validate(good_class):
         model_type="test",
         parameters=DotMap(),
         use_holidays=None,
-        start_date="2124-01-01",
-        end_date="2124-02-02",
+        start_date=TEST_DATE_STR,
+        end_date=TEST_PREDICT_END_STR,
         metric_hub=None,
     )
     # overwrite date range set in __post_init__
@@ -154,12 +166,24 @@ def test_summarize(good_class):
         model_type="test",
         parameters=DotMap(),
         use_holidays=None,
-        start_date="2124-01-01",
-        end_date="2124-02-02",
+        start_date=TEST_DATE_STR,
+        end_date=TEST_PREDICT_END_STR,
         metric_hub=None,
     )
     good_class.forecast_df = np.array([1, 2])
     good_class.observed_df = np.array([3, 4])
+    MetricHub = collections.namedtuple(
+        "MetricHub",
+        ["alias", "app_name", "slug", "min_date", "max_date"],
+    )
+
+    dummy_metric_hub = MetricHub("", "", "", TEST_DATE_STR, TEST_DATE_STR)
+
+    # add it here rather than in __init__ so it doesn't try to load data
+    good_class.metric_hub = dummy_metric_hub
+    good_class.trained_at = ""
+    good_class.predicted_at = ""
+
    number_val = 10
    output = good_class.summarize(
        periods=["a", "b", "c"], numpy_aggregations=["sum"], percentiles=["percentiles"]
@@ -170,5 +194,27 @@ def test_summarize(good_class):
             for el in ["a", "b", "c"]
         ]
     )
-    assert output.reset_index(drop=True).equals(expected_output)
-    assert good_class.summary_df.reset_index(drop=True).equals(expected_output)
+    # not going to check all the metadata columns
+    # in assert_frame_equal. Just make sure they're there
+    metadata_columns = {
+        "metric_alias",
+        "metric_hub_app_name",
+        "metric_hub_slug",
+        "metric_start_date",
+        "metric_end_date",
+        "metric_collected_at",
+        "forecast_start_date",
+        "forecast_end_date",
+        "forecast_trained_at",
+        "forecast_predicted_at",
+        "forecast_parameters",
+    }
+    assert set(expected_output.columns) | metadata_columns == set(output.columns)
+
+    pd.testing.assert_frame_equal(
+        output[expected_output.columns].reset_index(drop=True), expected_output
+    )
+    pd.testing.assert_frame_equal(
+        good_class.summary_df[expected_output.columns].reset_index(drop=True),
+        expected_output,
+    )
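The assertion pattern added in test_summarize above — check metadata columns by name only, then compare just the columns the test constructed — can be exercised in isolation. A minimal sketch (the column names and values here are invented):

import pandas as pd

output = pd.DataFrame({"value": [1], "metric_alias": ["x"]})  # what summarize returns
expected = pd.DataFrame({"value": [1]})                       # what the test builds
metadata_columns = {"metric_alias"}

# the metadata columns are present...
assert set(expected.columns) | metadata_columns == set(output.columns)
# ...and the constructed columns match exactly
pd.testing.assert_frame_equal(output[expected.columns], expected)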
test_funnel_forecast.py

@@ -1,6 +1,8 @@
 """tests for the funnel forecast module"""

 import collections
+from datetime import date, datetime
+from dateutil.relativedelta import relativedelta

 import pandas as pd
 from dotmap import DotMap
@@ -11,13 +13,21 @@ import numpy as np
 from kpi_forecasting.configs.model_inputs import ProphetRegressor, ProphetHoliday
 from kpi_forecasting.models.funnel_forecast import SegmentModelSettings, FunnelForecast

+# Arbitrarily choose some date to use for the tests
+TEST_DATE = date(2024, 1, 1)
+TEST_DATE_STR = TEST_DATE.strftime("%Y-%m-%d")
+TEST_DATE_NEXT_DAY = date(2024, 1, 2)
+TEST_DATE_NEXT_DAY_STR = TEST_DATE_NEXT_DAY.strftime("%Y-%m-%d")
+TEST_PREDICT_END = TEST_DATE + relativedelta(months=2)
+TEST_PREDICT_END_STR = TEST_PREDICT_END.strftime("%Y-%m-%d")
+

 @pytest.fixture()
 def forecast():
     """This mocks a generic forecast object"""
     # 2024-01-01 is arbitarily chosen as a future date
-    predict_start_date = "2124-01-01"
-    predict_end_date = "2124-03-01"
+    predict_start_date = TEST_DATE_STR
+    predict_end_date = TEST_PREDICT_END_STR

     forecast = FunnelForecast(
         model_type="test",
@@ -37,8 +47,8 @@ def segment_info_fit_tests():
     in the functions that test fit methods"""

     # 2024-01-01 is arbitarily chosen as a future date
-    A1_start_date = "2124-01-01"
-    A2_start_date = "2124-01-02"
+    A1_start_date = TEST_DATE_STR
+    A2_start_date = TEST_DATE_NEXT_DAY_STR

     segment_info_dict = {
         "A1": {
@@ -83,9 +93,8 @@ def funnel_forecast_for_fit_tests(segment_info_fit_tests, mocker):
     }

     parameter_dotmap = DotMap(parameter_dict)
-    predict_start_date = "2124-01-01"
-    predict_end_date = "2124-01-02"
-
+    predict_start_date = TEST_DATE_STR
+    predict_end_date = TEST_DATE_NEXT_DAY_STR
     forecast = FunnelForecast(
         model_type="test",
         parameters=parameter_dotmap,
@@ -178,8 +187,8 @@ def test_combine_forecast_observed(mocker, forecast):
     forecast_df = pd.DataFrame(
         {
             "submission_date": [
-                pd.to_datetime("2124-01-01").date(),
-                pd.to_datetime("2124-01-02").date(),
+                TEST_DATE,
+                TEST_DATE_NEXT_DAY,
             ],
         }
     )
@@ -187,8 +196,8 @@ def test_combine_forecast_observed(mocker, forecast):
     observed_df = pd.DataFrame(
         {
             "submission_date": [
-                pd.to_datetime("2124-01-01").date(),
-                pd.to_datetime("2124-01-02").date(),
+                TEST_DATE,
+                TEST_DATE_NEXT_DAY,
             ],
             "a": ["A1", "A1"],
             "value": [5, 6],
@@ -238,8 +247,8 @@ def test_under_summarize(mocker, forecast):
     forecast_df = pd.DataFrame(
         {
             "submission_date": [
-                pd.to_datetime("2124-01-01").date(),
-                pd.to_datetime("2124-01-02").date(),
+                TEST_DATE,
+                TEST_DATE_NEXT_DAY,
             ],
         }
     )
@@ -249,11 +258,11 @@ def test_under_summarize(mocker, forecast):
     observed_df = pd.DataFrame(
         {
             "submission_date": [
-                pd.to_datetime("2123-01-01").date(),
-                pd.to_datetime("2124-01-01").date(),
-                pd.to_datetime("2124-01-02").date(),
-                pd.to_datetime("2124-01-01").date(),
-                pd.to_datetime("2124-01-02").date(),
+                TEST_DATE - relativedelta(months=1),
+                TEST_DATE,
+                TEST_DATE_NEXT_DAY,
+                TEST_DATE,
+                TEST_DATE_NEXT_DAY,
             ],
             "a": ["A1", "A1", "A1", "A2", "A2"],
             "value": [10, 20, 30, 40, 50],
@@ -265,7 +274,7 @@ def test_under_summarize(mocker, forecast):
         ["start_date", "forecast_df", "segment", "trained_parameters"],
     )
     dummy_segment_settings = SegmentSettings(
-        start_date="2124-01-01",
+        start_date=TEST_DATE_STR,
         forecast_df=forecast_df.copy(),
         segment={"a": "A1"},
         trained_parameters={"trained_parameters": "yes"},
@@ -288,8 +297,8 @@ def test_under_summarize(mocker, forecast):
     observed_expected_df = pd.DataFrame(
         {
             "submission_date": [
-                pd.to_datetime("2124-01-01").date(),
-                pd.to_datetime("2124-01-02").date(),
+                TEST_DATE,
+                TEST_DATE_NEXT_DAY,
             ],
             "a": ["A1", "A1"],
             "value": [20, 30],
@@ -334,7 +343,7 @@ def test_summarize(mocker, forecast):
         ["alias", "app_name", "slug", "min_date", "max_date"],
     )

-    dummy_metric_hub = MetricHub("", "", "", "2124-01-01", "2124-01-01")
+    dummy_metric_hub = MetricHub("", "", "", TEST_DATE_STR, TEST_DATE_STR)

     # forecast predictions are set with the
     # mock_aggregate_forecast_observed function so they
@@ -342,8 +351,8 @@ def test_summarize(mocker, forecast):
     forecast_df = pd.DataFrame(
         {
             "submission_date": [
-                pd.to_datetime("2124-01-01").date(),
-                pd.to_datetime("2124-01-02").date(),
+                TEST_DATE,
+                TEST_DATE_NEXT_DAY,
             ],
         }
     )
@@ -353,11 +362,11 @@ def test_summarize(mocker, forecast):
     observed_df = pd.DataFrame(
         {
             "submission_date": [
-                pd.to_datetime("2123-01-01").date(),
-                pd.to_datetime("2124-01-01").date(),
-                pd.to_datetime("2124-01-02").date(),
-                pd.to_datetime("2124-01-01").date(),
-                pd.to_datetime("2124-01-02").date(),
+                TEST_DATE - relativedelta(months=1),
+                TEST_DATE,
+                TEST_DATE_NEXT_DAY,
+                TEST_DATE,
+                TEST_DATE_NEXT_DAY,
             ],
             "a": ["A1", "A1", "A1", "A2", "A2"],
             "value": [10, 20, 30, 40, 50],
@@ -373,7 +382,7 @@ def test_summarize(mocker, forecast):
     # we're only testing that it is concatenated properly
     # with the segment data added
     dummy_segment_settings_A1 = SegmentSettings(
-        start_date="2124-01-01",
+        start_date=TEST_DATE_STR,
         forecast_df=forecast_df.copy(),
         segment={"a": "A1"},
         trained_parameters={"trained_parameters": "yes"},
@@ -381,7 +390,7 @@ def test_summarize(mocker, forecast):
     )

     dummy_segment_settings_A2 = SegmentSettings(
-        start_date="2124-01-01",
+        start_date=TEST_DATE_STR,
         forecast_df=forecast_df.copy(),
         segment={"a": "A2"},
         trained_parameters={"trained_parameters": "yes"},
@@ -418,10 +427,10 @@ def test_summarize(mocker, forecast):
     observed_expected_df = pd.DataFrame(
         {
             "submission_date": [
-                pd.to_datetime("2124-01-01").date(),
-                pd.to_datetime("2124-01-02").date(),
-                pd.to_datetime("2124-01-01").date(),
-                pd.to_datetime("2124-01-02").date(),
+                TEST_DATE,
+                TEST_DATE_NEXT_DAY,
+                TEST_DATE,
+                TEST_DATE_NEXT_DAY,
             ],
             "a": ["A1", "A1", "A2", "A2"],
             "value": [20, 30, 40, 50],
@@ -491,8 +500,8 @@ def test_summarize(mocker, forecast):
 def test_under_predict(mocker):
     """testing _predict"""
     # set segment models
-    # 2124-01-01 chosen as a artibrary date to center tests on
-    A1_start_date = "2124-01-01"
+    A1_start_date = TEST_DATE_STR

     parameter_dict = {
         "model_setting_split_dim": "a",
         "segment_settings": {
@@ -508,8 +517,8 @@ def test_under_predict(mocker):
     }

     parameter_dotmap = DotMap(parameter_dict)
-    predict_start_date = "2124-01-02"
-    predict_end_date = "2124-03-01"
+    predict_start_date = TEST_DATE_NEXT_DAY_STR
+    predict_end_date = TEST_PREDICT_END_STR

     forecast = FunnelForecast(
         model_type="test",
@@ -535,8 +544,8 @@ def test_under_predict(mocker):
             "b": ["B1", "B2"],
             "y": [0, 1],
             "submission_date": [
-                pd.to_datetime("2124-01-01").date(),
-                pd.to_datetime("2124-01-02").date(),
+                TEST_DATE,
+                TEST_DATE_NEXT_DAY,
             ],
         }
     )
@@ -557,8 +566,8 @@ def test_under_predict(mocker):
     dates_to_predict = pd.DataFrame(
         {
             "submission_date": [
-                pd.to_datetime("2124-01-01").date(),
-                pd.to_datetime("2124-01-02").date(),
+                TEST_DATE,
+                TEST_DATE_NEXT_DAY,
             ]
         }
     )
@@ -574,8 +583,8 @@ def test_under_predict(mocker):
         {
             0: [0, model_value],
             "submission_date": [
-                pd.to_datetime("2124-01-01").date(),
-                pd.to_datetime("2124-01-02").date(),
+                TEST_DATE,
+                TEST_DATE_NEXT_DAY,
             ],
         }
     )
@@ -623,10 +632,10 @@ def test_predict(funnel_forecast_for_fit_tests, segment_info_fit_tests):
             "b": ["B1", "B2", "B1", "B2"],
             "y": [-1, 1, -1, 1],
             "submission_date": [
-                pd.to_datetime("2124-01-01").date(),
-                pd.to_datetime("2124-01-02").date(),
-                pd.to_datetime("2124-01-01").date(),
-                pd.to_datetime("2124-01-02").date(),
+                TEST_DATE,
+                TEST_DATE_NEXT_DAY,
+                TEST_DATE,
+                TEST_DATE_NEXT_DAY,
             ],
         }
     )
@@ -652,8 +661,8 @@ def test_predict(funnel_forecast_for_fit_tests, segment_info_fit_tests):
         {
             0: [0, model_value],
             "submission_date": [
-                pd.to_datetime("2124-01-01").date(),
-                pd.to_datetime("2124-01-02").date(),
+                TEST_DATE,
+                TEST_DATE_NEXT_DAY,
             ],
         }
     )
@@ -664,7 +673,7 @@ def test_predict(funnel_forecast_for_fit_tests, segment_info_fit_tests):
         expected_raw["submission_date"]
         >= pd.to_datetime(funnel_forecast_for_fit_tests.start_date).date()
     )
-    expected = expected_raw[expected_time_filter]
+    expected = expected_raw[expected_time_filter].reset_index(drop=True)

     forecast_df = segment.forecast_df
     pd.testing.assert_frame_equal(forecast_df, expected)
@@ -717,8 +726,8 @@ def test_auto_tuning(forecast, mocker):
     # set one segment with two sets of grid parameters
     segment_settings = SegmentModelSettings(
         segment={"a": "A1"},
-        start_date="2124-01-01",
-        end_date="2124-03-01",
+        start_date=TEST_DATE_STR,
+        end_date=TEST_PREDICT_END_STR,
         holidays=[],
         regressors=[],
         grid_parameters={"param1": [1, 2], "param2": [20, 10]},
@@ -738,8 +747,8 @@ def test_auto_tuning(forecast, mocker):
             "a": ["A1", "A1"],
             "b": ["B1", "B2"],
             "submission_date": [
-                pd.to_datetime("2124-01-01").date(),
-                pd.to_datetime("2124-01-01").date(),
+                TEST_DATE,
+                TEST_DATE,
             ],
         }
     )
@@ -760,10 +769,10 @@ def test_under_fit(funnel_forecast_for_fit_tests, segment_info_fit_tests):
             "a": ["A1", "A1", "A2", "A2"],
             "b": ["B1", "B2", "B1", "B2"],
             "submission_date": [
-                pd.to_datetime("2124-01-01").date(),
-                pd.to_datetime("2124-01-02").date(),
-                pd.to_datetime("2124-01-01").date(),
-                pd.to_datetime("2124-01-02").date(),
+                TEST_DATE,
+                TEST_DATE_NEXT_DAY,
+                TEST_DATE,
+                TEST_DATE_NEXT_DAY,
             ],
         }
     )
@@ -807,10 +816,10 @@ def test_fit(funnel_forecast_for_fit_tests, segment_info_fit_tests):
             "a": ["A1", "A1", "A2", "A2"],
             "b": ["B1", "B2", "B1", "B2"],
             "submission_date": [
-                pd.to_datetime("2124-01-01").date(),
-                pd.to_datetime("2124-01-02").date(),
-                pd.to_datetime("2124-01-01").date(),
-                pd.to_datetime("2124-01-02").date(),
+                TEST_DATE,
+                TEST_DATE_NEXT_DAY,
+                TEST_DATE,
+                TEST_DATE_NEXT_DAY,
             ],
         }
     )
@@ -872,8 +881,8 @@ def test_set_segment_models():
     }

     parameter_dotmap = DotMap(parameter_dict)
-    predict_start_date = "2124-01-01"
-    predict_end_date = "2124-03-01"
+    predict_start_date = TEST_DATE_STR
+    predict_end_date = TEST_PREDICT_END_STR

     forecast = FunnelForecast(
         model_type="test",
@@ -951,8 +960,8 @@ def test_set_segment_models_exception():
     }

     parameter_dotmap = DotMap(parameter_dict)
-    predict_start_date = "2124-01-01"
-    predict_end_date = "2124-03-01"
+    predict_start_date = TEST_DATE_STR
+    predict_end_date = TEST_PREDICT_END_STR

     forecast = FunnelForecast(
         model_type="test",
@@ -982,6 +991,14 @@ def test_fill_regressor_dates(forecast):
     """test _fill_regressor_dates
     the name in the regressor info indicates which case is being tested
     Dates are chosen arbitrarily"""
+    # get the set start and end dates for the forecast fixture
+    # as datetime objects
+    default_start_datetime = datetime(TEST_DATE.year, TEST_DATE.month, TEST_DATE.day)
+    default_end_datetime = datetime(
+        TEST_PREDICT_END.year, TEST_PREDICT_END.month, TEST_PREDICT_END.day
+    )
+
+    # set the start date with an arbitrary date
     regressor_info = {
         "name": "only_start",
         "description": "only has a start",
@@ -990,8 +1007,11 @@ def test_fill_regressor_dates(forecast):
     regressor = ProphetRegressor(**regressor_info)
     forecast._fill_regressor_dates(regressor)
     assert regressor.start_date == pd.to_datetime("2020-08-15")
-    assert regressor.end_date == pd.to_datetime("2124-03-01")
+
+    # this is the end dat for the forecast fixture
+    assert regressor.end_date == default_end_datetime

+    # set the end date with an arbitrary date
     regressor_info = {
         "name": "only_end",
         "description": "only has a end",
@@ -999,9 +1019,11 @@ def test_fill_regressor_dates(forecast):
     }
     regressor = ProphetRegressor(**regressor_info)
     forecast._fill_regressor_dates(regressor)
-    assert regressor.start_date == pd.to_datetime("2124-01-01")
+    # the start date for the forecast fixture is TEST_DATE
+    assert regressor.start_date == default_start_datetime
     assert regressor.end_date == pd.to_datetime("2125-08-15")

+    # set both the start and end dates to arbitrary dates
     regressor_info = {
         "name": "both",
         "description": "only has a start",
@@ -1013,15 +1035,17 @@ def test_fill_regressor_dates(forecast):
     assert regressor.start_date == pd.to_datetime("2020-08-15")
     assert regressor.end_date == pd.to_datetime("2020-09-15")

+    # use the defaults for both
     regressor_info = {
         "name": "neither",
         "description": "nothin to see here",
     }
     regressor = ProphetRegressor(**regressor_info)
     forecast._fill_regressor_dates(regressor)
-    assert regressor.start_date == pd.to_datetime("2124-01-01")
-    assert regressor.end_date == pd.to_datetime("2124-03-01")
+    assert regressor.start_date == default_start_datetime
+    assert regressor.end_date == default_end_datetime

+    # use arbitrary out of order dates to set
     regressor_info = {
         "name": "out_of_order",
         "description": "best better break",
@@ -1039,6 +1063,11 @@ def test_fill_regressor_dates(forecast):
 def test_add_regressors(forecast):
     """test add regressors
     test case for each element of regressor_list_raw is indicated in name"""
+
+    # choose arbitrary dates for dates
+    # name indicates the relationship of the window
+    # to the timeframe of the data as defined in the ds
+    # column of df below
     regressor_list_raw = [
         {
             "name": "all_in",
@@ -1120,8 +1149,8 @@ def test_build_train_dataframe_no_regressors(forecast):
     }
     segment_settings = SegmentModelSettings(
         segment={"a": 1, "b": 2},
-        start_date="2124-01-01",
-        end_date="2124-02-01",
+        start_date=TEST_DATE_STR,
+        end_date=TEST_PREDICT_END_STR,
         holidays=[],
         regressors=[ProphetRegressor(**r) for r in regressor_list],
         grid_parameters=grid_parameters,
@@ -1134,12 +1163,12 @@ def test_build_train_dataframe_no_regressors(forecast):
             "b": [1, 1, 2, 2, 2, 2],
             "y": [1, 2, 3, 4, 5, 6],
             "submission_date": [
-                pd.to_datetime("2124-12-01").date(),
-                pd.to_datetime("2124-12-02").date(),
-                pd.to_datetime("2124-01-01").date(),
-                pd.to_datetime("2124-01-02").date(),
-                pd.to_datetime("2123-01-01").date(),
-                pd.to_datetime("2123-01-02").date(),
+                TEST_DATE - relativedelta(months=1),
+                TEST_DATE_NEXT_DAY - relativedelta(months=1),
+                TEST_DATE,
+                TEST_DATE_NEXT_DAY,
+                TEST_DATE + relativedelta(months=1),
+                TEST_DATE_NEXT_DAY + relativedelta(months=1),
             ],
         }
     )
@@ -1153,8 +1182,8 @@ def test_build_train_dataframe_no_regressors(forecast):
             "b": [2, 2],
             "y": [3, 4],
             "ds": [
-                pd.to_datetime("2124-01-01").date(),
-                pd.to_datetime("2124-01-02").date(),
+                TEST_DATE,
+                TEST_DATE_NEXT_DAY,
             ],
         }
     )
@@ -1172,8 +1201,8 @@ def test_build_train_dataframe_no_regressors(forecast):
             "b": [2, 2],
             "y": [3, 4],
             "ds": [
-                pd.to_datetime("2124-01-01").date(),
-                pd.to_datetime("2124-01-02").date(),
+                TEST_DATE,
+                TEST_DATE_NEXT_DAY,
             ],
             "floor": [1.5, 1.5],
             "cap": [6.0, 6.0],
@@ -1193,20 +1222,24 @@ def test_build_train_dataframe(forecast):
         {
             "name": "all_in",
             "description": "it's all in",
-            "start_date": "2124-01-01",
-            "end_date": "2124-01-06",
+            "start_date": TEST_DATE_STR,
+            "end_date": (TEST_DATE + relativedelta(days=6)).strftime("%Y-%m-%d"),
         },
         {
             "name": "all_out",
             "description": "it's all in",
-            "start_date": "2124-02-01",
-            "end_date": "2124-02-06",
+            "start_date": (TEST_DATE + relativedelta(months=1)).strftime("%Y-%m-%d"),
+            "end_date": (TEST_DATE + relativedelta(months=1, days=6)).strftime(
+                "%Y-%m-%d"
+            ),
         },
         {
             "name": "just_end",
             "description": "just the second one",
-            "start_date": "2124-01-02",
-            "end_date": "2124-02-06",
+            "start_date": (TEST_DATE + relativedelta(days=1)).strftime("%Y-%m-%d"),
+            "end_date": (TEST_DATE + relativedelta(months=1, days=6)).strftime(
+                "%Y-%m-%d"
+            ),
         },
     ]

@@ -1226,8 +1259,8 @@ def test_build_train_dataframe(forecast):
     }
     segment_settings = SegmentModelSettings(
         segment={"a": 1, "b": 2},
-        start_date="2124-01-01",
-        end_date="2124-02-01",
+        start_date=TEST_DATE_STR,
+        end_date=(TEST_DATE + relativedelta(months=1)).strftime("%Y-%m-%d"),
         holidays=[],
         regressors=[ProphetRegressor(**r) for r in regressor_list],
         grid_parameters=grid_parameters,
@@ -1240,12 +1273,12 @@ def test_build_train_dataframe(forecast):
             "b": [1, 1, 2, 2, 2, 2],
             "y": [1, 2, 3, 4, 5, 6],
             "submission_date": [
-                pd.to_datetime("2124-12-01").date(),
-                pd.to_datetime("2124-12-02").date(),
-                pd.to_datetime("2124-01-01").date(),
-                pd.to_datetime("2124-01-02").date(),
-                pd.to_datetime("2123-01-01").date(),
-                pd.to_datetime("2123-01-02").date(),
+                TEST_DATE - relativedelta(months=1),
+                TEST_DATE_NEXT_DAY - relativedelta(months=1),
+                TEST_DATE,
+                TEST_DATE_NEXT_DAY,
+                TEST_DATE + relativedelta(months=1),
+                TEST_DATE_NEXT_DAY + relativedelta(months=1),
             ],
         }
     )
@@ -1258,8 +1291,8 @@ def test_build_train_dataframe(forecast):
             "b": [2, 2],
             "y": [3, 4],
             "ds": [
-                pd.to_datetime("2124-01-01").date(),
-                pd.to_datetime("2124-01-02").date(),
+                TEST_DATE,
+                TEST_DATE_NEXT_DAY,
             ],
             "all_in": [0, 0],
             "all_out": [
@@ -1282,8 +1315,8 @@ def test_build_train_dataframe(forecast):
             "b": [2, 2],
             "y": [3, 4],
             "ds": [
-                pd.to_datetime("2124-01-01").date(),
-                pd.to_datetime("2124-01-02").date(),
+                TEST_DATE,
+                TEST_DATE_NEXT_DAY,
             ],
             "all_in": [0, 0],
             "all_out": [1, 1],
@@ -1320,8 +1353,8 @@ def test_build_predict_dataframe_no_regressors(forecast):
     }
     segment_settings = SegmentModelSettings(
         segment={"a": 1, "b": 2},
-        start_date="2124-01-01",
-        end_date="2124-02-01",
+        start_date=TEST_DATE_STR,
+        end_date=TEST_PREDICT_END_STR,
         holidays=[],
         regressors=[ProphetRegressor(**r) for r in regressor_list],
         grid_parameters=grid_parameters,
@@ -1334,12 +1367,12 @@ def test_build_predict_dataframe_no_regressors(forecast):
     dates_to_predict = pd.DataFrame(
         {
             "submission_date": [
-                pd.to_datetime("2124-12-01").date(),
-                pd.to_datetime("2124-12-02").date(),
-                pd.to_datetime("2124-01-01").date(),
-                pd.to_datetime("2124-01-02").date(),
-                pd.to_datetime("2123-01-01").date(),
-                pd.to_datetime("2123-01-02").date(),
+                TEST_DATE - relativedelta(months=1),
+                TEST_DATE_NEXT_DAY - relativedelta(months=1),
+                TEST_DATE,
+                TEST_DATE_NEXT_DAY,
+                TEST_DATE,
+                TEST_DATE_NEXT_DAY,
             ],
         }
     )
@@ -1350,12 +1383,12 @@ def test_build_predict_dataframe_no_regressors(forecast):
     expected_predict_df = pd.DataFrame(
         {
             "ds": [
-                pd.to_datetime("2124-12-01").date(),
-                pd.to_datetime("2124-12-02").date(),
-                pd.to_datetime("2124-01-01").date(),
-                pd.to_datetime("2124-01-02").date(),
-                pd.to_datetime("2123-01-01").date(),
-                pd.to_datetime("2123-01-02").date(),
+                TEST_DATE - relativedelta(months=1),
+                TEST_DATE_NEXT_DAY - relativedelta(months=1),
+                TEST_DATE,
+                TEST_DATE_NEXT_DAY,
+                TEST_DATE,
+                TEST_DATE_NEXT_DAY,
             ],
         }
     )
@@ -1372,12 +1405,12 @@ def test_build_predict_dataframe_no_regressors(forecast):
     expected_predict_wlog_df = pd.DataFrame(
         {
             "ds": [
-                pd.to_datetime("2124-12-01").date(),
-                pd.to_datetime("2124-12-02").date(),
-                pd.to_datetime("2124-01-01").date(),
-                pd.to_datetime("2124-01-02").date(),
-                pd.to_datetime("2123-01-01").date(),
-                pd.to_datetime("2123-01-02").date(),
+                TEST_DATE - relativedelta(months=1),
+                TEST_DATE_NEXT_DAY - relativedelta(months=1),
+                TEST_DATE,
+                TEST_DATE_NEXT_DAY,
+                TEST_DATE,
+                TEST_DATE_NEXT_DAY,
             ],
             "floor": [-1.0, -1.0, -1.0, -1.0, -1.0, -1.0],
             "cap": [10.0, 10.0, 10.0, 10.0, 10.0, 10.0],
@@ -1397,20 +1430,24 @@ def test_build_predict_dataframe(forecast):
         {
             "name": "all_in",
             "description": "it's all in",
-            "start_date": "2124-01-01",
-            "end_date": "2124-01-06",
+            "start_date": TEST_DATE_STR,
+            "end_date": (TEST_DATE + relativedelta(days=6)).strftime("%Y-%m-%d"),
         },
         {
             "name": "all_out",
             "description": "it's all in",
-            "start_date": "2124-02-01",
-            "end_date": "2124-02-06",
+            "start_date": (TEST_DATE + relativedelta(months=1)).strftime("%Y-%m-%d"),
+            "end_date": (TEST_DATE + relativedelta(months=1, days=6)).strftime(
+                "%Y-%m-%d"
+            ),
         },
         {
             "name": "just_end",
             "description": "just the second one",
-            "start_date": "2124-01-02",
-            "end_date": "2124-02-06",
+            "start_date": (TEST_DATE + relativedelta(days=1)).strftime("%Y-%m-%d"),
+            "end_date": (TEST_DATE + relativedelta(months=1, days=6)).strftime(
+                "%Y-%m-%d"
+            ),
         },
     ]

@@ -1430,8 +1467,8 @@ def test_build_predict_dataframe(forecast):
     }
     segment_settings = SegmentModelSettings(
         segment={"a": 1, "b": 2},
-        start_date="2124-01-01",
-        end_date="2124-02-01",
+        start_date=TEST_DATE_STR,
+        end_date=TEST_PREDICT_END_STR,
         holidays=[],
         regressors=[ProphetRegressor(**r) for r in regressor_list],
         grid_parameters=grid_parameters,
@@ -1443,10 +1480,7 @@ def test_build_predict_dataframe(forecast):

     dates_to_predict = pd.DataFrame(
         {
-            "submission_date": [
-                pd.to_datetime("2124-01-01").date(),
-                pd.to_datetime("2124-01-02").date(),
-            ],
+            "submission_date": [TEST_DATE, TEST_DATE_NEXT_DAY],
         }
     )

@@ -1456,10 +1490,7 @@ def test_build_predict_dataframe(forecast):
     )
     expected_train_df = pd.DataFrame(
         {
-            "ds": [
-                pd.to_datetime("2124-01-01").date(),
-                pd.to_datetime("2124-01-02").date(),
-            ],
+            "ds": [TEST_DATE, TEST_DATE_NEXT_DAY],
             "all_in": [0, 0],
             "all_out": [1, 1],
             "just_end": [1, 0],
@@ -1477,10 +1508,7 @@ def test_build_predict_dataframe(forecast):
     )
     expected_train_wlog_df = pd.DataFrame(
         {
-            "ds": [
-                pd.to_datetime("2124-01-01").date(),
-                pd.to_datetime("2124-01-02").date(),
-            ],
+            "ds": [TEST_DATE, TEST_DATE_NEXT_DAY],
             "all_in": [0, 0],
             "all_out": [1, 1],
             "just_end": [1, 0],
@@ -1503,23 +1531,28 @@ def test_build_model(forecast):
         {
             "name": "all_in",
             "description": "it's all in",
-            "start_date": "2124-01-01",
-            "end_date": "2124-01-06",
+            "start_date": TEST_DATE_STR,
+            "end_date": (TEST_DATE + relativedelta(days=6)).strftime("%Y-%m-%d"),
         },
         {
             "name": "all_out",
             "description": "it's all in",
-            "start_date": "2124-02-01",
-            "end_date": "2124-02-06",
+            "start_date": (TEST_DATE + relativedelta(months=1)).strftime("%Y-%m-%d"),
+            "end_date": (TEST_DATE + relativedelta(months=1, days=6)).strftime(
+                "%Y-%m-%d"
+            ),
         },
         {
             "name": "just_end",
             "description": "just the second one",
-            "start_date": "2124-01-02",
-            "end_date": "2124-02-06",
+            "start_date": (TEST_DATE + relativedelta(days=1)).strftime("%Y-%m-%d"),
+            "end_date": (TEST_DATE + relativedelta(months=1, days=6)).strftime(
+                "%Y-%m-%d"
+            ),
         },
     ]

     # use holidays from holiday config file
     holiday_list = {
         "easter": {
             "name": "easter",
@@ -1568,8 +1601,8 @@ def test_build_model(forecast):
     }
     segment_settings = SegmentModelSettings(
         segment={"a": 1, "b": 2},
-        start_date="2124-01-01",
-        end_date="2124-02-01",
+        start_date=TEST_DATE_STR,
+        end_date=TEST_PREDICT_END_STR,
         holidays=[ProphetHoliday(**h) for h in holiday_list.values()],
         regressors=[ProphetRegressor(**r) for r in regressor_list],
         grid_parameters=grid_parameters,
test_prophet_forecast.py

@@ -1,17 +1,552 @@
+from datetime import date
+from dateutil.relativedelta import relativedelta
+
 import pandas as pd
 from dotmap import DotMap
 import numpy as np
 import pytest
 import collections


 from kpi_forecasting.models.prophet_forecast import ProphetForecast

+# Arbitrarily choose some date to use for the tests
+TEST_DATE = date(2024, 1, 1)
+TEST_DATE_STR = TEST_DATE.strftime("%Y-%m-%d")
+TEST_DATE_NEXT_DAY = date(2024, 1, 1)
+TEST_DATE_NEXT_DAY_STR = TEST_DATE_NEXT_DAY.strftime("%Y-%m-%d")
+
+
+@pytest.fixture
+def forecast():
+    A1_start_date = TEST_DATE_STR
+    parameter_dict = {
+        "model_setting_split_dim": "a",
+        "segment_settings": {
+            "A1": {
+                "start_date": A1_start_date,
+                "end_date": None,
+                "holidays": [],
+                "regressors": [],
+                "grid_parameters": {"param1": [1, 2], "param2": [20, 10]},
+                "cv_settings": {},
+            },
+        },
+    }
+
+    parameter_dotmap = DotMap(parameter_dict)
+    predict_start_date = TEST_DATE_NEXT_DAY_STR
+    # arbitarily set it a couple months in the future
+    predict_end_date = (TEST_DATE + relativedelta(months=2)).strftime("%Y-%m-%d")
+    return ProphetForecast(
+        model_type="test",
+        parameters=parameter_dotmap,
+        use_holidays=None,
+        start_date=predict_start_date,
+        end_date=predict_end_date,
+        metric_hub=None,
+    )
+
+
+class MockModel:
+    """Used in place of prophet.Prophet for testing purposes"""
+
+    def __init__(self, param1=0, param2=0, **kwargs):
+        self.value = param1 * param2
+        self.history = None
+
+    def fit(self, df, *args, **kwargs):
+        self.history = df
+        return None
+
+    def predict(self, dates_to_predict):
+        output = dates_to_predict.copy()
+
+        output[
+            [
+                "yhat",
+                "trend",
+                "trend_upper",
+                "trend_lower",
+                "weekly",
+                "weekly_upper",
+                "weekly_lower",
+                "yearly",
+                "yearly_upper",
+                "yearly_lower",
+            ]
+        ] = 0  # some dummy value so it has the right shape
+
+        return output
+
+    def predictive_samples(self, dates_to_predict):
+        # prophet function outputs dict of numpy arrays
+        # only element we care about is `yhat`
+        output = np.arange(len(dates_to_predict)) * self.value
+        return {"yhat": {0: output}}
+
+
+def mock_build_model(parameters):
+    """mocks the FunnelForecast build_model method"""
+    return MockModel(
+        **parameters,
+    )
+
+
+def mock_aggregate_forecast_observed(
+    forecast_df, observed_df, period, numpy_aggregations, percentiles
+):
+    """Mocks the aggregate_forecast_observed function defined in ProphetForecast
+    and inherited in FunnelForecast.
+    This function is tested extensively in test_prophet_forecast
+    so we can make dummy outputs for tests related to it"""
+
+    # add dummy columns where aggregated metrics woudl go
+    percentile_columns = [f"p{el}" for el in percentiles]
+    output_forecast_df = forecast_df.copy()
+    output_forecast_df[numpy_aggregations + percentile_columns] = 0
+    return output_forecast_df, observed_df.copy()
+
+
+def test_under_fit(forecast, mocker):
+    """test the _fit method"""
+
+    observed_data = pd.DataFrame(
+        {
+            "submission_date": [
+                TEST_DATE,
+                TEST_DATE_NEXT_DAY,
+                TEST_DATE,
+                TEST_DATE_NEXT_DAY,
+            ],
+        }
+    )
+    mocker.patch.object(forecast, "_build_model", mock_build_model)
+
+    forecast._fit(observed_data)
+
+    # checking that history is set in the mocked Model ensures fit was called on it
+    pd.testing.assert_frame_equal(
+        observed_data.rename(columns={"submission_date": "ds"}), forecast.model.history
+    )
+
+
+def test_fit(forecast, mocker):
+    """test the fit function. It is inherited from BaseForecast
+    and calls _fit with the proper object attributes. Test looks very
+    similar to that for _fit"""
+    observed_data = pd.DataFrame(
+        {
+            "submission_date": [
+                TEST_DATE,
+                TEST_DATE_NEXT_DAY,
+                TEST_DATE,
+                TEST_DATE_NEXT_DAY,
+            ],
+        }
+    )
+    mocker.patch.object(forecast, "_build_model", mock_build_model)
+
+    forecast.observed_df = observed_data
+    forecast.fit()
+
+    # checking that history is set in the mocked Model ensures fit was called on it
+    pd.testing.assert_frame_equal(
+        observed_data.rename(columns={"submission_date": "ds"}), forecast.model.history
+    )
+
+    assert forecast.trained_at is not None
+
+
+def test_combine_forecast_observed(mocker, forecast):
+    """tests the _combine_forecast_observed method"""
+    # forecast predictions are set with the
+    # mock_aggregate_forecast_observed function so they
+    # can be ommited here
+    forecast_df = pd.DataFrame(
+        {
+            "submission_date": [
+                TEST_DATE,
+                TEST_DATE_NEXT_DAY,
+            ],
+        }
+    )
+
+    # rows with negative values are those expected to be removed
+    # by filters in summarize
+    observed_df = pd.DataFrame(
+        {
+            "submission_date": [
+                TEST_DATE,
+                TEST_DATE_NEXT_DAY,
+            ],
+            "value": [10, 20],
+        }
+    )
+
+    mocker.patch.object(
+        forecast, "_aggregate_forecast_observed", mock_aggregate_forecast_observed
+    )
+
+    numpy_aggregations = ["mean"]
+    percentiles = [10, 50, 90]
+    output_df = forecast._combine_forecast_observed(
+        forecast_df,
+        observed_df,
+        period="period",
+        numpy_aggregations=numpy_aggregations,
+        percentiles=percentiles,
+    )
+    observed_expected_df = pd.DataFrame(
+        {
+            "submission_date": [
+                TEST_DATE,
+                TEST_DATE_NEXT_DAY,
+            ],
+            "value": [10, 20],
+            "measure": ["observed", "observed"],
+            "source": ["historical", "historical"],
+        }
+    )
+
+    # 4x2 columns, 4 metrics (mean, p10, p50, p90)
+    forecast_expected_df = pd.DataFrame(
+        {
+            "submission_date": [
+                TEST_DATE,
+                TEST_DATE_NEXT_DAY,
+                TEST_DATE,
+                TEST_DATE_NEXT_DAY,
+                TEST_DATE,
+                TEST_DATE_NEXT_DAY,
+                TEST_DATE,
+                TEST_DATE_NEXT_DAY,
+            ],
+            "measure": ["mean", "mean", "p10", "p10", "p50", "p50", "p90", "p90"],
+            "value": [0] * 8,
+            "source": ["forecast"] * 8,
+        }
+    )
+
+    # concat in same order to make our lives easier
+    expected = pd.concat([observed_expected_df, forecast_expected_df]).sort_values(
+        ["submission_date", "measure"]
+    )
+    assert set(expected.columns) == set(output_df.columns)
+    # force value columns to be floats in both cases to make check easier
+    numeric_cols = ["value", "value_low", "value_mid", "value_high"]
+    pd.testing.assert_frame_equal(
+        output_df.sort_values(["submission_date", "measure"]).reset_index(drop=True),
+        expected[output_df.columns].reset_index(drop=True),
+    )
+
+    # should not be any nulls outside the metric column
+    non_metric_columns = [el for el in output_df.columns if el not in numeric_cols]
+    assert not pd.isna(output_df[non_metric_columns]).any(axis=None)
+
+
+def test_under_summarize(mocker, forecast):
+    """testing _summarize"""
+    # forecast predictions are set with the
+    # mock_aggregate_forecast_observed function so they
+    # can be ommited here
+    forecast_df = pd.DataFrame(
+        {
+            "submission_date": [
+                TEST_DATE,
+                TEST_DATE_NEXT_DAY,
+            ],
+        }
+    )
+
+    # rows with negative values are those expected to be removed
+    # by filters in summarize
+    observed_df = pd.DataFrame(
+        {
+            "submission_date": [
+                TEST_DATE,
+                TEST_DATE_NEXT_DAY,
+            ],
+            "value": [10, 20],
+        }
+    )
+
+    mocker.patch.object(
+        forecast, "_aggregate_forecast_observed", mock_aggregate_forecast_observed
+    )
+
+    numpy_aggregations = ["mean"]
+    percentiles = [10, 50, 90]
+    output_df = forecast._summarize(
+        forecast_df,
+        observed_df,
+        period="period",
+        numpy_aggregations=numpy_aggregations,
+        percentiles=percentiles,
+    )
+    observed_expected_df = pd.DataFrame(
+        {
+            "submission_date": [
+                TEST_DATE,
+                TEST_DATE_NEXT_DAY,
+            ],
+            "value": [10, 20],
+            "measure": ["observed", "observed"],
+            "source": ["historical", "historical"],
+        }
+    )
+
+    # 4x2 columns, 4 metrics (mean, p10, p50, p90)
+    forecast_expected_df = pd.DataFrame(
+        {
+            "submission_date": [
+                TEST_DATE,
+                TEST_DATE_NEXT_DAY,
+                TEST_DATE,
+                TEST_DATE_NEXT_DAY,
+                TEST_DATE,
+                TEST_DATE_NEXT_DAY,
+                TEST_DATE,
+                TEST_DATE_NEXT_DAY,
+            ],
+            "measure": ["mean", "mean", "p10", "p10", "p50", "p50", "p90", "p90"],
+            "value": [0] * 8,
+            "source": ["forecast"] * 8,
+        }
+    )
+
+    # concat in same order to make our lives easier
+    expected = pd.concat([observed_expected_df, forecast_expected_df]).sort_values(
+        ["submission_date", "measure"]
+    )
+    expected["aggregation_period"] = "period"
+
+    assert set(expected.columns) == set(output_df.columns)
+    # force value columns to be floats in both cases to make check easier
+    numeric_cols = ["value", "value_low", "value_mid", "value_high"]
+    pd.testing.assert_frame_equal(
+        output_df.sort_values(["submission_date", "measure"]).reset_index(drop=True),
+        expected[output_df.columns].reset_index(drop=True),
+    )
+
+    # should not be any nulls outside the metric column
+    non_metric_columns = [el for el in output_df.columns if el not in numeric_cols]
+    assert not pd.isna(output_df[non_metric_columns]).any(axis=None)
+
+
+def test_summarize(mocker, forecast):
+    """testing summarize"""
+    # create dummy metric hub object to when meta data from
+    # it is added we don't get an error
+    MetricHub = collections.namedtuple(
+        "MetricHub",
+        ["alias", "app_name", "slug", "min_date", "max_date"],
+    )
+
+    dummy_metric_hub = MetricHub("", "", "", TEST_DATE_STR, TEST_DATE_STR)
+
+    # forecast predictions are set with the
+    # mock_aggregate_forecast_observed function so they
+    # can be ommited here
+    forecast_df = pd.DataFrame(
+        {
+            "submission_date": [
+                TEST_DATE,
+                TEST_DATE_NEXT_DAY,
+            ],
+        }
+    )
+
+    # rows with negative values are those expected to be removed
+    # by filters in summarize
+    observed_df = pd.DataFrame(
+        {
+            "submission_date": [
+                TEST_DATE,
+                TEST_DATE_NEXT_DAY,
+            ],
+            "value": [10, 20],
+        }
+    )
+
+    mocker.patch.object(
+        forecast, "_aggregate_forecast_observed", mock_aggregate_forecast_observed
+    )
+
+    numpy_aggregations = ["mean"]
+    percentiles = [10, 50, 90]
+
+    forecast.observed_df = observed_df
+    forecast.forecast_df = forecast_df
+    forecast.metric_hub = dummy_metric_hub
+
+    # timestamp attributes created by fit and predict
+    # must be added manuall
+    forecast.collected_at = ""
+    forecast.trained_at = ""
+    forecast.predicted_at = ""
+    forecast.metadata_params = ""
+
+    numpy_aggregations = ["mean"]
+    percentiles = [10, 50, 90]
+    forecast.summarize(
+        periods=["period1", "period2"],
+        numpy_aggregations=numpy_aggregations,
+        percentiles=percentiles,
+    )
+
+    output_df = forecast.summary_df
+
+    observed_expected_df = pd.DataFrame(
+        {
+            "submission_date": [
+                TEST_DATE,
+                TEST_DATE_NEXT_DAY,
+            ],
+            "value": [10, 20],
+            "measure": ["observed", "observed"],
+            "source": ["historical", "historical"],
+        }
+    )
+
+    # 4x2 columns, 4 metrics (mean, p10, p50, p90)
+    forecast_expected_df = pd.DataFrame(
+        {
+            "submission_date": [
+                TEST_DATE,
+                TEST_DATE_NEXT_DAY,
+                TEST_DATE,
+                TEST_DATE_NEXT_DAY,
+                TEST_DATE,
+                TEST_DATE_NEXT_DAY,
+                TEST_DATE,
+                TEST_DATE_NEXT_DAY,
+            ],
+            "measure": ["mean", "mean", "p10", "p10", "p50", "p50", "p90", "p90"],
+            "value": [0] * 8,
+            "source": ["forecast"] * 8,
+        }
+    )
+
+    # concat in same order to make our lives easier
+    expected = pd.concat([observed_expected_df, forecast_expected_df]).sort_values(
+        ["submission_date", "measure"]
+    )
+    expected1 = expected.copy()
+    expected2 = expected.copy()
+    expected1["aggregation_period"] = "period1"
+    expected2["aggregation_period"] = "period2"
+
+    expected = pd.concat([expected1, expected2])
+
+    # not going to check all the metadata columns
+    # in assert_frame_equal. Just make sure they're there
+    metadata_columns = {
+        "metric_alias",
+        "metric_hub_app_name",
+        "metric_hub_slug",
+        "metric_start_date",
+        "metric_end_date",
+        "metric_collected_at",
+        "forecast_start_date",
+        "forecast_end_date",
+        "forecast_trained_at",
+        "forecast_predicted_at",
+        "forecast_parameters",
+    }
+    assert set(expected.columns) | metadata_columns == set(output_df.columns)
+    # force value columns to be floats in both cases to make check easier
+    numeric_cols = ["value", "value_low", "value_mid", "value_high"]
+    pd.testing.assert_frame_equal(
+        output_df.sort_values(["submission_date", "aggregation_period", "measure"])[
+            expected.columns
+        ].reset_index(drop=True),
+        expected.sort_values(
+            ["submission_date", "aggregation_period", "measure"]
+        ).reset_index(drop=True),
+    )
+
+    # should not be any nulls outside the metric column
+    non_metric_columns = [el for el in output_df.columns if el not in numeric_cols]
+    assert not pd.isna(output_df[non_metric_columns]).any(axis=None)
+
+
+def test_under_predict(mocker, forecast):
+    """testing _predict"""
+    # this ensures forecast is using MockModel
+    mocker.patch.object(forecast, "_build_model", mock_build_model)
+
+    observed_df = pd.DataFrame(
+        {
+            "y": [0, 1],
+            "submission_date": [
+                TEST_DATE,
+                TEST_DATE_NEXT_DAY,
+            ],
+        }
+    )
+
+    dates_to_predict = pd.DataFrame(
+        {
+            "submission_date": [
+                TEST_DATE,
+                TEST_DATE_NEXT_DAY,
+            ]
+        }
+    )
+    forecast.observed_df = observed_df
+    forecast.parameters = {"param1": 1, "param2": 2}
+    forecast.fit()
+    out = forecast._predict(dates_to_predict).reset_index(drop=True)
+
+    # in MockModel, the predictive_samples method sets the output to
+    # np.arange(len(dates_to_predict)) * self.value for one column called 0
+    # this helps ensure the forecast_df in segment_models is set properly
+    expected = pd.DataFrame(
+        {
+            0: [0, 2],
+            "submission_date": [
+                TEST_DATE,
+                TEST_DATE_NEXT_DAY,
+            ],
+        }
+    )
+
+    pd.testing.assert_frame_equal(out, expected)
+
+    # test predict while we're here
+
+    forecast.dates_to_predict = dates_to_predict
+    forecast.number_of_simulations = 1  # so that _validate doesn't break
+    forecast.predict()
+
+    out = forecast.forecast_df
+
+    # in MockModel, the predictive_samples method sets the output to
+    # np.arange(len(dates_to_predict)) * self.value for one column called 0
+    # this helps ensure the forecast_df in segment_models is set properly
+    expected = pd.DataFrame(
+        {
+            0: [0, 2],
+            "submission_date": [
+                TEST_DATE,
+                TEST_DATE_NEXT_DAY,
+            ],
+        }
+    )
+
+    pd.testing.assert_frame_equal(out, expected)
+    assert forecast.predicted_at is not None
+
+
 def test_summarize_non_overlapping_day():
-    observed_start_date = "2124-01-01"
-    observed_end_date = "2124-02-01"
+    observed_start_date = TEST_DATE_STR
+    observed_end_date = (TEST_DATE + relativedelta(months=1)).strftime("%Y-%m-%d")

-    predict_start_date = "2124-02-02"
-    predict_end_date = "2124-03-01"
+    predict_start_date = (TEST_DATE + relativedelta(months=1, days=1)).strftime(
+        "%Y-%m-%d"
+    )
+    predict_end_date = (TEST_DATE + relativedelta(months=2)).strftime("%Y-%m-%d")

     forecast = ProphetForecast(
         model_type="test",
@@ -33,10 +568,15 @@ def test_summarize_non_overlapping_day():
         }
     )

+    # there are the samples generated
+    # the mean and median are the aggregates used
     test_samples = np.array([1, 1, 2, 3, 5, 8, 13])
+    test_mean = np.mean(test_samples)
+    test_median = np.median(test_samples)
+
     # mean and median scale with a factor
     # so a factor is multiplied on to make sure the aggregation is working
     # across rows properly
     forecast_array = np.stack(
         [test_samples * i for i in range(1, 1 + len(predict_submission_dates))],
         axis=0,
@@ -110,12 +650,22 @@ def test_summarize_non_overlapping_day():


 def test_summarize_non_overlapping_month():
+    # choose arbitrary year for the start and end dates
+    # two full months (Jan and Feb )
+    # are in the observed data, the number of days (31 and 28 days respectively)
+    # in each month is used in the checks
     observed_start_date = "2124-01-01"
     observed_end_date = "2124-02-28"

+    # two full months (April and May )
+    # are in the observed data, the number of days (28 and 31 days respectively)
+    # in each month is used in the checks
     predict_start_date = "2124-04-01"
     predict_end_date = "2124-05-31"

+    print(observed_start_date, observed_end_date)
+    print(predict_start_date, predict_end_date)
+
     forecast = ProphetForecast(
         model_type="test",
         parameters=DotMap(),
@@ -229,11 +779,11 @@ def test_summarize_non_overlapping_month():


 def test_summarize_overlapping_day():
-    observed_start_date = "2124-01-01"
-    observed_end_date = "2124-02-01"
+    observed_start_date = TEST_DATE_STR
+    observed_end_date = (TEST_DATE + relativedelta(months=1)).strftime("%Y-%m-%d")

-    predict_start_date = "2124-01-01"
-    predict_end_date = "2124-02-01"
+    predict_start_date = TEST_DATE_STR
+    predict_end_date = (TEST_DATE + relativedelta(months=1)).strftime("%Y-%m-%d")

     forecast = ProphetForecast(
         model_type="test",
@@ -255,10 +805,15 @@ def test_summarize_overlapping_day():
         }
     )

+    # there are the samples generated
+    # the mean and median are the aggregates used
     test_samples = np.array([1, 1, 2, 3, 5, 8, 13])
+    test_mean = np.mean(test_samples)
+    test_median = np.median(test_samples)
+
     # mean and median scale with a factor
     # so a factor is multiplied on to make sure the aggregation is working
     # across rows properly
     forecast_array = np.stack(
         [test_samples * i for i in range(1, 1 + len(predict_submission_dates))],
         axis=0,
@@ -334,6 +889,10 @@ def test_summarize_overlapping_day():


 def test_summarize_overlapping_month():
+    # choose arbitrary year for the start and end dates
+    # two full months (Jan and Feb )
+    # are in the observed data, the number of days (31 and 28 days respectively)
+    # in each month is used in the checks
     observed_start_date = "2124-01-01"
     observed_end_date = "2124-02-28"

@@ -360,10 +919,15 @@ def test_summarize_overlapping_month():
         }
     )

+    # there are the samples generated
+    # the mean and median are the aggregates used
     test_samples = np.array([1, 1, 2, 3, 5, 8, 13])
+    test_mean = np.mean(test_samples)
+    test_median = np.median(test_samples)
+
     # mean and median scale with a factor
     # so a factor is multiplied on to make sure the aggregation is working
     # across rows properly
     forecast_array = np.stack(
         [test_samples] * len(predict_submission_dates),
         axis=0,
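The scaling trick in the aggregation tests above can be checked in isolation: each row of forecast_array holds the simulation samples for one date, scaled by a per-date factor, so the row-wise mean and median scale by the same factor. A standalone sketch in plain numpy (no project code assumed):

import numpy as np

test_samples = np.array([1, 1, 2, 3, 5, 8, 13])
# one row of samples per date, scaled by factors 1, 2, 3
forecast_array = np.stack([test_samples * i for i in range(1, 4)], axis=0)

means = forecast_array.mean(axis=1)         # one mean per date
medians = np.median(forecast_array, axis=1)  # one median per date

assert np.allclose(means, np.mean(test_samples) * np.arange(1, 4))
assert np.allclose(medians, np.median(test_samples) * np.arange(1, 4))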