added more tests to prophet_forecast (#264)

* refactored base_forecast and prophet_forecast to enable easier testing

* Apply suggestions from code review

change signatures of `fit` and `predict` to take arguments that default to attributes

Co-authored-by: Brad Ochocki Szasz <bochocki@mozilla.com>

* add test for fit

* revert signatures

* made timezone-aware stamps naive

* finished base_forecast tests

* added tests for prophet class

* linting

* fixed divide by zero

* linting again

* adding tests to funnel_forecast

* added tests for funnel_forecast

* feat(workday):remove unwanted fields (#249)

Co-authored-by: Julio Cezar Moscon <jcmoscon@gmail.com>

* fix(exit):Added sys.exit() call (#250)

Co-authored-by: Julio Cezar Moscon <jcmoscon@gmail.com>

* fix issue with call to _get_crossvalidation_metric

* fixed type check

* added string case to aggregate_to_period and added tests

* revert file

* added more tests to prophet_forecast

* Update jobs/kpi-forecasting/kpi_forecasting/models/base_forecast.py

Co-authored-by: Brad Ochocki Szasz <bochocki@mozilla.com>

* Brad easy fixes

* remove magic year

* feat(code):increasing the max_limit from 10 to 40. (#259)

Co-authored-by: Julio Cezar Moscon <jcmoscon@gmail.com>

* typo

* revert bugfix in _add_regressors

* update tests to reflect reversion

---------

Co-authored-by: Brad Ochocki Szasz <bochocki@mozilla.com>
Co-authored-by: JCMOSCON1976 <167822375+JCMOSCON1976@users.noreply.github.com>
Co-authored-by: Julio Cezar Moscon <jcmoscon@gmail.com>
Co-authored-by: m-d-bowerman <mbowerman@mozilla.com>
Jared Snyder 2024-08-14 09:30:00 -05:00 committed by GitHub
Parent bae0202d0b
Commit 9a2bc3a34e
No key found matching this signature
GPG key ID: B5690EEEBB952194
5 changed files with 844 additions and 197 deletions

View file

@@ -189,7 +189,7 @@ class BaseForecast(abc.ABC):
Returns:
pd.DataFrame: metric dataframe for all metrics and aggregations
"""
self.summary_df = pd.concat(
summary_df = pd.concat(
[
self._summarize(
self.forecast_df,
@@ -202,4 +202,21 @@ class BaseForecast(abc.ABC):
]
)
# add Metric Hub metadata columns
summary_df["metric_alias"] = self.metric_hub.alias.lower()
summary_df["metric_hub_app_name"] = self.metric_hub.app_name.lower()
summary_df["metric_hub_slug"] = self.metric_hub.slug.lower()
summary_df["metric_start_date"] = pd.to_datetime(self.metric_hub.min_date)
summary_df["metric_end_date"] = pd.to_datetime(self.metric_hub.max_date)
summary_df["metric_collected_at"] = self.collected_at
# add forecast model metadata columns
summary_df["forecast_start_date"] = self.start_date
summary_df["forecast_end_date"] = self.end_date
summary_df["forecast_trained_at"] = self.trained_at
summary_df["forecast_predicted_at"] = self.predicted_at
summary_df["forecast_parameters"] = self.metadata_params
self.summary_df = summary_df
return self.summary_df
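With this refactor `summarize` attaches the Metric Hub and forecast metadata columns itself, caches the result as `self.summary_df`, and returns it, so callers can rely on either. Below is a minimal sketch of how a test can exercise that contract; the `forecast` fixture, the dummy `MetricHub` namedtuple, and the manually set timestamp attributes are assumptions mirroring the new tests further down.

import collections
import pandas as pd

# dummy stand-in for the Metric Hub handle (an assumption; the new
# tests below build the same namedtuple)
MetricHub = collections.namedtuple(
    "MetricHub", ["alias", "app_name", "slug", "min_date", "max_date"]
)

def check_summarize_contract(forecast):
    # assumes fit()/predict() have already populated observed_df/forecast_df
    forecast.metric_hub = MetricHub("", "", "", "2024-01-01", "2024-01-01")
    # timestamp/parameter attributes normally set by fit and predict
    forecast.collected_at = ""
    forecast.trained_at = ""
    forecast.predicted_at = ""
    forecast.metadata_params = ""
    output = forecast.summarize(
        periods=["day"], numpy_aggregations=["sum"], percentiles=[50]
    )
    # the returned frame is also cached on the instance
    pd.testing.assert_frame_equal(output, forecast.summary_df)
    # metadata columns are now attached once, in the base class
    assert "metric_alias" in output.columns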

View file

@@ -20,15 +20,20 @@ class ProphetForecast(BaseForecast):
def column_names_map(self) -> Dict[str, str]:
return {"submission_date": "ds", "value": "y"}
def _fit(self, observed_df) -> None:
self.model = prophet.Prophet(
**self.parameters,
def _build_model(self, parameter_dict):
model = prophet.Prophet(
**parameter_dict,
uncertainty_samples=self.number_of_simulations,
mcmc_samples=0,
)
if self.use_holidays:
self.model.add_country_holidays(country_name="US")
model.add_country_holidays(country_name="US")
return model
def _fit(self, observed_df) -> None:
self.model = self._build_model(self.parameters)
# Modify observed data to have column names that Prophet expects, and fit
# the model
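Pulling model construction out into `_build_model` is what makes the new tests practical: a test can patch this one method and hand back a lightweight fake instead of a real `prophet.Prophet`. A minimal sketch of that pattern, using a trimmed-down version of the `MockModel` the new test modules define (the `forecast` fixture itself is assumed):

import pandas as pd

class MockModel:
    """Stands in for prophet.Prophet; records the frame it was fit on."""

    def __init__(self, **kwargs):
        self.history = None

    def fit(self, df, *args, **kwargs):
        self.history = df

def mock_build_model(parameters):
    # same signature as ProphetForecast._build_model
    return MockModel(**parameters)

def test_fit_uses_built_model(forecast, mocker):
    observed_df = pd.DataFrame({"submission_date": [pd.Timestamp("2024-01-01")]})
    mocker.patch.object(forecast, "_build_model", mock_build_model)
    forecast._fit(observed_df)
    # _fit renames submission_date to ds before fitting, so the mocked
    # model's recorded history should carry the Prophet column name
    assert list(forecast.model.history.columns) == ["ds"]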
@@ -235,24 +240,6 @@ class ProphetForecast(BaseForecast):
# add summary metadata columns
df["aggregation_period"] = period.lower()
# reorder columns to make interpretation easier
df = df[["submission_date", "aggregation_period", "source", "measure", "value"]]
# add Metric Hub metadata columns
df["metric_alias"] = self.metric_hub.alias.lower()
df["metric_hub_app_name"] = self.metric_hub.app_name.lower()
df["metric_hub_slug"] = self.metric_hub.slug.lower()
df["metric_start_date"] = pd.to_datetime(self.metric_hub.min_date)
df["metric_end_date"] = pd.to_datetime(self.metric_hub.max_date)
df["metric_collected_at"] = self.collected_at
# add forecast model metadata columns
df["forecast_start_date"] = self.start_date
df["forecast_end_date"] = self.end_date
df["forecast_trained_at"] = self.trained_at
df["forecast_predicted_at"] = self.predicted_at
df["forecast_parameters"] = self.metadata_params
return df
def _summarize_legacy(self) -> pd.DataFrame:

View file

@@ -1,14 +1,25 @@
from typing import List
import collections
from datetime import date, datetime
from dateutil.relativedelta import relativedelta
import pytest
import pandas as pd
from dotmap import DotMap
import numpy as np
from datetime import datetime, timedelta, timezone
from datetime import timedelta, timezone
from kpi_forecasting.models.base_forecast import BaseForecast
# Arbitrarily choose some date to use for the tests
TEST_DATE = date(2024, 1, 1)
TEST_DATE_STR = TEST_DATE.strftime("%Y-%m-%d")
TEST_DATE_NEXT_DAY = date(2024, 1, 2)
TEST_DATE_NEXT_DAY_STR = TEST_DATE_NEXT_DAY.strftime("%Y-%m-%d")
TEST_PREDICT_END = TEST_DATE + relativedelta(months=2)
TEST_PREDICT_END_STR = TEST_PREDICT_END.strftime("%Y-%m-%d")
class BadClass(BaseForecast):
pass
@@ -29,8 +40,9 @@ def good_class():
self.observed_df = pd.DataFrame(
{
"submission_date": [
pd.to_datetime("2020-01-01"),
pd.to_datetime("1990-01-01"),
TEST_DATE,
TEST_DATE
- relativedelta(years=1), # just an arbitrary date in the past
]
}
)
@@ -76,8 +88,8 @@ def test_not_implemented():
def test_post_init(good_class):
start_date = "2124-01-01"
end_date = "2124-02-02"
start_date = TEST_DATE_STR
end_date = TEST_PREDICT_END_STR
good_class = good_class(
model_type="test",
parameters=DotMap(),
@@ -108,7 +120,7 @@ def test_post_init_default_dates(good_class):
)
# this is the max date of self.observed_df['submission_date'] plus one day
# from the object definition
start_date = pd.to_datetime("2020-01-02")
start_date = TEST_DATE_NEXT_DAY
end_date = (
datetime.now(timezone.utc).replace(tzinfo=None) + timedelta(weeks=78)
).date()
@@ -123,15 +135,15 @@ def test_fit(good_class):
model_type="test",
parameters=DotMap(),
use_holidays=None,
start_date="2124-01-01",
end_date="2124-02-02",
start_date=TEST_DATE_STR,
end_date=TEST_PREDICT_END_STR,
metric_hub=None,
)
good_class.fit()
assert good_class.model
#
assert good_class.model.is_fit == pd.to_datetime("2020-01-01")
# model sets is_fit to the largest day in the observed data
assert good_class.model.is_fit == TEST_DATE
def test_predict_and_validate(good_class):
@@ -139,8 +151,8 @@ def test_predict_and_validate(good_class):
model_type="test",
parameters=DotMap(),
use_holidays=None,
start_date="2124-01-01",
end_date="2124-02-02",
start_date=TEST_DATE_STR,
end_date=TEST_PREDICT_END_STR,
metric_hub=None,
)
# overwrite date range set in __post_init__
@@ -154,12 +166,24 @@ def test_summarize(good_class):
model_type="test",
parameters=DotMap(),
use_holidays=None,
start_date="2124-01-01",
end_date="2124-02-02",
start_date=TEST_DATE_STR,
end_date=TEST_PREDICT_END_STR,
metric_hub=None,
)
good_class.forecast_df = np.array([1, 2])
good_class.observed_df = np.array([3, 4])
MetricHub = collections.namedtuple(
"MetricHub",
["alias", "app_name", "slug", "min_date", "max_date"],
)
dummy_metric_hub = MetricHub("", "", "", TEST_DATE_STR, TEST_DATE_STR)
# add it here rather than in __init__ so it doesn't try to load data
good_class.metric_hub = dummy_metric_hub
good_class.trained_at = ""
good_class.predicted_at = ""
number_val = 10
output = good_class.summarize(
periods=["a", "b", "c"], numpy_aggregations=["sum"], percentiles=["percentiles"]
@@ -170,5 +194,27 @@
for el in ["a", "b", "c"]
]
)
assert output.reset_index(drop=True).equals(expected_output)
assert good_class.summary_df.reset_index(drop=True).equals(expected_output)
# not going to check all the metadata columns
# in assert_frame_equal. Just make sure they're there
metadata_columns = {
"metric_alias",
"metric_hub_app_name",
"metric_hub_slug",
"metric_start_date",
"metric_end_date",
"metric_collected_at",
"forecast_start_date",
"forecast_end_date",
"forecast_trained_at",
"forecast_predicted_at",
"forecast_parameters",
}
assert set(expected_output.columns) | metadata_columns == set(output.columns)
pd.testing.assert_frame_equal(
output[expected_output.columns].reset_index(drop=True), expected_output
)
pd.testing.assert_frame_equal(
good_class.summary_df[expected_output.columns].reset_index(drop=True),
expected_output,
)

View file

@@ -1,6 +1,8 @@
"""tests for the funnel forecast module"""
import collections
from datetime import date, datetime
from dateutil.relativedelta import relativedelta
import pandas as pd
from dotmap import DotMap
@@ -11,13 +13,21 @@ import numpy as np
from kpi_forecasting.configs.model_inputs import ProphetRegressor, ProphetHoliday
from kpi_forecasting.models.funnel_forecast import SegmentModelSettings, FunnelForecast
# Arbitrarily choose some date to use for the tests
TEST_DATE = date(2024, 1, 1)
TEST_DATE_STR = TEST_DATE.strftime("%Y-%m-%d")
TEST_DATE_NEXT_DAY = date(2024, 1, 2)
TEST_DATE_NEXT_DAY_STR = TEST_DATE_NEXT_DAY.strftime("%Y-%m-%d")
TEST_PREDICT_END = TEST_DATE + relativedelta(months=2)
TEST_PREDICT_END_STR = TEST_PREDICT_END.strftime("%Y-%m-%d")
@pytest.fixture()
def forecast():
"""This mocks a generic forecast object"""
# dates are chosen arbitrarily (see TEST_DATE above)
predict_start_date = "2124-01-01"
predict_end_date = "2124-03-01"
predict_start_date = TEST_DATE_STR
predict_end_date = TEST_PREDICT_END_STR
forecast = FunnelForecast(
model_type="test",
@@ -37,8 +47,8 @@ def segment_info_fit_tests():
in the functions that test fit methods"""
# dates are chosen arbitrarily (see TEST_DATE above)
A1_start_date = "2124-01-01"
A2_start_date = "2124-01-02"
A1_start_date = TEST_DATE_STR
A2_start_date = TEST_DATE_NEXT_DAY_STR
segment_info_dict = {
"A1": {
@@ -83,9 +93,8 @@ def funnel_forecast_for_fit_tests(segment_info_fit_tests, mocker):
}
parameter_dotmap = DotMap(parameter_dict)
predict_start_date = "2124-01-01"
predict_end_date = "2124-01-02"
predict_start_date = TEST_DATE_STR
predict_end_date = TEST_DATE_NEXT_DAY_STR
forecast = FunnelForecast(
model_type="test",
parameters=parameter_dotmap,
@@ -178,8 +187,8 @@ def test_combine_forecast_observed(mocker, forecast):
forecast_df = pd.DataFrame(
{
"submission_date": [
pd.to_datetime("2124-01-01").date(),
pd.to_datetime("2124-01-02").date(),
TEST_DATE,
TEST_DATE_NEXT_DAY,
],
}
)
@@ -187,8 +196,8 @@ def test_combine_forecast_observed(mocker, forecast):
observed_df = pd.DataFrame(
{
"submission_date": [
pd.to_datetime("2124-01-01").date(),
pd.to_datetime("2124-01-02").date(),
TEST_DATE,
TEST_DATE_NEXT_DAY,
],
"a": ["A1", "A1"],
"value": [5, 6],
@@ -238,8 +247,8 @@ def test_under_summarize(mocker, forecast):
forecast_df = pd.DataFrame(
{
"submission_date": [
pd.to_datetime("2124-01-01").date(),
pd.to_datetime("2124-01-02").date(),
TEST_DATE,
TEST_DATE_NEXT_DAY,
],
}
)
@@ -249,11 +258,11 @@ def test_under_summarize(mocker, forecast):
observed_df = pd.DataFrame(
{
"submission_date": [
pd.to_datetime("2123-01-01").date(),
pd.to_datetime("2124-01-01").date(),
pd.to_datetime("2124-01-02").date(),
pd.to_datetime("2124-01-01").date(),
pd.to_datetime("2124-01-02").date(),
TEST_DATE - relativedelta(months=1),
TEST_DATE,
TEST_DATE_NEXT_DAY,
TEST_DATE,
TEST_DATE_NEXT_DAY,
],
"a": ["A1", "A1", "A1", "A2", "A2"],
"value": [10, 20, 30, 40, 50],
@@ -265,7 +274,7 @@ def test_under_summarize(mocker, forecast):
["start_date", "forecast_df", "segment", "trained_parameters"],
)
dummy_segment_settings = SegmentSettings(
start_date="2124-01-01",
start_date=TEST_DATE_STR,
forecast_df=forecast_df.copy(),
segment={"a": "A1"},
trained_parameters={"trained_parameters": "yes"},
@@ -288,8 +297,8 @@ def test_under_summarize(mocker, forecast):
observed_expected_df = pd.DataFrame(
{
"submission_date": [
pd.to_datetime("2124-01-01").date(),
pd.to_datetime("2124-01-02").date(),
TEST_DATE,
TEST_DATE_NEXT_DAY,
],
"a": ["A1", "A1"],
"value": [20, 30],
@@ -334,7 +343,7 @@ def test_summarize(mocker, forecast):
["alias", "app_name", "slug", "min_date", "max_date"],
)
dummy_metric_hub = MetricHub("", "", "", "2124-01-01", "2124-01-01")
dummy_metric_hub = MetricHub("", "", "", TEST_DATE_STR, TEST_DATE_STR)
# forecast predictions are set with the
# mock_aggregate_forecast_observed function so they
@@ -342,8 +351,8 @@ def test_summarize(mocker, forecast):
forecast_df = pd.DataFrame(
{
"submission_date": [
pd.to_datetime("2124-01-01").date(),
pd.to_datetime("2124-01-02").date(),
TEST_DATE,
TEST_DATE_NEXT_DAY,
],
}
)
@@ -353,11 +362,11 @@ def test_summarize(mocker, forecast):
observed_df = pd.DataFrame(
{
"submission_date": [
pd.to_datetime("2123-01-01").date(),
pd.to_datetime("2124-01-01").date(),
pd.to_datetime("2124-01-02").date(),
pd.to_datetime("2124-01-01").date(),
pd.to_datetime("2124-01-02").date(),
TEST_DATE - relativedelta(months=1),
TEST_DATE,
TEST_DATE_NEXT_DAY,
TEST_DATE,
TEST_DATE_NEXT_DAY,
],
"a": ["A1", "A1", "A1", "A2", "A2"],
"value": [10, 20, 30, 40, 50],
@@ -373,7 +382,7 @@ def test_summarize(mocker, forecast):
# we're only testing that it is concatenated properly
# with the segment data added
dummy_segment_settings_A1 = SegmentSettings(
start_date="2124-01-01",
start_date=TEST_DATE_STR,
forecast_df=forecast_df.copy(),
segment={"a": "A1"},
trained_parameters={"trained_parameters": "yes"},
@ -381,7 +390,7 @@ def test_summarize(mocker, forecast):
)
dummy_segment_settings_A2 = SegmentSettings(
start_date="2124-01-01",
start_date=TEST_DATE_STR,
forecast_df=forecast_df.copy(),
segment={"a": "A2"},
trained_parameters={"trained_parameters": "yes"},
@@ -418,10 +427,10 @@ def test_summarize(mocker, forecast):
observed_expected_df = pd.DataFrame(
{
"submission_date": [
pd.to_datetime("2124-01-01").date(),
pd.to_datetime("2124-01-02").date(),
pd.to_datetime("2124-01-01").date(),
pd.to_datetime("2124-01-02").date(),
TEST_DATE,
TEST_DATE_NEXT_DAY,
TEST_DATE,
TEST_DATE_NEXT_DAY,
],
"a": ["A1", "A1", "A2", "A2"],
"value": [20, 30, 40, 50],
@@ -491,8 +500,8 @@ def test_summarize(mocker, forecast):
def test_under_predict(mocker):
"""testing _predict"""
# set segment models
# TEST_DATE chosen as an arbitrary date to center tests on
A1_start_date = "2124-01-01"
A1_start_date = TEST_DATE_STR
parameter_dict = {
"model_setting_split_dim": "a",
"segment_settings": {
@@ -508,8 +517,8 @@ def test_under_predict(mocker):
}
parameter_dotmap = DotMap(parameter_dict)
predict_start_date = "2124-01-02"
predict_end_date = "2124-03-01"
predict_start_date = TEST_DATE_NEXT_DAY_STR
predict_end_date = TEST_PREDICT_END_STR
forecast = FunnelForecast(
model_type="test",
@@ -535,8 +544,8 @@ def test_under_predict(mocker):
"b": ["B1", "B2"],
"y": [0, 1],
"submission_date": [
pd.to_datetime("2124-01-01").date(),
pd.to_datetime("2124-01-02").date(),
TEST_DATE,
TEST_DATE_NEXT_DAY,
],
}
)
@@ -557,8 +566,8 @@ def test_under_predict(mocker):
dates_to_predict = pd.DataFrame(
{
"submission_date": [
pd.to_datetime("2124-01-01").date(),
pd.to_datetime("2124-01-02").date(),
TEST_DATE,
TEST_DATE_NEXT_DAY,
]
}
)
@@ -574,8 +583,8 @@ def test_under_predict(mocker):
{
0: [0, model_value],
"submission_date": [
pd.to_datetime("2124-01-01").date(),
pd.to_datetime("2124-01-02").date(),
TEST_DATE,
TEST_DATE_NEXT_DAY,
],
}
)
@@ -623,10 +632,10 @@ def test_predict(funnel_forecast_for_fit_tests, segment_info_fit_tests):
"b": ["B1", "B2", "B1", "B2"],
"y": [-1, 1, -1, 1],
"submission_date": [
pd.to_datetime("2124-01-01").date(),
pd.to_datetime("2124-01-02").date(),
pd.to_datetime("2124-01-01").date(),
pd.to_datetime("2124-01-02").date(),
TEST_DATE,
TEST_DATE_NEXT_DAY,
TEST_DATE,
TEST_DATE_NEXT_DAY,
],
}
)
@@ -652,8 +661,8 @@ def test_predict(funnel_forecast_for_fit_tests, segment_info_fit_tests):
{
0: [0, model_value],
"submission_date": [
pd.to_datetime("2124-01-01").date(),
pd.to_datetime("2124-01-02").date(),
TEST_DATE,
TEST_DATE_NEXT_DAY,
],
}
)
@@ -664,7 +673,7 @@ def test_predict(funnel_forecast_for_fit_tests, segment_info_fit_tests):
expected_raw["submission_date"]
>= pd.to_datetime(funnel_forecast_for_fit_tests.start_date).date()
)
expected = expected_raw[expected_time_filter]
expected = expected_raw[expected_time_filter].reset_index(drop=True)
forecast_df = segment.forecast_df
pd.testing.assert_frame_equal(forecast_df, expected)
@@ -717,8 +726,8 @@ def test_auto_tuning(forecast, mocker):
# set one segment with two sets of grid parameters
segment_settings = SegmentModelSettings(
segment={"a": "A1"},
start_date="2124-01-01",
end_date="2124-03-01",
start_date=TEST_DATE_STR,
end_date=TEST_PREDICT_END_STR,
holidays=[],
regressors=[],
grid_parameters={"param1": [1, 2], "param2": [20, 10]},
@@ -738,8 +747,8 @@ def test_auto_tuning(forecast, mocker):
"a": ["A1", "A1"],
"b": ["B1", "B2"],
"submission_date": [
pd.to_datetime("2124-01-01").date(),
pd.to_datetime("2124-01-01").date(),
TEST_DATE,
TEST_DATE,
],
}
)
@@ -760,10 +769,10 @@ def test_under_fit(funnel_forecast_for_fit_tests, segment_info_fit_tests):
"a": ["A1", "A1", "A2", "A2"],
"b": ["B1", "B2", "B1", "B2"],
"submission_date": [
pd.to_datetime("2124-01-01").date(),
pd.to_datetime("2124-01-02").date(),
pd.to_datetime("2124-01-01").date(),
pd.to_datetime("2124-01-02").date(),
TEST_DATE,
TEST_DATE_NEXT_DAY,
TEST_DATE,
TEST_DATE_NEXT_DAY,
],
}
)
@@ -807,10 +816,10 @@ def test_fit(funnel_forecast_for_fit_tests, segment_info_fit_tests):
"a": ["A1", "A1", "A2", "A2"],
"b": ["B1", "B2", "B1", "B2"],
"submission_date": [
pd.to_datetime("2124-01-01").date(),
pd.to_datetime("2124-01-02").date(),
pd.to_datetime("2124-01-01").date(),
pd.to_datetime("2124-01-02").date(),
TEST_DATE,
TEST_DATE_NEXT_DAY,
TEST_DATE,
TEST_DATE_NEXT_DAY,
],
}
)
@@ -872,8 +881,8 @@ def test_set_segment_models():
}
parameter_dotmap = DotMap(parameter_dict)
predict_start_date = "2124-01-01"
predict_end_date = "2124-03-01"
predict_start_date = TEST_DATE_STR
predict_end_date = TEST_PREDICT_END_STR
forecast = FunnelForecast(
model_type="test",
@@ -951,8 +960,8 @@ def test_set_segment_models_exception():
}
parameter_dotmap = DotMap(parameter_dict)
predict_start_date = "2124-01-01"
predict_end_date = "2124-03-01"
predict_start_date = TEST_DATE_STR
predict_end_date = TEST_PREDICT_END_STR
forecast = FunnelForecast(
model_type="test",
@@ -982,6 +991,14 @@ def test_fill_regressor_dates(forecast):
"""test _fill_regressor_dates
the name in the regressor info indicates which case is being tested
Dates are chosen arbitrarily"""
# get the set start and end dates for the forecast fixture
# as datetime objects
default_start_datetime = datetime(TEST_DATE.year, TEST_DATE.month, TEST_DATE.day)
default_end_datetime = datetime(
TEST_PREDICT_END.year, TEST_PREDICT_END.month, TEST_PREDICT_END.day
)
# set the start date with an arbitrary date
regressor_info = {
"name": "only_start",
"description": "only has a start",
@@ -990,8 +1007,11 @@
regressor = ProphetRegressor(**regressor_info)
forecast._fill_regressor_dates(regressor)
assert regressor.start_date == pd.to_datetime("2020-08-15")
assert regressor.end_date == pd.to_datetime("2124-03-01")
# this is the end date for the forecast fixture
assert regressor.end_date == default_end_datetime
# set the end date with an arbitrary date
regressor_info = {
"name": "only_end",
"description": "only has a end",
@@ -999,9 +1019,11 @@
}
regressor = ProphetRegressor(**regressor_info)
forecast._fill_regressor_dates(regressor)
assert regressor.start_date == pd.to_datetime("2124-01-01")
# the start date for the forecast fixture is TEST_DATE
assert regressor.start_date == default_start_datetime
assert regressor.end_date == pd.to_datetime("2125-08-15")
# set both the start and end dates to arbitrary dates
regressor_info = {
"name": "both",
"description": "only has a start",
@@ -1013,15 +1035,17 @@
assert regressor.start_date == pd.to_datetime("2020-08-15")
assert regressor.end_date == pd.to_datetime("2020-09-15")
# use the defaults for both
regressor_info = {
"name": "neither",
"description": "nothin to see here",
}
regressor = ProphetRegressor(**regressor_info)
forecast._fill_regressor_dates(regressor)
assert regressor.start_date == pd.to_datetime("2124-01-01")
assert regressor.end_date == pd.to_datetime("2124-03-01")
assert regressor.start_date == default_start_datetime
assert regressor.end_date == default_end_datetime
# use arbitrary out-of-order dates
regressor_info = {
"name": "out_of_order",
"description": "best better break",
@@ -1039,6 +1063,11 @@
def test_add_regressors(forecast):
"""test add regressors
test case for each element of regressor_list_raw is indicated in name"""
# choose arbitrary dates for the regressor windows
# name indicates the relationship of the window
# to the timeframe of the data as defined in the ds
# column of df below
regressor_list_raw = [
{
"name": "all_in",
@@ -1120,8 +1149,8 @@ def test_build_train_dataframe_no_regressors(forecast):
}
segment_settings = SegmentModelSettings(
segment={"a": 1, "b": 2},
start_date="2124-01-01",
end_date="2124-02-01",
start_date=TEST_DATE_STR,
end_date=TEST_PREDICT_END_STR,
holidays=[],
regressors=[ProphetRegressor(**r) for r in regressor_list],
grid_parameters=grid_parameters,
@@ -1134,12 +1163,12 @@ def test_build_train_dataframe_no_regressors(forecast):
"b": [1, 1, 2, 2, 2, 2],
"y": [1, 2, 3, 4, 5, 6],
"submission_date": [
pd.to_datetime("2124-12-01").date(),
pd.to_datetime("2124-12-02").date(),
pd.to_datetime("2124-01-01").date(),
pd.to_datetime("2124-01-02").date(),
pd.to_datetime("2123-01-01").date(),
pd.to_datetime("2123-01-02").date(),
TEST_DATE - relativedelta(months=1),
TEST_DATE_NEXT_DAY - relativedelta(months=1),
TEST_DATE,
TEST_DATE_NEXT_DAY,
TEST_DATE + relativedelta(months=1),
TEST_DATE_NEXT_DAY + relativedelta(months=1),
],
}
)
@@ -1153,8 +1182,8 @@ def test_build_train_dataframe_no_regressors(forecast):
"b": [2, 2],
"y": [3, 4],
"ds": [
pd.to_datetime("2124-01-01").date(),
pd.to_datetime("2124-01-02").date(),
TEST_DATE,
TEST_DATE_NEXT_DAY,
],
}
)
@@ -1172,8 +1201,8 @@ def test_build_train_dataframe_no_regressors(forecast):
"b": [2, 2],
"y": [3, 4],
"ds": [
pd.to_datetime("2124-01-01").date(),
pd.to_datetime("2124-01-02").date(),
TEST_DATE,
TEST_DATE_NEXT_DAY,
],
"floor": [1.5, 1.5],
"cap": [6.0, 6.0],
@@ -1193,20 +1222,24 @@ def test_build_train_dataframe(forecast):
{
"name": "all_in",
"description": "it's all in",
"start_date": "2124-01-01",
"end_date": "2124-01-06",
"start_date": TEST_DATE_STR,
"end_date": (TEST_DATE + relativedelta(days=6)).strftime("%Y-%m-%d"),
},
{
"name": "all_out",
"description": "it's all in",
"start_date": "2124-02-01",
"end_date": "2124-02-06",
"start_date": (TEST_DATE + relativedelta(months=1)).strftime("%Y-%m-%d"),
"end_date": (TEST_DATE + relativedelta(months=1, days=6)).strftime(
"%Y-%m-%d"
),
},
{
"name": "just_end",
"description": "just the second one",
"start_date": "2124-01-02",
"end_date": "2124-02-06",
"start_date": (TEST_DATE + relativedelta(days=1)).strftime("%Y-%m-%d"),
"end_date": (TEST_DATE + relativedelta(months=1, days=6)).strftime(
"%Y-%m-%d"
),
},
]
@@ -1226,8 +1259,8 @@ def test_build_train_dataframe(forecast):
}
segment_settings = SegmentModelSettings(
segment={"a": 1, "b": 2},
start_date="2124-01-01",
end_date="2124-02-01",
start_date=TEST_DATE_STR,
end_date=(TEST_DATE + relativedelta(months=1)).strftime("%Y-%m-%d"),
holidays=[],
regressors=[ProphetRegressor(**r) for r in regressor_list],
grid_parameters=grid_parameters,
@@ -1240,12 +1273,12 @@ def test_build_train_dataframe(forecast):
"b": [1, 1, 2, 2, 2, 2],
"y": [1, 2, 3, 4, 5, 6],
"submission_date": [
pd.to_datetime("2124-12-01").date(),
pd.to_datetime("2124-12-02").date(),
pd.to_datetime("2124-01-01").date(),
pd.to_datetime("2124-01-02").date(),
pd.to_datetime("2123-01-01").date(),
pd.to_datetime("2123-01-02").date(),
TEST_DATE - relativedelta(months=1),
TEST_DATE_NEXT_DAY - relativedelta(months=1),
TEST_DATE,
TEST_DATE_NEXT_DAY,
TEST_DATE + relativedelta(months=1),
TEST_DATE_NEXT_DAY + relativedelta(months=1),
],
}
)
@@ -1258,8 +1291,8 @@ def test_build_train_dataframe(forecast):
"b": [2, 2],
"y": [3, 4],
"ds": [
pd.to_datetime("2124-01-01").date(),
pd.to_datetime("2124-01-02").date(),
TEST_DATE,
TEST_DATE_NEXT_DAY,
],
"all_in": [0, 0],
"all_out": [
@@ -1282,8 +1315,8 @@ def test_build_train_dataframe(forecast):
"b": [2, 2],
"y": [3, 4],
"ds": [
pd.to_datetime("2124-01-01").date(),
pd.to_datetime("2124-01-02").date(),
TEST_DATE,
TEST_DATE_NEXT_DAY,
],
"all_in": [0, 0],
"all_out": [1, 1],
@@ -1320,8 +1353,8 @@ def test_build_predict_dataframe_no_regressors(forecast):
}
segment_settings = SegmentModelSettings(
segment={"a": 1, "b": 2},
start_date="2124-01-01",
end_date="2124-02-01",
start_date=TEST_DATE_STR,
end_date=TEST_PREDICT_END_STR,
holidays=[],
regressors=[ProphetRegressor(**r) for r in regressor_list],
grid_parameters=grid_parameters,
@@ -1334,12 +1367,12 @@ def test_build_predict_dataframe_no_regressors(forecast):
dates_to_predict = pd.DataFrame(
{
"submission_date": [
pd.to_datetime("2124-12-01").date(),
pd.to_datetime("2124-12-02").date(),
pd.to_datetime("2124-01-01").date(),
pd.to_datetime("2124-01-02").date(),
pd.to_datetime("2123-01-01").date(),
pd.to_datetime("2123-01-02").date(),
TEST_DATE - relativedelta(months=1),
TEST_DATE_NEXT_DAY - relativedelta(months=1),
TEST_DATE,
TEST_DATE_NEXT_DAY,
TEST_DATE,
TEST_DATE_NEXT_DAY,
],
}
)
@@ -1350,12 +1383,12 @@ def test_build_predict_dataframe_no_regressors(forecast):
expected_predict_df = pd.DataFrame(
{
"ds": [
pd.to_datetime("2124-12-01").date(),
pd.to_datetime("2124-12-02").date(),
pd.to_datetime("2124-01-01").date(),
pd.to_datetime("2124-01-02").date(),
pd.to_datetime("2123-01-01").date(),
pd.to_datetime("2123-01-02").date(),
TEST_DATE - relativedelta(months=1),
TEST_DATE_NEXT_DAY - relativedelta(months=1),
TEST_DATE,
TEST_DATE_NEXT_DAY,
TEST_DATE,
TEST_DATE_NEXT_DAY,
],
}
)
@@ -1372,12 +1405,12 @@ def test_build_predict_dataframe_no_regressors(forecast):
expected_predict_wlog_df = pd.DataFrame(
{
"ds": [
pd.to_datetime("2124-12-01").date(),
pd.to_datetime("2124-12-02").date(),
pd.to_datetime("2124-01-01").date(),
pd.to_datetime("2124-01-02").date(),
pd.to_datetime("2123-01-01").date(),
pd.to_datetime("2123-01-02").date(),
TEST_DATE - relativedelta(months=1),
TEST_DATE_NEXT_DAY - relativedelta(months=1),
TEST_DATE,
TEST_DATE_NEXT_DAY,
TEST_DATE,
TEST_DATE_NEXT_DAY,
],
"floor": [-1.0, -1.0, -1.0, -1.0, -1.0, -1.0],
"cap": [10.0, 10.0, 10.0, 10.0, 10.0, 10.0],
@@ -1397,20 +1430,24 @@ def test_build_predict_dataframe(forecast):
{
"name": "all_in",
"description": "it's all in",
"start_date": "2124-01-01",
"end_date": "2124-01-06",
"start_date": TEST_DATE_STR,
"end_date": (TEST_DATE + relativedelta(days=6)).strftime("%Y-%m-%d"),
},
{
"name": "all_out",
"description": "it's all in",
"start_date": "2124-02-01",
"end_date": "2124-02-06",
"start_date": (TEST_DATE + relativedelta(months=1)).strftime("%Y-%m-%d"),
"end_date": (TEST_DATE + relativedelta(months=1, days=6)).strftime(
"%Y-%m-%d"
),
},
{
"name": "just_end",
"description": "just the second one",
"start_date": "2124-01-02",
"end_date": "2124-02-06",
"start_date": (TEST_DATE + relativedelta(days=1)).strftime("%Y-%m-%d"),
"end_date": (TEST_DATE + relativedelta(months=1, days=6)).strftime(
"%Y-%m-%d"
),
},
]
@@ -1430,8 +1467,8 @@ def test_build_predict_dataframe(forecast):
}
segment_settings = SegmentModelSettings(
segment={"a": 1, "b": 2},
start_date="2124-01-01",
end_date="2124-02-01",
start_date=TEST_DATE_STR,
end_date=TEST_PREDICT_END_STR,
holidays=[],
regressors=[ProphetRegressor(**r) for r in regressor_list],
grid_parameters=grid_parameters,
@@ -1443,10 +1480,7 @@ def test_build_predict_dataframe(forecast):
dates_to_predict = pd.DataFrame(
{
"submission_date": [
pd.to_datetime("2124-01-01").date(),
pd.to_datetime("2124-01-02").date(),
],
"submission_date": [TEST_DATE, TEST_DATE_NEXT_DAY],
}
)
@@ -1456,10 +1490,7 @@ def test_build_predict_dataframe(forecast):
)
expected_train_df = pd.DataFrame(
{
"ds": [
pd.to_datetime("2124-01-01").date(),
pd.to_datetime("2124-01-02").date(),
],
"ds": [TEST_DATE, TEST_DATE_NEXT_DAY],
"all_in": [0, 0],
"all_out": [1, 1],
"just_end": [1, 0],
@@ -1477,10 +1508,7 @@ def test_build_predict_dataframe(forecast):
)
expected_train_wlog_df = pd.DataFrame(
{
"ds": [
pd.to_datetime("2124-01-01").date(),
pd.to_datetime("2124-01-02").date(),
],
"ds": [TEST_DATE, TEST_DATE_NEXT_DAY],
"all_in": [0, 0],
"all_out": [1, 1],
"just_end": [1, 0],
@@ -1503,23 +1531,28 @@ def test_build_model(forecast):
{
"name": "all_in",
"description": "it's all in",
"start_date": "2124-01-01",
"end_date": "2124-01-06",
"start_date": TEST_DATE_STR,
"end_date": (TEST_DATE + relativedelta(days=6)).strftime("%Y-%m-%d"),
},
{
"name": "all_out",
"description": "it's all in",
"start_date": "2124-02-01",
"end_date": "2124-02-06",
"start_date": (TEST_DATE + relativedelta(months=1)).strftime("%Y-%m-%d"),
"end_date": (TEST_DATE + relativedelta(months=1, days=6)).strftime(
"%Y-%m-%d"
),
},
{
"name": "just_end",
"description": "just the second one",
"start_date": "2124-01-02",
"end_date": "2124-02-06",
"start_date": (TEST_DATE + relativedelta(days=1)).strftime("%Y-%m-%d"),
"end_date": (TEST_DATE + relativedelta(months=1, days=6)).strftime(
"%Y-%m-%d"
),
},
]
# use holidays from holiday config file
holiday_list = {
"easter": {
"name": "easter",
@@ -1568,8 +1601,8 @@ def test_build_model(forecast):
}
segment_settings = SegmentModelSettings(
segment={"a": 1, "b": 2},
start_date="2124-01-01",
end_date="2124-02-01",
start_date=TEST_DATE_STR,
end_date=TEST_PREDICT_END_STR,
holidays=[ProphetHoliday(**h) for h in holiday_list.values()],
regressors=[ProphetRegressor(**r) for r in regressor_list],
grid_parameters=grid_parameters,

View file

@@ -1,17 +1,552 @@
from datetime import date
from dateutil.relativedelta import relativedelta
import pandas as pd
from dotmap import DotMap
import numpy as np
import pytest
import collections
from kpi_forecasting.models.prophet_forecast import ProphetForecast
# Arbitrarily choose some date to use for the tests
TEST_DATE = date(2024, 1, 1)
TEST_DATE_STR = TEST_DATE.strftime("%Y-%m-%d")
TEST_DATE_NEXT_DAY = date(2024, 1, 2)
TEST_DATE_NEXT_DAY_STR = TEST_DATE_NEXT_DAY.strftime("%Y-%m-%d")
@pytest.fixture
def forecast():
A1_start_date = TEST_DATE_STR
parameter_dict = {
"model_setting_split_dim": "a",
"segment_settings": {
"A1": {
"start_date": A1_start_date,
"end_date": None,
"holidays": [],
"regressors": [],
"grid_parameters": {"param1": [1, 2], "param2": [20, 10]},
"cv_settings": {},
},
},
}
parameter_dotmap = DotMap(parameter_dict)
predict_start_date = TEST_DATE_NEXT_DAY_STR
# arbitrarily set it a couple of months after TEST_DATE
predict_end_date = (TEST_DATE + relativedelta(months=2)).strftime("%Y-%m-%d")
return ProphetForecast(
model_type="test",
parameters=parameter_dotmap,
use_holidays=None,
start_date=predict_start_date,
end_date=predict_end_date,
metric_hub=None,
)
class MockModel:
"""Used in place of prophet.Prophet for testing purposes"""
def __init__(self, param1=0, param2=0, **kwargs):
self.value = param1 * param2
self.history = None
def fit(self, df, *args, **kwargs):
self.history = df
return None
def predict(self, dates_to_predict):
output = dates_to_predict.copy()
output[
[
"yhat",
"trend",
"trend_upper",
"trend_lower",
"weekly",
"weekly_upper",
"weekly_lower",
"yearly",
"yearly_upper",
"yearly_lower",
]
] = 0 # some dummy value so it has the right shape
return output
def predictive_samples(self, dates_to_predict):
# prophet function outputs dict of numpy arrays
# only element we care about is `yhat`
output = np.arange(len(dates_to_predict)) * self.value
return {"yhat": {0: output}}
def mock_build_model(parameters):
"""mocks the FunnelForecast build_model method"""
return MockModel(
**parameters,
)
def mock_aggregate_forecast_observed(
forecast_df, observed_df, period, numpy_aggregations, percentiles
):
"""Mocks the aggregate_forecast_observed function defined in ProphetForecast
and inherited in FunnelForecast.
This function is tested extensively in test_prophet_forecast
so we can make dummy outputs for tests related to it"""
# add dummy columns where aggregated metrics would go
percentile_columns = [f"p{el}" for el in percentiles]
output_forecast_df = forecast_df.copy()
output_forecast_df[numpy_aggregations + percentile_columns] = 0
return output_forecast_df, observed_df.copy()
def test_under_fit(forecast, mocker):
"""test the _fit method"""
observed_data = pd.DataFrame(
{
"submission_date": [
TEST_DATE,
TEST_DATE_NEXT_DAY,
TEST_DATE,
TEST_DATE_NEXT_DAY,
],
}
)
mocker.patch.object(forecast, "_build_model", mock_build_model)
forecast._fit(observed_data)
# checking that history is set in the mocked Model ensures fit was called on it
pd.testing.assert_frame_equal(
observed_data.rename(columns={"submission_date": "ds"}), forecast.model.history
)
def test_fit(forecast, mocker):
"""test the fit function. It is inherited from BaseForecast
and calls _fit with the proper object attributes. Test looks very
similar to that for _fit"""
observed_data = pd.DataFrame(
{
"submission_date": [
TEST_DATE,
TEST_DATE_NEXT_DAY,
TEST_DATE,
TEST_DATE_NEXT_DAY,
],
}
)
mocker.patch.object(forecast, "_build_model", mock_build_model)
forecast.observed_df = observed_data
forecast.fit()
# checking that history is set in the mocked Model ensures fit was called on it
pd.testing.assert_frame_equal(
observed_data.rename(columns={"submission_date": "ds"}), forecast.model.history
)
assert forecast.trained_at is not None
def test_combine_forecast_observed(mocker, forecast):
"""tests the _combine_forecast_observed method"""
# forecast predictions are set with the
# mock_aggregate_forecast_observed function so they
# can be omitted here
forecast_df = pd.DataFrame(
{
"submission_date": [
TEST_DATE,
TEST_DATE_NEXT_DAY,
],
}
)
# dummy observed data; the values are arbitrary
observed_df = pd.DataFrame(
{
"submission_date": [
TEST_DATE,
TEST_DATE_NEXT_DAY,
],
"value": [10, 20],
}
)
mocker.patch.object(
forecast, "_aggregate_forecast_observed", mock_aggregate_forecast_observed
)
numpy_aggregations = ["mean"]
percentiles = [10, 50, 90]
output_df = forecast._combine_forecast_observed(
forecast_df,
observed_df,
period="period",
numpy_aggregations=numpy_aggregations,
percentiles=percentiles,
)
observed_expected_df = pd.DataFrame(
{
"submission_date": [
TEST_DATE,
TEST_DATE_NEXT_DAY,
],
"value": [10, 20],
"measure": ["observed", "observed"],
"source": ["historical", "historical"],
}
)
# 4x2 rows: 4 measures (mean, p10, p50, p90) for each of 2 dates
forecast_expected_df = pd.DataFrame(
{
"submission_date": [
TEST_DATE,
TEST_DATE_NEXT_DAY,
TEST_DATE,
TEST_DATE_NEXT_DAY,
TEST_DATE,
TEST_DATE_NEXT_DAY,
TEST_DATE,
TEST_DATE_NEXT_DAY,
],
"measure": ["mean", "mean", "p10", "p10", "p50", "p50", "p90", "p90"],
"value": [0] * 8,
"source": ["forecast"] * 8,
}
)
# concat in same order to make our lives easier
expected = pd.concat([observed_expected_df, forecast_expected_df]).sort_values(
["submission_date", "measure"]
)
assert set(expected.columns) == set(output_df.columns)
# force value columns to be floats in both cases to make check easier
numeric_cols = ["value", "value_low", "value_mid", "value_high"]
pd.testing.assert_frame_equal(
output_df.sort_values(["submission_date", "measure"]).reset_index(drop=True),
expected[output_df.columns].reset_index(drop=True),
)
# should not be any nulls outside the numeric columns
non_metric_columns = [el for el in output_df.columns if el not in numeric_cols]
assert not pd.isna(output_df[non_metric_columns]).any(axis=None)
def test_under_summarize(mocker, forecast):
"""testing _summarize"""
# forecast predictions are set with the
# mock_aggregate_forecast_observed function so they
# can be omitted here
forecast_df = pd.DataFrame(
{
"submission_date": [
TEST_DATE,
TEST_DATE_NEXT_DAY,
],
}
)
# dummy observed data; the values are arbitrary
observed_df = pd.DataFrame(
{
"submission_date": [
TEST_DATE,
TEST_DATE_NEXT_DAY,
],
"value": [10, 20],
}
)
mocker.patch.object(
forecast, "_aggregate_forecast_observed", mock_aggregate_forecast_observed
)
numpy_aggregations = ["mean"]
percentiles = [10, 50, 90]
output_df = forecast._summarize(
forecast_df,
observed_df,
period="period",
numpy_aggregations=numpy_aggregations,
percentiles=percentiles,
)
observed_expected_df = pd.DataFrame(
{
"submission_date": [
TEST_DATE,
TEST_DATE_NEXT_DAY,
],
"value": [10, 20],
"measure": ["observed", "observed"],
"source": ["historical", "historical"],
}
)
# 4x2 rows: 4 measures (mean, p10, p50, p90) for each of 2 dates
forecast_expected_df = pd.DataFrame(
{
"submission_date": [
TEST_DATE,
TEST_DATE_NEXT_DAY,
TEST_DATE,
TEST_DATE_NEXT_DAY,
TEST_DATE,
TEST_DATE_NEXT_DAY,
TEST_DATE,
TEST_DATE_NEXT_DAY,
],
"measure": ["mean", "mean", "p10", "p10", "p50", "p50", "p90", "p90"],
"value": [0] * 8,
"source": ["forecast"] * 8,
}
)
# concat in same order to make our lives easier
expected = pd.concat([observed_expected_df, forecast_expected_df]).sort_values(
["submission_date", "measure"]
)
expected["aggregation_period"] = "period"
assert set(expected.columns) == set(output_df.columns)
# force value columns to be floats in both cases to make check easier
numeric_cols = ["value", "value_low", "value_mid", "value_high"]
pd.testing.assert_frame_equal(
output_df.sort_values(["submission_date", "measure"]).reset_index(drop=True),
expected[output_df.columns].reset_index(drop=True),
)
# should not be any nulls outside the numeric columns
non_metric_columns = [el for el in output_df.columns if el not in numeric_cols]
assert not pd.isna(output_df[non_metric_columns]).any(axis=None)
def test_summarize(mocker, forecast):
"""testing summarize"""
# create a dummy metric hub object so that when metadata from
# it is added we don't get an error
MetricHub = collections.namedtuple(
"MetricHub",
["alias", "app_name", "slug", "min_date", "max_date"],
)
dummy_metric_hub = MetricHub("", "", "", TEST_DATE_STR, TEST_DATE_STR)
# forecast predictions are set with the
# mock_aggregate_forecast_observed function so they
# can be omitted here
forecast_df = pd.DataFrame(
{
"submission_date": [
TEST_DATE,
TEST_DATE_NEXT_DAY,
],
}
)
# dummy observed data; the values are arbitrary
observed_df = pd.DataFrame(
{
"submission_date": [
TEST_DATE,
TEST_DATE_NEXT_DAY,
],
"value": [10, 20],
}
)
mocker.patch.object(
forecast, "_aggregate_forecast_observed", mock_aggregate_forecast_observed
)
numpy_aggregations = ["mean"]
percentiles = [10, 50, 90]
forecast.observed_df = observed_df
forecast.forecast_df = forecast_df
forecast.metric_hub = dummy_metric_hub
# timestamp attributes created by fit and predict
# must be added manually
forecast.collected_at = ""
forecast.trained_at = ""
forecast.predicted_at = ""
forecast.metadata_params = ""
numpy_aggregations = ["mean"]
percentiles = [10, 50, 90]
forecast.summarize(
periods=["period1", "period2"],
numpy_aggregations=numpy_aggregations,
percentiles=percentiles,
)
output_df = forecast.summary_df
observed_expected_df = pd.DataFrame(
{
"submission_date": [
TEST_DATE,
TEST_DATE_NEXT_DAY,
],
"value": [10, 20],
"measure": ["observed", "observed"],
"source": ["historical", "historical"],
}
)
# 4x2 rows: 4 measures (mean, p10, p50, p90) for each of 2 dates
forecast_expected_df = pd.DataFrame(
{
"submission_date": [
TEST_DATE,
TEST_DATE_NEXT_DAY,
TEST_DATE,
TEST_DATE_NEXT_DAY,
TEST_DATE,
TEST_DATE_NEXT_DAY,
TEST_DATE,
TEST_DATE_NEXT_DAY,
],
"measure": ["mean", "mean", "p10", "p10", "p50", "p50", "p90", "p90"],
"value": [0] * 8,
"source": ["forecast"] * 8,
}
)
# concat in same order to make our lives easier
expected = pd.concat([observed_expected_df, forecast_expected_df]).sort_values(
["submission_date", "measure"]
)
expected1 = expected.copy()
expected2 = expected.copy()
expected1["aggregation_period"] = "period1"
expected2["aggregation_period"] = "period2"
expected = pd.concat([expected1, expected2])
# not going to check all the metadata columns
# in assert_frame_equal. Just make sure they're there
metadata_columns = {
"metric_alias",
"metric_hub_app_name",
"metric_hub_slug",
"metric_start_date",
"metric_end_date",
"metric_collected_at",
"forecast_start_date",
"forecast_end_date",
"forecast_trained_at",
"forecast_predicted_at",
"forecast_parameters",
}
assert set(expected.columns) | metadata_columns == set(output_df.columns)
# force value columns to be floats in both cases to make check easier
numeric_cols = ["value", "value_low", "value_mid", "value_high"]
pd.testing.assert_frame_equal(
output_df.sort_values(["submission_date", "aggregation_period", "measure"])[
expected.columns
].reset_index(drop=True),
expected.sort_values(
["submission_date", "aggregation_period", "measure"]
).reset_index(drop=True),
)
# should not be any nulls outside the numeric columns
non_metric_columns = [el for el in output_df.columns if el not in numeric_cols]
assert not pd.isna(output_df[non_metric_columns]).any(axis=None)
def test_under_predict(mocker, forecast):
"""testing _predict"""
# this ensures forecast is using MockModel
mocker.patch.object(forecast, "_build_model", mock_build_model)
observed_df = pd.DataFrame(
{
"y": [0, 1],
"submission_date": [
TEST_DATE,
TEST_DATE_NEXT_DAY,
],
}
)
dates_to_predict = pd.DataFrame(
{
"submission_date": [
TEST_DATE,
TEST_DATE_NEXT_DAY,
]
}
)
forecast.observed_df = observed_df
forecast.parameters = {"param1": 1, "param2": 2}
forecast.fit()
out = forecast._predict(dates_to_predict).reset_index(drop=True)
# in MockModel, the predictive_samples method sets the output to
# np.arange(len(dates_to_predict)) * self.value for one column called 0
# this helps ensure the forecast_df is set properly
expected = pd.DataFrame(
{
0: [0, 2],
"submission_date": [
TEST_DATE,
TEST_DATE_NEXT_DAY,
],
}
)
pd.testing.assert_frame_equal(out, expected)
# test predict while we're here
forecast.dates_to_predict = dates_to_predict
forecast.number_of_simulations = 1 # so that _validate doesn't break
forecast.predict()
out = forecast.forecast_df
# in MockModel, the predictive_samples method sets the output to
# np.arange(len(dates_to_predict)) * self.value for one column called 0
# this helps ensure the forecast_df is set properly
expected = pd.DataFrame(
{
0: [0, 2],
"submission_date": [
TEST_DATE,
TEST_DATE_NEXT_DAY,
],
}
)
pd.testing.assert_frame_equal(out, expected)
assert forecast.predicted_at is not None
def test_summarize_non_overlapping_day():
observed_start_date = "2124-01-01"
observed_end_date = "2124-02-01"
observed_start_date = TEST_DATE_STR
observed_end_date = (TEST_DATE + relativedelta(months=1)).strftime("%Y-%m-%d")
predict_start_date = "2124-02-02"
predict_end_date = "2124-03-01"
predict_start_date = (TEST_DATE + relativedelta(months=1, days=1)).strftime(
"%Y-%m-%d"
)
predict_end_date = (TEST_DATE + relativedelta(months=2)).strftime("%Y-%m-%d")
forecast = ProphetForecast(
model_type="test",
@@ -33,10 +568,15 @@ def test_summarize_non_overlapping_day():
}
)
# these are the samples generated
# the mean and median are the aggregates used
test_samples = np.array([1, 1, 2, 3, 5, 8, 13])
test_mean = np.mean(test_samples)
test_median = np.median(test_samples)
# mean and median scale with a factor
# so a factor is multiplied on to make sure the aggregation is working
# across rows properly
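# e.g. np.mean(2 * test_samples) == 2 * np.mean(test_samples), and the
# same holds for np.median, so the aggregate for row i should come out
# to i * test_mean (or i * test_median)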
forecast_array = np.stack(
[test_samples * i for i in range(1, 1 + len(predict_submission_dates))],
axis=0,
@@ -110,12 +650,22 @@
def test_summarize_non_overlapping_month():
# choose arbitrary year for the start and end dates
# two full months (Jan and Feb)
# are in the observed data, the number of days (31 and 28 days respectively)
# in each month is used in the checks
observed_start_date = "2124-01-01"
observed_end_date = "2124-02-28"
# two full months (April and May)
# are in the prediction data, the number of days (30 and 31 days respectively)
# in each month is used in the checks
predict_start_date = "2124-04-01"
predict_end_date = "2124-05-31"
forecast = ProphetForecast(
model_type="test",
parameters=DotMap(),
@@ -229,11 +779,11 @@ def test_summarize_non_overlapping_month():
def test_summarize_overlapping_day():
observed_start_date = "2124-01-01"
observed_end_date = "2124-02-01"
observed_start_date = TEST_DATE_STR
observed_end_date = (TEST_DATE + relativedelta(months=1)).strftime("%Y-%m-%d")
predict_start_date = "2124-01-01"
predict_end_date = "2124-02-01"
predict_start_date = TEST_DATE_STR
predict_end_date = (TEST_DATE + relativedelta(months=1)).strftime("%Y-%m-%d")
forecast = ProphetForecast(
model_type="test",
@@ -255,10 +805,15 @@ def test_summarize_overlapping_day():
}
)
# these are the samples generated
# the mean and median are the aggregates used
test_samples = np.array([1, 1, 2, 3, 5, 8, 13])
test_mean = np.mean(test_samples)
test_median = np.median(test_samples)
# mean and median scale with a factor
# so a factor is multiplied on to make sure the aggregation is working
# across rows properly
forecast_array = np.stack(
[test_samples * i for i in range(1, 1 + len(predict_submission_dates))],
axis=0,
@@ -334,6 +889,10 @@ def test_summarize_overlapping_day():
def test_summarize_overlapping_month():
# choose arbitrary year for the start and end dates
# two full months (Jan and Feb)
# are in the observed data, the number of days (31 and 28 days respectively)
# in each month is used in the checks
observed_start_date = "2124-01-01"
observed_end_date = "2124-02-28"
@@ -360,10 +919,15 @@ def test_summarize_overlapping_month():
}
)
# these are the samples generated
# the mean and median are the aggregates used
test_samples = np.array([1, 1, 2, 3, 5, 8, 13])
test_mean = np.mean(test_samples)
test_median = np.median(test_samples)
# every row gets the same samples in this test,
# so the aggregates should be identical across rows
forecast_array = np.stack(
[test_samples] * len(predict_submission_dates),
axis=0,