This commit is contained in:
m-d-bowerman 2024-08-14 07:30:33 -07:00
Родитель b75c46a10b
Коммит 18dead8c1b
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: DA9D3625B1E7678A
1 изменённых файлов: 26 добавлений и 8 удалений

Просмотреть файл

@ -42,16 +42,18 @@ class ScalarForecast(BaseForecast):
if all(pd.to_datetime(self.observed_df["submission_date"]).dt.day == 1):
self.start_date = self._default_start_date_monthly
if self.metric_hub is None:
# this is used to avoid the code below for testing purposes
return
# Get the list of adjustments for the metric slug being forecasted. That
## slug must be a key in scalar_adjustments.yaml; otherwise, this will raise a KeyError
self.scalar_adjustments = parse_scalar_adjustments(
self.metric_hub.slug, self.start_date
)
# Construct a DataFrame containing all combination of segment values in the observed_df
self.combination_df = self.observed_df[
self.metric_hub.segments.keys()
].drop_duplicates()
self._prep_class_dataframes(self.observed_df, self.metric_hub.segments.keys())
# Set up the columns to be used to join the observed_df to the forecast_df in subsequent
## methods
@ -61,10 +63,6 @@ class ScalarForecast(BaseForecast):
## covariates in the future
self.observed_df.rename(columns={"value": self.metric_hub.alias}, inplace=True)
# Cross join to the dates_to_predict DataFrame to create a DataFrame that contains a row
## for each forecast date for each segment
self.forecast_df = self.dates_to_predict.merge(self.combination_df, how="cross")
@property
def period_names_map(self) -> Dict[str, pd.DateOffset]:
"""
@ -80,6 +78,26 @@ class ScalarForecast(BaseForecast):
"""The first day after the last date in the observed dataset."""
return self.observed_df["submission_date"].max() + pd.DateOffset(months=1)
def _prep_class_dataframes(self, observed_df: pd.DataFrame, segment_column_list: List) -> None:
    """
    Build the segment-combination DataFrame and the DataFrame that holds forecast results.

    Two attributes are set on the instance:
    ``combination_df`` has one row per distinct combination of values found in the
    segment columns of ``observed_df``, and ``forecast_df`` is the cross join of
    ``dates_to_predict`` with ``combination_df``.

    Args:
        observed_df (pd.DataFrame): observed data used for modeling; must contain
            every column named in segment_column_list
        segment_column_list (list): names of the observed_df columns that define
            the segments
    """
    # Keep only the segment columns, then collapse them to the distinct combinations
    ## that actually occur in the observed data
    segment_values = observed_df[segment_column_list]
    distinct_segments = segment_values.drop_duplicates()
    self.combination_df = distinct_segments

    # Cross join with dates_to_predict so the result has one row for each forecast date
    ## for each segment combination
    self.forecast_df = self.dates_to_predict.merge(self.combination_df, how="cross")
def _parse_formula_for_over_period_changes(self) -> Dict | None:
"""
Find period-over-period metric specifications in provided formula. If present, create a dict that