This commit is contained in:
Mike Tokic 2022-07-13 16:43:14 -07:00
Родитель e614e9e831
Коммит 9a193f9682
3 изменённых файлов: 59 добавлений и 28 удалений

Просмотреть файл

@ -485,8 +485,9 @@ construct_forecast_models <- function(full_data_tbl,
# if multivariate models are chosen to run, ensemble models are turned on, and more than one individual model has been run,
# then create enough back test scenarios to train ensemble models, otherwise just run back test scenario input amount and turn ensembles off
if(run_ensemble_models & (length(unique(combined_models_recipe_1$.model_desc))+length(unique(combined_models_recipe_2$.model_desc)))>1 & sum(grepl("-R", c(unique(combined_models_recipe_1$.model_desc), unique(combined_models_recipe_2$.model_desc)))) > 0) {
#slice_limit_amount <- floor(back_test_scenarios*2)
slice_limit_amount <- 100
# slice_limit_amount <- floor(back_test_scenarios*2)
#slice_limit_amount <- 100
slice_limit_amount <- back_test_scenarios
run_ensemble_models <- TRUE
} else {
slice_limit_amount <- back_test_scenarios
@ -636,7 +637,7 @@ construct_forecast_models <- function(full_data_tbl,
}
fcst_tbl <- tibble::tibble()
print(ensemble_train_data_initial)
if(run_ensemble_models) {
cli::cli_h3("Ensemble Model Training")
@ -679,20 +680,45 @@ construct_forecast_models <- function(full_data_tbl,
#create ensemble resamples to train future and back test folds
cli::cli_h3("Refitting Ensemble Models")
ensemble_tscv <- submodels_resample_tscv_tbl %>%
ensemble_tscv_prep <- submodels_resample_tscv_tbl %>%
dplyr::select(-.id) %>%
tidyr::pivot_wider(names_from = "Model", values_from = "FCST") %>%
timetk::time_series_cv(
date_var = Date,
initial = "1 year",
assess = forecast_horizon,
skip = back_test_spacing,
cumulative = FALSE,
slice_limit = back_test_scenarios) %>%
timetk::tk_time_series_cv_plan() %>%
tidyr::pivot_wider(names_from = "Model", values_from = "FCST") #%>%
# timetk::time_series_cv(
# date_var = Date,
# initial = "1 year",
# assess = forecast_horizon,
# skip = back_test_spacing,
# cumulative = FALSE,
# slice_limit = back_test_scenarios) %>%
# timetk::tk_time_series_cv_plan() %>%
# dplyr::mutate(Horizon_char = as.character(Horizon))
# need to create a new function that creates ensemble splits that only uses back test
# fitted forecasts, but the test set in each kfold cv matches the hold out data in
# standard tscv process, with all remaining data used for training (filtering out future data)
ensemble_tscv <- ensemble_tscv_prep %>%
dplyr::filter(Date <= hist_end_date)
rsample::vfold_cv(v = 10) %>%
dplyr::rename(.splits = splits, .id = id) %>%
dplyr::ungroup() %>%
dplyr::mutate(
training = purrr::map(.splits, ~ rsample::training(.x)),
testing = purrr::map(.splits, ~ rsample::testing(.x))
) %>%
dplyr::select(-.splits) %>%
tidyr::gather(-.id, key = ".key", value = ".value", factor_key = TRUE) %>%
tidyr::unnest(.value) %>%
tibble::as_tibble() %>%
rbind(
rbind(
ensemble_tscv_prep %>%
dplyr::mutate(id)
)
)
dplyr::mutate(Horizon_char = as.character(Horizon))
#return(ensemble_tscv)
return(ensemble_tscv)
#Replace NaN/Inf with NA, then replace with zero
is.na(ensemble_tscv) <- sapply(ensemble_tscv, is.infinite)

Просмотреть файл

@ -355,7 +355,7 @@ forecast_time_series <- function(input_data,
stop("error during forecast run function call")
}
return(fcst)
# Adjust for NaNs and Negative Forecasts
fcst <- fcst %>%
get_forecast_negative_adjusted(negative_forecast)

Просмотреть файл

@ -247,7 +247,7 @@ get_resample_tscv <- function(train_data,
get_resample_kfold <-function(train_data){
train_data %>%
rsample::vfold_cv(v = 5)
rsample::vfold_cv(v = 10)
}
#' Get tuning grid with resample
@ -767,6 +767,13 @@ glmnet <- function(train_data,
parsnip::set_engine("glmnet",
lower.limits = 0)
wflw_spec_tune_glmnet <- get_workflow_simple(model_spec_glmnet,
recipe_spec_glmnet)
tune_results_glmnet <- train_data %>%
get_kfold_tune_grid(wflw_spec_tune_glmnet,
parallel)
}else{
recipe_spec_glmnet <- train_data %>%
get_recipie_configurable(fiscal_year_start,
@ -783,20 +790,18 @@ glmnet <- function(train_data,
mixture = tune::tune()
) %>%
parsnip::set_engine("glmnet")
wflw_spec_tune_glmnet <- get_workflow_simple(model_spec_glmnet,
recipe_spec_glmnet)
tune_results_glmnet <- train_data %>%
get_resample_tune_grid(tscv_initial,
horizon,
back_test_spacing,
wflw_spec_tune_glmnet,
parallel)
}
#print(recipe_spec_glmnet %>% recipes::prep() %>% recipes::juice() %>% dplyr::glimpse())
wflw_spec_tune_glmnet <- get_workflow_simple(model_spec_glmnet,
recipe_spec_glmnet)
tune_results_glmnet <- train_data %>%
get_resample_tune_grid(tscv_initial,
horizon,
back_test_spacing,
wflw_spec_tune_glmnet,
parallel)
wflw_fit_glmnet <- train_data %>%
get_fit_wkflw_best(tune_results_glmnet,
wflw_spec_tune_glmnet)