This commit is contained in:
Родитель
e614e9e831
Коммит
9a193f9682
|
@ -485,8 +485,9 @@ construct_forecast_models <- function(full_data_tbl,
|
|||
# if multivariate models are chosen to run, ensemble models are turned on, and more than one individual model has been run,
|
||||
# then create enough back test scenarios to train ensemble models, otherwise just run back test scenario input amount and turn ensembles off
|
||||
if(run_ensemble_models & (length(unique(combined_models_recipe_1$.model_desc))+length(unique(combined_models_recipe_2$.model_desc)))>1 & sum(grepl("-R", c(unique(combined_models_recipe_1$.model_desc), unique(combined_models_recipe_2$.model_desc)))) > 0) {
|
||||
#slice_limit_amount <- floor(back_test_scenarios*2)
|
||||
slice_limit_amount <- 100
|
||||
# slice_limit_amount <- floor(back_test_scenarios*2)
|
||||
#slice_limit_amount <- 100
|
||||
slice_limit_amount <- back_test_scenarios
|
||||
run_ensemble_models <- TRUE
|
||||
} else {
|
||||
slice_limit_amount <- back_test_scenarios
|
||||
|
@ -636,7 +637,7 @@ construct_forecast_models <- function(full_data_tbl,
|
|||
}
|
||||
|
||||
fcst_tbl <- tibble::tibble()
|
||||
|
||||
print(ensemble_train_data_initial)
|
||||
if(run_ensemble_models) {
|
||||
|
||||
cli::cli_h3("Ensemble Model Training")
|
||||
|
@ -679,20 +680,45 @@ construct_forecast_models <- function(full_data_tbl,
|
|||
#create ensemble resamples to train future and back test folds
|
||||
cli::cli_h3("Refitting Ensemble Models")
|
||||
|
||||
ensemble_tscv <- submodels_resample_tscv_tbl %>%
|
||||
ensemble_tscv_prep <- submodels_resample_tscv_tbl %>%
|
||||
dplyr::select(-.id) %>%
|
||||
tidyr::pivot_wider(names_from = "Model", values_from = "FCST") %>%
|
||||
timetk::time_series_cv(
|
||||
date_var = Date,
|
||||
initial = "1 year",
|
||||
assess = forecast_horizon,
|
||||
skip = back_test_spacing,
|
||||
cumulative = FALSE,
|
||||
slice_limit = back_test_scenarios) %>%
|
||||
timetk::tk_time_series_cv_plan() %>%
|
||||
tidyr::pivot_wider(names_from = "Model", values_from = "FCST") #%>%
|
||||
# timetk::time_series_cv(
|
||||
# date_var = Date,
|
||||
# initial = "1 year",
|
||||
# assess = forecast_horizon,
|
||||
# skip = back_test_spacing,
|
||||
# cumulative = FALSE,
|
||||
# slice_limit = back_test_scenarios) %>%
|
||||
# timetk::tk_time_series_cv_plan() %>%
|
||||
# dplyr::mutate(Horizon_char = as.character(Horizon))
|
||||
|
||||
# need to create a new function that creates ensemble splits that only uses back test
|
||||
# fitted forecasts, but the test set in each kfold cv matches the hold out data in
|
||||
# standard tscv process, with all remaining data used for training (filtering out future data)
|
||||
|
||||
ensemble_tscv <- ensemble_tscv_prep %>%
|
||||
dplyr::filter(Date <= hist_end_date)
|
||||
rsample::vfold_cv(v = 10) %>%
|
||||
dplyr::rename(.splits = splits, .id = id) %>%
|
||||
dplyr::ungroup() %>%
|
||||
dplyr::mutate(
|
||||
training = purrr::map(.splits, ~ rsample::training(.x)),
|
||||
testing = purrr::map(.splits, ~ rsample::testing(.x))
|
||||
) %>%
|
||||
dplyr::select(-.splits) %>%
|
||||
tidyr::gather(-.id, key = ".key", value = ".value", factor_key = TRUE) %>%
|
||||
tidyr::unnest(.value) %>%
|
||||
tibble::as_tibble() %>%
|
||||
rbind(
|
||||
rbind(
|
||||
ensemble_tscv_prep %>%
|
||||
dplyr::mutate(id)
|
||||
)
|
||||
)
|
||||
dplyr::mutate(Horizon_char = as.character(Horizon))
|
||||
|
||||
#return(ensemble_tscv)
|
||||
return(ensemble_tscv)
|
||||
|
||||
#Replace NaN/Inf with NA, then replace with zero
|
||||
is.na(ensemble_tscv) <- sapply(ensemble_tscv, is.infinite)
|
||||
|
|
|
@ -355,7 +355,7 @@ forecast_time_series <- function(input_data,
|
|||
stop("error during forecast run function call")
|
||||
|
||||
}
|
||||
|
||||
return(fcst)
|
||||
# Adjust for NaNs and Negative Forecasts
|
||||
fcst <- fcst %>%
|
||||
get_forecast_negative_adjusted(negative_forecast)
|
||||
|
|
31
R/models.R
31
R/models.R
|
@ -247,7 +247,7 @@ get_resample_tscv <- function(train_data,
|
|||
get_resample_kfold <-function(train_data){
|
||||
|
||||
train_data %>%
|
||||
rsample::vfold_cv(v = 5)
|
||||
rsample::vfold_cv(v = 10)
|
||||
}
|
||||
|
||||
#' Get tuning grid with resample
|
||||
|
@ -767,6 +767,13 @@ glmnet <- function(train_data,
|
|||
parsnip::set_engine("glmnet",
|
||||
lower.limits = 0)
|
||||
|
||||
wflw_spec_tune_glmnet <- get_workflow_simple(model_spec_glmnet,
|
||||
recipe_spec_glmnet)
|
||||
|
||||
tune_results_glmnet <- train_data %>%
|
||||
get_kfold_tune_grid(wflw_spec_tune_glmnet,
|
||||
parallel)
|
||||
|
||||
}else{
|
||||
recipe_spec_glmnet <- train_data %>%
|
||||
get_recipie_configurable(fiscal_year_start,
|
||||
|
@ -783,20 +790,18 @@ glmnet <- function(train_data,
|
|||
mixture = tune::tune()
|
||||
) %>%
|
||||
parsnip::set_engine("glmnet")
|
||||
|
||||
wflw_spec_tune_glmnet <- get_workflow_simple(model_spec_glmnet,
|
||||
recipe_spec_glmnet)
|
||||
|
||||
tune_results_glmnet <- train_data %>%
|
||||
get_resample_tune_grid(tscv_initial,
|
||||
horizon,
|
||||
back_test_spacing,
|
||||
wflw_spec_tune_glmnet,
|
||||
parallel)
|
||||
}
|
||||
|
||||
#print(recipe_spec_glmnet %>% recipes::prep() %>% recipes::juice() %>% dplyr::glimpse())
|
||||
|
||||
wflw_spec_tune_glmnet <- get_workflow_simple(model_spec_glmnet,
|
||||
recipe_spec_glmnet)
|
||||
|
||||
tune_results_glmnet <- train_data %>%
|
||||
get_resample_tune_grid(tscv_initial,
|
||||
horizon,
|
||||
back_test_spacing,
|
||||
wflw_spec_tune_glmnet,
|
||||
parallel)
|
||||
|
||||
wflw_fit_glmnet <- train_data %>%
|
||||
get_fit_wkflw_best(tune_results_glmnet,
|
||||
wflw_spec_tune_glmnet)
|
||||
|
|
Загрузка…
Ссылка в новой задаче