|
|
|
@ -72,12 +72,12 @@ get_back_test_scenario_hist_periods <- function(input_tbl,
|
|
|
|
|
#'
|
|
|
|
|
#' @param input_tbl full data table
|
|
|
|
|
#' @param hist_end_date historical end date
|
|
|
|
|
#' @param date_type
|
|
|
|
|
#' @param date_type date type
|
|
|
|
|
#' @param forecast_horizon forecast horizon
|
|
|
|
|
#' @param back_test_scenarios back test scenarios
|
|
|
|
|
#' @param back_test_spacing back test spacing
|
|
|
|
|
#'
|
|
|
|
|
#' @return Returns back_test_scenarios and hist_periods_80
|
|
|
|
|
#' @return Returns table of train test splits
|
|
|
|
|
#' @keywords internal
|
|
|
|
|
#' @export
|
|
|
|
|
train_test_split <- function(input_tbl,
|
|
|
|
@ -182,13 +182,13 @@ train_test_split <- function(input_tbl,
|
|
|
|
|
|
|
|
|
|
#' Gets model workflows
|
|
|
|
|
#'
|
|
|
|
|
#' @param model_recipe_tbl
|
|
|
|
|
#' @param models_to_run
|
|
|
|
|
#' @param models_not_to_run
|
|
|
|
|
#' @param run_deep_learning
|
|
|
|
|
#' @param pca
|
|
|
|
|
#' @param model_recipe_tbl model recipe table
|
|
|
|
|
#' @param models_to_run models to run
|
|
|
|
|
#' @param models_not_to_run models not to run
|
|
|
|
|
#' @param run_deep_learning run deep learning models
|
|
|
|
|
#' @param pca pca
|
|
|
|
|
#'
|
|
|
|
|
#' @return Returns back_test_scenarios and hist_periods_80
|
|
|
|
|
#' @return Returns table of model workflows
|
|
|
|
|
#' @keywords internal
|
|
|
|
|
#' @export
|
|
|
|
|
model_workflows <- function(model_recipe_tbl,
|
|
|
|
@ -392,8 +392,8 @@ model_hyperparameters <- function(model_workflow_tbl,
|
|
|
|
|
#' @param run_local_models run local models
|
|
|
|
|
#' @param global_model_recipes global model recipes
|
|
|
|
|
#' @param combo_variables combo variables
|
|
|
|
|
#' @param parallel_processing
|
|
|
|
|
#' @param num_cores
|
|
|
|
|
#' @param parallel_processing parallel processing
|
|
|
|
|
#' @param num_cores number of cores
|
|
|
|
|
#' @param seed seed number
|
|
|
|
|
#'
|
|
|
|
|
#' @return table
|
|
|
|
@ -579,12 +579,12 @@ tune_hyperparameters <- function(model_recipe_tbl,
|
|
|
|
|
'Cubist', 'earth', 'glmnet', 'kernlab', 'modeltime.gluonts', 'purrr',
|
|
|
|
|
'recipes', 'rules', 'modeltime'),
|
|
|
|
|
function_exports = NULL)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# select the best combination of hyperparameters
|
|
|
|
|
iter_list2 <- iter_list %>%
|
|
|
|
|
dplyr::select(Combo, Model, Recipe_ID) %>%
|
|
|
|
|
dplyr::distinct()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
choose_hyperparameters_fn <- function(x) {
|
|
|
|
|
|
|
|
|
|
combo <- x %>%
|
|
|
|
@ -596,18 +596,24 @@ tune_hyperparameters <- function(model_recipe_tbl,
|
|
|
|
|
recipe <- x %>%
|
|
|
|
|
dplyr::pull(Recipe_ID)
|
|
|
|
|
|
|
|
|
|
if(combo != "All-Data") {
|
|
|
|
|
test_tbl <- initial_tuning_tbl %>%
|
|
|
|
|
dplyr::filter(Combo == combo,
|
|
|
|
|
Recipe_ID == recipe,
|
|
|
|
|
Model == model) %>%
|
|
|
|
|
dplyr::select(Model, Recipe_ID, Hyperparameter_ID, Prediction, Model_Fit)
|
|
|
|
|
} else{
|
|
|
|
|
test_tbl <- initial_tuning_tbl %>%
|
|
|
|
|
dplyr::filter(Recipe_ID == recipe,
|
|
|
|
|
Model == model) %>%
|
|
|
|
|
dplyr::select(Model, Recipe_ID, Hyperparameter_ID, Prediction, Model_Fit)
|
|
|
|
|
}
|
|
|
|
|
test_tbl <- initial_tuning_tbl %>%
|
|
|
|
|
dplyr::filter(Combo == combo,
|
|
|
|
|
Recipe_ID == recipe,
|
|
|
|
|
Model == model) %>%
|
|
|
|
|
dplyr::select(Model, Recipe_ID, Hyperparameter_ID, Train_Test_ID, Prediction, Model_Fit)
|
|
|
|
|
|
|
|
|
|
# if(combo != "All-Data") {
|
|
|
|
|
# test_tbl <- initial_tuning_tbl %>%
|
|
|
|
|
# dplyr::filter(Combo == combo,
|
|
|
|
|
# Recipe_ID == recipe,
|
|
|
|
|
# Model == model) %>%
|
|
|
|
|
# dplyr::select(Model, Recipe_ID, Hyperparameter_ID, Train_Test_ID, Prediction, Model_Fit)
|
|
|
|
|
# } else{
|
|
|
|
|
# test_tbl <- initial_tuning_tbl %>%
|
|
|
|
|
# dplyr::filter(Recipe_ID == recipe,
|
|
|
|
|
# Model == model) %>%
|
|
|
|
|
# dplyr::select(Model, Recipe_ID, Hyperparameter_ID, Train_Test_ID, Prediction, Model_Fit)
|
|
|
|
|
# }
|
|
|
|
|
|
|
|
|
|
best_param <- test_tbl %>%
|
|
|
|
|
dplyr::select(-Model_Fit) %>%
|
|
|
|
@ -631,7 +637,8 @@ tune_hyperparameters <- function(model_recipe_tbl,
|
|
|
|
|
final_predictions <- test_tbl %>%
|
|
|
|
|
dplyr::filter(Hyperparameter_ID == best_param) %>%
|
|
|
|
|
dplyr::select(-Model_Fit) %>%
|
|
|
|
|
tidyr::unnest(Prediction)
|
|
|
|
|
tidyr::unnest(Prediction) %>%
|
|
|
|
|
dplyr::select(Combo, Date, Train_Test_ID, Target, Forecast)
|
|
|
|
|
|
|
|
|
|
return(tibble::tibble(Combo = combo,
|
|
|
|
|
Model = model,
|
|
|
|
@ -662,11 +669,11 @@ tune_hyperparameters <- function(model_recipe_tbl,
|
|
|
|
|
#' @param model_fit_tbl model fit table
|
|
|
|
|
#' @param model_train_test_tbl model train test split table
|
|
|
|
|
#' @param combo_variables combo variables
|
|
|
|
|
#' @param parallel_processing
|
|
|
|
|
#' @param num_cores
|
|
|
|
|
#' @param parallel_processing parallel processing
|
|
|
|
|
#' @param num_cores number of cores
|
|
|
|
|
#' @param seed seed number
|
|
|
|
|
#'
|
|
|
|
|
#' @return table
|
|
|
|
|
#' @return list of individual model predictions and fitted models
|
|
|
|
|
#' @keywords internal
|
|
|
|
|
#' @export
|
|
|
|
|
refit_models <- function(model_fit_tbl,
|
|
|
|
@ -678,7 +685,7 @@ refit_models <- function(model_fit_tbl,
|
|
|
|
|
seed = 123) {
|
|
|
|
|
|
|
|
|
|
iter_list <- model_train_test_tbl %>%
|
|
|
|
|
dplyr::filter(Run_Type %in% c("Future_Forecast", "Back_Test")) %>%
|
|
|
|
|
dplyr::filter(Run_Type %in% c("Future_Forecast", "Back_Test", "Ensemble")) %>%
|
|
|
|
|
dplyr::group_split(dplyr::row_number(), .keep = FALSE) %>%
|
|
|
|
|
purrr::map(.f = function(x) {
|
|
|
|
|
model_fit_tbl %>%
|
|
|
|
@ -690,7 +697,7 @@ refit_models <- function(model_fit_tbl,
|
|
|
|
|
dplyr::bind_rows()
|
|
|
|
|
|
|
|
|
|
fit_model <- function(x) {
|
|
|
|
|
print(x)
|
|
|
|
|
|
|
|
|
|
combo <- x %>%
|
|
|
|
|
dplyr::pull(Combo)
|
|
|
|
|
|
|
|
|
@ -796,32 +803,74 @@ refit_models <- function(model_fit_tbl,
|
|
|
|
|
'recipes', 'rules', 'modeltime'),
|
|
|
|
|
function_exports = NULL)
|
|
|
|
|
|
|
|
|
|
return(model_refit_final_tbl)
|
|
|
|
|
fitted_models <- model_refit_final_tbl %>%
|
|
|
|
|
dplyr::filter(Train_Test_ID == "01") %>%
|
|
|
|
|
dplyr::select(Combo, Model, Recipe_ID, Model_Fit)
|
|
|
|
|
|
|
|
|
|
return(list(Model_Predictions = model_refit_final_tbl %>% dplyr::select(-Model_Fit), Model_Fit = fitted_models))
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#' Ensemble Models
|
|
|
|
|
#'
|
|
|
|
|
#' @param model_recipe_tbl model recipe table
|
|
|
|
|
#' @param model_fit_tbl model fit table
|
|
|
|
|
#' @param tuning_tbl hyperparameter tuning predictions
|
|
|
|
|
#' @param refit_tbl individual model predictions
|
|
|
|
|
#' @param model_train_test_tbl model train test split table
|
|
|
|
|
#' @param combo_variables combo variables
|
|
|
|
|
#' @param parallel_processing
|
|
|
|
|
#' @param num_cores
|
|
|
|
|
#' @param date_type date type
|
|
|
|
|
#' @param num_hyperparameters number of hyperparameters
|
|
|
|
|
#' @param parallel_processing parallel processing
|
|
|
|
|
#' @param num_cores number of cores
|
|
|
|
|
#' @param seed seed number
|
|
|
|
|
#'
|
|
|
|
|
#' @return table
|
|
|
|
|
#' @return list with ensemble predictions and fitted models
|
|
|
|
|
#' @keywords internal
|
|
|
|
|
#' @export
|
|
|
|
|
ensemble_models <- function(tuning_tbl,
|
|
|
|
|
refit_tbl,
|
|
|
|
|
model_train_test_tbl,
|
|
|
|
|
date_type,
|
|
|
|
|
num_hyperparameters = 5,
|
|
|
|
|
parallel_processing = NULL,
|
|
|
|
|
num_cores = NULL,
|
|
|
|
|
seed = 123) {
|
|
|
|
|
|
|
|
|
|
# example data
|
|
|
|
|
#tuning_results %>% dplyr::select(Prediction) %>% tidyr::unnest(Prediction)
|
|
|
|
|
# get individual prediction data
|
|
|
|
|
initial_results_tbl <- tuning_tbl %>%
|
|
|
|
|
dplyr::select(Combo, Model, Recipe_ID, Prediction) %>%
|
|
|
|
|
dplyr::rename(Combo_Key = Combo) %>%
|
|
|
|
|
tidyr::unnest(Prediction) %>%
|
|
|
|
|
rbind(
|
|
|
|
|
refit_tbl %>%
|
|
|
|
|
dplyr::select(Combo, Model, Recipe_ID, Train_Test_ID, Prediction) %>%
|
|
|
|
|
dplyr::rename(Combo_Key = Combo) %>%
|
|
|
|
|
tidyr::unnest(Prediction)
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
combo_iter_list <- unique(initial_results_tbl$Combo)
|
|
|
|
|
|
|
|
|
|
prep_ensemble_fn <- function(combo) {
|
|
|
|
|
|
|
|
|
|
initial_results_tbl %>%
|
|
|
|
|
dplyr::filter(Combo == combo) %>%
|
|
|
|
|
dplyr::mutate(Suffix = ifelse(Combo_Key == "All-Data", "Global", "Local")) %>%
|
|
|
|
|
tidyr::unite(col= "Model_Key",
|
|
|
|
|
c("Model", "Recipe_ID", "Suffix"),
|
|
|
|
|
sep="-",
|
|
|
|
|
remove=F) %>%
|
|
|
|
|
tidyr::pivot_wider(names_from = Model_Key, values_from = Forecast,
|
|
|
|
|
id_cols = c("Combo", "Date", "Train_Test_ID", "Target"), values_fill = 0)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
prep_ensemble_tbl <- submit_fn(tuning_tbl,
|
|
|
|
|
parallel_processing,
|
|
|
|
|
combo_iter_list,
|
|
|
|
|
prep_ensemble_fn,
|
|
|
|
|
num_cores,
|
|
|
|
|
package_exports = c("tibble", "dplyr", "timetk", "hts", "tidyselect", "stringr", "foreach",
|
|
|
|
|
'doParallel', 'parallel', "lubridate", 'parsnip', 'tune', 'dials', 'workflows',
|
|
|
|
|
'Cubist', 'earth', 'glmnet', 'kernlab', 'modeltime.gluonts', 'purrr',
|
|
|
|
|
'recipes', 'rules', 'modeltime'),
|
|
|
|
|
function_exports = NULL)
|
|
|
|
|
|
|
|
|
|
# ensemble models to run
|
|
|
|
|
refit_models <- unique(refit_tbl$Model)
|
|
|
|
|
|
|
|
|
@ -830,40 +879,349 @@ ensemble_models <- function(tuning_tbl,
|
|
|
|
|
if(length(ensemble_model_list) < 1) {
|
|
|
|
|
stop("no ensemble models chosen to run")
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
model_workflow_tbl <- tibble::tibble()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
for(model in ensemble_model_list) {
|
|
|
|
|
|
|
|
|
|
avail_arg_list <- list('train_data' = tibble::tibble(Combo = 1,
|
|
|
|
|
Date = as),
|
|
|
|
|
'model_type' = "ensemble",
|
|
|
|
|
|
|
|
|
|
avail_arg_list <- list('train_data' = prep_ensemble_tbl %>% dplyr::select(-Train_Test_ID),
|
|
|
|
|
'model_type' = "ensemble",
|
|
|
|
|
'pca' = FALSE)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# get specific model spec
|
|
|
|
|
fn_to_invoke <- get(gsub('-', '_', model))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
exp_arg_list <- formalArgs(fn_to_invoke)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
avail_names <- names(avail_arg_list)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
inp_arg_list <- list()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
for(x in avail_names){
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if(x %in% exp_arg_list){
|
|
|
|
|
inp_arg_list[x] <- avail_arg_list[x]
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
model_workflow <- do.call(fn_to_invoke,inp_arg_list, quote=TRUE)
|
|
|
|
|
|
|
|
|
|
workflow_tbl <- tibble::tibble(Model_Name = model,
|
|
|
|
|
|
|
|
|
|
workflow_tbl <- tibble::tibble(Model_Name = model,
|
|
|
|
|
Model_Workflow = list(model_workflow))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
model_workflow_tbl <- rbind(model_workflow_tbl, workflow_tbl)
|
|
|
|
|
}
|
|
|
|
|
return(model_workflow_tbl)
|
|
|
|
|
|
|
|
|
|
# get hyperparameters
|
|
|
|
|
hyperparameters_tbl <- tibble::tibble()
|
|
|
|
|
|
|
|
|
|
for(x in model_workflow_tbl %>% dplyr::group_split(dplyr::row_number(), .keep = FALSE)) {
|
|
|
|
|
|
|
|
|
|
model <- x %>%
|
|
|
|
|
dplyr::pull(Model_Name)
|
|
|
|
|
|
|
|
|
|
temp_tbl <- model_workflow_tbl %>%
|
|
|
|
|
dplyr::filter(Model_Name == model)
|
|
|
|
|
|
|
|
|
|
model_workflow <- temp_tbl$Model_Workflow[[1]]
|
|
|
|
|
|
|
|
|
|
model_spec <- model_workflow %>%
|
|
|
|
|
workflows::extract_spec_parsnip()
|
|
|
|
|
|
|
|
|
|
recipe_features <- prep_ensemble_tbl
|
|
|
|
|
|
|
|
|
|
if(model=="svm-rbf") {
|
|
|
|
|
parameters <- model_spec %>%
|
|
|
|
|
dials::parameters()
|
|
|
|
|
} else {
|
|
|
|
|
parameters <- model_spec %>%
|
|
|
|
|
dials::parameters() %>%
|
|
|
|
|
dials::finalize(recipe_features, force = FALSE)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
grid <- dials::grid_latin_hypercube(parameters, size = num_hyperparameters)
|
|
|
|
|
|
|
|
|
|
hyperparameters_temp <- grid %>%
|
|
|
|
|
dplyr::group_split(dplyr::row_number(), .keep = FALSE) %>%
|
|
|
|
|
purrr::map_df(tidyr::nest, data=tidyselect::everything()) %>%
|
|
|
|
|
dplyr::rename(Hyperparameters = data) %>%
|
|
|
|
|
tibble::rowid_to_column("Hyperparameter_Combo") %>%
|
|
|
|
|
dplyr::mutate(Model = model)
|
|
|
|
|
|
|
|
|
|
hyperparameters_tbl <- rbind(hyperparameters_tbl, hyperparameters_temp)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
# fit models by hyperparameter
|
|
|
|
|
hyperparmaeter_iter_list <- purrr::map(combo_iter_list, .f = function(x) {
|
|
|
|
|
model_train_test_tbl %>%
|
|
|
|
|
dplyr::mutate(Combo = x) %>%
|
|
|
|
|
dplyr::rename(Train_Test_ID = Run_ID) %>%
|
|
|
|
|
dplyr::filter(Run_Type == "Validation") %>%
|
|
|
|
|
dplyr::select(Combo, Train_Test_ID)
|
|
|
|
|
}) %>%
|
|
|
|
|
dplyr::bind_rows() %>%
|
|
|
|
|
dplyr::group_split(dplyr::row_number(), .keep = FALSE) %>%
|
|
|
|
|
purrr::map(.f = function(x) {
|
|
|
|
|
hyperparameters_tbl %>%
|
|
|
|
|
dplyr::select(Hyperparameter_Combo, Model) %>%
|
|
|
|
|
dplyr::rename(Hyperparameter_ID = Hyperparameter_Combo) %>%
|
|
|
|
|
dplyr::mutate(Combo = x$Combo,
|
|
|
|
|
Train_Test_ID = x$Train_Test_ID)
|
|
|
|
|
}) %>%
|
|
|
|
|
dplyr::bind_rows() %>%
|
|
|
|
|
dplyr::select(Combo, Model, Train_Test_ID, Hyperparameter_ID)
|
|
|
|
|
|
|
|
|
|
initial_tune_fn <- function(x) {
|
|
|
|
|
|
|
|
|
|
# run input values
|
|
|
|
|
param_combo <- x %>%
|
|
|
|
|
dplyr::pull(Hyperparameter_ID)
|
|
|
|
|
|
|
|
|
|
model <- x %>%
|
|
|
|
|
dplyr::pull(Model)
|
|
|
|
|
|
|
|
|
|
data_split <- x %>%
|
|
|
|
|
dplyr::pull(Train_Test_ID)
|
|
|
|
|
|
|
|
|
|
combo <- x %>%
|
|
|
|
|
dplyr::pull(Combo)
|
|
|
|
|
|
|
|
|
|
train_end_date <- model_train_test_tbl %>%
|
|
|
|
|
dplyr::filter(Run_ID == data_split) %>%
|
|
|
|
|
dplyr::pull(Train_End)
|
|
|
|
|
|
|
|
|
|
test_end_date <- model_train_test_tbl %>%
|
|
|
|
|
dplyr::filter(Run_ID == data_split) %>%
|
|
|
|
|
dplyr::pull(Test_End)
|
|
|
|
|
|
|
|
|
|
# get train/test data
|
|
|
|
|
full_data <- prep_ensemble_tbl %>%
|
|
|
|
|
dplyr::filter(Combo == combo) %>%
|
|
|
|
|
dplyr::mutate(Date_index.num = 0)
|
|
|
|
|
|
|
|
|
|
training <- full_data %>%
|
|
|
|
|
dplyr::filter(Date <= train_end_date) %>%
|
|
|
|
|
dplyr::select(-Train_Test_ID)
|
|
|
|
|
|
|
|
|
|
testing <- full_data %>%
|
|
|
|
|
dplyr::filter(Date > train_end_date,
|
|
|
|
|
Date <= test_end_date,
|
|
|
|
|
Train_Test_ID == data_split)
|
|
|
|
|
|
|
|
|
|
# get workflow
|
|
|
|
|
workflow <- model_workflow_tbl %>%
|
|
|
|
|
dplyr::filter(Model_Name == model)
|
|
|
|
|
|
|
|
|
|
workflow_final <- workflow$Model_Workflow[[1]]
|
|
|
|
|
|
|
|
|
|
# get hyperparameters
|
|
|
|
|
hyperparameters <- hyperparameters_tbl %>%
|
|
|
|
|
dplyr::filter(Model == model,
|
|
|
|
|
Hyperparameter_Combo == param_combo) %>%
|
|
|
|
|
dplyr::select(Hyperparameters) %>%
|
|
|
|
|
tidyr::unnest(Hyperparameters)
|
|
|
|
|
|
|
|
|
|
# fit model
|
|
|
|
|
set.seed(seed)
|
|
|
|
|
|
|
|
|
|
model_fit <- workflow_final %>%
|
|
|
|
|
tune::finalize_workflow(parameters = hyperparameters) %>%
|
|
|
|
|
generics::fit(data = training)
|
|
|
|
|
|
|
|
|
|
# create prediction
|
|
|
|
|
model_prediction <- testing %>%
|
|
|
|
|
dplyr::bind_cols(
|
|
|
|
|
predict(model_fit, new_data = testing)
|
|
|
|
|
) %>%
|
|
|
|
|
dplyr::select(Combo, Date, Target, .pred) %>%
|
|
|
|
|
dplyr::rename(Forecast = .pred)
|
|
|
|
|
|
|
|
|
|
# finalize output tbl
|
|
|
|
|
final_tbl <- tibble::tibble(
|
|
|
|
|
Combo = combo,
|
|
|
|
|
Model = model,
|
|
|
|
|
Train_Test_ID = data_split,
|
|
|
|
|
Hyperparameter_ID = param_combo,
|
|
|
|
|
Model_Fit = list(model_fit),
|
|
|
|
|
Prediction = list(model_prediction)
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
return(final_tbl)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
initial_tuning_tbl <- submit_fn(model_workflow_tbl,
|
|
|
|
|
parallel_processing,
|
|
|
|
|
hyperparmaeter_iter_list %>%
|
|
|
|
|
dplyr::group_split(dplyr::row_number(), .keep = FALSE),
|
|
|
|
|
initial_tune_fn,
|
|
|
|
|
num_cores,
|
|
|
|
|
package_exports = c("tibble", "dplyr", "timetk", "hts", "tidyselect", "stringr", "foreach",
|
|
|
|
|
'doParallel', 'parallel', "lubridate", 'parsnip', 'tune', 'dials', 'workflows',
|
|
|
|
|
'Cubist', 'earth', 'glmnet', 'kernlab', 'modeltime.gluonts', 'purrr',
|
|
|
|
|
'recipes', 'rules', 'modeltime'),
|
|
|
|
|
function_exports = NULL)
|
|
|
|
|
|
|
|
|
|
hyperparmaeter_iter_list2 <- hyperparmaeter_iter_list %>%
|
|
|
|
|
dplyr::select(Combo, Model) %>%
|
|
|
|
|
dplyr::distinct()
|
|
|
|
|
|
|
|
|
|
choose_hyperparameters_fn <- function(x) {
|
|
|
|
|
|
|
|
|
|
combo <- x %>%
|
|
|
|
|
dplyr::pull(Combo)
|
|
|
|
|
|
|
|
|
|
model <- x %>%
|
|
|
|
|
dplyr::pull(Model)
|
|
|
|
|
|
|
|
|
|
test_tbl <- initial_tuning_tbl %>%
|
|
|
|
|
dplyr::filter(Combo == combo,
|
|
|
|
|
Model == model) %>%
|
|
|
|
|
dplyr::select(Model, Hyperparameter_ID, Train_Test_ID, Prediction, Model_Fit)
|
|
|
|
|
|
|
|
|
|
best_param <- test_tbl %>%
|
|
|
|
|
dplyr::select(-Model_Fit) %>%
|
|
|
|
|
tidyr::unnest(Prediction) %>%
|
|
|
|
|
dplyr::mutate(Combo = combo) %>%
|
|
|
|
|
dplyr::group_by(Combo, Model, Hyperparameter_ID) %>%
|
|
|
|
|
yardstick::rmse(truth = Target,
|
|
|
|
|
estimate = Forecast,
|
|
|
|
|
na_rm = TRUE) %>%
|
|
|
|
|
dplyr::ungroup() %>%
|
|
|
|
|
dplyr::arrange(.estimate) %>%
|
|
|
|
|
dplyr::slice(1) %>%
|
|
|
|
|
dplyr::pull(Hyperparameter_ID)
|
|
|
|
|
|
|
|
|
|
best_model_fit <- test_tbl %>%
|
|
|
|
|
dplyr::filter(Hyperparameter_ID == best_param) %>%
|
|
|
|
|
dplyr::slice(1)
|
|
|
|
|
|
|
|
|
|
best_model_fit <- best_model_fit$Model_Fit[[1]]
|
|
|
|
|
|
|
|
|
|
final_predictions <- test_tbl %>%
|
|
|
|
|
dplyr::filter(Hyperparameter_ID == best_param) %>%
|
|
|
|
|
dplyr::select(-Model_Fit) %>%
|
|
|
|
|
tidyr::unnest(Prediction) %>%
|
|
|
|
|
dplyr::select(Combo, Date, Train_Test_ID, Target, Forecast)
|
|
|
|
|
|
|
|
|
|
return(tibble::tibble(Combo = combo,
|
|
|
|
|
Model = model,
|
|
|
|
|
Hyperparameter_ID = best_param,
|
|
|
|
|
Model_Fit = list(best_model_fit),
|
|
|
|
|
Prediction = list(final_predictions)))
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
final_tuning_tbl <- submit_fn(model_workflow_tbl,
|
|
|
|
|
parallel_processing,
|
|
|
|
|
hyperparmaeter_iter_list2 %>%
|
|
|
|
|
dplyr::group_split(dplyr::row_number(), .keep = FALSE),
|
|
|
|
|
choose_hyperparameters_fn,
|
|
|
|
|
num_cores,
|
|
|
|
|
package_exports = c("tibble", "dplyr", "timetk", "hts", "tidyselect", "stringr", "foreach",
|
|
|
|
|
'doParallel', 'parallel', "lubridate", 'parsnip', 'tune', 'dials', 'workflows',
|
|
|
|
|
'Cubist', 'earth', 'glmnet', 'kernlab', 'modeltime.gluonts', 'purrr',
|
|
|
|
|
'recipes', 'rules', 'modeltime', 'yardstick'),
|
|
|
|
|
function_exports = NULL)
|
|
|
|
|
|
|
|
|
|
# refit ensemble models
|
|
|
|
|
refit_iter_list <- model_train_test_tbl %>%
|
|
|
|
|
dplyr::filter(Run_Type %in% c("Future_Forecast", "Back_Test")) %>%
|
|
|
|
|
dplyr::group_split(dplyr::row_number(), .keep = FALSE) %>%
|
|
|
|
|
purrr::map(.f = function(x) {
|
|
|
|
|
final_tuning_tbl %>%
|
|
|
|
|
dplyr::mutate(Run_Type = x %>% dplyr::pull(Run_Type),
|
|
|
|
|
Run_ID = x %>% dplyr::pull(Run_ID),
|
|
|
|
|
Train_End = x %>% dplyr::pull(Train_End),
|
|
|
|
|
Test_End = x %>% dplyr::pull(Test_End)) %>%
|
|
|
|
|
dplyr::select(-Model_Fit, -Prediction)}) %>%
|
|
|
|
|
dplyr::bind_rows()
|
|
|
|
|
|
|
|
|
|
fit_model <- function(x) {
|
|
|
|
|
|
|
|
|
|
combo <- x %>%
|
|
|
|
|
dplyr::pull(Combo)
|
|
|
|
|
|
|
|
|
|
model <- x %>%
|
|
|
|
|
dplyr::pull(Model)
|
|
|
|
|
|
|
|
|
|
model_fit <- final_tuning_tbl %>%
|
|
|
|
|
dplyr::filter(Model == model,
|
|
|
|
|
Combo == combo)
|
|
|
|
|
|
|
|
|
|
model_fit <- model_fit$Model_Fit[[1]]
|
|
|
|
|
|
|
|
|
|
run_type <- x %>%
|
|
|
|
|
dplyr::pull(Run_Type)
|
|
|
|
|
|
|
|
|
|
run_id <- x %>%
|
|
|
|
|
dplyr::pull(Run_ID)
|
|
|
|
|
|
|
|
|
|
train_end <- x %>%
|
|
|
|
|
dplyr::pull(Train_End)
|
|
|
|
|
|
|
|
|
|
test_end <- x %>%
|
|
|
|
|
dplyr::pull(Test_End)
|
|
|
|
|
|
|
|
|
|
full_data <- prep_ensemble_tbl %>%
|
|
|
|
|
dplyr::filter(Combo == combo) %>%
|
|
|
|
|
dplyr::mutate(Date_index.num = 0)
|
|
|
|
|
|
|
|
|
|
training <- full_data %>%
|
|
|
|
|
dplyr::filter(Date <= train_end) %>%
|
|
|
|
|
dplyr::select(-Train_Test_ID)
|
|
|
|
|
|
|
|
|
|
testing <- full_data %>%
|
|
|
|
|
dplyr::filter(Date > train_end,
|
|
|
|
|
Date <= test_end,
|
|
|
|
|
Train_Test_ID == run_id)
|
|
|
|
|
|
|
|
|
|
# fit model
|
|
|
|
|
set.seed(seed)
|
|
|
|
|
|
|
|
|
|
model_fit <- model_fit %>%
|
|
|
|
|
generics::fit(data = training)
|
|
|
|
|
|
|
|
|
|
# create prediction
|
|
|
|
|
model_prediction <- testing %>%
|
|
|
|
|
dplyr::bind_cols(
|
|
|
|
|
predict(model_fit, new_data = testing)
|
|
|
|
|
) %>%
|
|
|
|
|
dplyr::select(Combo, Date, Target, .pred) %>%
|
|
|
|
|
dplyr::rename(Forecast = .pred)
|
|
|
|
|
|
|
|
|
|
# finalize output tbl
|
|
|
|
|
final_tbl <- tibble::tibble(
|
|
|
|
|
Combo = combo,
|
|
|
|
|
Model = model,
|
|
|
|
|
Recipe_ID = "Ensemble",
|
|
|
|
|
Train_Test_ID = run_id,
|
|
|
|
|
Model_Fit = list(model_fit),
|
|
|
|
|
Prediction = list(model_prediction)
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
return(final_tbl)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
model_refit_final_tbl <- submit_fn(final_tuning_tbl,
|
|
|
|
|
parallel_processing,
|
|
|
|
|
refit_iter_list %>%
|
|
|
|
|
dplyr::group_split(dplyr::row_number(), .keep = FALSE),
|
|
|
|
|
fit_model,
|
|
|
|
|
num_cores,
|
|
|
|
|
package_exports = c("tibble", "dplyr", "timetk", "hts", "tidyselect", "stringr", "foreach",
|
|
|
|
|
'doParallel', 'parallel', "lubridate", 'parsnip', 'tune', 'dials', 'workflows',
|
|
|
|
|
'Cubist', 'earth', 'glmnet', 'kernlab', 'modeltime.gluonts', 'purrr',
|
|
|
|
|
'recipes', 'rules', 'modeltime'),
|
|
|
|
|
function_exports = NULL)
|
|
|
|
|
|
|
|
|
|
#get final combined results and return final fitted models
|
|
|
|
|
final_model_fit_tbl <- model_refit_final_tbl %>%
|
|
|
|
|
dplyr::filter(Train_Test_ID == "01") %>%
|
|
|
|
|
dplyr::select(Combo, Model, Recipe_ID, Model_Fit)
|
|
|
|
|
|
|
|
|
|
final_ensemble_results_tbl <- model_refit_final_tbl %>%
|
|
|
|
|
dplyr::select(-Model_Fit)
|
|
|
|
|
|
|
|
|
|
return(list(Model_Predictions = final_ensemble_results_tbl, Model_Fit = final_model_fit_tbl))
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|