code formatting

2024-07-25 09:46:12 -07:00 · 2024-07-25 09:46:12 -07:00 · bc88b7b512
--- a/R/ensemble_models.R
+++ b/R/ensemble_models.R
@ -181,27 +181,29 @@ ensemble_models <- function(run_info,
      # model forecasts
      single_model_tbl <- NULL
      if (run_local_models) {
-        suppressWarnings(try(single_model_tbl <- read_file(run_info,
-          path = paste0(
-            "/forecasts/", hash_data(run_info$experiment_name), "-", hash_data(run_info$run_name),
-            "-", combo, "-single_models.", run_info$data_output
+        suppressWarnings(try(
+          single_model_tbl <- read_file(run_info,
+            path = paste0(
+              "/forecasts/", hash_data(run_info$experiment_name), "-", hash_data(run_info$run_name),
+              "-", combo, "-single_models.", run_info$data_output
+            ),
+            return_type = "df"
          ),
-          return_type = "df"
-        ),
-        silent = TRUE
+          silent = TRUE
        ))
      }

      global_model_tbl <- NULL
      if (run_global_models) {
-        suppressWarnings(try(global_model_tbl <- read_file(run_info,
-          path = paste0(
-            "/forecasts/", hash_data(run_info$experiment_name), "-", hash_data(run_info$run_name),
-            "-", combo, "-global_models.", run_info$data_output
+        suppressWarnings(try(
+          global_model_tbl <- read_file(run_info,
+            path = paste0(
+              "/forecasts/", hash_data(run_info$experiment_name), "-", hash_data(run_info$run_name),
+              "-", combo, "-global_models.", run_info$data_output
+            ),
+            return_type = "df"
          ),
-          return_type = "df"
-        ),
-        silent = TRUE
+          silent = TRUE
        ))
      }

@ -336,7 +338,6 @@ ensemble_models <- function(run_info,
        .multicombine = TRUE,
        .noexport = NULL
      ) %do% {
-
        # get initial run info
        model <- model_run %>%
          dplyr::pull(Model_Name)
--- a/R/feature_selection.R
+++ b/R/feature_selection.R
@ -23,10 +23,8 @@ run_feature_selection <- function(input_data,
                                  forecast_horizon,
                                  external_regressors,
                                  multistep_horizon = FALSE) {
-
  # check for more than one unique target value
  if (input_data %>% tidyr::drop_na(Target) %>% dplyr::pull(Target) %>% unique() %>% length() < 2) {
-
    # just return the date features
    fs_list <- input_data %>%
      dplyr::select(tidyselect::contains("Date"))
@ -83,7 +81,6 @@ run_feature_selection <- function(input_data,

    # run feature selection
    if (date_type %in% c("day", "week")) {
-
      # number of votes needed for feature to be selected
      votes_needed <- 3

@ -410,7 +407,6 @@ lofo_fn <- function(run_info,
                    parallel_processing,
                    pca = FALSE,
                    seed = 123) {
-
  # parallel run info
  par_info <- par_start(
    run_info = run_info,
--- a/R/final_models.R
+++ b/R/final_models.R
@ -141,7 +141,6 @@ final_models <- function(run_info,
  run_ensemble_models <- prev_log_df$run_ensemble_models

  if (sum(colnames(prev_log_df) %in% "weighted_mape")) {
-
    # check if input values have changed
    current_log_df <- tibble::tibble(
      average_models = average_models,
@ -294,7 +293,6 @@ final_models <- function(run_info,

      # simple model averaging
      if (average_models & length(final_model_list) > 1) {
-
        # create model combinations list
        model_combinations <- tibble::tibble()

@ -338,7 +336,6 @@ final_models <- function(run_info,
          .noexport = NULL
        ) %op%
          {
-
            # get list of models to average
            model_list <- strsplit(x, "_")[[1]]

@ -362,9 +359,9 @@ final_models <- function(run_info,
      } else {
        averages_tbl <- NULL
      }
-      
+
      # choose best average model
-      if(!is.null(averages_tbl)) {
+      if (!is.null(averages_tbl)) {
        avg_back_test_mape <- averages_tbl %>%
          dplyr::mutate(
            Train_Test_ID = as.numeric(Train_Test_ID),
@ -372,7 +369,7 @@ final_models <- function(run_info,
          ) %>%
          dplyr::filter(Train_Test_ID != 1) %>%
          dplyr::mutate(MAPE = round(abs((Forecast - Target) / Target), digits = 4))
-        
+
        avg_best_model_mape <- avg_back_test_mape %>%
          dplyr::group_by(Model_ID, Combo) %>%
          dplyr::mutate(
@ -385,7 +382,7 @@ final_models <- function(run_info,
          dplyr::group_by(Combo) %>%
          dplyr::slice(1) %>%
          dplyr::ungroup()
-        
+
        avg_best_model_tbl <- avg_best_model_mape %>%
          dplyr::select(Combo, Model_ID)
      }
@ -525,11 +522,11 @@ final_models <- function(run_info,
            by = "Model_ID"
          ) %>%
          dplyr::mutate(Best_Model = ifelse(!is.na(Best_Model), "Yes", "No"))
-        
-        if(!is.null(averages_tbl)) {
+
+        if (!is.null(averages_tbl)) {
          avg_model_final_tbl <- averages_tbl %>%
            dplyr::right_join(avg_best_model_tbl,
-                              by = c("Combo", "Model_ID")
+              by = c("Combo", "Model_ID")
            ) %>%
            dplyr::mutate(
              Combo_ID = Combo,
@ -619,14 +616,16 @@ final_models <- function(run_info,

  # clean up any parallel run process
  par_end(cl)
-  
+
  # condense outputs into less files for larger runs
-  if(length(combo_list) > 10000) {
+  if (length(combo_list) > 10000) {
    cli::cli_progress_step("Condensing Forecasts")
-    
-    condense_data(run_info, 
-                  parallel_processing,
-                  num_cores)
+
+    condense_data(
+      run_info,
+      parallel_processing,
+      num_cores
+    )
  }

  # reconcile hierarchical forecasts
@ -641,17 +640,21 @@ final_models <- function(run_info,
      num_cores
    )
  }
-  
+
  # calculate weighted mape
  weighted_mape <- get_forecast_data(run_info = run_info) %>%
-    dplyr::filter(Run_Type == "Back_Test", 
-                  Best_Model == "Yes") %>%
+    dplyr::filter(
+      Run_Type == "Back_Test",
+      Best_Model == "Yes"
+    ) %>%
    dplyr::mutate(
      Target = ifelse(Target == 0, 0.1, Target)
    ) %>%
-    dplyr::mutate(MAPE = round(abs((Forecast - Target) / Target), digits = 4), 
-                  Total = sum(Target, na.rm = TRUE), 
-                  Weight = (MAPE*Target)/Total) %>%
+    dplyr::mutate(
+      MAPE = round(abs((Forecast - Target) / Target), digits = 4),
+      Total = sum(Target, na.rm = TRUE),
+      Weight = (MAPE * Target) / Total
+    ) %>%
    dplyr::pull(Weight) %>%
    sum() %>%
    round(digits = 4)
--- a/R/forecast_time_series.R
+++ b/R/forecast_time_series.R
@ -323,7 +323,6 @@ forecast_backwards_compatibility <- function(run_info,
      dplyr::select(Combo, Model, Best_Model) %>%
      dplyr::distinct()
  } else {
-
    # read in unreconciled results
    best_model_tbl <- read_file(run_info,
      path = paste0(
--- a/R/hierarchy.R
+++ b/R/hierarchy.R
@ -82,7 +82,6 @@ prep_hierarchical_data <- function(input_data,
        hierarchical_tbl <- hierarchical_tbl %>%
          dplyr::left_join(temp_tbl, by = c("Date"))
      } else if (value_level != "All") {
-
        # agg by lowest level
        bottom_tbl <- input_data_adj %>%
          tidyr::unite("Combo",
@ -400,7 +399,6 @@ reconcile_hierarchical_data <- function(run_info,
                                        forecast_approach,
                                        negative_forecast = FALSE,
                                        num_cores) {
-
  # get run splits
  model_train_test_tbl <- read_file(run_info,
    path = paste0(
@ -420,15 +418,15 @@ reconcile_hierarchical_data <- function(run_info,
  hts_nodes <- hts_list$nodes
  original_combo_list <- hts_list$original_combos
  hts_combo_list <- hts_list$hts_combos
-  
+
  # check if data has been condensed
  cond_path <- paste0(
    run_info$path, "/forecasts/*", hash_data(run_info$experiment_name), "-",
    hash_data(run_info$run_name), "*condensed", ".", run_info$data_output
  )
-  
+
  condensed_files <- list_files(run_info$storage_object, fs::path(cond_path))
-  
+
  if (length(condensed_files) > 0) {
    condensed <- TRUE
  } else {
@ -444,7 +442,7 @@ reconcile_hierarchical_data <- function(run_info,
    return_type <- "df"
  }

-  if(condensed) {
+  if (condensed) {
    fcst_path <- paste0(
      "/forecasts/*", hash_data(run_info$experiment_name), "-",
      hash_data(run_info$run_name), "*condensed", ".", run_info$data_output
@ -453,7 +451,7 @@ reconcile_hierarchical_data <- function(run_info,
    fcst_path <- paste0(
      "/forecasts/*", hash_data(run_info$experiment_name), "-",
      hash_data(run_info$run_name), "*models", ".", run_info$data_output
-    ) 
+    )
  }

  unreconciled_tbl <- read_file(run_info,
@ -889,7 +887,6 @@ reconcile_hierarchical_data <- function(run_info,
 external_regressor_mapping <- function(data,
                                       combo_variables,
                                       external_regressors) {
-
  # create var combinations list
  var_combinations <- tibble::tibble()

@ -918,7 +915,6 @@ external_regressor_mapping <- function(data,
    .multicombine = TRUE,
    .noexport = NULL
  ) %do% {
-
    # get unique values of regressor per combo variable iteration
    var_unique_tbl <- foreach::foreach(
      var = iter_list,
@ -1000,7 +996,6 @@ sum_hts_data <- function(bottom_level_tbl,
                         forecast_approach,
                         frequency_number,
                         return_type = "data") {
-
  # create aggregations for target variable
  Date <- bottom_level_tbl$Date

--- a/R/input_checks.R
+++ b/R/input_checks.R
@ -1,4 +1,3 @@
-
 #' Check input values
 #'
 #' @param input_name input name
@ -13,21 +12,23 @@ check_input_type <- function(input_name,
                             type,
                             expected_value = NULL) {
  if (!inherits(input_value, type)) {
-    stop(paste0(
-      "invalid type for input name '", input_name, "', needs to be of type ",
-      glue::glue_collapse(type, " or ")
-    ),
-    call. = FALSE
+    stop(
+      paste0(
+        "invalid type for input name '", input_name, "', needs to be of type ",
+        glue::glue_collapse(type, " or ")
+      ),
+      call. = FALSE
    )
  }

  if (!is.null(expected_value) & !is.null(input_value)) {
    if (!sum(input_value %in% expected_value)) {
-      stop(paste0(
-        "invalid value for input name '", input_name, "', value needs to equal ",
-        glue::glue_collapse(expected_value, " or ")
-      ),
-      call. = FALSE
+      stop(
+        paste0(
+          "invalid value for input name '", input_name, "', value needs to equal ",
+          glue::glue_collapse(expected_value, " or ")
+        ),
+        call. = FALSE
      )
    }
  }
@ -52,7 +53,6 @@ check_input_data <- function(input_data,
                             date_type,
                             fiscal_year_start,
                             parallel_processing) {
-
  # data combo names match the input data
  if (sum(combo_variables %in% colnames(input_data)) != length(combo_variables)) {
    stop("combo variables do not match column headers in input data")
@ -103,7 +103,6 @@ check_input_data <- function(input_data,

  # input_data is correct type for parallel processing
  if (inherits(input_data, c("data.frame", "tbl")) & is.null(parallel_processing)) {
-
    # do nothing
  } else if (inherits(input_data, "tbl_spark") & is.null(parallel_processing)) {
    stop("spark data frames should run with spark parallel processing",
@ -148,7 +147,6 @@ check_input_data <- function(input_data,
 check_parallel_processing <- function(run_info,
                                      parallel_processing,
                                      inner_parallel = FALSE) {
-
  # parallel processing formatting
  if (is.null(parallel_processing)) {
    return()
--- a/R/models.R
+++ b/R/models.R
@ -701,7 +701,6 @@ glmnet <- function(train_data,
                   horizon,
                   external_regressors,
                   frequency) {
-
  # create model recipe and spec
  if (multistep) {
    recipe_spec_glmnet <- train_data %>%
@ -1328,7 +1327,6 @@ xgboost <- function(train_data,
                    horizon,
                    external_regressors,
                    frequency) {
-
  # create model recipe and spec
  if (multistep) {
    recipe_spec_xgboost <- train_data %>%
--- a/R/multistep_cubist.R
+++ b/R/multistep_cubist.R
@ -1,4 +1,3 @@
-
 # CUBIST Multistep ----

 #' Initialize custom cubist parsnip model
@ -298,7 +297,6 @@ cubist_multistep_fit_impl <- function(x, y,
                                      external_regressors = NULL,
                                      forecast_horizon = NULL,
                                      selected_features = NULL) {
-
  # X & Y
  # Expect outcomes  = vector
  # Expect predictor = data.frame
@ -321,7 +319,6 @@ cubist_multistep_fit_impl <- function(x, y,
  model_predictions <- list()

  for (lag in get_multi_lags(lag_periods, forecast_horizon)) {
-
    # get final features based on lag
    xreg_tbl_final <- multi_feature_selection(
      xreg_tbl,
@ -438,7 +435,6 @@ predict.cubist_multistep_fit_impl <- function(object, new_data, ...) {
 #' @keywords internal
 #' @export
 cubist_multistep_predict_impl <- function(object, new_data, ...) {
-
  # Date Mapping Table
  date_tbl <- new_data %>%
    dplyr::select(Date, Date_index.num) %>%
--- a/R/multistep_glmnet.R
+++ b/R/multistep_glmnet.R
@ -1,4 +1,3 @@
-
 # GLMNET Multistep ----

 #' Initialize custom glmnet parsnip model
@ -282,7 +281,6 @@ glmnet_multistep_fit_impl <- function(x, y,
                                      external_regressors = NULL,
                                      forecast_horizon = NULL,
                                      selected_features = NULL) {
-
  # X & Y
  # Expect outcomes  = vector
  # Expect predictor = data.frame
@ -311,7 +309,6 @@ glmnet_multistep_fit_impl <- function(x, y,
    parsnip::set_engine("glmnet")

  for (lag in get_multi_lags(lag_periods, forecast_horizon)) {
-
    # get final features based on lag
    xreg_tbl_final <- multi_feature_selection(
      xreg_tbl,
@ -426,7 +423,6 @@ predict.glmnet_multistep_fit_impl <- function(object, new_data, ...) {
 #' @keywords internal
 #' @export
 glmnet_multistep_predict_impl <- function(object, new_data, ...) {
-
  # Date Mapping Table
  date_tbl <- new_data %>%
    dplyr::select(Date, Date_index.num) %>%
--- a/R/multistep_helper.R
+++ b/R/multistep_helper.R
@ -1,4 +1,3 @@
-
 # Helper Functions ----

 #' Return xregs that contain future values for multistep horizon forecast
--- a/R/multistep_mars.R
+++ b/R/multistep_mars.R
@ -1,4 +1,3 @@
-
 # MARS Multistep ----

 #' Initialize custom mars parsnip model
@ -303,7 +302,6 @@ mars_multistep_fit_impl <- function(x, y,
                                    external_regressors = NULL,
                                    forecast_horizon = NULL,
                                    selected_features = NULL) {
-
  # X & Y
  # Expect outcomes  = vector
  # Expect predictor = data.frame
@ -334,7 +332,6 @@ mars_multistep_fit_impl <- function(x, y,
    parsnip::set_engine("earth")

  for (lag in get_multi_lags(lag_periods, forecast_horizon)) {
-
    # get final features based on lag
    xreg_tbl_final <- multi_feature_selection(
      xreg_tbl,
@ -449,7 +446,6 @@ predict.mars_multistep_fit_impl <- function(object, new_data, ...) {
 #' @keywords internal
 #' @export
 mars_multistep_predict_impl <- function(object, new_data, ...) {
-
  # Date Mapping Table
  date_tbl <- new_data %>%
    dplyr::select(Date, Date_index.num) %>%
--- a/R/multistep_svm_poly.R
+++ b/R/multistep_svm_poly.R
@ -1,4 +1,3 @@
-
 # SVM-POLY Multistep ----

 #' Initialize custom svm-poly parsnip model
@ -325,7 +324,6 @@ svm_poly_multistep_fit_impl <- function(x, y,
                                        external_regressors = NULL,
                                        forecast_horizon = NULL,
                                        selected_features = NULL) {
-
  # X & Y
  # Expect outcomes  = vector
  # Expect predictor = data.frame
@ -357,7 +355,6 @@ svm_poly_multistep_fit_impl <- function(x, y,
    parsnip::set_engine("kernlab")

  for (lag in get_multi_lags(lag_periods, forecast_horizon)) {
-
    # get final features based on lag
    xreg_tbl_final <- multi_feature_selection(
      xreg_tbl,
@ -475,7 +472,6 @@ predict.svm_poly_multistep_fit_impl <- function(object, new_data, ...) {
 #' @keywords internal
 #' @export
 svm_poly_multistep_predict_impl <- function(object, new_data, ...) {
-
  # Date Mapping Table
  date_tbl <- new_data %>%
    dplyr::select(Date, Date_index.num) %>%
--- a/R/multistep_svm_rbf.R
+++ b/R/multistep_svm_rbf.R
@ -1,4 +1,3 @@
-
 # SVM-RBF Multistep ----

 #' Initialize custom svm-rbf parsnip model
@ -306,7 +305,6 @@ svm_rbf_multistep_fit_impl <- function(x, y,
                                       external_regressors = NULL,
                                       forecast_horizon = NULL,
                                       selected_features = NULL) {
-
  # X & Y
  # Expect outcomes  = vector
  # Expect predictor = data.frame
@ -337,7 +335,6 @@ svm_rbf_multistep_fit_impl <- function(x, y,
    parsnip::set_engine("kernlab")

  for (lag in get_multi_lags(lag_periods, forecast_horizon)) {
-
    # get final features based on lag
    xreg_tbl_final <- multi_feature_selection(
      xreg_tbl,
@ -455,7 +452,6 @@ predict.svm_rbf_multistep_fit_impl <- function(object, new_data, ...) {
 #' @keywords internal
 #' @export
 svm_rbf_multistep_predict_impl <- function(object, new_data, ...) {
-
  # Date Mapping Table
  date_tbl <- new_data %>%
    dplyr::select(Date, Date_index.num) %>%
--- a/R/multistep_xgboost.R
+++ b/R/multistep_xgboost.R
@ -1,4 +1,3 @@
-
 # XGBOOST Multistep ----

 #' Initialize custom xgboost parsnip model
@ -389,7 +388,6 @@ xgboost_multistep_fit_impl <- function(x, y,
                                       forecast_horizon = NULL,
                                       selected_features = NULL,
                                       ...) {
-
  # X & Y
  # Expect outcomes  = vector
  # Expect predictor = data.frame
@ -412,7 +410,6 @@ xgboost_multistep_fit_impl <- function(x, y,
  model_predictions <- list()

  for (lag in get_multi_lags(lag_periods, forecast_horizon)) {
-
    # get final features based on lag
    xreg_tbl_final <- multi_feature_selection(
      xreg_tbl,
@ -437,7 +434,7 @@ xgboost_multistep_fit_impl <- function(x, y,
      y = outcome,
      max_depth = max_depth,
      nrounds = nrounds,
-      eta  = eta,
+      eta = eta,
      colsample_bytree = colsample_bytree,
      colsample_bynode = colsample_bynode,
      min_child_weight = min_child_weight,
@ -537,7 +534,6 @@ predict.xgboost_multistep_fit_impl <- function(object, new_data, ...) {
 #' @keywords internal
 #' @export
 xgboost_multistep_predict_impl <- function(object, new_data, ...) {
-
  # Date Mapping Table
  date_tbl <- new_data %>%
    dplyr::select(Date, Date_index.num) %>%
--- a/R/prep_data.R
+++ b/R/prep_data.R
@ -227,7 +227,6 @@ prep_data <- function(run_info,
    dplyr::filter(Combo %in% current_combo_list_final)

  if (length(combo_diff) == 0 & length(prev_combo_list) > 0) {
-
    # check if input values have changed
    current_log_df <- tibble::tibble(
      combo_variables = paste(combo_variables, collapse = "---"),
@ -466,204 +465,205 @@ prep_data <- function(run_info,
  } else if (parallel_processing == "spark") {
    final_data <- filtered_initial_prep_tbl %>%
      adjust_df(return_type = "sdf") %>%
-      sparklyr::spark_apply(function(df, context) {
-        # update objects
-        fn_env <- .GlobalEnv
+      sparklyr::spark_apply(
+        function(df, context) {
+          # update objects
+          fn_env <- .GlobalEnv

-        for (name in names(context)) {
-          assign(name, context[[name]], envir = fn_env)
-        }
+          for (name in names(context)) {
+            assign(name, context[[name]], envir = fn_env)
+          }

-        # get specific time series
-        combo <- unique(df$Combo)
+          # get specific time series
+          combo <- unique(df$Combo)

-        return_tbl <- tibble::tibble(
-          Combo = combo,
-          Combo_Hash = hash_data(combo)
-        )
-
-        # handle external regressors
-        xregs_future_tbl <- get_xregs_future_values_tbl(
-          df,
-          external_regressors,
-          hist_end_date
-        )
-
-        if (length(colnames(xregs_future_tbl)) > 2) {
-          xregs_future_list <- xregs_future_tbl %>%
-            dplyr::select(-Date, -Combo) %>%
-            colnames()
-        } else {
-          xregs_future_list <- NULL
-        }
-
-        # initial data prep
-        initial_tbl <- df %>%
-          dplyr::filter(Combo == combo) %>%
-          dplyr::select(
-            Combo,
-            Date,
-            Target,
-            tidyselect::all_of(external_regressors)
-          ) %>%
-          dplyr::group_by(Combo) %>%
-          timetk::pad_by_time(Date,
-            .by = date_type,
-            .pad_value = ifelse(clean_missing_values, NA, 0),
-            .end_date = hist_end_date
-          ) %>% # fill in missing values in between existing data points
-          timetk::pad_by_time(Date,
-            .by = date_type,
-            .pad_value = 0,
-            .start_date = hist_start_date,
-            .end_date = hist_end_date
-          ) %>% # fill in missing values at beginning of time series with zero
-          timetk::future_frame(Date,
-            .length_out = forecast_horizon,
-            .bind_data = TRUE
-          ) %>% # add future data
-          dplyr::ungroup() %>%
-          dplyr::left_join(xregs_future_tbl,
-            by = c("Combo", "Date")
-          ) %>% # join xregs that contain values given by user
-          clean_outliers_missing_values(
-            clean_outliers,
-            clean_missing_values,
-            get_frequency_number(date_type),
-            external_regressors
-          ) %>% # clean outliers and missing values
-          dplyr::mutate_if(is.numeric, list(~ replace(., is.infinite(.), NA))) %>% # replace infinite values
-          dplyr::mutate_if(is.numeric, list(~ replace(., is.nan(.), NA))) %>% # replace NaN values
-          dplyr::mutate_if(is.numeric, list(~ replace(., is.na(.), 0))) %>% # replace NA values
-          dplyr::mutate(Target = ifelse(Date > hist_end_date,
-            NA,
-            Target
-          ))
-
-        # box-cox transformation
-        if (box_cox) {
-          box_cox_tbl <- initial_tbl %>%
-            apply_box_cox()
-
-          initial_tbl <- box_cox_tbl$data
-
-          return_tbl <- return_tbl %>%
-            dplyr::left_join(box_cox_tbl$diff_info, by = "Combo")
-        }
-
-        # make stationary
-        if (stationary) {
-          stationary_tbl <- initial_tbl %>%
-            make_stationary()
-
-          initial_tbl <- stationary_tbl$data
-
-          return_tbl <- return_tbl %>%
-            dplyr::left_join(stationary_tbl$diff_info, by = "Combo")
-        }
-
-        # create date features
-        date_features <- initial_tbl %>%
-          dplyr::select(Date) %>%
-          dplyr::mutate(
-            Date_Adj = Date %m+% months(fiscal_year_start - 1),
-            Date_day_month_end = ifelse(lubridate::day(Date_Adj) == lubridate::days_in_month(Date_Adj), 1, 0)
-          ) %>%
-          timetk::tk_augment_timeseries_signature(Date_Adj) %>%
-          dplyr::select(!tidyselect::matches(get_date_regex(date_type)), -Date_Adj, -Date)
-
-        names(date_features) <- stringr::str_c("Date_", names(date_features))
-
-        initial_tbl <- initial_tbl %>%
-          cbind(date_features)
-
-        # Run Recipes
-        if (is.null(recipes_to_run)) {
-          run_all_recipes_override <- FALSE
-        } else if (recipes_to_run == "all") {
-          run_all_recipes_override <- TRUE
-        } else {
-          run_all_recipes_override <- FALSE
-        }
-
-        if (is.null(recipes_to_run) | "R1" %in% recipes_to_run | run_all_recipes_override) {
-          R1 <- initial_tbl %>%
-            multivariate_prep_recipe_1(external_regressors,
-              xregs_future_values_list = xregs_future_list,
-              get_fourier_periods(fourier_periods, date_type),
-              get_lag_periods(lag_periods, date_type, forecast_horizon, multistep_horizon, TRUE),
-              get_rolling_window_periods(rolling_window_periods, date_type)
-            ) %>%
-            dplyr::mutate(Target = base::ifelse(Date > hist_end_date, NA, Target))
-
-          write_data(
-            x = R1,
-            combo = combo,
-            run_info = run_info,
-            output_type = "data",
-            folder = "prep_data",
-            suffix = "-R1"
+          return_tbl <- tibble::tibble(
+            Combo = combo,
+            Combo_Hash = hash_data(combo)
          )
-        }

-        if ((is.null(recipes_to_run) & date_type %in% c("month", "quarter", "year")) | "R2" %in% recipes_to_run | run_all_recipes_override) {
-          R2 <- initial_tbl %>%
-            multivariate_prep_recipe_2(external_regressors,
-              xregs_future_values_list = xregs_future_list,
-              get_fourier_periods(fourier_periods, date_type),
-              get_lag_periods(lag_periods, date_type, forecast_horizon),
-              get_rolling_window_periods(rolling_window_periods, date_type),
-              date_type,
-              forecast_horizon
-            ) %>%
-            dplyr::mutate(Target = base::ifelse(Date > hist_end_date, NA, Target))
-
-          write_data(
-            x = R2,
-            combo = combo,
-            run_info = run_info,
-            output_type = "data",
-            folder = "prep_data",
-            suffix = "-R2"
+          # handle external regressors
+          xregs_future_tbl <- get_xregs_future_values_tbl(
+            df,
+            external_regressors,
+            hist_end_date
          )
-        }

-        return(data.frame(return_tbl))
-      },
-      group_by = "Combo",
-      context = list(
-        get_xregs_future_values_tbl = get_xregs_future_values_tbl,
-        external_regressors = external_regressors,
-        clean_missing_values = clean_missing_values,
-        clean_outliers_missing_values = clean_outliers_missing_values,
-        hash_data = hash_data,
-        hist_end_date = hist_end_date,
-        hist_start_date = hist_start_date,
-        forecast_approach = forecast_approach,
-        forecast_horizon = forecast_horizon,
-        clean_outliers = clean_outliers,
-        get_frequency_number = get_frequency_number,
-        date_type = date_type,
-        fiscal_year_start = fiscal_year_start,
-        get_date_regex = get_date_regex,
-        recipes_to_run = recipes_to_run,
-        multivariate_prep_recipe_1 = multivariate_prep_recipe_1,
-        multivariate_prep_recipe_2 = multivariate_prep_recipe_2,
-        run_info = run_info,
-        get_fourier_periods = get_fourier_periods,
-        fourier_periods = fourier_periods,
-        get_lag_periods = get_lag_periods,
-        lag_periods = lag_periods,
-        get_rolling_window_periods = get_rolling_window_periods,
-        rolling_window_periods = rolling_window_periods,
-        write_data = write_data,
-        write_data_folder = write_data_folder,
-        write_data_type = write_data_type,
-        box_cox = box_cox,
-        stationary = stationary,
-        make_stationary = make_stationary,
-        apply_box_cox = apply_box_cox
-      )
+          if (length(colnames(xregs_future_tbl)) > 2) {
+            xregs_future_list <- xregs_future_tbl %>%
+              dplyr::select(-Date, -Combo) %>%
+              colnames()
+          } else {
+            xregs_future_list <- NULL
+          }
+
+          # initial data prep
+          initial_tbl <- df %>%
+            dplyr::filter(Combo == combo) %>%
+            dplyr::select(
+              Combo,
+              Date,
+              Target,
+              tidyselect::all_of(external_regressors)
+            ) %>%
+            dplyr::group_by(Combo) %>%
+            timetk::pad_by_time(Date,
+              .by = date_type,
+              .pad_value = ifelse(clean_missing_values, NA, 0),
+              .end_date = hist_end_date
+            ) %>% # fill in missing values in between existing data points
+            timetk::pad_by_time(Date,
+              .by = date_type,
+              .pad_value = 0,
+              .start_date = hist_start_date,
+              .end_date = hist_end_date
+            ) %>% # fill in missing values at beginning of time series with zero
+            timetk::future_frame(Date,
+              .length_out = forecast_horizon,
+              .bind_data = TRUE
+            ) %>% # add future data
+            dplyr::ungroup() %>%
+            dplyr::left_join(xregs_future_tbl,
+              by = c("Combo", "Date")
+            ) %>% # join xregs that contain values given by user
+            clean_outliers_missing_values(
+              clean_outliers,
+              clean_missing_values,
+              get_frequency_number(date_type),
+              external_regressors
+            ) %>% # clean outliers and missing values
+            dplyr::mutate_if(is.numeric, list(~ replace(., is.infinite(.), NA))) %>% # replace infinite values
+            dplyr::mutate_if(is.numeric, list(~ replace(., is.nan(.), NA))) %>% # replace NaN values
+            dplyr::mutate_if(is.numeric, list(~ replace(., is.na(.), 0))) %>% # replace NA values
+            dplyr::mutate(Target = ifelse(Date > hist_end_date,
+              NA,
+              Target
+            ))
+
+          # box-cox transformation
+          if (box_cox) {
+            box_cox_tbl <- initial_tbl %>%
+              apply_box_cox()
+
+            initial_tbl <- box_cox_tbl$data
+
+            return_tbl <- return_tbl %>%
+              dplyr::left_join(box_cox_tbl$diff_info, by = "Combo")
+          }
+
+          # make stationary
+          if (stationary) {
+            stationary_tbl <- initial_tbl %>%
+              make_stationary()
+
+            initial_tbl <- stationary_tbl$data
+
+            return_tbl <- return_tbl %>%
+              dplyr::left_join(stationary_tbl$diff_info, by = "Combo")
+          }
+
+          # create date features
+          date_features <- initial_tbl %>%
+            dplyr::select(Date) %>%
+            dplyr::mutate(
+              Date_Adj = Date %m+% months(fiscal_year_start - 1),
+              Date_day_month_end = ifelse(lubridate::day(Date_Adj) == lubridate::days_in_month(Date_Adj), 1, 0)
+            ) %>%
+            timetk::tk_augment_timeseries_signature(Date_Adj) %>%
+            dplyr::select(!tidyselect::matches(get_date_regex(date_type)), -Date_Adj, -Date)
+
+          names(date_features) <- stringr::str_c("Date_", names(date_features))
+
+          initial_tbl <- initial_tbl %>%
+            cbind(date_features)
+
+          # Run Recipes
+          if (is.null(recipes_to_run)) {
+            run_all_recipes_override <- FALSE
+          } else if (recipes_to_run == "all") {
+            run_all_recipes_override <- TRUE
+          } else {
+            run_all_recipes_override <- FALSE
+          }
+
+          if (is.null(recipes_to_run) | "R1" %in% recipes_to_run | run_all_recipes_override) {
+            R1 <- initial_tbl %>%
+              multivariate_prep_recipe_1(external_regressors,
+                xregs_future_values_list = xregs_future_list,
+                get_fourier_periods(fourier_periods, date_type),
+                get_lag_periods(lag_periods, date_type, forecast_horizon, multistep_horizon, TRUE),
+                get_rolling_window_periods(rolling_window_periods, date_type)
+              ) %>%
+              dplyr::mutate(Target = base::ifelse(Date > hist_end_date, NA, Target))
+
+            write_data(
+              x = R1,
+              combo = combo,
+              run_info = run_info,
+              output_type = "data",
+              folder = "prep_data",
+              suffix = "-R1"
+            )
+          }
+
+          if ((is.null(recipes_to_run) & date_type %in% c("month", "quarter", "year")) | "R2" %in% recipes_to_run | run_all_recipes_override) {
+            R2 <- initial_tbl %>%
+              multivariate_prep_recipe_2(external_regressors,
+                xregs_future_values_list = xregs_future_list,
+                get_fourier_periods(fourier_periods, date_type),
+                get_lag_periods(lag_periods, date_type, forecast_horizon),
+                get_rolling_window_periods(rolling_window_periods, date_type),
+                date_type,
+                forecast_horizon
+              ) %>%
+              dplyr::mutate(Target = base::ifelse(Date > hist_end_date, NA, Target))
+
+            write_data(
+              x = R2,
+              combo = combo,
+              run_info = run_info,
+              output_type = "data",
+              folder = "prep_data",
+              suffix = "-R2"
+            )
+          }
+
+          return(data.frame(return_tbl))
+        },
+        group_by = "Combo",
+        context = list(
+          get_xregs_future_values_tbl = get_xregs_future_values_tbl,
+          external_regressors = external_regressors,
+          clean_missing_values = clean_missing_values,
+          clean_outliers_missing_values = clean_outliers_missing_values,
+          hash_data = hash_data,
+          hist_end_date = hist_end_date,
+          hist_start_date = hist_start_date,
+          forecast_approach = forecast_approach,
+          forecast_horizon = forecast_horizon,
+          clean_outliers = clean_outliers,
+          get_frequency_number = get_frequency_number,
+          date_type = date_type,
+          fiscal_year_start = fiscal_year_start,
+          get_date_regex = get_date_regex,
+          recipes_to_run = recipes_to_run,
+          multivariate_prep_recipe_1 = multivariate_prep_recipe_1,
+          multivariate_prep_recipe_2 = multivariate_prep_recipe_2,
+          run_info = run_info,
+          get_fourier_periods = get_fourier_periods,
+          fourier_periods = fourier_periods,
+          get_lag_periods = get_lag_periods,
+          lag_periods = lag_periods,
+          get_rolling_window_periods = get_rolling_window_periods,
+          rolling_window_periods = rolling_window_periods,
+          write_data = write_data,
+          write_data_folder = write_data_folder,
+          write_data_type = write_data_type,
+          box_cox = box_cox,
+          stationary = stationary,
+          make_stationary = make_stationary,
+          apply_box_cox = apply_box_cox
+        )
      )
  }

@ -693,12 +693,13 @@ prep_data <- function(run_info,
    length()

  if (successful_combos != total_combos) {
-    stop(paste0(
-      "Not all time series were prepped within 'prep_data', expected ",
-      total_combos, " time series but only ", successful_combos,
-      " time series are prepped. ", "Please run 'prep_data' again."
-    ),
-    call. = FALSE
+    stop(
+      paste0(
+        "Not all time series were prepped within 'prep_data', expected ",
+        total_combos, " time series but only ", successful_combos,
+        " time series are prepped. ", "Please run 'prep_data' again."
+      ),
+      call. = FALSE
    )
  }

@ -1067,7 +1068,6 @@ apply_box_cox <- function(df) {
  )

  for (column_name in names(df)) {
-
    # Only check numeric columns with more than 2 unique values
    if (is.numeric(df[[column_name]]) & length(unique(df[[column_name]])) > 2) {
      temp_tbl <- df %>%
@ -1119,7 +1119,6 @@ make_stationary <- function(df) {
  )

  for (column_name in names(df)) {
-
    # Only check numeric columns with more than 2 unique values
    if (is.numeric(df[[column_name]]) & length(unique(df[[column_name]])) > 2) {
      temp_tbl <- df %>%
@ -1183,7 +1182,6 @@ multivariate_prep_recipe_1 <- function(data,
                                       rolling_window_periods,
                                       hist_end_date,
                                       date_type) {
-
  # apply polynomial transformations
  numeric_xregs <- c()

@ -1345,7 +1343,6 @@ multivariate_prep_recipe_2 <- function(data,
  }

  for (period in 1:forecast_horizon) {
-
    # add horizon and origin components
    data_lag_window <- df_poly %>%
      dplyr::mutate(
--- a/R/prep_models.R
+++ b/R/prep_models.R
@ -1,4 +1,3 @@
-
 #' Prep Models
 #'
 #' Preps various aspects of run before training models. Things like train/test
@ -60,7 +59,6 @@ prep_models <- function(run_info,
                        pca = NULL,
                        num_hyperparameters = 10,
                        seed = 123) {
-
  # check input values
  check_input_type("run_info", run_info, "list")
  check_input_type("back_test_scenarios", back_test_scenarios, c("NULL", "numeric"))
@ -514,7 +512,6 @@ model_workflows <- function(run_info,
  ml_models <- list_models()

  if (is.null(models_to_run) & is.null(models_not_to_run)) {
-
    # do nothing, using existing ml_models list
  } else if (is.null(models_to_run) & !is.null(models_not_to_run)) {
    ml_models <- setdiff(ml_models, models_not_to_run)
--- a/R/read_write_data.R
+++ b/R/read_write_data.R
@ -1,4 +1,3 @@
-
 #' Get Final Forecast Data
 #'
 #' @param run_info run info using the [set_run_info()] function
@ -46,7 +45,6 @@
 #' @export
 get_forecast_data <- function(run_info,
                              return_type = "df") {
-
  # check input values
  check_input_type("run_info", run_info, "list")
  check_input_type("return_type", return_type, "character", c("df", "sdf"))
@ -76,17 +74,17 @@ get_forecast_data <- function(run_info,
    run_info$path, "/forecasts/*", hash_data(run_info$experiment_name), "-",
    hash_data(run_info$run_name), "*condensed", ".", run_info$data_output
  )
-  
+
  condensed_files <- list_files(run_info$storage_object, fs::path(cond_path))
-  
+
  if (length(condensed_files) > 0) {
    condensed <- TRUE
  } else {
    condensed <- FALSE
  }
-  
+
  # get forecast data
-  if(forecast_approach != "bottoms_up") {
+  if (forecast_approach != "bottoms_up") {
    fcst_path <- paste0(
      "/forecasts/*", hash_data(run_info$experiment_name), "-",
      hash_data(run_info$run_name), "*reconciled", ".", run_info$data_output
@ -172,7 +170,6 @@ get_forecast_data <- function(run_info,
 #' }
 #' @export
 get_trained_models <- function(run_info) {
-
  # check input values
  check_input_type("run_info", run_info, "list")

@ -228,7 +225,6 @@ get_trained_models <- function(run_info) {
 get_prepped_data <- function(run_info,
                             recipe,
                             return_type = "df") {
-
  # check input values
  check_input_type("run_info", run_info, "list")
  check_input_type("recipe", recipe, "character", c("R1", "R2"))
@ -299,7 +295,6 @@ get_prepped_data <- function(run_info,
 #' }
 #' @export
 get_prepped_models <- function(run_info) {
-
  # check input values
  check_input_type("run_info", run_info, "list")

@ -527,12 +522,12 @@ download_file <- function(storage_object,
 #' @noRd
 read_file <- function(run_info,
                      path = NULL,
-                      file_list = NULL, 
+                      file_list = NULL,
                      return_type = "df",
                      schema = NULL) {
  storage_object <- run_info$storage_object
-  
-  if(!is.null(path)) {
+
+  if (!is.null(path)) {
    folder <- fs::path_dir(path)
    initial_path <- run_info$path
    file <- fs::path_file(path)
@ -551,7 +546,7 @@ read_file <- function(run_info,
    files <- list_files(storage_object, fs::path(initial_path, path))
  }

-  if(!is.null(file_list)) {
+  if (!is.null(file_list)) {
    file_temp <- files[[1]]
    file_ext <- fs::path_ext(file_temp)
  } else if (fs::path_ext(file) == "*") {
@ -698,10 +693,9 @@ get_recipe_data <- function(run_info,
 #'
 #' @return nothing
 #' @noRd
-condense_data <- function(run_info, 
+condense_data <- function(run_info,
                          parallel_processing = NULL,
                          num_cores = NULL) {
-  
  # get initial list of files to condense
  initial_file_list <- list_files(
    run_info$storage_object,
@ -710,16 +704,16 @@ condense_data <- function(run_info,
      hash_data(run_info$run_name), "*_models.", run_info$data_output
    )
  )
-  
+
  # Initialize an empty list to store the batches
  list_of_batches <- list()
-  
+
  # Define the batch size
  batch_size <- 10000
-  
+
  # Calculate the number of batches needed
  num_batches <- ceiling(length(initial_file_list) / batch_size)
-  
+
  # Loop through the large list and create batches
  for (i in 1:num_batches) {
    start_index <- (i - 1) * batch_size + 1
@ -727,7 +721,7 @@ condense_data <- function(run_info,
    batch_name <- paste0("batch_", i)
    list_of_batches[[batch_name]] <- initial_file_list[start_index:end_index]
  }
-  
+
  # parallel run info
  par_info <- par_start(
    run_info = run_info,
@ -735,11 +729,11 @@ condense_data <- function(run_info,
    num_cores = min(length(names(list_of_batches)), num_cores),
    task_length = length(names(list_of_batches))
  )
-  
+
  cl <- par_info$cl
  packages <- par_info$packages
  `%op%` <- par_info$foreach_operator
-  
+
  # submit tasks
  condense_data_tbl <- foreach::foreach(
    batch = names(list_of_batches),
@ -750,23 +744,23 @@ condense_data <- function(run_info,
    .inorder = FALSE,
    .multicombine = TRUE,
    .noexport = NULL
-  ) %op%
-    {
-      files <- list_of_batches[[batch]]
+  ) %op% {
+    files <- list_of_batches[[batch]]

-      data <- read_file(run_info,
-                        file_list = files, 
-                        return_type = "df")
-      
-      write_data(
-        x = data,
-        combo = batch,
-        run_info = run_info,
-        output_type = "data",
-        folder = "forecasts",
-        suffix = "-condensed"
-      )
-      
-      return(batch)
-    }
+    data <- read_file(run_info,
+      file_list = files,
+      return_type = "df"
+    )
+
+    write_data(
+      x = data,
+      combo = batch,
+      run_info = run_info,
+      output_type = "data",
+      folder = "forecasts",
+      suffix = "-condensed"
+    )
+
+    return(batch)
+  }
 }
--- a/R/run_info.R
+++ b/R/run_info.R
@ -44,7 +44,6 @@ set_run_info <- function(experiment_name = "finn_fcst",
                         data_output = "csv",
                         object_output = "rds",
                         add_unique_id = TRUE) {
-
  # initial input checks
  if (!inherits(run_name, c("NULL", "character"))) {
    stop("`run_name` must either be a NULL or a string")
@ -151,7 +150,6 @@ set_run_info <- function(experiment_name = "finn_fcst",
    base::suppressWarnings()

  if (nrow(log_df) > 0 & add_unique_id == FALSE) {
-
    # check if input values have changed
    current_log_df <- tibble::tibble(
      experiment_name = experiment_name,
@ -266,7 +264,6 @@ get_run_info <- function(experiment_name = NULL,
                         run_name = NULL,
                         storage_object = NULL,
                         path = NULL) {
-
  # input checks
  if (!inherits(run_name, c("NULL", "character"))) {
    stop("`run_name` must either be a NULL or a string")
--- a/R/train_models.R
+++ b/R/train_models.R
@ -1,4 +1,3 @@
-
 #' Train Individual Models
 #'
 #' @param run_info run info using the [set_run_info()] function
@ -220,7 +219,6 @@ train_models <- function(run_info,
    stringr::str_replace(hash_data("All-Data"), "All-Data")

  if (length(combo_diff) == 0 & length(prev_combo_list) > 0) {
-
    # check if input values have changed
    current_log_df <- tibble::tibble(
      run_global_models = run_global_models,
@ -270,7 +268,6 @@ train_models <- function(run_info,
    .noexport = NULL
  ) %op%
    {
-
      # get time series
      combo_hash <- x

@ -402,7 +399,6 @@ train_models <- function(run_info,
        .multicombine = TRUE,
        .noexport = NULL
      ) %do% {
-
        # get initial run info
        model <- model_run %>%
          dplyr::pull(Model_Name)
@ -719,12 +715,13 @@ train_models <- function(run_info,
    length()

  if (successful_combos != total_combos) {
-    stop(paste0(
-      "Not all time series were completed within 'train_models', expected ",
-      total_combos, " time series but only ", successful_combos,
-      " time series were ran. ", "Please run 'train_models' again."
-    ),
-    call. = FALSE
+    stop(
+      paste0(
+        "Not all time series were completed within 'train_models', expected ",
+        total_combos, " time series but only ", successful_combos,
+        " time series were ran. ", "Please run 'train_models' again."
+      ),
+      call. = FALSE
    )
  }

@ -784,7 +781,6 @@ negative_fcst_adj <- function(data,
 #' @return tbl with train test splits
 #' @noRd
 create_splits <- function(data, train_test_splits) {
-
  # Create the rsplit object
  analysis_split <- function(data, train_indices, test_indices) {
    rsplit_object <- rsample::make_splits(
@ -846,7 +842,6 @@ create_splits <- function(data, train_test_splits) {
 undifference_forecast <- function(forecast_data,
                                  recipe_data,
                                  diff_tbl) {
-
  # check if data needs to be undifferenced
  diff1 <- diff_tbl$Diff_Value1
  diff2 <- diff_tbl$Diff_Value2
@ -863,10 +858,8 @@ undifference_forecast <- function(forecast_data,

  # non seasonal differencing
  if (!is.na(diff1)) {
-
    # loop through each back test split
    for (id in train_test_id) {
-
      # get specific train test split
      fcst_temp_tbl <- forecast_data %>%
        dplyr::filter(Train_Test_ID == id)
@ -950,7 +943,6 @@ undifference_forecast <- function(forecast_data,
 undifference_recipe <- function(recipe_data,
                                diff_tbl,
                                hist_end_date) {
-
  # check if data needs to be undifferenced
  diff1 <- diff_tbl$Diff_Value1
  diff2 <- diff_tbl$Diff_Value2
--- a/R/utility.R
+++ b/R/utility.R
@ -15,7 +15,7 @@ utils::globalVariables(c(
  "Auto_Accept", "Feature", "Imp", "Importance", "LOFO_Var", "Var_RMSE", "Vote", "Votes", "desc",
  "term", "Column", "Box_Cox_Lambda", "get_recipie_configurable", "Agg", "Unique", "Var",
  "Var_Combo", "regressor", "regressor_tbl", "value_level_iter", ".actual", ".fitted",
-  "forecast_horizon", "lag", "new_data", "object", "fit", "Row_Num", "Run_Number", "weight", 
+  "forecast_horizon", "lag", "new_data", "object", "fit", "Row_Num", "Run_Number", "weight",
  "Total", "Weight", "batch"
 ))

@ -89,7 +89,6 @@ cbind.fill <- function(..., fill = NA) {
 # been loaded.

 .onLoad <- function(libname, pkgname) {
-
  # CRAN OMP THREAD LIMIT
  Sys.setenv("OMP_THREAD_LIMIT" = 1)

--- a/tests/testthat/test-forecast_time_series.R
+++ b/tests/testthat/test-forecast_time_series.R
@ -1,4 +1,3 @@
-
 # * custom test functions ----

 check_exist <- function(to_check, ret) {
--- a/tests/testthat/test-hierarchical.R
+++ b/tests/testthat/test-hierarchical.R
@ -1,6 +1,4 @@
-
 test_that("prep_hierarchical_data returns correct grouped hierarchies", {
-
  # Mock data setup
  data <- tibble::tibble(
    Segment = as.character(c(
@ -77,7 +75,6 @@ test_that("prep_hierarchical_data returns correct grouped hierarchies", {
 })

 test_that("prep_hierarchical_data returns correct standard hierarchies", {
-
  # Mock data setup
  data <- tibble::tibble(
    Area = as.character(c("EMEA", "EMEA", "EMEA", "EMEA", "EMEA", "EMEA", "EMEA", "EMEA", "United States", "United States", "United States", "United States")),
--- a/tests/testthat/test-multistep_horizon.R
+++ b/tests/testthat/test-multistep_horizon.R
@ -1,5 +1,4 @@
 test_that("multistep_horizon monthly data", {
-
  # Mock data setup
  data <- timetk::m4_monthly %>%
    dplyr::mutate(id = as.character(id)) %>%
--- a/vignettes/back-testing-and-hyperparameter-tuning.Rmd
+++ b/vignettes/back-testing-and-hyperparameter-tuning.Rmd
@ -22,27 +22,33 @@ When `prep_models()` is ran, hyperparameters and back test splits are calculated
 library(finnts)

 hist_data <- timetk::m4_monthly %>%
-  dplyr::filter(date >= "2012-01-01", 
-                id == "M2") %>%
+  dplyr::filter(
+    date >= "2012-01-01",
+    id == "M2"
+  ) %>%
  dplyr::rename(Date = date) %>%
  dplyr::mutate(id = as.character(id))

 run_info <- set_run_info(
-  experiment_name = "finnts_fcst", 
+  experiment_name = "finnts_fcst",
  run_name = "get_prepped_models"
 )

-prep_data(run_info = run_info, 
-          input_data = hist_data, 
-          combo_variables = c("id"), 
-          target_variable = "value", 
-          date_type = "month", 
-          recipes_to_run = "R1",
-          forecast_horizon = 6)
+prep_data(
+  run_info = run_info,
+  input_data = hist_data,
+  combo_variables = c("id"),
+  target_variable = "value",
+  date_type = "month",
+  recipes_to_run = "R1",
+  forecast_horizon = 6
+)

-prep_models(run_info = run_info, 
-            models_to_run = c("arima", "ets", "xgboost"), 
-            num_hyperparameters = 10)
+prep_models(
+  run_info = run_info,
+  models_to_run = c("arima", "ets", "xgboost"),
+  num_hyperparameters = 10
+)

 model_info <- get_prepped_models(run_info = run_info)

@ -67,7 +73,6 @@ model_hyperparameter_info <- model_info %>%
  tidyr::unnest(Data)

 print(model_hyperparameter_info)
-
 ```

 The above outputs allow a Finn user to understand what hyperparameters are chosen for tuning and how the model refitting process will work. When tuning hyperparameters, Finn uses the "Validation" train/test splits, with the final parameters chosen using RMSE. For some models like ARIMA that don't follow a traditional hyperparameter tuning process, the model is fit from scratch across all train/test splits. After hyperparameters are chosen, the model is refit across the "Back_Test" and "Future_Forecast" splits. The "Back_Test" splits are the true testing data that will be used when selecting the final "Best-Model". "Ensemble" splits are also created as ensemble training data if ensemble models are chosen to run. Ensemble models follow a similar tuning process.
--- a/vignettes/best-model-selection.Rmd
+++ b/vignettes/best-model-selection.Rmd
@ -23,17 +23,21 @@ suppressMessages(library(dplyr))

 message("Simple Back Test Results")
 back_test_tbl <- tibble(
-  Combo = c("Country_1", "Country_1", "Country_1", "Country_1", "Country_1", "Country_1", "Country_1", "Country_1", "Country_1", "Country_1"), 
-  Date = c("2020-01-01", "2020-02-01", "2020-03-01", "2020-04-01", "2020-05-01", "2020-01-01", "2020-02-01", "2020-03-01", "2020-04-01", "2020-05-01"), 
-  Model = c("arima", "arima", "arima", "arima", "arima", "ets", "ets", "ets", "ets", "ets"), 
-  FCST = c(9, 23, 35, 41, 48, 7, 22, 29, 42, 53), 
+  Combo = c("Country_1", "Country_1", "Country_1", "Country_1", "Country_1", "Country_1", "Country_1", "Country_1", "Country_1", "Country_1"),
+  Date = c("2020-01-01", "2020-02-01", "2020-03-01", "2020-04-01", "2020-05-01", "2020-01-01", "2020-02-01", "2020-03-01", "2020-04-01", "2020-05-01"),
+  Model = c("arima", "arima", "arima", "arima", "arima", "ets", "ets", "ets", "ets", "ets"),
+  FCST = c(9, 23, 35, 41, 48, 7, 22, 29, 42, 53),
  Target = c(10, 20, 30, 40, 50, 10, 20, 30, 40, 50)
 ) %>%
-  dplyr::mutate(MAPE = abs(Target-FCST)/Target, 
-                Date = as.Date(Date)) %>%
+  dplyr::mutate(
+    MAPE = abs(Target - FCST) / Target,
+    Date = as.Date(Date)
+  ) %>%
  dplyr::group_by(Combo, Model) %>%
-  dplyr::mutate(Target_Total = sum(Target), 
-                Percent_Total = Target/Target_Total) %>%
+  dplyr::mutate(
+    Target_Total = sum(Target),
+    Percent_Total = Target / Target_Total
+  ) %>%
  dplyr::ungroup()

 print(back_test_tbl)
@ -44,12 +48,13 @@ message("Overall Model Accuracy by Combo")
 suppressMessages(best_model <- back_test_tbl %>%
  dplyr::group_by(Combo, Model) %>%
  dplyr::mutate(Weighted_MAPE = MAPE * Percent_Total) %>%
-  dplyr::summarise(MAPE = mean(MAPE), 
-                   Weighted_MAPE = sum(Weighted_MAPE)) %>%
+  dplyr::summarise(
+    MAPE = mean(MAPE),
+    Weighted_MAPE = sum(Weighted_MAPE)
+  ) %>%
  dplyr::ungroup())

 print(best_model)
-
 ```

 During the simple back test process above, arima seems to be the better model from a pure MAPE perspective, but ETS ends up being the winner when using weighted MAPE. The benefits of weighted MAPE allow finnts to find the optimal model that performs the best on the biggest components of a forecast, which comes with the added benefit of putting more weight on more recent observations since those are more likely to have larger target values then ones further into the past. Another way of putting more weight on more recent observations is how Finn overlaps its back testing scenarios. This means the most recent observations are tested for accuracy in different forecast horizons (H=1, H=2, etc). More info on this in the back testing vignette.
--- a/vignettes/external-regressors.Rmd
+++ b/vignettes/external-regressors.Rmd
@ -42,11 +42,11 @@ Below is an example of how you can set up your input_data to leverage future val
 library(dplyr)

 tibble(
-  Combo = c("Country_1", "Country_1", "Country_1", "Country_1", "Country_1", "Country_1", "Country_1", "Country_1", "Country_1", "Country_1", "Country_1", "Country_1", "Country_1", "Country_1", "Country_1"), 
-  Date = c("2020-01-01", "2020-02-01", "2020-03-01", "2020-04-01", "2020-05-01", "2020-06-01", "2020-07-01", "2020-08-01", "2020-09-01", "2020-10-01", "2020-11-01", "2020-12-01", "2021-01-01", "2021-02-01", "2021-03-01"), 
-  Target = c(10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, NA, NA, NA), 
-  Holiday = c("New Years", "Valentines Day", "None", "Easter", "None", "None", "4th of July", "None", "Labor Day", "Halloween", "Thanksgiving", "Christmas", "New Years", "Valentines Day", "None"), 
-  GDP = c(5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, NA, NA, NA), 
+  Combo = c("Country_1", "Country_1", "Country_1", "Country_1", "Country_1", "Country_1", "Country_1", "Country_1", "Country_1", "Country_1", "Country_1", "Country_1", "Country_1", "Country_1", "Country_1"),
+  Date = c("2020-01-01", "2020-02-01", "2020-03-01", "2020-04-01", "2020-05-01", "2020-06-01", "2020-07-01", "2020-08-01", "2020-09-01", "2020-10-01", "2020-11-01", "2020-12-01", "2021-01-01", "2021-02-01", "2021-03-01"),
+  Target = c(10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, NA, NA, NA),
+  Holiday = c("New Years", "Valentines Day", "None", "Easter", "None", "None", "4th of July", "None", "Labor Day", "Halloween", "Thanksgiving", "Christmas", "New Years", "Valentines Day", "None"),
+  GDP = c(5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, NA, NA, NA),
  Sales_Pipeline = c(100, 110, 120, 130, 140, 150, 160, 170, 180, 190, 200, 210, 220, 230, 240)
 ) %>%
  dplyr::mutate(Date = as.Date(Date))
--- a/vignettes/feature-engineering.Rmd
+++ b/vignettes/feature-engineering.Rmd
@ -74,7 +74,6 @@ In addition to the standard approaches above, finnts also does two different way
 In the first recipe, referred to as "R1" in default finnts models, by default takes a single step horizon approach. Meaning all of the engineered target and external regressor features are used but the lags cannot be less than the forecast horizon. For example, a monthly data set with a forecast horizon of 3, finnts will take engineered features like lags and rolling window features but only use those lags that are for periods equal to or greater than 3. You can also run a multistep horizon approach by setting `multistep_horizon` to TRUE in `prep_models()`. The multistep approach will create features that can be used by specific multivariate models that optimize for each period in a forecast horizon. More on this in the "models used in finnts" vignette. Recursive forecasting is not supported in finnts multivariate machine learning models, since feeding forecast outputs as features to create another forecast adds complex layers of uncertainty that can easily spiral out of control and produce poor forecasts. NA values created by generating lag features are filled "up". This results in the first initial periods of a time series having some data leakage but the effect should be small if the time series is long enough.

 ```{r, message = FALSE}
-
 library(finnts)

 hist_data <- timetk::m4_monthly %>%
@ -112,7 +111,6 @@ print(R1_prepped_data_tbl)
 The second recipe is referred to as "R2" in default finnts models. It takes a very different approach than the "R1" recipe. For a 3 month forecast horizon on a monthly dataset, target and rolling window features are created depending on the horizon period. They are also constrained to be equal or less than the forecast horizon. In the below example, "Origin" and "Horizon" features are created for each time period. This results in duplicating rows in the original data set to create new features that are now specific to each horizon period. This helps the default finnts models find new unique relationships to model, when compared to a more formal approach in "R1". NA values created by generating lag features are filled "up".

 ```{r, message = FALSE}
-
 library(finnts)

 hist_data <- timetk::m4_monthly %>%
--- a/vignettes/finnts.Rmd
+++ b/vignettes/finnts.Rmd
@ -10,8 +10,8 @@ vignette: >
 ```{r, include = FALSE}
 knitr::opts_chunk$set(
  collapse = TRUE,
-  comment = "#>", 
-  warning = FALSE, 
+  comment = "#>",
+  warning = FALSE,
  message = FALSE
 )
 library(finnts)
@ -62,18 +62,15 @@ The above data set contains 4 individual time series, identified using the "id"
 Before we call the Finn forecast function. Let's first set up some run information using `set_run_info()`, this helps log all components of our Finn forecast successfully.

 ```{r, message = FALSE, eval = hist_data, error=FALSE, warning = FALSE, echo=T, eval = TRUE}
-
 run_info <- set_run_info(
-  experiment_name = "finn_forecast", 
+  experiment_name = "finn_forecast",
  run_name = "test_run"
 )
-
 ```

 Calling the "forecast_time_series" function is the easiest part. In this example we will be running just two models.

 ```{r, message = FALSE, eval = hist_data, error=FALSE, warning = FALSE, echo=T, eval = TRUE}
-
 # no need to assign it to a variable, since all of the outputs are written to disk :)
 forecast_time_series(
  run_info = run_info,
@ -82,8 +79,8 @@ forecast_time_series(
  target_variable = "value",
  date_type = "month",
  forecast_horizon = 3,
-  back_test_scenarios = 6, 
-  models_to_run = c("arima", "ets"), 
+  back_test_scenarios = 6,
+  models_to_run = c("arima", "ets"),
  return_data = FALSE
 )
 ```
@ -93,7 +90,6 @@ forecast_time_series(
 ### Initial Finn Outputs

 ```{r, message = FALSE, eval = finn_output, message = FALSE, eval = FALSE, echo=T}
-
 finn_output_tbl <- get_forecast_data(run_info = run_info)

 print(finn_output_tbl)
@ -102,7 +98,6 @@ print(finn_output_tbl)
 ### Future Forecast

 ```{r, message = FALSE, eval = finn_output, message = FALSE, eval = FALSE, echo=T}
-
 future_forecast_tbl <- finn_output_tbl %>%
  dplyr::filter(Run_Type == "Future_Forecast")

@ -142,13 +137,17 @@ print(trained_model_tbl)
 ### Initial Prepped Data

 ```{r, message = FALSE, eval = finn_output, eval = FALSE, echo=T}
-R1_prepped_data_tbl <- get_prepped_data(run_info = run_info, 
-                                        recipe = "R1")
+R1_prepped_data_tbl <- get_prepped_data(
+  run_info = run_info,
+  recipe = "R1"
+)

 print(R1_prepped_data_tbl)

-R2_prepped_data_tbl <- get_prepped_data(run_info = run_info, 
-                                        recipe = "R2")
+R2_prepped_data_tbl <- get_prepped_data(
+  run_info = run_info,
+  recipe = "R2"
+)

 print(R2_prepped_data_tbl)
 ```
--- a/vignettes/forecast-components.Rmd
+++ b/vignettes/forecast-components.Rmd
@ -10,8 +10,8 @@ vignette: >
 ```{r, include = FALSE}
 knitr::opts_chunk$set(
  collapse = TRUE,
-  comment = "#>", 
-  warning = FALSE, 
+  comment = "#>",
+  warning = FALSE,
  message = FALSE
 )
 library(finnts)
@ -29,13 +29,15 @@ Let's get some example data and then set our Finn run info.
 library(finnts)

 hist_data <- timetk::m4_monthly %>%
-  dplyr::filter(date >= "2013-01-01", 
-                id == "M2") %>%
+  dplyr::filter(
+    date >= "2013-01-01",
+    id == "M2"
+  ) %>%
  dplyr::rename(Date = date) %>%
  dplyr::mutate(id = as.character(id))

 run_info <- set_run_info(
-  experiment_name = "finnts_fcst", 
+  experiment_name = "finnts_fcst",
  run_name = "finn_sub_component_run"
 )
 ```
@ -45,20 +47,26 @@ run_info <- set_run_info(
 Clean and prepare our data before training models. We can even pull out our prepped data to see the features and transformations applied before models are trained.

 ```{r message=FALSE}
-prep_data(run_info = run_info, 
-          input_data = hist_data, 
-          combo_variables = c("id"), 
-          target_variable = "value", 
-          date_type = "month", 
-          forecast_horizon = 6)
+prep_data(
+  run_info = run_info,
+  input_data = hist_data,
+  combo_variables = c("id"),
+  target_variable = "value",
+  date_type = "month",
+  forecast_horizon = 6
+)

-R1_prepped_data_tbl <- get_prepped_data(run_info = run_info, 
-                                        recipe = "R1")
+R1_prepped_data_tbl <- get_prepped_data(
+  run_info = run_info,
+  recipe = "R1"
+)

 print(R1_prepped_data_tbl)

-R2_prepped_data_tbl <- get_prepped_data(run_info = run_info, 
-                                        recipe = "R2")
+R2_prepped_data_tbl <- get_prepped_data(
+  run_info = run_info,
+  recipe = "R2"
+)

 print(R2_prepped_data_tbl)
 ```
@ -70,12 +78,16 @@ Now that our data is prepared for modeling, let's now train some models. First w
 Then we can kick off training each model on our data.

 ```{r, message = FALSE}
-prep_models(run_info = run_info, 
-            models_to_run = c("arima", "ets", "glmnet"), 
-            num_hyperparameters = 2)
+prep_models(
+  run_info = run_info,
+  models_to_run = c("arima", "ets", "glmnet"),
+  num_hyperparameters = 2
+)

-train_models(run_info = run_info, 
-             run_global_models = FALSE)
+train_models(
+  run_info = run_info,
+  run_global_models = FALSE
+)
 ```

 ## Train Ensemble Models
@ -99,7 +111,6 @@ final_models(run_info = run_info)
 Finally we can now retrieve the forecast results from this Finn run.

 ```{r, message = FALSE}
-
 finn_output_tbl <- get_forecast_data(run_info = run_info)

 print(finn_output_tbl)
--- a/vignettes/hierarchical-forecasting.Rmd
+++ b/vignettes/hierarchical-forecasting.Rmd
@ -27,15 +27,14 @@ message("Standard Hierarchical Time Series Data")

 hts <- tibble(
  Continent = c("North America", "North America", "North America", "North America", "North America", "North America", "North America", "North America", "North America"),
-  Country = c("United States", "United States", "United States", "United States", "United States", "United States", "Mexico", "Mexico", "Mexico"), 
-  City = c("Kansas City", "Kansas City", "Kansas City", "Seattle", "Seattle", "Seattle", "Mexico City", "Mexico City", "Mexico City"), 
-  Date = c("2020-01-01", "2020-02-01", "2020-03-01", "2020-01-01", "2020-02-01", "2020-03-01", "2020-01-01", "2020-02-01", "2020-03-01"), 
+  Country = c("United States", "United States", "United States", "United States", "United States", "United States", "Mexico", "Mexico", "Mexico"),
+  City = c("Kansas City", "Kansas City", "Kansas City", "Seattle", "Seattle", "Seattle", "Mexico City", "Mexico City", "Mexico City"),
+  Date = c("2020-01-01", "2020-02-01", "2020-03-01", "2020-01-01", "2020-02-01", "2020-03-01", "2020-01-01", "2020-02-01", "2020-03-01"),
  Target = c(100, 250, 320, 80, 200, 270, 50, 80, 120)
 ) %>%
  dplyr::mutate(Date = as.Date(Date))

 print(hts)
-
 ```

 In the above example, "City" was the lowest level of the hierarchy, which feeds into "Country", which then feeds into "Continent". Finn will take this data and will forecast by City, total Country, and total Continent. After each model is ran for every level in the hierarchy, the best model is chosen at each level, then the "Best Model" and every other model is reconciled back down to the lowest level.
@ -50,16 +49,15 @@ suppressMessages(library(dplyr))
 message("Grouped Hierarchical Time Series Data")

 gts <- tibble(
-  Country = c("United States", "United States", "United States", "United States", "United States", "United States", "Mexico", "Mexico", "Mexico", "Mexico", "Mexico", "Mexico"), 
-  Segment = c("Enterprise", "Enterprise", "Enterprise", "Public Sector", "Public Sector", "Public Sector", "Enterprise", "Enterprise", "Enterprise", "Enterprise", "Enterprise", "Enterprise"), 
-  Product = c("Coffee", "Coffee", "Coffee", "Coffee", "Coffee", "Coffee", "Coffee", "Coffee", "Coffee", "Tea", "Tea", "Tea"), 
-  Date = c("2020-01-01", "2020-02-01", "2020-03-01", "2020-01-01", "2020-02-01", "2020-03-01", "2020-01-01", "2020-02-01", "2020-03-01", "2020-01-01", "2020-02-01", "2020-03-01"), 
+  Country = c("United States", "United States", "United States", "United States", "United States", "United States", "Mexico", "Mexico", "Mexico", "Mexico", "Mexico", "Mexico"),
+  Segment = c("Enterprise", "Enterprise", "Enterprise", "Public Sector", "Public Sector", "Public Sector", "Enterprise", "Enterprise", "Enterprise", "Enterprise", "Enterprise", "Enterprise"),
+  Product = c("Coffee", "Coffee", "Coffee", "Coffee", "Coffee", "Coffee", "Coffee", "Coffee", "Coffee", "Tea", "Tea", "Tea"),
+  Date = c("2020-01-01", "2020-02-01", "2020-03-01", "2020-01-01", "2020-02-01", "2020-03-01", "2020-01-01", "2020-02-01", "2020-03-01", "2020-01-01", "2020-02-01", "2020-03-01"),
  Target = c(10, 20, 30, 5, 8, 11, 20, 23, 27, 50, 55, 60)
 ) %>%
  dplyr::mutate(Date = as.Date(Date))

 print(gts)
-
 ```

 It would be hard to aggregate the above data in a traditional hierarchy. The same products are found in different segments and countries, also the same segments are found in multiple countries. Finn will follow a similar modeling process as the one described for a traditional hierarchy, but instead will create forecasts at the below levels.
--- a/vignettes/models-used-in-finnts.Rmd
+++ b/vignettes/models-used-in-finnts.Rmd
@ -22,7 +22,7 @@ reactable::reactable(
  data.frame() %>%
    rbind(data.frame(Model = "arima", Type = "univariate, local", Underlying.Package = "modeltime, forecast", Description = "Regression model that is based on finding relationships between lagged values of the target variable you are trying to forecast.")) %>%
    rbind(data.frame(Model = "arima-boost", Type = "multivariate, local", Underlying.Package = "modeltime, forecast, xgboost", Description = "Arima model (refer to arima) that models the trend compoent of target variable, then uses xgboost model (refer to xgboost) to train on the remaining residuals.")) %>%
-        rbind(data.frame(Model = "arimax", Type = "multivariate, local", Underlying.Package = "modeltime, forecast", Description = "ARIMA model that incorporates external regressors and other engineered features.")) %>%
+    rbind(data.frame(Model = "arimax", Type = "multivariate, local", Underlying.Package = "modeltime, forecast", Description = "ARIMA model that incorporates external regressors and other engineered features.")) %>%
    rbind(data.frame(Model = "cubist", Type = "multivariate, local, global, ensemble", Underlying.Package = "rules", Description = "Hybrid of tree based and linear regression approach. Many decision trees are built, but regression coefficients are used at each terminal node instead of averging values in other tree based approaches.")) %>%
    rbind(data.frame(Model = "croston", Type = "univariate, local", Underlying.Package = "modeltime, forecast", Description = "Useful for intermittent demand forecasting, aka when there are a lot of periods of zero values. Involves simple exponential smoothing on non-zero values of target variable and another application of seasonal exponential smoothing on periods between non-zero elements of the target variable. Refer to ets for more details on exponential smoothing.")) %>%
    rbind(data.frame(Model = "ets", Type = "univariate, local", Underlying.Package = "modeltime, forecast", Description = "Forecasts produced using exponential smoothing methods are weighted averages of past observations, with the weights decaying exponentially as the observations get older. Exponential smoothing models try to forecast the components of a time series which can be broken down in to error, trend, and seasonality. These components can be forecasted separately then either added or multiplied together to get the final forecast output.")) %>%
@ -41,8 +41,7 @@ reactable::reactable(
    rbind(data.frame(Model = "svm-rbf", Type = "multivariate, local, global, ensemble", Underlying.Package = "parsnip, kernlab", Description = "Uses a nonlinear function, specifically a radial basis function, to create a regression line of the target variable.")) %>%
    rbind(data.frame(Model = "tbats", Type = "univariate, local", Underlying.Package = "modeltime, forecast", Description = "A spin off of the traditional ets model (refer to ets), with some additional components to capture multiple seasonalities.")) %>%
    rbind(data.frame(Model = "theta", Type = "univariate, local", Underlying.Package = "modeltime, forecast", Description = "Theta is similar to exponential smoothing (refer to ets) but with another component called drift. Adding drift to exponential smoothing allows the forecast to increase or decrease over time, where the amount of change over time (called the drift) is set to be the average change seen within the historical data.")) %>%
-    rbind(data.frame(Model = "xgboost", Type = "multivariate, local, global, ensemble", Underlying.Package = "parsnip, xgboost", Description = "Builds many decision trees (similar to random forests), but predictions that are initially inaccurate are applied more weight in subsequent training rounds to increase accuracy across all predictions."))
-  , 
+    rbind(data.frame(Model = "xgboost", Type = "multivariate, local, global, ensemble", Underlying.Package = "parsnip, xgboost", Description = "Builds many decision trees (similar to random forests), but predictions that are initially inaccurate are applied more weight in subsequent training rounds to increase accuracy across all predictions.")),
  defaultColDef = colDef(
    header = function(value) gsub(".", " ", value, fixed = TRUE),
    cell = function(value) format(value, nsmall = 1),
@ -51,12 +50,11 @@ reactable::reactable(
    headerStyle = list(background = "#f7f7f8")
  ),
  columns = list(
-    Description = colDef(minWidth = 140, align = "left")  # overrides the default
+    Description = colDef(minWidth = 140, align = "left") # overrides the default
  ),
  bordered = TRUE,
  highlight = TRUE
 )
-
 ```

 ### Univariate vs Multivariate Models
--- a/vignettes/parallel-processing.Rmd
+++ b/vignettes/parallel-processing.Rmd
@ -49,8 +49,8 @@ hist_data <- timetk::m4_monthly %>%
 data_sdf <- sparklyr::copy_to(sc, hist_data, "data_sdf", overwrite = TRUE)

 run_info <- set_run_info(
-  experiment_name = "finn_fcst", 
-  run_name = "spark_run_1", 
+  experiment_name = "finn_fcst",
+  run_name = "spark_run_1",
  path = "/dbfs/mnt/example/folder" # important that you mount an ADLS path
 )

@ -60,16 +60,18 @@ forecast_time_series(
  combo_variables = c("id"),
  target_variable = "value",
  date_type = "month",
-  forecast_horizon = 3, 
-  parallel_processing = "spark", 
+  forecast_horizon = 3,
+  parallel_processing = "spark",
  return_data = FALSE
 )

 # return the outputs as a spark data frame
-finn_output_tbl <- get_forecast_data(run_info = run_info, 
-                                     return_type = "sdf")
+finn_output_tbl <- get_forecast_data(
+  run_info = run_info,
+  return_type = "sdf"
+)
 ```

 The above example runs each time series on a separate core on a spark cluster. You can also submit multiple time series where each time series runs on a separate spark executor (VM) and then leverage all of the cores on that executor to run things like hyperparameter tuning or model refitting in parallel. This creates two levels of parallelization. One at the time series level, then another when doing things like hyperparameter tuning within a specific time series. To do that set `inner_parallel` to TRUE in `forecast_time_series()`. Also make sure that you adjust the number of spark executor cores to 1, that ensures that only 1 time series runs on an executor at a time. Leverage the "spark.executor.cores" argument when configuring your spark connection. This can be done using [sparklyr](https://spark.rstudio.com/guides/connections#:~:text=In%20sparklyr%2C%20Spark%20properties%20can%20be%20set%20by,customized%20as%20shown%20in%20the%20example%20code%20below.) or within the cluster manager itself within the Azure resource. Use the "num_cores" argument in the "forecast_time_series" function to control how many cores should be used within an executor when running things like hyperparameter tuning.

-`forecast_time_series()` will be looking for a variable called "sc" to use when submitting tasks to the spark cluster, so make sure you use that as the variable name when connecting to spark. Also it's important that you mount your spark session to an Azure Data Lake Storage (ADLS) account, and provide the mounted path to where you'd like your Finn results to be written to within `set_run_info()`.
+`forecast_time_series()` will be looking for a variable called "sc" to use when submitting tasks to the spark cluster, so make sure you use that as the variable name when connecting to spark. Also it's important that you mount your spark session to an Azure Data Lake Storage (ADLS) account, and provide the mounted path to where you'd like your Finn results to be written to within `set_run_info()`.