[R-package] discourage use of regex for fixed string comparisons (#5685)

2023-01-30 23:33:48 -06:00 · 2023-01-30 23:33:48 -06:00 · 9954bc4231
--- a/.ci/lint_r_code.R
+++ b/.ci/lint_r_code.R
@ -42,6 +42,7 @@ LINTERS_TO_USE <- list(
    , "implicit_integers"    = lintr::implicit_integer_linter()
    , "infix_spaces"         = lintr::infix_spaces_linter()
    , "inner_combine"        = lintr::inner_combine_linter()
    , "fixed_regex"          = lintr::fixed_regex_linter()
    , "literal_coercion"     = lintr::literal_coercion_linter()
    , "long_lines"           = lintr::line_length_linter(length = 120L)
    , "missing_argument"     = lintr::missing_argument_linter()
--- a/R-package/R/utils.R
+++ b/R-package/R/utils.R
@ -25,7 +25,7 @@ lgb.params2str <- function(params) {
    stop("params must be a list")
  }
-  names(params) <- gsub("\\.", "_", names(params))
+  names(params) <- gsub(".", "_", names(params), fixed = TRUE)
  param_names <- names(params)
  ret <- list()
--- a/R-package/inst/make-r-def.R
+++ b/R-package/inst/make-r-def.R
@ -81,9 +81,9 @@ end_of_table <- empty_lines[empty_lines > start_index][1L]
 # Read the contents of the table
 exported_symbols <- objdump_results[(start_index + 1L):end_of_table]
-exported_symbols <- gsub("\t", "", exported_symbols)
+exported_symbols <- gsub("\t", "", exported_symbols, fixed = TRUE)
 exported_symbols <- gsub(".*\\] ", "", exported_symbols)
-exported_symbols <- gsub(" ", "", exported_symbols)
+exported_symbols <- gsub(" ", "", exported_symbols, fixed = TRUE)
 # Write R.def file
 writeLines(
--- a/R-package/tests/testthat/test_basic.R
+++ b/R-package/tests/testthat/test_basic.R
@ -618,6 +618,7 @@ test_that("lgb.cv() prefers objective in params to keyword argument", {
    model_txt_lines <- strsplit(
      x = bst$save_model_to_string()
      , split = "\n"
      , fixed = TRUE
    )[[1L]]
    expect_true(any(model_txt_lines == "objective=regression_l1"))
    expect_false(any(model_txt_lines == "objective=regression_l2"))
@ -780,6 +781,7 @@ test_that("lgb.train() prefers objective in params to keyword argument", {
  model_txt_lines <- strsplit(
    x = bst$save_model_to_string()
    , split = "\n"
    , fixed = TRUE
  )[[1L]]
  expect_true(any(model_txt_lines == "objective=regression_l1"))
  expect_false(any(model_txt_lines == "objective=regression_l2"))
@ -2296,8 +2298,8 @@ test_that("lgb.cv() respects changes to logging verbosity", {
      , verbose = 1L
    )
  })
-  expect_true(any(grepl("\\[LightGBM\\] \\[Info\\]", lgb_cv_logs)))
+  expect_true(any(grepl("[LightGBM] [Info]", lgb_cv_logs, fixed = TRUE)))
-  expect_true(any(grepl("\\[LightGBM\\] \\[Warning\\]", lgb_cv_logs)))
+  expect_true(any(grepl("[LightGBM] [Warning]", lgb_cv_logs, fixed = TRUE)))
  # (verbose = 0) should be WARNING level logs only
  lgb_cv_logs <- capture.output({
@ -2310,8 +2312,8 @@ test_that("lgb.cv() respects changes to logging verbosity", {
      , verbose = 0L
    )
  })
-  expect_false(any(grepl("\\[LightGBM\\] \\[Info\\]", lgb_cv_logs)))
+  expect_false(any(grepl("[LightGBM] [Info]", lgb_cv_logs, fixed = TRUE)))
-  expect_true(any(grepl("\\[LightGBM\\] \\[Warning\\]", lgb_cv_logs)))
+  expect_true(any(grepl("[LightGBM] [Warning]", lgb_cv_logs, fixed = TRUE)))
  # (verbose = -1) no logs
  lgb_cv_logs <- capture.output({
@ -2326,8 +2328,8 @@ test_that("lgb.cv() respects changes to logging verbosity", {
  })
  # NOTE: this is not length(lgb_cv_logs) == 0 because lightgbm's
  #       dependencies might print other messages
-  expect_false(any(grepl("\\[LightGBM\\] \\[Info\\]", lgb_cv_logs)))
+  expect_false(any(grepl("[LightGBM] [Info]", lgb_cv_logs, fixed = TRUE)))
-  expect_false(any(grepl("\\[LightGBM\\] \\[Warning\\]", lgb_cv_logs)))
+  expect_false(any(grepl("[LightGBM] [Warning]", lgb_cv_logs, fixed = TRUE)))
 })
 test_that("lgb.cv() updates params based on keyword arguments", {
@ -2918,6 +2920,7 @@ test_that("lightgbm() accepts objective as function argument and under params",
  model_txt_lines <- strsplit(
    x = bst1$save_model_to_string()
    , split = "\n"
    , fixed = TRUE
  )[[1L]]
  expect_true(any(model_txt_lines == "objective=regression_l1"))
  expect_false(any(model_txt_lines == "objective=regression_l2"))
@ -2933,6 +2936,7 @@ test_that("lightgbm() accepts objective as function argument and under params",
  model_txt_lines <- strsplit(
    x = bst2$save_model_to_string()
    , split = "\n"
    , fixed = TRUE
  )[[1L]]
  expect_true(any(model_txt_lines == "objective=regression_l1"))
  expect_false(any(model_txt_lines == "objective=regression_l2"))
@ -2951,6 +2955,7 @@ test_that("lightgbm() prioritizes objective under params over objective as funct
  model_txt_lines <- strsplit(
    x = bst1$save_model_to_string()
    , split = "\n"
    , fixed = TRUE
  )[[1L]]
  expect_true(any(model_txt_lines == "objective=regression_l1"))
  expect_false(any(model_txt_lines == "objective=regression_l2"))
@ -2967,6 +2972,7 @@ test_that("lightgbm() prioritizes objective under params over objective as funct
  model_txt_lines <- strsplit(
    x = bst2$save_model_to_string()
    , split = "\n"
    , fixed = TRUE
  )[[1L]]
  expect_true(any(model_txt_lines == "objective=regression_l1"))
  expect_false(any(model_txt_lines == "objective=regression_l2"))
@ -3006,6 +3012,7 @@ test_that("lightgbm() defaults to 'regression' objective if objective not otherw
  model_txt_lines <- strsplit(
    x = bst$save_model_to_string()
    , split = "\n"
    , fixed = TRUE
  )[[1L]]
  expect_true(any(model_txt_lines == "objective=regression"))
  expect_false(any(model_txt_lines == "objective=regression_l1"))
@ -3023,8 +3030,9 @@ test_that("lightgbm() accepts 'num_threads' as either top-level argument or unde
  model_txt_lines <- strsplit(
    x = bst$save_model_to_string()
    , split = "\n"
    , fixed = TRUE
  )[[1L]]
-  expect_true(any(grepl("\\[num_threads: 1\\]", model_txt_lines)))
+  expect_true(any(grepl("[num_threads: 1]", model_txt_lines, fixed = TRUE)))
  bst <- lightgbm(
    data = train$data
@ -3037,8 +3045,9 @@ test_that("lightgbm() accepts 'num_threads' as either top-level argument or unde
  model_txt_lines <- strsplit(
    x = bst$save_model_to_string()
    , split = "\n"
    , fixed = TRUE
  )[[1L]]
-  expect_true(any(grepl("\\[num_threads: 1\\]", model_txt_lines)))
+  expect_true(any(grepl("[num_threads: 1]", model_txt_lines, fixed = TRUE)))
  bst <- lightgbm(
    data = train$data
@ -3052,8 +3061,9 @@ test_that("lightgbm() accepts 'num_threads' as either top-level argument or unde
  model_txt_lines <- strsplit(
    x = bst$save_model_to_string()
    , split = "\n"
    , fixed = TRUE
  )[[1L]]
-  expect_true(any(grepl("\\[num_threads: 1\\]", model_txt_lines)))
+  expect_true(any(grepl("[num_threads: 1]", model_txt_lines, fixed = TRUE)))
 })
 test_that("lightgbm() accepts 'weight' and 'weights'", {
@ -3090,19 +3100,19 @@ test_that("lightgbm() accepts 'weight' and 'weights'", {
 .assert_has_expected_logs <- function(log_txt, lgb_info, lgb_warn, early_stopping, valid_eval_msg) {
  expect_identical(
-    object = any(grepl("\\[LightGBM\\] \\[Info\\]", log_txt))
+    object = any(grepl("[LightGBM] [Info]", log_txt, fixed = TRUE))
    , expected = lgb_info
  )
  expect_identical(
-    object = any(grepl("\\[LightGBM\\] \\[Warning\\]", log_txt))
+    object = any(grepl("[LightGBM] [Warning]", log_txt, fixed = TRUE))
    , expected = lgb_warn
  )
  expect_identical(
-    object = any(grepl("Will train until there is no improvement in 5 rounds", log_txt))
+    object = any(grepl("Will train until there is no improvement in 5 rounds", log_txt, fixed = TRUE))
    , expected = early_stopping
  )
  expect_identical(
-    object = any(grepl("Did not meet early stopping", log_txt))
+    object = any(grepl("Did not meet early stopping", log_txt, fixed = TRUE))
    , expected = early_stopping
  )
  expect_identical(
--- a/R-package/tests/testthat/test_learning_to_rank.R
+++ b/R-package/tests/testthat/test_learning_to_rank.R
@ -19,7 +19,7 @@ test_that("learning-to-rank with lgb.train() works as expected", {
        , group = rep(150L, 40L)
    )
    ndcg_at <- "1,2,3"
-    eval_names <- paste0("ndcg@", strsplit(ndcg_at, ",")[[1L]])
+    eval_names <- paste0("ndcg@", strsplit(ndcg_at, ",", fixed = TRUE)[[1L]])
    params <- list(
        objective = "lambdarank"
        , metric = "ndcg"
@ -81,7 +81,7 @@ test_that("learning-to-rank with lgb.cv() works as expected", {
        , group = rep(150L, 40L)
    )
    ndcg_at <- "1,2,3"
-    eval_names <- paste0("ndcg@", strsplit(ndcg_at, ",")[[1L]])
+    eval_names <- paste0("ndcg@", strsplit(ndcg_at, ",", fixed = TRUE)[[1L]])
    params <- list(
        objective = "lambdarank"
        , metric = "ndcg"
--- a/R-package/tests/testthat/test_lgb.Booster.R
+++ b/R-package/tests/testthat/test_lgb.Booster.R
@ -705,7 +705,7 @@ test_that("Saving a model with different feature importance types works", {
    expect_true(lgb.is.Booster(bst))
    .feat_importance_from_string <- function(model_string) {
-        file_lines <- strsplit(model_string, "\n")[[1L]]
+        file_lines <- strsplit(model_string, "\n", fixed = TRUE)[[1L]]
        start_indx <- which(grepl("^feature_importances\\:$", file_lines)) + 1L
        blank_line_indices <- which(file_lines == "")
        end_indx <- blank_line_indices[blank_line_indices > start_indx][1L] - 1L
@ -771,7 +771,7 @@ test_that("Saving a model with unknown importance type fails", {
 .params_from_model_string <- function(model_str) {
-    file_lines <- strsplit(model_str, "\n")[[1L]]
+    file_lines <- strsplit(model_str, "\n", fixed = TRUE)[[1L]]
    start_indx <- which(grepl("^parameters\\:$", file_lines)) + 1L
    blank_line_indices <- which(file_lines == "")
    end_indx <- blank_line_indices[blank_line_indices > start_indx][1L] - 1L
@ -1286,7 +1286,7 @@ test_that("Booster's print, show, and summary work correctly", {
    .has_expected_content_for_finalized_model <- function(printed_txt) {
      expect_true(any(grepl("^LightGBM Model$", printed_txt)))
-      expect_true(any(grepl("Booster handle is invalid", printed_txt)))
+      expect_true(any(grepl("Booster handle is invalid", printed_txt, fixed = TRUE)))
    }
    .check_methods_work <- function(model) {
--- a/build_r.R
+++ b/build_r.R
@ -26,8 +26,8 @@ TEMP_SOURCE_DIR <- file.path(TEMP_R_DIR, "src")
  for (arg in args) {
    if (any(grepl("^\\-j[0-9]+", arg))) {  # nolint: non_portable_path
        out_list[["make_args"]] <- arg
-    } else if (any(grepl("=", arg))) {
+    } else if (any(grepl("=", arg, fixed = TRUE))) {
-      split_arg <- strsplit(arg, "=")[[1L]]
+      split_arg <- strsplit(arg, "=", fixed = TRUE)[[1L]]
      arg_name <- split_arg[[1L]]
      arg_value <- split_arg[[2L]]
      out_list[["keyword_args"]][[arg_name]] <- arg_value
@ -371,6 +371,7 @@ LGB_VERSION <- gsub(
  pattern = "rc"
  , replacement = "-"
  , x = LGB_VERSION
  , fixed = TRUE
 )
 # DESCRIPTION has placeholders for version
@ -381,11 +382,13 @@ description_contents <- gsub(
  pattern = "~~VERSION~~"
  , replacement = LGB_VERSION
  , x = description_contents
  , fixed = TRUE
 )
 description_contents <- gsub(
  pattern = "~~DATE~~"
  , replacement = as.character(Sys.Date())
  , x = description_contents
  , fixed = TRUE
 )
 writeLines(description_contents, DESCRIPTION_FILE)
@ -410,6 +413,7 @@ c_api_contents <- gsub(
  pattern = "LIGHTGBM_C_EXPORT SEXP "
  , replacement = ""
  , x = c_api_contents
  , fixed = TRUE
 )
 c_api_symbols <- gsub(
  pattern = "\\(.*"
@ -435,13 +439,15 @@ if (isTRUE(SKIP_VIGNETTES)) {
 # Install the package
 version <- gsub(
-  "Version: ",
+  pattern = "Version: ",
-  "",
+  replacement = "",
-  grep(
+  x = grep(
-    "Version: "
+    pattern = "Version: "
-    , readLines(con = file.path(TEMP_R_DIR, "DESCRIPTION"))
+    , x = readLines(con = file.path(TEMP_R_DIR, "DESCRIPTION"))
    , value = TRUE
    , fixed = TRUE
  )
  , fixed = TRUE
 )
 tarball <- file.path(getwd(), sprintf("lightgbm_%s.tar.gz", version))