[R-package] discourage use of regex for fixed string comparisons (#5685)

This commit is contained in:
James Lamb 2023-01-30 23:33:48 -06:00 коммит произвёл GitHub
Родитель 3ae0484a55
Коммит 9954bc4231
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
7 изменённых файлов: 45 добавлений и 28 удалений

Просмотреть файл

@ -42,6 +42,7 @@ LINTERS_TO_USE <- list(
, "implicit_integers" = lintr::implicit_integer_linter() , "implicit_integers" = lintr::implicit_integer_linter()
, "infix_spaces" = lintr::infix_spaces_linter() , "infix_spaces" = lintr::infix_spaces_linter()
, "inner_combine" = lintr::inner_combine_linter() , "inner_combine" = lintr::inner_combine_linter()
, "fixed_regex" = lintr::fixed_regex_linter()
, "literal_coercion" = lintr::literal_coercion_linter() , "literal_coercion" = lintr::literal_coercion_linter()
, "long_lines" = lintr::line_length_linter(length = 120L) , "long_lines" = lintr::line_length_linter(length = 120L)
, "missing_argument" = lintr::missing_argument_linter() , "missing_argument" = lintr::missing_argument_linter()

Просмотреть файл

@ -25,7 +25,7 @@ lgb.params2str <- function(params) {
stop("params must be a list") stop("params must be a list")
} }
names(params) <- gsub("\\.", "_", names(params)) names(params) <- gsub(".", "_", names(params), fixed = TRUE)
param_names <- names(params) param_names <- names(params)
ret <- list() ret <- list()

Просмотреть файл

@ -81,9 +81,9 @@ end_of_table <- empty_lines[empty_lines > start_index][1L]
# Read the contents of the table # Read the contents of the table
exported_symbols <- objdump_results[(start_index + 1L):end_of_table] exported_symbols <- objdump_results[(start_index + 1L):end_of_table]
exported_symbols <- gsub("\t", "", exported_symbols) exported_symbols <- gsub("\t", "", exported_symbols, fixed = TRUE)
exported_symbols <- gsub(".*\\] ", "", exported_symbols) exported_symbols <- gsub(".*\\] ", "", exported_symbols)
exported_symbols <- gsub(" ", "", exported_symbols) exported_symbols <- gsub(" ", "", exported_symbols, fixed = TRUE)
# Write R.def file # Write R.def file
writeLines( writeLines(

Просмотреть файл

@ -618,6 +618,7 @@ test_that("lgb.cv() prefers objective in params to keyword argument", {
model_txt_lines <- strsplit( model_txt_lines <- strsplit(
x = bst$save_model_to_string() x = bst$save_model_to_string()
, split = "\n" , split = "\n"
, fixed = TRUE
)[[1L]] )[[1L]]
expect_true(any(model_txt_lines == "objective=regression_l1")) expect_true(any(model_txt_lines == "objective=regression_l1"))
expect_false(any(model_txt_lines == "objective=regression_l2")) expect_false(any(model_txt_lines == "objective=regression_l2"))
@ -780,6 +781,7 @@ test_that("lgb.train() prefers objective in params to keyword argument", {
model_txt_lines <- strsplit( model_txt_lines <- strsplit(
x = bst$save_model_to_string() x = bst$save_model_to_string()
, split = "\n" , split = "\n"
, fixed = TRUE
)[[1L]] )[[1L]]
expect_true(any(model_txt_lines == "objective=regression_l1")) expect_true(any(model_txt_lines == "objective=regression_l1"))
expect_false(any(model_txt_lines == "objective=regression_l2")) expect_false(any(model_txt_lines == "objective=regression_l2"))
@ -2296,8 +2298,8 @@ test_that("lgb.cv() respects changes to logging verbosity", {
, verbose = 1L , verbose = 1L
) )
}) })
expect_true(any(grepl("\\[LightGBM\\] \\[Info\\]", lgb_cv_logs))) expect_true(any(grepl("[LightGBM] [Info]", lgb_cv_logs, fixed = TRUE)))
expect_true(any(grepl("\\[LightGBM\\] \\[Warning\\]", lgb_cv_logs))) expect_true(any(grepl("[LightGBM] [Warning]", lgb_cv_logs, fixed = TRUE)))
# (verbose = 0) should be WARNING level logs only # (verbose = 0) should be WARNING level logs only
lgb_cv_logs <- capture.output({ lgb_cv_logs <- capture.output({
@ -2310,8 +2312,8 @@ test_that("lgb.cv() respects changes to logging verbosity", {
, verbose = 0L , verbose = 0L
) )
}) })
expect_false(any(grepl("\\[LightGBM\\] \\[Info\\]", lgb_cv_logs))) expect_false(any(grepl("[LightGBM] [Info]", lgb_cv_logs, fixed = TRUE)))
expect_true(any(grepl("\\[LightGBM\\] \\[Warning\\]", lgb_cv_logs))) expect_true(any(grepl("[LightGBM] [Warning]", lgb_cv_logs, fixed = TRUE)))
# (verbose = -1) no logs # (verbose = -1) no logs
lgb_cv_logs <- capture.output({ lgb_cv_logs <- capture.output({
@ -2326,8 +2328,8 @@ test_that("lgb.cv() respects changes to logging verbosity", {
}) })
# NOTE: this is not length(lgb_cv_logs) == 0 because lightgbm's # NOTE: this is not length(lgb_cv_logs) == 0 because lightgbm's
# dependencies might print other messages # dependencies might print other messages
expect_false(any(grepl("\\[LightGBM\\] \\[Info\\]", lgb_cv_logs))) expect_false(any(grepl("[LightGBM] [Info]", lgb_cv_logs, fixed = TRUE)))
expect_false(any(grepl("\\[LightGBM\\] \\[Warning\\]", lgb_cv_logs))) expect_false(any(grepl("[LightGBM] [Warning]", lgb_cv_logs, fixed = TRUE)))
}) })
test_that("lgb.cv() updates params based on keyword arguments", { test_that("lgb.cv() updates params based on keyword arguments", {
@ -2918,6 +2920,7 @@ test_that("lightgbm() accepts objective as function argument and under params",
model_txt_lines <- strsplit( model_txt_lines <- strsplit(
x = bst1$save_model_to_string() x = bst1$save_model_to_string()
, split = "\n" , split = "\n"
, fixed = TRUE
)[[1L]] )[[1L]]
expect_true(any(model_txt_lines == "objective=regression_l1")) expect_true(any(model_txt_lines == "objective=regression_l1"))
expect_false(any(model_txt_lines == "objective=regression_l2")) expect_false(any(model_txt_lines == "objective=regression_l2"))
@ -2933,6 +2936,7 @@ test_that("lightgbm() accepts objective as function argument and under params",
model_txt_lines <- strsplit( model_txt_lines <- strsplit(
x = bst2$save_model_to_string() x = bst2$save_model_to_string()
, split = "\n" , split = "\n"
, fixed = TRUE
)[[1L]] )[[1L]]
expect_true(any(model_txt_lines == "objective=regression_l1")) expect_true(any(model_txt_lines == "objective=regression_l1"))
expect_false(any(model_txt_lines == "objective=regression_l2")) expect_false(any(model_txt_lines == "objective=regression_l2"))
@ -2951,6 +2955,7 @@ test_that("lightgbm() prioritizes objective under params over objective as funct
model_txt_lines <- strsplit( model_txt_lines <- strsplit(
x = bst1$save_model_to_string() x = bst1$save_model_to_string()
, split = "\n" , split = "\n"
, fixed = TRUE
)[[1L]] )[[1L]]
expect_true(any(model_txt_lines == "objective=regression_l1")) expect_true(any(model_txt_lines == "objective=regression_l1"))
expect_false(any(model_txt_lines == "objective=regression_l2")) expect_false(any(model_txt_lines == "objective=regression_l2"))
@ -2967,6 +2972,7 @@ test_that("lightgbm() prioritizes objective under params over objective as funct
model_txt_lines <- strsplit( model_txt_lines <- strsplit(
x = bst2$save_model_to_string() x = bst2$save_model_to_string()
, split = "\n" , split = "\n"
, fixed = TRUE
)[[1L]] )[[1L]]
expect_true(any(model_txt_lines == "objective=regression_l1")) expect_true(any(model_txt_lines == "objective=regression_l1"))
expect_false(any(model_txt_lines == "objective=regression_l2")) expect_false(any(model_txt_lines == "objective=regression_l2"))
@ -3006,6 +3012,7 @@ test_that("lightgbm() defaults to 'regression' objective if objective not otherw
model_txt_lines <- strsplit( model_txt_lines <- strsplit(
x = bst$save_model_to_string() x = bst$save_model_to_string()
, split = "\n" , split = "\n"
, fixed = TRUE
)[[1L]] )[[1L]]
expect_true(any(model_txt_lines == "objective=regression")) expect_true(any(model_txt_lines == "objective=regression"))
expect_false(any(model_txt_lines == "objective=regression_l1")) expect_false(any(model_txt_lines == "objective=regression_l1"))
@ -3023,8 +3030,9 @@ test_that("lightgbm() accepts 'num_threads' as either top-level argument or unde
model_txt_lines <- strsplit( model_txt_lines <- strsplit(
x = bst$save_model_to_string() x = bst$save_model_to_string()
, split = "\n" , split = "\n"
, fixed = TRUE
)[[1L]] )[[1L]]
expect_true(any(grepl("\\[num_threads: 1\\]", model_txt_lines))) expect_true(any(grepl("[num_threads: 1]", model_txt_lines, fixed = TRUE)))
bst <- lightgbm( bst <- lightgbm(
data = train$data data = train$data
@ -3037,8 +3045,9 @@ test_that("lightgbm() accepts 'num_threads' as either top-level argument or unde
model_txt_lines <- strsplit( model_txt_lines <- strsplit(
x = bst$save_model_to_string() x = bst$save_model_to_string()
, split = "\n" , split = "\n"
, fixed = TRUE
)[[1L]] )[[1L]]
expect_true(any(grepl("\\[num_threads: 1\\]", model_txt_lines))) expect_true(any(grepl("[num_threads: 1]", model_txt_lines, fixed = TRUE)))
bst <- lightgbm( bst <- lightgbm(
data = train$data data = train$data
@ -3052,8 +3061,9 @@ test_that("lightgbm() accepts 'num_threads' as either top-level argument or unde
model_txt_lines <- strsplit( model_txt_lines <- strsplit(
x = bst$save_model_to_string() x = bst$save_model_to_string()
, split = "\n" , split = "\n"
, fixed = TRUE
)[[1L]] )[[1L]]
expect_true(any(grepl("\\[num_threads: 1\\]", model_txt_lines))) expect_true(any(grepl("[num_threads: 1]", model_txt_lines, fixed = TRUE)))
}) })
test_that("lightgbm() accepts 'weight' and 'weights'", { test_that("lightgbm() accepts 'weight' and 'weights'", {
@ -3090,19 +3100,19 @@ test_that("lightgbm() accepts 'weight' and 'weights'", {
.assert_has_expected_logs <- function(log_txt, lgb_info, lgb_warn, early_stopping, valid_eval_msg) { .assert_has_expected_logs <- function(log_txt, lgb_info, lgb_warn, early_stopping, valid_eval_msg) {
expect_identical( expect_identical(
object = any(grepl("\\[LightGBM\\] \\[Info\\]", log_txt)) object = any(grepl("[LightGBM] [Info]", log_txt, fixed = TRUE))
, expected = lgb_info , expected = lgb_info
) )
expect_identical( expect_identical(
object = any(grepl("\\[LightGBM\\] \\[Warning\\]", log_txt)) object = any(grepl("[LightGBM] [Warning]", log_txt, fixed = TRUE))
, expected = lgb_warn , expected = lgb_warn
) )
expect_identical( expect_identical(
object = any(grepl("Will train until there is no improvement in 5 rounds", log_txt)) object = any(grepl("Will train until there is no improvement in 5 rounds", log_txt, fixed = TRUE))
, expected = early_stopping , expected = early_stopping
) )
expect_identical( expect_identical(
object = any(grepl("Did not meet early stopping", log_txt)) object = any(grepl("Did not meet early stopping", log_txt, fixed = TRUE))
, expected = early_stopping , expected = early_stopping
) )
expect_identical( expect_identical(

Просмотреть файл

@ -19,7 +19,7 @@ test_that("learning-to-rank with lgb.train() works as expected", {
, group = rep(150L, 40L) , group = rep(150L, 40L)
) )
ndcg_at <- "1,2,3" ndcg_at <- "1,2,3"
eval_names <- paste0("ndcg@", strsplit(ndcg_at, ",")[[1L]]) eval_names <- paste0("ndcg@", strsplit(ndcg_at, ",", fixed = TRUE)[[1L]])
params <- list( params <- list(
objective = "lambdarank" objective = "lambdarank"
, metric = "ndcg" , metric = "ndcg"
@ -81,7 +81,7 @@ test_that("learning-to-rank with lgb.cv() works as expected", {
, group = rep(150L, 40L) , group = rep(150L, 40L)
) )
ndcg_at <- "1,2,3" ndcg_at <- "1,2,3"
eval_names <- paste0("ndcg@", strsplit(ndcg_at, ",")[[1L]]) eval_names <- paste0("ndcg@", strsplit(ndcg_at, ",", fixed = TRUE)[[1L]])
params <- list( params <- list(
objective = "lambdarank" objective = "lambdarank"
, metric = "ndcg" , metric = "ndcg"

Просмотреть файл

@ -705,7 +705,7 @@ test_that("Saving a model with different feature importance types works", {
expect_true(lgb.is.Booster(bst)) expect_true(lgb.is.Booster(bst))
.feat_importance_from_string <- function(model_string) { .feat_importance_from_string <- function(model_string) {
file_lines <- strsplit(model_string, "\n")[[1L]] file_lines <- strsplit(model_string, "\n", fixed = TRUE)[[1L]]
start_indx <- which(grepl("^feature_importances\\:$", file_lines)) + 1L start_indx <- which(grepl("^feature_importances\\:$", file_lines)) + 1L
blank_line_indices <- which(file_lines == "") blank_line_indices <- which(file_lines == "")
end_indx <- blank_line_indices[blank_line_indices > start_indx][1L] - 1L end_indx <- blank_line_indices[blank_line_indices > start_indx][1L] - 1L
@ -771,7 +771,7 @@ test_that("Saving a model with unknown importance type fails", {
.params_from_model_string <- function(model_str) { .params_from_model_string <- function(model_str) {
file_lines <- strsplit(model_str, "\n")[[1L]] file_lines <- strsplit(model_str, "\n", fixed = TRUE)[[1L]]
start_indx <- which(grepl("^parameters\\:$", file_lines)) + 1L start_indx <- which(grepl("^parameters\\:$", file_lines)) + 1L
blank_line_indices <- which(file_lines == "") blank_line_indices <- which(file_lines == "")
end_indx <- blank_line_indices[blank_line_indices > start_indx][1L] - 1L end_indx <- blank_line_indices[blank_line_indices > start_indx][1L] - 1L
@ -1286,7 +1286,7 @@ test_that("Booster's print, show, and summary work correctly", {
.has_expected_content_for_finalized_model <- function(printed_txt) { .has_expected_content_for_finalized_model <- function(printed_txt) {
expect_true(any(grepl("^LightGBM Model$", printed_txt))) expect_true(any(grepl("^LightGBM Model$", printed_txt)))
expect_true(any(grepl("Booster handle is invalid", printed_txt))) expect_true(any(grepl("Booster handle is invalid", printed_txt, fixed = TRUE)))
} }
.check_methods_work <- function(model) { .check_methods_work <- function(model) {

Просмотреть файл

@ -26,8 +26,8 @@ TEMP_SOURCE_DIR <- file.path(TEMP_R_DIR, "src")
for (arg in args) { for (arg in args) {
if (any(grepl("^\\-j[0-9]+", arg))) { # nolint: non_portable_path if (any(grepl("^\\-j[0-9]+", arg))) { # nolint: non_portable_path
out_list[["make_args"]] <- arg out_list[["make_args"]] <- arg
} else if (any(grepl("=", arg))) { } else if (any(grepl("=", arg, fixed = TRUE))) {
split_arg <- strsplit(arg, "=")[[1L]] split_arg <- strsplit(arg, "=", fixed = TRUE)[[1L]]
arg_name <- split_arg[[1L]] arg_name <- split_arg[[1L]]
arg_value <- split_arg[[2L]] arg_value <- split_arg[[2L]]
out_list[["keyword_args"]][[arg_name]] <- arg_value out_list[["keyword_args"]][[arg_name]] <- arg_value
@ -371,6 +371,7 @@ LGB_VERSION <- gsub(
pattern = "rc" pattern = "rc"
, replacement = "-" , replacement = "-"
, x = LGB_VERSION , x = LGB_VERSION
, fixed = TRUE
) )
# DESCRIPTION has placeholders for version # DESCRIPTION has placeholders for version
@ -381,11 +382,13 @@ description_contents <- gsub(
pattern = "~~VERSION~~" pattern = "~~VERSION~~"
, replacement = LGB_VERSION , replacement = LGB_VERSION
, x = description_contents , x = description_contents
, fixed = TRUE
) )
description_contents <- gsub( description_contents <- gsub(
pattern = "~~DATE~~" pattern = "~~DATE~~"
, replacement = as.character(Sys.Date()) , replacement = as.character(Sys.Date())
, x = description_contents , x = description_contents
, fixed = TRUE
) )
writeLines(description_contents, DESCRIPTION_FILE) writeLines(description_contents, DESCRIPTION_FILE)
@ -410,6 +413,7 @@ c_api_contents <- gsub(
pattern = "LIGHTGBM_C_EXPORT SEXP " pattern = "LIGHTGBM_C_EXPORT SEXP "
, replacement = "" , replacement = ""
, x = c_api_contents , x = c_api_contents
, fixed = TRUE
) )
c_api_symbols <- gsub( c_api_symbols <- gsub(
pattern = "\\(.*" pattern = "\\(.*"
@ -435,13 +439,15 @@ if (isTRUE(SKIP_VIGNETTES)) {
# Install the package # Install the package
version <- gsub( version <- gsub(
"Version: ", pattern = "Version: ",
"", replacement = "",
grep( x = grep(
"Version: " pattern = "Version: "
, readLines(con = file.path(TEMP_R_DIR, "DESCRIPTION")) , x = readLines(con = file.path(TEMP_R_DIR, "DESCRIPTION"))
, value = TRUE , value = TRUE
, fixed = TRUE
) )
, fixed = TRUE
) )
tarball <- file.path(getwd(), sprintf("lightgbm_%s.tar.gz", version)) tarball <- file.path(getwd(), sprintf("lightgbm_%s.tar.gz", version))