зеркало из https://github.com/microsoft/LightGBM.git
[R-package] discourage use of regex for fixed string comparisons (#5685)
This commit is contained in:
Родитель
3ae0484a55
Коммит
9954bc4231
|
@ -42,6 +42,7 @@ LINTERS_TO_USE <- list(
|
||||||
, "implicit_integers" = lintr::implicit_integer_linter()
|
, "implicit_integers" = lintr::implicit_integer_linter()
|
||||||
, "infix_spaces" = lintr::infix_spaces_linter()
|
, "infix_spaces" = lintr::infix_spaces_linter()
|
||||||
, "inner_combine" = lintr::inner_combine_linter()
|
, "inner_combine" = lintr::inner_combine_linter()
|
||||||
|
, "fixed_regex" = lintr::fixed_regex_linter()
|
||||||
, "literal_coercion" = lintr::literal_coercion_linter()
|
, "literal_coercion" = lintr::literal_coercion_linter()
|
||||||
, "long_lines" = lintr::line_length_linter(length = 120L)
|
, "long_lines" = lintr::line_length_linter(length = 120L)
|
||||||
, "missing_argument" = lintr::missing_argument_linter()
|
, "missing_argument" = lintr::missing_argument_linter()
|
||||||
|
|
|
@ -25,7 +25,7 @@ lgb.params2str <- function(params) {
|
||||||
stop("params must be a list")
|
stop("params must be a list")
|
||||||
}
|
}
|
||||||
|
|
||||||
names(params) <- gsub("\\.", "_", names(params))
|
names(params) <- gsub(".", "_", names(params), fixed = TRUE)
|
||||||
param_names <- names(params)
|
param_names <- names(params)
|
||||||
ret <- list()
|
ret <- list()
|
||||||
|
|
||||||
|
|
|
@ -81,9 +81,9 @@ end_of_table <- empty_lines[empty_lines > start_index][1L]
|
||||||
|
|
||||||
# Read the contents of the table
|
# Read the contents of the table
|
||||||
exported_symbols <- objdump_results[(start_index + 1L):end_of_table]
|
exported_symbols <- objdump_results[(start_index + 1L):end_of_table]
|
||||||
exported_symbols <- gsub("\t", "", exported_symbols)
|
exported_symbols <- gsub("\t", "", exported_symbols, fixed = TRUE)
|
||||||
exported_symbols <- gsub(".*\\] ", "", exported_symbols)
|
exported_symbols <- gsub(".*\\] ", "", exported_symbols)
|
||||||
exported_symbols <- gsub(" ", "", exported_symbols)
|
exported_symbols <- gsub(" ", "", exported_symbols, fixed = TRUE)
|
||||||
|
|
||||||
# Write R.def file
|
# Write R.def file
|
||||||
writeLines(
|
writeLines(
|
||||||
|
|
|
@ -618,6 +618,7 @@ test_that("lgb.cv() prefers objective in params to keyword argument", {
|
||||||
model_txt_lines <- strsplit(
|
model_txt_lines <- strsplit(
|
||||||
x = bst$save_model_to_string()
|
x = bst$save_model_to_string()
|
||||||
, split = "\n"
|
, split = "\n"
|
||||||
|
, fixed = TRUE
|
||||||
)[[1L]]
|
)[[1L]]
|
||||||
expect_true(any(model_txt_lines == "objective=regression_l1"))
|
expect_true(any(model_txt_lines == "objective=regression_l1"))
|
||||||
expect_false(any(model_txt_lines == "objective=regression_l2"))
|
expect_false(any(model_txt_lines == "objective=regression_l2"))
|
||||||
|
@ -780,6 +781,7 @@ test_that("lgb.train() prefers objective in params to keyword argument", {
|
||||||
model_txt_lines <- strsplit(
|
model_txt_lines <- strsplit(
|
||||||
x = bst$save_model_to_string()
|
x = bst$save_model_to_string()
|
||||||
, split = "\n"
|
, split = "\n"
|
||||||
|
, fixed = TRUE
|
||||||
)[[1L]]
|
)[[1L]]
|
||||||
expect_true(any(model_txt_lines == "objective=regression_l1"))
|
expect_true(any(model_txt_lines == "objective=regression_l1"))
|
||||||
expect_false(any(model_txt_lines == "objective=regression_l2"))
|
expect_false(any(model_txt_lines == "objective=regression_l2"))
|
||||||
|
@ -2296,8 +2298,8 @@ test_that("lgb.cv() respects changes to logging verbosity", {
|
||||||
, verbose = 1L
|
, verbose = 1L
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
expect_true(any(grepl("\\[LightGBM\\] \\[Info\\]", lgb_cv_logs)))
|
expect_true(any(grepl("[LightGBM] [Info]", lgb_cv_logs, fixed = TRUE)))
|
||||||
expect_true(any(grepl("\\[LightGBM\\] \\[Warning\\]", lgb_cv_logs)))
|
expect_true(any(grepl("[LightGBM] [Warning]", lgb_cv_logs, fixed = TRUE)))
|
||||||
|
|
||||||
# (verbose = 0) should be WARNING level logs only
|
# (verbose = 0) should be WARNING level logs only
|
||||||
lgb_cv_logs <- capture.output({
|
lgb_cv_logs <- capture.output({
|
||||||
|
@ -2310,8 +2312,8 @@ test_that("lgb.cv() respects changes to logging verbosity", {
|
||||||
, verbose = 0L
|
, verbose = 0L
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
expect_false(any(grepl("\\[LightGBM\\] \\[Info\\]", lgb_cv_logs)))
|
expect_false(any(grepl("[LightGBM] [Info]", lgb_cv_logs, fixed = TRUE)))
|
||||||
expect_true(any(grepl("\\[LightGBM\\] \\[Warning\\]", lgb_cv_logs)))
|
expect_true(any(grepl("[LightGBM] [Warning]", lgb_cv_logs, fixed = TRUE)))
|
||||||
|
|
||||||
# (verbose = -1) no logs
|
# (verbose = -1) no logs
|
||||||
lgb_cv_logs <- capture.output({
|
lgb_cv_logs <- capture.output({
|
||||||
|
@ -2326,8 +2328,8 @@ test_that("lgb.cv() respects changes to logging verbosity", {
|
||||||
})
|
})
|
||||||
# NOTE: this is not length(lgb_cv_logs) == 0 because lightgbm's
|
# NOTE: this is not length(lgb_cv_logs) == 0 because lightgbm's
|
||||||
# dependencies might print other messages
|
# dependencies might print other messages
|
||||||
expect_false(any(grepl("\\[LightGBM\\] \\[Info\\]", lgb_cv_logs)))
|
expect_false(any(grepl("[LightGBM] [Info]", lgb_cv_logs, fixed = TRUE)))
|
||||||
expect_false(any(grepl("\\[LightGBM\\] \\[Warning\\]", lgb_cv_logs)))
|
expect_false(any(grepl("[LightGBM] [Warning]", lgb_cv_logs, fixed = TRUE)))
|
||||||
})
|
})
|
||||||
|
|
||||||
test_that("lgb.cv() updates params based on keyword arguments", {
|
test_that("lgb.cv() updates params based on keyword arguments", {
|
||||||
|
@ -2918,6 +2920,7 @@ test_that("lightgbm() accepts objective as function argument and under params",
|
||||||
model_txt_lines <- strsplit(
|
model_txt_lines <- strsplit(
|
||||||
x = bst1$save_model_to_string()
|
x = bst1$save_model_to_string()
|
||||||
, split = "\n"
|
, split = "\n"
|
||||||
|
, fixed = TRUE
|
||||||
)[[1L]]
|
)[[1L]]
|
||||||
expect_true(any(model_txt_lines == "objective=regression_l1"))
|
expect_true(any(model_txt_lines == "objective=regression_l1"))
|
||||||
expect_false(any(model_txt_lines == "objective=regression_l2"))
|
expect_false(any(model_txt_lines == "objective=regression_l2"))
|
||||||
|
@ -2933,6 +2936,7 @@ test_that("lightgbm() accepts objective as function argument and under params",
|
||||||
model_txt_lines <- strsplit(
|
model_txt_lines <- strsplit(
|
||||||
x = bst2$save_model_to_string()
|
x = bst2$save_model_to_string()
|
||||||
, split = "\n"
|
, split = "\n"
|
||||||
|
, fixed = TRUE
|
||||||
)[[1L]]
|
)[[1L]]
|
||||||
expect_true(any(model_txt_lines == "objective=regression_l1"))
|
expect_true(any(model_txt_lines == "objective=regression_l1"))
|
||||||
expect_false(any(model_txt_lines == "objective=regression_l2"))
|
expect_false(any(model_txt_lines == "objective=regression_l2"))
|
||||||
|
@ -2951,6 +2955,7 @@ test_that("lightgbm() prioritizes objective under params over objective as funct
|
||||||
model_txt_lines <- strsplit(
|
model_txt_lines <- strsplit(
|
||||||
x = bst1$save_model_to_string()
|
x = bst1$save_model_to_string()
|
||||||
, split = "\n"
|
, split = "\n"
|
||||||
|
, fixed = TRUE
|
||||||
)[[1L]]
|
)[[1L]]
|
||||||
expect_true(any(model_txt_lines == "objective=regression_l1"))
|
expect_true(any(model_txt_lines == "objective=regression_l1"))
|
||||||
expect_false(any(model_txt_lines == "objective=regression_l2"))
|
expect_false(any(model_txt_lines == "objective=regression_l2"))
|
||||||
|
@ -2967,6 +2972,7 @@ test_that("lightgbm() prioritizes objective under params over objective as funct
|
||||||
model_txt_lines <- strsplit(
|
model_txt_lines <- strsplit(
|
||||||
x = bst2$save_model_to_string()
|
x = bst2$save_model_to_string()
|
||||||
, split = "\n"
|
, split = "\n"
|
||||||
|
, fixed = TRUE
|
||||||
)[[1L]]
|
)[[1L]]
|
||||||
expect_true(any(model_txt_lines == "objective=regression_l1"))
|
expect_true(any(model_txt_lines == "objective=regression_l1"))
|
||||||
expect_false(any(model_txt_lines == "objective=regression_l2"))
|
expect_false(any(model_txt_lines == "objective=regression_l2"))
|
||||||
|
@ -3006,6 +3012,7 @@ test_that("lightgbm() defaults to 'regression' objective if objective not otherw
|
||||||
model_txt_lines <- strsplit(
|
model_txt_lines <- strsplit(
|
||||||
x = bst$save_model_to_string()
|
x = bst$save_model_to_string()
|
||||||
, split = "\n"
|
, split = "\n"
|
||||||
|
, fixed = TRUE
|
||||||
)[[1L]]
|
)[[1L]]
|
||||||
expect_true(any(model_txt_lines == "objective=regression"))
|
expect_true(any(model_txt_lines == "objective=regression"))
|
||||||
expect_false(any(model_txt_lines == "objective=regression_l1"))
|
expect_false(any(model_txt_lines == "objective=regression_l1"))
|
||||||
|
@ -3023,8 +3030,9 @@ test_that("lightgbm() accepts 'num_threads' as either top-level argument or unde
|
||||||
model_txt_lines <- strsplit(
|
model_txt_lines <- strsplit(
|
||||||
x = bst$save_model_to_string()
|
x = bst$save_model_to_string()
|
||||||
, split = "\n"
|
, split = "\n"
|
||||||
|
, fixed = TRUE
|
||||||
)[[1L]]
|
)[[1L]]
|
||||||
expect_true(any(grepl("\\[num_threads: 1\\]", model_txt_lines)))
|
expect_true(any(grepl("[num_threads: 1]", model_txt_lines, fixed = TRUE)))
|
||||||
|
|
||||||
bst <- lightgbm(
|
bst <- lightgbm(
|
||||||
data = train$data
|
data = train$data
|
||||||
|
@ -3037,8 +3045,9 @@ test_that("lightgbm() accepts 'num_threads' as either top-level argument or unde
|
||||||
model_txt_lines <- strsplit(
|
model_txt_lines <- strsplit(
|
||||||
x = bst$save_model_to_string()
|
x = bst$save_model_to_string()
|
||||||
, split = "\n"
|
, split = "\n"
|
||||||
|
, fixed = TRUE
|
||||||
)[[1L]]
|
)[[1L]]
|
||||||
expect_true(any(grepl("\\[num_threads: 1\\]", model_txt_lines)))
|
expect_true(any(grepl("[num_threads: 1]", model_txt_lines, fixed = TRUE)))
|
||||||
|
|
||||||
bst <- lightgbm(
|
bst <- lightgbm(
|
||||||
data = train$data
|
data = train$data
|
||||||
|
@ -3052,8 +3061,9 @@ test_that("lightgbm() accepts 'num_threads' as either top-level argument or unde
|
||||||
model_txt_lines <- strsplit(
|
model_txt_lines <- strsplit(
|
||||||
x = bst$save_model_to_string()
|
x = bst$save_model_to_string()
|
||||||
, split = "\n"
|
, split = "\n"
|
||||||
|
, fixed = TRUE
|
||||||
)[[1L]]
|
)[[1L]]
|
||||||
expect_true(any(grepl("\\[num_threads: 1\\]", model_txt_lines)))
|
expect_true(any(grepl("[num_threads: 1]", model_txt_lines, fixed = TRUE)))
|
||||||
})
|
})
|
||||||
|
|
||||||
test_that("lightgbm() accepts 'weight' and 'weights'", {
|
test_that("lightgbm() accepts 'weight' and 'weights'", {
|
||||||
|
@ -3090,19 +3100,19 @@ test_that("lightgbm() accepts 'weight' and 'weights'", {
|
||||||
|
|
||||||
.assert_has_expected_logs <- function(log_txt, lgb_info, lgb_warn, early_stopping, valid_eval_msg) {
|
.assert_has_expected_logs <- function(log_txt, lgb_info, lgb_warn, early_stopping, valid_eval_msg) {
|
||||||
expect_identical(
|
expect_identical(
|
||||||
object = any(grepl("\\[LightGBM\\] \\[Info\\]", log_txt))
|
object = any(grepl("[LightGBM] [Info]", log_txt, fixed = TRUE))
|
||||||
, expected = lgb_info
|
, expected = lgb_info
|
||||||
)
|
)
|
||||||
expect_identical(
|
expect_identical(
|
||||||
object = any(grepl("\\[LightGBM\\] \\[Warning\\]", log_txt))
|
object = any(grepl("[LightGBM] [Warning]", log_txt, fixed = TRUE))
|
||||||
, expected = lgb_warn
|
, expected = lgb_warn
|
||||||
)
|
)
|
||||||
expect_identical(
|
expect_identical(
|
||||||
object = any(grepl("Will train until there is no improvement in 5 rounds", log_txt))
|
object = any(grepl("Will train until there is no improvement in 5 rounds", log_txt, fixed = TRUE))
|
||||||
, expected = early_stopping
|
, expected = early_stopping
|
||||||
)
|
)
|
||||||
expect_identical(
|
expect_identical(
|
||||||
object = any(grepl("Did not meet early stopping", log_txt))
|
object = any(grepl("Did not meet early stopping", log_txt, fixed = TRUE))
|
||||||
, expected = early_stopping
|
, expected = early_stopping
|
||||||
)
|
)
|
||||||
expect_identical(
|
expect_identical(
|
||||||
|
|
|
@ -19,7 +19,7 @@ test_that("learning-to-rank with lgb.train() works as expected", {
|
||||||
, group = rep(150L, 40L)
|
, group = rep(150L, 40L)
|
||||||
)
|
)
|
||||||
ndcg_at <- "1,2,3"
|
ndcg_at <- "1,2,3"
|
||||||
eval_names <- paste0("ndcg@", strsplit(ndcg_at, ",")[[1L]])
|
eval_names <- paste0("ndcg@", strsplit(ndcg_at, ",", fixed = TRUE)[[1L]])
|
||||||
params <- list(
|
params <- list(
|
||||||
objective = "lambdarank"
|
objective = "lambdarank"
|
||||||
, metric = "ndcg"
|
, metric = "ndcg"
|
||||||
|
@ -81,7 +81,7 @@ test_that("learning-to-rank with lgb.cv() works as expected", {
|
||||||
, group = rep(150L, 40L)
|
, group = rep(150L, 40L)
|
||||||
)
|
)
|
||||||
ndcg_at <- "1,2,3"
|
ndcg_at <- "1,2,3"
|
||||||
eval_names <- paste0("ndcg@", strsplit(ndcg_at, ",")[[1L]])
|
eval_names <- paste0("ndcg@", strsplit(ndcg_at, ",", fixed = TRUE)[[1L]])
|
||||||
params <- list(
|
params <- list(
|
||||||
objective = "lambdarank"
|
objective = "lambdarank"
|
||||||
, metric = "ndcg"
|
, metric = "ndcg"
|
||||||
|
|
|
@ -705,7 +705,7 @@ test_that("Saving a model with different feature importance types works", {
|
||||||
expect_true(lgb.is.Booster(bst))
|
expect_true(lgb.is.Booster(bst))
|
||||||
|
|
||||||
.feat_importance_from_string <- function(model_string) {
|
.feat_importance_from_string <- function(model_string) {
|
||||||
file_lines <- strsplit(model_string, "\n")[[1L]]
|
file_lines <- strsplit(model_string, "\n", fixed = TRUE)[[1L]]
|
||||||
start_indx <- which(grepl("^feature_importances\\:$", file_lines)) + 1L
|
start_indx <- which(grepl("^feature_importances\\:$", file_lines)) + 1L
|
||||||
blank_line_indices <- which(file_lines == "")
|
blank_line_indices <- which(file_lines == "")
|
||||||
end_indx <- blank_line_indices[blank_line_indices > start_indx][1L] - 1L
|
end_indx <- blank_line_indices[blank_line_indices > start_indx][1L] - 1L
|
||||||
|
@ -771,7 +771,7 @@ test_that("Saving a model with unknown importance type fails", {
|
||||||
|
|
||||||
|
|
||||||
.params_from_model_string <- function(model_str) {
|
.params_from_model_string <- function(model_str) {
|
||||||
file_lines <- strsplit(model_str, "\n")[[1L]]
|
file_lines <- strsplit(model_str, "\n", fixed = TRUE)[[1L]]
|
||||||
start_indx <- which(grepl("^parameters\\:$", file_lines)) + 1L
|
start_indx <- which(grepl("^parameters\\:$", file_lines)) + 1L
|
||||||
blank_line_indices <- which(file_lines == "")
|
blank_line_indices <- which(file_lines == "")
|
||||||
end_indx <- blank_line_indices[blank_line_indices > start_indx][1L] - 1L
|
end_indx <- blank_line_indices[blank_line_indices > start_indx][1L] - 1L
|
||||||
|
@ -1286,7 +1286,7 @@ test_that("Booster's print, show, and summary work correctly", {
|
||||||
|
|
||||||
.has_expected_content_for_finalized_model <- function(printed_txt) {
|
.has_expected_content_for_finalized_model <- function(printed_txt) {
|
||||||
expect_true(any(grepl("^LightGBM Model$", printed_txt)))
|
expect_true(any(grepl("^LightGBM Model$", printed_txt)))
|
||||||
expect_true(any(grepl("Booster handle is invalid", printed_txt)))
|
expect_true(any(grepl("Booster handle is invalid", printed_txt, fixed = TRUE)))
|
||||||
}
|
}
|
||||||
|
|
||||||
.check_methods_work <- function(model) {
|
.check_methods_work <- function(model) {
|
||||||
|
|
20
build_r.R
20
build_r.R
|
@ -26,8 +26,8 @@ TEMP_SOURCE_DIR <- file.path(TEMP_R_DIR, "src")
|
||||||
for (arg in args) {
|
for (arg in args) {
|
||||||
if (any(grepl("^\\-j[0-9]+", arg))) { # nolint: non_portable_path
|
if (any(grepl("^\\-j[0-9]+", arg))) { # nolint: non_portable_path
|
||||||
out_list[["make_args"]] <- arg
|
out_list[["make_args"]] <- arg
|
||||||
} else if (any(grepl("=", arg))) {
|
} else if (any(grepl("=", arg, fixed = TRUE))) {
|
||||||
split_arg <- strsplit(arg, "=")[[1L]]
|
split_arg <- strsplit(arg, "=", fixed = TRUE)[[1L]]
|
||||||
arg_name <- split_arg[[1L]]
|
arg_name <- split_arg[[1L]]
|
||||||
arg_value <- split_arg[[2L]]
|
arg_value <- split_arg[[2L]]
|
||||||
out_list[["keyword_args"]][[arg_name]] <- arg_value
|
out_list[["keyword_args"]][[arg_name]] <- arg_value
|
||||||
|
@ -371,6 +371,7 @@ LGB_VERSION <- gsub(
|
||||||
pattern = "rc"
|
pattern = "rc"
|
||||||
, replacement = "-"
|
, replacement = "-"
|
||||||
, x = LGB_VERSION
|
, x = LGB_VERSION
|
||||||
|
, fixed = TRUE
|
||||||
)
|
)
|
||||||
|
|
||||||
# DESCRIPTION has placeholders for version
|
# DESCRIPTION has placeholders for version
|
||||||
|
@ -381,11 +382,13 @@ description_contents <- gsub(
|
||||||
pattern = "~~VERSION~~"
|
pattern = "~~VERSION~~"
|
||||||
, replacement = LGB_VERSION
|
, replacement = LGB_VERSION
|
||||||
, x = description_contents
|
, x = description_contents
|
||||||
|
, fixed = TRUE
|
||||||
)
|
)
|
||||||
description_contents <- gsub(
|
description_contents <- gsub(
|
||||||
pattern = "~~DATE~~"
|
pattern = "~~DATE~~"
|
||||||
, replacement = as.character(Sys.Date())
|
, replacement = as.character(Sys.Date())
|
||||||
, x = description_contents
|
, x = description_contents
|
||||||
|
, fixed = TRUE
|
||||||
)
|
)
|
||||||
writeLines(description_contents, DESCRIPTION_FILE)
|
writeLines(description_contents, DESCRIPTION_FILE)
|
||||||
|
|
||||||
|
@ -410,6 +413,7 @@ c_api_contents <- gsub(
|
||||||
pattern = "LIGHTGBM_C_EXPORT SEXP "
|
pattern = "LIGHTGBM_C_EXPORT SEXP "
|
||||||
, replacement = ""
|
, replacement = ""
|
||||||
, x = c_api_contents
|
, x = c_api_contents
|
||||||
|
, fixed = TRUE
|
||||||
)
|
)
|
||||||
c_api_symbols <- gsub(
|
c_api_symbols <- gsub(
|
||||||
pattern = "\\(.*"
|
pattern = "\\(.*"
|
||||||
|
@ -435,13 +439,15 @@ if (isTRUE(SKIP_VIGNETTES)) {
|
||||||
|
|
||||||
# Install the package
|
# Install the package
|
||||||
version <- gsub(
|
version <- gsub(
|
||||||
"Version: ",
|
pattern = "Version: ",
|
||||||
"",
|
replacement = "",
|
||||||
grep(
|
x = grep(
|
||||||
"Version: "
|
pattern = "Version: "
|
||||||
, readLines(con = file.path(TEMP_R_DIR, "DESCRIPTION"))
|
, x = readLines(con = file.path(TEMP_R_DIR, "DESCRIPTION"))
|
||||||
, value = TRUE
|
, value = TRUE
|
||||||
|
, fixed = TRUE
|
||||||
)
|
)
|
||||||
|
, fixed = TRUE
|
||||||
)
|
)
|
||||||
tarball <- file.path(getwd(), sprintf("lightgbm_%s.tar.gz", version))
|
tarball <- file.path(getwd(), sprintf("lightgbm_%s.tar.gz", version))
|
||||||
|
|
||||||
|
|
Загрузка…
Ссылка в новой задаче