[R-package] factored dependency 'magrittr' out of R package (#2334)

This commit is contained in:
James Lamb 2019-09-28 23:05:12 -05:00 коммит произвёл GitHub
Родитель b3c126629e
Коммит 42204c43da
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
8 изменённых файлов: 119 добавлений и 64 удалений

Просмотреть файл

@ -39,7 +39,6 @@ Imports:
data.table (>= 1.9.6), data.table (>= 1.9.6),
graphics, graphics,
jsonlite (>= 1.0), jsonlite (>= 1.0),
magrittr (>= 1.5),
Matrix (>= 1.1-0), Matrix (>= 1.1-0),
methods methods
RoxygenNote: 6.0.1 RoxygenNote: 6.0.1

6
R-package/NAMESPACE Executable file → Normal file
Просмотреть файл

@ -43,13 +43,11 @@ importFrom(data.table,data.table)
importFrom(data.table,rbindlist) importFrom(data.table,rbindlist)
importFrom(data.table,set) importFrom(data.table,set)
importFrom(data.table,setnames) importFrom(data.table,setnames)
importFrom(data.table,setorder)
importFrom(data.table,setorderv)
importFrom(graphics,barplot) importFrom(graphics,barplot)
importFrom(graphics,par) importFrom(graphics,par)
importFrom(jsonlite,fromJSON) importFrom(jsonlite,fromJSON)
importFrom(magrittr,"%>%")
importFrom(magrittr,"%T>%")
importFrom(magrittr,extract)
importFrom(magrittr,inset)
importFrom(methods,is) importFrom(methods,is)
importFrom(stats,quantile) importFrom(stats,quantile)
useDynLib(lib_lightgbm , .registration = TRUE) useDynLib(lib_lightgbm , .registration = TRUE)

Просмотреть файл

@ -29,8 +29,7 @@
#' tree_imp1 <- lgb.importance(model, percentage = TRUE) #' tree_imp1 <- lgb.importance(model, percentage = TRUE)
#' tree_imp2 <- lgb.importance(model, percentage = FALSE) #' tree_imp2 <- lgb.importance(model, percentage = FALSE)
#' #'
#' @importFrom magrittr %>% %T>% extract #' @importFrom data.table := setnames setorderv
#' @importFrom data.table :=
#' @export #' @export
lgb.importance <- function(model, percentage = TRUE) { lgb.importance <- function(model, percentage = TRUE) {
@ -43,22 +42,33 @@ lgb.importance <- function(model, percentage = TRUE) {
tree_dt <- lgb.model.dt.tree(model) tree_dt <- lgb.model.dt.tree(model)
# Extract elements # Extract elements
tree_imp <- tree_dt %>% tree_imp_dt <- tree_dt[
magrittr::extract(., !is.na(split_index)
i = ! is.na(split_index), , .(Gain = sum(split_gain), Cover = sum(internal_count), Frequency = .N)
j = .(Gain = sum(split_gain), Cover = sum(internal_count), Frequency = .N), , by = "split_feature"
by = "split_feature") %T>% ]
data.table::setnames(., old = "split_feature", new = "Feature") %>%
magrittr::extract(., i = order(Gain, decreasing = TRUE)) data.table::setnames(
tree_imp_dt
, old = "split_feature"
, new = "Feature"
)
# Sort features by Gain
data.table::setorderv(
x = tree_imp_dt
, cols = c("Gain")
, order = -1
)
# Check if relative values are requested # Check if relative values are requested
if (percentage) { if (percentage) {
tree_imp[, ":="(Gain = Gain / sum(Gain), tree_imp_dt[, ":="(Gain = Gain / sum(Gain),
Cover = Cover / sum(Cover), Cover = Cover / sum(Cover),
Frequency = Frequency / sum(Frequency))] Frequency = Frequency / sum(Frequency))]
} }
# Return importance table # Return importance table
return(tree_imp) return(tree_imp_dt)
} }

Просмотреть файл

@ -39,7 +39,6 @@
#' tree_interpretation <- lgb.interprete(model, test$data, 1:5) #' tree_interpretation <- lgb.interprete(model, test$data, 1:5)
#' #'
#' @importFrom data.table as.data.table #' @importFrom data.table as.data.table
#' @importFrom magrittr %>% %T>%
#' @export #' @export
lgb.interprete <- function(model, lgb.interprete <- function(model,
data, data,
@ -56,12 +55,18 @@ lgb.interprete <- function(model,
tree_interpretation_dt_list <- vector(mode = "list", length = length(idxset)) tree_interpretation_dt_list <- vector(mode = "list", length = length(idxset))
# Get parsed predictions of data # Get parsed predictions of data
leaf_index_mat_list <- model$predict(data[idxset, , drop = FALSE], pred_mat <- t(
num_iteration = num_iteration, model$predict(
predleaf = TRUE) %>% data[idxset, , drop = FALSE]
t(.) %>% , num_iteration = num_iteration
data.table::as.data.table(.) %>% , predleaf = TRUE
lapply(., FUN = function(x) matrix(x, ncol = num_class, byrow = TRUE)) )
)
leaf_index_dt <- data.table::as.data.table(pred_mat)
leaf_index_mat_list <- lapply(
X = leaf_index_dt
, FUN = function(x) matrix(x, ncol = num_class, byrow = TRUE)
)
# Get list of trees # Get list of trees
tree_index_mat_list <- lapply(leaf_index_mat_list, tree_index_mat_list <- lapply(leaf_index_mat_list,
@ -121,20 +126,39 @@ single.tree.interprete <- function(tree_dt,
} }
#' @importFrom data.table rbindlist #' @importFrom data.table := rbindlist setorder
#' @importFrom magrittr %>% extract
multiple.tree.interprete <- function(tree_dt, multiple.tree.interprete <- function(tree_dt,
tree_index, tree_index,
leaf_index) { leaf_index) {
# Apply each trees # Apply each trees
mapply(single.tree.interprete, interp_dt <- data.table::rbindlist(
tree_id = tree_index, leaf_id = leaf_index, l = mapply(
MoreArgs = list(tree_dt = tree_dt), FUN = single.tree.interprete
SIMPLIFY = FALSE, USE.NAMES = TRUE) %>% , tree_id = tree_index
data.table::rbindlist(., use.names = TRUE) %>% , leaf_id = leaf_index
magrittr::extract(., j = .(Contribution = sum(Contribution)), by = "Feature") %>% , MoreArgs = list(
magrittr::extract(., i = order(abs(Contribution), decreasing = TRUE)) tree_dt = tree_dt
)
, SIMPLIFY = FALSE
, USE.NAMES = TRUE
)
, use.names = TRUE
)
interp_dt <- interp_dt[, .(Contribution = sum(Contribution)), by = "Feature"]
# Sort features in descending order by contribution
interp_dt[, abs_contribution := abs(Contribution)]
data.table::setorder(
x = interp_dt
, -abs_contribution
)
# Drop absolute value of contribution (only needed for sorting)
interp_dt[, abs_contribution := NULL]
return(interp_dt)
} }
@ -147,14 +171,22 @@ single.row.interprete <- function(tree_dt, num_class, tree_index_mat, leaf_index
# Loop throughout each class # Loop throughout each class
for (i in seq_len(num_class)) { for (i in seq_len(num_class)) {
tree_interpretation[[i]] <- multiple.tree.interprete(tree_dt, tree_index_mat[,i], leaf_index_mat[,i]) %T>% { next_interp_dt <- multiple.tree.interprete(
tree_dt = tree_dt
, tree_index = tree_index_mat[,i]
, leaf_index = leaf_index_mat[,i]
)
# Number of classes larger than 1 requires adjustment if (num_class > 1){
if (num_class > 1) { data.table::setnames(
data.table::setnames(., old = "Contribution", new = paste("Class", i - 1)) next_interp_dt
} , old = "Contribution"
, new = paste("Class", i - 1)
)
} }
tree_interpretation[[i]] <- next_interp_dt
} }
# Check for numbe rof classes larger than 1 # Check for numbe rof classes larger than 1

Просмотреть файл

@ -42,7 +42,6 @@
#' #'
#' tree_dt <- lgb.model.dt.tree(model) #' tree_dt <- lgb.model.dt.tree(model)
#' #'
#' @importFrom magrittr %>%
#' @importFrom data.table := data.table rbindlist #' @importFrom data.table := data.table rbindlist
#' @importFrom jsonlite fromJSON #' @importFrom jsonlite fromJSON
#' @export #' @export
@ -64,10 +63,16 @@ lgb.model.dt.tree <- function(model, num_iteration = NULL) {
# Combine into single data.table fourth # Combine into single data.table fourth
tree_dt <- data.table::rbindlist(tree_list, use.names = TRUE) tree_dt <- data.table::rbindlist(tree_list, use.names = TRUE)
# Lookup sequence # Substitute feature index with the actual feature name
tree_dt[, split_feature := Lookup(split_feature,
seq.int(from = 0, to = parsed_json_model$max_feature_idx), # Since the index comes from C++ (which is 0-indexed), be sure
parsed_json_model$feature_names)] # to add 1 (e.g. index 28 means the 29th feature in feature_names)
split_feature_indx <- tree_dt[, split_feature] + 1
# Get corresponding feature names. Positions in split_feature_indx
# which are NA will result in an NA feature name
feature_names <- parsed_json_model$feature_names[split_feature_indx]
tree_dt[, split_feature := feature_names]
# Return tree # Return tree
return(tree_dt) return(tree_dt)
@ -159,13 +164,3 @@ single.tree.parse <- function(lgb_tree) {
return(single_tree_dt) return(single_tree_dt)
} }
#' @importFrom magrittr %>% extract inset
Lookup <- function(key, key_lookup, value_lookup, missing = NA) {
# Match key by looked up key
match(key, key_lookup) %>%
magrittr::extract(value_lookup, .) %>%
magrittr::inset(. , is.na(.), missing)
}

Просмотреть файл

@ -60,8 +60,14 @@ lgb.plot.importance <- function(tree_imp,
op <- graphics::par(no.readonly = TRUE) op <- graphics::par(no.readonly = TRUE)
on.exit(graphics::par(op)) on.exit(graphics::par(op))
# Do some magic plotting graphics::par(
graphics::par(mar = op$mar %>% magrittr::inset(., 2, left_margin)) mar = c(
op$mar[1]
, left_margin
, op$mar[3]
, op$mar[4]
)
)
# Do plot # Do plot
tree_imp[.N:1, tree_imp[.N:1,

Просмотреть файл

@ -35,7 +35,6 @@
#' lgb.plot.interpretation(tree_interpretation[[1]], top_n = 10) #' lgb.plot.interpretation(tree_interpretation[[1]], top_n = 10)
#' @importFrom data.table setnames #' @importFrom data.table setnames
#' @importFrom graphics barplot par #' @importFrom graphics barplot par
#' @importFrom magrittr inset
#' @export #' @export
lgb.plot.interpretation <- function(tree_interpretation_dt, lgb.plot.interpretation <- function(tree_interpretation_dt,
top_n = 10, top_n = 10,
@ -51,7 +50,18 @@ lgb.plot.interpretation <- function(tree_interpretation_dt,
on.exit(graphics::par(op)) on.exit(graphics::par(op))
# Do some magic plotting # Do some magic plotting
graphics::par(mar = op$mar %>% magrittr::inset(., 1:3, c(3, left_margin, 2))) bottom_margin <- 3.0
top_margin <- 2.0
right_margin <- op$mar[4]
graphics::par(
mar = c(
bottom_margin
, left_margin
, top_margin
, right_margin
)
)
# Check for number of classes # Check for number of classes
if (num_class == 1) { if (num_class == 1) {
@ -75,12 +85,18 @@ lgb.plot.interpretation <- function(tree_interpretation_dt,
for (i in seq_len(num_class)) { for (i in seq_len(num_class)) {
# Prepare interpretation, perform T, get the names, and plot straight away # Prepare interpretation, perform T, get the names, and plot straight away
tree_interpretation_dt[, c(1, i + 1), with = FALSE] %T>% plot_dt <- tree_interpretation_dt[, c(1, i + 1), with = FALSE]
data.table::setnames(., old = names(.), new = c("Feature", "Contribution")) %>% data.table::setnames(
multiple.tree.plot.interpretation(., # Self plot_dt
top_n = top_n, , old = names(plot_dt)
title = paste("Class", i - 1), , new = c("Feature", "Contribution")
cex = cex) )
multiple.tree.plot.interpretation(
plot_dt
, top_n = top_n
, title = paste("Class", i - 1)
, cex = cex
)
} }
} }
@ -114,6 +130,6 @@ multiple.tree.plot.interpretation <- function(tree_interpretation,
)] )]
# Return invisibly # Return invisibly
invisible(NULL) return(invisible(NULL))
} }

Просмотреть файл

@ -229,7 +229,6 @@ def generate_r_docs(app):
r-devtools=1.13.6=r351h6115d3f_0 \ r-devtools=1.13.6=r351h6115d3f_0 \
r-data.table=1.11.4=r351h96ca727_0 \ r-data.table=1.11.4=r351h96ca727_0 \
r-jsonlite=1.5=r351h96ca727_0 \ r-jsonlite=1.5=r351h96ca727_0 \
r-magrittr=1.5=r351h6115d3f_4 \
r-matrix=1.2_14=r351h96ca727_0 \ r-matrix=1.2_14=r351h96ca727_0 \
r-testthat=2.0.0=r351h29659fb_0 \ r-testthat=2.0.0=r351h29659fb_0 \
cmake=3.14.0=h52cb24c_0 cmake=3.14.0=h52cb24c_0