зеркало из https://github.com/microsoft/LightGBM.git
[R-package] factored dependency 'magrittr' out of R package (#2334)
This commit is contained in:
Родитель
b3c126629e
Коммит
42204c43da
|
@ -39,7 +39,6 @@ Imports:
|
||||||
data.table (>= 1.9.6),
|
data.table (>= 1.9.6),
|
||||||
graphics,
|
graphics,
|
||||||
jsonlite (>= 1.0),
|
jsonlite (>= 1.0),
|
||||||
magrittr (>= 1.5),
|
|
||||||
Matrix (>= 1.1-0),
|
Matrix (>= 1.1-0),
|
||||||
methods
|
methods
|
||||||
RoxygenNote: 6.0.1
|
RoxygenNote: 6.0.1
|
||||||
|
|
|
@ -43,13 +43,11 @@ importFrom(data.table,data.table)
|
||||||
importFrom(data.table,rbindlist)
|
importFrom(data.table,rbindlist)
|
||||||
importFrom(data.table,set)
|
importFrom(data.table,set)
|
||||||
importFrom(data.table,setnames)
|
importFrom(data.table,setnames)
|
||||||
|
importFrom(data.table,setorder)
|
||||||
|
importFrom(data.table,setorderv)
|
||||||
importFrom(graphics,barplot)
|
importFrom(graphics,barplot)
|
||||||
importFrom(graphics,par)
|
importFrom(graphics,par)
|
||||||
importFrom(jsonlite,fromJSON)
|
importFrom(jsonlite,fromJSON)
|
||||||
importFrom(magrittr,"%>%")
|
|
||||||
importFrom(magrittr,"%T>%")
|
|
||||||
importFrom(magrittr,extract)
|
|
||||||
importFrom(magrittr,inset)
|
|
||||||
importFrom(methods,is)
|
importFrom(methods,is)
|
||||||
importFrom(stats,quantile)
|
importFrom(stats,quantile)
|
||||||
useDynLib(lib_lightgbm , .registration = TRUE)
|
useDynLib(lib_lightgbm , .registration = TRUE)
|
||||||
|
|
|
@ -29,8 +29,7 @@
|
||||||
#' tree_imp1 <- lgb.importance(model, percentage = TRUE)
|
#' tree_imp1 <- lgb.importance(model, percentage = TRUE)
|
||||||
#' tree_imp2 <- lgb.importance(model, percentage = FALSE)
|
#' tree_imp2 <- lgb.importance(model, percentage = FALSE)
|
||||||
#'
|
#'
|
||||||
#' @importFrom magrittr %>% %T>% extract
|
#' @importFrom data.table := setnames setorderv
|
||||||
#' @importFrom data.table :=
|
|
||||||
#' @export
|
#' @export
|
||||||
lgb.importance <- function(model, percentage = TRUE) {
|
lgb.importance <- function(model, percentage = TRUE) {
|
||||||
|
|
||||||
|
@ -43,22 +42,33 @@ lgb.importance <- function(model, percentage = TRUE) {
|
||||||
tree_dt <- lgb.model.dt.tree(model)
|
tree_dt <- lgb.model.dt.tree(model)
|
||||||
|
|
||||||
# Extract elements
|
# Extract elements
|
||||||
tree_imp <- tree_dt %>%
|
tree_imp_dt <- tree_dt[
|
||||||
magrittr::extract(.,
|
!is.na(split_index)
|
||||||
i = ! is.na(split_index),
|
, .(Gain = sum(split_gain), Cover = sum(internal_count), Frequency = .N)
|
||||||
j = .(Gain = sum(split_gain), Cover = sum(internal_count), Frequency = .N),
|
, by = "split_feature"
|
||||||
by = "split_feature") %T>%
|
]
|
||||||
data.table::setnames(., old = "split_feature", new = "Feature") %>%
|
|
||||||
magrittr::extract(., i = order(Gain, decreasing = TRUE))
|
data.table::setnames(
|
||||||
|
tree_imp_dt
|
||||||
|
, old = "split_feature"
|
||||||
|
, new = "Feature"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Sort features by Gain
|
||||||
|
data.table::setorderv(
|
||||||
|
x = tree_imp_dt
|
||||||
|
, cols = c("Gain")
|
||||||
|
, order = -1
|
||||||
|
)
|
||||||
|
|
||||||
# Check if relative values are requested
|
# Check if relative values are requested
|
||||||
if (percentage) {
|
if (percentage) {
|
||||||
tree_imp[, ":="(Gain = Gain / sum(Gain),
|
tree_imp_dt[, ":="(Gain = Gain / sum(Gain),
|
||||||
Cover = Cover / sum(Cover),
|
Cover = Cover / sum(Cover),
|
||||||
Frequency = Frequency / sum(Frequency))]
|
Frequency = Frequency / sum(Frequency))]
|
||||||
}
|
}
|
||||||
|
|
||||||
# Return importance table
|
# Return importance table
|
||||||
return(tree_imp)
|
return(tree_imp_dt)
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -39,7 +39,6 @@
|
||||||
#' tree_interpretation <- lgb.interprete(model, test$data, 1:5)
|
#' tree_interpretation <- lgb.interprete(model, test$data, 1:5)
|
||||||
#'
|
#'
|
||||||
#' @importFrom data.table as.data.table
|
#' @importFrom data.table as.data.table
|
||||||
#' @importFrom magrittr %>% %T>%
|
|
||||||
#' @export
|
#' @export
|
||||||
lgb.interprete <- function(model,
|
lgb.interprete <- function(model,
|
||||||
data,
|
data,
|
||||||
|
@ -56,12 +55,18 @@ lgb.interprete <- function(model,
|
||||||
tree_interpretation_dt_list <- vector(mode = "list", length = length(idxset))
|
tree_interpretation_dt_list <- vector(mode = "list", length = length(idxset))
|
||||||
|
|
||||||
# Get parsed predictions of data
|
# Get parsed predictions of data
|
||||||
leaf_index_mat_list <- model$predict(data[idxset, , drop = FALSE],
|
pred_mat <- t(
|
||||||
num_iteration = num_iteration,
|
model$predict(
|
||||||
predleaf = TRUE) %>%
|
data[idxset, , drop = FALSE]
|
||||||
t(.) %>%
|
, num_iteration = num_iteration
|
||||||
data.table::as.data.table(.) %>%
|
, predleaf = TRUE
|
||||||
lapply(., FUN = function(x) matrix(x, ncol = num_class, byrow = TRUE))
|
)
|
||||||
|
)
|
||||||
|
leaf_index_dt <- data.table::as.data.table(pred_mat)
|
||||||
|
leaf_index_mat_list <- lapply(
|
||||||
|
X = leaf_index_dt
|
||||||
|
, FUN = function(x) matrix(x, ncol = num_class, byrow = TRUE)
|
||||||
|
)
|
||||||
|
|
||||||
# Get list of trees
|
# Get list of trees
|
||||||
tree_index_mat_list <- lapply(leaf_index_mat_list,
|
tree_index_mat_list <- lapply(leaf_index_mat_list,
|
||||||
|
@ -121,20 +126,39 @@ single.tree.interprete <- function(tree_dt,
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#' @importFrom data.table rbindlist
|
#' @importFrom data.table := rbindlist setorder
|
||||||
#' @importFrom magrittr %>% extract
|
|
||||||
multiple.tree.interprete <- function(tree_dt,
|
multiple.tree.interprete <- function(tree_dt,
|
||||||
tree_index,
|
tree_index,
|
||||||
leaf_index) {
|
leaf_index) {
|
||||||
|
|
||||||
# Apply each trees
|
# Apply each trees
|
||||||
mapply(single.tree.interprete,
|
interp_dt <- data.table::rbindlist(
|
||||||
tree_id = tree_index, leaf_id = leaf_index,
|
l = mapply(
|
||||||
MoreArgs = list(tree_dt = tree_dt),
|
FUN = single.tree.interprete
|
||||||
SIMPLIFY = FALSE, USE.NAMES = TRUE) %>%
|
, tree_id = tree_index
|
||||||
data.table::rbindlist(., use.names = TRUE) %>%
|
, leaf_id = leaf_index
|
||||||
magrittr::extract(., j = .(Contribution = sum(Contribution)), by = "Feature") %>%
|
, MoreArgs = list(
|
||||||
magrittr::extract(., i = order(abs(Contribution), decreasing = TRUE))
|
tree_dt = tree_dt
|
||||||
|
)
|
||||||
|
, SIMPLIFY = FALSE
|
||||||
|
, USE.NAMES = TRUE
|
||||||
|
)
|
||||||
|
, use.names = TRUE
|
||||||
|
)
|
||||||
|
|
||||||
|
interp_dt <- interp_dt[, .(Contribution = sum(Contribution)), by = "Feature"]
|
||||||
|
|
||||||
|
# Sort features in descending order by contribution
|
||||||
|
interp_dt[, abs_contribution := abs(Contribution)]
|
||||||
|
data.table::setorder(
|
||||||
|
x = interp_dt
|
||||||
|
, -abs_contribution
|
||||||
|
)
|
||||||
|
|
||||||
|
# Drop absolute value of contribution (only needed for sorting)
|
||||||
|
interp_dt[, abs_contribution := NULL]
|
||||||
|
|
||||||
|
return(interp_dt)
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -147,14 +171,22 @@ single.row.interprete <- function(tree_dt, num_class, tree_index_mat, leaf_index
|
||||||
# Loop throughout each class
|
# Loop throughout each class
|
||||||
for (i in seq_len(num_class)) {
|
for (i in seq_len(num_class)) {
|
||||||
|
|
||||||
tree_interpretation[[i]] <- multiple.tree.interprete(tree_dt, tree_index_mat[,i], leaf_index_mat[,i]) %T>% {
|
next_interp_dt <- multiple.tree.interprete(
|
||||||
|
tree_dt = tree_dt
|
||||||
|
, tree_index = tree_index_mat[,i]
|
||||||
|
, leaf_index = leaf_index_mat[,i]
|
||||||
|
)
|
||||||
|
|
||||||
# Number of classes larger than 1 requires adjustment
|
if (num_class > 1){
|
||||||
if (num_class > 1) {
|
data.table::setnames(
|
||||||
data.table::setnames(., old = "Contribution", new = paste("Class", i - 1))
|
next_interp_dt
|
||||||
}
|
, old = "Contribution"
|
||||||
|
, new = paste("Class", i - 1)
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
tree_interpretation[[i]] <- next_interp_dt
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# Check for numbe rof classes larger than 1
|
# Check for numbe rof classes larger than 1
|
||||||
|
|
|
@ -42,7 +42,6 @@
|
||||||
#'
|
#'
|
||||||
#' tree_dt <- lgb.model.dt.tree(model)
|
#' tree_dt <- lgb.model.dt.tree(model)
|
||||||
#'
|
#'
|
||||||
#' @importFrom magrittr %>%
|
|
||||||
#' @importFrom data.table := data.table rbindlist
|
#' @importFrom data.table := data.table rbindlist
|
||||||
#' @importFrom jsonlite fromJSON
|
#' @importFrom jsonlite fromJSON
|
||||||
#' @export
|
#' @export
|
||||||
|
@ -64,10 +63,16 @@ lgb.model.dt.tree <- function(model, num_iteration = NULL) {
|
||||||
# Combine into single data.table fourth
|
# Combine into single data.table fourth
|
||||||
tree_dt <- data.table::rbindlist(tree_list, use.names = TRUE)
|
tree_dt <- data.table::rbindlist(tree_list, use.names = TRUE)
|
||||||
|
|
||||||
# Lookup sequence
|
# Substitute feature index with the actual feature name
|
||||||
tree_dt[, split_feature := Lookup(split_feature,
|
|
||||||
seq.int(from = 0, to = parsed_json_model$max_feature_idx),
|
# Since the index comes from C++ (which is 0-indexed), be sure
|
||||||
parsed_json_model$feature_names)]
|
# to add 1 (e.g. index 28 means the 29th feature in feature_names)
|
||||||
|
split_feature_indx <- tree_dt[, split_feature] + 1
|
||||||
|
|
||||||
|
# Get corresponding feature names. Positions in split_feature_indx
|
||||||
|
# which are NA will result in an NA feature name
|
||||||
|
feature_names <- parsed_json_model$feature_names[split_feature_indx]
|
||||||
|
tree_dt[, split_feature := feature_names]
|
||||||
|
|
||||||
# Return tree
|
# Return tree
|
||||||
return(tree_dt)
|
return(tree_dt)
|
||||||
|
@ -159,13 +164,3 @@ single.tree.parse <- function(lgb_tree) {
|
||||||
return(single_tree_dt)
|
return(single_tree_dt)
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#' @importFrom magrittr %>% extract inset
|
|
||||||
Lookup <- function(key, key_lookup, value_lookup, missing = NA) {
|
|
||||||
|
|
||||||
# Match key by looked up key
|
|
||||||
match(key, key_lookup) %>%
|
|
||||||
magrittr::extract(value_lookup, .) %>%
|
|
||||||
magrittr::inset(. , is.na(.), missing)
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
|
@ -60,8 +60,14 @@ lgb.plot.importance <- function(tree_imp,
|
||||||
op <- graphics::par(no.readonly = TRUE)
|
op <- graphics::par(no.readonly = TRUE)
|
||||||
on.exit(graphics::par(op))
|
on.exit(graphics::par(op))
|
||||||
|
|
||||||
# Do some magic plotting
|
graphics::par(
|
||||||
graphics::par(mar = op$mar %>% magrittr::inset(., 2, left_margin))
|
mar = c(
|
||||||
|
op$mar[1]
|
||||||
|
, left_margin
|
||||||
|
, op$mar[3]
|
||||||
|
, op$mar[4]
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
# Do plot
|
# Do plot
|
||||||
tree_imp[.N:1,
|
tree_imp[.N:1,
|
||||||
|
|
|
@ -35,7 +35,6 @@
|
||||||
#' lgb.plot.interpretation(tree_interpretation[[1]], top_n = 10)
|
#' lgb.plot.interpretation(tree_interpretation[[1]], top_n = 10)
|
||||||
#' @importFrom data.table setnames
|
#' @importFrom data.table setnames
|
||||||
#' @importFrom graphics barplot par
|
#' @importFrom graphics barplot par
|
||||||
#' @importFrom magrittr inset
|
|
||||||
#' @export
|
#' @export
|
||||||
lgb.plot.interpretation <- function(tree_interpretation_dt,
|
lgb.plot.interpretation <- function(tree_interpretation_dt,
|
||||||
top_n = 10,
|
top_n = 10,
|
||||||
|
@ -51,7 +50,18 @@ lgb.plot.interpretation <- function(tree_interpretation_dt,
|
||||||
on.exit(graphics::par(op))
|
on.exit(graphics::par(op))
|
||||||
|
|
||||||
# Do some magic plotting
|
# Do some magic plotting
|
||||||
graphics::par(mar = op$mar %>% magrittr::inset(., 1:3, c(3, left_margin, 2)))
|
bottom_margin <- 3.0
|
||||||
|
top_margin <- 2.0
|
||||||
|
right_margin <- op$mar[4]
|
||||||
|
|
||||||
|
graphics::par(
|
||||||
|
mar = c(
|
||||||
|
bottom_margin
|
||||||
|
, left_margin
|
||||||
|
, top_margin
|
||||||
|
, right_margin
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
# Check for number of classes
|
# Check for number of classes
|
||||||
if (num_class == 1) {
|
if (num_class == 1) {
|
||||||
|
@ -75,12 +85,18 @@ lgb.plot.interpretation <- function(tree_interpretation_dt,
|
||||||
for (i in seq_len(num_class)) {
|
for (i in seq_len(num_class)) {
|
||||||
|
|
||||||
# Prepare interpretation, perform T, get the names, and plot straight away
|
# Prepare interpretation, perform T, get the names, and plot straight away
|
||||||
tree_interpretation_dt[, c(1, i + 1), with = FALSE] %T>%
|
plot_dt <- tree_interpretation_dt[, c(1, i + 1), with = FALSE]
|
||||||
data.table::setnames(., old = names(.), new = c("Feature", "Contribution")) %>%
|
data.table::setnames(
|
||||||
multiple.tree.plot.interpretation(., # Self
|
plot_dt
|
||||||
top_n = top_n,
|
, old = names(plot_dt)
|
||||||
title = paste("Class", i - 1),
|
, new = c("Feature", "Contribution")
|
||||||
cex = cex)
|
)
|
||||||
|
multiple.tree.plot.interpretation(
|
||||||
|
plot_dt
|
||||||
|
, top_n = top_n
|
||||||
|
, title = paste("Class", i - 1)
|
||||||
|
, cex = cex
|
||||||
|
)
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -114,6 +130,6 @@ multiple.tree.plot.interpretation <- function(tree_interpretation,
|
||||||
)]
|
)]
|
||||||
|
|
||||||
# Return invisibly
|
# Return invisibly
|
||||||
invisible(NULL)
|
return(invisible(NULL))
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -229,7 +229,6 @@ def generate_r_docs(app):
|
||||||
r-devtools=1.13.6=r351h6115d3f_0 \
|
r-devtools=1.13.6=r351h6115d3f_0 \
|
||||||
r-data.table=1.11.4=r351h96ca727_0 \
|
r-data.table=1.11.4=r351h96ca727_0 \
|
||||||
r-jsonlite=1.5=r351h96ca727_0 \
|
r-jsonlite=1.5=r351h96ca727_0 \
|
||||||
r-magrittr=1.5=r351h6115d3f_4 \
|
|
||||||
r-matrix=1.2_14=r351h96ca727_0 \
|
r-matrix=1.2_14=r351h96ca727_0 \
|
||||||
r-testthat=2.0.0=r351h29659fb_0 \
|
r-testthat=2.0.0=r351h29659fb_0 \
|
||||||
cmake=3.14.0=h52cb24c_0
|
cmake=3.14.0=h52cb24c_0
|
||||||
|
|
Загрузка…
Ссылка в новой задаче