Merge branch 'main' of https://github.com/microsoft/CausalGrid into main
This commit is contained in:
Коммит
d572525c38
|
@ -14,3 +14,6 @@
|
|||
^_pkgdown\.yml$
|
||||
^docs$
|
||||
^pkgdown$
|
||||
^CODE_OF_CONDUCT\.md$
|
||||
^SUPPORT\.md$
|
||||
^SECURITY\.md$
|
||||
|
|
|
@ -49,3 +49,4 @@ vignettes/*.pdf
|
|||
.Rproj.user
|
||||
doc/
|
||||
Meta
|
||||
doc
|
||||
|
|
14
DESCRIPTION
14
DESCRIPTION
|
@ -4,10 +4,6 @@ Version: 0.2
|
|||
Authors@R: person("Brian", "Quistorff", email = "Brian.Quistorff@microsoft.com",
|
||||
role = c("aut", "cre"))
|
||||
Description: Analysis of Subgroups.
|
||||
Depends: R (>= 3.1.0),
|
||||
caret,
|
||||
gsubfn,
|
||||
assertthat
|
||||
License: MIT + file LICENSE
|
||||
LazyData: true
|
||||
RoxygenNote: 7.1.1
|
||||
|
@ -20,9 +16,15 @@ Suggests:
|
|||
pbapply,
|
||||
testthat,
|
||||
knitr,
|
||||
rmarkdown
|
||||
rmarkdown,
|
||||
ragg,
|
||||
gridExtra
|
||||
BuildVignettes: true
|
||||
Imports: Rcpp (>= 1.0.1)
|
||||
Imports: Rcpp (>= 1.0.1),
|
||||
gsubfn,
|
||||
assertthat,
|
||||
caret,
|
||||
purrr
|
||||
LinkingTo: Rcpp
|
||||
Encoding: UTF-8
|
||||
VignetteBuilder: knitr
|
||||
|
|
16
NAMESPACE
16
NAMESPACE
|
@ -3,6 +3,8 @@
|
|||
S3method(est_params,grid_rf)
|
||||
S3method(est_params,lm_est)
|
||||
S3method(est_params,simple_est)
|
||||
S3method(get_desc_df,estimated_partition)
|
||||
S3method(get_desc_df,grid_partition)
|
||||
S3method(num_cells,estimated_partition)
|
||||
S3method(num_cells,grid_partition)
|
||||
S3method(plot,estimated_partition)
|
||||
|
@ -22,15 +24,14 @@ export(fit_estimate_partition)
|
|||
export(fit_on_train)
|
||||
export(fit_partition)
|
||||
export(get_X_range)
|
||||
export(get_desc_df.estimated_partition)
|
||||
export(get_desc_df.grid_partition)
|
||||
export(get_desc_df)
|
||||
export(grid_partition)
|
||||
export(grid_rf)
|
||||
export(is_estimated_partition)
|
||||
export(is_grid_partition)
|
||||
export(is_grid_partition_split)
|
||||
export(is_grid_rf)
|
||||
export(is_lm_est)
|
||||
export(is_partition_split)
|
||||
export(is_simple_est)
|
||||
export(lm_est)
|
||||
export(num_cells)
|
||||
|
@ -38,11 +39,11 @@ export(partition_split)
|
|||
export(residualize)
|
||||
export(simple_est)
|
||||
export(test_any_sign_effect)
|
||||
import(Rcpp)
|
||||
import(assertthat)
|
||||
import(caret)
|
||||
import(gsubfn)
|
||||
importFrom(Rcpp,sourceCpp)
|
||||
importFrom(caret,createFolds)
|
||||
importFrom(gsubfn,"[<-.result")
|
||||
importFrom(gsubfn,list)
|
||||
importFrom(purrr,partial)
|
||||
importFrom(stats,coef)
|
||||
importFrom(stats,formula)
|
||||
|
@ -58,4 +59,7 @@ importFrom(stats,sd)
|
|||
importFrom(stats,var)
|
||||
importFrom(stats,vcov)
|
||||
importFrom(utils,combn)
|
||||
importFrom(utils,getTxtProgressBar)
|
||||
importFrom(utils,setTxtProgressBar)
|
||||
importFrom(utils,txtProgressBar)
|
||||
useDynLib(CausalGrid, .registration = TRUE)
|
||||
|
|
|
@ -39,10 +39,9 @@
|
|||
#' @importFrom Rcpp sourceCpp
|
||||
#' @importFrom stats coef formula lm model.matrix p.adjust pt qt quantile sd
|
||||
#' vcov var predict rnorm
|
||||
#' @importFrom utils combn
|
||||
#' @import caret
|
||||
#' @import gsubfn
|
||||
#' @import Rcpp
|
||||
#' @importFrom utils combn txtProgressBar setTxtProgressBar getTxtProgressBar
|
||||
#' @importFrom caret createFolds
|
||||
#' @importFrom gsubfn list [<-.result
|
||||
#' @import assertthat
|
||||
#' @importFrom purrr partial
|
||||
#' @docType package
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
#'
|
||||
#' @param partition partition
|
||||
#' @param cell_stats cell_stats
|
||||
#' @param ...
|
||||
#' @param ... Additional arguments
|
||||
#'
|
||||
#' @return object of class estimated_partition
|
||||
#' @export
|
||||
|
@ -203,6 +203,8 @@ num_cells.estimated_partition <- function(obj) {
|
|||
#' @param fit estimated_partition
|
||||
#' @param partition_i partition_i - 1 is the last include in split_seq included in new partition
|
||||
#' @inheritParams fit_partition
|
||||
#' @param index_tr Split between train and estimate samples (default is to get from \code{fit})
|
||||
#' @param split_seq sequential list of splits (default is to get from \code{fit})
|
||||
#'
|
||||
#' @return updated estimated_partition
|
||||
#' @export
|
||||
|
@ -222,23 +224,23 @@ change_complexity <- function(fit, y, X, d=NULL, partition_i, index_tr = fit$ind
|
|||
}
|
||||
|
||||
|
||||
#' Get descriptive data.frame for an estimated_partition
|
||||
#' Get descriptive data.frame
|
||||
#'
|
||||
#' Get information for each cell
|
||||
#'
|
||||
#' @inheritParams get_desc_df
|
||||
#' @param import_order Whether should use importance ordering
|
||||
#' (most important on the left) or input ordering (default) for features. Rows
|
||||
#' will be ordered so that the right-most will change most frequently.
|
||||
#'
|
||||
#' Get statistics for each cell (feature boundary, and estimated cell stats)
|
||||
#'
|
||||
#' @param obj estimated_partition object
|
||||
#' @param do_str If True, use a string like "(a, b]", otherwise have two separate columns with a and b
|
||||
#' @param drop_unsplit If True, drop columns for variables overwhich the partition did not split
|
||||
#' @param digits digits Option (default is NULL)
|
||||
#' @param import_order Should we use importance ordering (most important on the left) or input ordering (default) for features.
|
||||
#' Rows will be ordered so that the right-most will change most frequently.
|
||||
#'
|
||||
#' @return data.frame with columns: partitionin columns, {N_est, param_ests, pval} per estimate
|
||||
#' @export
|
||||
get_desc_df.estimated_partition <- function(obj, do_str=TRUE, drop_unsplit=TRUE, digits=NULL, import_order=FALSE) {
|
||||
#'
|
||||
#' @return data.frame with columns: partitioning columns, {N_est, param_ests,
|
||||
#' pval} per estimate
|
||||
#' @export
|
||||
get_desc_df.estimated_partition <- function(obj, cont_bounds_inf=TRUE, do_str=TRUE, drop_unsplit=TRUE, digits=NULL, unsplit_cat_star=TRUE, import_order=FALSE, ...) {
|
||||
M = obj$M
|
||||
stats = obj$cell_stats[c(F, rep(T,M), rep(T,M), rep(F,M),rep(F,M), rep(F,M), rep(F,M), rep(T,M), rep(F,M), rep(F,M))]
|
||||
part_df = get_desc_df.grid_partition(obj$partition, do_str=do_str, drop_unsplit=drop_unsplit, digits=digits)
|
||||
part_df = get_desc_df(obj$partition, cont_bounds_inf=cont_bounds_inf, do_str=do_str, drop_unsplit=drop_unsplit, digits=digits, unsplit_cat_star=unsplit_cat_star)
|
||||
|
||||
imp_weights = obj$importance_weights
|
||||
if(drop_unsplit) {
|
||||
|
@ -255,16 +257,18 @@ get_desc_df.estimated_partition <- function(obj, do_str=TRUE, drop_unsplit=TRUE,
|
|||
# Inherited params: do_str, drop_unsplit, digits, import_order
|
||||
#' Print estimated_partition
|
||||
#'
|
||||
#' Print a summary of the estimated partition. Uses \code{\link{get_desc_df.estimated_partition}}
|
||||
#' Print a summary of the estimated partition. Uses \code{\link{get_desc_df}}
|
||||
#'
|
||||
#' @param x estimated_partition object
|
||||
#' @inheritParams get_desc_df.estimated_partition
|
||||
#' @inheritParams get_desc_df
|
||||
#' @param import_order Whether should use importance ordering
|
||||
#' (most important on the left) or input ordering (default) for features.
|
||||
#' @param ... Additional arguments. These will be passed to print.data.frame
|
||||
#'
|
||||
#' @return string (and displayed)
|
||||
#' @export
|
||||
print.estimated_partition <- function(x, do_str=TRUE, drop_unsplit=TRUE, digits=NULL, import_order=FALSE, ...) {
|
||||
return(print(get_desc_df.estimated_partition(x, do_str, drop_unsplit, digits, import_order=import_order),
|
||||
return(print(get_desc_df(x, do_str, drop_unsplit, digits, import_order=import_order),
|
||||
digits=digits, ...))
|
||||
}
|
||||
|
||||
|
@ -426,26 +430,27 @@ est_full_stats <- function(y, X, d, est_plan, y_es=NULL, X_es=NULL, d_es=NULL, i
|
|||
#'
|
||||
#' Predicted unit-level treatment effect or outcome
|
||||
#'
|
||||
#' @param obj estimated_partition object
|
||||
#' @param object estimated_partition object
|
||||
#' @param new_X new X
|
||||
#' @param new_d new d. Required for type="outcome"
|
||||
#' @param type "effect" or "outcome" (currently not implemented)
|
||||
#' @param ... Additional arguments. Unused.
|
||||
#'
|
||||
#' @return predicted treatment effect
|
||||
#' @export
|
||||
predict.estimated_partition <- function(obj, new_X, new_d=NULL, type="effect") {
|
||||
predict.estimated_partition <- function(object, new_X, new_d = NULL, type = "effect", ...) {
|
||||
#TODO: for mode 1 &2 maybe return a matrix rather than list
|
||||
|
||||
new_X = ensure_good_X(new_X)
|
||||
new_X_range = get_X_range(new_X)
|
||||
|
||||
cell_factor = predict(obj$partition, new_X, new_X_range)
|
||||
M = obj$M
|
||||
cell_factor = predict(object$partition, new_X, new_X_range)
|
||||
M = object$M
|
||||
|
||||
if(M==1) {
|
||||
N=nrow(new_X)
|
||||
cell_factor_df = data.frame(id=1:N, cell_i = as.integer(cell_factor))
|
||||
m_df = merge(cell_factor_df, obj$cell_stats)
|
||||
m_df = merge(cell_factor_df, object$cell_stats)
|
||||
m_df = m_df[order(m_df[["id"]]), ]
|
||||
return(m_df[["param_ests"]])
|
||||
}
|
||||
|
@ -453,7 +458,7 @@ predict.estimated_partition <- function(obj, new_X, new_d=NULL, type="effect") {
|
|||
rets = list()
|
||||
for(m in 1:M) {
|
||||
cell_factor_df = data.frame(id=1:N[m], cell_i = as.integer(cell_factor[[m]]))
|
||||
m_df = merge(cell_factor_df, obj$cell_stats)
|
||||
m_df = merge(cell_factor_df, object$cell_stats)
|
||||
m_df = m_df[order(m_df[["id"]]), ]
|
||||
rets[[m]] = m_df[["param_ests"]]
|
||||
}
|
||||
|
|
39
R/graphing.R
39
R/graphing.R
|
@ -4,54 +4,57 @@
|
|||
#'
|
||||
#' Creates a 2D plot of parameter estimates or a series of such slices if partition is across >2 features.
|
||||
#'
|
||||
#' @param grid_fit grid_fit
|
||||
#' @param x grid_fit
|
||||
#' @param X_names_2D X_names_2D
|
||||
#' @param ... Additional arguments. Unused.
|
||||
#'
|
||||
#' @return ggplot2 object or list of such objects
|
||||
#' @export
|
||||
plot.estimated_partition <- function(grid_fit, X_names_2D=NULL) {
|
||||
plot.estimated_partition <- function(x, X_names_2D=NULL, ...) {
|
||||
if (!requireNamespace("ggplot2", quietly = TRUE)) {
|
||||
stop("Package \"ggplot2\" needed for this function to work. Please install it.",
|
||||
call. = FALSE)
|
||||
}
|
||||
split_dims = (grid_fit$partition$nsplits_by_dim > 0)
|
||||
|
||||
split_dims = (x$partition$nsplits_by_dim > 0)
|
||||
n_split_dims = sum(split_dims)
|
||||
if(n_split_dims==0) {
|
||||
print("Nothing to graph as no heterogeneity")
|
||||
return(NULL)
|
||||
}
|
||||
desc_range_df = get_desc_df.grid_partition(grid_fit$partition, drop_unsplit=TRUE, cont_bounds_inf=FALSE)
|
||||
desc_range_df = get_desc_df(x$partition, drop_unsplit=TRUE, cont_bounds_inf=FALSE)
|
||||
if(n_split_dims==1) {
|
||||
desc_range_df = do.call(cbind, lapply(desc_range_df, function(c) as.data.frame(t(matrix(unlist(c), nrow=2)))))
|
||||
desc_range_df['ymin'] = 0
|
||||
desc_range_df['ymax'] = 1
|
||||
colnames(desc_range_df)<-c("xmin", "xmax", "ymin", "ymax")
|
||||
desc_range_df["estimate"] = grid_fit$cell_stats$param_ests
|
||||
xname = if(!is.null(X_names_2D)) X_names_2D[1] else grid_fit$partition$varnames[split_dims]
|
||||
desc_range_df["estimate"] = x$cell_stats$param_ests
|
||||
xname = if(!is.null(X_names_2D)) X_names_2D[1] else x$partition$varnames[split_dims]
|
||||
plt = ggplot2::ggplot() +
|
||||
ggplot2::scale_x_continuous(name=xname) +
|
||||
theme(axis.title.y=element_blank(),
|
||||
axis.text.y=element_blank(),
|
||||
axis.ticks.y=element_blank()) + xlab(xname) +
|
||||
ggplot2::theme(axis.title.y=ggplot2::element_blank(),
|
||||
axis.text.y=ggplot2::element_blank(),
|
||||
axis.ticks.y=ggplot2::element_blank()) +
|
||||
ggplot2::xlab(xname) +
|
||||
ggplot2::geom_rect(data=desc_range_df, mapping=ggplot2::aes(xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax, fill=estimate), color="black")
|
||||
return(plt)
|
||||
}
|
||||
if(n_split_dims==2){
|
||||
if(is.null(X_names_2D)) X_names_2D = grid_fit$partition$varnames[split_dims]
|
||||
return(gen_one_plt(desc_range_df, grid_fit$cell_stats$param_ests, X_names_2D))
|
||||
if(is.null(X_names_2D)) X_names_2D = x$partition$varnames[split_dims]
|
||||
return(gen_one_plt(desc_range_df, x$cell_stats$param_ests, X_names_2D))
|
||||
}
|
||||
|
||||
desc_range_df_fact = data.frame(lapply(get_desc_df.grid_partition(grid_fit$partition, drop_unsplit=TRUE, do_str=TRUE), unclass))
|
||||
desc_range_df_fact = data.frame(lapply(get_desc_df(x$partition, drop_unsplit=TRUE, do_str=TRUE), unclass))
|
||||
if(is.null(X_names_2D)){
|
||||
if(is.null(grid_fit$importance_weights)) {
|
||||
X_names_2D = grid_fit$partition$varnames[split_dims][1:2]
|
||||
if(is.null(x$importance_weights)) {
|
||||
X_names_2D = x$partition$varnames[split_dims][1:2]
|
||||
}
|
||||
else {
|
||||
X_names_2D = grid_fit$partition$varnames[order(imp_weights, decreasing=FALSE)]
|
||||
X_names_2D = x$partition$varnames[order(imp_weights, decreasing=FALSE)]
|
||||
}
|
||||
}
|
||||
other_idx = !(names(desc_range_df) %in% X_names_2D)
|
||||
n_segs_other = (grid_fit$partition$nsplits_by_dim+1)[other_idx]
|
||||
n_segs_other = (x$partition$nsplits_by_dim+1)[other_idx]
|
||||
names_other = names(desc_range_df)[other_idx]
|
||||
size_other = cumprod(n_segs_other)
|
||||
test_row_equals_vec <- function(M, v) {
|
||||
|
@ -66,8 +69,8 @@ plot.estimated_partition <- function(grid_fit, X_names_2D=NULL) {
|
|||
for(k in 1:length(segment_indexes)){
|
||||
levels_desc[k] = levels(desc_range_df_fact[,which(other_idx)[k]])[segment_indexes[k]]
|
||||
}
|
||||
plts[[slice_i]] = gen_one_plt(desc_range_df[row_idx,X_names_2D], grid_fit$cell_stats$param_ests[row_idx], X_names_2D) +
|
||||
ggtitle(paste(paste(names_other, levels_desc), collapse=", "))
|
||||
plts[[slice_i]] = gen_one_plt(desc_range_df[row_idx,X_names_2D], x$cell_stats$param_ests[row_idx], X_names_2D) +
|
||||
ggplot2::ggtitle(paste(paste(names_other, levels_desc), collapse=", "))
|
||||
}
|
||||
|
||||
return(plts)
|
||||
|
|
|
@ -12,10 +12,10 @@
|
|||
NULL
|
||||
#> NULL
|
||||
|
||||
#' Create a null grid_partition
|
||||
#' Create a null \code{grid_partition}
|
||||
#'
|
||||
#' Create an empty partition. Splits can be added using \code{\link{add_partition_split}}.
|
||||
#' Information about a split can be retrieved using \code{\link{num_cells}}, \code{\link{get_desc_df.grid_partition}} and \code{\link{print}}
|
||||
#' Information about a split can be retrieved using \code{\link{num_cells}}, \code{\link{get_desc_df}} and \code{\link{print}}
|
||||
#' With data, one can determine the cell for each observation using \code{\link{predict}}
|
||||
#'
|
||||
#' @param X_range Such as from \code{\link{get_X_range}}
|
||||
|
@ -93,14 +93,15 @@ get_X_range <- function(X) {
|
|||
#' Note that currently if X has values more extreme (e.g., for numeric or factor levels ) than was used to generate the partition
|
||||
#' then we will return NA unless you provide an updated X_range.
|
||||
#'
|
||||
#' @param obj partition
|
||||
#' @param object partition
|
||||
#' @param X X data or list of X
|
||||
#' @param X_range (Optional) overrides the partition$X_range
|
||||
#' @param ... Additional arguments. Unused.
|
||||
#'
|
||||
#' @return Factor
|
||||
#' @export
|
||||
predict.grid_partition <- function(obj, X, X_range=NULL) {
|
||||
facts = get_factors_from_partition(obj, X, X_range=X_range)
|
||||
predict.grid_partition <- function(object, X, X_range=NULL, ...) {
|
||||
facts = get_factors_from_partition(object, X, X_range=X_range)
|
||||
return(interaction_m(facts, is_sep_sample(X)))
|
||||
}
|
||||
|
||||
|
@ -126,31 +127,27 @@ num_cells.grid_partition <- function(obj) {
|
|||
print.grid_partition <- function(x, do_str=TRUE, drop_unsplit=TRUE, digits=NULL, ...) {
|
||||
#To check: digits
|
||||
assert_that(is.flag(do_str), is.flag(drop_unsplit), msg="One of do_str or drop_unsplit are not flags")
|
||||
return(print(get_desc_df.grid_partition(x, do_str=do_str, drop_unsplit=drop_unsplit, digits=digits),
|
||||
return(print(get_desc_df(x, do_str=do_str, drop_unsplit=drop_unsplit, digits=digits),
|
||||
digits=digits, ...))
|
||||
}
|
||||
|
||||
|
||||
#' Get descriptive data.frame for grid_partition
|
||||
#' Get descriptive data.frame
|
||||
#'
|
||||
#' Get information for each cell
|
||||
#'
|
||||
#' @inheritParams get_desc_df
|
||||
#'
|
||||
#' A dataset with rows for each cell and columns defining partitioning
|
||||
#'
|
||||
#' @param partition Partition
|
||||
#' @param cont_bounds_inf If True, will put continuous bounds as -Inf/Inf. Otherwise will use X_range bounds
|
||||
#' @param do_str If True, use a string like "(a, b]", otherwise have two separate columns with a and b
|
||||
#' @param drop_unsplit If True, drop columns for variables overwhich the partition did not split
|
||||
#' @param digits digits option
|
||||
#' @param unsplit_cat_star if we don't split on a categorical var, should we show as "*" (otherwise list all levels)
|
||||
#'
|
||||
#' @return data.frame
|
||||
#'
|
||||
#' @return data.frame with columns: partitioning columns
|
||||
#' @export
|
||||
get_desc_df.grid_partition <- function(partition, cont_bounds_inf=TRUE, do_str=FALSE, drop_unsplit=FALSE,
|
||||
digits=NULL, unsplit_cat_star=TRUE) {
|
||||
get_desc_df.grid_partition <- function(obj, cont_bounds_inf=TRUE, do_str=FALSE, drop_unsplit=FALSE,
|
||||
digits=NULL, unsplit_cat_star=TRUE, ...) {
|
||||
#To check: digits
|
||||
assert_that(is.flag(cont_bounds_inf), is.flag(do_str), is.flag(drop_unsplit), is.flag(unsplit_cat_star), msg="One (cont_bounds_inf, do_str, drop_unsplit, unsplit_cat_star)of are not flags.")
|
||||
# A split at x_k means that we split to those <= and >
|
||||
|
||||
n_segs = partition$nsplits_by_dim+1
|
||||
n_segs = obj$nsplits_by_dim+1
|
||||
n_cells = prod(n_segs)
|
||||
|
||||
if(n_cells==1 & drop_unsplit) return(as.data.frame(matrix(NA, nrow=1, ncol=0)))
|
||||
|
@ -160,19 +157,19 @@ get_desc_df.grid_partition <- function(partition, cont_bounds_inf=TRUE, do_str=F
|
|||
#desc_df = data.frame(labels=levels(grid_fit$cell_stats$cell_factor),
|
||||
# stringsAsFactors = FALSE) %>% separate(labels, names(X), "(?<=]).(?=[(])", PERL=TRUE)
|
||||
|
||||
K = length(partition$nsplits_by_dim)
|
||||
X_range = partition$X_range
|
||||
K = length(obj$nsplits_by_dim)
|
||||
X_range = obj$X_range
|
||||
if(cont_bounds_inf) {
|
||||
for(k in 1:K) {
|
||||
if(!k %in% partition$dim_cat) X_range[[k]] = c(-Inf, Inf)
|
||||
if(!k %in% obj$dim_cat) X_range[[k]] = c(-Inf, Inf)
|
||||
}
|
||||
}
|
||||
colnames=partition$varnames
|
||||
colnames=obj$varnames
|
||||
if(is.null(colnames)) colnames = paste("X", 1:K, sep="")
|
||||
|
||||
list_of_windows = list()
|
||||
for(k in 1:K) {
|
||||
list_of_windows[[k]] = if(k %in% partition$dim_cat) get_windows_cat(partition$s_by_dim[[k]], X_range[[k]]) else get_window_cont(partition$s_by_dim[[k]], X_range[[k]])
|
||||
list_of_windows[[k]] = if(k %in% obj$dim_cat) get_windows_cat(obj$s_by_dim[[k]], X_range[[k]]) else get_window_cont(obj$s_by_dim[[k]], X_range[[k]])
|
||||
}
|
||||
|
||||
format_cell_cat <- function(win, unsplit_cat_star, n_tot_dim, sep=", ") {
|
||||
|
@ -195,7 +192,7 @@ get_desc_df.grid_partition <- function(partition, cont_bounds_inf=TRUE, do_str=F
|
|||
segment_indexes = segment_indexes_from_cell_i(cell_i, n_segs)
|
||||
win = list_of_windows[[k]][[segment_indexes[k]]]
|
||||
raw_data_k[[cell_i]] = win
|
||||
str_data_k[cell_i] = if(k %in% partition$dim_cat) format_cell_cat(win, unsplit_cat_star, length(list_of_windows[[k]])) else format_cell_cont(win)
|
||||
str_data_k[cell_i] = if(k %in% obj$dim_cat) format_cell_cat(win, unsplit_cat_star, length(list_of_windows[[k]])) else format_cell_cont(win)
|
||||
}
|
||||
raw_data[[colnames[k]]] = cbind(raw_data_k) #make a list-column: https://stackoverflow.com/a/51308306
|
||||
str_data[[colnames[k]]] = factor(str_data_k, levels=unique(str_data_k)) #will be in low-high order
|
||||
|
@ -334,7 +331,7 @@ partition_split <- function(k, X_k_cut) {
|
|||
return(structure(list(k=k, X_k_cut=X_k_cut), class=c("partition_split")))
|
||||
}
|
||||
|
||||
#' Is grid_partition_split
|
||||
#' Is \code{partition_split}
|
||||
#'
|
||||
#' Tests whether or not an object is a \code{partition_split}.
|
||||
#'
|
||||
|
@ -342,8 +339,8 @@ partition_split <- function(k, X_k_cut) {
|
|||
#'
|
||||
#' @return Boolean
|
||||
#' @export
|
||||
#' @describeIn grid_partition_split is grid_partition_split
|
||||
is_grid_partition_split <- function(x){
|
||||
#' @describeIn partition_split is partition_split
|
||||
is_partition_split <- function(x){
|
||||
inherits(x, "partition_split")
|
||||
}
|
||||
|
||||
|
@ -414,6 +411,7 @@ print.partition_split <- function(x, ...) {
|
|||
#' @param bump_samples Number of bump bootstraps (default 0), or list of such length where each item is a bootstrap sample.
|
||||
#' If m_mode==DS.MULTI_SAMPLE then each item is a sublist with such bootstrap samples over each dataset.
|
||||
#' @param bump_ratio For bootstraps the ratio of sample size to sample (between 0 and 1, default 1)
|
||||
#' @param ... Additional params.
|
||||
#'
|
||||
#' @return An object.
|
||||
#' \item{partition}{Grid Partition (type=\code{\link{grid_partition}})}
|
||||
|
@ -863,7 +861,7 @@ fit_partition_full_k <- function(k, y, X_d, d, X_range, pb, debug, valid_breaks,
|
|||
win_mask = gen_cont_window_mask_m(X_d, k, win_LB, win_UB)
|
||||
win_mask_aux = gen_cont_window_mask_m(X_aux, k, win_LB, win_UB)
|
||||
for(X_k_cut_i in seq_len(n_pot_break_points_k)) { #cut-point is top end of segment,
|
||||
if (verbosity>0 && !is.null(pb)) utils::setTxtProgressBar(pb, utils::getTxtProgressBar(pb)+1)
|
||||
if (verbosity>0 && !is.null(pb)) setTxtProgressBar(pb, getTxtProgressBar(pb)+1)
|
||||
X_k_cut = breaks_per_dim[[k]][X_k_cut_i]
|
||||
if(X_k_cut %in% partition$s_by_dim[[k]]) {
|
||||
prev_split_checked = X_k_cut
|
||||
|
@ -961,7 +959,7 @@ fit_partition_full_k <- function(k, y, X_d, d, X_range, pb, debug, valid_breaks,
|
|||
for(win_split_i in seq_len(length(pot_splits))) {
|
||||
win_split_val = pot_splits[[win_split_i]]
|
||||
#TODO: Refactor with continuous case
|
||||
if (verbosity>0 && !is.null(pb)) utils::setTxtProgressBar(pb, utils::getTxtProgressBar(pb)+1)
|
||||
if (verbosity>0 && !is.null(pb)) setTxtProgressBar(pb, getTxtProgressBar(pb)+1)
|
||||
if(!valid_breaks_k[[window_i]][win_split_i]) next
|
||||
|
||||
new_split = partition_split(k, win_split_val)
|
||||
|
@ -1092,7 +1090,7 @@ fit_partition_full <- function(y, X, d=NULL, X_aux=NULL, d_aux=NULL, X_range, ma
|
|||
if(verbosity>0) {
|
||||
cat(paste("Grid > Fitting > split ", split_i, ": Started\n"))
|
||||
t1 = Sys.time()
|
||||
if(is.null(pr_cl)) pb = utils::txtProgressBar(0, n_cuts_total, style = style)
|
||||
if(is.null(pr_cl)) pb = txtProgressBar(0, n_cuts_total, style = style)
|
||||
}
|
||||
|
||||
params = c(list(y=y, X_d=X, d=d, X_range=X_range, pb=NULL, debug=debug, valid_breaks=valid_breaks,
|
||||
|
|
43
R/utils.R
43
R/utils.R
|
@ -15,6 +15,29 @@ num_cells <- function(obj) {
|
|||
UseMethod("num_cells", obj)
|
||||
}
|
||||
|
||||
#' Get descriptive data.frame
|
||||
#'
|
||||
#' Get information for each cell
|
||||
#'
|
||||
#' @param obj partition object
|
||||
#' @param cont_bounds_inf Should "Inf" be used for continuous bounds (otherwise
|
||||
#' the bounds from X_range)
|
||||
#' @param do_str If True, use a string like "(a, b]", otherwise have two
|
||||
#' separate columns with a and b
|
||||
#' @param drop_unsplit If True, drop columns for variables over which the
|
||||
#' partition did not split
|
||||
#' @param digits digits Option (default is NULL)
|
||||
#' @param unsplit_cat_star Should unsplit categorical variables be listed as
|
||||
#' "*", otherwise all factor labels will be used.
|
||||
#' @param ... Additional arguments.
|
||||
#'
|
||||
#' @return data.frame with columns: partitioning columns
|
||||
#' @export
|
||||
get_desc_df <- function(obj, cont_bounds_inf=TRUE, do_str=FALSE, drop_unsplit=FALSE,
|
||||
digits=NULL, unsplit_cat_star=TRUE, ...) {
|
||||
UseMethod("get_desc_df", obj)
|
||||
}
|
||||
|
||||
# General Utils ----------------
|
||||
|
||||
#handles vectors and 2D structures
|
||||
|
@ -69,17 +92,17 @@ is_factor_dim_k <- function(X, k) {
|
|||
}
|
||||
|
||||
|
||||
#Standard way to check if vector is constant is const_vectr(), but is O(n).
|
||||
#Checking element-by-element would often be faster, but this is inefficient in R
|
||||
#and faster in C. const_vect1() and const_vect2() were two versions (first using
|
||||
#'inline', second just Rcpp), but couldn't get to work in building a package.
|
||||
#The Rcpp version is now in a separate file.
|
||||
# Standard way to check if vector is constant is const_vectr(), but is O(n).
|
||||
# Checking element-by-element would often be faster, but this is inefficient in R
|
||||
# and faster in C. const_vect1() and const_vect2() were two versions (first using
|
||||
# 'inline', second just Rcpp), but couldn't get to work in building a package.
|
||||
# The Rcpp version is now in a separate file.
|
||||
|
||||
const_vectr <- function(x) {
|
||||
if(length(x)==0) return(TRUE)
|
||||
r = range(x)
|
||||
return(r[1]==r[2])
|
||||
}
|
||||
# const_vectr <- function(x) {
|
||||
# if(length(x)==0) return(TRUE)
|
||||
# r = range(x)
|
||||
# return(r[1]==r[2])
|
||||
# }
|
||||
|
||||
# Fold utils --------------------------
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
# Project
|
||||
Tools for finding heterogeneous treatment effects (and means) based on partitioning the covariate/feature space via full cross-cuts and solved via greedy search. A typical usage would be analyzing and experiment to find the high-level subgroups (a coarse partition that is useful to humans) that differ in their estimated treatment effects.
|
||||
Tools for finding heterogeneous treatment effects (and means) based on partitioning the covariate/feature space via full cross-cuts and solved via greedy search. A typical usage would be analyzing an experiment to find the high-level subgroups (a coarse partition that is useful to humans) that differ in their estimated treatment effects.
|
||||
|
||||
This package is inspired by, and uses ideas from, [Causal Tree](https://github.com/susanathey/causalTree) but aims to have the partition be more interpretable and have better accuracy. It is slower, though for high-level partitions this is usually not an issue.
|
||||
|
||||
|
|
10
SUPPORT.md
10
SUPPORT.md
|
@ -4,11 +4,15 @@ Requirements
|
|||
|
||||
Notes on building:
|
||||
- You will need RTools (probably at least v3.5)
|
||||
- install (renv)[https://rstudio.github.io/renv/articles/renv.html] package. Then after opening the project you should be able to use `renv::restore()`.
|
||||
- Given the cpp you should use "Install and restart" (and not use "Load All") to get the new library. On Windows, when building, you should restart the R session before this otherwise it can't copy over the DLL (it stays in memory).
|
||||
- If you want updated vignettes to show up when using "Load All", you can use `devtools::build_vignettes()`.
|
||||
- Install the [renv](https://rstudio.github.io/renv/articles/renv.html) package. Then after opening the project you should be able to use `renv::restore()`. Some packages (such as `brio, cpp11, knitr, ragg, systemfonts, textshaping`) aren't mentioned directly, but are used in building vignettes.
|
||||
- Given the cpp you should use "Install and restart" (and not use "Load All") to get the new library (though you might be able to get away w/o it if you don't change the DLL). On Windows, when building, you should restart the R session before this otherwise it can't copy over the DLL (it stays in memory).
|
||||
- If you want updated vignettes to show up when using "Load All", you can use `devtools::build_vignettes()` (possibly with `install=FALSE` to speed things up). They will get placed in `doc/` (not `docs`).
|
||||
- To build the html help in `docs/` use `pkgdown::build_site()`.
|
||||
- Building copies everything over to temp dir and then deletes, so might want to move the large files (`project/sim.RData`) out to save time.
|
||||
|
||||
Project:
|
||||
- If using MRAN you might get warnings from `renv` about `Revoutils`. You can rename `renv/settings.dcf.bak` to `renv/settings.dcf` and edit the line `external.libraries: C:\Program Files\Microsoft\R Open\R-X.X.X\library` with the right library version.
|
||||
|
||||
|
||||
# Support
|
||||
|
||||
|
|
|
@ -120,9 +120,10 @@
|
|||
<p>Notes on building:</p>
|
||||
<ul>
|
||||
<li>You will need RTools (probably at least v3.5)</li>
|
||||
<li>install (renv)[<a href="https://rstudio.github.io/renv/articles/renv.html" class="uri">https://rstudio.github.io/renv/articles/renv.html</a>] package. Then after opening the project you should be able to use <code><a href="https://rstudio.github.io/renv//reference/restore.html">renv::restore()</a></code>.</li>
|
||||
<li>Given the cpp you should use “Install and restart” (and not use “Load All”) to get the new library. On Windows, when building, you should restart the R session before this otherwise it can’t copy over the DLL (it stays in memory).</li>
|
||||
<li>If you want updated vignettes to show up when using “Load All”, you can use <code><a href="https://devtools.r-lib.org//reference/build_vignettes.html">devtools::build_vignettes()</a></code>.</li>
|
||||
<li>install (renv)[<a href="https://rstudio.github.io/renv/articles/renv.html" class="uri">https://rstudio.github.io/renv/articles/renv.html</a>] package. Then after opening the project you should be able to use <code><a href="https://rstudio.github.io/renv//reference/restore.html">renv::restore()</a></code>. Some packages (such as <code>brio, cpp11, knitr, ragg, systemfonts, textshaping</code>) aren’t mentioned directly, but are used in building vignettes.</li>
|
||||
<li>Given the cpp you should use “Install and restart” (and not use “Load All”) to get the new library (though you might be able to get away w/o it if you don’t change the DLL). On Windows, when building, you should restart the R session before this otherwise it can’t copy over the DLL (it stays in memory).</li>
|
||||
<li>If you want updated vignettes to show up when using “Load All”, you can use <code><a href="https://devtools.r-lib.org//reference/build_vignettes.html">devtools::build_vignettes()</a></code> (possibly with <code>install=FALSE</code> to speed things up). They will get placed in <code>doc/</code> (not <code>docs</code>).</li>
|
||||
<li>To build the html help in <code>docs/</code> use <code><a href="https://pkgdown.r-lib.org/reference/build_site.html">pkgdown::build_site()</a></code>.</li>
|
||||
<li>Building copies everything over to temp dir and then deletes, so might want to move the large files (<code>project/sim.RData</code>) out to save time.</li>
|
||||
</ul>
|
||||
</div>
|
||||
|
|
|
@ -94,23 +94,29 @@
|
|||
<span class="va">d</span> <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/stats/Binomial.html">rbinom</a></span><span class="op">(</span><span class="va">N</span>, <span class="fl">1</span>, <span class="fl">0.5</span><span class="op">)</span> <span class="co">#treatment assignment</span>
|
||||
<span class="va">tau</span> <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/base/integer.html">as.integer</a></span><span class="op">(</span><span class="va">X</span><span class="op">[</span>,<span class="fl">1</span><span class="op">]</span><span class="op">></span><span class="fl">.5</span><span class="op">)</span><span class="op">*</span><span class="fl">2</span><span class="op">-</span><span class="fl">1</span> <span class="co">#true treatment effect (just heterogeneous across X1)</span>
|
||||
<span class="va">y</span> <span class="op">=</span> <span class="va">d</span><span class="op">*</span><span class="va">tau</span> <span class="op">+</span> <span class="fu"><a href="https://rdrr.io/r/stats/Normal.html">rnorm</a></span><span class="op">(</span><span class="va">N</span>, <span class="fl">0</span>, <span class="va">err_sd</span><span class="op">)</span> <span class="co">#outcome</span></code></pre></div>
|
||||
<p>We typically want a high-level partition for “human” consumption. To save time, avoid cells with too few observations, and reduce the chance of splitting from running many noisy tests, it’s common to only look for a few splits per dimension. If we don’t specify this, the function will try every possible split across each dimension.</p>
|
||||
<div class="sourceCode" id="cb4"><pre class="downlit sourceCode r">
|
||||
<code class="sourceCode R"><span class="va">est_part0</span> <span class="op">=</span> <span class="fu"><a href="../reference/fit_estimate_partition.html">fit_estimate_partition</a></span><span class="op">(</span><span class="va">y</span>, <span class="va">X</span>, <span class="va">d</span>, cv_folds<span class="op">=</span><span class="fl">2</span><span class="op">)</span>
|
||||
<span class="fu"><a href="../reference/get_desc_df.html">get_desc_df</a></span><span class="op">(</span><span class="va">est_part0</span><span class="op">)</span>
|
||||
<span class="co">#> X1 N_est param_ests pval</span>
|
||||
<span class="co">#> 1 <=0.5050542 261 -0.7767593 3.681519e-09</span>
|
||||
<span class="co">#> 2 >0.5050542 239 0.8697289 3.792252e-11</span></code></pre></div>
|
||||
<p>We typically want a high-level partition for “human” consumption. To save time, avoid cells with too few observations, and reduce the chance of splitting from running many noisy tests, it’s common to only look for a few splits per dimension. If we don’t specify this, the function will try every possible split across each dimension.</p>
|
||||
<div class="sourceCode" id="cb5"><pre class="downlit sourceCode r">
|
||||
<code class="sourceCode R"><span class="co"># With just a scalar, we will split at points equal across the quantile-distribution for each feature.</span>
|
||||
<span class="va">breaks</span> <span class="op">=</span> <span class="fl">5</span>
|
||||
<span class="co">#Otherwise we can explicitly list the potential splits to evaluate.</span>
|
||||
<span class="va">breaks</span> <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/base/rep.html">rep</a></span><span class="op">(</span><span class="fu"><a href="https://rdrr.io/r/base/list.html">list</a></span><span class="op">(</span><span class="fu"><a href="https://rdrr.io/r/base/seq.html">seq</a></span><span class="op">(</span><span class="va">breaks</span><span class="op">)</span><span class="op">/</span><span class="op">(</span><span class="va">breaks</span><span class="op">+</span><span class="fl">1</span><span class="op">)</span><span class="op">)</span>, <span class="va">K</span><span class="op">)</span>
|
||||
<span class="va">est_part</span> <span class="op">=</span> <span class="fu"><a href="../reference/fit_estimate_partition.html">fit_estimate_partition</a></span><span class="op">(</span><span class="va">y</span>, <span class="va">X</span>, <span class="va">d</span>, breaks_per_dim<span class="op">=</span><span class="va">breaks</span>, cv_folds<span class="op">=</span><span class="fl">2</span><span class="op">)</span></code></pre></div>
|
||||
<div class="sourceCode" id="cb5"><pre class="downlit sourceCode r">
|
||||
<code class="sourceCode R"><span class="fu"><a href="https://rdrr.io/r/graphics/plot.html">plot</a></span><span class="op">(</span><span class="va">est_part</span><span class="op">)</span></code></pre></div>
|
||||
<p><img src="vignette_files/figure-html/unnamed-chunk-5-1.png" width="700"></p>
|
||||
<div class="sourceCode" id="cb6"><pre class="downlit sourceCode r">
|
||||
<code class="sourceCode R"><span class="fu"><a href="../reference/get_desc_df.estimated_partition.html">get_desc_df.estimated_partition</a></span><span class="op">(</span><span class="va">est_part</span><span class="op">)</span>
|
||||
<span class="co">#> X1 N_est param_ests pval</span>
|
||||
<span class="co">#> 1 <=0.5 258 -0.7946142 1.950406e-09</span>
|
||||
<span class="co">#> 2 >0.5 242 0.8763644 1.757105e-11</span></code></pre></div>
|
||||
<p>We can manually estimate this simple model given the partition</p>
|
||||
<code class="sourceCode R"><span class="fu"><a href="https://rdrr.io/r/graphics/plot.html">plot</a></span><span class="op">(</span><span class="va">est_part</span><span class="op">)</span></code></pre></div>
|
||||
<p><img src="vignette_files/figure-html/unnamed-chunk-6-1.png" width="700"></p>
|
||||
<div class="sourceCode" id="cb7"><pre class="downlit sourceCode r">
|
||||
<code class="sourceCode R"><span class="fu"><a href="../reference/get_desc_df.html">get_desc_df</a></span><span class="op">(</span><span class="va">est_part</span><span class="op">)</span>
|
||||
<span class="co">#> X1 N_est param_ests pval</span>
|
||||
<span class="co">#> 1 <=0.5 255 -0.9035292 3.379837e-12</span>
|
||||
<span class="co">#> 2 >0.5 245 0.9557823 2.823001e-12</span></code></pre></div>
|
||||
<p>We can manually estimate this simple model given the partition</p>
|
||||
<div class="sourceCode" id="cb8"><pre class="downlit sourceCode r">
|
||||
<code class="sourceCode R"><span class="va">est_df</span> <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/base/data.frame.html">data.frame</a></span><span class="op">(</span>y<span class="op">=</span><span class="va">y</span>, d<span class="op">=</span><span class="va">d</span>, f<span class="op">=</span><span class="fu"><a href="https://rdrr.io/r/stats/predict.html">predict</a></span><span class="op">(</span><span class="va">est_part</span><span class="op">$</span><span class="va">partition</span>, <span class="va">X</span><span class="op">)</span><span class="op">)</span>
|
||||
<span class="fu"><a href="https://rdrr.io/r/base/summary.html">summary</a></span><span class="op">(</span><span class="fu"><a href="https://rdrr.io/r/stats/lm.html">lm</a></span><span class="op">(</span><span class="va">y</span><span class="op">~</span><span class="fl">0</span><span class="op">+</span><span class="va">f</span><span class="op">+</span><span class="va">d</span><span class="op">:</span><span class="va">f</span>, data<span class="op">=</span><span class="va">est_df</span><span class="op">[</span><span class="op">-</span><span class="va">est_part</span><span class="op">$</span><span class="va">index_tr</span>,<span class="op">]</span><span class="op">)</span><span class="op">)</span>
|
||||
<span class="co">#> </span>
|
||||
|
@ -120,78 +126,78 @@
|
|||
<span class="co">#> </span>
|
||||
<span class="co">#> Residuals:</span>
|
||||
<span class="co">#> Min 1Q Median 3Q Max </span>
|
||||
<span class="co">#> -3.1460 -0.6776 0.0409 0.7216 3.0490 </span>
|
||||
<span class="co">#> -3.1360 -0.6908 -0.0019 0.7014 2.6253 </span>
|
||||
<span class="co">#> </span>
|
||||
<span class="co">#> Coefficients:</span>
|
||||
<span class="co">#> Estimate Std. Error t value Pr(>|t|) </span>
|
||||
<span class="co">#> f(-1,0.5].(-0.999,2].(-1,2] -0.11160 0.08705 -1.282 0.200 </span>
|
||||
<span class="co">#> f(0.5,2].(-0.999,2].(-1,2] 0.06623 0.08947 0.740 0.459 </span>
|
||||
<span class="co">#> f(-1,0.5].(-0.999,2].(-1,2]:d -0.79461 0.12407 -6.404 3.51e-10 ***</span>
|
||||
<span class="co">#> f(0.5,2].(-0.999,2].(-1,2]:d 0.87636 0.12813 6.840 2.35e-11 ***</span>
|
||||
<span class="co">#> f(-1,0.5].(-0.999,2].(-1,2] -0.09209 0.08434 -1.092 0.275 </span>
|
||||
<span class="co">#> f(0.5,2].(-0.999,2].(-1,2] 0.05627 0.08786 0.640 0.522 </span>
|
||||
<span class="co">#> f(-1,0.5].(-0.999,2].(-1,2]:d -0.90353 0.12559 -7.194 2.33e-12 ***</span>
|
||||
<span class="co">#> f(0.5,2].(-0.999,2].(-1,2]:d 0.95578 0.12769 7.485 3.28e-13 ***</span>
|
||||
<span class="co">#> ---</span>
|
||||
<span class="co">#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1</span>
|
||||
<span class="co">#> </span>
|
||||
<span class="co">#> Residual standard error: 0.9963 on 496 degrees of freedom</span>
|
||||
<span class="co">#> Multiple R-squared: 0.3003, Adjusted R-squared: 0.2947 </span>
|
||||
<span class="co">#> F-statistic: 53.22 on 4 and 496 DF, p-value: < 2.2e-16</span></code></pre></div>
|
||||
<span class="co">#> Residual standard error: 0.9979 on 496 degrees of freedom</span>
|
||||
<span class="co">#> Multiple R-squared: 0.3218, Adjusted R-squared: 0.3164 </span>
|
||||
<span class="co">#> F-statistic: 58.85 on 4 and 496 DF, p-value: < 2.2e-16</span></code></pre></div>
|
||||
<p>Sometimes we want a different level of complexity than that picked by CV. Either we can pre-specify which partition in the sequence that we want (using the <code>partition_i</code> parameter), or we can look at the sequence of objective function values and see where additional splits only provide marginal improvements.</p>
|
||||
<div class="sourceCode" id="cb8"><pre class="downlit sourceCode r">
|
||||
<code class="sourceCode R"><span class="fu"><a href="https://rdrr.io/r/base/print.html">print</a></span><span class="op">(</span><span class="fu"><a href="https://rdrr.io/r/base/paste.html">paste</a></span><span class="op">(</span><span class="st">"In-sample Objective function values: "</span>, <span class="fu"><a href="https://rdrr.io/r/base/paste.html">paste</a></span><span class="op">(</span><span class="va">est_part</span><span class="op">$</span><span class="va">is_obj_val_seq</span>, collapse<span class="op">=</span><span class="st">" "</span><span class="op">)</span><span class="op">)</span><span class="op">)</span>
|
||||
<span class="co">#> [1] "In-sample Objective function values: -0.00327762408600242 -1.0456589955051 -1.07936864222915 -1.13657424922757 -1.17891173047444 -1.22457907347801 -1.29651132376863 -1.40899836012424 -1.47955500597249 -1.56235757695552"</span></code></pre></div>
|
||||
<p>Compare this with the average treatment effect for the whole and estimation-only samples</p>
|
||||
<div class="sourceCode" id="cb9"><pre class="downlit sourceCode r">
|
||||
<code class="sourceCode R"><span class="fu"><a href="https://rdrr.io/r/base/print.html">print</a></span><span class="op">(</span><span class="fu"><a href="https://rdrr.io/r/base/paste.html">paste</a></span><span class="op">(</span><span class="st">"In-sample Objective function values: "</span>, <span class="fu"><a href="https://rdrr.io/r/base/paste.html">paste</a></span><span class="op">(</span><span class="va">est_part</span><span class="op">$</span><span class="va">is_obj_val_seq</span>, collapse<span class="op">=</span><span class="st">" "</span><span class="op">)</span><span class="op">)</span><span class="op">)</span>
|
||||
<span class="co">#> [1] "In-sample Objective function values: -0.00130792756957961 -0.860071145786862 -0.906298192191155 -0.974137597678458 -1.01692472825141 -1.05861007636975 -1.1137648149499 -1.16690858898675 -1.22036187891834 -1.23615779557238"</span></code></pre></div>
|
||||
<p>Compare this with the average treatment effect for the whole and estimation-only samples</p>
|
||||
<div class="sourceCode" id="cb10"><pre class="downlit sourceCode r">
|
||||
<code class="sourceCode R"><span class="va">est_part</span><span class="op">$</span><span class="va">full_stat_df</span>
|
||||
<span class="co">#> sample N_est param_ests var_ests tstats ci_u ci_l</span>
|
||||
<span class="co">#> N_tr all 1000 0.033383182 0.005710407 0.44176796 0.1816721 -0.1149057</span>
|
||||
<span class="co">#> N_es est 500 0.009358178 0.011304896 0.08801527 0.2182580 -0.1995417</span>
|
||||
<span class="co">#> sample N_est param_ests var_ests tstats ci_u ci_l</span>
|
||||
<span class="co">#> N_tr all 1000 0.03338318 0.005710407 0.4417680 0.1816721 -0.1149057</span>
|
||||
<span class="co">#> N_es est 500 0.03350558 0.011765447 0.3088965 0.2466182 -0.1796070</span>
|
||||
<span class="co">#> pval</span>
|
||||
<span class="co">#> N_tr 0.6587528</span>
|
||||
<span class="co">#> N_es 0.9298999</span></code></pre></div>
|
||||
<span class="co">#> N_es 0.7575295</span></code></pre></div>
|
||||
<p>How important is each of the dimensions of X for the objective function? We refit the model without each dimension and see the change in the objective function.</p>
|
||||
<div class="sourceCode" id="cb10"><pre class="downlit sourceCode r">
|
||||
<div class="sourceCode" id="cb11"><pre class="downlit sourceCode r">
|
||||
<code class="sourceCode R"><span class="va">est_part</span><span class="op">$</span><span class="va">importance_weights</span>
|
||||
<span class="co">#> NULL</span></code></pre></div>
|
||||
<p>The first feature is the only one that is useful.</p>
|
||||
<p>Are there any interactions between the importances? (That is, if we remove X1, does the importance of X2 change? This is done by dropping pairs of features at a time and seeing how they differ from single-feature droppings.)</p>
|
||||
<div class="sourceCode" id="cb11"><pre class="downlit sourceCode r">
|
||||
<div class="sourceCode" id="cb12"><pre class="downlit sourceCode r">
|
||||
<code class="sourceCode R"><span class="va">est_part</span><span class="op">$</span><span class="va">interaction_weights</span>
|
||||
<span class="co">#> NULL</span></code></pre></div>
|
||||
<p>Essentially no.</p>
|
||||
<p>Get the observation-level estimated treatment effects.</p>
|
||||
<div class="sourceCode" id="cb12"><pre class="downlit sourceCode r">
|
||||
<div class="sourceCode" id="cb13"><pre class="downlit sourceCode r">
|
||||
<code class="sourceCode R"><span class="va">tau_hat</span> <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/stats/predict.html">predict</a></span><span class="op">(</span><span class="va">est_part</span>, new_X<span class="op">=</span><span class="va">X</span><span class="op">)</span></code></pre></div>
|
||||
<p>With many estimates, we may wish to account for multiple testing when checking if “there are any negative (or positive) effects”</p>
|
||||
<div class="sourceCode" id="cb13"><pre class="downlit sourceCode r">
|
||||
<div class="sourceCode" id="cb14"><pre class="downlit sourceCode r">
|
||||
<code class="sourceCode R"><span class="va">any_neg</span> <span class="op">=</span> <span class="fu"><a href="../reference/test_any_sign_effect.html">test_any_sign_effect</a></span><span class="op">(</span><span class="va">est_part</span>, check_negative<span class="op">=</span><span class="cn">T</span><span class="op">)</span>
|
||||
<span class="fu"><a href="https://rdrr.io/r/base/print.html">print</a></span><span class="op">(</span><span class="fu"><a href="https://rdrr.io/r/base/paste.html">paste</a></span><span class="op">(</span><span class="st">"Adjusted 1-side p-values testing if negative:"</span>, <span class="fu"><a href="https://rdrr.io/r/base/paste.html">paste</a></span><span class="op">(</span><span class="va">any_neg</span><span class="op">$</span><span class="va">pval1s_fdr</span>, collapse<span class="op">=</span><span class="st">", "</span><span class="op">)</span><span class="op">)</span><span class="op">)</span>
|
||||
<span class="co">#> [1] "Adjusted 1-side p-values testing if negative: 1.95040604209589e-09, 0.999999999991214"</span></code></pre></div>
|
||||
<span class="co">#> [1] "Adjusted 1-side p-values testing if negative: 3.37983707236465e-12, 0.999999999998588"</span></code></pre></div>
|
||||
<p>Now let’s look at a case where there’s heterogeneity across all three dimensions.</p>
|
||||
<div class="sourceCode" id="cb14"><pre class="downlit sourceCode r">
|
||||
<div class="sourceCode" id="cb15"><pre class="downlit sourceCode r">
|
||||
<code class="sourceCode R"><span class="va">tau_3</span> <span class="op">=</span> <span class="op">(</span><span class="fu"><a href="https://rdrr.io/r/base/integer.html">as.integer</a></span><span class="op">(</span><span class="va">X</span><span class="op">[</span>,<span class="fl">1</span><span class="op">]</span><span class="op">></span><span class="fl">0.5</span><span class="op">)</span><span class="op">*</span><span class="fl">2</span><span class="op">-</span><span class="fl">1</span><span class="op">)</span> <span class="op">+</span> <span class="op">(</span><span class="fu"><a href="https://rdrr.io/r/base/integer.html">as.integer</a></span><span class="op">(</span><span class="va">X</span><span class="op">[</span>,<span class="fl">2</span><span class="op">]</span><span class="op">></span><span class="fl">0.5</span><span class="op">)</span><span class="op">*</span><span class="fl">2</span><span class="op">-</span><span class="fl">1</span><span class="op">)</span><span class="op">*</span><span class="fl">2</span> <span class="op">+</span> <span class="op">(</span><span class="fu"><a href="https://rdrr.io/r/base/integer.html">as.integer</a></span><span class="op">(</span><span class="va">X</span><span class="op">[</span>,<span class="fl">3</span><span class="op">]</span><span class="op">></span><span class="fl">0.5</span><span class="op">)</span><span class="op">*</span><span class="fl">2</span><span class="op">-</span><span class="fl">1</span><span class="op">)</span><span class="op">*</span><span class="fl">3</span>
|
||||
<span class="va">y_3</span> <span class="op">=</span> <span class="va">d</span><span class="op">*</span><span class="va">tau_3</span> <span class="op">+</span> <span class="fu"><a href="https://rdrr.io/r/stats/Normal.html">rnorm</a></span><span class="op">(</span><span class="va">N</span>, <span class="fl">0</span>, <span class="va">err_sd</span><span class="op">)</span>
|
||||
<span class="va">est_part_3</span> <span class="op">=</span> <span class="fu"><a href="../reference/fit_estimate_partition.html">fit_estimate_partition</a></span><span class="op">(</span><span class="va">y_3</span>, <span class="va">X</span>, <span class="va">d</span>, breaks_per_dim<span class="op">=</span><span class="fl">5</span>, partition_i<span class="op">=</span><span class="fl">4</span><span class="op">)</span>
|
||||
<span class="fu"><a href="../reference/get_desc_df.estimated_partition.html">get_desc_df.estimated_partition</a></span><span class="op">(</span><span class="va">est_part_3</span><span class="op">)</span>
|
||||
<span class="fu"><a href="../reference/get_desc_df.html">get_desc_df</a></span><span class="op">(</span><span class="va">est_part_3</span><span class="op">)</span>
|
||||
<span class="co">#> X1 X2 X3 N_est param_ests pval</span>
|
||||
<span class="co">#> 1 <=0.4844432 <=0.5158526 <=0.4956977 62 -5.9629081 4.120118e-29</span>
|
||||
<span class="co">#> 2 >0.4844432 <=0.5158526 <=0.4956977 60 -3.9205417 1.021206e-21</span>
|
||||
<span class="co">#> 3 <=0.4844432 >0.5158526 <=0.4956977 65 -1.8856467 1.996540e-12</span>
|
||||
<span class="co">#> 4 >0.4844432 >0.5158526 <=0.4956977 64 -0.2638079 3.253642e-01</span>
|
||||
<span class="co">#> 5 <=0.4844432 <=0.5158526 >0.4956977 60 0.1919869 4.593925e-01</span>
|
||||
<span class="co">#> 6 >0.4844432 <=0.5158526 >0.4956977 69 1.9747946 1.272530e-09</span>
|
||||
<span class="co">#> 7 <=0.4844432 >0.5158526 >0.4956977 56 4.1635551 2.560607e-21</span>
|
||||
<span class="co">#> 8 >0.4844432 >0.5158526 >0.4956977 64 6.1160774 8.216963e-27</span></code></pre></div>
|
||||
<span class="co">#> 1 <=0.4844432 <=0.5158526 <=0.4956977 57 -6.0701075 2.650172e-30</span>
|
||||
<span class="co">#> 2 >0.4844432 <=0.5158526 <=0.4956977 59 -3.7226053 4.028245e-16</span>
|
||||
<span class="co">#> 3 <=0.4844432 >0.5158526 <=0.4956977 64 -1.9451039 1.181403e-10</span>
|
||||
<span class="co">#> 4 >0.4844432 >0.5158526 <=0.4956977 64 0.1690431 5.336151e-01</span>
|
||||
<span class="co">#> 5 <=0.4844432 <=0.5158526 >0.4956977 63 -0.6908844 3.478647e-02</span>
|
||||
<span class="co">#> 6 >0.4844432 <=0.5158526 >0.4956977 53 2.3977563 6.423076e-09</span>
|
||||
<span class="co">#> 7 <=0.4844432 >0.5158526 >0.4956977 72 3.8194994 1.243479e-24</span>
|
||||
<span class="co">#> 8 >0.4844432 >0.5158526 >0.4956977 68 6.1846918 6.820379e-36</span></code></pre></div>
|
||||
<p>One benefit of grid-based partitions is that you can easily view 2D slices of the full heterogeneity space.</p>
|
||||
<div class="sourceCode" id="cb15"><pre class="downlit sourceCode r">
|
||||
<div class="sourceCode" id="cb16"><pre class="downlit sourceCode r">
|
||||
<code class="sourceCode R"><span class="va">plts</span> <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/graphics/plot.html">plot</a></span><span class="op">(</span><span class="va">est_part_3</span><span class="op">)</span>
|
||||
|
||||
<span class="fu"><a href="https://rdrr.io/pkg/gridExtra/man/arrangeGrob.html">grid.arrange</a></span><span class="op">(</span><span class="va">plts</span><span class="op">[[</span><span class="fl">1</span><span class="op">]</span><span class="op">]</span>, <span class="va">plts</span><span class="op">[[</span><span class="fl">2</span><span class="op">]</span><span class="op">]</span>, ncol<span class="op">=</span><span class="fl">2</span><span class="op">)</span></code></pre></div>
|
||||
<p><img src="vignette_files/figure-html/unnamed-chunk-15-1.png" width="576"></p>
|
||||
<p><img src="vignette_files/figure-html/unnamed-chunk-16-1.png" width="576"></p>
|
||||
</div>
|
||||
<div id="improving-the-partition" class="section level1">
|
||||
<h1 class="hasAnchor">
|
||||
<a href="#improving-the-partition" class="anchor"></a>Improving the partition</h1>
|
||||
<p>We can improve the partition by controlling for X’s (either local-linearly or global-flexibly) and using bootstrap “bumping”</p>
|
||||
<div class="sourceCode" id="cb16"><pre class="downlit sourceCode r">
|
||||
<div class="sourceCode" id="cb17"><pre class="downlit sourceCode r">
|
||||
<code class="sourceCode R"><span class="va">est_part_l</span> <span class="op">=</span> <span class="fu"><a href="../reference/fit_estimate_partition.html">fit_estimate_partition</a></span><span class="op">(</span><span class="va">y</span>, <span class="va">X</span>, <span class="va">d</span>, breaks_per_dim<span class="op">=</span><span class="fl">5</span>, ctrl_method <span class="op">=</span> <span class="st">"LassoCV"</span>, bump_samples <span class="op">=</span> <span class="fl">20</span>, partition_i<span class="op">=</span><span class="fl">2</span><span class="op">)</span></code></pre></div>
|
||||
<p><code>LassoCV</code> is a local-linear approach and we can use the global-flexible approach by setting <code>ctrl_method="RF"</code> for a random forest.</p>
|
||||
</div>
|
||||
|
@ -199,7 +205,7 @@
|
|||
<h1 class="hasAnchor">
|
||||
<a href="#parallel-processing" class="anchor"></a>Parallel-processing</h1>
|
||||
<p>Parallel-processing the outer-loops</p>
|
||||
<div class="sourceCode" id="cb17"><pre class="downlit sourceCode r">
|
||||
<div class="sourceCode" id="cb18"><pre class="downlit sourceCode r">
|
||||
<code class="sourceCode R"><span class="co">#library(parallel)</span>
|
||||
<span class="co">#cl <- makeCluster(getOption("cl.cores", default=3)) #see also detectCores()</span>
|
||||
<span class="co">#fit_res = fit_estimate_partition(..., pr_cl=cl)</span>
|
||||
|
@ -212,73 +218,73 @@
|
|||
<ol style="list-style-type: decimal">
|
||||
<li>Multiple outcomes, but same sample (single treatment)</li>
|
||||
</ol>
|
||||
<div class="sourceCode" id="cb18"><pre class="downlit sourceCode r">
|
||||
<div class="sourceCode" id="cb19"><pre class="downlit sourceCode r">
|
||||
<code class="sourceCode R"><span class="va">tau2</span> <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/base/integer.html">as.integer</a></span><span class="op">(</span><span class="va">X</span><span class="op">[</span>,<span class="fl">2</span><span class="op">]</span><span class="op">></span><span class="fl">0.5</span><span class="op">)</span><span class="op">*</span><span class="fl">2</span><span class="op">-</span><span class="fl">1</span>
|
||||
<span class="va">y2_yM</span> <span class="op">=</span> <span class="va">d</span><span class="op">*</span><span class="va">tau2</span> <span class="op">+</span> <span class="fu"><a href="https://rdrr.io/r/stats/Normal.html">rnorm</a></span><span class="op">(</span><span class="va">N</span>, <span class="fl">0</span>, <span class="va">err_sd</span><span class="op">)</span>
|
||||
<span class="va">y_yM</span> <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/base/cbind.html">cbind</a></span><span class="op">(</span><span class="va">y</span>, <span class="va">y2_yM</span><span class="op">)</span>
|
||||
<span class="va">est_part_yM</span> <span class="op">=</span> <span class="fu"><a href="../reference/fit_estimate_partition.html">fit_estimate_partition</a></span><span class="op">(</span><span class="va">y_yM</span>, <span class="va">X</span>, <span class="va">d</span>, breaks_per_dim<span class="op">=</span><span class="fl">5</span>, partition_i <span class="op">=</span> <span class="fl">3</span><span class="op">)</span>
|
||||
<span class="fu"><a href="../reference/get_desc_df.estimated_partition.html">get_desc_df.estimated_partition</a></span><span class="op">(</span><span class="va">est_part_yM</span><span class="op">)</span>
|
||||
<span class="fu"><a href="../reference/get_desc_df.html">get_desc_df</a></span><span class="op">(</span><span class="va">est_part_yM</span><span class="op">)</span>
|
||||
<span class="co">#> X1 X2 N_est1 N_est2 param_ests1 param_ests2 pval1</span>
|
||||
<span class="co">#> 1 <=0.4844432 <=0.5158526 117 117 -0.8049290 -0.7535126 6.329614e-05</span>
|
||||
<span class="co">#> 2 >0.4844432 <=0.5158526 118 118 0.8971755 -0.9804873 1.474554e-06</span>
|
||||
<span class="co">#> 3 <=0.4844432 >0.5158526 130 130 -0.6945345 0.8326545 7.255099e-05</span>
|
||||
<span class="co">#> 4 >0.4844432 >0.5158526 135 135 1.0975538 1.1039447 8.612765e-10</span>
|
||||
<span class="co">#> 1 <=0.4844432 <=0.5158526 130 130 -0.8150777 -1.0797322 1.661861e-05</span>
|
||||
<span class="co">#> 2 >0.4844432 <=0.5158526 118 118 0.7621012 -1.1433613 7.692081e-06</span>
|
||||
<span class="co">#> 3 <=0.4844432 >0.5158526 119 119 -1.0825123 0.9278330 1.023561e-08</span>
|
||||
<span class="co">#> 4 >0.4844432 >0.5158526 133 133 1.0348422 0.6970603 2.502101e-09</span>
|
||||
<span class="co">#> pval2</span>
|
||||
<span class="co">#> 1 3.570550e-06</span>
|
||||
<span class="co">#> 2 2.094914e-06</span>
|
||||
<span class="co">#> 3 3.361214e-06</span>
|
||||
<span class="co">#> 4 7.098278e-09</span></code></pre></div>
|
||||
<span class="co">#> 1 9.023729e-10</span>
|
||||
<span class="co">#> 2 2.241744e-07</span>
|
||||
<span class="co">#> 3 6.075092e-06</span>
|
||||
<span class="co">#> 4 1.456469e-04</span></code></pre></div>
|
||||
<ol start="2" style="list-style-type: decimal">
|
||||
<li>Multiple treatments, but same sample (single outcome)</li>
|
||||
</ol>
|
||||
<div class="sourceCode" id="cb19"><pre class="downlit sourceCode r">
|
||||
<div class="sourceCode" id="cb20"><pre class="downlit sourceCode r">
|
||||
<code class="sourceCode R"><span class="va">d2</span> <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/stats/Binomial.html">rbinom</a></span><span class="op">(</span><span class="va">N</span>, <span class="fl">1</span>, <span class="fl">0.5</span><span class="op">)</span>
|
||||
<span class="va">d_dM</span> <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/base/cbind.html">cbind</a></span><span class="op">(</span><span class="va">d</span>, <span class="va">d2</span><span class="op">)</span>
|
||||
<span class="va">y_dM</span> <span class="op">=</span> <span class="va">d</span><span class="op">*</span><span class="va">tau</span> <span class="op">+</span> <span class="va">d2</span><span class="op">*</span><span class="va">tau2</span> <span class="op">+</span> <span class="fu"><a href="https://rdrr.io/r/stats/Normal.html">rnorm</a></span><span class="op">(</span><span class="va">N</span>, <span class="fl">0</span>, <span class="va">err_sd</span><span class="op">)</span>
|
||||
<span class="va">est_part_dM</span> <span class="op">=</span> <span class="fu"><a href="../reference/fit_estimate_partition.html">fit_estimate_partition</a></span><span class="op">(</span><span class="va">y_dM</span>, <span class="va">X</span>, <span class="va">d_dM</span>, breaks_per_dim<span class="op">=</span><span class="fl">5</span>, partition_i <span class="op">=</span> <span class="fl">3</span><span class="op">)</span>
|
||||
<span class="fu"><a href="../reference/get_desc_df.estimated_partition.html">get_desc_df.estimated_partition</a></span><span class="op">(</span><span class="va">est_part_dM</span><span class="op">)</span>
|
||||
<span class="fu"><a href="../reference/get_desc_df.html">get_desc_df</a></span><span class="op">(</span><span class="va">est_part_dM</span><span class="op">)</span>
|
||||
<span class="co">#> X1 X2 N_est1 N_est2 param_ests1 param_ests2 pval1</span>
|
||||
<span class="co">#> 1 <=0.4844432 <=0.5158526 119 119 -0.7616140 -0.7161717 7.110389e-05</span>
|
||||
<span class="co">#> 2 >0.4844432 <=0.5158526 134 134 1.3065817 -0.8387614 8.582240e-10</span>
|
||||
<span class="co">#> 3 <=0.4844432 >0.5158526 127 127 -1.0672945 0.9263464 8.281467e-08</span>
|
||||
<span class="co">#> 4 >0.4844432 >0.5158526 120 120 0.8887389 1.2058123 1.896526e-05</span>
|
||||
<span class="co">#> 1 <=0.4844432 <=0.5158526 135 135 -0.9762579 -0.9840951 7.622683e-08</span>
|
||||
<span class="co">#> 2 >0.4844432 <=0.5158526 118 118 0.9325741 -0.7867792 5.249161e-06</span>
|
||||
<span class="co">#> 3 <=0.4844432 >0.5158526 116 116 -1.2541714 0.7402976 1.460617e-09</span>
|
||||
<span class="co">#> 4 >0.4844432 >0.5158526 131 131 1.1479998 0.8282607 1.158216e-08</span>
|
||||
<span class="co">#> pval2</span>
|
||||
<span class="co">#> 1 2.042986e-04</span>
|
||||
<span class="co">#> 2 3.964438e-05</span>
|
||||
<span class="co">#> 3 2.179978e-06</span>
|
||||
<span class="co">#> 4 7.681458e-09</span></code></pre></div>
|
||||
<span class="co">#> 1 5.822138e-08</span>
|
||||
<span class="co">#> 2 7.690174e-05</span>
|
||||
<span class="co">#> 3 1.333272e-04</span>
|
||||
<span class="co">#> 4 2.509196e-05</span></code></pre></div>
|
||||
<ol start="3" style="list-style-type: decimal">
|
||||
<li>Multiple separate samples, each having a single outcome and treatment</li>
|
||||
</ol>
|
||||
<div class="sourceCode" id="cb20"><pre class="downlit sourceCode r">
|
||||
<div class="sourceCode" id="cb21"><pre class="downlit sourceCode r">
|
||||
<code class="sourceCode R"><span class="va">y2_MM</span> <span class="op">=</span> <span class="va">d2</span><span class="op">*</span><span class="va">tau2</span> <span class="op">+</span> <span class="fu"><a href="https://rdrr.io/r/stats/Normal.html">rnorm</a></span><span class="op">(</span><span class="va">N</span>, <span class="fl">0</span>, <span class="va">err_sd</span><span class="op">)</span>
|
||||
<span class="va">y_MM</span> <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/base/list.html">list</a></span><span class="op">(</span><span class="va">y</span>, <span class="va">y2_MM</span><span class="op">)</span>
|
||||
<span class="va">d_MM</span> <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/base/list.html">list</a></span><span class="op">(</span><span class="va">d</span>, <span class="va">d2</span><span class="op">)</span>
|
||||
<span class="va">X_MM</span> <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/base/list.html">list</a></span><span class="op">(</span><span class="va">X</span>, <span class="va">X</span><span class="op">)</span>
|
||||
<span class="va">est_part_MM</span> <span class="op">=</span> <span class="fu"><a href="../reference/fit_estimate_partition.html">fit_estimate_partition</a></span><span class="op">(</span><span class="va">y_MM</span>, <span class="va">X_MM</span>, <span class="va">d_MM</span>, breaks_per_dim<span class="op">=</span><span class="fl">5</span>, partition_i <span class="op">=</span> <span class="fl">3</span><span class="op">)</span>
|
||||
<span class="fu"><a href="../reference/get_desc_df.estimated_partition.html">get_desc_df.estimated_partition</a></span><span class="op">(</span><span class="va">est_part_MM</span><span class="op">)</span>
|
||||
<span class="fu"><a href="../reference/get_desc_df.html">get_desc_df</a></span><span class="op">(</span><span class="va">est_part_MM</span><span class="op">)</span>
|
||||
<span class="co">#> X1 X2 N_est1 N_est2 param_ests1 param_ests2 pval1</span>
|
||||
<span class="co">#> 1 <=0.4844432 <=0.5158526 125 109 -0.9005631 -0.871670 1.149034e-05</span>
|
||||
<span class="co">#> 2 >0.4844432 <=0.5158526 118 136 0.7295372 -1.076143 9.198309e-05</span>
|
||||
<span class="co">#> 3 <=0.4844432 >0.5158526 127 125 -0.8552986 1.297816 3.734232e-07</span>
|
||||
<span class="co">#> 4 >0.4844432 >0.5158526 130 130 1.0782197 1.161637 3.924776e-08</span>
|
||||
<span class="co">#> 1 <=0.4844432 <=0.5158526 130 124 -0.9043386 -0.8583915 6.680335e-06</span>
|
||||
<span class="co">#> 2 >0.4844432 <=0.5158526 122 126 0.7282198 -1.1698914 1.392059e-05</span>
|
||||
<span class="co">#> 3 <=0.4844432 >0.5158526 128 131 -1.0385968 1.4084632 3.053106e-09</span>
|
||||
<span class="co">#> 4 >0.4844432 >0.5158526 120 119 1.0489269 0.9796365 8.551001e-08</span>
|
||||
<span class="co">#> pval2</span>
|
||||
<span class="co">#> 1 1.198057e-05</span>
|
||||
<span class="co">#> 2 1.846102e-08</span>
|
||||
<span class="co">#> 3 1.459545e-10</span>
|
||||
<span class="co">#> 4 1.149917e-09</span></code></pre></div>
|
||||
<span class="co">#> 1 2.926864e-06</span>
|
||||
<span class="co">#> 2 2.092628e-08</span>
|
||||
<span class="co">#> 3 3.320067e-12</span>
|
||||
<span class="co">#> 4 2.845363e-06</span></code></pre></div>
|
||||
</div>
|
||||
<div id="mean-outcome-prediction" class="section level1">
|
||||
<h1 class="hasAnchor">
|
||||
<a href="#mean-outcome-prediction" class="anchor"></a>Mean-outcome prediction</h1>
|
||||
<div class="sourceCode" id="cb21"><pre class="downlit sourceCode r">
|
||||
<div class="sourceCode" id="cb22"><pre class="downlit sourceCode r">
|
||||
<code class="sourceCode R"><span class="va">alpha</span> <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/base/integer.html">as.integer</a></span><span class="op">(</span><span class="va">X</span><span class="op">[</span>,<span class="fl">1</span><span class="op">]</span><span class="op">></span><span class="fl">0.5</span><span class="op">)</span><span class="op">*</span><span class="fl">2</span><span class="op">-</span><span class="fl">1</span> <span class="co">#true average outcome effect (just heterogeneous across X1)</span>
|
||||
<span class="va">y_y</span> <span class="op">=</span> <span class="va">alpha</span> <span class="op">+</span> <span class="fu"><a href="https://rdrr.io/r/stats/Normal.html">rnorm</a></span><span class="op">(</span><span class="va">N</span>, <span class="fl">0</span>, <span class="va">err_sd</span><span class="op">)</span> <span class="co">#outcome</span>
|
||||
<span class="va">est_part_y</span> <span class="op">=</span> <span class="fu"><a href="../reference/fit_estimate_partition.html">fit_estimate_partition</a></span><span class="op">(</span><span class="va">y_y</span>, <span class="va">X</span>, breaks_per_dim<span class="op">=</span><span class="fl">5</span>, partition_i<span class="op">=</span><span class="fl">2</span><span class="op">)</span>
|
||||
<span class="fu"><a href="../reference/get_desc_df.estimated_partition.html">get_desc_df.estimated_partition</a></span><span class="op">(</span><span class="va">est_part_y</span><span class="op">)</span>
|
||||
<span class="fu"><a href="../reference/get_desc_df.html">get_desc_df</a></span><span class="op">(</span><span class="va">est_part_y</span><span class="op">)</span>
|
||||
<span class="co">#> X1 N_est param_ests pval</span>
|
||||
<span class="co">#> 1 <=0.4844432 252 -0.9742096 4.803432e-39</span>
|
||||
<span class="co">#> 2 >0.4844432 248 0.9288032 8.498185e-29</span></code></pre></div>
|
||||
<span class="co">#> 1 <=0.4844432 247 -1.0304578 5.228963e-43</span>
|
||||
<span class="co">#> 2 >0.4844432 253 0.9447789 5.803037e-32</span></code></pre></div>
|
||||
</div>
|
||||
<div id="minor-things-to-add" class="section level1">
|
||||
<h1 class="hasAnchor">
|
||||
|
|
Двоичный файл не отображается.
После Ширина: | Высота: | Размер: 31 KiB |
Двоичный файл не отображается.
После Ширина: | Высота: | Размер: 18 KiB |
|
@ -68,9 +68,10 @@
|
|||
<div id="project" class="section level1">
|
||||
<div class="page-header"><h1 class="hasAnchor">
|
||||
<a href="#project" class="anchor"></a>Project</h1></div>
|
||||
<p>Tools for finding heterogeneous treatment effects (and means) based on partitioning the covariate/feature space via full cross-cuts and solved via greedy search. A typical usage would be analyzing and experiment to find the high-level subgroups (a coarse partition that is useful to humans) that differ in their estimated treatment effects.</p>
|
||||
<p>Tools for finding heterogeneous treatment effects (and means) based on partitioning the covariate/feature space via full cross-cuts and solved via greedy search. A typical usage would be analyzing an experiment to find the high-level subgroups (a coarse partition that is useful to humans) that differ in their estimated treatment effects.</p>
|
||||
<p>This package is inspired by, and uses ideas from, <a href="https://github.com/susanathey/causalTree">Causal Tree</a> but aims to have the partition be more interpretable and have better accuracy. It is slower, though for high-level partitions this is usually not an issue.</p>
|
||||
<p>This project is currently in an advanced prototype stage. Issues may still be found in common usage. Please create issues for these!</p>
|
||||
<p>Documentation can be found online <a href="https://microsoft.github.io/CausalGrid/index.html">here</a> (and in the package).</p>
|
||||
<div id="contributing" class="section level2">
|
||||
<h2 class="hasAnchor">
|
||||
<a href="#contributing" class="anchor"></a>Contributing</h2>
|
||||
|
|
|
@ -3,5 +3,5 @@ pkgdown: 1.6.1.9000
|
|||
pkgdown_sha: 84f234cf19153ce99bf1bcf875dc56248ddf0cbe
|
||||
articles:
|
||||
vignette: vignette.html
|
||||
last_built: 2021-01-26T16:10Z
|
||||
last_built: 2021-01-30T02:16Z
|
||||
|
||||
|
|
|
@ -159,6 +159,14 @@ should be normalized to have the same variance. With multiple core estimates see
|
|||
<th>partition_i</th>
|
||||
<td><p>partition_i - 1 is the index of the last split in split_seq included in the new partition</p></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>index_tr</th>
|
||||
<td><p>Split between train and estimate samples (default is to get from <code>fit</code>)</p></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>split_seq</th>
|
||||
<td><p>sequential list of splits (default is to get from <code>fit</code>)</p></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>est_plan</th>
|
||||
<td><p><a href='EstimatorPlan.html'>EstimatorPlan</a>.</p></td>
|
||||
|
|
|
@ -137,7 +137,7 @@
|
|||
</tr>
|
||||
<tr>
|
||||
<th>...</th>
|
||||
<td></td>
|
||||
<td><p>Additional arguments</p></td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
|
|
|
@ -254,6 +254,10 @@ single - (smart) redo full fitting removing each possible dimension
|
|||
interaction - (smart) redo full fitting removing each pair of dimensions
|
||||
"" - Nothing</p></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>...</th>
|
||||
<td><p>Additional params.</p></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>x</th>
|
||||
<td><p>an R object</p></td>
|
||||
|
|
|
@ -249,6 +249,10 @@ If m_mode==DS.MULTI_SAMPLE then each item is a sublist with such bootstrap sampl
|
|||
<th>bump_ratio</th>
|
||||
<td><p>For bootstraps the ratio of sample size to sample (between 0 and 1, default 1)</p></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>...</th>
|
||||
<td><p>Additional params.</p></td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
<h2 class="hasAnchor" id="value"><a class="anchor" href="#value"></a>Value</h2>
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
<meta http-equiv="X-UA-Compatible" content="IE=edge">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>Get descriptive data.frame for an estimated_partition — get_desc_df.estimated_partition • CausalGrid</title>
|
||||
<title>Get descriptive data.frame — get_desc_df.estimated_partition • CausalGrid</title>
|
||||
|
||||
<!-- jquery -->
|
||||
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.4.1/jquery.min.js" integrity="sha256-CSXorXvZcTkaix6Yvo6HppcZGetbYMGWSFlBw8HfCJo=" crossorigin="anonymous"></script>
|
||||
|
@ -36,9 +36,9 @@
|
|||
|
||||
|
||||
|
||||
<meta property="og:title" content="Get descriptive data.frame for an estimated_partition — get_desc_df.estimated_partition" />
|
||||
<meta property="og:title" content="Get descriptive data.frame — get_desc_df.estimated_partition" />
|
||||
|
||||
<meta property="og:description" content="Get statistics for each cell (feature boundary, and estimated cell stats)" />
|
||||
<meta property="og:description" content="Get information for each cell" />
|
||||
|
||||
|
||||
|
||||
|
@ -111,23 +111,27 @@
|
|||
<div class="row">
|
||||
<div class="col-md-9 contents">
|
||||
<div class="page-header">
|
||||
<h1>Get descriptive data.frame for an estimated_partition</h1>
|
||||
<h1>Get descriptive data.frame</h1>
|
||||
|
||||
<div class="hidden name"><code>get_desc_df.estimated_partition.Rd</code></div>
|
||||
</div>
|
||||
|
||||
<div class="ref-description">
|
||||
|
||||
<p>Get statistics for each cell (feature boundary, and estimated cell stats)</p>
|
||||
<p>Get information for each cell</p>
|
||||
|
||||
</div>
|
||||
|
||||
<pre class="usage"><span class='fu'>get_desc_df.estimated_partition</span><span class='op'>(</span>
|
||||
<pre class="usage"><span class='co'># S3 method for estimated_partition</span>
|
||||
<span class='fu'><a href='get_desc_df.html'>get_desc_df</a></span><span class='op'>(</span>
|
||||
<span class='va'>obj</span>,
|
||||
cont_bounds_inf <span class='op'>=</span> <span class='cn'>TRUE</span>,
|
||||
do_str <span class='op'>=</span> <span class='cn'>TRUE</span>,
|
||||
drop_unsplit <span class='op'>=</span> <span class='cn'>TRUE</span>,
|
||||
digits <span class='op'>=</span> <span class='cn'>NULL</span>,
|
||||
import_order <span class='op'>=</span> <span class='cn'>FALSE</span>
|
||||
unsplit_cat_star <span class='op'>=</span> <span class='cn'>TRUE</span>,
|
||||
import_order <span class='op'>=</span> <span class='cn'>FALSE</span>,
|
||||
<span class='va'>...</span>
|
||||
<span class='op'>)</span></pre>
|
||||
|
||||
<h2 class="hasAnchor" id="arguments"><a class="anchor" href="#arguments"></a>Arguments</h2>
|
||||
|
@ -135,30 +139,48 @@
|
|||
<colgroup><col class="name" /><col class="desc" /></colgroup>
|
||||
<tr>
|
||||
<th>obj</th>
|
||||
<td><p>estimated_partition object</p></td>
|
||||
<td><p>partition object</p></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>cont_bounds_inf</th>
|
||||
<td><p>Should "Inf" be used for continuous bounds (otherwise
|
||||
the bounds from X_range)</p></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>do_str</th>
|
||||
<td><p>If True, use a string like "(a, b]", otherwise have two separate columns with a and b</p></td>
|
||||
<td><p>If True, use a string like "(a, b]", otherwise have two
|
||||
separate columns with a and b</p></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>drop_unsplit</th>
|
||||
<td><p>If True, drop columns for variables overwhich the partition did not split</p></td>
|
||||
<td><p>If True, drop columns for variables over which the
|
||||
partition did not split</p></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>digits</th>
|
||||
<td><p>digits Option (default is NULL)</p></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>unsplit_cat_star</th>
|
||||
<td><p>Should unsplit categorical variables be listed as
|
||||
"*", otherwise all factor labels will be used.</p></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>import_order</th>
|
||||
<td><p>Should we use importance ordering (most important on the left) or input ordering (default) for features.
|
||||
Rows will be ordered so that the right-most will change most frequently.</p></td>
|
||||
<td><p>Whether to use importance ordering
|
||||
(most important on the left) or input ordering (default) for features. Rows
|
||||
will be ordered so that the right-most will change most frequently.</p></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>...</th>
|
||||
<td><p>Additional arguments.</p></td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
<h2 class="hasAnchor" id="value"><a class="anchor" href="#value"></a>Value</h2>
|
||||
|
||||
<p>data.frame with columns: partitionin columns, N_est, param_ests, pval per estimate</p>
|
||||
<p>data.frame with columns: partitioning columns, N_est, param_ests,
|
||||
pval per estimate</p>
|
||||
|
||||
|
||||
</div>
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
<meta http-equiv="X-UA-Compatible" content="IE=edge">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>Get descriptive data.frame for grid_partition — get_desc_df.grid_partition • CausalGrid</title>
|
||||
<title>Get descriptive data.frame — get_desc_df.grid_partition • CausalGrid</title>
|
||||
|
||||
<!-- jquery -->
|
||||
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.4.1/jquery.min.js" integrity="sha256-CSXorXvZcTkaix6Yvo6HppcZGetbYMGWSFlBw8HfCJo=" crossorigin="anonymous"></script>
|
||||
|
@ -36,9 +36,9 @@
|
|||
|
||||
|
||||
|
||||
<meta property="og:title" content="Get descriptive data.frame for grid_partition — get_desc_df.grid_partition" />
|
||||
<meta property="og:title" content="Get descriptive data.frame — get_desc_df.grid_partition" />
|
||||
|
||||
<meta property="og:description" content="A dataset with rows for each cell and columns defining partitioning" />
|
||||
<meta property="og:description" content="Get information for each cell" />
|
||||
|
||||
|
||||
|
||||
|
@ -111,58 +111,68 @@
|
|||
<div class="row">
|
||||
<div class="col-md-9 contents">
|
||||
<div class="page-header">
|
||||
<h1>Get descriptive data.frame for grid_partition</h1>
|
||||
<h1>Get descriptive data.frame</h1>
|
||||
|
||||
<div class="hidden name"><code>get_desc_df.grid_partition.Rd</code></div>
|
||||
</div>
|
||||
|
||||
<div class="ref-description">
|
||||
|
||||
<p>A dataset with rows for each cell and columns defining partitioning</p>
|
||||
<p>Get information for each cell</p>
|
||||
|
||||
</div>
|
||||
|
||||
<pre class="usage"><span class='fu'>get_desc_df.grid_partition</span><span class='op'>(</span>
|
||||
<span class='va'>partition</span>,
|
||||
<pre class="usage"><span class='co'># S3 method for grid_partition</span>
|
||||
<span class='fu'><a href='get_desc_df.html'>get_desc_df</a></span><span class='op'>(</span>
|
||||
<span class='va'>obj</span>,
|
||||
cont_bounds_inf <span class='op'>=</span> <span class='cn'>TRUE</span>,
|
||||
do_str <span class='op'>=</span> <span class='cn'>FALSE</span>,
|
||||
drop_unsplit <span class='op'>=</span> <span class='cn'>FALSE</span>,
|
||||
digits <span class='op'>=</span> <span class='cn'>NULL</span>,
|
||||
unsplit_cat_star <span class='op'>=</span> <span class='cn'>TRUE</span>
|
||||
unsplit_cat_star <span class='op'>=</span> <span class='cn'>TRUE</span>,
|
||||
<span class='va'>...</span>
|
||||
<span class='op'>)</span></pre>
|
||||
|
||||
<h2 class="hasAnchor" id="arguments"><a class="anchor" href="#arguments"></a>Arguments</h2>
|
||||
<table class="ref-arguments">
|
||||
<colgroup><col class="name" /><col class="desc" /></colgroup>
|
||||
<tr>
|
||||
<th>partition</th>
|
||||
<td><p>Partition</p></td>
|
||||
<th>obj</th>
|
||||
<td><p>partition object</p></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>cont_bounds_inf</th>
|
||||
<td><p>If True, will put continuous bounds as -Inf/Inf. Otherwise will use X_range bounds</p></td>
|
||||
<td><p>Should "Inf" be used for continuous bounds (otherwise
|
||||
the bounds from X_range)</p></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>do_str</th>
|
||||
<td><p>If True, use a string like "(a, b]", otherwise have two separate columns with a and b</p></td>
|
||||
<td><p>If True, use a string like "(a, b]", otherwise have two
|
||||
separate columns with a and b</p></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>drop_unsplit</th>
|
||||
<td><p>If True, drop columns for variables overwhich the partition did not split</p></td>
|
||||
<td><p>If True, drop columns for variables over which the
|
||||
partition did not split</p></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>digits</th>
|
||||
<td><p>digits option</p></td>
|
||||
<td><p>digits Option (default is NULL)</p></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>unsplit_cat_star</th>
|
||||
<td><p>if we don't split on a categorical var, should we show as "*" (otherwise list all levels)</p></td>
|
||||
<td><p>Should unsplit categorical variables be listed as
|
||||
"*", otherwise all factor labels will be used.</p></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>...</th>
|
||||
<td><p>Additional arguments.</p></td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
<h2 class="hasAnchor" id="value"><a class="anchor" href="#value"></a>Value</h2>
|
||||
|
||||
<p>data.frame</p>
|
||||
<p>data.frame with columns: partitioning columns</p>
|
||||
|
||||
|
||||
</div>
|
||||
|
|
|
@ -0,0 +1,204 @@
|
|||
<!-- Generated by pkgdown: do not edit by hand -->
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<meta http-equiv="X-UA-Compatible" content="IE=edge">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>Get descriptive data.frame — get_desc_df • CausalGrid</title>
|
||||
|
||||
<!-- jquery -->
|
||||
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.4.1/jquery.min.js" integrity="sha256-CSXorXvZcTkaix6Yvo6HppcZGetbYMGWSFlBw8HfCJo=" crossorigin="anonymous"></script>
|
||||
<!-- Bootstrap -->
|
||||
|
||||
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/3.4.1/css/bootstrap.min.css" integrity="sha256-bZLfwXAP04zRMK2BjiO8iu9pf4FbLqX6zitd+tIvLhE=" crossorigin="anonymous" />
|
||||
<script src="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/3.4.1/js/bootstrap.min.js" integrity="sha256-nuL8/2cJ5NDSSwnKD8VqreErSWHtnEP9E7AySL+1ev4=" crossorigin="anonymous"></script>
|
||||
|
||||
<!-- bootstrap-toc -->
|
||||
<link rel="stylesheet" href="../bootstrap-toc.css">
|
||||
<script src="../bootstrap-toc.js"></script>
|
||||
|
||||
<!-- Font Awesome icons -->
|
||||
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/all.min.css" integrity="sha256-mmgLkCYLUQbXn0B1SRqzHar6dCnv9oZFPEC1g1cwlkk=" crossorigin="anonymous" />
|
||||
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/v4-shims.min.css" integrity="sha256-wZjR52fzng1pJHwx4aV2AO3yyTOXrcDW7jBpJtTwVxw=" crossorigin="anonymous" />
|
||||
|
||||
<!-- clipboard.js -->
|
||||
<script src="https://cdnjs.cloudflare.com/ajax/libs/clipboard.js/2.0.6/clipboard.min.js" integrity="sha256-inc5kl9MA1hkeYUt+EC3BhlIgyp/2jDIyBLS6k3UxPI=" crossorigin="anonymous"></script>
|
||||
|
||||
<!-- headroom.js -->
|
||||
<script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/headroom.min.js" integrity="sha256-AsUX4SJE1+yuDu5+mAVzJbuYNPHj/WroHuZ8Ir/CkE0=" crossorigin="anonymous"></script>
|
||||
<script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/jQuery.headroom.min.js" integrity="sha256-ZX/yNShbjqsohH1k95liqY9Gd8uOiE1S4vZc+9KQ1K4=" crossorigin="anonymous"></script>
|
||||
|
||||
<!-- pkgdown -->
|
||||
<link href="../pkgdown.css" rel="stylesheet">
|
||||
<script src="../pkgdown.js"></script>
|
||||
|
||||
|
||||
|
||||
<meta property="og:title" content="Get descriptive data.frame — get_desc_df" />
|
||||
|
||||
<meta property="og:description" content="Get information for each cell" />
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<!-- mathjax -->
|
||||
<script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js" integrity="sha256-nvJJv9wWKEm88qvoQl9ekL2J+k/RWIsaSScxxlsrv8k=" crossorigin="anonymous"></script>
|
||||
<script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/config/TeX-AMS-MML_HTMLorMML.js" integrity="sha256-84DKXVJXs0/F8OTMzX4UR909+jtl4G7SPypPavF+GfA=" crossorigin="anonymous"></script>
|
||||
|
||||
<!--[if lt IE 9]>
|
||||
<script src="https://oss.maxcdn.com/html5shiv/3.7.3/html5shiv.min.js"></script>
|
||||
<script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
|
||||
<![endif]-->
|
||||
|
||||
|
||||
|
||||
</head>
|
||||
|
||||
<body data-spy="scroll" data-target="#toc">
|
||||
<div class="container template-reference-topic">
|
||||
<header>
|
||||
<div class="navbar navbar-default navbar-fixed-top" role="navigation">
|
||||
<div class="container">
|
||||
<div class="navbar-header">
|
||||
<button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#navbar" aria-expanded="false">
|
||||
<span class="sr-only">Toggle navigation</span>
|
||||
<span class="icon-bar"></span>
|
||||
<span class="icon-bar"></span>
|
||||
<span class="icon-bar"></span>
|
||||
</button>
|
||||
<span class="navbar-brand">
|
||||
<a class="navbar-link" href="../index.html">CausalGrid</a>
|
||||
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Released version">0.2</span>
|
||||
</span>
|
||||
</div>
|
||||
|
||||
<div id="navbar" class="navbar-collapse collapse">
|
||||
<ul class="nav navbar-nav">
|
||||
<li>
|
||||
<a href="../reference/index.html">Reference</a>
|
||||
</li>
|
||||
<li class="dropdown">
|
||||
<a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false">
|
||||
Articles
|
||||
|
||||
<span class="caret"></span>
|
||||
</a>
|
||||
<ul class="dropdown-menu" role="menu">
|
||||
<li>
|
||||
<a href="../articles/vignette.html">High-Level Analysis</a>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
<ul class="nav navbar-nav navbar-right">
|
||||
|
||||
</ul>
|
||||
|
||||
</div><!--/.nav-collapse -->
|
||||
</div><!--/.container -->
|
||||
</div><!--/.navbar -->
|
||||
|
||||
|
||||
|
||||
</header>
|
||||
|
||||
<div class="row">
|
||||
<div class="col-md-9 contents">
|
||||
<div class="page-header">
|
||||
<h1>Get descriptive data.frame</h1>
|
||||
|
||||
<div class="hidden name"><code>get_desc_df.Rd</code></div>
|
||||
</div>
|
||||
|
||||
<div class="ref-description">
|
||||
|
||||
<p>Get information for each cell</p>
|
||||
|
||||
</div>
|
||||
|
||||
<pre class="usage"><span class='fu'>get_desc_df</span><span class='op'>(</span>
|
||||
<span class='va'>obj</span>,
|
||||
cont_bounds_inf <span class='op'>=</span> <span class='cn'>TRUE</span>,
|
||||
do_str <span class='op'>=</span> <span class='cn'>FALSE</span>,
|
||||
drop_unsplit <span class='op'>=</span> <span class='cn'>FALSE</span>,
|
||||
digits <span class='op'>=</span> <span class='cn'>NULL</span>,
|
||||
unsplit_cat_star <span class='op'>=</span> <span class='cn'>TRUE</span>,
|
||||
<span class='va'>...</span>
|
||||
<span class='op'>)</span></pre>
|
||||
|
||||
<h2 class="hasAnchor" id="arguments"><a class="anchor" href="#arguments"></a>Arguments</h2>
|
||||
<table class="ref-arguments">
|
||||
<colgroup><col class="name" /><col class="desc" /></colgroup>
|
||||
<tr>
|
||||
<th>obj</th>
|
||||
<td><p>partition object</p></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>cont_bounds_inf</th>
|
||||
<td><p>Should "Inf" be used for continuous bounds (otherwise
|
||||
the bounds from X_range)</p></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>do_str</th>
|
||||
<td><p>If True, use a string like "(a, b]", otherwise have two
|
||||
separate columns with a and b</p></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>drop_unsplit</th>
|
||||
<td><p>If True, drop columns for variables over which the
|
||||
partition did not split</p></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>digits</th>
|
||||
<td><p>digits Option (default is NULL)</p></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>unsplit_cat_star</th>
|
||||
<td><p>Should unsplit categorical variables be listed as
|
||||
"*", otherwise all factor labels will be used.</p></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>...</th>
|
||||
<td><p>Additional arguments.</p></td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
<h2 class="hasAnchor" id="value"><a class="anchor" href="#value"></a>Value</h2>
|
||||
|
||||
<p>data.frame with columns: partitioning columns</p>
|
||||
|
||||
|
||||
</div>
|
||||
<div class="col-md-3 hidden-xs hidden-sm" id="pkgdown-sidebar">
|
||||
<nav id="toc" data-toggle="toc" class="sticky-top">
|
||||
<h2 data-toc-skip>Contents</h2>
|
||||
</nav>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
<footer>
|
||||
<div class="copyright">
|
||||
<p>Developed by Brian Quistorff.</p>
|
||||
</div>
|
||||
|
||||
<div class="pkgdown">
|
||||
<p>Site built with <a href="https://pkgdown.r-lib.org/">pkgdown</a> 1.6.1.9000.</p>
|
||||
</div>
|
||||
|
||||
</footer>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
||||
|
|
@ -39,7 +39,7 @@
|
|||
<meta property="og:title" content="Create a null grid_partition — grid_partition" />
|
||||
|
||||
<meta property="og:description" content="Create an empty partition. Splits can be added using add_partition_split.
|
||||
Information about a split can be retrieved using num_cells, get_desc_df.grid_partition and print
|
||||
Information about a split can be retrieved using num_cells, get_desc_df and print
|
||||
With data, one can determine the cell for each observation using predict
|
||||
Test whether an object is a grid_function" />
|
||||
|
||||
|
@ -114,7 +114,7 @@ Test whether an object is an grid_function" />
|
|||
<div class="row">
|
||||
<div class="col-md-9 contents">
|
||||
<div class="page-header">
|
||||
<h1>Create a null grid_partition</h1>
|
||||
<h1>Create a null <code>grid_partition</code></h1>
|
||||
|
||||
<div class="hidden name"><code>grid_partition.Rd</code></div>
|
||||
</div>
|
||||
|
@ -122,7 +122,7 @@ Test whether an object is an grid_function" />
|
|||
<div class="ref-description">
|
||||
|
||||
<p>Create an empty partition. Splits can be added using <code><a href='add_partition_split.html'>add_partition_split</a></code>.
|
||||
Information about a split can be retrieved using <code><a href='num_cells.estimated_partition.html'>num_cells</a></code>, <code><a href='get_desc_df.grid_partition.html'>get_desc_df.grid_partition</a></code> and <code><a href='https://rdrr.io/r/base/print.html'>print</a></code>
|
||||
Information about a split can be retrieved using <code><a href='num_cells.estimated_partition.html'>num_cells</a></code>, <code><a href='get_desc_df.html'>get_desc_df</a></code> and <code><a href='https://rdrr.io/r/base/print.html'>print</a></code>
|
||||
With data, one can determine the cell for each observation using <code><a href='https://rdrr.io/r/stats/predict.html'>predict</a></code></p>
|
||||
<p>Test whether an object is a <code>grid_function</code></p>
|
||||
|
||||
|
|
|
@ -150,12 +150,6 @@
|
|||
<td><p>Change the complexity of a fit_estimate_partition</p></td>
|
||||
</tr><tr>
|
||||
|
||||
<td>
|
||||
<p><code><a href="const_vectr.html">const_vectr()</a></code> </p>
|
||||
</td>
|
||||
<td><p>inline', second just Rcpp), but couldn't get to work in building a package.</p></td>
|
||||
</tr><tr>
|
||||
|
||||
<td>
|
||||
<p><code><a href="estimated_partition.html">estimated_partition()</a></code> </p>
|
||||
</td>
|
||||
|
@ -205,15 +199,21 @@
|
|||
</tr><tr>
|
||||
|
||||
<td>
|
||||
<p><code><a href="get_desc_df.estimated_partition.html">get_desc_df.estimated_partition()</a></code> </p>
|
||||
<p><code><a href="get_desc_df.estimated_partition.html">get_desc_df(<i><estimated_partition></i>)</a></code> </p>
|
||||
</td>
|
||||
<td><p>Get descriptive data.frame for an estimated_partition</p></td>
|
||||
<td><p>Get descriptive data.frame</p></td>
|
||||
</tr><tr>
|
||||
|
||||
<td>
|
||||
<p><code><a href="get_desc_df.grid_partition.html">get_desc_df.grid_partition()</a></code> </p>
|
||||
<p><code><a href="get_desc_df.grid_partition.html">get_desc_df(<i><grid_partition></i>)</a></code> </p>
|
||||
</td>
|
||||
<td><p>Get descriptive data.frame for grid_partition</p></td>
|
||||
<td><p>Get descriptive data.frame</p></td>
|
||||
</tr><tr>
|
||||
|
||||
<td>
|
||||
<p><code><a href="get_desc_df.html">get_desc_df()</a></code> </p>
|
||||
</td>
|
||||
<td><p>Get descriptive data.frame</p></td>
|
||||
</tr><tr>
|
||||
|
||||
<td>
|
||||
|
@ -237,13 +237,7 @@
|
|||
<td>
|
||||
<p><code><a href="grid_partition.html">grid_partition()</a></code> <code><a href="grid_partition.html">is_grid_partition()</a></code> </p>
|
||||
</td>
|
||||
<td><p>Create a null grid_partition</p></td>
|
||||
</tr><tr>
|
||||
|
||||
<td>
|
||||
<p><code><a href="grid_partition_split.html">is_grid_partition_split()</a></code> </p>
|
||||
</td>
|
||||
<td><p>Is grid_partition_split</p></td>
|
||||
<td><p>Create a null <code>grid_partition</code></p></td>
|
||||
</tr><tr>
|
||||
|
||||
<td>
|
||||
|
@ -265,7 +259,7 @@
|
|||
</tr><tr>
|
||||
|
||||
<td>
|
||||
<p><code><a href="partition_split.html">partition_split()</a></code> </p>
|
||||
<p><code><a href="partition_split.html">partition_split()</a></code> <code><a href="partition_split.html">is_partition_split()</a></code> </p>
|
||||
</td>
|
||||
<td><p>Create partition_split</p></td>
|
||||
</tr><tr>
|
||||
|
|
|
@ -38,7 +38,8 @@
|
|||
|
||||
<meta property="og:title" content="Create partition_split — partition_split" />
|
||||
|
||||
<meta property="og:description" content="Describes a single partition split. Used with add_partition_split." />
|
||||
<meta property="og:description" content="Describes a single partition split. Used with add_partition_split.
|
||||
Tests whether or not an object is a partition_split." />
|
||||
|
||||
|
||||
|
||||
|
@ -119,10 +120,13 @@
|
|||
<div class="ref-description">
|
||||
|
||||
<p>Describes a single partition split. Used with <code><a href='add_partition_split.html'>add_partition_split</a></code>.</p>
|
||||
<p>Tests whether or not an object is a <code>partition_split</code>.</p>
|
||||
|
||||
</div>
|
||||
|
||||
<pre class="usage"><span class='fu'>partition_split</span><span class='op'>(</span><span class='va'>k</span>, <span class='va'>X_k_cut</span><span class='op'>)</span></pre>
|
||||
<pre class="usage"><span class='fu'>partition_split</span><span class='op'>(</span><span class='va'>k</span>, <span class='va'>X_k_cut</span><span class='op'>)</span>
|
||||
|
||||
<span class='fu'>is_partition_split</span><span class='op'>(</span><span class='va'>x</span><span class='op'>)</span></pre>
|
||||
|
||||
<h2 class="hasAnchor" id="arguments"><a class="anchor" href="#arguments"></a>Arguments</h2>
|
||||
<table class="ref-arguments">
|
||||
|
@ -135,11 +139,23 @@
|
|||
<th>X_k_cut</th>
|
||||
<td><p>cut value</p></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>x</th>
|
||||
<td><p>an R object</p></td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
<h2 class="hasAnchor" id="value"><a class="anchor" href="#value"></a>Value</h2>
|
||||
|
||||
<p>Partition Split</p>
|
||||
<p>Boolean</p>
|
||||
|
||||
<h2 class="hasAnchor" id="functions"><a class="anchor" href="#functions"></a>Functions</h2>
|
||||
|
||||
|
||||
<ul>
|
||||
<li><p><code>is_partition_split</code>: is partition_split</p></li>
|
||||
</ul>
|
||||
|
||||
|
||||
</div>
|
||||
|
|
|
@ -123,19 +123,23 @@
|
|||
</div>
|
||||
|
||||
<pre class="usage"><span class='co'># S3 method for estimated_partition</span>
|
||||
<span class='fu'><a href='https://rdrr.io/r/graphics/plot.html'>plot</a></span><span class='op'>(</span><span class='va'>grid_fit</span>, X_names_2D <span class='op'>=</span> <span class='cn'>NULL</span><span class='op'>)</span></pre>
|
||||
<span class='fu'><a href='https://rdrr.io/r/graphics/plot.html'>plot</a></span><span class='op'>(</span><span class='va'>x</span>, X_names_2D <span class='op'>=</span> <span class='cn'>NULL</span>, <span class='va'>...</span><span class='op'>)</span></pre>
|
||||
|
||||
<h2 class="hasAnchor" id="arguments"><a class="anchor" href="#arguments"></a>Arguments</h2>
|
||||
<table class="ref-arguments">
|
||||
<colgroup><col class="name" /><col class="desc" /></colgroup>
|
||||
<tr>
|
||||
<th>grid_fit</th>
|
||||
<th>x</th>
|
||||
<td><p>grid_fit</p></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>X_names_2D</th>
|
||||
<td><p>X_names_2D</p></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>...</th>
|
||||
<td><p>Additional arguments. Unused.</p></td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
<h2 class="hasAnchor" id="value"><a class="anchor" href="#value"></a>Value</h2>
|
||||
|
|
|
@ -123,13 +123,13 @@
|
|||
</div>
|
||||
|
||||
<pre class="usage"><span class='co'># S3 method for estimated_partition</span>
|
||||
<span class='fu'><a href='https://rdrr.io/r/stats/predict.html'>predict</a></span><span class='op'>(</span><span class='va'>obj</span>, <span class='va'>new_X</span>, new_d <span class='op'>=</span> <span class='cn'>NULL</span>, type <span class='op'>=</span> <span class='st'>"effect"</span><span class='op'>)</span></pre>
|
||||
<span class='fu'><a href='https://rdrr.io/r/stats/predict.html'>predict</a></span><span class='op'>(</span><span class='va'>object</span>, <span class='va'>new_X</span>, new_d <span class='op'>=</span> <span class='cn'>NULL</span>, type <span class='op'>=</span> <span class='st'>"effect"</span>, <span class='va'>...</span><span class='op'>)</span></pre>
|
||||
|
||||
<h2 class="hasAnchor" id="arguments"><a class="anchor" href="#arguments"></a>Arguments</h2>
|
||||
<table class="ref-arguments">
|
||||
<colgroup><col class="name" /><col class="desc" /></colgroup>
|
||||
<tr>
|
||||
<th>obj</th>
|
||||
<th>object</th>
|
||||
<td><p>estimated_partition object</p></td>
|
||||
</tr>
|
||||
<tr>
|
||||
|
@ -144,6 +144,10 @@
|
|||
<th>type</th>
|
||||
<td><p>"effect" or "outcome" (currently not implemented)</p></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>...</th>
|
||||
<td><p>Additional arguments. Unused.</p></td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
<h2 class="hasAnchor" id="value"><a class="anchor" href="#value"></a>Value</h2>
|
||||
|
|
|
@ -125,11 +125,15 @@ then we will return NA unless you provide and updated X_range.</p>
|
|||
</div>
|
||||
|
||||
<pre class="usage"><span class='co'># S3 method for grid_partition</span>
|
||||
<span class='fu'><a href='https://rdrr.io/r/stats/predict.html'>predict</a></span><span class='op'>(</span><span class='va'>obj</span>, <span class='va'>X</span>, X_range <span class='op'>=</span> <span class='cn'>NULL</span><span class='op'>)</span></pre>
|
||||
<span class='fu'><a href='https://rdrr.io/r/stats/predict.html'>predict</a></span><span class='op'>(</span><span class='va'>object</span>, <span class='va'>X</span>, X_range <span class='op'>=</span> <span class='cn'>NULL</span>, <span class='va'>...</span><span class='op'>)</span></pre>
|
||||
|
||||
<h2 class="hasAnchor" id="arguments"><a class="anchor" href="#arguments"></a>Arguments</h2>
|
||||
<table class="ref-arguments">
|
||||
<colgroup><col class="name" /><col class="desc" /></colgroup>
|
||||
<tr>
|
||||
<th>object</th>
|
||||
<td><p>partition</p></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>X</th>
|
||||
<td><p>X data or list of X</p></td>
|
||||
|
@ -139,8 +143,8 @@ then we will return NA unless you provide and updated X_range.</p>
|
|||
<td><p>(Optional) overrides the partition$X_range</p></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>partition</th>
|
||||
<td><p>partition</p></td>
|
||||
<th>...</th>
|
||||
<td><p>Additional arguments. Unused.</p></td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
|
||||
<meta property="og:title" content="Print estimated_partition — print.estimated_partition" />
|
||||
|
||||
<meta property="og:description" content="Print a summary of the estimated partition. Uses get_desc_df.estimated_partition" />
|
||||
<meta property="og:description" content="Print a summary of the estimated partition. Uses get_desc_df" />
|
||||
|
||||
|
||||
|
||||
|
@ -118,7 +118,7 @@
|
|||
|
||||
<div class="ref-description">
|
||||
|
||||
<p>Print a summary of the estimated partition. Uses <code><a href='get_desc_df.estimated_partition.html'>get_desc_df.estimated_partition</a></code></p>
|
||||
<p>Print a summary of the estimated partition. Uses <code><a href='get_desc_df.html'>get_desc_df</a></code></p>
|
||||
|
||||
</div>
|
||||
|
||||
|
@ -141,11 +141,13 @@
|
|||
</tr>
|
||||
<tr>
|
||||
<th>do_str</th>
|
||||
<td><p>If True, use a string like "(a, b]", otherwise have two separate columns with a and b</p></td>
|
||||
<td><p>If True, use a string like "(a, b]", otherwise have two
|
||||
separate columns with a and b</p></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>drop_unsplit</th>
|
||||
<td><p>If True, drop columns for variables overwhich the partition did not split</p></td>
|
||||
<td><p>If True, drop columns for variables over which the
|
||||
partition did not split</p></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>digits</th>
|
||||
|
@ -153,8 +155,8 @@
|
|||
</tr>
|
||||
<tr>
|
||||
<th>import_order</th>
|
||||
<td><p>Should we use importance ordering (most important on the left) or input ordering (default) for features.
|
||||
Rows will be ordered so that the right-most will change most frequently.</p></td>
|
||||
<td><p>Whether to use importance ordering
|
||||
(most important on the left) or input ordering (default) for features.</p></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>...</th>
|
||||
|
|
|
@ -27,6 +27,10 @@ should be normalized to have the same variance. With multiple core estimates see
|
|||
|
||||
\item{partition_i}{partition_i - 1 is the last index in split_seq that is included in the new partition}
|
||||
|
||||
\item{index_tr}{Split between train and estimate samples (default is to get from \code{fit})}
|
||||
|
||||
\item{split_seq}{sequential list of splits (default is to get from \code{fit})}
|
||||
|
||||
\item{est_plan}{\link{EstimatorPlan}.}
|
||||
}
|
||||
\value{
|
||||
|
|
|
@ -1,11 +0,0 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/utils.R
|
||||
\name{const_vectr}
|
||||
\alias{const_vectr}
|
||||
\title{inline', second just Rcpp), but couldn't get to work in building a package.}
|
||||
\usage{
|
||||
const_vectr(x)
|
||||
}
|
||||
\description{
|
||||
inline', second just Rcpp), but couldn't get to work in building a package.
|
||||
}
|
|
@ -11,7 +11,7 @@ estimated_partition(partition, cell_stats, ...)
|
|||
|
||||
\item{cell_stats}{cell_stats}
|
||||
|
||||
\item{...}{}
|
||||
\item{...}{Additional arguments}
|
||||
}
|
||||
\value{
|
||||
object of class estimated_partition
|
||||
|
|
|
@ -93,6 +93,8 @@ single - (smart) redo full fitting removing each possible dimension
|
|||
interaction - (smart) redo full fitting removing each pair of dimensions
|
||||
"" - Nothing}
|
||||
|
||||
\item{...}{Additional params.}
|
||||
|
||||
\item{x}{an R object}
|
||||
}
|
||||
\value{
|
||||
|
|
|
@ -86,6 +86,8 @@ for treated and controls.}
|
|||
If m_mode==DS.MULTI_SAMPLE then each item is a sublist with such bootstrap samples over each dataset.}
|
||||
|
||||
\item{bump_ratio}{For bootstraps the ratio of sample size to sample (between 0 and 1, default 1)}
|
||||
|
||||
\item{...}{Additional params.}
|
||||
}
|
||||
\value{
|
||||
An object.
|
||||
|
|
|
@ -0,0 +1,41 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/utils.R
|
||||
\name{get_desc_df}
|
||||
\alias{get_desc_df}
|
||||
\title{Get descriptive data.frame}
|
||||
\usage{
|
||||
get_desc_df(
|
||||
obj,
|
||||
cont_bounds_inf = TRUE,
|
||||
do_str = FALSE,
|
||||
drop_unsplit = FALSE,
|
||||
digits = NULL,
|
||||
unsplit_cat_star = TRUE,
|
||||
...
|
||||
)
|
||||
}
|
||||
\arguments{
|
||||
\item{obj}{partition object}
|
||||
|
||||
\item{cont_bounds_inf}{Should "Inf" be used for continuous bounds (otherwise
|
||||
the bounds from X_range)}
|
||||
|
||||
\item{do_str}{If True, use a string like "(a, b]", otherwise have two
|
||||
separate columns with a and b}
|
||||
|
||||
\item{drop_unsplit}{If True, drop columns for variables over which the
|
||||
partition did not split}
|
||||
|
||||
\item{digits}{digits Option (default is NULL)}
|
||||
|
||||
\item{unsplit_cat_star}{Should unsplit categorical variables be listed as
|
||||
"*", otherwise all factor labels will be used.}
|
||||
|
||||
\item{...}{Additional arguments.}
|
||||
}
|
||||
\value{
|
||||
data.frame with columns: partitioning columns
|
||||
}
|
||||
\description{
|
||||
Get information for each cell
|
||||
}
|
|
@ -2,31 +2,46 @@
|
|||
% Please edit documentation in R/fit_estimate.R
|
||||
\name{get_desc_df.estimated_partition}
|
||||
\alias{get_desc_df.estimated_partition}
|
||||
\title{Get descriptive data.frame for an estimated_partition}
|
||||
\title{Get descriptive data.frame}
|
||||
\usage{
|
||||
get_desc_df.estimated_partition(
|
||||
\method{get_desc_df}{estimated_partition}(
|
||||
obj,
|
||||
cont_bounds_inf = TRUE,
|
||||
do_str = TRUE,
|
||||
drop_unsplit = TRUE,
|
||||
digits = NULL,
|
||||
import_order = FALSE
|
||||
unsplit_cat_star = TRUE,
|
||||
import_order = FALSE,
|
||||
...
|
||||
)
|
||||
}
|
||||
\arguments{
|
||||
\item{obj}{estimated_partition object}
|
||||
\item{obj}{partition object}
|
||||
|
||||
\item{do_str}{If True, use a string like "(a, b]", otherwise have two separate columns with a and b}
|
||||
\item{cont_bounds_inf}{Should "Inf" be used for continuous bounds (otherwise
|
||||
the bounds from X_range)}
|
||||
|
||||
\item{drop_unsplit}{If True, drop columns for variables overwhich the partition did not split}
|
||||
\item{do_str}{If True, use a string like "(a, b]", otherwise have two
|
||||
separate columns with a and b}
|
||||
|
||||
\item{drop_unsplit}{If True, drop columns for variables over which the
|
||||
partition did not split}
|
||||
|
||||
\item{digits}{digits Option (default is NULL)}
|
||||
|
||||
\item{import_order}{Should we use importance ordering (most important on the left) or input ordering (default) for features.
|
||||
Rows will be ordered so that the right-most will change most frequently.}
|
||||
\item{unsplit_cat_star}{Should unsplit categorical variables be listed as
|
||||
"*", otherwise all factor labels will be used.}
|
||||
|
||||
\item{import_order}{Whether to use importance ordering
|
||||
(most important on the left) or input ordering (default) for features. Rows
|
||||
will be ordered so that the right-most will change most frequently.}
|
||||
|
||||
\item{...}{Additional arguments.}
|
||||
}
|
||||
\value{
|
||||
data.frame with columns: partitionin columns, {N_est, param_ests, pval} per estimate
|
||||
data.frame with columns: partitioning columns, {N_est, param_ests,
|
||||
pval} per estimate
|
||||
}
|
||||
\description{
|
||||
Get statistics for each cell (feature boundary, and estimated cell stats)
|
||||
Get information for each cell
|
||||
}
|
||||
|
|
|
@ -2,33 +2,40 @@
|
|||
% Please edit documentation in R/grid_partition.R
|
||||
\name{get_desc_df.grid_partition}
|
||||
\alias{get_desc_df.grid_partition}
|
||||
\title{Get descriptive data.frame for grid_partition}
|
||||
\title{Get descriptive data.frame}
|
||||
\usage{
|
||||
get_desc_df.grid_partition(
|
||||
partition,
|
||||
\method{get_desc_df}{grid_partition}(
|
||||
obj,
|
||||
cont_bounds_inf = TRUE,
|
||||
do_str = FALSE,
|
||||
drop_unsplit = FALSE,
|
||||
digits = NULL,
|
||||
unsplit_cat_star = TRUE
|
||||
unsplit_cat_star = TRUE,
|
||||
...
|
||||
)
|
||||
}
|
||||
\arguments{
|
||||
\item{partition}{Partition}
|
||||
\item{obj}{partition object}
|
||||
|
||||
\item{cont_bounds_inf}{If True, will put continuous bounds as -Inf/Inf. Otherwise will use X_range bounds}
|
||||
\item{cont_bounds_inf}{Should "Inf" be used for continuous bounds (otherwise
|
||||
the bounds from X_range)}
|
||||
|
||||
\item{do_str}{If True, use a string like "(a, b]", otherwise have two separate columns with a and b}
|
||||
\item{do_str}{If True, use a string like "(a, b]", otherwise have two
|
||||
separate columns with a and b}
|
||||
|
||||
\item{drop_unsplit}{If True, drop columns for variables overwhich the partition did not split}
|
||||
\item{drop_unsplit}{If True, drop columns for variables over which the
|
||||
partition did not split}
|
||||
|
||||
\item{digits}{digits option}
|
||||
\item{digits}{digits Option (default is NULL)}
|
||||
|
||||
\item{unsplit_cat_star}{if we don't split on a categorical var, should we show as "*" (otherwise list all levels)}
|
||||
\item{unsplit_cat_star}{Should unsplit categorical variables be listed as
|
||||
"*", otherwise all factor labels will be used.}
|
||||
|
||||
\item{...}{Additional arguments.}
|
||||
}
|
||||
\value{
|
||||
data.frame
|
||||
data.frame with columns: partitioning columns
|
||||
}
|
||||
\description{
|
||||
A dataset with rows for each cell and columns defining partitioning
|
||||
Get information for each cell
|
||||
}
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
\name{grid_partition}
|
||||
\alias{grid_partition}
|
||||
\alias{is_grid_partition}
|
||||
\title{Create a null grid_partition}
|
||||
\title{Create a null \code{grid_partition}}
|
||||
\usage{
|
||||
grid_partition(X_range, varnames = NULL)
|
||||
|
||||
|
@ -23,7 +23,7 @@ True if x is a grid_partition
|
|||
}
|
||||
\description{
|
||||
Create a empty partition. Splits can be added using \code{\link{add_partition_split}}.
|
||||
Information about a split can be retrieved using \code{\link{num_cells}}, \code{\link{get_desc_df.grid_partition}} and \code{\link{print}}
|
||||
Information about a split can be retrieved using \code{\link{num_cells}}, \code{\link{get_desc_df}} and \code{\link{print}}
|
||||
With data, one can determine the cell for each observation using \code{\link{predict}}
|
||||
|
||||
Test whether an object is a \code{grid_partition}
|
||||
|
|
|
@ -1,22 +0,0 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/grid_partition.R
|
||||
\name{is_grid_partition_split}
|
||||
\alias{is_grid_partition_split}
|
||||
\title{Is grid_partition_split}
|
||||
\usage{
|
||||
is_grid_partition_split(x)
|
||||
}
|
||||
\arguments{
|
||||
\item{x}{an R object}
|
||||
}
|
||||
\value{
|
||||
Boolean
|
||||
}
|
||||
\description{
|
||||
Tests whether or not an object is a \code{partition_split}.
|
||||
}
|
||||
\section{Functions}{
|
||||
\itemize{
|
||||
\item \code{is_grid_partition_split}: is grid_partition_split
|
||||
}}
|
||||
|
|
@ -2,18 +2,32 @@
|
|||
% Please edit documentation in R/grid_partition.R
|
||||
\name{partition_split}
|
||||
\alias{partition_split}
|
||||
\alias{is_partition_split}
|
||||
\title{Create partition_split}
|
||||
\usage{
|
||||
partition_split(k, X_k_cut)
|
||||
|
||||
is_partition_split(x)
|
||||
}
|
||||
\arguments{
|
||||
\item{k}{dimension}
|
||||
|
||||
\item{X_k_cut}{cut value}
|
||||
|
||||
\item{x}{an R object}
|
||||
}
|
||||
\value{
|
||||
Partition Split
|
||||
|
||||
Boolean
|
||||
}
|
||||
\description{
|
||||
Describes a single partition split. Used with \code{\link{add_partition_split}}.
|
||||
|
||||
Tests whether or not an object is a \code{partition_split}.
|
||||
}
|
||||
\section{Functions}{
|
||||
\itemize{
|
||||
\item \code{is_partition_split}: is partition_split
|
||||
}}
|
||||
|
||||
|
|
|
@ -4,12 +4,14 @@
|
|||
\alias{plot.estimated_partition}
|
||||
\title{Create 2D plots of parameter estimates}
|
||||
\usage{
|
||||
\method{plot}{estimated_partition}(grid_fit, X_names_2D = NULL)
|
||||
\method{plot}{estimated_partition}(x, X_names_2D = NULL, ...)
|
||||
}
|
||||
\arguments{
|
||||
\item{grid_fit}{grid_fit}
|
||||
\item{x}{grid_fit}
|
||||
|
||||
\item{X_names_2D}{X_names_2D}
|
||||
|
||||
\item{...}{Additional arguments. Unused.}
|
||||
}
|
||||
\value{
|
||||
ggplot2 object or list of such objects
|
||||
|
|
|
@ -4,16 +4,18 @@
|
|||
\alias{predict.estimated_partition}
|
||||
\title{Generate predicted estimates per observations}
|
||||
\usage{
|
||||
\method{predict}{estimated_partition}(obj, new_X, new_d = NULL, type = "effect")
|
||||
\method{predict}{estimated_partition}(object, new_X, new_d = NULL, type = "effect", ...)
|
||||
}
|
||||
\arguments{
|
||||
\item{obj}{estimated_partition object}
|
||||
\item{object}{estimated_partition object}
|
||||
|
||||
\item{new_X}{new X}
|
||||
|
||||
\item{new_d}{new d. Required for type="outcome"}
|
||||
|
||||
\item{type}{"effect" or "outcome" (currently not implemented)}
|
||||
|
||||
\item{...}{Additional arguments. Unused.}
|
||||
}
|
||||
\value{
|
||||
predicted treatment effect
|
||||
|
|
|
@ -4,14 +4,16 @@
|
|||
\alias{predict.grid_partition}
|
||||
\title{Get factor describing cell number for each observation}
|
||||
\usage{
|
||||
\method{predict}{grid_partition}(obj, X, X_range = NULL)
|
||||
\method{predict}{grid_partition}(object, X, X_range = NULL, ...)
|
||||
}
|
||||
\arguments{
|
||||
\item{obj}{partition}
|
||||
\item{object}{partition}
|
||||
|
||||
\item{X}{X data or list of X}
|
||||
|
||||
\item{X_range}{(Optional) overrides the partition$X_range}
|
||||
|
||||
\item{...}{Additional arguments. Unused.}
|
||||
}
|
||||
\value{
|
||||
Factor
|
||||
|
|
|
@ -16,14 +16,16 @@
|
|||
\arguments{
|
||||
\item{x}{estimated_partition object}
|
||||
|
||||
\item{do_str}{If True, use a string like "(a, b]", otherwise have two separate columns with a and b}
|
||||
\item{do_str}{If True, use a string like "(a, b]", otherwise have two
|
||||
separate columns with a and b}
|
||||
|
||||
\item{drop_unsplit}{If True, drop columns for variables overwhich the partition did not split}
|
||||
\item{drop_unsplit}{If True, drop columns for variables over which the
|
||||
partition did not split}
|
||||
|
||||
\item{digits}{digits Option (default is NULL)}
|
||||
|
||||
\item{import_order}{Should we use importance ordering (most important on the left) or input ordering (default) for features.
|
||||
Rows will be ordered so that the right-most will change most frequently.}
|
||||
\item{import_order}{Whether to use importance ordering
|
||||
(most important on the left) or input ordering (default) for features.}
|
||||
|
||||
\item{...}{Additional arguments. These will be passed to print.data.frame}
|
||||
}
|
||||
|
@ -31,5 +33,5 @@ Rows will be ordered so that the right-most will change most frequently.}
|
|||
string (and displayed)
|
||||
}
|
||||
\description{
|
||||
Print a summary of the estimated partition. Uses \code{\link{get_desc_df.estimated_partition}}
|
||||
Print a summary of the estimated partition. Uses \code{\link{get_desc_df}}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,6 @@
|
|||
external.libraries: C:\Program Files\Microsoft\R Open\R-3.5.3\library
|
||||
ignored.packages:
|
||||
package.dependency.fields: Imports, Depends, LinkingTo
|
||||
snapshot.type: implicit
|
||||
use.cache: TRUE
|
||||
vcs.ignore.library: TRUE
|
|
@ -12,7 +12,8 @@ vignette: >
|
|||
```{r, include = FALSE}
|
||||
knitr::opts_chunk$set(
|
||||
collapse = TRUE,
|
||||
comment = "#>"
|
||||
comment = "#>",
|
||||
dev = "ragg_png"
|
||||
)
|
||||
```
|
||||
|
||||
|
@ -39,6 +40,11 @@ tau = as.integer(X[,1]>.5)*2-1 #true treatment effect (just heterogeneous across
|
|||
y = d*tau + rnorm(N, 0, err_sd) #outcome
|
||||
```
|
||||
|
||||
```{r}
|
||||
est_part0 = fit_estimate_partition(y, X, d, cv_folds=2)
|
||||
get_desc_df(est_part0)
|
||||
```
|
||||
|
||||
We typically want a high-level partition for "human" consumption. To save time, avoid cells with too few observations, and reduce the chance of splitting from running many noisy tests, it's common to only look for a few splits per dimension. If we don't specify this, the function will try every possible split across each dimension.
|
||||
```{r}
|
||||
# With just a scalar, we will split at points equal across the quantile-distribution for each feature.
|
||||
|
@ -53,7 +59,7 @@ plot(est_part)
|
|||
```
|
||||
|
||||
```{r}
|
||||
get_desc_df.estimated_partition(est_part)
|
||||
get_desc_df(est_part)
|
||||
```
|
||||
|
||||
We can manually estimate this simple model given the partition
|
||||
|
@ -101,7 +107,7 @@ Now let's look at a case where there's hereogeneity across all three dimensions.
|
|||
tau_3 = (as.integer(X[,1]>0.5)*2-1) + (as.integer(X[,2]>0.5)*2-1)*2 + (as.integer(X[,3]>0.5)*2-1)*3
|
||||
y_3 = d*tau_3 + rnorm(N, 0, err_sd)
|
||||
est_part_3 = fit_estimate_partition(y_3, X, d, breaks_per_dim=5, partition_i=4)
|
||||
get_desc_df.estimated_partition(est_part_3)
|
||||
get_desc_df(est_part_3)
|
||||
```
|
||||
|
||||
One benefit of grid-based partitions is that you can easily view 2D slices of the full heterogeneity space.
|
||||
|
@ -137,7 +143,7 @@ tau2 = as.integer(X[,2]>0.5)*2-1
|
|||
y2_yM = d*tau2 + rnorm(N, 0, err_sd)
|
||||
y_yM = cbind(y, y2_yM)
|
||||
est_part_yM = fit_estimate_partition(y_yM, X, d, breaks_per_dim=5, partition_i = 3)
|
||||
get_desc_df.estimated_partition(est_part_yM)
|
||||
get_desc_df(est_part_yM)
|
||||
```
|
||||
|
||||
2) Multiple treatments, but same sample (single outcome)
|
||||
|
@ -146,7 +152,7 @@ d2 = rbinom(N, 1, 0.5)
|
|||
d_dM = cbind(d, d2)
|
||||
y_dM = d*tau + d2*tau2 + rnorm(N, 0, err_sd)
|
||||
est_part_dM = fit_estimate_partition(y_dM, X, d_dM, breaks_per_dim=5, partition_i = 3)
|
||||
get_desc_df.estimated_partition(est_part_dM)
|
||||
get_desc_df(est_part_dM)
|
||||
```
|
||||
|
||||
3) Multiple separate samples, each having a single outcome and treatment
|
||||
|
@ -156,14 +162,14 @@ y_MM = list(y, y2_MM)
|
|||
d_MM = list(d, d2)
|
||||
X_MM = list(X, X)
|
||||
est_part_MM = fit_estimate_partition(y_MM, X_MM, d_MM, breaks_per_dim=5, partition_i = 3)
|
||||
get_desc_df.estimated_partition(est_part_MM)
|
||||
get_desc_df(est_part_MM)
|
||||
```
|
||||
# Mean-outcome prediction
|
||||
```{r}
|
||||
alpha = as.integer(X[,1]>0.5)*2-1 #true average outcome effect (just heterogeneous across X1)
|
||||
y_y = alpha + rnorm(N, 0, err_sd) #outcome
|
||||
est_part_y = fit_estimate_partition(y_y, X, breaks_per_dim=5, partition_i=2)
|
||||
get_desc_df.estimated_partition(est_part_y)
|
||||
get_desc_df(est_part_y)
|
||||
```
|
||||
|
||||
# Minor things to add
|
||||
|
|
Загрузка…
Ссылка в новой задаче