This commit is contained in:
Brian Quistorff 2021-06-23 11:35:39 -07:00
Родитель 4060b05233 35b7975e42
Коммит d572525c38
48 изменённых файлов: 727 добавлений и 329 удалений

Просмотреть файл

@ -14,3 +14,6 @@
^_pkgdown\.yml$
^docs$
^pkgdown$
^CODE_OF_CONDUCT\.md$
^SUPPORT\.md$
^SECURITY\.md$

1
.gitignore поставляемый
Просмотреть файл

@ -49,3 +49,4 @@ vignettes/*.pdf
.Rproj.user
doc/
Meta
doc

Просмотреть файл

@ -4,10 +4,6 @@ Version: 0.2
Authors@R: person("Brian", "Quistorff", email = "Brian.Quistorff@microsoft.com",
role = c("aut", "cre"))
Description: Analysis of Subgroups.
Depends: R (>= 3.1.0),
caret,
gsubfn,
assertthat
License: MIT + file LICENSE
LazyData: true
RoxygenNote: 7.1.1
@ -20,9 +16,15 @@ Suggests:
pbapply,
testthat,
knitr,
rmarkdown
rmarkdown,
ragg,
gridExtra
BuildVignettes: true
Imports: Rcpp (>= 1.0.1)
Imports: Rcpp (>= 1.0.1),
gsubfn,
assertthat,
caret,
purrr
LinkingTo: Rcpp
Encoding: UTF-8
VignetteBuilder: knitr

Просмотреть файл

@ -3,6 +3,8 @@
S3method(est_params,grid_rf)
S3method(est_params,lm_est)
S3method(est_params,simple_est)
S3method(get_desc_df,estimated_partition)
S3method(get_desc_df,grid_partition)
S3method(num_cells,estimated_partition)
S3method(num_cells,grid_partition)
S3method(plot,estimated_partition)
@ -22,15 +24,14 @@ export(fit_estimate_partition)
export(fit_on_train)
export(fit_partition)
export(get_X_range)
export(get_desc_df.estimated_partition)
export(get_desc_df.grid_partition)
export(get_desc_df)
export(grid_partition)
export(grid_rf)
export(is_estimated_partition)
export(is_grid_partition)
export(is_grid_partition_split)
export(is_grid_rf)
export(is_lm_est)
export(is_partition_split)
export(is_simple_est)
export(lm_est)
export(num_cells)
@ -38,11 +39,11 @@ export(partition_split)
export(residualize)
export(simple_est)
export(test_any_sign_effect)
import(Rcpp)
import(assertthat)
import(caret)
import(gsubfn)
importFrom(Rcpp,sourceCpp)
importFrom(caret,createFolds)
importFrom(gsubfn,"[<-.result")
importFrom(gsubfn,list)
importFrom(purrr,partial)
importFrom(stats,coef)
importFrom(stats,formula)
@ -58,4 +59,7 @@ importFrom(stats,sd)
importFrom(stats,var)
importFrom(stats,vcov)
importFrom(utils,combn)
importFrom(utils,getTxtProgressBar)
importFrom(utils,setTxtProgressBar)
importFrom(utils,txtProgressBar)
useDynLib(CausalGrid, .registration = TRUE)

Просмотреть файл

@ -39,10 +39,9 @@
#' @importFrom Rcpp sourceCpp
#' @importFrom stats coef formula lm model.matrix p.adjust pt qt quantile sd
#' vcov var predict rnorm
#' @importFrom utils combn
#' @import caret
#' @import gsubfn
#' @import Rcpp
#' @importFrom utils combn txtProgressBar setTxtProgressBar getTxtProgressBar
#' @importFrom caret createFolds
#' @importFrom gsubfn list [<-.result
#' @import assertthat
#' @importFrom purrr partial
#' @docType package

Просмотреть файл

@ -4,7 +4,7 @@
#'
#' @param partition partition
#' @param cell_stats cell_stats
#' @param ...
#' @param ... Additional arguments
#'
#' @return object of class estimated_partition
#' @export
@ -203,6 +203,8 @@ num_cells.estimated_partition <- function(obj) {
#' @param fit estimated_partition
#' @param partition_i Index into \code{split_seq}: splits up to \code{partition_i - 1} are included in the new partition
#' @inheritParams fit_partition
#' @param index_tr Split between train and estimate samples (default is to get from \code{fit})
#' @param split_seq sequential list of splits (default is to get from \code{fit})
#'
#' @return updated estimated_partition
#' @export
@ -222,23 +224,23 @@ change_complexity <- function(fit, y, X, d=NULL, partition_i, index_tr = fit$ind
}
#' Get descriptive data.frame for an estimated_partition
#' Get descriptive data.frame
#'
#' Get information for each cell
#'
#' @inheritParams get_desc_df
#' @param import_order Whether should use importance ordering
#' (most important on the left) or input ordering (default) for features. Rows
#' will be ordered so that the right-most will change most frequently.
#'
#' Get statistics for each cell (feature boundary, and estimated cell stats)
#'
#' @param obj estimated_partition object
#' @param do_str If True, use a string like "(a, b]", otherwise have two separate columns with a and b
#' @param drop_unsplit If True, drop columns for variables overwhich the partition did not split
#' @param digits digits Option (default is NULL)
#' @param import_order Should we use importance ordering (most important on the left) or input ordering (default) for features.
#' Rows will be ordered so that the right-most will change most frequently.
#'
#' @return data.frame with columns: partitionin columns, {N_est, param_ests, pval} per estimate
#' @export
get_desc_df.estimated_partition <- function(obj, do_str=TRUE, drop_unsplit=TRUE, digits=NULL, import_order=FALSE) {
#'
#' @return data.frame with columns: partitioning columns, {N_est, param_ests,
#' pval} per estimate
#' @export
get_desc_df.estimated_partition <- function(obj, cont_bounds_inf=TRUE, do_str=TRUE, drop_unsplit=TRUE, digits=NULL, unsplit_cat_star=TRUE, import_order=FALSE, ...) {
M = obj$M
stats = obj$cell_stats[c(F, rep(T,M), rep(T,M), rep(F,M),rep(F,M), rep(F,M), rep(F,M), rep(T,M), rep(F,M), rep(F,M))]
part_df = get_desc_df.grid_partition(obj$partition, do_str=do_str, drop_unsplit=drop_unsplit, digits=digits)
part_df = get_desc_df(obj$partition, cont_bounds_inf=cont_bounds_inf, do_str=do_str, drop_unsplit=drop_unsplit, digits=digits, unsplit_cat_star=unsplit_cat_star)
imp_weights = obj$importance_weights
if(drop_unsplit) {
@ -255,16 +257,18 @@ get_desc_df.estimated_partition <- function(obj, do_str=TRUE, drop_unsplit=TRUE,
# Inherited params: do_str, drop_unsplit, digits, import_order
#' Print estimated_partition
#'
#' Print a summary of the estimated partition. Uses \code{\link{get_desc_df.estimated_partition}}
#' Print a summary of the estimated partition. Uses \code{\link{get_desc_df}}
#'
#' @param x estimated_partition object
#' @inheritParams get_desc_df.estimated_partition
#' @inheritParams get_desc_df
#' @param import_order Whether should use importance ordering
#' (most important on the left) or input ordering (default) for features.
#' @param ... Additional arguments. These will be passed to print.data.frame
#'
#' @return string (and displayed)
#' @export
print.estimated_partition <- function(x, do_str=TRUE, drop_unsplit=TRUE, digits=NULL, import_order=FALSE, ...) {
  # Build the per-cell descriptive data.frame and print it.
  # Arguments are forwarded BY NAME: the get_desc_df methods take
  # cont_bounds_inf as their second formal, so positional forwarding would
  # shift do_str/drop_unsplit/digits into the wrong parameters (and the default
  # digits=NULL would end up as drop_unsplit, which must be a flag).
  desc_df = get_desc_df(x, do_str=do_str, drop_unsplit=drop_unsplit,
                        digits=digits, import_order=import_order)
  # digits and any extra arguments in ... go to the print call for the data.frame.
  return(print(desc_df, digits=digits, ...))
}
@ -426,26 +430,27 @@ est_full_stats <- function(y, X, d, est_plan, y_es=NULL, X_es=NULL, d_es=NULL, i
#'
#' Predicted unit-level treatment effect or outcome
#'
#' @param obj estimated_partition object
#' @param object estimated_partition object
#' @param new_X new X
#' @param new_d new d. Required for type="outcome"
#' @param type "effect" or "outcome" (currently not implemented)
#' @param ... Additional arguments. Unused.
#'
#' @return predicted treatment effect
#' @export
predict.estimated_partition <- function(obj, new_X, new_d=NULL, type="effect") {
predict.estimated_partition <- function(object, new_X, new_d = NULL, type = "effect", ...) {
#TODO: for mode 1 &2 maybe return a matrix rather than list
new_X = ensure_good_X(new_X)
new_X_range = get_X_range(new_X)
cell_factor = predict(obj$partition, new_X, new_X_range)
M = obj$M
cell_factor = predict(object$partition, new_X, new_X_range)
M = object$M
if(M==1) {
N=nrow(new_X)
cell_factor_df = data.frame(id=1:N, cell_i = as.integer(cell_factor))
m_df = merge(cell_factor_df, obj$cell_stats)
m_df = merge(cell_factor_df, object$cell_stats)
m_df = m_df[order(m_df[["id"]]), ]
return(m_df[["param_ests"]])
}
@ -453,7 +458,7 @@ predict.estimated_partition <- function(obj, new_X, new_d=NULL, type="effect") {
rets = list()
for(m in 1:M) {
cell_factor_df = data.frame(id=1:N[m], cell_i = as.integer(cell_factor[[m]]))
m_df = merge(cell_factor_df, obj$cell_stats)
m_df = merge(cell_factor_df, object$cell_stats)
m_df = m_df[order(m_df[["id"]]), ]
rets[[m]] = m_df[["param_ests"]]
}

Просмотреть файл

@ -4,54 +4,57 @@
#'
#' Creates a 2D plot of parameter estimates or a series of such slices if partition is across >2 features.
#'
#' @param grid_fit grid_fit
#' @param x grid_fit
#' @param X_names_2D X_names_2D
#' @param ... Additional arguments. Unused.
#'
#' @return ggplot2 object or list of such objects
#' @export
plot.estimated_partition <- function(grid_fit, X_names_2D=NULL) {
plot.estimated_partition <- function(x, X_names_2D=NULL, ...) {
if (!requireNamespace("ggplot2", quietly = TRUE)) {
stop("Package \"ggplot2\" needed for this function to work. Please install it.",
call. = FALSE)
}
split_dims = (grid_fit$partition$nsplits_by_dim > 0)
split_dims = (x$partition$nsplits_by_dim > 0)
n_split_dims = sum(split_dims)
if(n_split_dims==0) {
print("Nothing to graph as no heterogeneity")
return(NULL)
}
desc_range_df = get_desc_df.grid_partition(grid_fit$partition, drop_unsplit=TRUE, cont_bounds_inf=FALSE)
desc_range_df = get_desc_df(x$partition, drop_unsplit=TRUE, cont_bounds_inf=FALSE)
if(n_split_dims==1) {
desc_range_df = do.call(cbind, lapply(desc_range_df, function(c) as.data.frame(t(matrix(unlist(c), nrow=2)))))
desc_range_df['ymin'] = 0
desc_range_df['ymax'] = 1
colnames(desc_range_df)<-c("xmin", "xmax", "ymin", "ymax")
desc_range_df["estimate"] = grid_fit$cell_stats$param_ests
xname = if(!is.null(X_names_2D)) X_names_2D[1] else grid_fit$partition$varnames[split_dims]
desc_range_df["estimate"] = x$cell_stats$param_ests
xname = if(!is.null(X_names_2D)) X_names_2D[1] else x$partition$varnames[split_dims]
plt = ggplot2::ggplot() +
ggplot2::scale_x_continuous(name=xname) +
theme(axis.title.y=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank()) + xlab(xname) +
ggplot2::theme(axis.title.y=ggplot2::element_blank(),
axis.text.y=ggplot2::element_blank(),
axis.ticks.y=ggplot2::element_blank()) +
ggplot2::xlab(xname) +
ggplot2::geom_rect(data=desc_range_df, mapping=ggplot2::aes(xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax, fill=estimate), color="black")
return(plt)
}
if(n_split_dims==2){
if(is.null(X_names_2D)) X_names_2D = grid_fit$partition$varnames[split_dims]
return(gen_one_plt(desc_range_df, grid_fit$cell_stats$param_ests, X_names_2D))
if(is.null(X_names_2D)) X_names_2D = x$partition$varnames[split_dims]
return(gen_one_plt(desc_range_df, x$cell_stats$param_ests, X_names_2D))
}
desc_range_df_fact = data.frame(lapply(get_desc_df.grid_partition(grid_fit$partition, drop_unsplit=TRUE, do_str=TRUE), unclass))
desc_range_df_fact = data.frame(lapply(get_desc_df(x$partition, drop_unsplit=TRUE, do_str=TRUE), unclass))
if(is.null(X_names_2D)){
if(is.null(grid_fit$importance_weights)) {
X_names_2D = grid_fit$partition$varnames[split_dims][1:2]
if(is.null(x$importance_weights)) {
X_names_2D = x$partition$varnames[split_dims][1:2]
}
else {
X_names_2D = grid_fit$partition$varnames[order(imp_weights, decreasing=FALSE)]
X_names_2D = x$partition$varnames[order(imp_weights, decreasing=FALSE)]
}
}
other_idx = !(names(desc_range_df) %in% X_names_2D)
n_segs_other = (grid_fit$partition$nsplits_by_dim+1)[other_idx]
n_segs_other = (x$partition$nsplits_by_dim+1)[other_idx]
names_other = names(desc_range_df)[other_idx]
size_other = cumprod(n_segs_other)
test_row_equals_vec <- function(M, v) {
@ -66,8 +69,8 @@ plot.estimated_partition <- function(grid_fit, X_names_2D=NULL) {
for(k in 1:length(segment_indexes)){
levels_desc[k] = levels(desc_range_df_fact[,which(other_idx)[k]])[segment_indexes[k]]
}
plts[[slice_i]] = gen_one_plt(desc_range_df[row_idx,X_names_2D], grid_fit$cell_stats$param_ests[row_idx], X_names_2D) +
ggtitle(paste(paste(names_other, levels_desc), collapse=", "))
plts[[slice_i]] = gen_one_plt(desc_range_df[row_idx,X_names_2D], x$cell_stats$param_ests[row_idx], X_names_2D) +
ggplot2::ggtitle(paste(paste(names_other, levels_desc), collapse=", "))
}
return(plts)

Просмотреть файл

@ -12,10 +12,10 @@
NULL
#> NULL
#' Create a null grid_partition
#' Create a null \code{grid_partition}
#'
#' Create an empty partition. Splits can be added using \code{\link{add_partition_split}}.
#' Information about a split can be retrieved using \code{\link{num_cells}}, \code{\link{get_desc_df.grid_partition}} and \code{\link{print}}
#' Information about a split can be retrieved using \code{\link{num_cells}}, \code{\link{get_desc_df}} and \code{\link{print}}
#' With data, one can determine the cell for each observation using \code{\link{predict}}
#'
#' @param X_range Such as from \code{\link{get_X_range}}
@ -93,14 +93,15 @@ get_X_range <- function(X) {
#' Note that currently if X has values more extreme (e.g., for numeric or factor levels ) than was used to generate the partition
#' then we will return NA unless you provide an updated X_range.
#'
#' @param obj partition
#' @param object partition
#' @param X X data or list of X
#' @param X_range (Optional) overrides the partition$X_range
#' @param ... Additional arguments. Unused.
#'
#' @return Factor
#' @export
predict.grid_partition <- function(obj, X, X_range=NULL) {
facts = get_factors_from_partition(obj, X, X_range=X_range)
predict.grid_partition <- function(object, X, X_range=NULL, ...) {
facts = get_factors_from_partition(object, X, X_range=X_range)
return(interaction_m(facts, is_sep_sample(X)))
}
@ -126,31 +127,27 @@ num_cells.grid_partition <- function(obj) {
print.grid_partition <- function(x, do_str=TRUE, drop_unsplit=TRUE, digits=NULL, ...) {
#To check: digits
assert_that(is.flag(do_str), is.flag(drop_unsplit), msg="One of do_str or drop_unsplit are not flags")
return(print(get_desc_df.grid_partition(x, do_str=do_str, drop_unsplit=drop_unsplit, digits=digits),
return(print(get_desc_df(x, do_str=do_str, drop_unsplit=drop_unsplit, digits=digits),
digits=digits, ...))
}
#' Get descriptive data.frame for grid_partition
#' Get descriptive data.frame
#'
#' Get information for each cell
#'
#' @inheritParams get_desc_df
#'
#' A dataset with rows for each cell and columns defining partitioning
#'
#' @param partition Partition
#' @param cont_bounds_inf If True, will put continuous bounds as -Inf/Inf. Otherwise will use X_range bounds
#' @param do_str If True, use a string like "(a, b]", otherwise have two separate columns with a and b
#' @param drop_unsplit If True, drop columns for variables overwhich the partition did not split
#' @param digits digits option
#' @param unsplit_cat_star if we don't split on a categorical var, should we show as "*" (otherwise list all levels)
#'
#' @return data.frame
#'
#' @return data.frame with columns: partitioning columns
#' @export
get_desc_df.grid_partition <- function(partition, cont_bounds_inf=TRUE, do_str=FALSE, drop_unsplit=FALSE,
digits=NULL, unsplit_cat_star=TRUE) {
get_desc_df.grid_partition <- function(obj, cont_bounds_inf=TRUE, do_str=FALSE, drop_unsplit=FALSE,
digits=NULL, unsplit_cat_star=TRUE, ...) {
#To check: digits
assert_that(is.flag(cont_bounds_inf), is.flag(do_str), is.flag(drop_unsplit), is.flag(unsplit_cat_star), msg="One (cont_bounds_inf, do_str, drop_unsplit, unsplit_cat_star)of are not flags.")
# A split at x_k means that we split to those <= and >
n_segs = partition$nsplits_by_dim+1
n_segs = obj$nsplits_by_dim+1
n_cells = prod(n_segs)
if(n_cells==1 & drop_unsplit) return(as.data.frame(matrix(NA, nrow=1, ncol=0)))
@ -160,19 +157,19 @@ get_desc_df.grid_partition <- function(partition, cont_bounds_inf=TRUE, do_str=F
#desc_df = data.frame(labels=levels(grid_fit$cell_stats$cell_factor),
# stringsAsFactors = FALSE) %>% separate(labels, names(X), "(?<=]).(?=[(])", PERL=TRUE)
K = length(partition$nsplits_by_dim)
X_range = partition$X_range
K = length(obj$nsplits_by_dim)
X_range = obj$X_range
if(cont_bounds_inf) {
for(k in 1:K) {
if(!k %in% partition$dim_cat) X_range[[k]] = c(-Inf, Inf)
if(!k %in% obj$dim_cat) X_range[[k]] = c(-Inf, Inf)
}
}
colnames=partition$varnames
colnames=obj$varnames
if(is.null(colnames)) colnames = paste("X", 1:K, sep="")
list_of_windows = list()
for(k in 1:K) {
list_of_windows[[k]] = if(k %in% partition$dim_cat) get_windows_cat(partition$s_by_dim[[k]], X_range[[k]]) else get_window_cont(partition$s_by_dim[[k]], X_range[[k]])
list_of_windows[[k]] = if(k %in% obj$dim_cat) get_windows_cat(obj$s_by_dim[[k]], X_range[[k]]) else get_window_cont(obj$s_by_dim[[k]], X_range[[k]])
}
format_cell_cat <- function(win, unsplit_cat_star, n_tot_dim, sep=", ") {
@ -195,7 +192,7 @@ get_desc_df.grid_partition <- function(partition, cont_bounds_inf=TRUE, do_str=F
segment_indexes = segment_indexes_from_cell_i(cell_i, n_segs)
win = list_of_windows[[k]][[segment_indexes[k]]]
raw_data_k[[cell_i]] = win
str_data_k[cell_i] = if(k %in% partition$dim_cat) format_cell_cat(win, unsplit_cat_star, length(list_of_windows[[k]])) else format_cell_cont(win)
str_data_k[cell_i] = if(k %in% obj$dim_cat) format_cell_cat(win, unsplit_cat_star, length(list_of_windows[[k]])) else format_cell_cont(win)
}
raw_data[[colnames[k]]] = cbind(raw_data_k) #make a list-column: https://stackoverflow.com/a/51308306
str_data[[colnames[k]]] = factor(str_data_k, levels=unique(str_data_k)) #will be in low-high order
@ -334,7 +331,7 @@ partition_split <- function(k, X_k_cut) {
return(structure(list(k=k, X_k_cut=X_k_cut), class=c("partition_split")))
}
#' Is grid_partition_split
#' Is \code{partition_split}
#'
#' Tests whether or not an object is a \code{partition_split}.
#'
@ -342,8 +339,8 @@ partition_split <- function(k, X_k_cut) {
#'
#' @return Boolean
#' @export
#' @describeIn grid_partition_split is grid_partition_split
is_grid_partition_split <- function(x){
#' @describeIn partition_split is partition_split
is_partition_split <- function(x){
inherits(x, "partition_split")
}
@ -414,6 +411,7 @@ print.partition_split <- function(x, ...) {
#' @param bump_samples Number of bump bootstraps (default 0), or list of such length where each item is a bootstrap sample.
#' If m_mode==DS.MULTI_SAMPLE then each item is a sublist with such bootstrap samples over each dataset.
#' @param bump_ratio For bootstraps the ratio of sample size to sample (between 0 and 1, default 1)
#' @param ... Additional params.
#'
#' @return An object.
#' \item{partition}{Grid Partition (type=\code{\link{grid_partition}})}
@ -863,7 +861,7 @@ fit_partition_full_k <- function(k, y, X_d, d, X_range, pb, debug, valid_breaks,
win_mask = gen_cont_window_mask_m(X_d, k, win_LB, win_UB)
win_mask_aux = gen_cont_window_mask_m(X_aux, k, win_LB, win_UB)
for(X_k_cut_i in seq_len(n_pot_break_points_k)) { #cut-point is top end of segment,
if (verbosity>0 && !is.null(pb)) utils::setTxtProgressBar(pb, utils::getTxtProgressBar(pb)+1)
if (verbosity>0 && !is.null(pb)) setTxtProgressBar(pb, getTxtProgressBar(pb)+1)
X_k_cut = breaks_per_dim[[k]][X_k_cut_i]
if(X_k_cut %in% partition$s_by_dim[[k]]) {
prev_split_checked = X_k_cut
@ -961,7 +959,7 @@ fit_partition_full_k <- function(k, y, X_d, d, X_range, pb, debug, valid_breaks,
for(win_split_i in seq_len(length(pot_splits))) {
win_split_val = pot_splits[[win_split_i]]
#TODO: Refactor with continuous case
if (verbosity>0 && !is.null(pb)) utils::setTxtProgressBar(pb, utils::getTxtProgressBar(pb)+1)
if (verbosity>0 && !is.null(pb)) setTxtProgressBar(pb, getTxtProgressBar(pb)+1)
if(!valid_breaks_k[[window_i]][win_split_i]) next
new_split = partition_split(k, win_split_val)
@ -1092,7 +1090,7 @@ fit_partition_full <- function(y, X, d=NULL, X_aux=NULL, d_aux=NULL, X_range, ma
if(verbosity>0) {
cat(paste("Grid > Fitting > split ", split_i, ": Started\n"))
t1 = Sys.time()
if(is.null(pr_cl)) pb = utils::txtProgressBar(0, n_cuts_total, style = style)
if(is.null(pr_cl)) pb = txtProgressBar(0, n_cuts_total, style = style)
}
params = c(list(y=y, X_d=X, d=d, X_range=X_range, pb=NULL, debug=debug, valid_breaks=valid_breaks,

Просмотреть файл

@ -15,6 +15,29 @@ num_cells <- function(obj) {
UseMethod("num_cells", obj)
}
#' Get descriptive data.frame
#'
#' S3 generic that returns information for each cell of a partition.
#' Dispatches on the class of \code{obj}.
#'
#' @param obj partition object
#' @param cont_bounds_inf Should "Inf" be used for continuous bounds (otherwise
#' the bounds from X_range)
#' @param do_str If True, use a string like "(a, b]", otherwise have two
#' separate columns with a and b
#' @param drop_unsplit If True, drop columns for variables over which the
#' partition did not split
#' @param digits digits Option (default is NULL)
#' @param unsplit_cat_star Should unsplit categorical variables be listed as
#' "*", otherwise all factor labels will be used.
#' @param ... Additional arguments passed on to the method.
#'
#' @return data.frame with columns: partitioning columns
#' @export
get_desc_df <- function(obj, cont_bounds_inf=TRUE, do_str=FALSE, drop_unsplit=FALSE,
digits=NULL, unsplit_cat_star=TRUE, ...) {
UseMethod("get_desc_df", obj)
}
# General Utils ----------------
#handles vectors and 2D structures
@ -69,17 +92,17 @@ is_factor_dim_k <- function(X, k) {
}
#Standard way to check if vector is constant is const_vectr(), but is O(n).
#Checking element-by-element would often be faster, but this is inefficient in R
#and faster in C. const_vect1() and const_vect2() were two versions (first using
#'inline', second just Rcpp), but couldn't get to work in building a package.
#The Rcpp version is now in a separate file.
# Standard way to check if vector is constant is const_vectr(), but is O(n).
# Checking element-by-element would often be faster, but this is inefficient in R
# and faster in C. const_vect1() and const_vect2() were two versions (first using
# 'inline', second just Rcpp), but couldn't get to work in building a package.
# The Rcpp version is now in a separate file.
const_vectr <- function(x) {
if(length(x)==0) return(TRUE)
r = range(x)
return(r[1]==r[2])
}
# const_vectr <- function(x) {
# if(length(x)==0) return(TRUE)
# r = range(x)
# return(r[1]==r[2])
# }
# Fold utils --------------------------

Просмотреть файл

@ -1,5 +1,5 @@
# Project
Tools for finding heterogeneous treatment effects (and means) based on partitioning the covariate/feature space via full cross-cuts and solved via greedy search. A typical usage would be analyzing and experiment to find the high-level subgroups (a coarse partition that is useful to humans) that differ in their estimated treatment effects.
Tools for finding heterogeneous treatment effects (and means) based on partitioning the covariate/feature space via full cross-cuts and solved via greedy search. A typical usage would be analyzing an experiment to find the high-level subgroups (a coarse partition that is useful to humans) that differ in their estimated treatment effects.
This package is inspired by, and uses ideas from, [Causal Tree](https://github.com/susanathey/causalTree) but aims to have the partition be more interpretable and have better accuracy. It is slower, though for high-level partitions this is usually not an issue.

Просмотреть файл

@ -4,11 +4,15 @@ Requirements
Notes on building:
- You will need RTools (probably at least v3.5)
- install (renv)[https://rstudio.github.io/renv/articles/renv.html] package. Then after opening the project you should be able to use `renv::restore()`.
- Given the cpp you should use "Install and restart" (and not use "Load All") to get the new library. On Windows, when building, you should restart the R session before this otherwise it can't copy over the DLL (it stays in memory).
- If you want updated vignettes to show up when using "Load All", you can use `devtools::build_vignettes()`.
- install the [renv](https://rstudio.github.io/renv/articles/renv.html) package. Then after opening the project you should be able to use `renv::restore()`. Some packages (such as `brio, cpp11, knitr, ragg, systemfonts, textshaping`) aren't mentioned directly, but are used in building vignettes.
- Given the cpp you should use "Install and restart" (and not use "Load All") to get the new library (though you might be able to get away w/o it if you don't change the DLL). On Windows, when building, you should restart the R session before this otherwise it can't copy over the DLL (it stays in memory).
- If you want updated vignettes to show up when using "Load All", you can use `devtools::build_vignettes()` (possibly with `install=FALSE` to speed things up). They will get placed in `doc/` (not `docs`).
- To build the html help in `docs/` use `pkgdown::build_site()`.
- Building copies everything over to temp dir and then deletes, so might want to move the large files (`project/sim.RData`) out to save time.
Project:
- If using MRAN you might get warnings from `renv` about `Revoutils`. You can rename `renv/settings.dcf.bak` to `renv/settings.dcf` and edit the line `external.libraries: C:\Program Files\Microsoft\R Open\R-X.X.X\library` with the right library version.
# Support

Просмотреть файл

@ -120,9 +120,10 @@
<p>Notes on building:</p>
<ul>
<li>You will need RTools (probably at least v3.5)</li>
<li>install (renv)[<a href="https://rstudio.github.io/renv/articles/renv.html" class="uri">https://rstudio.github.io/renv/articles/renv.html</a>] package. Then after opening the project you should be able to use <code><a href="https://rstudio.github.io/renv//reference/restore.html">renv::restore()</a></code>.</li>
<li>Given the cpp you should use “Install and restart” (and not use “Load All”) to get the new library. On Windows, when building, you should restart the R session before this otherwise it cant copy over the DLL (it stays in memory).</li>
<li>If you want updated vignettes to show up when using “Load All”, you can use <code><a href="https://devtools.r-lib.org//reference/build_vignettes.html">devtools::build_vignettes()</a></code>.</li>
<li>install the <a href="https://rstudio.github.io/renv/articles/renv.html">renv</a> package. Then after opening the project you should be able to use <code><a href="https://rstudio.github.io/renv//reference/restore.html">renv::restore()</a></code>. Some packages (such as <code>brio, cpp11, knitr, ragg, systemfonts, textshaping</code>) aren't mentioned directly, but are used in building vignettes.</li>
<li>Given the cpp you should use “Install and restart” (and not use “Load All”) to get the new library (though you might be able to get away w/o it if you don't change the DLL). On Windows, when building, you should restart the R session before this otherwise it can't copy over the DLL (it stays in memory).</li>
<li>If you want updated vignettes to show up when using “Load All”, you can use <code><a href="https://devtools.r-lib.org//reference/build_vignettes.html">devtools::build_vignettes()</a></code> (possibly with <code>install=FALSE</code> to speed things up). They will get placed in <code>doc/</code> (not <code>docs</code>).</li>
<li>To build the html help in <code>docs/</code> use <code><a href="https://pkgdown.r-lib.org/reference/build_site.html">pkgdown::build_site()</a></code>.</li>
<li>Building copies everything over to temp dir and then deletes, so might want to move the large files (<code>project/sim.RData</code>) out to save time.</li>
</ul>
</div>

Просмотреть файл

@ -94,23 +94,29 @@
<span class="va">d</span> <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/stats/Binomial.html">rbinom</a></span><span class="op">(</span><span class="va">N</span>, <span class="fl">1</span>, <span class="fl">0.5</span><span class="op">)</span> <span class="co">#treatment assignment</span>
<span class="va">tau</span> <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/base/integer.html">as.integer</a></span><span class="op">(</span><span class="va">X</span><span class="op">[</span>,<span class="fl">1</span><span class="op">]</span><span class="op">&gt;</span><span class="fl">.5</span><span class="op">)</span><span class="op">*</span><span class="fl">2</span><span class="op">-</span><span class="fl">1</span> <span class="co">#true treatment effect (just heterogeneous across X1)</span>
<span class="va">y</span> <span class="op">=</span> <span class="va">d</span><span class="op">*</span><span class="va">tau</span> <span class="op">+</span> <span class="fu"><a href="https://rdrr.io/r/stats/Normal.html">rnorm</a></span><span class="op">(</span><span class="va">N</span>, <span class="fl">0</span>, <span class="va">err_sd</span><span class="op">)</span> <span class="co">#outcome</span></code></pre></div>
<p>We typically want a high-level partition for “human” consumption. To save time, avoid cells with too few observations, and reduce the chance of splitting from running many noisy tests, its common to only look for a few splits per dimension. If we dont specify this, the function will try every possible split across each dimension.</p>
<div class="sourceCode" id="cb4"><pre class="downlit sourceCode r">
<code class="sourceCode R"><span class="va">est_part0</span> <span class="op">=</span> <span class="fu"><a href="../reference/fit_estimate_partition.html">fit_estimate_partition</a></span><span class="op">(</span><span class="va">y</span>, <span class="va">X</span>, <span class="va">d</span>, cv_folds<span class="op">=</span><span class="fl">2</span><span class="op">)</span>
<span class="fu"><a href="../reference/get_desc_df.html">get_desc_df</a></span><span class="op">(</span><span class="va">est_part0</span><span class="op">)</span>
<span class="co">#&gt; X1 N_est param_ests pval</span>
<span class="co">#&gt; 1 &lt;=0.5050542 261 -0.7767593 3.681519e-09</span>
<span class="co">#&gt; 2 &gt;0.5050542 239 0.8697289 3.792252e-11</span></code></pre></div>
<p>We typically want a high-level partition for “human” consumption. To save time, avoid cells with too few observations, and reduce the chance of splitting from running many noisy tests, it's common to only look for a few splits per dimension. If we don't specify this, the function will try every possible split across each dimension.</p>
<div class="sourceCode" id="cb5"><pre class="downlit sourceCode r">
<code class="sourceCode R"><span class="co"># With just a scalar, we will split at points equal across the quantile-distribution for each feature.</span>
<span class="va">breaks</span> <span class="op">=</span> <span class="fl">5</span>
<span class="co">#Otherwise we can explicitly list the potential splits to evaluate.</span>
<span class="va">breaks</span> <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/base/rep.html">rep</a></span><span class="op">(</span><span class="fu"><a href="https://rdrr.io/r/base/list.html">list</a></span><span class="op">(</span><span class="fu"><a href="https://rdrr.io/r/base/seq.html">seq</a></span><span class="op">(</span><span class="va">breaks</span><span class="op">)</span><span class="op">/</span><span class="op">(</span><span class="va">breaks</span><span class="op">+</span><span class="fl">1</span><span class="op">)</span><span class="op">)</span>, <span class="va">K</span><span class="op">)</span>
<span class="va">est_part</span> <span class="op">=</span> <span class="fu"><a href="../reference/fit_estimate_partition.html">fit_estimate_partition</a></span><span class="op">(</span><span class="va">y</span>, <span class="va">X</span>, <span class="va">d</span>, breaks_per_dim<span class="op">=</span><span class="va">breaks</span>, cv_folds<span class="op">=</span><span class="fl">2</span><span class="op">)</span></code></pre></div>
<div class="sourceCode" id="cb5"><pre class="downlit sourceCode r">
<code class="sourceCode R"><span class="fu"><a href="https://rdrr.io/r/graphics/plot.html">plot</a></span><span class="op">(</span><span class="va">est_part</span><span class="op">)</span></code></pre></div>
<p><img src="vignette_files/figure-html/unnamed-chunk-5-1.png" width="700"></p>
<div class="sourceCode" id="cb6"><pre class="downlit sourceCode r">
<code class="sourceCode R"><span class="fu"><a href="../reference/get_desc_df.estimated_partition.html">get_desc_df.estimated_partition</a></span><span class="op">(</span><span class="va">est_part</span><span class="op">)</span>
<span class="co">#&gt; X1 N_est param_ests pval</span>
<span class="co">#&gt; 1 &lt;=0.5 258 -0.7946142 1.950406e-09</span>
<span class="co">#&gt; 2 &gt;0.5 242 0.8763644 1.757105e-11</span></code></pre></div>
<p>We can manually estimate this simple model given the partition</p>
<code class="sourceCode R"><span class="fu"><a href="https://rdrr.io/r/graphics/plot.html">plot</a></span><span class="op">(</span><span class="va">est_part</span><span class="op">)</span></code></pre></div>
<p><img src="vignette_files/figure-html/unnamed-chunk-6-1.png" width="700"></p>
<div class="sourceCode" id="cb7"><pre class="downlit sourceCode r">
<code class="sourceCode R"><span class="fu"><a href="../reference/get_desc_df.html">get_desc_df</a></span><span class="op">(</span><span class="va">est_part</span><span class="op">)</span>
<span class="co">#&gt; X1 N_est param_ests pval</span>
<span class="co">#&gt; 1 &lt;=0.5 255 -0.9035292 3.379837e-12</span>
<span class="co">#&gt; 2 &gt;0.5 245 0.9557823 2.823001e-12</span></code></pre></div>
<p>We can manually estimate this simple model given the partition</p>
<div class="sourceCode" id="cb8"><pre class="downlit sourceCode r">
<code class="sourceCode R"><span class="va">est_df</span> <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/base/data.frame.html">data.frame</a></span><span class="op">(</span>y<span class="op">=</span><span class="va">y</span>, d<span class="op">=</span><span class="va">d</span>, f<span class="op">=</span><span class="fu"><a href="https://rdrr.io/r/stats/predict.html">predict</a></span><span class="op">(</span><span class="va">est_part</span><span class="op">$</span><span class="va">partition</span>, <span class="va">X</span><span class="op">)</span><span class="op">)</span>
<span class="fu"><a href="https://rdrr.io/r/base/summary.html">summary</a></span><span class="op">(</span><span class="fu"><a href="https://rdrr.io/r/stats/lm.html">lm</a></span><span class="op">(</span><span class="va">y</span><span class="op">~</span><span class="fl">0</span><span class="op">+</span><span class="va">f</span><span class="op">+</span><span class="va">d</span><span class="op">:</span><span class="va">f</span>, data<span class="op">=</span><span class="va">est_df</span><span class="op">[</span><span class="op">-</span><span class="va">est_part</span><span class="op">$</span><span class="va">index_tr</span>,<span class="op">]</span><span class="op">)</span><span class="op">)</span>
<span class="co">#&gt; </span>
@ -120,78 +126,78 @@
<span class="co">#&gt; </span>
<span class="co">#&gt; Residuals:</span>
<span class="co">#&gt; Min 1Q Median 3Q Max </span>
<span class="co">#&gt; -3.1460 -0.6776 0.0409 0.7216 3.0490 </span>
<span class="co">#&gt; -3.1360 -0.6908 -0.0019 0.7014 2.6253 </span>
<span class="co">#&gt; </span>
<span class="co">#&gt; Coefficients:</span>
<span class="co">#&gt; Estimate Std. Error t value Pr(&gt;|t|) </span>
<span class="co">#&gt; f(-1,0.5].(-0.999,2].(-1,2] -0.11160 0.08705 -1.282 0.200 </span>
<span class="co">#&gt; f(0.5,2].(-0.999,2].(-1,2] 0.06623 0.08947 0.740 0.459 </span>
<span class="co">#&gt; f(-1,0.5].(-0.999,2].(-1,2]:d -0.79461 0.12407 -6.404 3.51e-10 ***</span>
<span class="co">#&gt; f(0.5,2].(-0.999,2].(-1,2]:d 0.87636 0.12813 6.840 2.35e-11 ***</span>
<span class="co">#&gt; f(-1,0.5].(-0.999,2].(-1,2] -0.09209 0.08434 -1.092 0.275 </span>
<span class="co">#&gt; f(0.5,2].(-0.999,2].(-1,2] 0.05627 0.08786 0.640 0.522 </span>
<span class="co">#&gt; f(-1,0.5].(-0.999,2].(-1,2]:d -0.90353 0.12559 -7.194 2.33e-12 ***</span>
<span class="co">#&gt; f(0.5,2].(-0.999,2].(-1,2]:d 0.95578 0.12769 7.485 3.28e-13 ***</span>
<span class="co">#&gt; ---</span>
<span class="co">#&gt; Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1</span>
<span class="co">#&gt; </span>
<span class="co">#&gt; Residual standard error: 0.9963 on 496 degrees of freedom</span>
<span class="co">#&gt; Multiple R-squared: 0.3003, Adjusted R-squared: 0.2947 </span>
<span class="co">#&gt; F-statistic: 53.22 on 4 and 496 DF, p-value: &lt; 2.2e-16</span></code></pre></div>
<span class="co">#&gt; Residual standard error: 0.9979 on 496 degrees of freedom</span>
<span class="co">#&gt; Multiple R-squared: 0.3218, Adjusted R-squared: 0.3164 </span>
<span class="co">#&gt; F-statistic: 58.85 on 4 and 496 DF, p-value: &lt; 2.2e-16</span></code></pre></div>
<p>Sometimes we want a different level of complexity than that picked by CV. Either we can pre-specify which partition in the sequence we want (using the <code>partition_i</code> parameter), or we can look at the sequence of objective function values and see where additional splits provide only marginal improvements.</p>
<div class="sourceCode" id="cb8"><pre class="downlit sourceCode r">
<code class="sourceCode R"><span class="fu"><a href="https://rdrr.io/r/base/print.html">print</a></span><span class="op">(</span><span class="fu"><a href="https://rdrr.io/r/base/paste.html">paste</a></span><span class="op">(</span><span class="st">"In-sample Objective function values: "</span>, <span class="fu"><a href="https://rdrr.io/r/base/paste.html">paste</a></span><span class="op">(</span><span class="va">est_part</span><span class="op">$</span><span class="va">is_obj_val_seq</span>, collapse<span class="op">=</span><span class="st">" "</span><span class="op">)</span><span class="op">)</span><span class="op">)</span>
<span class="co">#&gt; [1] "In-sample Objective function values: -0.00327762408600242 -1.0456589955051 -1.07936864222915 -1.13657424922757 -1.17891173047444 -1.22457907347801 -1.29651132376863 -1.40899836012424 -1.47955500597249 -1.56235757695552"</span></code></pre></div>
<p>Compare this with the average treatment effect for the whole and estimation-only samples</p>
<div class="sourceCode" id="cb9"><pre class="downlit sourceCode r">
<code class="sourceCode R"><span class="fu"><a href="https://rdrr.io/r/base/print.html">print</a></span><span class="op">(</span><span class="fu"><a href="https://rdrr.io/r/base/paste.html">paste</a></span><span class="op">(</span><span class="st">"In-sample Objective function values: "</span>, <span class="fu"><a href="https://rdrr.io/r/base/paste.html">paste</a></span><span class="op">(</span><span class="va">est_part</span><span class="op">$</span><span class="va">is_obj_val_seq</span>, collapse<span class="op">=</span><span class="st">" "</span><span class="op">)</span><span class="op">)</span><span class="op">)</span>
<span class="co">#&gt; [1] "In-sample Objective function values: -0.00130792756957961 -0.860071145786862 -0.906298192191155 -0.974137597678458 -1.01692472825141 -1.05861007636975 -1.1137648149499 -1.16690858898675 -1.22036187891834 -1.23615779557238"</span></code></pre></div>
<p>Compare this with the average treatment effect for the whole and estimation-only samples</p>
<div class="sourceCode" id="cb10"><pre class="downlit sourceCode r">
<code class="sourceCode R"><span class="va">est_part</span><span class="op">$</span><span class="va">full_stat_df</span>
<span class="co">#&gt; sample N_est param_ests var_ests tstats ci_u ci_l</span>
<span class="co">#&gt; N_tr all 1000 0.033383182 0.005710407 0.44176796 0.1816721 -0.1149057</span>
<span class="co">#&gt; N_es est 500 0.009358178 0.011304896 0.08801527 0.2182580 -0.1995417</span>
<span class="co">#&gt; sample N_est param_ests var_ests tstats ci_u ci_l</span>
<span class="co">#&gt; N_tr all 1000 0.03338318 0.005710407 0.4417680 0.1816721 -0.1149057</span>
<span class="co">#&gt; N_es est 500 0.03350558 0.011765447 0.3088965 0.2466182 -0.1796070</span>
<span class="co">#&gt; pval</span>
<span class="co">#&gt; N_tr 0.6587528</span>
<span class="co">#&gt; N_es 0.9298999</span></code></pre></div>
<span class="co">#&gt; N_es 0.7575295</span></code></pre></div>
<p>How important is each dimension of X for the objective function? We refit the model without each dimension and see the change in the objective function.</p>
<div class="sourceCode" id="cb10"><pre class="downlit sourceCode r">
<div class="sourceCode" id="cb11"><pre class="downlit sourceCode r">
<code class="sourceCode R"><span class="va">est_part</span><span class="op">$</span><span class="va">importance_weights</span>
<span class="co">#&gt; NULL</span></code></pre></div>
<p>The first feature is the only one that is useful.</p>
<p>Are there any interactions between the importances? (That is, if we remove X1, does the importance of X2 change? This is done by dropping pairs of features at a time and seeing how the results differ from single-feature droppings.)</p>
<div class="sourceCode" id="cb11"><pre class="downlit sourceCode r">
<div class="sourceCode" id="cb12"><pre class="downlit sourceCode r">
<code class="sourceCode R"><span class="va">est_part</span><span class="op">$</span><span class="va">interaction_weights</span>
<span class="co">#&gt; NULL</span></code></pre></div>
<p>Essentially no.</p>
<p>Get the observation-level estimated treatment effects.</p>
<div class="sourceCode" id="cb12"><pre class="downlit sourceCode r">
<div class="sourceCode" id="cb13"><pre class="downlit sourceCode r">
<code class="sourceCode R"><span class="va">tau_hat</span> <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/stats/predict.html">predict</a></span><span class="op">(</span><span class="va">est_part</span>, new_X<span class="op">=</span><span class="va">X</span><span class="op">)</span></code></pre></div>
<p>With many estimates, we may wish to account for multiple testing when checking if “there are any negative (or positive) effects”</p>
<div class="sourceCode" id="cb13"><pre class="downlit sourceCode r">
<div class="sourceCode" id="cb14"><pre class="downlit sourceCode r">
<code class="sourceCode R"><span class="va">any_neg</span> <span class="op">=</span> <span class="fu"><a href="../reference/test_any_sign_effect.html">test_any_sign_effect</a></span><span class="op">(</span><span class="va">est_part</span>, check_negative<span class="op">=</span><span class="cn">T</span><span class="op">)</span>
<span class="fu"><a href="https://rdrr.io/r/base/print.html">print</a></span><span class="op">(</span><span class="fu"><a href="https://rdrr.io/r/base/paste.html">paste</a></span><span class="op">(</span><span class="st">"Adjusted 1-side p-values testing if negative:"</span>, <span class="fu"><a href="https://rdrr.io/r/base/paste.html">paste</a></span><span class="op">(</span><span class="va">any_neg</span><span class="op">$</span><span class="va">pval1s_fdr</span>, collapse<span class="op">=</span><span class="st">", "</span><span class="op">)</span><span class="op">)</span><span class="op">)</span>
<span class="co">#&gt; [1] "Adjusted 1-side p-values testing if negative: 1.95040604209589e-09, 0.999999999991214"</span></code></pre></div>
<span class="co">#&gt; [1] "Adjusted 1-side p-values testing if negative: 3.37983707236465e-12, 0.999999999998588"</span></code></pre></div>
<p>Now let's look at a case where there's heterogeneity across all three dimensions.</p>
<div class="sourceCode" id="cb14"><pre class="downlit sourceCode r">
<div class="sourceCode" id="cb15"><pre class="downlit sourceCode r">
<code class="sourceCode R"><span class="va">tau_3</span> <span class="op">=</span> <span class="op">(</span><span class="fu"><a href="https://rdrr.io/r/base/integer.html">as.integer</a></span><span class="op">(</span><span class="va">X</span><span class="op">[</span>,<span class="fl">1</span><span class="op">]</span><span class="op">&gt;</span><span class="fl">0.5</span><span class="op">)</span><span class="op">*</span><span class="fl">2</span><span class="op">-</span><span class="fl">1</span><span class="op">)</span> <span class="op">+</span> <span class="op">(</span><span class="fu"><a href="https://rdrr.io/r/base/integer.html">as.integer</a></span><span class="op">(</span><span class="va">X</span><span class="op">[</span>,<span class="fl">2</span><span class="op">]</span><span class="op">&gt;</span><span class="fl">0.5</span><span class="op">)</span><span class="op">*</span><span class="fl">2</span><span class="op">-</span><span class="fl">1</span><span class="op">)</span><span class="op">*</span><span class="fl">2</span> <span class="op">+</span> <span class="op">(</span><span class="fu"><a href="https://rdrr.io/r/base/integer.html">as.integer</a></span><span class="op">(</span><span class="va">X</span><span class="op">[</span>,<span class="fl">3</span><span class="op">]</span><span class="op">&gt;</span><span class="fl">0.5</span><span class="op">)</span><span class="op">*</span><span class="fl">2</span><span class="op">-</span><span class="fl">1</span><span class="op">)</span><span class="op">*</span><span class="fl">3</span>
<span class="va">y_3</span> <span class="op">=</span> <span class="va">d</span><span class="op">*</span><span class="va">tau_3</span> <span class="op">+</span> <span class="fu"><a href="https://rdrr.io/r/stats/Normal.html">rnorm</a></span><span class="op">(</span><span class="va">N</span>, <span class="fl">0</span>, <span class="va">err_sd</span><span class="op">)</span>
<span class="va">est_part_3</span> <span class="op">=</span> <span class="fu"><a href="../reference/fit_estimate_partition.html">fit_estimate_partition</a></span><span class="op">(</span><span class="va">y_3</span>, <span class="va">X</span>, <span class="va">d</span>, breaks_per_dim<span class="op">=</span><span class="fl">5</span>, partition_i<span class="op">=</span><span class="fl">4</span><span class="op">)</span>
<span class="fu"><a href="../reference/get_desc_df.estimated_partition.html">get_desc_df.estimated_partition</a></span><span class="op">(</span><span class="va">est_part_3</span><span class="op">)</span>
<span class="fu"><a href="../reference/get_desc_df.html">get_desc_df</a></span><span class="op">(</span><span class="va">est_part_3</span><span class="op">)</span>
<span class="co">#&gt; X1 X2 X3 N_est param_ests pval</span>
<span class="co">#&gt; 1 &lt;=0.4844432 &lt;=0.5158526 &lt;=0.4956977 62 -5.9629081 4.120118e-29</span>
<span class="co">#&gt; 2 &gt;0.4844432 &lt;=0.5158526 &lt;=0.4956977 60 -3.9205417 1.021206e-21</span>
<span class="co">#&gt; 3 &lt;=0.4844432 &gt;0.5158526 &lt;=0.4956977 65 -1.8856467 1.996540e-12</span>
<span class="co">#&gt; 4 &gt;0.4844432 &gt;0.5158526 &lt;=0.4956977 64 -0.2638079 3.253642e-01</span>
<span class="co">#&gt; 5 &lt;=0.4844432 &lt;=0.5158526 &gt;0.4956977 60 0.1919869 4.593925e-01</span>
<span class="co">#&gt; 6 &gt;0.4844432 &lt;=0.5158526 &gt;0.4956977 69 1.9747946 1.272530e-09</span>
<span class="co">#&gt; 7 &lt;=0.4844432 &gt;0.5158526 &gt;0.4956977 56 4.1635551 2.560607e-21</span>
<span class="co">#&gt; 8 &gt;0.4844432 &gt;0.5158526 &gt;0.4956977 64 6.1160774 8.216963e-27</span></code></pre></div>
<span class="co">#&gt; 1 &lt;=0.4844432 &lt;=0.5158526 &lt;=0.4956977 57 -6.0701075 2.650172e-30</span>
<span class="co">#&gt; 2 &gt;0.4844432 &lt;=0.5158526 &lt;=0.4956977 59 -3.7226053 4.028245e-16</span>
<span class="co">#&gt; 3 &lt;=0.4844432 &gt;0.5158526 &lt;=0.4956977 64 -1.9451039 1.181403e-10</span>
<span class="co">#&gt; 4 &gt;0.4844432 &gt;0.5158526 &lt;=0.4956977 64 0.1690431 5.336151e-01</span>
<span class="co">#&gt; 5 &lt;=0.4844432 &lt;=0.5158526 &gt;0.4956977 63 -0.6908844 3.478647e-02</span>
<span class="co">#&gt; 6 &gt;0.4844432 &lt;=0.5158526 &gt;0.4956977 53 2.3977563 6.423076e-09</span>
<span class="co">#&gt; 7 &lt;=0.4844432 &gt;0.5158526 &gt;0.4956977 72 3.8194994 1.243479e-24</span>
<span class="co">#&gt; 8 &gt;0.4844432 &gt;0.5158526 &gt;0.4956977 68 6.1846918 6.820379e-36</span></code></pre></div>
<p>One benefit of grid-based partitions is that you can easily view 2D slices of the full heterogeneity space.</p>
<div class="sourceCode" id="cb15"><pre class="downlit sourceCode r">
<div class="sourceCode" id="cb16"><pre class="downlit sourceCode r">
<code class="sourceCode R"><span class="va">plts</span> <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/graphics/plot.html">plot</a></span><span class="op">(</span><span class="va">est_part_3</span><span class="op">)</span>
<span class="fu"><a href="https://rdrr.io/pkg/gridExtra/man/arrangeGrob.html">grid.arrange</a></span><span class="op">(</span><span class="va">plts</span><span class="op">[[</span><span class="fl">1</span><span class="op">]</span><span class="op">]</span>, <span class="va">plts</span><span class="op">[[</span><span class="fl">2</span><span class="op">]</span><span class="op">]</span>, ncol<span class="op">=</span><span class="fl">2</span><span class="op">)</span></code></pre></div>
<p><img src="vignette_files/figure-html/unnamed-chunk-15-1.png" width="576"></p>
<p><img src="vignette_files/figure-html/unnamed-chunk-16-1.png" width="576"></p>
</div>
<div id="improving-the-partition" class="section level1">
<h1 class="hasAnchor">
<a href="#improving-the-partition" class="anchor"></a>Improving the partition</h1>
<p>We can improve the partition by controlling for Xs (either local-linearly or global-flexibly) and using bootstrap “bumping”</p>
<div class="sourceCode" id="cb16"><pre class="downlit sourceCode r">
<div class="sourceCode" id="cb17"><pre class="downlit sourceCode r">
<code class="sourceCode R"><span class="va">est_part_l</span> <span class="op">=</span> <span class="fu"><a href="../reference/fit_estimate_partition.html">fit_estimate_partition</a></span><span class="op">(</span><span class="va">y</span>, <span class="va">X</span>, <span class="va">d</span>, breaks_per_dim<span class="op">=</span><span class="fl">5</span>, ctrl_method <span class="op">=</span> <span class="st">"LassoCV"</span>, bump_samples <span class="op">=</span> <span class="fl">20</span>, partition_i<span class="op">=</span><span class="fl">2</span><span class="op">)</span></code></pre></div>
<p><code>LassoCV</code> is a local-linear approach and we can use the global-flexible approach by setting <code>ctrl_method="RF"</code> for a random forest.</p>
</div>
@ -199,7 +205,7 @@
<h1 class="hasAnchor">
<a href="#parallel-processing" class="anchor"></a>Parallel-processing</h1>
<p>Parallel-processing the outer-loops</p>
<div class="sourceCode" id="cb17"><pre class="downlit sourceCode r">
<div class="sourceCode" id="cb18"><pre class="downlit sourceCode r">
<code class="sourceCode R"><span class="co">#library(parallel)</span>
<span class="co">#cl &lt;- makeCluster(getOption("cl.cores", default=3)) #see also detectCores()</span>
<span class="co">#fit_res = fit_estimate_partition(..., pr_cl=cl)</span>
@ -212,73 +218,73 @@
<ol style="list-style-type: decimal">
<li>Multiple outcomes, but same sample (single treatment)</li>
</ol>
<div class="sourceCode" id="cb18"><pre class="downlit sourceCode r">
<div class="sourceCode" id="cb19"><pre class="downlit sourceCode r">
<code class="sourceCode R"><span class="va">tau2</span> <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/base/integer.html">as.integer</a></span><span class="op">(</span><span class="va">X</span><span class="op">[</span>,<span class="fl">2</span><span class="op">]</span><span class="op">&gt;</span><span class="fl">0.5</span><span class="op">)</span><span class="op">*</span><span class="fl">2</span><span class="op">-</span><span class="fl">1</span>
<span class="va">y2_yM</span> <span class="op">=</span> <span class="va">d</span><span class="op">*</span><span class="va">tau2</span> <span class="op">+</span> <span class="fu"><a href="https://rdrr.io/r/stats/Normal.html">rnorm</a></span><span class="op">(</span><span class="va">N</span>, <span class="fl">0</span>, <span class="va">err_sd</span><span class="op">)</span>
<span class="va">y_yM</span> <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/base/cbind.html">cbind</a></span><span class="op">(</span><span class="va">y</span>, <span class="va">y2_yM</span><span class="op">)</span>
<span class="va">est_part_yM</span> <span class="op">=</span> <span class="fu"><a href="../reference/fit_estimate_partition.html">fit_estimate_partition</a></span><span class="op">(</span><span class="va">y_yM</span>, <span class="va">X</span>, <span class="va">d</span>, breaks_per_dim<span class="op">=</span><span class="fl">5</span>, partition_i <span class="op">=</span> <span class="fl">3</span><span class="op">)</span>
<span class="fu"><a href="../reference/get_desc_df.estimated_partition.html">get_desc_df.estimated_partition</a></span><span class="op">(</span><span class="va">est_part_yM</span><span class="op">)</span>
<span class="fu"><a href="../reference/get_desc_df.html">get_desc_df</a></span><span class="op">(</span><span class="va">est_part_yM</span><span class="op">)</span>
<span class="co">#&gt; X1 X2 N_est1 N_est2 param_ests1 param_ests2 pval1</span>
<span class="co">#&gt; 1 &lt;=0.4844432 &lt;=0.5158526 117 117 -0.8049290 -0.7535126 6.329614e-05</span>
<span class="co">#&gt; 2 &gt;0.4844432 &lt;=0.5158526 118 118 0.8971755 -0.9804873 1.474554e-06</span>
<span class="co">#&gt; 3 &lt;=0.4844432 &gt;0.5158526 130 130 -0.6945345 0.8326545 7.255099e-05</span>
<span class="co">#&gt; 4 &gt;0.4844432 &gt;0.5158526 135 135 1.0975538 1.1039447 8.612765e-10</span>
<span class="co">#&gt; 1 &lt;=0.4844432 &lt;=0.5158526 130 130 -0.8150777 -1.0797322 1.661861e-05</span>
<span class="co">#&gt; 2 &gt;0.4844432 &lt;=0.5158526 118 118 0.7621012 -1.1433613 7.692081e-06</span>
<span class="co">#&gt; 3 &lt;=0.4844432 &gt;0.5158526 119 119 -1.0825123 0.9278330 1.023561e-08</span>
<span class="co">#&gt; 4 &gt;0.4844432 &gt;0.5158526 133 133 1.0348422 0.6970603 2.502101e-09</span>
<span class="co">#&gt; pval2</span>
<span class="co">#&gt; 1 3.570550e-06</span>
<span class="co">#&gt; 2 2.094914e-06</span>
<span class="co">#&gt; 3 3.361214e-06</span>
<span class="co">#&gt; 4 7.098278e-09</span></code></pre></div>
<span class="co">#&gt; 1 9.023729e-10</span>
<span class="co">#&gt; 2 2.241744e-07</span>
<span class="co">#&gt; 3 6.075092e-06</span>
<span class="co">#&gt; 4 1.456469e-04</span></code></pre></div>
<ol start="2" style="list-style-type: decimal">
<li>Multiple treatments, but same sample (single outcome)</li>
</ol>
<div class="sourceCode" id="cb19"><pre class="downlit sourceCode r">
<div class="sourceCode" id="cb20"><pre class="downlit sourceCode r">
<code class="sourceCode R"><span class="va">d2</span> <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/stats/Binomial.html">rbinom</a></span><span class="op">(</span><span class="va">N</span>, <span class="fl">1</span>, <span class="fl">0.5</span><span class="op">)</span>
<span class="va">d_dM</span> <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/base/cbind.html">cbind</a></span><span class="op">(</span><span class="va">d</span>, <span class="va">d2</span><span class="op">)</span>
<span class="va">y_dM</span> <span class="op">=</span> <span class="va">d</span><span class="op">*</span><span class="va">tau</span> <span class="op">+</span> <span class="va">d2</span><span class="op">*</span><span class="va">tau2</span> <span class="op">+</span> <span class="fu"><a href="https://rdrr.io/r/stats/Normal.html">rnorm</a></span><span class="op">(</span><span class="va">N</span>, <span class="fl">0</span>, <span class="va">err_sd</span><span class="op">)</span>
<span class="va">est_part_dM</span> <span class="op">=</span> <span class="fu"><a href="../reference/fit_estimate_partition.html">fit_estimate_partition</a></span><span class="op">(</span><span class="va">y_dM</span>, <span class="va">X</span>, <span class="va">d_dM</span>, breaks_per_dim<span class="op">=</span><span class="fl">5</span>, partition_i <span class="op">=</span> <span class="fl">3</span><span class="op">)</span>
<span class="fu"><a href="../reference/get_desc_df.estimated_partition.html">get_desc_df.estimated_partition</a></span><span class="op">(</span><span class="va">est_part_dM</span><span class="op">)</span>
<span class="fu"><a href="../reference/get_desc_df.html">get_desc_df</a></span><span class="op">(</span><span class="va">est_part_dM</span><span class="op">)</span>
<span class="co">#&gt; X1 X2 N_est1 N_est2 param_ests1 param_ests2 pval1</span>
<span class="co">#&gt; 1 &lt;=0.4844432 &lt;=0.5158526 119 119 -0.7616140 -0.7161717 7.110389e-05</span>
<span class="co">#&gt; 2 &gt;0.4844432 &lt;=0.5158526 134 134 1.3065817 -0.8387614 8.582240e-10</span>
<span class="co">#&gt; 3 &lt;=0.4844432 &gt;0.5158526 127 127 -1.0672945 0.9263464 8.281467e-08</span>
<span class="co">#&gt; 4 &gt;0.4844432 &gt;0.5158526 120 120 0.8887389 1.2058123 1.896526e-05</span>
<span class="co">#&gt; 1 &lt;=0.4844432 &lt;=0.5158526 135 135 -0.9762579 -0.9840951 7.622683e-08</span>
<span class="co">#&gt; 2 &gt;0.4844432 &lt;=0.5158526 118 118 0.9325741 -0.7867792 5.249161e-06</span>
<span class="co">#&gt; 3 &lt;=0.4844432 &gt;0.5158526 116 116 -1.2541714 0.7402976 1.460617e-09</span>
<span class="co">#&gt; 4 &gt;0.4844432 &gt;0.5158526 131 131 1.1479998 0.8282607 1.158216e-08</span>
<span class="co">#&gt; pval2</span>
<span class="co">#&gt; 1 2.042986e-04</span>
<span class="co">#&gt; 2 3.964438e-05</span>
<span class="co">#&gt; 3 2.179978e-06</span>
<span class="co">#&gt; 4 7.681458e-09</span></code></pre></div>
<span class="co">#&gt; 1 5.822138e-08</span>
<span class="co">#&gt; 2 7.690174e-05</span>
<span class="co">#&gt; 3 1.333272e-04</span>
<span class="co">#&gt; 4 2.509196e-05</span></code></pre></div>
<ol start="3" style="list-style-type: decimal">
<li>Multiple separate samples, each having a single outcome and treatment</li>
</ol>
<div class="sourceCode" id="cb20"><pre class="downlit sourceCode r">
<div class="sourceCode" id="cb21"><pre class="downlit sourceCode r">
<code class="sourceCode R"><span class="va">y2_MM</span> <span class="op">=</span> <span class="va">d2</span><span class="op">*</span><span class="va">tau2</span> <span class="op">+</span> <span class="fu"><a href="https://rdrr.io/r/stats/Normal.html">rnorm</a></span><span class="op">(</span><span class="va">N</span>, <span class="fl">0</span>, <span class="va">err_sd</span><span class="op">)</span>
<span class="va">y_MM</span> <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/base/list.html">list</a></span><span class="op">(</span><span class="va">y</span>, <span class="va">y2_MM</span><span class="op">)</span>
<span class="va">d_MM</span> <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/base/list.html">list</a></span><span class="op">(</span><span class="va">d</span>, <span class="va">d2</span><span class="op">)</span>
<span class="va">X_MM</span> <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/base/list.html">list</a></span><span class="op">(</span><span class="va">X</span>, <span class="va">X</span><span class="op">)</span>
<span class="va">est_part_MM</span> <span class="op">=</span> <span class="fu"><a href="../reference/fit_estimate_partition.html">fit_estimate_partition</a></span><span class="op">(</span><span class="va">y_MM</span>, <span class="va">X_MM</span>, <span class="va">d_MM</span>, breaks_per_dim<span class="op">=</span><span class="fl">5</span>, partition_i <span class="op">=</span> <span class="fl">3</span><span class="op">)</span>
<span class="fu"><a href="../reference/get_desc_df.estimated_partition.html">get_desc_df.estimated_partition</a></span><span class="op">(</span><span class="va">est_part_MM</span><span class="op">)</span>
<span class="fu"><a href="../reference/get_desc_df.html">get_desc_df</a></span><span class="op">(</span><span class="va">est_part_MM</span><span class="op">)</span>
<span class="co">#&gt; X1 X2 N_est1 N_est2 param_ests1 param_ests2 pval1</span>
<span class="co">#&gt; 1 &lt;=0.4844432 &lt;=0.5158526 125 109 -0.9005631 -0.871670 1.149034e-05</span>
<span class="co">#&gt; 2 &gt;0.4844432 &lt;=0.5158526 118 136 0.7295372 -1.076143 9.198309e-05</span>
<span class="co">#&gt; 3 &lt;=0.4844432 &gt;0.5158526 127 125 -0.8552986 1.297816 3.734232e-07</span>
<span class="co">#&gt; 4 &gt;0.4844432 &gt;0.5158526 130 130 1.0782197 1.161637 3.924776e-08</span>
<span class="co">#&gt; 1 &lt;=0.4844432 &lt;=0.5158526 130 124 -0.9043386 -0.8583915 6.680335e-06</span>
<span class="co">#&gt; 2 &gt;0.4844432 &lt;=0.5158526 122 126 0.7282198 -1.1698914 1.392059e-05</span>
<span class="co">#&gt; 3 &lt;=0.4844432 &gt;0.5158526 128 131 -1.0385968 1.4084632 3.053106e-09</span>
<span class="co">#&gt; 4 &gt;0.4844432 &gt;0.5158526 120 119 1.0489269 0.9796365 8.551001e-08</span>
<span class="co">#&gt; pval2</span>
<span class="co">#&gt; 1 1.198057e-05</span>
<span class="co">#&gt; 2 1.846102e-08</span>
<span class="co">#&gt; 3 1.459545e-10</span>
<span class="co">#&gt; 4 1.149917e-09</span></code></pre></div>
<span class="co">#&gt; 1 2.926864e-06</span>
<span class="co">#&gt; 2 2.092628e-08</span>
<span class="co">#&gt; 3 3.320067e-12</span>
<span class="co">#&gt; 4 2.845363e-06</span></code></pre></div>
</div>
<div id="mean-outcome-prediction" class="section level1">
<h1 class="hasAnchor">
<a href="#mean-outcome-prediction" class="anchor"></a>Mean-outcome prediction</h1>
<div class="sourceCode" id="cb21"><pre class="downlit sourceCode r">
<div class="sourceCode" id="cb22"><pre class="downlit sourceCode r">
<code class="sourceCode R"><span class="va">alpha</span> <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/base/integer.html">as.integer</a></span><span class="op">(</span><span class="va">X</span><span class="op">[</span>,<span class="fl">1</span><span class="op">]</span><span class="op">&gt;</span><span class="fl">0.5</span><span class="op">)</span><span class="op">*</span><span class="fl">2</span><span class="op">-</span><span class="fl">1</span> <span class="co">#true average outcome effect (just heterogeneous across X1)</span>
<span class="va">y_y</span> <span class="op">=</span> <span class="va">alpha</span> <span class="op">+</span> <span class="fu"><a href="https://rdrr.io/r/stats/Normal.html">rnorm</a></span><span class="op">(</span><span class="va">N</span>, <span class="fl">0</span>, <span class="va">err_sd</span><span class="op">)</span> <span class="co">#outcome</span>
<span class="va">est_part_y</span> <span class="op">=</span> <span class="fu"><a href="../reference/fit_estimate_partition.html">fit_estimate_partition</a></span><span class="op">(</span><span class="va">y_y</span>, <span class="va">X</span>, breaks_per_dim<span class="op">=</span><span class="fl">5</span>, partition_i<span class="op">=</span><span class="fl">2</span><span class="op">)</span>
<span class="fu"><a href="../reference/get_desc_df.estimated_partition.html">get_desc_df.estimated_partition</a></span><span class="op">(</span><span class="va">est_part_y</span><span class="op">)</span>
<span class="fu"><a href="../reference/get_desc_df.html">get_desc_df</a></span><span class="op">(</span><span class="va">est_part_y</span><span class="op">)</span>
<span class="co">#&gt; X1 N_est param_ests pval</span>
<span class="co">#&gt; 1 &lt;=0.4844432 252 -0.9742096 4.803432e-39</span>
<span class="co">#&gt; 2 &gt;0.4844432 248 0.9288032 8.498185e-29</span></code></pre></div>
<span class="co">#&gt; 1 &lt;=0.4844432 247 -1.0304578 5.228963e-43</span>
<span class="co">#&gt; 2 &gt;0.4844432 253 0.9447789 5.803037e-32</span></code></pre></div>
</div>
<div id="minor-things-to-add" class="section level1">
<h1 class="hasAnchor">

Двоичные данные
docs/articles/vignette_files/figure-html/unnamed-chunk-16-1.png Normal file

Двоичный файл не отображается.

После

Ширина:  |  Высота:  |  Размер: 31 KiB

Двоичные данные
docs/articles/vignette_files/figure-html/unnamed-chunk-6-1.png Normal file

Двоичный файл не отображается.

После

Ширина:  |  Высота:  |  Размер: 18 KiB

Просмотреть файл

@ -68,9 +68,10 @@
<div id="project" class="section level1">
<div class="page-header"><h1 class="hasAnchor">
<a href="#project" class="anchor"></a>Project</h1></div>
<p>Tools for finding heterogeneous treatment effects (and means) based on partitioning the covariate/feature space via full cross-cuts and solved via greedy search. A typical usage would be analyzing and experiment to find the high-level subgroups (a coarse partition that is useful to humans) that differ in their estimated treatment effects.</p>
<p>Tools for finding heterogeneous treatment effects (and means) based on partitioning the covariate/feature space via full cross-cuts and solved via greedy search. A typical usage would be analyzing an experiment to find the high-level subgroups (a coarse partition that is useful to humans) that differ in their estimated treatment effects.</p>
<p>This package is inspired by, and uses ideas from, <a href="https://github.com/susanathey/causalTree">Causal Tree</a> but aims to have the partition be more interpretable and have better accuracy. It is slower, though for high-level partitions this is usually not an issue.</p>
<p>This project is currently in an advanced prototype stage. Issues may still be found in common usage. Please create issues for these!</p>
<p>Documentation can be found online <a href="https://microsoft.github.io/CausalGrid/index.html">here</a> (and in the package).</p>
<div id="contributing" class="section level2">
<h2 class="hasAnchor">
<a href="#contributing" class="anchor"></a>Contributing</h2>

Просмотреть файл

@ -3,5 +3,5 @@ pkgdown: 1.6.1.9000
pkgdown_sha: 84f234cf19153ce99bf1bcf875dc56248ddf0cbe
articles:
vignette: vignette.html
last_built: 2021-01-26T16:10Z
last_built: 2021-01-30T02:16Z

Просмотреть файл

@ -159,6 +159,14 @@ should be normalized to have the same variance. With multiple core estimates see
<th>partition_i</th>
<td><p>partition_i - 1 is the index of the last split in split_seq that is included in the new partition</p></td>
</tr>
<tr>
<th>index_tr</th>
<td><p>Split between train and estimate samples (default is to get from <code>fit</code>)</p></td>
</tr>
<tr>
<th>split_seq</th>
<td><p>sequential list of splits (default is to get from <code>fit</code>)</p></td>
</tr>
<tr>
<th>est_plan</th>
<td><p><a href='EstimatorPlan.html'>EstimatorPlan</a>.</p></td>

Просмотреть файл

@ -137,7 +137,7 @@
</tr>
<tr>
<th>...</th>
<td></td>
<td><p>Additional arguments</p></td>
</tr>
</table>

Просмотреть файл

@ -254,6 +254,10 @@ single - (smart) redo full fitting removing each possible dimension
interaction - (smart) redo full fitting removing each pair of dimensions
"" - Nothing</p></td>
</tr>
<tr>
<th>...</th>
<td><p>Additional params.</p></td>
</tr>
<tr>
<th>x</th>
<td><p>an R object</p></td>

Просмотреть файл

@ -249,6 +249,10 @@ If m_mode==DS.MULTI_SAMPLE then each item is a sublist with such bootstrap sampl
<th>bump_ratio</th>
<td><p>For bootstraps the ratio of sample size to sample (between 0 and 1, default 1)</p></td>
</tr>
<tr>
<th>...</th>
<td><p>Additional params.</p></td>
</tr>
</table>
<h2 class="hasAnchor" id="value"><a class="anchor" href="#value"></a>Value</h2>

Просмотреть файл

@ -6,7 +6,7 @@
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Get descriptive data.frame for an estimated_partition — get_desc_df.estimated_partition • CausalGrid</title>
<title>Get descriptive data.frame — get_desc_df.estimated_partition • CausalGrid</title>
<!-- jquery -->
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.4.1/jquery.min.js" integrity="sha256-CSXorXvZcTkaix6Yvo6HppcZGetbYMGWSFlBw8HfCJo=" crossorigin="anonymous"></script>
@ -36,9 +36,9 @@
<meta property="og:title" content="Get descriptive data.frame for an estimated_partition — get_desc_df.estimated_partition" />
<meta property="og:title" content="Get descriptive data.frame — get_desc_df.estimated_partition" />
<meta property="og:description" content="Get statistics for each cell (feature boundary, and estimated cell stats)" />
<meta property="og:description" content="Get information for each cell" />
@ -111,23 +111,27 @@
<div class="row">
<div class="col-md-9 contents">
<div class="page-header">
<h1>Get descriptive data.frame for an estimated_partition</h1>
<h1>Get descriptive data.frame</h1>
<div class="hidden name"><code>get_desc_df.estimated_partition.Rd</code></div>
</div>
<div class="ref-description">
<p>Get statistics for each cell (feature boundary, and estimated cell stats)</p>
<p>Get information for each cell</p>
</div>
<pre class="usage"><span class='fu'>get_desc_df.estimated_partition</span><span class='op'>(</span>
<pre class="usage"><span class='co'># S3 method for estimated_partition</span>
<span class='fu'><a href='get_desc_df.html'>get_desc_df</a></span><span class='op'>(</span>
<span class='va'>obj</span>,
cont_bounds_inf <span class='op'>=</span> <span class='cn'>TRUE</span>,
do_str <span class='op'>=</span> <span class='cn'>TRUE</span>,
drop_unsplit <span class='op'>=</span> <span class='cn'>TRUE</span>,
digits <span class='op'>=</span> <span class='cn'>NULL</span>,
import_order <span class='op'>=</span> <span class='cn'>FALSE</span>
unsplit_cat_star <span class='op'>=</span> <span class='cn'>TRUE</span>,
import_order <span class='op'>=</span> <span class='cn'>FALSE</span>,
<span class='va'>...</span>
<span class='op'>)</span></pre>
<h2 class="hasAnchor" id="arguments"><a class="anchor" href="#arguments"></a>Arguments</h2>
@ -135,30 +139,48 @@
<colgroup><col class="name" /><col class="desc" /></colgroup>
<tr>
<th>obj</th>
<td><p>estimated_partition object</p></td>
<td><p>partition object</p></td>
</tr>
<tr>
<th>cont_bounds_inf</th>
<td><p>Should "Inf" be used for continuous bounds (otherwise
the bounds from X_range)</p></td>
</tr>
<tr>
<th>do_str</th>
<td><p>If True, use a string like "(a, b]", otherwise have two separate columns with a and b</p></td>
<td><p>If True, use a string like "(a, b]", otherwise have two
separate columns with a and b</p></td>
</tr>
<tr>
<th>drop_unsplit</th>
<td><p>If True, drop columns for variables overwhich the partition did not split</p></td>
<td><p>If True, drop columns for variables over which the
partition did not split</p></td>
</tr>
<tr>
<th>digits</th>
<td><p>digits Option (default is NULL)</p></td>
</tr>
<tr>
<th>unsplit_cat_star</th>
<td><p>Should unsplit categorical variables be listed as
"*", otherwise all factor labels will be used.</p></td>
</tr>
<tr>
<th>import_order</th>
<td><p>Should we use importance ordering (most important on the left) or input ordering (default) for features.
Rows will be ordered so that the right-most will change most frequently.</p></td>
<td><p>Whether to use importance ordering
(most important on the left) or input ordering (default) for features. Rows
will be ordered so that the right-most will change most frequently.</p></td>
</tr>
<tr>
<th>...</th>
<td><p>Additional arguments.</p></td>
</tr>
</table>
<h2 class="hasAnchor" id="value"><a class="anchor" href="#value"></a>Value</h2>
<p>data.frame with columns: partitionin columns, N_est, param_ests, pval per estimate</p>
<p>data.frame with columns: partitioning columns, N_est, param_ests,
pval per estimate</p>
</div>

Просмотреть файл

@ -6,7 +6,7 @@
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Get descriptive data.frame for grid_partition — get_desc_df.grid_partition • CausalGrid</title>
<title>Get descriptive data.frame — get_desc_df.grid_partition • CausalGrid</title>
<!-- jquery -->
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.4.1/jquery.min.js" integrity="sha256-CSXorXvZcTkaix6Yvo6HppcZGetbYMGWSFlBw8HfCJo=" crossorigin="anonymous"></script>
@ -36,9 +36,9 @@
<meta property="og:title" content="Get descriptive data.frame for grid_partition — get_desc_df.grid_partition" />
<meta property="og:title" content="Get descriptive data.frame — get_desc_df.grid_partition" />
<meta property="og:description" content="A dataset with rows for each cell and columns defining partitioning" />
<meta property="og:description" content="Get information for each cell" />
@ -111,58 +111,68 @@
<div class="row">
<div class="col-md-9 contents">
<div class="page-header">
<h1>Get descriptive data.frame for grid_partition</h1>
<h1>Get descriptive data.frame</h1>
<div class="hidden name"><code>get_desc_df.grid_partition.Rd</code></div>
</div>
<div class="ref-description">
<p>A dataset with rows for each cell and columns defining partitioning</p>
<p>Get information for each cell</p>
</div>
<pre class="usage"><span class='fu'>get_desc_df.grid_partition</span><span class='op'>(</span>
<span class='va'>partition</span>,
<pre class="usage"><span class='co'># S3 method for grid_partition</span>
<span class='fu'><a href='get_desc_df.html'>get_desc_df</a></span><span class='op'>(</span>
<span class='va'>obj</span>,
cont_bounds_inf <span class='op'>=</span> <span class='cn'>TRUE</span>,
do_str <span class='op'>=</span> <span class='cn'>FALSE</span>,
drop_unsplit <span class='op'>=</span> <span class='cn'>FALSE</span>,
digits <span class='op'>=</span> <span class='cn'>NULL</span>,
unsplit_cat_star <span class='op'>=</span> <span class='cn'>TRUE</span>
unsplit_cat_star <span class='op'>=</span> <span class='cn'>TRUE</span>,
<span class='va'>...</span>
<span class='op'>)</span></pre>
<h2 class="hasAnchor" id="arguments"><a class="anchor" href="#arguments"></a>Arguments</h2>
<table class="ref-arguments">
<colgroup><col class="name" /><col class="desc" /></colgroup>
<tr>
<th>partition</th>
<td><p>Partition</p></td>
<th>obj</th>
<td><p>partition object</p></td>
</tr>
<tr>
<th>cont_bounds_inf</th>
<td><p>If True, will put continuous bounds as -Inf/Inf. Otherwise will use X_range bounds</p></td>
<td><p>Should "Inf" be used for continuous bounds (otherwise
the bounds from X_range)</p></td>
</tr>
<tr>
<th>do_str</th>
<td><p>If True, use a string like "(a, b]", otherwise have two separate columns with a and b</p></td>
<td><p>If True, use a string like "(a, b]", otherwise have two
separate columns with a and b</p></td>
</tr>
<tr>
<th>drop_unsplit</th>
<td><p>If True, drop columns for variables overwhich the partition did not split</p></td>
<td><p>If True, drop columns for variables over which the
partition did not split</p></td>
</tr>
<tr>
<th>digits</th>
<td><p>digits option</p></td>
<td><p>digits Option (default is NULL)</p></td>
</tr>
<tr>
<th>unsplit_cat_star</th>
<td><p>if we don't split on a categorical var, should we show as "*" (otherwise list all levels)</p></td>
<td><p>Should unsplit categorical variables be listed as
"*", otherwise all factor labels will be used.</p></td>
</tr>
<tr>
<th>...</th>
<td><p>Additional arguments.</p></td>
</tr>
</table>
<h2 class="hasAnchor" id="value"><a class="anchor" href="#value"></a>Value</h2>
<p>data.frame</p>
<p>data.frame with columns: partitioning columns</p>
</div>

Просмотреть файл

@ -0,0 +1,204 @@
<!-- Generated by pkgdown: do not edit by hand -->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Get descriptive data.frame — get_desc_df • CausalGrid</title>
<!-- jquery -->
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.4.1/jquery.min.js" integrity="sha256-CSXorXvZcTkaix6Yvo6HppcZGetbYMGWSFlBw8HfCJo=" crossorigin="anonymous"></script>
<!-- Bootstrap -->
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/3.4.1/css/bootstrap.min.css" integrity="sha256-bZLfwXAP04zRMK2BjiO8iu9pf4FbLqX6zitd+tIvLhE=" crossorigin="anonymous" />
<script src="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/3.4.1/js/bootstrap.min.js" integrity="sha256-nuL8/2cJ5NDSSwnKD8VqreErSWHtnEP9E7AySL+1ev4=" crossorigin="anonymous"></script>
<!-- bootstrap-toc -->
<link rel="stylesheet" href="../bootstrap-toc.css">
<script src="../bootstrap-toc.js"></script>
<!-- Font Awesome icons -->
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/all.min.css" integrity="sha256-mmgLkCYLUQbXn0B1SRqzHar6dCnv9oZFPEC1g1cwlkk=" crossorigin="anonymous" />
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/v4-shims.min.css" integrity="sha256-wZjR52fzng1pJHwx4aV2AO3yyTOXrcDW7jBpJtTwVxw=" crossorigin="anonymous" />
<!-- clipboard.js -->
<script src="https://cdnjs.cloudflare.com/ajax/libs/clipboard.js/2.0.6/clipboard.min.js" integrity="sha256-inc5kl9MA1hkeYUt+EC3BhlIgyp/2jDIyBLS6k3UxPI=" crossorigin="anonymous"></script>
<!-- headroom.js -->
<script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/headroom.min.js" integrity="sha256-AsUX4SJE1+yuDu5+mAVzJbuYNPHj/WroHuZ8Ir/CkE0=" crossorigin="anonymous"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/jQuery.headroom.min.js" integrity="sha256-ZX/yNShbjqsohH1k95liqY9Gd8uOiE1S4vZc+9KQ1K4=" crossorigin="anonymous"></script>
<!-- pkgdown -->
<link href="../pkgdown.css" rel="stylesheet">
<script src="../pkgdown.js"></script>
<meta property="og:title" content="Get descriptive data.frame — get_desc_df" />
<meta property="og:description" content="Get information for each cell" />
<!-- mathjax -->
<script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js" integrity="sha256-nvJJv9wWKEm88qvoQl9ekL2J+k/RWIsaSScxxlsrv8k=" crossorigin="anonymous"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/config/TeX-AMS-MML_HTMLorMML.js" integrity="sha256-84DKXVJXs0/F8OTMzX4UR909+jtl4G7SPypPavF+GfA=" crossorigin="anonymous"></script>
<!--[if lt IE 9]>
<script src="https://oss.maxcdn.com/html5shiv/3.7.3/html5shiv.min.js"></script>
<script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
<![endif]-->
</head>
<body data-spy="scroll" data-target="#toc">
<div class="container template-reference-topic">
<header>
<div class="navbar navbar-default navbar-fixed-top" role="navigation">
<div class="container">
<div class="navbar-header">
<button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#navbar" aria-expanded="false">
<span class="sr-only">Toggle navigation</span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
</button>
<span class="navbar-brand">
<a class="navbar-link" href="../index.html">CausalGrid</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Released version">0.2</span>
</span>
</div>
<div id="navbar" class="navbar-collapse collapse">
<ul class="nav navbar-nav">
<li>
<a href="../reference/index.html">Reference</a>
</li>
<li class="dropdown">
<a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false">
Articles
<span class="caret"></span>
</a>
<ul class="dropdown-menu" role="menu">
<li>
<a href="../articles/vignette.html">High-Level Analysis</a>
</li>
</ul>
</li>
</ul>
<ul class="nav navbar-nav navbar-right">
</ul>
</div><!--/.nav-collapse -->
</div><!--/.container -->
</div><!--/.navbar -->
</header>
<div class="row">
<div class="col-md-9 contents">
<div class="page-header">
<h1>Get descriptive data.frame</h1>
<div class="hidden name"><code>get_desc_df.Rd</code></div>
</div>
<div class="ref-description">
<p>Get information for each cell</p>
</div>
<pre class="usage"><span class='fu'>get_desc_df</span><span class='op'>(</span>
<span class='va'>obj</span>,
cont_bounds_inf <span class='op'>=</span> <span class='cn'>TRUE</span>,
do_str <span class='op'>=</span> <span class='cn'>FALSE</span>,
drop_unsplit <span class='op'>=</span> <span class='cn'>FALSE</span>,
digits <span class='op'>=</span> <span class='cn'>NULL</span>,
unsplit_cat_star <span class='op'>=</span> <span class='cn'>TRUE</span>,
<span class='va'>...</span>
<span class='op'>)</span></pre>
<h2 class="hasAnchor" id="arguments"><a class="anchor" href="#arguments"></a>Arguments</h2>
<table class="ref-arguments">
<colgroup><col class="name" /><col class="desc" /></colgroup>
<tr>
<th>obj</th>
<td><p>partition object</p></td>
</tr>
<tr>
<th>cont_bounds_inf</th>
<td><p>Should "Inf" be used for continuous bounds (otherwise
the bounds from X_range)</p></td>
</tr>
<tr>
<th>do_str</th>
<td><p>If True, use a string like "(a, b]", otherwise have two
separate columns with a and b</p></td>
</tr>
<tr>
<th>drop_unsplit</th>
<td><p>If True, drop columns for variables over which the
partition did not split</p></td>
</tr>
<tr>
<th>digits</th>
<td><p>digits Option (default is NULL)</p></td>
</tr>
<tr>
<th>unsplit_cat_star</th>
<td><p>Should unsplit categorical variables be listed as
"*", otherwise all factor labels will be used.</p></td>
</tr>
<tr>
<th>...</th>
<td><p>Additional arguments.</p></td>
</tr>
</table>
<h2 class="hasAnchor" id="value"><a class="anchor" href="#value"></a>Value</h2>
<p>data.frame with columns: partitioning columns</p>
</div>
<div class="col-md-3 hidden-xs hidden-sm" id="pkgdown-sidebar">
<nav id="toc" data-toggle="toc" class="sticky-top">
<h2 data-toc-skip>Contents</h2>
</nav>
</div>
</div>
<footer>
<div class="copyright">
<p>Developed by Brian Quistorff.</p>
</div>
<div class="pkgdown">
<p>Site built with <a href="https://pkgdown.r-lib.org/">pkgdown</a> 1.6.1.9000.</p>
</div>
</footer>
</div>
</body>
</html>

Просмотреть файл

@ -39,7 +39,7 @@
<meta property="og:title" content="Create a null grid_partition — grid_partition" />
<meta property="og:description" content="Create an empty partition. Splits can be added using add_partition_split.
Information about a split can be retrieved using num_cells, get_desc_df.grid_partition and print
Information about a split can be retrieved using num_cells, get_desc_df and print
With data, one can determine the cell for each observation using predict
Test whether an object is a grid_function" />
@ -114,7 +114,7 @@ Test whether an object is an grid_function" />
<div class="row">
<div class="col-md-9 contents">
<div class="page-header">
<h1>Create a null grid_partition</h1>
<h1>Create a null <code>grid_partition</code></h1>
<div class="hidden name"><code>grid_partition.Rd</code></div>
</div>
@ -122,7 +122,7 @@ Test whether an object is an grid_function" />
<div class="ref-description">
<p>Create an empty partition. Splits can be added using <code><a href='add_partition_split.html'>add_partition_split</a></code>.
Information about a split can be retrieved using <code><a href='num_cells.estimated_partition.html'>num_cells</a></code>, <code><a href='get_desc_df.grid_partition.html'>get_desc_df.grid_partition</a></code> and <code><a href='https://rdrr.io/r/base/print.html'>print</a></code>
Information about a split can be retrieved using <code><a href='num_cells.estimated_partition.html'>num_cells</a></code>, <code><a href='get_desc_df.html'>get_desc_df</a></code> and <code><a href='https://rdrr.io/r/base/print.html'>print</a></code>
With data, one can determine the cell for each observation using <code><a href='https://rdrr.io/r/stats/predict.html'>predict</a></code></p>
<p>Test whether an object is a <code>grid_function</code></p>

Просмотреть файл

@ -150,12 +150,6 @@
<td><p>Change the complexity of a fit_estimate_partition</p></td>
</tr><tr>
<td>
<p><code><a href="const_vectr.html">const_vectr()</a></code> </p>
</td>
<td><p>inline', second just Rcpp), but couldn't get to work in building a package.</p></td>
</tr><tr>
<td>
<p><code><a href="estimated_partition.html">estimated_partition()</a></code> </p>
</td>
@ -205,15 +199,21 @@
</tr><tr>
<td>
<p><code><a href="get_desc_df.estimated_partition.html">get_desc_df.estimated_partition()</a></code> </p>
<p><code><a href="get_desc_df.estimated_partition.html">get_desc_df(<i>&lt;estimated_partition&gt;</i>)</a></code> </p>
</td>
<td><p>Get descriptive data.frame for an estimated_partition</p></td>
<td><p>Get descriptive data.frame</p></td>
</tr><tr>
<td>
<p><code><a href="get_desc_df.grid_partition.html">get_desc_df.grid_partition()</a></code> </p>
<p><code><a href="get_desc_df.grid_partition.html">get_desc_df(<i>&lt;grid_partition&gt;</i>)</a></code> </p>
</td>
<td><p>Get descriptive data.frame for grid_partition</p></td>
<td><p>Get descriptive data.frame</p></td>
</tr><tr>
<td>
<p><code><a href="get_desc_df.html">get_desc_df()</a></code> </p>
</td>
<td><p>Get descriptive data.frame</p></td>
</tr><tr>
<td>
@ -237,13 +237,7 @@
<td>
<p><code><a href="grid_partition.html">grid_partition()</a></code> <code><a href="grid_partition.html">is_grid_partition()</a></code> </p>
</td>
<td><p>Create a null grid_partition</p></td>
</tr><tr>
<td>
<p><code><a href="grid_partition_split.html">is_grid_partition_split()</a></code> </p>
</td>
<td><p>Is grid_partition_split</p></td>
<td><p>Create a null <code>grid_partition</code></p></td>
</tr><tr>
<td>
@ -265,7 +259,7 @@
</tr><tr>
<td>
<p><code><a href="partition_split.html">partition_split()</a></code> </p>
<p><code><a href="partition_split.html">partition_split()</a></code> <code><a href="partition_split.html">is_partition_split()</a></code> </p>
</td>
<td><p>Create partition_split</p></td>
</tr><tr>

Просмотреть файл

@ -38,7 +38,8 @@
<meta property="og:title" content="Create partition_split — partition_split" />
<meta property="og:description" content="Describes a single partition split. Used with add_partition_split." />
<meta property="og:description" content="Describes a single partition split. Used with add_partition_split.
Tests whether or not an object is a partition_split." />
@ -119,10 +120,13 @@
<div class="ref-description">
<p>Describes a single partition split. Used with <code><a href='add_partition_split.html'>add_partition_split</a></code>.</p>
<p>Tests whether or not an object is a <code>partition_split</code>.</p>
</div>
<pre class="usage"><span class='fu'>partition_split</span><span class='op'>(</span><span class='va'>k</span>, <span class='va'>X_k_cut</span><span class='op'>)</span></pre>
<pre class="usage"><span class='fu'>partition_split</span><span class='op'>(</span><span class='va'>k</span>, <span class='va'>X_k_cut</span><span class='op'>)</span>
<span class='fu'>is_partition_split</span><span class='op'>(</span><span class='va'>x</span><span class='op'>)</span></pre>
<h2 class="hasAnchor" id="arguments"><a class="anchor" href="#arguments"></a>Arguments</h2>
<table class="ref-arguments">
@ -135,11 +139,23 @@
<th>X_k_cut</th>
<td><p>cut value</p></td>
</tr>
<tr>
<th>x</th>
<td><p>an R object</p></td>
</tr>
</table>
<h2 class="hasAnchor" id="value"><a class="anchor" href="#value"></a>Value</h2>
<p>Partition Split</p>
<p>Boolean</p>
<h2 class="hasAnchor" id="functions"><a class="anchor" href="#functions"></a>Functions</h2>
<ul>
<li><p><code>is_partition_split</code>: is partition_split</p></li>
</ul>
</div>

Просмотреть файл

@ -123,19 +123,23 @@
</div>
<pre class="usage"><span class='co'># S3 method for estimated_partition</span>
<span class='fu'><a href='https://rdrr.io/r/graphics/plot.html'>plot</a></span><span class='op'>(</span><span class='va'>grid_fit</span>, X_names_2D <span class='op'>=</span> <span class='cn'>NULL</span><span class='op'>)</span></pre>
<span class='fu'><a href='https://rdrr.io/r/graphics/plot.html'>plot</a></span><span class='op'>(</span><span class='va'>x</span>, X_names_2D <span class='op'>=</span> <span class='cn'>NULL</span>, <span class='va'>...</span><span class='op'>)</span></pre>
<h2 class="hasAnchor" id="arguments"><a class="anchor" href="#arguments"></a>Arguments</h2>
<table class="ref-arguments">
<colgroup><col class="name" /><col class="desc" /></colgroup>
<tr>
<th>grid_fit</th>
<th>x</th>
<td><p>grid_fit</p></td>
</tr>
<tr>
<th>X_names_2D</th>
<td><p>X_names_2D</p></td>
</tr>
<tr>
<th>...</th>
<td><p>Additional arguments. Unused.</p></td>
</tr>
</table>
<h2 class="hasAnchor" id="value"><a class="anchor" href="#value"></a>Value</h2>

Просмотреть файл

@ -123,13 +123,13 @@
</div>
<pre class="usage"><span class='co'># S3 method for estimated_partition</span>
<span class='fu'><a href='https://rdrr.io/r/stats/predict.html'>predict</a></span><span class='op'>(</span><span class='va'>obj</span>, <span class='va'>new_X</span>, new_d <span class='op'>=</span> <span class='cn'>NULL</span>, type <span class='op'>=</span> <span class='st'>"effect"</span><span class='op'>)</span></pre>
<span class='fu'><a href='https://rdrr.io/r/stats/predict.html'>predict</a></span><span class='op'>(</span><span class='va'>object</span>, <span class='va'>new_X</span>, new_d <span class='op'>=</span> <span class='cn'>NULL</span>, type <span class='op'>=</span> <span class='st'>"effect"</span>, <span class='va'>...</span><span class='op'>)</span></pre>
<h2 class="hasAnchor" id="arguments"><a class="anchor" href="#arguments"></a>Arguments</h2>
<table class="ref-arguments">
<colgroup><col class="name" /><col class="desc" /></colgroup>
<tr>
<th>obj</th>
<th>object</th>
<td><p>estimated_partition object</p></td>
</tr>
<tr>
@ -144,6 +144,10 @@
<th>type</th>
<td><p>"effect" or "outcome" (currently not implemented)</p></td>
</tr>
<tr>
<th>...</th>
<td><p>Additional arguments. Unused.</p></td>
</tr>
</table>
<h2 class="hasAnchor" id="value"><a class="anchor" href="#value"></a>Value</h2>

Просмотреть файл

@ -125,11 +125,15 @@ then we will return NA unless you provide and updated X_range.</p>
</div>
<pre class="usage"><span class='co'># S3 method for grid_partition</span>
<span class='fu'><a href='https://rdrr.io/r/stats/predict.html'>predict</a></span><span class='op'>(</span><span class='va'>obj</span>, <span class='va'>X</span>, X_range <span class='op'>=</span> <span class='cn'>NULL</span><span class='op'>)</span></pre>
<span class='fu'><a href='https://rdrr.io/r/stats/predict.html'>predict</a></span><span class='op'>(</span><span class='va'>object</span>, <span class='va'>X</span>, X_range <span class='op'>=</span> <span class='cn'>NULL</span>, <span class='va'>...</span><span class='op'>)</span></pre>
<h2 class="hasAnchor" id="arguments"><a class="anchor" href="#arguments"></a>Arguments</h2>
<table class="ref-arguments">
<colgroup><col class="name" /><col class="desc" /></colgroup>
<tr>
<th>object</th>
<td><p>partition</p></td>
</tr>
<tr>
<th>X</th>
<td><p>X data or list of X</p></td>
@ -139,8 +143,8 @@ then we will return NA unless you provide and updated X_range.</p>
<td><p>(Optional) overrides the partition$X_range</p></td>
</tr>
<tr>
<th>partition</th>
<td><p>partition</p></td>
<th>...</th>
<td><p>Additional arguments. Unused.</p></td>
</tr>
</table>

Просмотреть файл

@ -38,7 +38,7 @@
<meta property="og:title" content="Print estimated_partition — print.estimated_partition" />
<meta property="og:description" content="Print a summary of the estimated partition. Uses get_desc_df.estimated_partition" />
<meta property="og:description" content="Print a summary of the estimated partition. Uses get_desc_df" />
@ -118,7 +118,7 @@
<div class="ref-description">
<p>Print a summary of the estimated partition. Uses <code><a href='get_desc_df.estimated_partition.html'>get_desc_df.estimated_partition</a></code></p>
<p>Print a summary of the estimated partition. Uses <code><a href='get_desc_df.html'>get_desc_df</a></code></p>
</div>
@ -141,11 +141,13 @@
</tr>
<tr>
<th>do_str</th>
<td><p>If True, use a string like "(a, b]", otherwise have two separate columns with a and b</p></td>
<td><p>If True, use a string like "(a, b]", otherwise have two
separate columns with a and b</p></td>
</tr>
<tr>
<th>drop_unsplit</th>
<td><p>If True, drop columns for variables overwhich the partition did not split</p></td>
<td><p>If True, drop columns for variables over which the
partition did not split</p></td>
</tr>
<tr>
<th>digits</th>
@ -153,8 +155,8 @@
</tr>
<tr>
<th>import_order</th>
<td><p>Should we use importance ordering (most important on the left) or input ordering (default) for features.
Rows will be ordered so that the right-most will change most frequently.</p></td>
<td><p>Whether to use importance ordering
(most important on the left) or input ordering (default) for features.</p></td>
</tr>
<tr>
<th>...</th>

Просмотреть файл

@ -27,6 +27,10 @@ should be normalized to have the same variance. With multiple core estimates see
\item{partition_i}{partition_i - 1 is the index of the last split in split_seq that is included in the new partition}
\item{index_tr}{Split between train and estimate samples (default is to get from \code{fit})}
\item{split_seq}{sequential list of splits (default is to get from \code{fit})}
\item{est_plan}{\link{EstimatorPlan}.}
}
\value{

Просмотреть файл

@ -1,11 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils.R
\name{const_vectr}
\alias{const_vectr}
\title{inline', second just Rcpp), but couldn't get to work in building a package.}
\usage{
const_vectr(x)
}
\description{
inline', second just Rcpp), but couldn't get to work in building a package.
}

Просмотреть файл

@ -11,7 +11,7 @@ estimated_partition(partition, cell_stats, ...)
\item{cell_stats}{cell_stats}
\item{...}{}
\item{...}{Additional arguments}
}
\value{
object of class estimated_partition

Просмотреть файл

@ -93,6 +93,8 @@ single - (smart) redo full fitting removing each possible dimension
interaction - (smart) redo full fitting removing each pair of dimensions
"" - Nothing}
\item{...}{Additional params.}
\item{x}{an R object}
}
\value{

Просмотреть файл

@ -86,6 +86,8 @@ for treated and controls.}
If m_mode==DS.MULTI_SAMPLE then each item is a sublist with such bootstrap samples over each dataset.}
\item{bump_ratio}{For bootstraps the ratio of sample size to sample (between 0 and 1, default 1)}
\item{...}{Additional params.}
}
\value{
An object.

41
man/get_desc_df.Rd Normal file
Просмотреть файл

@ -0,0 +1,41 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils.R
\name{get_desc_df}
\alias{get_desc_df}
\title{Get descriptive data.frame}
\usage{
get_desc_df(
obj,
cont_bounds_inf = TRUE,
do_str = FALSE,
drop_unsplit = FALSE,
digits = NULL,
unsplit_cat_star = TRUE,
...
)
}
\arguments{
\item{obj}{partition object}
\item{cont_bounds_inf}{Should "Inf" be used for continuous bounds (otherwise
the bounds from X_range)}
\item{do_str}{If True, use a string like "(a, b]", otherwise have two
separate columns with a and b}
\item{drop_unsplit}{If True, drop columns for variables over which the
partition did not split}
\item{digits}{digits Option (default is NULL)}
\item{unsplit_cat_star}{Should unsplit categorical variables be listed as
"*", otherwise all factor labels will be used.}
\item{...}{Additional arguments.}
}
\value{
data.frame with columns: partitioning columns
}
\description{
Get information for each cell
}

Просмотреть файл

@ -2,31 +2,46 @@
% Please edit documentation in R/fit_estimate.R
\name{get_desc_df.estimated_partition}
\alias{get_desc_df.estimated_partition}
\title{Get descriptive data.frame for an estimated_partition}
\title{Get descriptive data.frame}
\usage{
get_desc_df.estimated_partition(
\method{get_desc_df}{estimated_partition}(
obj,
cont_bounds_inf = TRUE,
do_str = TRUE,
drop_unsplit = TRUE,
digits = NULL,
import_order = FALSE
unsplit_cat_star = TRUE,
import_order = FALSE,
...
)
}
\arguments{
\item{obj}{estimated_partition object}
\item{obj}{partition object}
\item{do_str}{If True, use a string like "(a, b]", otherwise have two separate columns with a and b}
\item{cont_bounds_inf}{Should "Inf" be used for continuous bounds (otherwise
the bounds from X_range)}
\item{drop_unsplit}{If True, drop columns for variables overwhich the partition did not split}
\item{do_str}{If True, use a string like "(a, b]", otherwise have two
separate columns with a and b}
\item{drop_unsplit}{If True, drop columns for variables over which the
partition did not split}
\item{digits}{digits Option (default is NULL)}
\item{import_order}{Should we use importance ordering (most important on the left) or input ordering (default) for features.
Rows will be ordered so that the right-most will change most frequently.}
\item{unsplit_cat_star}{Should unsplit categorical variables be listed as
"*", otherwise all factor labels will be used.}
\item{import_order}{Whether to use importance ordering
(most important on the left) or input ordering (default) for features. Rows
will be ordered so that the right-most will change most frequently.}
\item{...}{Additional arguments.}
}
\value{
data.frame with columns: partitionin columns, {N_est, param_ests, pval} per estimate
data.frame with columns: partitioning columns, {N_est, param_ests,
pval} per estimate
}
\description{
Get statistics for each cell (feature boundary, and estimated cell stats)
Get information for each cell
}

Просмотреть файл

@ -2,33 +2,40 @@
% Please edit documentation in R/grid_partition.R
\name{get_desc_df.grid_partition}
\alias{get_desc_df.grid_partition}
\title{Get descriptive data.frame for grid_partition}
\title{Get descriptive data.frame}
\usage{
get_desc_df.grid_partition(
partition,
\method{get_desc_df}{grid_partition}(
obj,
cont_bounds_inf = TRUE,
do_str = FALSE,
drop_unsplit = FALSE,
digits = NULL,
unsplit_cat_star = TRUE
unsplit_cat_star = TRUE,
...
)
}
\arguments{
\item{partition}{Partition}
\item{obj}{partition object}
\item{cont_bounds_inf}{If True, will put continuous bounds as -Inf/Inf. Otherwise will use X_range bounds}
\item{cont_bounds_inf}{Should "Inf" be used for continuous bounds (otherwise
the bounds from X_range)}
\item{do_str}{If True, use a string like "(a, b]", otherwise have two separate columns with a and b}
\item{do_str}{If True, use a string like "(a, b]", otherwise have two
separate columns with a and b}
\item{drop_unsplit}{If True, drop columns for variables overwhich the partition did not split}
\item{drop_unsplit}{If True, drop columns for variables over which the
partition did not split}
\item{digits}{digits option}
\item{digits}{digits Option (default is NULL)}
\item{unsplit_cat_star}{if we don't split on a categorical var, should we show as "*" (otherwise list all levels)}
\item{unsplit_cat_star}{Should unsplit categorical variables be listed as
"*", otherwise all factor labels will be used.}
\item{...}{Additional arguments.}
}
\value{
data.frame
data.frame with columns: partitioning columns
}
\description{
A dataset with rows for each cell and columns defining partitioning
Get information for each cell
}

Просмотреть файл

@ -3,7 +3,7 @@
\name{grid_partition}
\alias{grid_partition}
\alias{is_grid_partition}
\title{Create a null grid_partition}
\title{Create a null \code{grid_partition}}
\usage{
grid_partition(X_range, varnames = NULL)
@ -23,7 +23,7 @@ True if x is a grid_partition
}
\description{
Create an empty partition. Splits can be added using \code{\link{add_partition_split}}.
Information about a split can be retrieved using \code{\link{num_cells}}, \code{\link{get_desc_df.grid_partition}} and \code{\link{print}}
Information about a split can be retrieved using \code{\link{num_cells}}, \code{\link{get_desc_df}} and \code{\link{print}}
With data, one can determine the cell for each observation using \code{\link{predict}}
Test whether an object is a \code{grid_partition}

Просмотреть файл

@ -1,22 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/grid_partition.R
\name{is_grid_partition_split}
\alias{is_grid_partition_split}
\title{Is grid_partition_split}
\usage{
is_grid_partition_split(x)
}
\arguments{
\item{x}{an R object}
}
\value{
Boolean
}
\description{
Tests whether or not an object is a \code{partition_split}.
}
\section{Functions}{
\itemize{
\item \code{is_grid_partition_split}: is grid_partition_split
}}

Просмотреть файл

@ -2,18 +2,32 @@
% Please edit documentation in R/grid_partition.R
\name{partition_split}
\alias{partition_split}
\alias{is_partition_split}
\title{Create partition_split}
\usage{
partition_split(k, X_k_cut)
is_partition_split(x)
}
\arguments{
\item{k}{dimension}
\item{X_k_cut}{cut value}
\item{x}{an R object}
}
\value{
Partition Split
Boolean
}
\description{
Describes a single partition split. Used with \code{\link{add_partition_split}}.
Tests whether or not an object is a \code{partition_split}.
}
\section{Functions}{
\itemize{
\item \code{is_partition_split}: is partition_split
}}

Просмотреть файл

@ -4,12 +4,14 @@
\alias{plot.estimated_partition}
\title{Create 2D plots of parameter estimates}
\usage{
\method{plot}{estimated_partition}(grid_fit, X_names_2D = NULL)
\method{plot}{estimated_partition}(x, X_names_2D = NULL, ...)
}
\arguments{
\item{grid_fit}{grid_fit}
\item{x}{grid_fit}
\item{X_names_2D}{X_names_2D}
\item{...}{Additional arguments. Unused.}
}
\value{
ggplot2 object or list of such objects

Просмотреть файл

@ -4,16 +4,18 @@
\alias{predict.estimated_partition}
\title{Generate predicted estimates per observations}
\usage{
\method{predict}{estimated_partition}(obj, new_X, new_d = NULL, type = "effect")
\method{predict}{estimated_partition}(object, new_X, new_d = NULL, type = "effect", ...)
}
\arguments{
\item{obj}{estimated_partition object}
\item{object}{estimated_partition object}
\item{new_X}{new X}
\item{new_d}{new d. Required for type="outcome"}
\item{type}{"effect" or "outcome" (currently not implemented)}
\item{...}{Additional arguments. Unused.}
}
\value{
predicted treatment effect

Просмотреть файл

@ -4,14 +4,16 @@
\alias{predict.grid_partition}
\title{Get factor describing cell number for each observation}
\usage{
\method{predict}{grid_partition}(obj, X, X_range = NULL)
\method{predict}{grid_partition}(object, X, X_range = NULL, ...)
}
\arguments{
\item{obj}{partition}
\item{object}{partition}
\item{X}{X data or list of X}
\item{X_range}{(Optional) overrides the partition$X_range}
\item{...}{Additional arguments. Unused.}
}
\value{
Factor

Просмотреть файл

@ -16,14 +16,16 @@
\arguments{
\item{x}{estimated_partition object}
\item{do_str}{If True, use a string like "(a, b]", otherwise have two separate columns with a and b}
\item{do_str}{If True, use a string like "(a, b]", otherwise have two
separate columns with a and b}
\item{drop_unsplit}{If True, drop columns for variables overwhich the partition did not split}
\item{drop_unsplit}{If True, drop columns for variables over which the
partition did not split}
\item{digits}{digits Option (default is NULL)}
\item{import_order}{Should we use importance ordering (most important on the left) or input ordering (default) for features.
Rows will be ordered so that the right-most will change most frequently.}
\item{import_order}{Whether to use importance ordering
(most important on the left) or input ordering (default) for features.}
\item{...}{Additional arguments. These will be passed to print.data.frame}
}
@ -31,5 +33,5 @@ Rows will be ordered so that the right-most will change most frequently.}
string (and displayed)
}
\description{
Print a summary of the estimated partition. Uses \code{\link{get_desc_df.estimated_partition}}
Print a summary of the estimated partition. Uses \code{\link{get_desc_df}}
}

6
renv/settings.dcf.bak Normal file
Просмотреть файл

@ -0,0 +1,6 @@
external.libraries: C:\Program Files\Microsoft\R Open\R-3.5.3\library
ignored.packages:
package.dependency.fields: Imports, Depends, LinkingTo
snapshot.type: implicit
use.cache: TRUE
vcs.ignore.library: TRUE

Просмотреть файл

@ -12,7 +12,8 @@ vignette: >
```{r, include = FALSE}
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
comment = "#>",
dev = "ragg_png"
)
```
@ -39,6 +40,11 @@ tau = as.integer(X[,1]>.5)*2-1 #true treatment effect (just heterogeneous across
y = d*tau + rnorm(N, 0, err_sd) #outcome
```
```{r}
est_part0 = fit_estimate_partition(y, X, d, cv_folds=2)
get_desc_df(est_part0)
```
We typically want a high-level partition for "human" consumption. To save time, avoid cells with too few observations, and reduce the chance of splitting from running many noisy tests, it's common to only look for a few splits per dimension. If we don't specify this, the function will try every possible split across each dimension.
```{r}
# With just a scalar, we will split at points equal across the quantile-distribution for each feature.
@ -53,7 +59,7 @@ plot(est_part)
```
```{r}
get_desc_df.estimated_partition(est_part)
get_desc_df(est_part)
```
We can manually estimate this simple model given the partition
@ -101,7 +107,7 @@ Now let's look at a case where there's heterogeneity across all three dimensions.
tau_3 = (as.integer(X[,1]>0.5)*2-1) + (as.integer(X[,2]>0.5)*2-1)*2 + (as.integer(X[,3]>0.5)*2-1)*3
y_3 = d*tau_3 + rnorm(N, 0, err_sd)
est_part_3 = fit_estimate_partition(y_3, X, d, breaks_per_dim=5, partition_i=4)
get_desc_df.estimated_partition(est_part_3)
get_desc_df(est_part_3)
```
One benefit of grid-based partitions is that you can easily view 2D slices of the full heterogeneity space.
@ -137,7 +143,7 @@ tau2 = as.integer(X[,2]>0.5)*2-1
y2_yM = d*tau2 + rnorm(N, 0, err_sd)
y_yM = cbind(y, y2_yM)
est_part_yM = fit_estimate_partition(y_yM, X, d, breaks_per_dim=5, partition_i = 3)
get_desc_df.estimated_partition(est_part_yM)
get_desc_df(est_part_yM)
```
2) Multiple treatments, but same sample (single outcome)
@ -146,7 +152,7 @@ d2 = rbinom(N, 1, 0.5)
d_dM = cbind(d, d2)
y_dM = d*tau + d2*tau2 + rnorm(N, 0, err_sd)
est_part_dM = fit_estimate_partition(y_dM, X, d_dM, breaks_per_dim=5, partition_i = 3)
get_desc_df.estimated_partition(est_part_dM)
get_desc_df(est_part_dM)
```
3) Multiple separate samples, each having a single outcome and treatment
@ -156,14 +162,14 @@ y_MM = list(y, y2_MM)
d_MM = list(d, d2)
X_MM = list(X, X)
est_part_MM = fit_estimate_partition(y_MM, X_MM, d_MM, breaks_per_dim=5, partition_i = 3)
get_desc_df.estimated_partition(est_part_MM)
get_desc_df(est_part_MM)
```
# Mean-outcome prediction
```{r}
alpha = as.integer(X[,1]>0.5)*2-1 #true average outcome effect (just heterogeneous across X1)
y_y = alpha + rnorm(N, 0, err_sd) #outcome
est_part_y = fit_estimate_partition(y_y, X, breaks_per_dim=5, partition_i=2)
get_desc_df.estimated_partition(est_part_y)
get_desc_df(est_part_y)
```
# Minor things to add