133 строки
3.6 KiB
R
133 строки
3.6 KiB
R
|
|
exp_data <- function(n_4=25, dim_D=1, err_sd=0.01){
|
|
#n_4 is n/4. We get this to make sure we have the same in each chunk
|
|
n = n_4*4
|
|
stopifnot(dim_D %in% c(0,1,2))
|
|
#dim_D in {0,1,2}
|
|
X1 = cbind(runif(n_4, 0, .5), runif(n_4, 0, .5))
|
|
X2 = cbind(runif(n_4, 0, .5), runif(n_4, .5, 1))
|
|
X3 = cbind(runif(n_4, .5, 1), runif(n_4, 0, .5))
|
|
X4 = cbind(runif(n_4, .5, 1), runif(n_4, .5, 1))
|
|
X = rbind(X1, X2, X3, X4)
|
|
|
|
alpha = ifelse(X[,1]>.5, ifelse(X[,2]>.5,.5,.8), ifelse(X[,2]>.5, 2, -2))
|
|
#alpha=0
|
|
y = alpha + rnorm(n,0,err_sd)
|
|
if(dim_D) {
|
|
if(dim_D==1) {
|
|
beta = ifelse(X[,1]>.5, ifelse(X[,2]>.5,-1,2), ifelse(X[,2]>.5, 4, 6))
|
|
#beta = ifelse(X[,1]>.5, -1,1)
|
|
d = matrix(rnorm(n), n, 1)
|
|
y = y + beta*d
|
|
colnames(d) <- "d"
|
|
}
|
|
else {
|
|
beta1 = ifelse(X[,1]>.5,-1, 4)
|
|
beta2 = ifelse(X[,2]>.5, 2, 6)
|
|
d = matrix(rnorm(2*n), n, 2)
|
|
y = y + beta1*d[,1] + beta2*d[,2]
|
|
colnames(d) = c("d1", "d2")
|
|
}
|
|
|
|
}
|
|
else {
|
|
d = NULL
|
|
}
|
|
y = as.matrix(y, nrow=n, ncol=1)
|
|
colnames(y) = "y"
|
|
colnames(X) = c("X1", "X2")
|
|
return(list(y=y, X=X, d=d))
|
|
}
|
|
|
|
mix_data_y <- function(n=200) {
|
|
X = data.frame(X1=c(rep(0, n/4), rep(1, n/4), rep(0, n/4), rep(1, n/4)),
|
|
X2=factor(c(rep("A", n/2), rep("B", n/2))))
|
|
alpha = c(rep(0, n/4), rep(1, n/4), rep(2, n/4), rep(3, n/4))
|
|
y = alpha
|
|
return(list(y=y, X=X))
|
|
}
|
|
|
|
mix_data_d <- function(n=200) {
|
|
X = data.frame(X1=c(rep(0, n/4), rep(1, n/4), rep(0, n/4), rep(1, n/4)),
|
|
X2=factor(c(rep("A", n/2), rep("B", n/4), rep("C", n/4))))
|
|
tau = c(rep(0, n/4), rep(1, n/4), rep(2, n/4), rep(3, n/4))
|
|
d = rep(0:1, n/2)
|
|
y = d*tau
|
|
return(list(y=y, X=X, d=d))
|
|
}
|
|
|
|
|
|
two_groups_data <- function(){
|
|
|
|
X = matrix(factor(c(rep("M", 100), rep("F", 100))),nrow = 200 ,ncol = 1)
|
|
y = c(rep(5, 100), rep(50, 100))
|
|
|
|
return(list(y=y, X=X))
|
|
}
|
|
|
|
two_groups_data_int <- function(){
|
|
|
|
X = matrix(c(rep(1, 100), rep(2, 100), rep(0, 200)) ,nrow = 200 ,ncol = 2)
|
|
y = c(rep(5, 100), rep(50, 100))
|
|
|
|
return(list(y=y, X=X))
|
|
}
|
|
|
|
AI_sim <- function(n=500, design=1, err_sd=0.01) {
|
|
w = rbinom(n, 1, 0.5)
|
|
K = c(2, 10, 20)[design]
|
|
X = matrix(rnorm(n*K), nrow=n, ncol=K)
|
|
X_I = X>0
|
|
if(design==1) {
|
|
eta = X %*% matrix(c(0.5, 1), ncol=1)
|
|
kappa = X %*% matrix(c(0.5, 0), ncol=1)
|
|
}
|
|
if(design==2) {
|
|
eta = X %*% matrix(c(rep(0.5, 2), rep(1, 4), rep(0, 4)), ncol=1)
|
|
kappa = (X*X_I) %*% matrix(c(rep(1,2), rep(0,8)), ncol=1)
|
|
}
|
|
if(design==3) {
|
|
eta = X %*% matrix(c(rep(0.5, 4), rep(1, 4), rep(0, 12)), ncol=1)
|
|
kappa = (X*X_I) %*% matrix(c(rep(1,4), rep(0,16)), ncol=1)
|
|
}
|
|
epsilon = rnorm(n, 0, err_sd)
|
|
Y = eta + 0.5*(2*w-1)*kappa + epsilon
|
|
return(list(y=Y, X=X, w=w, kappa=kappa))
|
|
}
|
|
|
|
XOR_sim <- function(n=500, const_kappa=TRUE, err_sd=0.01) {
|
|
w = rbinom(n, 1, 0.5)
|
|
K=2
|
|
X = matrix(rnorm(n*K), nrow=n, ncol=K)
|
|
eta = 0
|
|
if(const_kappa) {
|
|
mag = 1
|
|
}
|
|
else {
|
|
mag = abs(X[,1]) + abs(X[,2])
|
|
}
|
|
kappa = ((X[,1]>0 & X[,2]>0) | (X[,1]<0 & X[,2]<0))*mag
|
|
epsilon = rnorm(n, 0, err_sd)
|
|
y = eta + 0.5*(2*w-1)*kappa + epsilon
|
|
return(list(y=y, X=X, w=w, kappa=kappa))
|
|
}
|
|
|
|
XOR2_sim <- function(n=500, const_kappa=TRUE, err_sd=0.01, kappa3_ratio=0.1) {
|
|
#This one adds a third dimension with mild heterogeneity so algos get stuck on that
|
|
w = rbinom(n, 1, 0.5)
|
|
K=3
|
|
X = matrix(rnorm(n*K), nrow=n, ncol=K)
|
|
eta = 0
|
|
if(const_kappa) {
|
|
mag = 1
|
|
}
|
|
else {
|
|
mag = abs(X[,1]) + abs(X[,2])
|
|
}
|
|
kappa = ((X[,1]>0 & X[,2]>0) | (X[,1]<0 & X[,2]<0))*mag + kappa3_ratio*X[,3]
|
|
epsilon = rnorm(n, 0, err_sd)
|
|
#y = eta + 0.5*(2*w-1)*kappa + epsilon
|
|
y = eta + w*kappa + epsilon
|
|
return(list(y=y, X=X, w=w, kappa=kappa))
|
|
}
|