Mirror of https://github.com/microsoft/SparseSC.git
Rename C->N0 and N->N1
Parent: 6701aa7c2c
Commit: 6e9d313ccb
@@ -1,4 +1,3 @@
 from RidgeSC.fit_fold import fold_v_matrix, fold_score
 from RidgeSC.fit_loo import loo_v_matrix, loo_score, loo_weights
 from RidgeSC.fit_ct import ct_v_matrix, ct_score
@@ -409,19 +408,19 @@ def repeatfunc(func, times=None, *args):
         return itertools.starmap(func, itertools.repeat(args))
     return itertools.starmap(func, itertools.repeat(args, times))

-def _gen_placebo_stats_from_diffs(N, effect_vec, std_effect_vec, joint_effect, joint_std_effect,
+def _gen_placebo_stats_from_diffs(N1, effect_vec, std_effect_vec, joint_effect, joint_std_effect,
         control_effect_vecs, control_std_effect_vecs, control_joint_effects, control_joint_std_effects,
         max_n_pl = 1000000, ret_pl = False, ret_CI=False, level=0.95):
     #ret_p1s=False
     keep_pl = ret_pl or ret_CI
-    C = control_effect_vecs.shape[0]
+    N0 = control_effect_vecs.shape[0]
     T1 = len(effect_vec)
-    n_pl = _ncr(C, N)
+    n_pl = _ncr(N0, N1)
     if (max_n_pl > 0 & n_pl > max_n_pl): #randomize
-        comb_iter = itertools.combinations(range(C), N)
+        comb_iter = itertools.combinations(range(N0), N1)
         comb_len = max_n_pl
     else:
-        comb_iter = repeatfunc(random_combination, n_pl, range(C), N)
+        comb_iter = repeatfunc(random_combination, n_pl, range(N0), N1)
         comb_len = n_pl
     placebo_effect_vecs = None
     if keep_pl:
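For reference on the placebo hunk above: _ncr and random_combination are defined elsewhere in the repository and are not shown in this diff. A minimal sketch of helpers with the behavior the call sites assume (hypothetical reimplementations, not the project's code):

import random
from math import factorial

def _ncr(n, r):
    # "n choose r": how many distinct placebo sets of size r can be drawn from n controls
    return factorial(n) // (factorial(r) * factorial(n - r))

def random_combination(iterable, r):
    # one uniformly random sorted r-subset (the standard itertools-recipes approach)
    pool = tuple(iterable)
    idx = sorted(random.sample(range(len(pool)), r))
    return tuple(pool[i] for i in idx)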
@@ -483,13 +482,13 @@ def _gen_placebo_stats_from_diffs(N, effect_vec, std_effect_vec, joint_effect, j

 def estimate_effects(X, Y_pre, Y_post, treated_units, max_n_pl = 1000000, ret_pl = False, ret_CI=False, level=0.95, **kwargs):
     #TODO: Cleanup returning placebo distribution (incl pre?)
-    N = len(treated_units)
+    N1 = len(treated_units)
     X_and_Y_pre = np.hstack( ( X, Y_pre,) )
-    C_N = X_and_Y_pre.shape[0]
+    N = X_and_Y_pre.shape[0]
-    #C = C_N - N
+    #N0 = N - N1
     #T1 = Y_post.shape[1]
-    control_units = list(set(range(C_N)) - set(treated_units))
+    control_units = list(set(range(N)) - set(treated_units))
-    all_units = list(range(C_N))
+    all_units = list(range(N))
     Y_post_c = Y_post[control_units, :]
     Y_post_tr = Y_post[treated_units, :]
     X_and_Y_pre_c = X_and_Y_pre[control_units, :]
@@ -537,7 +536,7 @@ def estimate_effects(X, Y_pre, Y_post, treated_units, max_n_pl = 1000000, ret_pl
     joint_effect = np.mean(joint_effects)
     joint_std_effect = np.mean(joint_effects / pre_tr_rmspes)

-    return _gen_placebo_stats_from_diffs(N, effect_vec, std_effect_vec, joint_effect, joint_std_effect,
+    return _gen_placebo_stats_from_diffs(N1, effect_vec, std_effect_vec, joint_effect, joint_std_effect,
         control_effect_vecs, control_std_effect_vecs, control_joint_effects, control_joint_std_effects,
         max_n_pl, ret_pl, ret_CI, level)

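A minimal usage sketch for estimate_effects under the new naming. The shapes are illustrative only (X is N x K, Y_pre is N x T0, Y_post is N x T1, and treated_units lists the N1 treated rows); the package-level import is an assumption here, since this diff does not show where the function is exported.

import numpy as np
import RidgeSC as SC  # import path assumed; estimate_effects may live in a submodule

N, K, T0, T1 = 20, 4, 8, 6
X = np.random.normal(size=(N, K))
Y_pre = np.random.normal(size=(N, T0))
Y_post = np.random.normal(size=(N, T1))
treated_units = [0, 1]  # the N1 treated rows; the remaining N0 = N - N1 rows are controls

est = SC.estimate_effects(X, Y_pre, Y_post, treated_units, max_n_pl=1000, ret_CI=True)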
@@ -72,7 +72,7 @@ def ct_v_matrix(X,
         raise TypeError( "L2_PEN_W is not a number")

     # CONSTANTS
-    C, N, K = len(control_units), len(treated_units), X.shape[1]
+    N0, N1, K = len(control_units), len(treated_units), X.shape[1]
     if start is None:
         start = zeros(K) # formerly: .1 * ones(K)
     Y_treated = Y[treated_units,:]

@@ -106,7 +106,7 @@ def ct_v_matrix(X,
         weights, A, _, AinvB = _weights(dv)
         Ey = (Y_treated - weights.T.dot(Y_control)).getA()
         dGamma0_dV_term2 = zeros(K)
-        dPI_dV = zeros((C, N))
+        dPI_dV = zeros((N0, N1))
         #Ai = A.I
         for k in range(K):
             if verbose: # for large sample sizes, linalg.solve is a huge bottle neck,

@@ -121,12 +121,12 @@ def ct_v_matrix(X,

     L2_PEN_W_mat = 2 * L2_PEN_W * diag(ones(X_control.shape[0]))
     def _weights(V):
-        weights = zeros((C, N))
+        weights = zeros((N0, N1))
         A = X_control.dot(2*V).dot(X_control.T) + L2_PEN_W_mat # 5
         B = X_treated.dot(2*V).dot(X_control.T).T # 6
         b = linalg.solve(A,B)
         if intercept:
-            weights = b + 1/C
+            weights = b + 1/N0
         else:
             weights = b
         return weights, A, B,b
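The _weights closure above solves a ridge-penalized linear system once per candidate V. A standalone numpy sketch of that computation, with the function name and argument order chosen here only for illustration:

import numpy as np

def ct_weights_sketch(X_control, X_treated, V, L2_PEN_W, intercept=True):
    # N0 x N0 penalized Gram matrix of the controls, and its N0 x N1 cross-product with the treated units
    A = X_control.dot(2 * V).dot(X_control.T) + 2 * L2_PEN_W * np.eye(X_control.shape[0])
    B = X_treated.dot(2 * V).dot(X_control.T).T
    w = np.linalg.solve(A, B)              # N0 x N1 matrix of synthetic-control weights
    if intercept:
        w = w + 1.0 / X_control.shape[0]   # spread the intercept evenly over the N0 controls
    return w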
@@ -154,7 +154,7 @@ def ct_v_matrix(X,
     # _do_gradient_check()
     if intercept:
         # not above, b/c Y_treated was already offset at the start
-        weights += 1/C
+        weights += 1/N0
     return weights, v_mat, ts_score, ts_loss, L2_PEN_W, opt

 def ct_weights(X, V, L2_PEN_W, treated_units = None, control_units = None, intercept = True):

@@ -166,7 +166,7 @@ def ct_weights(X, V, L2_PEN_W, treated_units = None, control_units = None, inter
     if control_units is None:
         control_units = list(set(range(X.shape[0])) - set(treated_units))

-    C = len(control_units)
+    N0 = len(control_units)
     X_treated = X[treated_units,:]
     X_control = X[control_units,:]

@@ -175,7 +175,7 @@ def ct_weights(X, V, L2_PEN_W, treated_units = None, control_units = None, inter

     weights = linalg.solve(A,B)
     if intercept:
-        weights += 1/C
+        weights += 1/N0
     return weights.T

 def ct_score(Y, X, V, L2_PEN_W, LAMBDA = 0, treated_units = None, control_units = None,**kwargs):

@@ -105,11 +105,11 @@ def fold_v_matrix(X,
                 (i, len(treated_units),len(split[0]), len(split[1]), ))

     # CONSTANTS
-    C, N, K = len(control_units), len(treated_units), X.shape[1]
+    N0, N1, K = len(control_units), len(treated_units), X.shape[1]
     if start is None:
         start = zeros(K) # formerly: .1 * ones(K)
-    assert N > 0, "No control units"
+    assert N1 > 0, "No control units"
-    assert C > 0, "No treated units"
+    assert N0 > 0, "No treated units"
     assert K > 0, "variables to fit (X.shape[1] == 0)"

     # CREATE THE INDEX THAT INDICATES THE ELIGIBLE CONTROLS FOR EACH TREATED UNIT

@@ -131,9 +131,9 @@ def fold_v_matrix(X,
     Y_control = Y[control_units,:]

     # INITIALIZE PARTIAL DERIVATIVES
-    dA_dV_ki = [ [None,] *N for i in range(K)]
+    dA_dV_ki = [ [None,] *N1 for i in range(K)]
-    dB_dV_ki = [ [None,] *N for i in range(K)]
+    dB_dV_ki = [ [None,] *N1 for i in range(K)]
-    b_i = [None,] *N
+    b_i = [None,] *N1
     for i, k in itertools.product(range(len(splits)), range(K)): # TREATED unit i, moment k
         _, test = splits[i]
         Xc = X[in_controls[i], : ]

@@ -160,7 +160,7 @@ def fold_v_matrix(X,
         weights, A, _ = _weights(dv)
         Ey = (Y_treated - weights.T.dot(Y_control)).getA()
         dGamma0_dV_term2 = zeros(K)
-        dPI_dV = zeros((C, N))
+        dPI_dV = zeros((N0, N1))
         for k in range(K):
             if verbose: # for large sample sizes, linalg.solve is a huge bottle neck,
                 print("Calculating gradient, for moment %s of %s" % (k ,K,))

@@ -176,7 +176,7 @@ def fold_v_matrix(X,
         return LAMBDA - 2 * dGamma0_dV_term2

     def _weights(V):
-        weights = zeros((C, N))
+        weights = zeros((N0, N1))
         A = X.dot(V + V.T).dot(X.T) + 2 * L2_PEN_W * diag(ones(X.shape[0])) # 5
         B = X.dot(V + V.T).dot(X.T).T # 6
         for i, (_,test) in enumerate(splits):

@@ -235,7 +235,7 @@ def fold_weights(X,
         control_units = list(set(range(X.shape[0])) - set(treated_units))
     control_units = np.array(control_units)
     treated_units = np.array(treated_units)
-    [C, N] = [len(control_units), len(treated_units)]
+    [N0, N1] = [len(control_units), len(treated_units)]

     splits = grad_splits # for readability...
     try:

@@ -249,7 +249,7 @@ def fold_weights(X,
     in_controls = [list(set(control_units) - set(treated_units[test])) for _,test in splits]
     in_controls2 = [np.ix_(i,i) for i in in_controls] # this is a much faster alternative to A[:,index][index,:]

-    # index of the controls relative to the rows of the outgoing C x N matrix of weights
+    # index of the controls relative to the rows of the outgoing N0 x N1 matrix of weights
     ctrl_rng = np.arange(len(control_units))
     out_controls = [ctrl_rng[np.logical_not(np.isin(control_units, treated_units[test]))] for _,test in splits]
     # this is non-trivial when there control units are also being predicted:
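On the np.ix_ comment above, a quick illustration of why it replaces the chained indexing A[:,index][index,:] (toy values):

import numpy as np

A = np.arange(36).reshape(6, 6)
idx = [0, 2, 5]                    # eligible controls for one fold
sub = A[np.ix_(idx, idx)]          # the same 3 x 3 block as A[:, idx][idx, :], built in one indexing step
assert (sub == A[:, idx][idx, :]).all()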
@@ -258,7 +258,7 @@ def fold_weights(X,
     # constants for indexing
     # X_control = X[control_units,:]
     # X_treat = X[treated_units,:]
-    weights = zeros((C, N))
+    weights = zeros((N0, N1))

     A = X.dot(V + V.T).dot(X.T) + 2 * L2_PEN_W * diag(ones(X.shape[0])) # 5
     B = X.dot(V + V.T).dot(X.T).T # 6

@@ -7,19 +7,19 @@ import warnings
 from RidgeSC.optimizers.cd_line_search import cdl_search
 warnings.filterwarnings('ignore')

-def complete_treated_control_list(C_N, treated_units = None, control_units = None):
+def complete_treated_control_list(N, treated_units = None, control_units = None):
     if treated_units is None:
         if control_units is None:
             # both not provided, include all samples as both treat and control unit.
-            control_units = list(range(C_N))
+            control_units = list(range(N))
             treated_units = control_units
         else:
             # Set the treated units to the not-control units
-            treated_units = list(set(range(C_N)) - set(control_units))
+            treated_units = list(set(range(N)) - set(control_units))
     else:
         if control_units is None:
             # Set the control units to the not-treated units
-            control_units = list(set(range(C_N)) - set(treated_units))
+            control_units = list(set(range(N)) - set(treated_units))
     return(treated_units, control_units)

 def loo_v_matrix(X,
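To illustrate the complete_treated_control_list defaults shown above (toy values; the complements are built from sets, so list order is not guaranteed):

from RidgeSC.fit_loo import complete_treated_control_list  # module path inferred from the imports at the top of this diff

# treated indices given, controls default to the complement of the N = 5 units
treated, controls = complete_treated_control_list(5, treated_units=[0, 1])
# treated  -> [0, 1]
# controls -> [2, 3, 4] (order may vary)

# neither list given: every unit serves as both treated and control
treated, controls = complete_treated_control_list(3)
# treated == controls == [0, 1, 2]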
@@ -90,11 +90,11 @@ def loo_v_matrix(X,
     assert not non_neg_weights, "Bounds not implemented"

     # CONSTANTS
-    C, N, K = len(control_units), len(treated_units), X.shape[1]
+    N0, N1, K = len(control_units), len(treated_units), X.shape[1]
     if start is None:
         start = zeros(K) # formerly: .1 * ones(K)
-    assert N > 0, "No control units"
+    assert N1 > 0, "No control units"
-    assert C > 0, "No treated units"
+    assert N0 > 0, "No treated units"
     assert K > 0, "variables to fit (X.shape[1] == 0)"

     # CREATE THE INDEX THAT INDICATES THE ELIGIBLE CONTROLS FOR EACH TREATED UNIT

@@ -117,10 +117,10 @@ def loo_v_matrix(X,
     # only used by step-down method: X_control = X[control_units,:]

     # INITIALIZE PARTIAL DERIVATIVES
-    dA_dV_ki = [ [None,] *N for i in range(K)]
+    dA_dV_ki = [ [None,] *N1 for i in range(K)]
-    dB_dV_ki = [ [None,] *N for i in range(K)]
+    dB_dV_ki = [ [None,] *N1 for i in range(K)]
-    b_i = [None,] *N
+    b_i = [None,] *N1
-    for i, k in itertools.product(range(N), range(K)): # TREATED unit i, moment k
+    for i, k in itertools.product(range(N1), range(K)): # TREATED unit i, moment k
         Xc = X[in_controls[i], : ]
         Xt = X[treated_units[i], : ]
         dA_dV_ki [k][i] = Xc[:, k ].dot(Xc[:, k ].T) + Xc[:, k ].dot(Xc[:, k ].T) # 8

@@ -148,7 +148,7 @@ def loo_v_matrix(X,
         weights, A, _ = _weights(dv)
         Ey = (Y_treated - weights.T.dot(Y_control)).getA()
         dGamma0_dV_term2 = zeros(K)
-        dPI_dV = zeros((C, N))
+        dPI_dV = zeros((N0, N1))
         # if solve_method == "step-down": Ai_cache = all_subinverses(A)
         for k in range(K):
             if verbose: # for large sample sizes, linalg.solve is a huge bottle neck,

@@ -171,7 +171,7 @@ def loo_v_matrix(X,
         return LAMBDA - 2 * dGamma0_dV_term2

     def _weights(V):
-        weights = zeros((C, N))
+        weights = zeros((N0, N1))
         if solve_method == "step-down":
             raise NotImplementedError("The solve_method 'step-down' is currently not implemented")
             # A = X_control.dot(V + V.T).dot(X_control.T) + 2 * L2_PEN_W * diag(ones(X_control.shape[0])) # 5

@@ -227,14 +227,14 @@ def loo_weights(X, V, L2_PEN_W, treated_units = None, control_units = None, inte
     treated_units, control_units = complete_treated_control_list(X.shape[0], treated_units, control_units)
     control_units = np.array(control_units)
     treated_units = np.array(treated_units)
-    [C, N] = [len(control_units), len(treated_units)]
+    [N0, N1] = [len(control_units), len(treated_units)]


     # index with positions of the controls relative to the incoming data
     in_controls = [list(set(control_units) - set([trt_unit])) for trt_unit in treated_units]
     in_controls2 = [np.ix_(i,i) for i in in_controls] # this is a much faster alternative to A[:,index][index,:]

-    # index of the controls relative to the rows of the outgoing C x N matrix of weights
+    # index of the controls relative to the rows of the outgoing N0 x N1 matrix of weights
     ctrl_rng = np.arange(len(control_units))
     out_controls = [ctrl_rng[control_units != trt_unit] for trt_unit in treated_units]
     # this is non-trivial when there control units are also being predicted:

@@ -243,7 +243,7 @@ def loo_weights(X, V, L2_PEN_W, treated_units = None, control_units = None, inte
     # constants for indexing
     # > only used by the step-down method (currently not implemented) X_control = X[control_units,:]
     # > only used by the step-down method (currently not implemented) X_treat = X[treated_units,:]
-    weights = zeros((C, N))
+    weights = zeros((N0, N1))

     if solve_method == "step-down":
         raise NotImplementedError("The solve_method 'step-down' is currently not implemented")

@@ -3,14 +3,14 @@ import numpy as np
 import random
 import RidgeSC as SC

-def ge_dgp(C,N,T0,T1,K,S,R,groups,group_scale,beta_scale,confounders_scale,model= "full"):
+def ge_dgp(N0,N1,T0,T1,K,S,R,groups,group_scale,beta_scale,confounders_scale,model= "full"):
     """
     From example-code.py
     """

     # COVARIATE EFFECTS
-    X_control = np.matrix(np.random.normal(0,1,((C)*groups, K+S+R)))
+    X_control = np.matrix(np.random.normal(0,1,((N0)*groups, K+S+R)))
-    X_treated = np.matrix(np.random.normal(0,1,((N)*groups, K+S+R)))
+    X_treated = np.matrix(np.random.normal(0,1,((N1)*groups, K+S+R)))

     # CAUSAL
     b_cause = np.random.exponential(1,K)

@@ -26,19 +26,19 @@ def ge_dgp(C,N,T0,T1,K,S,R,groups,group_scale,beta_scale,confounders_scale,model
     # GROUP EFFECTS (hidden)

     Y_pre_group_effects = np.random.normal(0,group_scale,(groups,T0))
-    Y_pre_ge_control = Y_pre_group_effects[np.repeat(np.arange(groups),C)]
+    Y_pre_ge_control = Y_pre_group_effects[np.repeat(np.arange(groups),N0)]
-    Y_pre_ge_treated = Y_pre_group_effects[np.repeat(np.arange(groups),N)]
+    Y_pre_ge_treated = Y_pre_group_effects[np.repeat(np.arange(groups),N1)]

     Y_post_group_effects = np.random.normal(0,group_scale,(groups,T1))
-    Y_post_ge_control = Y_post_group_effects[np.repeat(np.arange(groups),C)]
+    Y_post_ge_control = Y_post_group_effects[np.repeat(np.arange(groups),N0)]
-    Y_post_ge_treated = Y_post_group_effects[np.repeat(np.arange(groups),N)]
+    Y_post_ge_treated = Y_post_group_effects[np.repeat(np.arange(groups),N1)]

     # RANDOM ERRORS
-    Y_pre_err_control = np.matrix(np.random.random( ( C*groups, T0, ) ))
+    Y_pre_err_control = np.matrix(np.random.random( ( N0*groups, T0, ) ))
-    Y_pre_err_treated = np.matrix(np.random.random( ( N*groups, T0, ) ))
+    Y_pre_err_treated = np.matrix(np.random.random( ( N1*groups, T0, ) ))

-    Y_post_err_control = np.matrix(np.random.random( ( C*groups, T1, ) ))
+    Y_post_err_control = np.matrix(np.random.random( ( N0*groups, T1, ) ))
-    Y_post_err_treated = np.matrix(np.random.random( ( N*groups, T1, ) ))
+    Y_post_err_treated = np.matrix(np.random.random( ( N1*groups, T1, ) ))

     # THE DATA GENERATING PROCESS

@@ -78,11 +78,11 @@ def ge_dgp(C,N,T0,T1,K,S,R,groups,group_scale,beta_scale,confounders_scale,model
     return X_control, X_treated, Y_pre_control, Y_pre_treated, Y_post_control, Y_post_treated


-def factor_dgp(C,N,T0,T1,K,R,F):
+def factor_dgp(N0,N1,T0,T1,K,R,F):

     # COVARIATE EFFECTS
-    X_control = np.matrix(np.random.normal(0,1,((C), K+R)))
+    X_control = np.matrix(np.random.normal(0,1,((N0), K+R)))
-    X_treated = np.matrix(np.random.normal(0,1,((N), K+R)))
+    X_treated = np.matrix(np.random.normal(0,1,((N1), K+R)))

     b_cause = np.random.exponential(1,K)
     b_cause *= b_cause.max()

@@ -90,8 +90,8 @@ def factor_dgp(C,N,T0,T1,K,R,F):
     beta = np.matrix(np.concatenate( ( b_cause , np.zeros(R)) ) ).T

     # FACTORS
-    Loadings_control = np.matrix(np.random.normal(0,1,((C), F)))
+    Loadings_control = np.matrix(np.random.normal(0,1,((N0), F)))
-    Loadings_treated = np.matrix(np.random.normal(0,1,((N), F)))
+    Loadings_treated = np.matrix(np.random.normal(0,1,((N1), F)))

     Factors_pre = np.matrix(np.random.normal(0,1,((F), T0)))
     Factors_post = np.matrix(np.random.normal(0,1,((F), T1)))

@@ -99,10 +99,10 @@ def factor_dgp(C,N,T0,T1,K,R,F):


     # RANDOM ERRORS
-    Y_pre_err_control = np.matrix(np.random.random( ( C, T0, ) ))
+    Y_pre_err_control = np.matrix(np.random.random( ( N0, T0, ) ))
-    Y_pre_err_treated = np.matrix(np.random.random( ( N, T0, ) ))
+    Y_pre_err_treated = np.matrix(np.random.random( ( N1, T0, ) ))
-    Y_post_err_control = np.matrix(np.random.random( ( C, T1, ) ))
+    Y_post_err_control = np.matrix(np.random.random( ( N0, T1, ) ))
-    Y_post_err_treated = np.matrix(np.random.random( ( N, T1, ) ))
+    Y_post_err_treated = np.matrix(np.random.random( ( N1, T1, ) ))

     # OUTCOMES
     Y_pre_control = X_control.dot(beta) + Loadings_control.dot(Factors_pre) + Y_pre_err_control

@@ -116,10 +116,10 @@ def factor_dgp(C,N,T0,T1,K,R,F):

 class TestDGPs(unittest.TestCase):
     def testFactorDGP(self):
-        N, C = 1,100
+        N1, N0 = 1,100
         T0,T1 = 20, 10
         K, R, F = 5, 5, 5
-        X_control, X_treated, Y_pre_control, Y_pre_treated, Y_post_control, Y_post_treated = factor_dgp(C,N,T0,T1,K,R,F)
+        X_control, X_treated, Y_pre_control, Y_pre_treated, Y_post_control, Y_post_treated = factor_dgp(N0,N1,T0,T1,K,R,F)

         Y_post = np.vstack( (Y_post_treated,Y_post_control, ) )
         X = np.vstack( (X_treated, X_control, ) )

@@ -16,7 +16,7 @@ import numpy as np
 import random


-# def setup(C,N,T,K,g,gs,bs ): # controls (C), treated units (N) , time periods (T), Predictors (K), groups (g), g-scale, b-scale
+# def setup(N0,N1,T,K,g,gs,bs ): # controls (N0), treated units (N1) , time periods (T), Predictors (K), groups (g), g-scale, b-scale

 if __name__ == "__main__":

@@ -32,7 +32,7 @@ if __name__ == "__main__":
     np.random.seed(10101)

     # Controls (per group), Treated (per group), pre-intervention Time points, post intervention time-periods
-    C,N,T0,T1 = 5,5,4,4
+    N0,N1,T0,T1 = 5,5,4,4
     # Causal Covariates, Confounders , Random Covariates
     K,S,R = 7,4,5
     # Number of Groups, Scale of the Group Effect, (groups make the time series correlated for reasons other than the observed covariates...)

@@ -41,7 +41,7 @@ if __name__ == "__main__":
     beta_scale,confounders_scale = 4,1

     X_control, X_treated, Y_pre_control, Y_pre_treated, \
-        Y_post_control, Y_post_treated = ge_dgp(C,N,T0,T1,K,S,R,groups,group_scale,beta_scale,confounders_scale,model= "full")
+        Y_post_control, Y_post_treated = ge_dgp(N0,N1,T0,T1,K,S,R,groups,group_scale,beta_scale,confounders_scale,model= "full")

     # JOIN THE TREAT AND CONTROL DATA
     X = np.vstack( (X_control, X_treated,) )

@@ -53,8 +53,8 @@ if __name__ == "__main__":
     X_and_Y_pre_control = np.hstack( ( X_control, Y_pre_control,) )

     # IDENTIFIERS FOR TREAT AND CONTROL UNITS
-    # control_units = np.arange( C * groups )
+    # control_units = np.arange( N0 * groups )
-    # treated_units = np.arange( N * groups ) + C
+    # treated_units = np.arange( N1 * groups ) + N0

     # ------------------------------------------------------------
     # ------------------------------------------------------------