This commit is contained in:
2018-05-21 15:42:54 -07:00
Parent 5fb3503f4e
Commit 93588155bf
5 changed files: 217 additions and 156 deletions

View file

@@ -13,3 +13,19 @@ Scipy 1.0.1 )
For now, refer to the example code in `./example-code.py` for usage
details.
### Performance Notes
For larger sample sizes (numbers of control units), almost all of the running
time is spent calculating `np.linalg.solve(A,B)`, where `A` is a `C x C`
matrix and `B` is a `C x T` matrix, with `C` the number of control
units and `T` the number of treated units. Because the LAPACK library
is already parallelized, passing `parallel = True` to the `CV_score`
function (which runs each fold in a separate sub-process) will actually
tend to increase the running time.
This is especially true for leave-one-out gradient descent using only
controls, since `np.linalg.solve(A,B)` is called twice for each control
unit. With larger sample sizes it is more efficient to pass a value to
`grad_splits` in order to use k-fold gradient descent, in which case
`np.linalg.solve(A,B)` is called only twice per split.
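For example, a minimal sketch of the k-fold variant (shapes and penalty values are illustrative only, and the import name is an assumption; see `./example-code.py` for the real workflow):

```python
import numpy as np
import SparseSC as SC  # assumed import name; the example code uses the `SC` prefix

X = np.random.normal(size=(200, 10))  # controls x covariates
Y = np.random.normal(size=(200, 4))   # controls x post-intervention outcomes

scores = SC.CV_score(
    X=X, Y=Y,
    LAMBDA=np.logspace(-2, 2, 5),  # an array of L1 penalties yields an array of scores
    grad_splits=5,     # k-fold gradient descent: solve() runs twice per split,
                       # rather than twice per control unit
    parallel=False)    # LAPACK already parallelizes the solves
```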

View file

@@ -1,8 +1,10 @@
from fit_fold import fold_v_matrix  #, fold_score
from fit_loo import loo_v_matrix, loo_score
from fit_ct import ct_v_matrix, ct_score
from optimizers.cd_line_search import cdl_search
import numpy as np
import itertools
from concurrent import futures
def score_train_test(X,
@@ -11,7 +13,8 @@ def score_train_test(X,
test,
X_treat=None,
Y_treat=None,
FoldNumber=None, # for consistency with score_train_test_sorted_lambdas()
FoldNumber=None, # For consistency with score_train_test_sorted_lambdas()
grad_splits=None, # If present, use k-fold gradient descent. See fold_v_matrix for details
**kwargs):
""" presents a unified api for ct_v_matrix and loo_v_matrix
and returns the v_mat, l2_pen_w (possibly calculated, possibly a parameter), and the score
@@ -59,14 +62,27 @@ def score_train_test(X,
else:
">> K-fold validation on the only control units; assuming that Y contains post-intervention outcomes"
# FIT THE V-MATRIX AND POSSIBLY CALCULATE THE L2_PEN_W
# note that the weights, score, and loss function value returned here are for the in-sample predictions
weights, v_mat, ts_score, loss, l2_pen_w, _ = \
loo_v_matrix(X = X[train, :],
Y = Y[train, :],
# treated_units = [X.shape[0] + i for i in range(len(train))],
method = cdl_search,
**kwargs)
if grad_splits is not None:
# FIT THE V-MATRIX AND POSSIBLY CALCULATE THE L2_PEN_W
# note that the weights, score, and loss function value returned here are for the in-sample predictions
weights, v_mat, ts_score, loss, l2_pen_w, _ = \
fold_v_matrix(X = X[train, :],
Y = Y[train, :],
# treated_units = [X.shape[0] + i for i in range(len(train))],
method = cdl_search,
**kwargs)
else:
# FIT THE V-MATRIX AND POSSIBLY CALCULATE THE L2_PEN_W
# note that the weights, score, and loss function value returned here are for the in-sample predictions
weights, v_mat, ts_score, loss, l2_pen_w, _ = \
loo_v_matrix(X = X[train, :],
Y = Y[train, :],
# treated_units = [X.shape[0] + i for i in range(len(train))],
method = cdl_search,
**kwargs)
# GET THE OUT-OF-SAMPLE PREDICTION ERROR
s = loo_score(X = X, Y = Y,
@@ -149,17 +165,20 @@ def CV_score(X,Y,LAMBDA,X_treat=None,Y_treat=None,splits=5,sub_splits=None,quiet
if X_treat.shape[0] != Y_treat.shape[0]:
raise ValueError("X_treat and Y_treat have different number of rows (%s and %s)" % (X.shape[0], Y.shape[0],))
# MESSAGING
if not quiet:
print("K-fold validation with %s control and %s treated units %s predictors and %s outcomes, holding out one fold among Treated units; Assumes that `Y` and `Y_treat` are pre-intervention outcomes" %
(X.shape[0] , X_treat.shape[0],X.shape[1],Y.shape[1],))
if isinstance(splits, (int)) or np.issubdtype(splits, np.integer):
try:
iter(splits)
except TypeError:
from sklearn.model_selection import KFold
splits = KFold(splits).split(np.arange(X_treat.shape[0]))
train_test_splits = [ x for x in splits ]
n_splits = len(train_test_splits)
# MESSAGING
if not quiet:
print("%s-fold validation with %s control and %s treated units %s predictors and %s outcomes, holding out one fold among Treated units; Assumes that `Y` and `Y_treat` are pre-intervention outcomes" %
(n_splits, X.shape[0] , X_treat.shape[0],X.shape[1],Y.shape[1],))
if parallel:
@@ -209,17 +228,20 @@ def CV_score(X,Y,LAMBDA,X_treat=None,Y_treat=None,splits=5,sub_splits=None,quiet
else: # X_treat *is* None
# MESSAGING
if not quiet:
print("Leave-one-out Cross Validation with %s control units, %s predictors and %s outcomes; Y may contain post-intervention outcomes" %
(X.shape[0],X.shape[1],Y.shape[1],) )
if isinstance(splits, (int)) or np.issubdtype(splits, np.integer):
try:
iter(splits)
except TypeError:
from sklearn.model_selection import KFold
splits = KFold(splits).split(np.arange(X.shape[0]))
train_test_splits = [ x for x in splits ]
n_splits = len(train_test_splits)
# MESSAGING
if not quiet:
print("%s-fold Cross Validation with %s control units, %s predictors and %s outcomes; Y may contain post-intervention outcomes" %
(n_splits, X.shape[0],X.shape[1],Y.shape[1],) )
if parallel:
if max_workers is None:

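The `splits` argument is effectively duck-typed by the `try: iter(splits)` blocks above: an integer is expanded into train/test index pairs with scikit-learn's `KFold`, while any iterable of `(train, test)` pairs is consumed as-is. A standalone sketch of the pattern (sizes are hypothetical):

from sklearn.model_selection import KFold
import numpy as np

splits = 5  # or an explicit iterable such as [(np.arange(80), np.arange(80, 100)), ...]
try:
    iter(splits)                  # already an iterable of (train, test) pairs?
except TypeError:
    splits = KFold(splits).split(np.arange(100))  # expand the integer into K folds
train_test_splits = list(splits)  # materialize so the folds can be counted and reused
n_splits = len(train_test_splits)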
View file

@@ -20,8 +20,12 @@ import random
if __name__ == "__main__":
# ------------------------------------------------------------
# CONTROL PARAMETERS
# ------------------------------------------------------------
# SECTION 1: GENERATE SOME TOY DATA
# ------------------------------------------------------------
# ------------------------------------------------------------
# CONTROL PARAMETERS
random.seed(12345)
np.random.seed(10101)
@@ -36,9 +40,6 @@ if __name__ == "__main__":
beta_scale,confounders_scale = 4,1
# ------------------------------------------------------------
# SIMULATE SOME DATA
# ------------------------------------------------------------
# COVARIATE EFFECTS
X_control = np.matrix(np.random.normal(0,1,((C)*groups, K+S+R)))
@@ -120,9 +121,11 @@ if __name__ == "__main__":
# control_units = np.arange( C * groups )
# treated_units = np.arange( N * groups ) + C
# ------------------------------------------------------------
# ------------------------------------------------------------
# find default penalties
# ------------------------------------------------------------
# ------------------------------------------------------------
# get starting point for the L2 penalty
L2_pen_start_ct = SC.L2_pen_guestimate(X_control)
@@ -130,8 +133,11 @@ if __name__ == "__main__":
# get the maximum value for the L1 Penalty parameter conditional on the guestimate for the L2 penalty
L1_max_ct = SC.get_max_lambda(X_control,Y_pre_control,X_treat=X_treated,Y_treat=Y_pre_treated)
L1_max_loo = SC.get_max_lambda(X_and_Y_pre,Y_post)
print("Max L1")
if False:
L1_max_loo = SC.get_max_lambda(X_and_Y_pre[np.arange(100)],Y_post[np.arange(100)])
print("Max L1 loo %s " % L1_max_loo)
else:
L1_max_loo = np.float(147975295.9121998)
if False:
"Demonstrate relations between the L1 and L2 penalties"
@@ -182,6 +188,7 @@ if __name__ == "__main__":
# ------------------------------------------------------------
if False:
print("starting grid scoring for treat / control scenario", grid*L1_max_ct)
grid_scores_ct = SC.CV_score(
X = X_control,
@@ -208,8 +215,36 @@ if __name__ == "__main__":
best_L1_penalty_ct = (grid * L1_max_ct)[np.argmin(grid_scores_ct)]
if False:
print("Starting grid scoring for Controls Only scenario with 5-fold gradient descent", grid*L1_max_ct)
grid_scores_loo = SC.CV_score(
X = X_and_Y_pre,
Y = Y_post,
# this is what enables the k-fold gradient descent
grad_splits = 5,
# L1 Penalty. If LAMBDA is a single value (float), we get a single score; if it's an array of values, we get an array of scores.
LAMBDA = grid * L1_max_loo,
# L2 Penalty (float)
L2_PEN_W = L2_pen_start_loo,
# CACHE THE V MATRIX BETWEEN LAMBDA PARAMETERS (generally faster, but path dependent)
#cache = True, # False by Default
# Run each of the Cross-validation folds in parallel? Often slower
# for large sample sizes because numpy.linalg.solve() already runs
# in parallel for large matrices
parallel=False,
# ANNOUNCE COMPLETION OF EACH ITERATION
progress = True)
if False:
# even with smaller data, this takes a while.
print("starting grid scoring for Controls Only scenario", grid*L1_max_ct)
print("Starting grid scoring for Controls Only scenario with leave-one-out gradient descent", grid*L1_max_ct)
grid_scores_loo = SC.CV_score(
X = X_and_Y_pre [np.arange(100)], # limit the amount of time...
Y = Y_post [np.arange(100)], # limit the amount of time...
@@ -295,7 +330,6 @@ if __name__ == "__main__":
import pdb; pdb.set_trace()
# ---------------------------------------------------------------------------
# ---------------------------------------------------------------------------
# Optimization of the L1 and L2 parameters together (second order)
@@ -306,9 +340,12 @@ if __name__ == "__main__":
import time
# -----------------------------------------------------------------
# Optimization of the L2 and L1 Penalties Simultaneously, keeping their product constant
# Optimization of the L2 and L1 Penalties Simultaneously, keeping their
# product constant. Heuristically, this has been the most efficient means of
# optimizing the L2 Parameter.
# -----------------------------------------------------------------
# build the objective function to be minimized
# cache for L2_obj_func
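A sketch of the product-constant idea described above (the real script builds `L2_obj_func` around a cached `CV_score` call; the quadratic `cv_error` below is a stand-in so the snippet runs on its own): holding the product `P = L1 * L2` fixed turns the two-dimensional penalty search into a one-dimensional search over the ratio.

import numpy as np
from scipy.optimize import minimize_scalar

def cv_error(L1, L2):  # stand-in for a (cached) cross-validation score at (L1, L2)
    return (np.log(L1) - 1.0) ** 2 + 0.1 * (np.log(L2) + 2.0) ** 2

P = 10.0  # the fixed product L1 * L2, e.g. taken from the starting penalties
def L2_obj_func(log_ratio):
    L1 = np.sqrt(P) * np.exp(log_ratio)   # L1 * L2 == P for every log_ratio
    L2 = np.sqrt(P) * np.exp(-log_ratio)
    return cv_error(L1, L2)

opt = minimize_scalar(L2_obj_func)  # one-dimensional search instead of a 2-D grid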

View file

@@ -20,8 +20,7 @@ def fold_v_matrix(X,
method = cdl_search,
intercept = True,
max_lambda = False, # this is terrible, at least without documentation...
solve_method = "standard",
splits = 5,
grad_splits = 5,
**kwargs):
'''
Computes and sets the optimal v_matrix for the given moments and
@@ -33,11 +32,18 @@ def fold_v_matrix(X,
:param treated_units: a list containing the position (rows) of the treated units within X and Y
:param control_units: a list containing the position (rows) of the control units within X and Y
:param start: initial values for the diagonals of the tensor matrix
:param L2_PEN_W: L2 penalty on the magnitude of the deviance of the weight vector from null. Optional.
:param method: The name of a method to be used by scipy.optimize.minimize, or a callable with the same API as scipy.optimize.minimize
:param intercept: If True, weights are penalized toward 1 / the number of controls, else weights are penalized toward zero
:param max_lambda: if True, the return value is the maximum L1 penalty for which at least one element of the tensor matrix is non-zero
:param solve_method: Method for solving A.I.dot(B). Either "standard" or "step-down".
:param L2_PEN_W: L2 penalty on the magnitude of the deviance of the weight
vector from null. Optional.
:param method: The name of a method to be used by scipy.optimize.minimize,
or a callable with the same API as scipy.optimize.minimize
:param intercept: If True, weights are penalized toward 1 / the number
of controls, else weights are penalized toward zero
:param max_lambda: if True, the return value is the maximum L1 penalty for
which at least one element of the tensor matrix is
non-zero
:param grad_splits: Splits between fitted vs. control units in each gradient
descent step. An integer, or a list/generator of train
and test units in each fold of the gradient descent.
:param **kwargs: additional arguments passed to the optimizer
'''
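As the docstring notes, `grad_splits` accepts either an integer (expanded with scikit-learn's `KFold`, as in the body below) or explicit folds. A hypothetical example of the explicit form; note that each fold's train and test indices together must cover every treated unit, per the assert further down:

import numpy as np
# two hand-made folds over 50 hypothetical control/treated units:
grad_splits = [(np.arange(0, 40), np.arange(40, 50)),
               (np.arange(10, 50), np.arange(0, 10))]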
@@ -45,7 +51,7 @@ def fold_v_matrix(X,
if treated_units is None:
if control_units is None:
# Neither provided; INCLUDE ALL SAMPLES AS BOTH TREAT AND CONTROL UNIT.
# (this is the typical controls-only Loo V-matrix estimation)
# (this is the typical controls-only fold V-matrix estimation)
control_units = list(range(X.shape[0]))
treated_units = control_units
else:
@@ -69,11 +75,14 @@ def fold_v_matrix(X,
if not isinstance(L2_PEN_W, (float, int)): raise TypeError( "L2_PEN_W is not a number")
assert not non_neg_weights, "Bounds not implemented"
splits = grad_splits # for readability...
if isinstance(splits, (int)) or np.issubdtype(splits, np.integer):
from sklearn.model_selection import KFold
splits = KFold(splits).split(np.arange(X_treat.shape[0]))
splits = KFold(splits).split(np.arange(len(control_units)))
splits = list(splits)
n_splits = len(splits) # grad_splits may be an int; splits is always a list by this point
for i in range(len(splits)):
assert len(splits[i][0]) + len(splits[i][1]) == len(treated_units), ("Splits for fold %s do not match the number of treated units" % i)
# CONSTANTS
C, N, K = len(control_units), len(treated_units), X.shape[1]
@@ -83,79 +92,97 @@ def fold_v_matrix(X,
assert K > 0, "variables to fit (X.shape[1] == 0)"
# CREATE THE INDEX THAT INDICATES THE ELIGIBLE CONTROLS FOR EACH TREATED UNIT
in_controls = [list(set(control_units) - set([trt_unit])) for trt_unit in treated_units]
in_controls = [list(set(control_units) - set(treated_units[test])) for train,test in splits]
in_controls2 = [np.ix_(i,i) for i in in_controls] # this is a much faster alternative to A[:,index][index,:]
ctrl_rng = np.arange(len(control_units))
out_controls = [ctrl_rng[control_units != trt_unit] for trt_unit in treated_units]
out_treated = [ctrl_rng[control_units == trt_unit] for trt_unit in treated_units] # this is non-trivial when control units are also being predicted.
out_controls = [ctrl_rng[np.logical_not(np.isin(control_units, treated_units[test]))] for train,test in splits]
out_treated = [ctrl_rng[ np.isin(control_units, treated_units[test]) ] for train,test in splits] # this is non-trivial when control units are also being predicted.
if intercept:
for i, trt_unit in enumerate(treated_units):
Y[trt_unit,:] -= Y[in_controls[i],:].mean(axis=0)
for in_ctrl, (train, test) in zip(in_controls,splits):
Y[treated_units[test],:] -= Y[in_ctrl,:].mean(axis=0)
# handy constants (for speed purposes):
Y_treated = Y[treated_units,:]
Y_control = Y[control_units,:]
# INITIALIZE PARTIAL DERIVATIVES
dA_dV_ki = [ [None,] *N for i in range(K)]
dB_dV_ki = [ [None,] *N for i in range(K)]
b_i = [None,] *N
for i, k in itertools.product(range(len(splits)), range(K)): # fold i, moment k
train, test = splits[i]
Xc = X[in_controls[i], : ]
Xt = X[treated_units[test], : ]
dA_dV_ki [k][i] = Xc[:, k ].dot(Xc[:, k ].T) + Xc[:, k ].dot(Xc[:, k ].T) # 8
dB_dV_ki [k][i] = Xc[:, k ].dot(Xt[:, k ].T) + Xc[:, k ].dot(Xt[:, k ].T) # 9
del Xc, Xt, i, k
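# For reference, the numeric tags ("# 5" ... "# 9") in the comments here and in
# _weights/_grad below appear to correspond to the following expressions,
# reconstructed from the code itself (the original equation numbering is an
# assumption, not confirmed by the source):
#   (5) A = Xc . (2V) . Xc' + 2 * L2_PEN_W * I   -- penalized Gram matrix of the controls
#   (6) B = Xc . (2V) . Xt'                      -- control x treated cross-products
#   (7) dPi/dV_k = A^-1 . (dB_k - dA_k . Pi), with Pi = A^-1 . B
#   (8) dA_k = Xc[:,k] . Xc[:,k]' + Xc[:,k] . Xc[:,k]'
#   (9) dB_k = Xc[:,k] . Xt[:,k]' + Xc[:,k] . Xt[:,k]'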
def __old_fold_v_matrix(Xt,Xc,Yt,Yc,LAMBDA = .1,L2_PEN_W = .1,guess = None,method = 'Newton-CG',**kwargs):
""" Calculates the V matrix with an explicit Treatment and Control folds,
where the match is optimized on the error of the outcomes.
def _score(V):
dv = diag(V)
weights, _, _ = _weights(dv)
Ey = (Y_treated - weights.T.dot(Y_control)).getA()
return ((Ey **2).sum() + LAMBDA * absolute(V).sum()).copy() # (...).copy() assures that x.flags.writeable is True
don't use this. It hasn't been thoroughly checked for bugs (compared
to loo_v_matrix and ct_v_matrix) and lacks several of the optimizations of loo_v_matrix
"""
C,K = Xc.shape; N,T = Yt.shape
two_L2_PEN_W = 2*L2_PEN_W*diag(ones(C))
def _score(diagV):
""" calculates just the diagonal of dGamma0_dV
def _grad(V):
""" Calculates just the diagonal of dGamma0_dV
There is an implementation that allows for all elements of V to be varied...
"""
V = diag(diagV)
A = Xc.dot(2*V).dot(Xc.T) + two_L2_PEN_W # 5
B = Xc.dot(2*V).dot(Xt.T) # 6
Pi = A.I.dot(B)
Ey = (Yt - Pi.T.dot(Yc)).getA()
return (Ey **2).sum() + LAMBDA * absolute(Pi.T).sum()
def _grad(diagV):
""" calculates just the diagonal of dGamma0_dV
"""
V = diag(diagV)
assert Xc.shape[0] == Yc.shape[0]
# CALCULATE CONSTANTS
A = Xc.dot(2*V).dot(Xc.T) + two_L2_PEN_W # 5
B = Xc.dot(2*V).dot(Xt.T) # 6
Ai = A.I
Pi = Ai.dot(B)
Ey = (Yt - Pi.T.dot(Yc)).getA()
# INITIALIZE PARTIAL DERIVATIVES
dGamma0_dV_term2 = zeros((K,K))
for i in range(K):
dA = Xc[:, i ].dot(Xc[:, i ].T) + Xc[:, i ].dot(Xc[:, i ].T) # 8
dB = Xc[:, i ].dot(Xt [:, i ].T) + Xc[:, i ].dot(Xt [:, i ].T) # 9
dPI_dV = Ai.dot(dB - dA.dot(Pi)) # 7
dGamma0_dV_term2[i,i] = (Ey * Yc.T.dot(dPI_dV).T.getA()).sum()
return diag(LAMBDA * (1 - 2*(V<0)) - 2 * dGamma0_dV_term2)
dv = diag(V)
weights, A, B = _weights(dv)
Ey = (Y_treated - weights.T.dot(Y_control)).getA()
dGamma0_dV_term2 = zeros(K)
dPI_dV = zeros((C, N))
for k in range(K):
dPI_dV.fill(0) # faster than re-allocating the memory each loop.
for i, (index, (train, test)) in enumerate(zip(in_controls,splits)):
dA = dA_dV_ki[k][i]
dB = dB_dV_ki[k][i]
b = linalg.solve(A[in_controls2[i]],dB - dA.dot(b_i[i]))
dPI_dV[np.ix_(in_controls[i], treated_units[test])] = b
dGamma0_dV_term2[k] = (Ey * Y_control.T.dot(dPI_dV).T.getA()).sum()
return LAMBDA - 2 * dGamma0_dV_term2 # * (1 - 2*(V < 0)) was dropped b/c we're restricting to positive V's
L2_PEN_W_mat = 2 * L2_PEN_W * diag(ones(C))
def _weights(V):
A = Xc.dot(2*V).dot(Xc.T) + two_L2_PEN_W # 5
B = Xc.dot(2*V).dot(Xt.T) # 6
#for col in treated_unit:
#w_col = A[-col,-col].I.dot(B[:,col])
return A.I.dot(B).T
weights = zeros((C, N))
A = X.dot(V + V.T).dot(X.T) + 2 * L2_PEN_W * diag(ones(X.shape[0])) # 5
B = X.dot(V + V.T).dot(X.T).T # 6
for i, (train,test) in enumerate(splits):
b = b_i[i] = linalg.solve(A[in_controls2[i]], B[np.ix_(in_controls[i], treated_units[test])])
weights[np.ix_(out_controls[i], test)] = b
return weights, A, B
if max_lambda:
grad0 = _grad(zeros(K))
return -grad0[grad0 < 0].min()
# DO THE OPTIMIZATION
opt = minimize(_score, zeros(Xc.shape[1]) if guess is None else guess,
jac = _grad,
method = method, **kwargs)
if isinstance(method, str):
from scipy.optimize import minimize
opt = minimize(_score, start.copy(), jac = _grad, method = method, **kwargs)
else:
assert callable(method), "Method must be a valid method name for scipy.optimize.minimize or a minimizer"
opt = method(_score, start.copy(), jac = _grad, **kwargs)
v_mat = diag(opt.x)
ts_loss = opt.fun
# CALCULATE weights AND ts_score
weights = _weights(v_mat)
errors = Yt - weights.dot(Yc)
weights, _, _ = _weights(v_mat)
errors = Y_treated - weights.T.dot(Y_control)
ts_loss = opt.fun
ts_score = linalg.norm(errors) / sqrt(prod(errors.shape))
return weights, v_mat, ts_score, ts_loss, opt
if intercept:
for i in range(len(splits)):
weights[out_controls[i], i] += 1/len(out_controls[i])
return weights, v_mat, ts_score, ts_loss, L2_PEN_W, opt
def loo_weights(X, V, L2_PEN_W, treated_units = None, control_units = None, intercept = True, solve_method = "standard"):
# NOTE: this is here for completeness, but you will generally want to use loo_weights instead. A single set of weights is a lot less costly than a full gradient descent...
def fold_weights(X, V, L2_PEN_W, treated_units = None, control_units = None, intercept = True, splits = 5):
if treated_units is None:
if control_units is None:
# both not provided, include all samples as both treat and control unit.
@@ -172,48 +199,41 @@ def loo_weights(X, V, L2_PEN_W, treated_units = None, control_units = None, inte
treated_units = np.array(treated_units)
[C, N, K] = [len(control_units), len(treated_units), X.shape[1]]
if isinstance(splits, (int)) or np.issubdtype(splits, np.integer):
from sklearn.model_selection import KFold
splits = KFold(splits).split(np.arange(len(control_units)))
splits = list(splits)
# index with positions of the controls relative to the incoming data
in_controls = [list(set(control_units) - set([trt_unit])) for trt_unit in treated_units]
in_controls = [list(set(control_units) - set(treated_units[test])) for train,test in splits]
in_controls2 = [np.ix_(i,i) for i in in_controls] # this is a much faster alternative to A[:,index][index,:]
# index of the controls relative to the rows of the outgoing C x N matrix of weights
ctrl_rng = np.arange(len(control_units))
out_controls = [ctrl_rng[control_units != trt_unit] for trt_unit in treated_units]
out_treated = [ctrl_rng[control_units == trt_unit] for trt_unit in treated_units] # this is non-trivial when control units are also being predicted.
out_controls = [ctrl_rng[np.logical_not(np.isin(control_units, treated_units[test]))] for train,test in splits]
out_treated = [ctrl_rng[ np.isin(control_units, treated_units[test]) ] for train,test in splits] # this is non-trivial when control units are also being predicted.
# constants for indexing
X_control = X[control_units,:]
X_treat = X[treated_units,:]
weights = zeros((C, N))
if solve_method == "step-down":
A = X_control.dot(V + V.T).dot(X_control.T) + 2 * L2_PEN_W * diag(ones(X_control.shape[0])) # 5
B = X_treat.dot( V + V.T).dot(X_control.T) # 6
Ai = A.I
for i, trt_unit in enumerate(treated_units):
if trt_unit in control_units:
(b) = subinv_k(Ai,_k).dot(B[out_controls[i],i])
else:
(b) = Ai.dot(B[:, i])
A = X.dot(V + V.T).dot(X.T) + 2 * L2_PEN_W * diag(ones(X.shape[0])) # 5
B = X.dot(V + V.T).dot(X.T).T # 6
weights[out_controls[i], i] = b.flatten()
if intercept:
weights[out_controls[i], i] += 1/len(out_controls[i])
elif solve_method == "standard":
A = X.dot(V + V.T).dot(X.T) + 2 * L2_PEN_W * diag(ones(X.shape[0])) # 5
B = X.dot(V + V.T).dot(X.T).T # 6
for i, trt_unit in enumerate(treated_units):
(b) = linalg.solve(A[in_controls2[i]], B[in_controls[i], trt_unit])
weights[out_controls[i], i] = b.flatten()
if intercept:
weights[out_controls[i], i] += 1/len(out_controls[i])
else:
raise ValueError("Unknown Solve Method: " + solve_method)
for i, (train,test) in enumerate(splits):
b = linalg.solve(A[in_controls2[i]], B[np.ix_(in_controls[i], treated_units[test])])
indx2 = np.ix_(out_controls[i], test) # weight columns are indexed by the test fold, matching _weights in fold_v_matrix
weights[indx2] = b
if intercept:
weights[indx2] += 1/len(out_controls[i])
return weights.T
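The `np.ix_` construction used throughout really is equivalent to (and faster than) chained indexing, since it selects the submatrix in a single fancy-indexing pass; a quick self-contained check:

import numpy as np
A = np.arange(16).reshape(4, 4)
idx = [0, 2, 3]
assert np.array_equal(A[np.ix_(idx, idx)], A[idx][:, idx])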
def loo_score(Y, X, V, L2_PEN_W, LAMBDA = 0, treated_units = None, control_units = None,**kwargs):
# NOTE: this is here for completeness, but you will almost always want to use loo_score instead.
def fold_score(Y, X, V, L2_PEN_W, LAMBDA = 0, treated_units = None, control_units = None,**kwargs):
if treated_units is None:
if control_units is None:
# both not provided, include all samples as both treat and control unit.
@@ -226,7 +246,7 @@ def loo_score(Y, X, V, L2_PEN_W, LAMBDA = 0, treated_units = None, control_units
if control_units is None:
# Set the control units to the not-treated units
control_units = list(set(range(X.shape[0])) - set(treated_units))
weights = loo_weights(X = X,
weights = fold_weights(X = X,
V = V,
L2_PEN_W = L2_PEN_W,
treated_units = treated_units,

View file

@@ -35,7 +35,7 @@ def loo_v_matrix(X,
:param method: The name of a method to be used by scipy.optimize.minimize, or a callable with the same API as scipy.optimize.minimize
:param intercept: If True, weights are penalized toward 1 / the number of controls, else weights are penalized toward zero
:param max_lambda: if True, the return value is the maximum L1 penalty for which at least one element of the tensor matrix is non-zero
:param solve_method: Method for solving A.I.dot(B). Either "standard" or "step-down".
:param solve_method: Method for solving A.I.dot(B). Either "standard" or "step-down". https://math.stackexchange.com/a/208021/252693
:param **kwargs: additional arguments passed to the optimizer
'''
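The math.stackexchange answer linked above gives the block-inverse identity behind the "step-down" option: given `Ai = inv(A)`, the inverse of `A` with row/column `k` deleted can be recovered without a fresh factorization. A minimal self-contained sketch (the package's own `subinv_k` helper is assumed to do something equivalent; this is an illustration, not its source):

import numpy as np

def subinv_k_sketch(Ai, k):
    # inv(A with row/col k removed), computed from Ai = inv(A) via the
    # block-inverse identity: inv(A_11) = B_11 - b_12 . b_21 / b_22
    rest = [i for i in range(Ai.shape[0]) if i != k]
    return Ai[np.ix_(rest, rest)] - np.outer(Ai[rest, k], Ai[k, rest]) / Ai[k, k]

A = np.random.normal(size=(5, 5))
A = A @ A.T + 5 * np.eye(5)  # symmetric positive definite, well conditioned
Ai = np.linalg.inv(A)
rest = [0, 1, 3, 4]          # drop row/column k = 2
assert np.allclose(subinv_k_sketch(Ai, 2), np.linalg.inv(A[np.ix_(rest, rest)]))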
@@ -246,40 +246,6 @@ def loo_weights(X, V, L2_PEN_W, treated_units = None, control_units = None, inte
return weights.T
def __old__loo_weights(X, V, L2_PEN_W, treated_units = None, control_units = None, intercept = True):
assert False
if treated_units is None:
if control_units is None:
# both not provided, include all samples as both treat and control unit.
control_units = list(range(X.shape[0]))
treated_units = control_units
else:
# Set the treated units to the not-control units
treated_units = list(set(range(X.shape[0])) - set(control_units))
else:
if control_units is None:
# Set the control units to the not-treated units
control_units = list(set(range(X.shape[0])) - set(treated_units))
[C, N, K] = [len(control_units), len(treated_units), X.shape[1]]
index_i = [list(set(list(range(200))) - set([col])) for col in treated_units]
index_i2 = [np.ix_(i,i) for i in index_i] # this is a much faster alternative to A[:,index][index,:]
index_map = {c_unit : row_at for row_at, c_unit in enumerate(control_units)}
weights = zeros((C, N))
A = X.dot(V + V.T).dot(X.T) + 2 * L2_PEN_W * diag(ones(X.shape[0])) # 5
B = X.dot(V + V.T).dot(X.T).T # 6
for col, index in enumerate(index_i):
b = linalg.solve(A[index_i2[col]], B[index, col])
weights[np.vectorize(index_map.get)(index), col] = b.T
if intercept:
weights[index, col] += 1/len(index)
return weights
def loo_score(Y, X, V, L2_PEN_W, LAMBDA = 0, treated_units = None, control_units = None,**kwargs):
if treated_units is None:
if control_units is None: