Mirror of https://github.com/microsoft/SparseSC.git

fold_v_matrix now works.

Parent: 5fb3503f4e
Commit: 93588155bf

README.md | 16
@@ -13,3 +13,19 @@ Scipy 1.0.1 )
 For now, refer to the example code in `./example-code.py` for usage
 details.
+
+### Performance Notes
+
+For larger sample sizes (number of controls), almost all of the running
+time is spent calculating `np.linalg.solve(A,B)`, where `A` is a `C x C`
+matrix, `B` is a `C x T` matrix, `C` is the number of control
+units, and `T` is the number of treated units. Because the LAPACK library
+is already parallelized, passing `parallel = True` to the `CV_score`
+function (which runs each fold in a separate sub-process) will actually
+tend to increase the running time.
+
+This is especially true for leave-one-out gradient descent using only
+controls, as `np.linalg.solve(A,B)` will be called twice for each control
+unit. With larger sample sizes, it is more efficient to pass a value to
+`grad_splits` in order to use k-fold gradient descent, in which case
+`np.linalg.solve(A,B)` will be called twice for each split.
 
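The notes above correspond to a call pattern like the following sketch. It is adapted from `example-code.py` later in this commit; the `SparseSC` import name, the toy data, and the penalty values are placeholders rather than values from the repository:

```python
import numpy as np
import SparseSC as SC  # assumed import name; the example code aliases the package as SC

# Toy data: C = 200 control units, K = 10 predictors, T = 8 outcome periods.
X = np.matrix(np.random.normal(size=(200, 10)))
Y = np.matrix(np.random.normal(size=(200, 8)))

scores = SC.CV_score(
    X = X,
    Y = Y,
    LAMBDA = 0.1,      # placeholder L1 penalty (a scalar, or an array for a grid of scores)
    L2_PEN_W = 0.01,   # placeholder L2 penalty
    grad_splits = 5,   # k-fold gradient descent: solve() runs twice per split,
                       # instead of twice per control unit as in leave-one-out
    parallel = False,  # np.linalg.solve() is already parallelized by LAPACK
)
```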
@@ -1,8 +1,10 @@
+from fit_fold import fold_v_matrix#, fold_score
 from fit_loo import loo_v_matrix, loo_score
 from fit_ct import ct_v_matrix, ct_score
 from optimizers.cd_line_search import cdl_search
 import numpy as np
+import itertools
 from concurrent import futures

 def score_train_test(X,
@@ -11,7 +13,8 @@ def score_train_test(X,
                      test,
                      X_treat=None,
                      Y_treat=None,
-                     FoldNumber=None, # for consistency with score_train_test_sorted_lambdas()
+                     FoldNumber=None, # For consistency with score_train_test_sorted_lambdas()
+                     grad_splits=None, # If present, use k-fold gradient descent. See fold_v_matrix for details
                      **kwargs):
     """ presents a unified api for ct_v_matrix and loo_v_matrix
     and returns the v_mat, l2_pen_w (possibly calculated, possibly a parameter), and the score
@@ -59,14 +62,27 @@ def score_train_test(X,
     else:
         ">> K-fold validation on only the control units; assuming that Y contains post-intervention outcomes"

-        # FIT THE V-MATRIX AND POSSIBLY CALCULATE THE L2_PEN_W
-        # note that the weights, score, and loss function value returned here are for the in-sample predictions
-        weights, v_mat, ts_score, loss, l2_pen_w, _ = \
-            loo_v_matrix(X = X[train, :],
-                         Y = Y[train, :],
-                         # treated_units = [X.shape[0] + i for i in range(len(train))],
-                         method = cdl_search,
-                         **kwargs)
+        if grad_splits is not None:
+
+            # FIT THE V-MATRIX AND POSSIBLY CALCULATE THE L2_PEN_W
+            # note that the weights, score, and loss function value returned here are for the in-sample predictions
+            weights, v_mat, ts_score, loss, l2_pen_w, _ = \
+                fold_v_matrix(X = X[train, :],
+                              Y = Y[train, :],
+                              # treated_units = [X.shape[0] + i for i in range(len(train))],
+                              method = cdl_search,
+                              **kwargs)
+
+        else:
+
+            # FIT THE V-MATRIX AND POSSIBLY CALCULATE THE L2_PEN_W
+            # note that the weights, score, and loss function value returned here are for the in-sample predictions
+            weights, v_mat, ts_score, loss, l2_pen_w, _ = \
+                loo_v_matrix(X = X[train, :],
+                             Y = Y[train, :],
+                             # treated_units = [X.shape[0] + i for i in range(len(train))],
+                             method = cdl_search,
+                             **kwargs)

         # GET THE OUT-OF-SAMPLE PREDICTION ERROR
         s = loo_score(X = X, Y = Y,
@@ -149,17 +165,20 @@ def CV_score(X,Y,LAMBDA,X_treat=None,Y_treat=None,splits=5,sub_splits=None,quiet
         if X_treat.shape[0] != Y_treat.shape[0]:
             raise ValueError("X_treat and Y_treat have different number of rows (%s and %s)" % (X.shape[0], Y.shape[0],))

-        # MESSAGING
-        if not quiet:
-            print("K-fold validation with %s control and %s treated units, %s predictors and %s outcomes, holding out one fold among Treated units; Assumes that `Y` and `Y_treat` are pre-intervention outcomes" %
-                  (X.shape[0], X_treat.shape[0], X.shape[1], Y.shape[1],))
-
-        if isinstance(splits, (int)) or np.issubdtype(splits, np.integer):
+        try:
+            iter(splits)
+        except TypeError:
             from sklearn.model_selection import KFold
             splits = KFold(splits).split(np.arange(X_treat.shape[0]))
         train_test_splits = [x for x in splits]
         n_splits = len(train_test_splits)

+        # MESSAGING
+        if not quiet:
+            print("%s-fold validation with %s control and %s treated units, %s predictors and %s outcomes, holding out one fold among Treated units; Assumes that `Y` and `Y_treat` are pre-intervention outcomes" %
+                  (n_splits, X.shape[0], X_treat.shape[0], X.shape[1], Y.shape[1],))
+
         if parallel:
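The try/iter/except pattern above means `splits` may be either an integer (converted via `KFold`) or any iterable of (train, test) index pairs. For reference, this is standard scikit-learn behavior, not repo-specific:

```python
import numpy as np
from sklearn.model_selection import KFold

# KFold(k).split(...) yields k (train_indices, test_indices) pairs.
for train, test in KFold(3).split(np.arange(6)):
    print(train, test)
# [2 3 4 5] [0 1]
# [0 1 4 5] [2 3]
# [0 1 2 3] [4 5]
```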
@@ -209,17 +228,20 @@ def CV_score(X,Y,LAMBDA,X_treat=None,Y_treat=None,splits=5,sub_splits=None,quiet

     else: # X_treat *is* None
-        # MESSAGING
-        if not quiet:
-            print("Leave-one-out Cross Validation with %s control units, %s predictors and %s outcomes; Y may contain post-intervention outcomes" %
-                  (X.shape[0], X.shape[1], Y.shape[1],))
-
-        if isinstance(splits, (int)) or np.issubdtype(splits, np.integer):
+        try:
+            iter(splits)
+        except TypeError:
             from sklearn.model_selection import KFold
             splits = KFold(splits).split(np.arange(X.shape[0]))
         train_test_splits = [x for x in splits]
         n_splits = len(train_test_splits)

+        # MESSAGING
+        if not quiet:
+            print("%s-fold Cross Validation with %s control units, %s predictors and %s outcomes; Y may contain post-intervention outcomes" %
+                  (n_splits, X.shape[0], X.shape[1], Y.shape[1],))
+
         if parallel:

             if max_workers is None:
@@ -20,8 +20,12 @@ import random
 if __name__ == "__main__":

     # ------------------------------------------------------------
-    # CONTROL PARAMETERS
-    # ------------------------------------------------------------
+    # SECTION 1: GENERATE SOME TOY DATA
+    # ------------------------------------------------------------
+    # ------------------------------------------------------------
+
+    # CONTROL PARAMETERS
+
     random.seed(12345)
     np.random.seed(10101)

@@ -36,9 +40,6 @@ if __name__ == "__main__":
     beta_scale,confounders_scale = 4,1

-    # ------------------------------------------------------------
-    # SIMULATE SOME DATA
-    # ------------------------------------------------------------
-

     # COVARIATE EFFECTS
     X_control = np.matrix(np.random.normal(0,1,((C)*groups, K+S+R)))
@@ -120,9 +121,11 @@ if __name__ == "__main__":
     # control_units = np.arange( C * groups )
     # treated_units = np.arange( N * groups ) + C

     # ------------------------------------------------------------
+    # ------------------------------------------------------------
     # find default penalties
     # ------------------------------------------------------------
+    # ------------------------------------------------------------

     # get starting point for the L2 penalty
     L2_pen_start_ct = SC.L2_pen_guestimate(X_control)
|
@ -130,8 +133,11 @@ if __name__ == "__main__":
|
|||
|
||||
# get the maximum value for the L1 Penalty parameter conditional on the guestimate for the L2 penalty
|
||||
L1_max_ct = SC.get_max_lambda(X_control,Y_pre_control,X_treat=X_treated,Y_treat=Y_pre_treated)
|
||||
L1_max_loo = SC.get_max_lambda(X_and_Y_pre,Y_post)
|
||||
print("Max L1")
|
||||
if False:
|
||||
L1_max_loo = SC.get_max_lambda(X_and_Y_pre[np.arange(100)],Y_post[np.arange(100)])
|
||||
print("Max L1 loo %s " % L1_max_loo)
|
||||
else:
|
||||
L1_max_loo = np.float(147975295.9121998)
|
||||
|
||||
if False:
|
||||
"Demonstrate relations between the L1 and L2 penalties"
|
||||
|
@@ -182,6 +188,7 @@ if __name__ == "__main__":
     # ------------------------------------------------------------

+    if False:

         print("starting grid scoring for treat / control scenario", grid*L1_max_ct)
         grid_scores_ct = SC.CV_score(
             X = X_control,
@@ -208,8 +215,36 @@ if __name__ == "__main__":
     best_L1_penalty_ct = (grid * L1_max_ct)[np.argmin(grid_scores_ct)]

+    if False:
+
+        print("Starting grid scoring for Controls Only scenario with 5-fold gradient descent", grid*L1_max_ct)
+        grid_scores_loo = SC.CV_score(
+            X = X_and_Y_pre, # limit the amount of time...
+            Y = Y_post ,     # limit the amount of time...
+
+            # this is what enables the k-fold gradient descent
+            grad_splits = 5,
+
+            # L1 Penalty. If LAMBDA is a single value (float), we get a single score; if it's an array of values, we get an array of scores.
+            LAMBDA = grid * L1_max_loo,
+
+            # L2 Penalty (float)
+            L2_PEN_W = L2_pen_start_loo,
+
+            # CACHE THE V MATRIX BETWEEN LAMBDA PARAMETERS (generally faster, but path dependent)
+            #cache = True, # False by default
+
+            # Run each of the Cross-validation folds in parallel? Often slower
+            # for large sample sizes because numpy.linalg.solve() already runs
+            # in parallel for large matrices
+            parallel = False,
+
+            # ANNOUNCE COMPLETION OF EACH ITERATION
+            progress = True)
+
     if False:

         # even with smaller data, this takes a while.
-        print("starting grid scoring for Controls Only scenario", grid*L1_max_ct)
+        print("Starting grid scoring for Controls Only scenario with leave-one-out gradient descent", grid*L1_max_ct)
         grid_scores_loo = SC.CV_score(
             X = X_and_Y_pre [np.arange(100)], # limit the amount of time...
             Y = Y_post [np.arange(100)], # limit the amount of time...
@@ -295,7 +330,6 @@ if __name__ == "__main__":
     import pdb; pdb.set_trace()


-
     # ---------------------------------------------------------------------------
     # ---------------------------------------------------------------------------
     # Optimization of the L1 and L2 parameters together (second order)
@@ -306,9 +340,12 @@ if __name__ == "__main__":
     import time

     # -----------------------------------------------------------------
-    # Optimization of the L2 and L1 Penalties Simultaneously, keeping their product constant
+    # Optimization of the L2 and L1 Penalties Simultaneously, keeping their
+    # product constant. Heuristically, this has been the most efficient means of
+    # optimizing the L2 Parameter.
     # -----------------------------------------------------------------


     # build the objective function to be minimized

     # cache for L2_obj_func
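The rewritten comment describes a one-dimensional search: hold the product of the two penalties fixed and vary only their ratio. Below is a minimal sketch of that idea with a placeholder objective; it is not the repository's implementation, and the real `L2_obj_func` would call `SC.CV_score` with the derived `LAMBDA` and `L2_PEN_W`:

```python
import numpy as np
from scipy.optimize import minimize_scalar

def penalties(log_ratio, product):
    # For any log_ratio, LAMBDA * L2_PEN_W == product by construction.
    r = np.exp(log_ratio)
    return np.sqrt(product) * r, np.sqrt(product) / r

def L2_obj_func(log_ratio, product=1.0):
    LAMBDA, L2_PEN_W = penalties(log_ratio, product)
    # Placeholder objective: the real version would return something like
    # SC.CV_score(X=..., Y=..., LAMBDA=LAMBDA, L2_PEN_W=L2_PEN_W, ...)
    return (np.log(LAMBDA) - 1.0) ** 2 + 0.1 * np.log(L2_PEN_W) ** 2

best = minimize_scalar(L2_obj_func, bounds=(-5, 5), method="bounded")
LAMBDA, L2_PEN_W = penalties(best.x, product=1.0)
print(LAMBDA, L2_PEN_W, LAMBDA * L2_PEN_W)  # the product stays fixed at 1.0
```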
fit_fold.py | 206
@@ -20,8 +20,7 @@ def fold_v_matrix(X,
                  method = cdl_search,
                  intercept = True,
                  max_lambda = False, # this is terrible at least without documentation...
-                 solve_method = "standard",
-                 splits = 5,
+                 grad_splits = 5,
                  **kwargs):
     '''
     Computes and sets the optimal v_matrix for the given moments and
@@ -33,11 +32,18 @@ def fold_v_matrix(X,
     :param treated_units: a list containing the position (rows) of the treated units within X and Y
     :param control_units: a list containing the position (rows) of the control units within X and Y
     :param start: initial values for the diagonals of the tensor matrix
-    :param L2_PEN_W: L2 penalty on the magnitude of the deviance of the weight vector from null. Optional.
-    :param method: The name of a method to be used by scipy.optimize.minimize, or a callable with the same API as scipy.optimize.minimize
-    :param intercept: If True, weights are penalized toward 1 / the number of controls, else weights are penalized toward zero
-    :param max_lambda: if True, the return value is the maximum L1 penalty for which at least one element of the tensor matrix is non-zero
-    :param solve_method: Method for solving A.I.dot(B). Either "standard" or "step-down".
+    :param L2_PEN_W: L2 penalty on the magnitude of the deviance of the weight
+                     vector from null. Optional.
+    :param method: The name of a method to be used by scipy.optimize.minimize,
+                   or a callable with the same API as scipy.optimize.minimize
+    :param intercept: If True, weights are penalized toward 1 / the number
+                      of controls, else weights are penalized toward zero
+    :param max_lambda: if True, the return value is the maximum L1 penalty for
+                       which at least one element of the tensor matrix is
+                       non-zero
+    :param grad_splits: Splits for fitted vs. control units in each gradient
+                        descent step. An integer, or a list/generator of train
+                        and test units in each fold of the gradient descent.
     :param **kwargs: additional arguments passed to the optimizer

     '''
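Piecing together the signature and the docstring above, a direct call might look like the sketch below. The toy matrices are placeholders, `LAMBDA` as the L1-penalty argument name is an assumption carried over from the rest of the codebase, and additional required arguments (e.g. `start`) may apply:

```python
import numpy as np
from fit_fold import fold_v_matrix  # imported at the top of this commit

# Toy problem: 40 units (all controls), 5 covariates, 3 outcome periods.
X = np.matrix(np.random.normal(size=(40, 5)))
Y = np.matrix(np.random.normal(size=(40, 3)))

# Returns in-sample weights, the fitted diagonal V matrix, score, loss,
# the (possibly calculated) L2 penalty, and the optimizer result.
weights, v_mat, ts_score, ts_loss, l2_pen_w, opt = fold_v_matrix(
    X, Y,
    LAMBDA = 0.1,     # assumed parameter name (L1 penalty)
    L2_PEN_W = 0.01,
    grad_splits = 5,  # or an explicit list of (train, test) index pairs
)
```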
@@ -45,7 +51,7 @@ def fold_v_matrix(X,
     if treated_units is None:
         if control_units is None:
             # Neither provided; INCLUDE ALL SAMPLES AS BOTH TREAT AND CONTROL UNIT.
-            # (this is the typical controls-only Loo V-matrix estimation)
+            # (this is the typical controls-only fold V-matrix estimation)
             control_units = list(range(X.shape[0]))
             treated_units = control_units
     else:
@@ -69,11 +75,14 @@ def fold_v_matrix(X,
     if not isinstance(L2_PEN_W, (float, int)): raise TypeError( "L2_PEN_W is not a number")
     assert not non_neg_weights, "Bounds not implemented"

+    splits = grad_splits # for readability...
     if isinstance(splits, (int)) or np.issubdtype(splits, np.integer):
         from sklearn.model_selection import KFold
-        splits = KFold(splits).split(np.arange(X_treat.shape[0]))
+        splits = KFold(splits).split(np.arange(len(control_units)))
+    splits = list(splits)
+    n_splits = len(grad_splits)

+    for i in range(len(splits)):
+        assert len(splits[i][0]) + len(splits[i][1]) == len(treated_units), ("Splits for fold %s do not match the number of treated units" % i)

     # CONSTANTS
     C, N, K = len(control_units), len(treated_units), X.shape[1]
@@ -83,79 +92,97 @@ def fold_v_matrix(X,
     assert K > 0; "variables to fit (X.shape[1] == 0)"

     # CREATE THE INDEX THAT INDICATES THE ELIGIBLE CONTROLS FOR EACH TREATED UNIT
-    in_controls = [list(set(control_units) - set([trt_unit])) for trt_unit in treated_units]
+    in_controls = [list(set(control_units) - set(treated_units[test])) for train,test in splits]
     in_controls2 = [np.ix_(i,i) for i in in_controls] # this is a much faster alternative to A[:,index][index,:]
     ctrl_rng = np.arange(len(control_units))
-    out_controls = [ctrl_rng[control_units != trt_unit] for trt_unit in treated_units]
-    out_treated = [ctrl_rng[control_units == trt_unit] for trt_unit in treated_units] # this is non-trivial when the control units are also being predicted.
+    out_controls = [ctrl_rng[np.logical_not(np.isin(control_units, treated_units[test]))] for train,test in splits]
+    out_treated = [ctrl_rng[np.isin(control_units, treated_units[test])] for train,test in splits] # this is non-trivial when the control units are also being predicted.

     if intercept:
-        for i, trt_unit in enumerate(treated_units):
-            Y[trt_unit,:] -= Y[in_controls[i],:].mean(axis=0)
+        for in_ctrl, (train, test) in zip(in_controls,splits):
+            Y[treated_units[test],:] -= Y[in_ctrl,:].mean(axis=0)

     # handy constants (for speed purposes):
     Y_treated = Y[treated_units,:]
     Y_control = Y[control_units,:]

     # INITIALIZE PARTIAL DERIVATIVES
     dA_dV_ki = [ [None,] *N for i in range(K)]
     dB_dV_ki = [ [None,] *N for i in range(K)]
+    b_i = [None,] *N
+    for i, k in itertools.product(range(len(splits)), range(K)): # TREATED unit i, moment k
+        train, test = splits[i]
+        Xc = X[in_controls[i], : ]
+        Xt = X[treated_units[test], : ]
         dA_dV_ki [k][i] = Xc[:, k ].dot(Xc[:, k ].T) + Xc[:, k ].dot(Xc[:, k ].T) # 8
         dB_dV_ki [k][i] = Xc[:, k ].dot(Xt[:, k ].T) + Xc[:, k ].dot(Xt[:, k ].T) # 9

     del Xc, Xt, i, k

-def __old_fold_v_matrix(Xt,Xc,Yt,Yc,LAMBDA = .1,L2_PEN_W = .1,guess = None,method = 'Newton-CG',**kwargs):
-    """ Calculates the V matrix with an explicit Treatment and Control folds,
-        where the match is optimized on the error of the outcomes.
+    def _score(V):
+        dv = diag(V)
+        weights, _, _ = _weights(dv)
+        Ey = (Y_treated - weights.T.dot(Y_control)).getA()
+        return ((Ey **2).sum() + LAMBDA * absolute(V).sum()).copy() # (...).copy() assures that x.flags.writeable is True

-        don't use this. it hasn't been thoroughly checked for bugs (compared
-        to loo_v_matrix and ct_v_matrix) and lacks several of the optimizations of loo_v_matrix
-    """
-    C,K = Xc.shape; N,T = Yt.shape
-    two_L2_PEN_W = 2*L2_PEN_W*diag(ones(C))
-    def _score(diagV):
-        """ calculates just the diagonal of dGamma0_dV
+    def _grad(V):
+        """ Calculates just the diagonal of dGamma0_dV

            There is an implementation that allows for all elements of V to be varied...
        """
-        V = diag(diagV)
-        A = Xc.dot(2*V).dot(Xc.T) + two_L2_PEN_W # 5
-        B = Xc.dot(2*V).dot(Xt.T) # 6
-        Pi = A.I.dot(B)
-        Ey = (Yt - Pi.T.dot(Yc)).getA()
-        return (Ey **2).sum() + LAMBDA * absolute(Pi.T).sum()
-    def _grad(diagV):
-        """ calculates just the diagonal of dGamma0_dV
-        """
-        V = diag(diagV)
-        assert Xc.shape[0] == Yc.shape[0]
-        # CALCULATE CONSTANTS
-        A = Xc.dot(2*V).dot(Xc.T) + two_L2_PEN_W # 5
-        B = Xc.dot(2*V).dot(Xt.T) # 6
-        Ai = A.I
-        Pi = Ai.dot(B)
-        Ey = (Yt - Pi.T.dot(Yc)).getA()
-        # INITIALIZE PARTIAL DERIVATIVES
-        dGamma0_dV_term2 = zeros((K,K))
-        for i in range(K):
-            dA = Xc[:, i ].dot(Xc[:, i ].T) + Xc[:, i ].dot(Xc[:, i ].T) # 8
-            dB = Xc[:, i ].dot(Xt [:, i ].T) + Xc[:, i ].dot(Xt [:, i ].T) # 9
-            dPI_dV = Ai.dot(dB - dA.dot(Pi)) # 7
-            dGamma0_dV_term2[i,i] = (Ey * Yc.T.dot(dPI_dV).T.getA()).sum()
-        return diag(LAMBDA * (1 - 2*(V<0)) - 2 * dGamma0_dV_term2)
+        dv = diag(V)
+        weights, A, B = _weights(dv)
+        Ey = (Y_treated - weights.T.dot(Y_control)).getA()
+        dGamma0_dV_term2 = zeros(K)
+        dPI_dV = zeros((C, N))
+        for k in range(K):
+            dPI_dV.fill(0) # faster than re-allocating the memory each loop.
+            for i, (index, (train, test)) in enumerate(zip(in_controls,splits)):
+                dA = dA_dV_ki[k][i]
+                dB = dB_dV_ki[k][i]
+                b = linalg.solve(A[in_controls2[i]],dB - dA.dot(b_i[i]))
+                dPI_dV[np.ix_(in_controls[i], treated_units[test])] = b
+            dGamma0_dV_term2[k] = (Ey * Y_control.T.dot(dPI_dV).T.getA()).sum()
+        return LAMBDA - 2 * dGamma0_dV_term2 # (...).copy() assures that x.flags.writeable is True // * (1 - 2*(V < 0)) was dropped b/c we're restricting to positive V's

+    L2_PEN_W_mat = 2 * L2_PEN_W * diag(ones(C))
     def _weights(V):
-        A = Xc.dot(2*V).dot(Xc.T) + two_L2_PEN_W # 5
-        B = Xc.dot(2*V).dot(Xt.T) # 6
-        #for col in treated_unit:
-        #w_col = A[-col,-col].I.dot(B[:,col])
-        return A.I.dot(B).T
+        weights = zeros((C, N))
+        A = X.dot(V + V.T).dot(X.T) + 2 * L2_PEN_W * diag(ones(X.shape[0])) # 5
+        B = X.dot(V + V.T).dot(X.T).T # 6
+        for i, (train,test) in enumerate(splits):
+            (b) = b_i[i] = linalg.solve(A[in_controls2[i]], B[np.ix_(in_controls[i], treated_units[test])])
+            weights[np.ix_(out_controls[i], test)] = b
+        return weights, A, B

     if max_lambda:
         grad0 = _grad(zeros(K))
         return -grad0[grad0 < 0].min()

     # DO THE OPTIMIZATION
-    opt = minimize(_score, zeros(Xc.shape[1]) if guess is None else guess,
-                   jac = _grad,
-                   method = method, **kwargs)
+    if isinstance(method, str):
+        from scipy.optimize import minimize
+        opt = minimize(_score, start.copy(), jac = _grad, method = method, **kwargs)
+    else:
+        assert callable(method), "Method must be a valid method name for scipy.optimize.minimize or a minimizer"
+        opt = method(_score, start.copy(), jac = _grad, **kwargs)
     v_mat = diag(opt.x)
-    ts_loss = opt.fun
     # CALCULATE weights AND ts_score
-    weights = _weights(v_mat)
-    errors = Yt - weights.dot(Yc)
+    weights, _, _ = _weights(v_mat)
+    errors = Y_treated - weights.T.dot(Y_control)
+    ts_loss = opt.fun
     ts_score = linalg.norm(errors) / sqrt(prod(errors.shape))
-    return weights, v_mat, ts_score, ts_loss, opt

+    if intercept:
+        for i in range(len(splits)):
+            weights[out_controls[i], i] += 1/len(out_controls[i])

+    return weights, v_mat, ts_score, ts_loss, L2_PEN_W, opt


-def loo_weights(X, V, L2_PEN_W, treated_units = None, control_units = None, intercept = True, solve_method = "standard"):
+# NOTE: this is here for completeness, but you will generally want to use loo_weights instead. a single set of weights is a lot less costly than a full gradient descent...
+def fold_weights(X, V, L2_PEN_W, treated_units = None, control_units = None, intercept = True, splits = 5):
     if treated_units is None:
         if control_units is None:
             # both not provided, include all samples as both treat and control unit.
@@ -172,48 +199,41 @@ def loo_weights(X, V, L2_PEN_W, treated_units = None, control_units = None, inte
     treated_units = np.array(treated_units)
     [C, N, K] = [len(control_units), len(treated_units), X.shape[1]]

+    if isinstance(splits, (int)) or np.issubdtype(splits, np.integer):
+        from sklearn.model_selection import KFold
+        splits = KFold(splits).split(np.arange(len(control_units)))
+    splits = list(splits)

     # index with positions of the controls relative to the incoming data
-    in_controls = [list(set(control_units) - set([trt_unit])) for trt_unit in treated_units]
+    in_controls = [list(set(control_units) - set(treated_units[test])) for train,test in splits]
     in_controls2 = [np.ix_(i,i) for i in in_controls] # this is a much faster alternative to A[:,index][index,:]

     # index of the controls relative to the rows of the outgoing C x N matrix of weights
     ctrl_rng = np.arange(len(control_units))
-    out_controls = [ctrl_rng[control_units != trt_unit] for trt_unit in treated_units]
-    out_treated = [ctrl_rng[control_units == trt_unit] for trt_unit in treated_units] # this is non-trivial when the control units are also being predicted.
+    out_controls = [ctrl_rng[np.logical_not(np.isin(control_units, treated_units[test]))] for train,test in splits]
+    out_treated = [ctrl_rng[np.isin(control_units, treated_units[test])] for train,test in splits] # this is non-trivial when the control units are also being predicted.

     # constants for indexing
-    X_control = X[control_units,:]
-    X_treat = X[treated_units,:]
     weights = zeros((C, N))

-    if solve_method == "step-down":
-        A = X_control.dot(V + V.T).dot(X_control.T) + 2 * L2_PEN_W * diag(ones(X_control.shape[0])) # 5
-        B = X_treat.dot( V + V.T).dot(X_control.T) # 6
-        Ai = A.I
-        for i, trt_unit in enumerate(treated_units):
-            if trt_unit in control_units:
-                (b) = subinv_k(Ai,_k).dot(B[out_controls[i],i])
-            else:
-                (b) = Ai.dot(B[:, i])
-            weights[out_controls[i], i] = b.flatten()
-            if intercept:
-                weights[out_controls[i], i] += 1/len(out_controls[i])
-    elif solve_method == "standard":
-        A = X.dot(V + V.T).dot(X.T) + 2 * L2_PEN_W * diag(ones(X.shape[0])) # 5
-        B = X.dot(V + V.T).dot(X.T).T # 6
-        for i, trt_unit in enumerate(treated_units):
-            (b) = linalg.solve(A[in_controls2[i]], B[in_controls[i], trt_unit])
-            weights[out_controls[i], i] = b.flatten()
-            if intercept:
-                weights[out_controls[i], i] += 1/len(out_controls[i])
-    else:
-        raise ValueError("Unknown Solve Method: " + solve_method)
+    A = X.dot(V + V.T).dot(X.T) + 2 * L2_PEN_W * diag(ones(X.shape[0])) # 5
+    B = X.dot(V + V.T).dot(X.T).T # 6
+
+    for i, (train,test) in enumerate(splits):
+        (b) = linalg.solve(A[in_controls2[i]], B[np.ix_(in_controls[i], treated_units[test])])
+        indx2 = np.ix_(out_controls[i], train)
+        weights[indx2] = b.T
+        if intercept:
+            weights[indx2] += 1/len(out_controls[i])
     return weights.T

-def loo_score(Y, X, V, L2_PEN_W, LAMBDA = 0, treated_units = None, control_units = None,**kwargs):
+# NOTE: this is here for completeness, but you will almost always want to use loo_score instead.
+def fold_score(Y, X, V, L2_PEN_W, LAMBDA = 0, treated_units = None, control_units = None,**kwargs):
     if treated_units is None:
         if control_units is None:
             # both not provided, include all samples as both treat and control unit.
@@ -226,7 +246,7 @@ def loo_score(Y, X, V, L2_PEN_W, LAMBDA = 0, treated_units = None, control_units
         if control_units is None:
             # Set the control units to the not-treated units
             control_units = list(set(range(X.shape[0])) - set(treated_units))
-    weights = loo_weights(X = X,
+    weights = fold_weights(X = X,
                           V = V,
                           L2_PEN_W = L2_PEN_W,
                           treated_units = treated_units,
fit_loo.py | 36
@@ -35,7 +35,7 @@ def loo_v_matrix(X,
     :param method: The name of a method to be used by scipy.optimize.minimize, or a callable with the same API as scipy.optimize.minimize
     :param intercept: If True, weights are penalized toward 1 / the number of controls, else weights are penalized toward zero
     :param max_lambda: if True, the return value is the maximum L1 penalty for which at least one element of the tensor matrix is non-zero
-    :param solve_method: Method for solving A.I.dot(B). Either "standard" or "step-down".
+    :param solve_method: Method for solving A.I.dot(B). Either "standard" or "step-down". https://math.stackexchange.com/a/208021/252693
     :param **kwargs: additional arguments passed to the optimizer

     '''
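The linked answer gives the identity a "step-down" solver can exploit: the inverse of a principal submatrix can be recovered from the full inverse without a second factorization. A self-contained numpy check of that identity (illustration only, not code from this repo):

```python
import numpy as np

rng = np.random.RandomState(0)
A = rng.normal(size=(6, 6))
A = A.dot(A.T) + 6 * np.eye(6)  # symmetric positive definite, safely invertible
Ai = np.linalg.inv(A)

k = 2                  # row/column to delete
m = np.arange(6) != k  # mask for the remaining indices

# Inverse of the submatrix, computed from scratch...
direct = np.linalg.inv(A[np.ix_(m, m)])
# ...and "stepped down" from the full inverse:
stepped = Ai[np.ix_(m, m)] - np.outer(Ai[m, k], Ai[k, m]) / Ai[k, k]

assert np.allclose(direct, stepped)
```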
|
@ -246,40 +246,6 @@ def loo_weights(X, V, L2_PEN_W, treated_units = None, control_units = None, inte
|
|||
return weights.T
|
||||
|
||||
|
||||
|
||||
def __old__loo_weights(X, V, L2_PEN_W, treated_units = None, control_units = None, intercept = True):
|
||||
assert False
|
||||
if treated_units is None:
|
||||
if control_units is None:
|
||||
# both not provided, include all samples as both treat and control unit.
|
||||
control_units = list(range(X.shape[0]))
|
||||
treated_units = control_units
|
||||
else:
|
||||
# Set the treated units to the not-control units
|
||||
treated_units = list(set(range(X.shape[0])) - set(control_units))
|
||||
else:
|
||||
if control_units is None:
|
||||
# Set the control units to the not-treated units
|
||||
control_units = list(set(range(X.shape[0])) - set(treated_units))
|
||||
[C, N, K] = [len(control_units), len(treated_units), X.shape[1]]
|
||||
index_i = [list(set(list(range(200))) - set([col])) for col in treated_units]
|
||||
|
||||
index_i2 = [np.ix_(i,i) for i in index_i] # this is a much faster alternative to A[:,index][index,:]
|
||||
index_map = {c_unit : row_at for row_at, c_unit in enumerate(control_units)}
|
||||
|
||||
weights = zeros((C, N))
|
||||
A = X.dot(V + V.T).dot(X.T) + 2 * L2_PEN_W * diag(ones(X.shape[0])) # 5
|
||||
B = X.dot(V + V.T).dot(X.T).T # 6
|
||||
for col, index in enumerate(index_i):
|
||||
b = linalg.solve(A[index_i2[col]], B[index, col])
|
||||
weights[np.vectorize(index_map.get)(index), col] = b.T
|
||||
if intercept:
|
||||
weights[index, col] += 1/len(index)
|
||||
return weights
|
||||
|
||||
|
||||
|
||||
|
||||
def loo_score(Y, X, V, L2_PEN_W, LAMBDA = 0, treated_units = None, control_units = None,**kwargs):
|
||||
if treated_units is None:
|
||||
if control_units is None:
|
||||
|
|