Mirror of https://github.com/microsoft/SparseSC.git

Initial Commit

This commit is contained in:
Commit b112ca5318
.gitignore
@@ -0,0 +1,104 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
README.md
@@ -0,0 +1,15 @@
# Ridge Synthetic Controls

### Setup

This package requires [numpy](http://www.numpy.org/) and
[scipy](https://www.scipy.org/) and has been tested with (Python 2.7.14,
NumPy 1.14.1, and SciPy 1.0.0) and (Python 3.5.5, NumPy 1.13.1, and
SciPy 1.0.1).

### API

For now, refer to the example code in `./example-code.py` for usage
details; a minimal sketch of the workflow is shown below.
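A minimal usage sketch, distilled from `./example-code.py` using the names exported by the package's `__init__` in this commit; the input matrices and penalty grid here are illustrative placeholders:

```python
import numpy as np
import RidgeSC as SC

# X: control-unit covariates, Y: control-unit outcomes (np.matrix, rows = units)
X = np.matrix(np.random.normal(0, 1, (100, 10)))
Y = np.matrix(np.random.normal(0, 1, (100, 4)))

# starting point for the L2 penalty, and the largest useful L1 penalty given it
L2_pen = SC.L2_pen_guestimate(X)
L1_max = SC.get_max_lambda(X, Y, L2_PEN_W=L2_pen)

# cross-validated scores over a log-linear grid of L1 penalties
grid = np.exp(np.linspace(np.log(1e-4), np.log(1e-2), 10)) * L1_max
scores = SC.CV_score(X=X, Y=Y, LAMBDA=grid, L2_PEN_W=L2_pen)
best_L1 = grid[np.argmin(scores)]

# fit the covariate-weight matrix (V), then the synthetic-control weights
V = SC.tensor(X=X, Y=Y, LAMBDA=best_L1, L2_PEN_W=L2_pen)
weights = SC.weights(X=X, V=V, L2_PEN_W=L2_pen)
```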
RidgeSC/__init__.py
@@ -0,0 +1,10 @@
# PRIMARY FITTING FUNCTIONS
# (explicit relative imports; the original implicit form only works on Python 2)
from .fit_loo import loo_v_matrix, loo_weights, loo_score
from .fit_ct import ct_v_matrix, ct_weights, ct_score

# Public API
from .cross_validation import score_train_test, score_train_test_sorted_lambdas, CV_score
from .tensor import tensor
from .weights import weights
from .lambda_utils import get_max_lambda, L2_pen_guestimate
RidgeSC/cross_validation.py
@@ -0,0 +1,299 @@
from .fit_loo import loo_v_matrix, loo_score
from .fit_ct import ct_v_matrix, ct_score
from .optimizers.cd_line_search import cdl_search
import numpy as np
from concurrent import futures

def score_train_test(X,
                     Y,
                     train,
                     test,
                     X_treat=None,
                     Y_treat=None,
                     FoldNumber=None, # for consistency with score_train_test_sorted_lambdas()
                     **kwargs):
    """ Presents a unified API for ct_v_matrix and loo_v_matrix, and returns
        the v_mat, l2_pen_w (possibly calculated, possibly a parameter), and the score.
    """
    # to use `pdb.set_trace()` here, set `parallel = False` above
    if (X_treat is None) != (Y_treat is None):
        raise ValueError("parameters `X_treat` and `Y_treat` must both be Matrices or None")

    if X_treat is not None:
        # K-fold validation on the Treated units; assuming that Y and Y_treat are pre-intervention outcomes

        # PARAMETER QC
        if not isinstance(X_treat, np.matrix): raise TypeError("X_treat is not a matrix")
        if not isinstance(Y_treat, np.matrix): raise TypeError("Y_treat is not a matrix")
        if X_treat.shape[1] == 0: raise ValueError("X_treat.shape[1] == 0")
        if Y_treat.shape[1] == 0: raise ValueError("Y_treat.shape[1] == 0")
        if X_treat.shape[0] != Y_treat.shape[0]:
            raise ValueError("X_treat and Y_treat have different number of rows (%s and %s)" % (X_treat.shape[0], Y_treat.shape[0],))

        # FIT THE V-MATRIX AND POSSIBLY CALCULATE THE L2_PEN_W
        # note that the weights, score, and loss function value returned here are for the in-sample predictions
        weights, v_mat, ts_score, loss, l2_pen_w, _ = \
                ct_v_matrix(X = np.vstack((X, X_treat[train, :])),
                            Y = np.vstack((Y, Y_treat[train, :])),
                            treated_units = [X.shape[0] + i for i in range(len(train))],
                            method = cdl_search,
                            **kwargs)

        # GET THE OUT-OF-SAMPLE PREDICTION ERROR
        s = ct_score(X = np.vstack((X, X_treat[test, :])),
                     Y = np.vstack((Y, Y_treat[test, :])),
                     treated_units = [X.shape[0] + i for i in range(len(test))],
                     V = v_mat,
                     L2_PEN_W = l2_pen_w)

        print("LAMBDA: %0.1f, zeros %s (of %s), Score: %0.1f / %0.1f " % (kwargs["LAMBDA"],
              sum(np.diag(v_mat == 0)),
              v_mat.shape[0],
              s,
              np.power(Y_treat[test, :] - np.mean(Y_treat[test, :]), 2).sum()))

    else:
        # K-fold validation on only the control units; assuming that Y contains post-intervention outcomes

        # FIT THE V-MATRIX AND POSSIBLY CALCULATE THE L2_PEN_W
        # note that the weights, score, and loss function value returned here are for the in-sample predictions
        weights, v_mat, ts_score, loss, l2_pen_w, _ = \
                loo_v_matrix(X = X[train, :],
                             Y = Y[train, :],
                             # treated_units = [X.shape[0] + i for i in range(len(train))],
                             method = cdl_search,
                             **kwargs)

        # GET THE OUT-OF-SAMPLE PREDICTION ERROR
        s = loo_score(X = X, Y = Y,
                      treated_units = test,
                      V = v_mat,
                      L2_PEN_W = l2_pen_w)

    return v_mat, l2_pen_w, s


def score_train_test_sorted_lambdas(LAMBDA,
                                    start=None,
                                    cache=False,
                                    progress=False,
                                    FoldNumber=None,
                                    **kwargs):
    """ A wrapper which calls score_train_test() for each element of an
        array of `LAMBDA`'s, optionally caching the optimized v_mat and using it
        as the start position for the next iteration.
    """
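    # Editorial note (a sketch of the warm-start design, inferred from the loop
    # below): when `cache` is True, the optimized diagonal of v_mat from one
    # LAMBDA becomes the `start` position for the next, so passing a sorted
    # LAMBDA array lets each solve begin near the previous solution, e.g.:
    #
    #   lambdas = sorted(grid * L1_max, reverse=True)  # hypothetical grid
    #   v_mats, pens, scores = score_train_test_sorted_lambdas(
    #       LAMBDA=lambdas, cache=True, X=X, Y=Y, train=train, test=test)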
    # DEFAULTS
    values = [None] * len(LAMBDA)

    if progress > 0:
        import time
        t0 = time.time()

    for i, Lam in enumerate(LAMBDA):
        v_mat, w, s = values[i] = score_train_test(LAMBDA = Lam, start = start, **kwargs)

        if cache:
            start = np.diag(v_mat)
        if progress > 0 and (i % progress) == 0:
            t1 = time.time()
            if FoldNumber is None:
                print("iteration %s of %s, time: %0.4f, lambda: %0.4f" % (i+1, len(LAMBDA), t1 - t0, Lam,))
                #print("iteration %s of %s, time: %0.4f, lambda: %0.4f, diags: %s" % (i+1, len(LAMBDA), t1 - t0, Lam, np.diag(v_mat),))
            else:
                print("Fold %s, iteration %s of %s, time: %0.4f, lambda: %0.4f" % (FoldNumber, i+1, len(LAMBDA), t1 - t0, Lam,))
                #print("Fold %s, iteration %s of %s, time: %0.4f, lambda: %0.4f, diags: %s" % (FoldNumber, i+1, len(LAMBDA), t1 - t0, Lam, np.diag(v_mat),))
            t0 = time.time()

    return list(zip(*values))


def CV_score(X, Y, LAMBDA, X_treat=None, Y_treat=None, splits=5, sub_splits=None, quiet=False, parallel=False, max_workers=None, **kwargs):
    """ Cross fold validation for 1 or more L1 Penalties, holding the L2 penalty fixed.
    """

    # PARAMETER QC
    if not isinstance(X, np.matrix): raise TypeError("X is not a matrix")
    if not isinstance(Y, np.matrix): raise TypeError("Y is not a matrix")
    if (X_treat is None) != (Y_treat is None):
        raise ValueError("parameters `X_treat` and `Y_treat` must both be Matrices or None")
    if X.shape[1] == 0: raise ValueError("X.shape[1] == 0")
    if Y.shape[1] == 0: raise ValueError("Y.shape[1] == 0")
    if X.shape[0] != Y.shape[0]: raise ValueError("X and Y have different number of rows (%s and %s)" % (X.shape[0], Y.shape[0],))

    try:
        iter(LAMBDA)
    except TypeError:
        # LAMBDA is a single value
        multi_lambda = False
        __score_train_test__ = score_train_test
    else:
        # LAMBDA is an iterable of values
        multi_lambda = True
        __score_train_test__ = score_train_test_sorted_lambdas

    if X_treat is not None:

        # PARAMETER QC
        if not isinstance(X_treat, np.matrix): raise TypeError("X_treat is not a matrix")
        if not isinstance(Y_treat, np.matrix): raise TypeError("Y_treat is not a matrix")
        if X_treat.shape[1] == 0: raise ValueError("X_treat.shape[1] == 0")
        if Y_treat.shape[1] == 0: raise ValueError("Y_treat.shape[1] == 0")
        if X_treat.shape[0] != Y_treat.shape[0]:
            raise ValueError("X_treat and Y_treat have different number of rows (%s and %s)" % (X_treat.shape[0], Y_treat.shape[0],))

        # MESSAGING
        if not quiet:
            print("K-fold validation with %s control and %s treated units, %s predictors and %s outcomes, holding out one fold among Treated units; assumes that `Y` and `Y_treat` are pre-intervention outcomes" %
                  (X.shape[0], X_treat.shape[0], X.shape[1], Y.shape[1],))

        if isinstance(splits, int) or np.issubdtype(splits, np.integer):
            from sklearn.model_selection import KFold
            splits = KFold(splits).split(np.arange(X_treat.shape[0]))
        train_test_splits = [x for x in splits]
        n_splits = len(train_test_splits)

        if parallel:

            if max_workers is None:
                # CALCULATE A DEFAULT FOR MAX_WORKERS
                import multiprocessing
                if n_splits == 1:
                    print("WARNING: Using parallel options with a single split is expected to reduce performance")
                max_workers = min(max(multiprocessing.cpu_count() - 2, 1), len(train_test_splits))
                if max_workers == 1 and n_splits > 1:
                    print("WARNING: max_workers defaulted to 1 on a machine with %s cores." % multiprocessing.cpu_count())

            _initialize_Global_worker_pool(max_workers)

            try:

                promises = [_worker_pool.submit(__score_train_test__,
                                                X = X,
                                                Y = Y,
                                                LAMBDA = LAMBDA,
                                                X_treat = X_treat,
                                                Y_treat = Y_treat,
                                                train = train,
                                                test = test,
                                                FoldNumber = fold,
                                                **kwargs)
                            for fold, (train, test) in enumerate(train_test_splits)]
                results = [promise.result() for promise in futures.as_completed(promises)]

            finally:

                _clean_up_worker_pool()

        else:

            results = [__score_train_test__(X = X,
                                            Y = Y,
                                            X_treat = X_treat,
                                            Y_treat = Y_treat,
                                            LAMBDA = LAMBDA,
                                            train = train,
                                            test = test,
                                            FoldNumber = fold,
                                            **kwargs)
                       for fold, (train, test) in enumerate(train_test_splits)]

    else: # X_treat *is* None
        # MESSAGING
        if not quiet:
            print("Leave-one-out Cross Validation with %s control units, %s predictors and %s outcomes; Y may contain post-intervention outcomes" %
                  (X.shape[0], X.shape[1], Y.shape[1],))

        if isinstance(splits, int) or np.issubdtype(splits, np.integer):
            from sklearn.model_selection import KFold
            splits = KFold(splits).split(np.arange(X.shape[0]))
        train_test_splits = [x for x in splits]
        n_splits = len(train_test_splits)

        if parallel:

            if max_workers is None:
                # CALCULATE A DEFAULT FOR MAX_WORKERS
                import multiprocessing
                if n_splits == 1:
                    print("WARNING: Using parallel options with a single split is expected to reduce performance")
                max_workers = min(max(multiprocessing.cpu_count() - 2, 1), len(train_test_splits))
                if max_workers == 1 and n_splits > 1:
                    print("WARNING: max_workers defaulted to 1 on a machine with %s cores." % multiprocessing.cpu_count())

            _initialize_Global_worker_pool(max_workers)

            try:

                promises = [_worker_pool.submit(__score_train_test__,
                                                X = X,
                                                Y = Y,
                                                LAMBDA = LAMBDA,
                                                train = train,
                                                test = test,
                                                FoldNumber = fold,
                                                **kwargs)
                            for fold, (train, test) in enumerate(train_test_splits)]

                results = [promise.result() for promise in futures.as_completed(promises)]

            finally:

                _clean_up_worker_pool()

        else:
            results = [__score_train_test__(X = X,
                                            Y = Y,
                                            LAMBDA = LAMBDA,
                                            train = train,
                                            test = test,
                                            FoldNumber = fold,
                                            **kwargs)
                       for fold, (train, test) in enumerate(train_test_splits)]

    # extract the score.
    v_mats, l2_pens, scores = list(zip(*results))

    if multi_lambda:
        total_score = [sum(s) for s in zip(*scores)]
    else:
        total_score = sum(scores)

    return total_score


# ------------------------------------------------------------
# utilities for maintaining a worker pool
# ------------------------------------------------------------

_worker_pool = None

def _initialize_Global_worker_pool(n_workers):
    global _worker_pool

    if _worker_pool is not None:
        return # keep it idempotent, please

    _worker_pool = futures.ProcessPoolExecutor(max_workers=n_workers)

def _clean_up_worker_pool():
    global _worker_pool

    if _worker_pool is not None:
        _worker_pool.shutdown()
        _worker_pool = None

import atexit
atexit.register(_clean_up_worker_pool)
example-code.py
@@ -0,0 +1,436 @@
"""
|
||||
USAGE:
|
||||
|
||||
cd path/to/RidgeSC
|
||||
python example-code.py
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
sys.path.append(os.path.join(os.getcwd(),".."))
|
||||
|
||||
import time
|
||||
import RidgeSC as SC
|
||||
import numpy as np
|
||||
import random
|
||||
|
||||
|
||||
# def setup(C,N,T,K,g,gs,bs ): # controls (C), treated units (N) , time periods (T), Predictors (K), groups (g), g-scale, b-scale
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# CONTROL PARAMETERS
|
||||
# ------------------------------------------------------------
|
||||
|
||||
random.seed(12345)
|
||||
np.random.seed(10101)
|
||||
|
||||
# Controls (per group), Treated (per group), pre-intervention Time points, post intervention time-periods
|
||||
C,N,T0,T1 = 5,5,4,4
|
||||
# Causal Covariates, Confounders , Random Covariates
|
||||
K,S,R = 7,4,5
|
||||
# Number of Groups, Scale of the Group Effect, (groups make the time series correlated for reasons other than the observed covariates...)
|
||||
groups,group_scale = 30,2
|
||||
# Scale of the Correlation of the Causal Covariates
|
||||
beta_scale,confounders_scale = 4,1
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# SIMULATE SOME DATA
|
||||
# ------------------------------------------------------------
|
||||
|
||||
|
||||
# COVARIATE EFFECTS
|
||||
X_control = np.matrix(np.random.normal(0,1,((C)*groups, K+S+R)))
|
||||
X_treated = np.matrix(np.random.normal(0,1,((N)*groups, K+S+R)))
|
||||
|
||||
# CAUSAL
|
||||
b_cause = np.random.exponential(1,K)
|
||||
b_cause *= beta_scale / b_cause.max()
|
||||
|
||||
# CONFOUNDERS
|
||||
b_confound = np.random.exponential(1,S)
|
||||
b_confound *= confounders_scale / b_confound.max()
|
||||
|
||||
beta_control = np.matrix(np.concatenate( ( b_cause ,b_confound, np.zeros(R)) ) ).T
|
||||
beta_treated = np.matrix(np.concatenate( ( b_cause ,np.zeros(S), np.zeros(R)) ) ).T
|
||||
|
||||
# GROUP EFFECTS (hidden)
|
||||
|
||||
Y_pre_group_effects = np.random.normal(0,group_scale,(groups,T0))
|
||||
Y_pre_ge_control = Y_pre_group_effects[np.repeat(np.arange(groups),C)]
|
||||
Y_pre_ge_treated = Y_pre_group_effects[np.repeat(np.arange(groups),N)]
|
||||
|
||||
Y_post_group_effects = np.random.normal(0,group_scale,(groups,T1))
|
||||
Y_post_ge_control = Y_post_group_effects[np.repeat(np.arange(groups),C)]
|
||||
Y_post_ge_treated = Y_post_group_effects[np.repeat(np.arange(groups),N)]
|
||||
|
||||
# RANDOM ERRORS
|
||||
Y_pre_err_control = np.matrix(np.random.random( ( C*groups, T0, ) ))
|
||||
Y_pre_err_treated = np.matrix(np.random.random( ( N*groups, T0, ) ))
|
||||
|
||||
Y_post_err_control = np.matrix(np.random.random( ( C*groups, T1, ) ))
|
||||
Y_post_err_treated = np.matrix(np.random.random( ( N*groups, T1, ) ))
|
||||
|
||||
# THE DATA GENERATING PROCESS
|
||||
model = "full"
|
||||
|
||||
if model == "full":
|
||||
""" In the full model, covariates (X) are correlated with pre and post
|
||||
outcomes, and variance of the outcomes pre- and post- outcomes is
|
||||
lower within groups which span both treated and control units.
|
||||
"""
|
||||
Y_pre_control = X_control.dot(beta_control) + Y_pre_ge_control + Y_pre_err_control
|
||||
Y_pre_treated = X_treated.dot(beta_treated) + Y_pre_ge_treated + Y_pre_err_treated
|
||||
|
||||
Y_post_control = X_control.dot(beta_control) + Y_post_ge_control + Y_post_err_control
|
||||
Y_post_treated = X_treated.dot(beta_treated) + Y_post_ge_treated + Y_post_err_treated
|
||||
|
||||
elif model == "hidden":
|
||||
""" In the hidden model outcomes are independent of the covariates, but
|
||||
variance of the outcomes pre- and post- outcomes is lower within
|
||||
groups which span both treated and control units.
|
||||
"""
|
||||
Y_pre_control = Y_pre_ge_control + Y_pre_err_control
|
||||
Y_pre_treated = Y_pre_ge_treated + Y_pre_err_treated
|
||||
|
||||
Y_post_control = Y_post_ge_control + Y_post_err_control
|
||||
Y_post_treated = Y_post_ge_treated + Y_post_err_treated
|
||||
|
||||
elif model == "null":
|
||||
"Purely random data"
|
||||
Y_pre_control = Y_pre_err_control
|
||||
Y_pre_treated = Y_pre_err_treated
|
||||
|
||||
Y_post_control = Y_post_err_control
|
||||
Y_post_treated = Y_post_err_treated
|
||||
|
||||
else:
|
||||
raise ValueError("Unknown model type: "+model)
|
||||
|
||||
# JOIN THE TREAT AND CONTROL DATA
|
||||
X = np.vstack( (X_control, X_treated,) )
|
||||
Y_pre = np.vstack( (Y_pre_control, Y_pre_treated, ) )
|
||||
Y_post = np.vstack( (Y_post_control, Y_post_treated,) )
|
||||
|
||||
# in the leave-one-out scneario, the pre-treatment outcomes will be part of the covariates
|
||||
X_and_Y_pre = np.hstack( ( X, Y_pre,) )
|
||||
|
||||
# IDENTIFIERS FOR TREAT AND CONTROL UNITS
|
||||
# control_units = np.arange( C * groups )
|
||||
# treated_units = np.arange( N * groups ) + C
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# find default penalties
|
||||
# ------------------------------------------------------------
|
||||
|
||||
# get starting point for the L2 penalty
|
||||
L2_pen_start_ct = SC.L2_pen_guestimate(X_control)
|
||||
L2_pen_start_loo = SC.L2_pen_guestimate(X_and_Y_pre)
|
||||
|
||||
# get the maximum value for the L1 Penalty parameter conditional on the guestimate for the L2 penalty
|
||||
L1_max_ct = SC.get_max_lambda(X_control,Y_pre_control,X_treat=X_treated,Y_treat=Y_pre_treated)
|
||||
L1_max_loo = SC.get_max_lambda(X_and_Y_pre,Y_post)
|
||||
print("Max L1")
|
||||
|
||||
if False:
|
||||
"Demonstrate relations between the L1 and L2 penalties"
|
||||
|
||||
# get the maximum value for the L1 Penalty parameter conditional on several L2 penalty parameter values
|
||||
L2_grid = (2.** np.arange(-1,2))
|
||||
|
||||
L1_max_loo_grid = SC.get_max_lambda(X_and_Y_pre,
|
||||
Y_post,
|
||||
L2_PEN_W = L2_pen_start_loo * L2_grid)
|
||||
L1_max_ct_grid = SC.get_max_lambda(X_control,
|
||||
Y_pre_control,
|
||||
X_treat=X_treated,
|
||||
Y_treat=Y_pre_treated,
|
||||
L2_PEN_W = L2_pen_start_ct * L2_grid)
|
||||
assert ( L1_max_loo / L1_max_loo_grid == 1/L2_grid).all()
|
||||
assert ( L1_max_ct / L1_max_ct_grid == 1/L2_grid).all()
|
||||
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# create a grid of penalties to try
|
||||
# ------------------------------------------------------------
|
||||
n_points = 10 # number of points in the grid
|
||||
|
||||
grid_type = "log-linear" # Usually the optimal penalties are quite Small
|
||||
|
||||
if grid_type == "simple":
|
||||
# An equally spaced linear grid that does not include 0 or 1
|
||||
grid = ( 1 + np.arange(n_points) ) / ( 1 + n_points )
|
||||
elif grid_type == "linear":
|
||||
# Another equally spaced linear grid
|
||||
fmax = 1e-3 # lowest point in the grid relative to the max-lambda
|
||||
fmin = 1e-5 # highest point in the grid relative to the max-lambda
|
||||
grid = np.linspace(fmin,fmax,n_points)
|
||||
elif grid_type == "log-linear":
|
||||
# Another equally spaced linear grid
|
||||
fmax = 1e-2 # lowest point in the grid relative to the max-lambda
|
||||
fmin = 1e-4 # highest point in the grid relative to the max-lambda
|
||||
grid = np.exp(np.linspace(np.log(fmin),np.log(fmax),n_points))
|
||||
else:
|
||||
raise ValueError("Unknown grid type: %s" % grid_type)
|
||||
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# get the Cross Validation Error over a grid of L1 penalty
|
||||
# parameters using both (a) Treat/Control and (b) the
|
||||
# leave-one-out controls only methods
|
||||
# ------------------------------------------------------------
|
||||
|
||||
if False:
|
||||
print("starting grid scoring for treat / control scenario", grid*L1_max_ct)
|
||||
grid_scores_ct = SC.CV_score(
|
||||
X = X_control,
|
||||
Y = Y_pre_control,
|
||||
|
||||
X_treat = X_treated,
|
||||
Y_treat = Y_pre_treated,
|
||||
|
||||
# if LAMBDA is a single value, we get a single score, If it's an array of values, we get an array of scores.
|
||||
LAMBDA = grid * L1_max_ct,
|
||||
L2_PEN_W = L2_pen_start_ct,
|
||||
|
||||
# CACHE THE V MATRIX BETWEEN LAMBDA PARAMETERS (generally faster, but path dependent)
|
||||
cache = False, # False by Default
|
||||
|
||||
# Run each of the Cross-validation folds in parallel? Often slower
|
||||
# for large sample sizes because numpy.linalg.solve() already runs
|
||||
# in parallel for large matrices
|
||||
parallel=False,
|
||||
|
||||
# ANNOUNCE COMPLETION OF EACH ITERATION
|
||||
progress = True)
|
||||
|
||||
best_L1_penalty_ct = (grid * L1_max_ct)[np.argmin(grid_scores_ct)]
|
||||
|
||||
if False:
|
||||
# even with smaller data, this takes a while.
|
||||
print("starting grid scoring for Controls Only scenario", grid*L1_max_ct)
|
||||
grid_scores_loo = SC.CV_score(
|
||||
X = X_and_Y_pre [np.arange(100)], # limit the amount of time...
|
||||
Y = Y_post [np.arange(100)], # limit the amount of time...
|
||||
|
||||
# L1 Penalty. if LAMBDA is a single value (value), we get a single score, If it's an array of values, we get an array of scores.
|
||||
LAMBDA = grid * L1_max_loo,
|
||||
|
||||
# L2 Penalty (float)
|
||||
L2_PEN_W = L2_pen_start_loo,
|
||||
|
||||
# CACHE THE V MATRIX BETWEEN LAMBDA PARAMETERS (generally faster, but path dependent)
|
||||
#cache = True, # False by Default
|
||||
|
||||
# Run each of the Cross-validation folds in parallel? Often slower
|
||||
# for large sample sizes because numpy.linalg.solve() already runs
|
||||
# in parallel for large matrices
|
||||
parallel=False,
|
||||
|
||||
# ANNOUNCE COMPLETION OF EACH ITERATION
|
||||
progress = True)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Calculate Synthetic Control weights for a fixed pair of penalty parameters
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# This is a two-step process because in principle, we can estimate a
|
||||
# tensor matrix (which contains the relative weights of the covariates and
|
||||
# possibly pre-treatment outcomes) in one population, and apply it to
|
||||
# another population.
|
||||
best_L1_penalty_ct = np.float(1908.9329)
|
||||
|
||||
# -----------------------------------
|
||||
# Treat/Control:
|
||||
# -----------------------------------
|
||||
|
||||
V_ct = SC.tensor(X = X_control,
|
||||
Y = Y_pre_control,
|
||||
X_treat = X_treated,
|
||||
Y_treat = Y_pre_treated,
|
||||
LAMBDA = best_L1_penalty_ct,
|
||||
L2_PEN_W = L2_pen_start_ct)
|
||||
|
||||
SC_weights_ct = SC.weights(X = X_control,
|
||||
X_treat = X_treated,
|
||||
V = V_ct,
|
||||
L2_PEN_W = L2_pen_start_ct)
|
||||
|
||||
Y_post_treated_synthetic_conrols_ct = SC_weights_ct.dot(Y_post_control)
|
||||
ct_prediction_error = Y_post_treated_synthetic_conrols_ct - Y_post_treated
|
||||
null_model_error = Y_post_treated - np.mean(Y_pre_treated)
|
||||
R_squared_post_ct = 1 - np.power(ct_prediction_error,2).sum() / np.power(null_model_error,2).sum()
|
||||
|
||||
print( "C/T: Out of Sample post intervention R squared: %0.2f%% " % (100*R_squared_post_ct,))
|
||||
|
||||
|
||||
# -----------------------------------
|
||||
# Leave-One-Out with Control Only:
|
||||
# -----------------------------------
|
||||
|
||||
if False:
|
||||
# this takes a while:
|
||||
V_loo = SC.tensor(
|
||||
X = X_and_Y_pre [np.arange(100)], # limit the amount of time...
|
||||
Y = Y_post [np.arange(100)], # limit the amount of time...
|
||||
LAMBDA = best_L1_penalty_ct,
|
||||
L2_PEN_W = L2_pen_start_loo)
|
||||
|
||||
SC_weights_loo = SC.weights(X = X_control,
|
||||
V = V_ct,
|
||||
L2_PEN_W = L2_pen_start_loo)
|
||||
|
||||
# in progress...
|
||||
import pdb; pdb.set_trace()
|
||||
|
||||
Y_post_treated_synthetic_conrols_loo = SC_weights_ct.dot(Y_post_control)
|
||||
loo_prediction_error = Y_post_treated_synthetic_conrols_loo - Y_post_treated
|
||||
null_model_error = Y_post_treated - np.mean(Y_pre_treated)
|
||||
R_squared_post_ct = 1 - np.power(loo_prediction_error,2).sum() / np.power(null_model_error,2).sum()
|
||||
|
||||
print( "LOO: Out of Sample post intervention R squared: %0.2f%% " % (100*R_squared_post_ct,))
|
||||
|
||||
import pdb; pdb.set_trace()
|
||||
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# ---------------------------------------------------------------------------
|
||||
# Optimization of the L1 and L2 parameters together (second order)
|
||||
# ---------------------------------------------------------------------------
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
from scipy.optimize import fmin_l_bfgs_b, differential_evolution
|
||||
import time
|
||||
|
||||
# -----------------------------------------------------------------
|
||||
# Optimization of the L2 and L1 Penalties Simultaneously, keeping their product constant
|
||||
# -----------------------------------------------------------------
|
||||
|
||||
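    # Editorial note (inferred from the objective below): the single search
    # variable x[0] moves the two penalties in opposite directions -- LAMBDA is
    # multiplied by exp(x[0]) while L2_PEN_W is divided by exp(x[0]) -- so their
    # product stays constant and differential evolution searches along that
    # one-dimensional direction in (L1, L2) space.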
    # build the objective function to be minimized

    # cache for L2_obj_func
    n_calls = [0,]
    temp_results = []
    SS = np.power(Y_pre_treated - np.mean(Y_pre_treated), 2).sum()
    best_L1_penalty_ct = float(1908.9329)

    def L1_L2_obj_func(x):
        n_calls[0] += 1
        t1 = time.time()
        score = SC.CV_score(X = X_control,
                            Y = Y_pre_control,
                            X_treat = X_treated,
                            Y_treat = Y_pre_treated,
                            # if LAMBDA is a single value, we get a single score; if it's an array of values, we get an array of scores.
                            LAMBDA = best_L1_penalty_ct * np.exp(x[0]),
                            L2_PEN_W = L2_pen_start_ct / np.exp(x[0]),
                            # suppress the analysis type message
                            quiet = True)
        t2 = time.time()
        temp_results.append((n_calls[0], x, score))
        print("calls: %s, time: %0.4f, x0: %0.4f, Cross Validation Error: %s, out-of-sample R-Squared: %s" % (n_calls[0], t2 - t1, x[0], score, 1 - score / SS))
        #print("calls: %s, time: %0.4f, x0: %0.4f, x1: %0.4f, Cross Validation Error: %s, R-Squared: %s" % (n_calls[0], t2 - t1, x[0], x[1], score, 1 - score / SS))
        return score

    # the actual optimization
    print("Starting joint L1/L2 penalty optimization (constant product)")

    results = differential_evolution(L1_L2_obj_func, bounds = ((-6, 6,),))
    #results = differential_evolution(L1_L2_obj_func, bounds = ((-6,6,),)*2 )

    import pdb; pdb.set_trace()
    NEW_best_L1_penalty_ct = best_L1_penalty_ct * np.exp(results.x[0])
    best_L2_penalty = L2_pen_start_ct / np.exp(results.x[0]) # results.x has a single element here; the original indexed x[1]

    print("DE optimized L1 Penalty: %s, DE optimized L2 penalty: %s" % (NEW_best_L1_penalty_ct, best_L2_penalty,))

    # -----------------------------------------------------------------
    # Optimization of the L2 Parameter alone
    # -----------------------------------------------------------------

    # -----------------------
    # build the objective function to be minimized
    # -----------------------

    # OBJECTIVE FUNCTION(S) TO MINIMIZE USING DE

    # cache for L2_obj_func
    n_calls = [0,]
    temp_results = []
    SS = np.power(Y_pre_treated - np.mean(Y_pre_treated), 2).sum()
    best_L1_penalty_ct = float(1908.9329)

    def L2_obj_func(x):
        n_calls[0] += 1
        t1 = time.time()

        score = SC.CV_score(X = X_control,
                            Y = Y_pre_control,
                            X_treat = X_treated,
                            Y_treat = Y_pre_treated,
                            # if LAMBDA is a single value, we get a single score; if it's an array of values, we get an array of scores.
                            LAMBDA = best_L1_penalty_ct,
                            L2_PEN_W = L2_pen_start_ct * np.exp(x[0]),
                            # suppress the analysis type message
                            quiet = True)

        t2 = time.time()
        temp_results.append((n_calls[0], x, score))
        print("calls: %s, time: %0.4f, x0: %0.4f, Cross Validation Error: %s, out-of-sample R-Squared: %s" %
              (n_calls[0], t2 - t1, x[0], score, 1 - score / SS))
        return score

    # the actual optimization
    print("Starting L2 Penalty optimization")

    results = differential_evolution(L2_obj_func, bounds = ((-6, 6,),))
    NEW_L2_pen_start_ct = L2_pen_start_ct * np.exp(results.x[0]) # results.x is one-dimensional here; the original indexed x[1]
    print("DE optimized L2 Penalty: %s, using fixed L1 penalty: %s" % (NEW_L2_pen_start_ct, best_L1_penalty_ct,))

    # -----------------------------------------------------------------
    # Optimization of the L2 and L1 Penalties Simultaneously
    # -----------------------------------------------------------------

    # build the objective function to be minimized

    # cache for L2_obj_func
    n_calls = [0,]
    temp_results = []
    SS = np.power(Y_pre_treated - np.mean(Y_pre_treated), 2).sum()
    best_L1_penalty_ct = float(1908.9329)

    def L1_L2_obj_func(x):
        n_calls[0] += 1
        t1 = time.time()
        score = SC.CV_score(X = X_control,
                            Y = Y_pre_control,
                            X_treat = X_treated,
                            Y_treat = Y_pre_treated,
                            # if LAMBDA is a single value, we get a single score; if it's an array of values, we get an array of scores.
                            LAMBDA = best_L1_penalty_ct * np.exp(x[0]),
                            L2_PEN_W = L2_pen_start_ct * np.exp(x[1]),
                            # suppress the analysis type message
                            quiet = True)
        t2 = time.time()
        temp_results.append((n_calls[0], x, score))
        print("calls: %s, time: %0.4f, x0: %0.4f, x1: %0.4f, Cross Validation Error: %s, out-of-sample R-Squared: %s" % (n_calls[0], t2 - t1, x[0], x[1], score, 1 - score / SS))
        return score

    # the actual optimization
    print("Starting joint L1/L2 penalty optimization")

    results = differential_evolution(L1_L2_obj_func, bounds = ((-6, 6,),)*2)

    import pdb; pdb.set_trace()
    NEW_best_L1_penalty_ct = best_L1_penalty_ct * np.exp(results.x[0])
    best_L2_penalty = L2_pen_start_ct * np.exp(results.x[1])

    print("DE optimized L1 Penalty: %s, DE optimized L2 penalty: %s" % (NEW_best_L1_penalty_ct, best_L2_penalty,))
RidgeSC/fit_ct.py
@@ -0,0 +1,175 @@
from numpy import dot, ones, diag, matrix, zeros, array, absolute, mean, var, linalg, prod, shape, sqrt
import numpy as np
import pandas as pd
import itertools
import timeit
import warnings
from .optimizers.cd_line_search import cdl_search
warnings.filterwarnings('ignore')

def ct_v_matrix(X,
                Y,
                LAMBDA = 0,
                treated_units = None,
                control_units = None,
                start = None,
                L2_PEN_W = None,
                method = cdl_search,
                intercept = True,
                max_lambda = False, # this is terrible, at least without documentation...
                **kwargs):
    '''
    Computes and sets the optimal v_matrix for the given moments and
    penalty parameter.

    :param X: Matrix of Covariates
    :param Y: Matrix of Outcomes
    :param LAMBDA: penalty parameter used to shrink the L1 norm of v/v.max() toward zero
    :param treated_units: a list containing the position (rows) of the treated units within X and Y
    :param control_units: a list containing the position (rows) of the control units within X and Y
    :param start: initial values for the diagonals of the tensor matrix
    :param L2_PEN_W: L2 penalty on the magnitude of the deviance of the weight vector from null. Optional.
    :param method: The name of a method to be used by scipy.optimize.minimize, or a callable with the same API as scipy.optimize.minimize
    :param intercept: If True, weights are penalized toward 1 / the number of controls, else weights are penalized toward zero
    :param max_lambda: if True, the return value is the maximum L1 penalty for which at least one element of the tensor matrix is non-zero
    :param **kwargs: additional arguments passed to the optimizer

    '''
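    # Editorial sketch of the system solved below (inferred from _weights and
    # _score; the "# 5"-"# 9" tags appear to reference equations in a
    # derivation that is not included in this commit):
    #
    #   A = X_c (2V) X_c' + 2 * L2_PEN_W * I     # 5
    #   B = (X_t (2V) X_c')'                     # 6
    #   solve A w = B (one ridge system covering all treated units at once)
    #
    # and the penalized objective minimized over diag(V) is
    #   || Y_t - w' Y_c ||_F^2 + LAMBDA * || diag(V) ||_1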
    # DEFAULTS
    if treated_units is None:
        if control_units is None:
            raise ValueError("At least one of treated_units or control_units is required")
        # Set the treated units to the not-control units
        treated_units = list(set(range(X.shape[0])) - set(control_units))
    if control_units is None:
        control_units = list(set(range(X.shape[0])) - set(treated_units))

    # Parameter QC
    if len(set(treated_units).intersection(control_units)):
        raise ValueError("Treated and Control units must be exclusive")
    if not isinstance(X, matrix): raise TypeError("X is not a matrix")
    if not isinstance(Y, matrix): raise TypeError("Y is not a matrix")
    if X.shape[1] == 0: raise ValueError("X.shape[1] == 0")
    if Y.shape[1] == 0: raise ValueError("Y.shape[1] == 0")
    if X.shape[0] != Y.shape[0]: raise ValueError("X and Y have different number of rows (%s and %s)" % (X.shape[0], Y.shape[0],))
    if not isinstance(LAMBDA, (float, int)): raise TypeError("LAMBDA is not a number")
    if L2_PEN_W is None: L2_PEN_W = mean(var(X, axis = 0))
    if not isinstance(L2_PEN_W, (float, int)): raise TypeError("L2_PEN_W is not a number")

    # CONSTANTS
    C, N, K = len(control_units), len(treated_units), X.shape[1]
    if start is None: start = zeros(K) # formerly: .1 * ones(K)
    Y_treated = Y[treated_units,:]
    Y_control = Y[control_units,:]
    X_treated = X[treated_units,:]
    X_control = X[control_units,:]

    if intercept:
        Y_treated -= Y_control.mean(axis=0)

    # INITIALIZE PARTIAL DERIVATIVES
    dA_dV_ki = [ X_control[:, k].dot(X_control[:, k].T) + # i,j are on the diagonal (both equal to k)
                 X_control[:, k].dot(X_control[:, k].T) for k in range(K)] # 8
    dB_dV_ki = [ X_control[:, k].dot(X_treated[:, k].T) + # i,j are on the diagonal (both equal to k)
                 X_control[:, k].dot(X_treated[:, k].T) for k in range(K)] # 9

    def _score(V):
        dv = diag(V)
        weights, _, _, _ = _weights(dv)
        Ey = (Y_treated - weights.T.dot(Y_control)).getA()
        return ((Ey ** 2).sum() + LAMBDA * absolute(V).sum()).copy() # (...).copy() assures that x.flags.writeable is True

    def _grad(V):
        """ Calculates just the diagonal of dGamma0_dV

            There is an implementation that allows for all elements of V to be varied...
        """
        dv = diag(V)
        weights, A, B, AinvB = _weights(dv)
        Ey = (Y_treated - weights.T.dot(Y_control)).getA()
        dGamma0_dV_term2 = zeros(K)
        dPI_dV = zeros((C, N))
        #Ai = A.I
        for k in range(K):
            dPI_dV.fill(0) # faster than re-allocating the memory each loop.
            dA = dA_dV_ki[k]
            dB = dB_dV_ki[k]
            dPI_dV = linalg.solve(A, (dB - dA.dot(AinvB)))
            #dPI_dV = Ai.dot(dB - dA.dot(AinvB))
            dGamma0_dV_term2[k] = (Ey * Y_control.T.dot(dPI_dV).T.getA()).sum()
        return LAMBDA - 2 * dGamma0_dV_term2

    L2_PEN_W_mat = 2 * L2_PEN_W * diag(ones(X_control.shape[0]))
    def _weights(V):
        A = X_control.dot(2*V).dot(X_control.T) + L2_PEN_W_mat # 5
        B = X_treated.dot(2*V).dot(X_control.T).T # 6
        b = linalg.solve(A, B)
        # NOTE: the intercept component (1/C) is added once, after the
        # optimization, because Y_treated was already offset by
        # Y_control.mean() above; the original draft also added it here,
        # which would have counted it twice.
        weights = b
        return weights, A, B, b
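    # Editorial note on max_lambda (inferred from _grad): with the default
    # LAMBDA = 0, grad0[k] = -2*g_k, where g_k is the data-driven descent term
    # for the k-th diagonal entry of V. An entry can move away from zero only
    # while LAMBDA < 2*g_k, so -grad0[grad0 < 0].min() = max_k 2*g_k is the
    # smallest penalty at which the fitted V is identically zero.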
    if max_lambda:
        grad0 = _grad(zeros(K))
        return -grad0[grad0 < 0].min()

    # DO THE OPTIMIZATION
    if isinstance(method, str):
        from scipy.optimize import minimize
        opt = minimize(_score, start.copy(), jac = _grad, method = method, **kwargs)
    else:
        assert callable(method), "Method must be a valid method name for scipy.optimize.minimize or a minimizer"
        opt = method(_score, start.copy(), jac = _grad, **kwargs)
    v_mat = diag(opt.x)

    # CALCULATE weights AND ts_score
    weights, _, _, _ = _weights(v_mat)
    errors = Y_treated - weights.T.dot(Y_control)
    ts_loss = opt.fun
    ts_score = linalg.norm(errors) / sqrt(prod(errors.shape))

    #if True:
    #    _do_gradient_check()
    if intercept:
        # not above, b/c Y_treated was already offset at the start
        weights += 1/C
    return weights, v_mat, ts_score, ts_loss, L2_PEN_W, opt

def ct_weights(X, V, L2_PEN_W, treated_units = None, control_units = None, intercept = True):
    if treated_units is None:
        if control_units is None:
            raise ValueError("At least one of treated_units or control_units is required")
        # Set the treated units to the not-control units
        treated_units = list(set(range(X.shape[0])) - set(control_units))
    if control_units is None:
        control_units = list(set(range(X.shape[0])) - set(treated_units))

    C, N, K = len(control_units), len(treated_units), X.shape[1]
    X_treated = X[treated_units,:]
    X_control = X[control_units,:]

    A = X_control.dot(2*V).dot(X_control.T) + 2 * L2_PEN_W * diag(ones(X_control.shape[0])) # 5
    B = X_treated.dot(2*V).dot(X_control.T).T # 6

    weights = linalg.solve(A, B)
    if intercept:
        weights += 1/C
    return weights.T

def ct_score(Y, X, V, L2_PEN_W, LAMBDA = 0, treated_units = None, control_units = None, **kwargs):
    if treated_units is None:
        if control_units is None:
            raise ValueError("At least one of treated_units or control_units is required")
        # Set the treated units to the not-control units
        treated_units = list(set(range(X.shape[0])) - set(control_units))
    if control_units is None:
        control_units = list(set(range(X.shape[0])) - set(treated_units))
    weights = ct_weights(X = X, V = V, L2_PEN_W = L2_PEN_W, treated_units = treated_units, control_units = control_units, **kwargs)
    Y_tr = Y[treated_units, :]
    Y_c = Y[control_units, :]
    Ey = (Y_tr - weights.dot(Y_c)).getA()
    return (Ey ** 2).sum() + LAMBDA * V.sum()
RidgeSC/fit_loo.py
@@ -0,0 +1,366 @@
from numpy import dot, ones, diag, matrix, zeros, array, absolute, mean, var, linalg, prod, shape, sqrt
import numpy as np
import pandas as pd
import itertools
import timeit
import warnings
from .utils.sub_matrix_inverse import subinv_k, all_subinverses
from .optimizers.cd_line_search import cdl_search
warnings.filterwarnings('ignore')

def loo_v_matrix(X,
                 Y,
                 LAMBDA = 0,
                 treated_units = None,
                 control_units = None,
                 non_neg_weights = False,
                 start = None,
                 L2_PEN_W = None,
                 method = cdl_search,
                 intercept = True,
                 max_lambda = False, # this is terrible, at least without documentation...
                 solve_method = "standard",
                 **kwargs):
    '''
    Computes and sets the optimal v_matrix for the given moments and
    penalty parameter.

    :param X: Matrix of Covariates
    :param Y: Matrix of Outcomes
    :param LAMBDA: penalty parameter used to shrink the L1 norm of v/v.max() toward zero
    :param treated_units: a list containing the position (rows) of the treated units within X and Y
    :param control_units: a list containing the position (rows) of the control units within X and Y
    :param start: initial values for the diagonals of the tensor matrix
    :param L2_PEN_W: L2 penalty on the magnitude of the deviance of the weight vector from null. Optional.
    :param method: The name of a method to be used by scipy.optimize.minimize, or a callable with the same API as scipy.optimize.minimize
    :param intercept: If True, weights are penalized toward 1 / the number of controls, else weights are penalized toward zero
    :param max_lambda: if True, the return value is the maximum L1 penalty for which at least one element of the tensor matrix is non-zero
    :param solve_method: Method for solving A.I.dot(B). Either "standard" or "step-down".
    :param **kwargs: additional arguments passed to the optimizer

    '''
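    # Editorial note on the leave-one-out structure (inferred from the indexing
    # below): each treated unit's weights are solved against every control
    # except itself, so in_controls[i] drops unit i from its own donor pool.
    # "standard" solves each (C-1)x(C-1) system directly with linalg.solve,
    # while "step-down" inverts the full A once and derives each leave-one-out
    # inverse from it via subinv_k (see utils.sub_matrix_inverse).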
# (by default all the units are treated and all are controls)
|
||||
if treated_units is None:
|
||||
if control_units is None:
|
||||
# Neither provided; INCLUDE ALL SAMPLES AS BOTH TREAT AND CONTROL UNIT.
|
||||
# (this is the typical controls-only Loo V-matrix estimation)
|
||||
control_units = list(range(X.shape[0]))
|
||||
treated_units = control_units
|
||||
else:
|
||||
# Set the treated units to the not-control units
|
||||
treated_units = list(set(range(X.shape[0])) - set(control_units))
|
||||
else:
|
||||
if control_units is None:
|
||||
# Set the control units to the not-treated units
|
||||
control_units = list(set(range(X.shape[0])) - set(treated_units))
|
||||
control_units = np.array(control_units)
|
||||
treated_units = np.array(treated_units)
|
||||
|
||||
# parameter QC
|
||||
if not isinstance(X, matrix): raise TypeError("X is not a matrix")
|
||||
if not isinstance(Y, matrix): raise TypeError("Y is not a matrix")
|
||||
if X.shape[1] == 0: raise ValueError("X.shape[1] == 0")
|
||||
if Y.shape[1] == 0: raise ValueError("Y.shape[1] == 0")
|
||||
if X.shape[0] != Y.shape[0]: raise ValueError("X and Y have different number of rows (%s and %s)" % (X.shape[0], Y.shape[0],))
|
||||
if not isinstance(LAMBDA, (float, int)): raise TypeError( "LAMBDA is not a number")
|
||||
if L2_PEN_W is None: L2_PEN_W = mean(var(X, axis = 0))
|
||||
if not isinstance(L2_PEN_W, (float, int)): raise TypeError( "L2_PEN_W is not a number")
|
||||
assert not non_neg_weights, "Bounds not implemented"
|
||||
|
||||
# CONSTANTS
|
||||
C, N, K = len(control_units), len(treated_units), X.shape[1]
|
||||
if start is None: start = zeros(K) # formerly: .1 * ones(K)
|
||||
assert N > 0; "No control units"
|
||||
assert C > 0; "No treated units"
|
||||
assert K > 0; "variables to fit (X.shape[1] == 0)"
|
||||
|
||||
# CREATE THE INDEX THAT INDICATES THE ELIGIBLE CONTROLS FOR EACH TREATED UNIT
|
||||
in_controls = [list(set(control_units) - set([trt_unit])) for trt_unit in treated_units]
|
||||
in_controls2 = [np.ix_(i,i) for i in in_controls] # this is a much faster alternative to A[:,index][index,:]
|
||||
ctrl_rng = np.arange(len(control_units))
|
||||
out_controls = [ctrl_rng[control_units != trt_unit] for trt_unit in treated_units]
|
||||
out_treated = [ctrl_rng[control_units == trt_unit] for trt_unit in treated_units] # this is non-trivial when there control units are also being predicted.
|
||||
|
||||
if intercept:
|
||||
for i, trt_unit in enumerate(treated_units):
|
||||
Y[trt_unit,:] -= Y[in_controls[i],:].mean(axis=0)
|
||||
|
||||
# handy constants (for speed purposes):
|
||||
Y_treated = Y[treated_units,:]
|
||||
Y_control = Y[control_units,:]
|
||||
X_treated = X[treated_units,:]
|
||||
X_control = X[control_units,:]
|
||||
|
||||
# INITIALIZE PARTIAL DERIVATIVES
|
||||
dA_dV_ki = [ [None,] *N for i in range(K)]
|
||||
dB_dV_ki = [ [None,] *N for i in range(K)]
|
||||
b_i = [None,] *N
|
||||
for i, k in itertools.product(range(N), range(K)): # TREATED unit i, moment k
|
||||
Xc = X[in_controls[i], : ]
|
||||
Xt = X[treated_units[i], : ]
|
||||
dA_dV_ki [k][i] = Xc[:, k ].dot(Xc[:, k ].T) + Xc[:, k ].dot(Xc[:, k ].T) # 8
|
||||
dB_dV_ki [k][i] = Xc[:, k ].dot(Xt[:, k ].T) + Xc[:, k ].dot(Xt[:, k ].T) # 9
|
||||
|
||||
del Xc, Xt, i, k
|
||||
|
||||
#assert (dA_dV_ki [k][i] == X[index, k ].dot(X[index, k ].T) + X[index, k ].dot(X[index, k ].T)).all()
|
||||
# https://math.stackexchange.com/a/1471836/252693
|
||||
|
||||
def _score(V):
|
||||
dv = diag(V)
|
||||
weights, _, _ = _weights(dv)
|
||||
Ey = (Y_treated - weights.T.dot(Y_control)).getA()
|
||||
return ((Ey **2).sum() + LAMBDA * absolute(V).sum()).copy() # (...).copy() assures that x.flags.writeable is True
|
||||
|
||||
def _grad(V):
|
||||
""" Calculates just the diagonal of dGamma0_dV
|
||||
|
||||
There is an implementation that allows for all elements of V to be varied...
|
||||
"""
|
||||
dv = diag(V)
|
||||
weights, A, B = _weights(dv)
|
||||
Ey = (Y_treated - weights.T.dot(Y_control)).getA()
|
||||
dGamma0_dV_term2 = zeros(K)
|
||||
dPI_dV = zeros((C, N))
|
||||
if solve_method == "step-down":
|
||||
Ai_cache = all_subinverses(A)
|
||||
for k in range(K):
|
||||
dPI_dV.fill(0) # faster than re-allocating the memory each loop.
|
||||
for i, index in enumerate(in_controls):
|
||||
dA = dA_dV_ki[k][i]
|
||||
dB = dB_dV_ki[k][i]
|
||||
if solve_method == "step-down":
|
||||
b = Ai_cache[i].dot(dB - dA.dot(b_i[i]))
|
||||
else:
|
||||
b = linalg.solve(A[in_controls2[i]],dB - dA.dot(b_i[i]))
|
||||
dPI_dV[index, i] = b.flatten() # TODO: is the Transpose an error???
|
||||
dGamma0_dV_term2[k] = (Ey * Y_control.T.dot(dPI_dV).T.getA()).sum()
|
||||
return LAMBDA - 2 * dGamma0_dV_term2 # (...).copy() assures that x.flags.writeable is True // * (1 - 2*(V < 0)) was droped b/c we're restricting to positive V's
|
||||
|
||||
L2_PEN_W_mat = 2 * L2_PEN_W * diag(ones(X_control.shape[0]))
|
||||
def _weights(V):
|
||||
weights = zeros((C, N))
|
||||
if solve_method == "step-down":
|
||||
A = X_control.dot(V + V.T).dot(X_control.T) + 2 * L2_PEN_W * diag(ones(X_control.shape[0])) # 5
|
||||
B = X_treated.dot(V + V.T).dot(X_control.T) # 6
|
||||
Ai = A.I
|
||||
for i, trt_unit in enumerate(treated_units):
|
||||
if trt_unit in control_units:
|
||||
(b) = subinv_k(Ai,_k).dot(B[out_controls[i],i])
|
||||
else:
|
||||
(b) = Ai.dot(B[:, i])
|
||||
b_i[i] = b
|
||||
weights[out_controls[i], i] = b.flatten()
|
||||
elif solve_method == "standard":
|
||||
A = X.dot(V + V.T).dot(X.T) + 2 * L2_PEN_W * diag(ones(X.shape[0])) # 5
|
||||
B = X.dot(V + V.T).dot(X.T).T # 6
|
||||
for i, trt_unit in enumerate(treated_units):
|
||||
(b) = b_i[i] = linalg.solve(A[in_controls2[i]], B[in_controls[i], trt_unit])
|
||||
weights[out_controls[i], i] = b.flatten()
|
||||
else:
|
||||
raise ValueError("Unknown Solve Method: " + solve_method)
|
||||
return weights, A, B
|
||||
|
||||
if max_lambda:
|
||||
grad0 = _grad(zeros(K))
|
||||
return -grad0[grad0 < 0].min()
|
||||
|
||||
# DO THE OPTIMIZATION
|
||||
if isinstance(method, str):
|
||||
from scipy.optimize import minimize
|
||||
opt = minimize(_score, start.copy(), jac = _grad, method = method, **kwargs)
|
||||
else:
|
||||
assert callable(method), "Method must be a valid method name for scipy.optimize.minimize or a minimizer"
|
||||
opt = method(_score, start.copy(), jac = _grad, **kwargs)
|
||||
v_mat = diag(opt.x)
|
||||
# CALCULATE weights AND ts_score
|
||||
weights, _, _ = _weights(v_mat)
|
||||
errors = Y_treated - weights.T.dot(Y_control)
|
||||
ts_loss = opt.fun
|
||||
ts_score = linalg.norm(errors) / sqrt(prod(errors.shape))
|
||||
|
||||
#if True:
|
||||
# _do_gradient_check()
|
||||
|
||||
if intercept:
|
||||
for i, trt_unit in enumerate(treated_units):
|
||||
weights[out_controls[i], i] += 1/len(out_controls[i])
|
||||
return weights, v_mat, ts_score, ts_loss, L2_PEN_W, opt
|
||||
|
||||
def loo_weights(X, V, L2_PEN_W, treated_units = None, control_units = None, intercept = True, solve_method = "standard"):
|
||||
if treated_units is None:
|
||||
if control_units is None:
|
||||
# both not provided, include all samples as both treat and control unit.
|
||||
control_units = list(range(X.shape[0]))
|
||||
treated_units = control_units
|
||||
else:
|
||||
# Set the treated units to the not-control units
|
||||
treated_units = list(set(range(X.shape[0])) - set(control_units))
|
||||
else:
|
||||
if control_units is None:
|
||||
# Set the control units to the not-treated units
|
||||
control_units = list(set(range(X.shape[0])) - set(treated_units))
|
||||
control_units = np.array(control_units)
|
||||
treated_units = np.array(treated_units)
|
||||
[C, N, K] = [len(control_units), len(treated_units), X.shape[1]]
|
||||
|
||||
|
||||
# index with positions of the controls relative to the incoming data
|
||||
in_controls = [list(set(control_units) - set([trt_unit])) for trt_unit in treated_units]
|
||||
in_controls2 = [np.ix_(i,i) for i in in_controls] # this is a much faster alternative to A[:,index][index,:]
|
||||
|
||||
# index of the controls relative to the rows of the outgoing C x N matrix of weights
|
||||
ctrl_rng = np.arange(len(control_units))
|
||||
out_controls = [ctrl_rng[control_units != trt_unit] for trt_unit in treated_units]
|
||||
out_treated = [ctrl_rng[control_units == trt_unit] for trt_unit in treated_units] # this is non-trivial when there control units are also being predicted.
|
||||
|
||||
# constants for indexing
|
||||
X_control = X[control_units,:]
|
||||
X_treat = X[treated_units,:]
|
||||
weights = zeros((C, N))
|
||||
|
||||
if solve_method == "step-down":
|
||||
A = X_control.dot(V + V.T).dot(X_control.T) + 2 * L2_PEN_W * diag(ones(X_control.shape[0])) # 5
|
||||
B = X_treat.dot( V + V.T).dot(X_control.T) # 6
|
||||
Ai = A.I
|
||||
for i, trt_unit in enumerate(treated_units):
|
||||
if trt_unit in control_units:
|
||||
(b) = subinv_k(Ai,_k).dot(B[out_controls[i],i])
|
||||
else:
|
||||
(b) = Ai.dot(B[:, i])
|
||||
|
||||
weights[out_controls[i], i] = b.flatten()
|
||||
if intercept:
|
||||
weights[out_controls[i], i] += 1/len(out_controls[i])
|
||||
elif solve_method == "standard":
|
||||
A = X.dot(V + V.T).dot(X.T) + 2 * L2_PEN_W * diag(ones(X.shape[0])) # 5
|
||||
B = X.dot(V + V.T).dot(X.T).T # 6
|
||||
for i, trt_unit in enumerate(treated_units):
|
||||
(b) = linalg.solve(A[in_controls2[i]], B[in_controls[i], trt_unit])
|
||||
|
||||
weights[out_controls[i], i] = b.flatten()
|
||||
if intercept:
|
||||
weights[out_controls[i], i] += 1/len(out_controls[i])
|
||||
else:
|
||||
raise ValueError("Unknown Solve Method: " + solve_method)
|
||||
return weights.T
|
||||
|
||||
|
||||
|
||||
def __old__loo_weights(X, V, L2_PEN_W, treated_units = None, control_units = None, intercept = True):
|
||||
assert False
|
||||
if treated_units is None:
|
||||
if control_units is None:
|
||||
# both not provided, include all samples as both treat and control unit.
|
||||
control_units = list(range(X.shape[0]))
|
||||
treated_units = control_units
|
||||
else:
|
||||
# Set the treated units to the not-control units
|
||||
treated_units = list(set(range(X.shape[0])) - set(control_units))
|
||||
else:
|
||||
if control_units is None:
|
||||
# Set the control units to the not-treated units
|
||||
control_units = list(set(range(X.shape[0])) - set(treated_units))
|
||||
[C, N, K] = [len(control_units), len(treated_units), X.shape[1]]
|
||||
index_i = [list(set(list(range(200))) - set([col])) for col in treated_units]
|
||||
|
||||
index_i2 = [np.ix_(i,i) for i in index_i] # this is a much faster alternative to A[:,index][index,:]
|
||||
index_map = {c_unit : row_at for row_at, c_unit in enumerate(control_units)}
|
||||
|
||||
weights = zeros((C, N))
|
||||
A = X.dot(V + V.T).dot(X.T) + 2 * L2_PEN_W * diag(ones(X.shape[0])) # 5
|
||||
B = X.dot(V + V.T).dot(X.T).T # 6
|
||||
for col, index in enumerate(index_i):
|
||||
b = linalg.solve(A[index_i2[col]], B[index, col])
|
||||
weights[np.vectorize(index_map.get)(index), col] = b.T
|
||||
if intercept:
|
||||
weights[index, col] += 1/len(index)
|
||||
return weights
|
||||
|
||||
|
||||
|
||||
|
||||
def loo_score(Y, X, V, L2_PEN_W, LAMBDA = 0, treated_units = None, control_units = None,**kwargs):
|
||||
if treated_units is None:
|
||||
if control_units is None:
|
||||
# both not provided, include all samples as both treat and control unit.
|
||||
control_units = list(range(X.shape[0]))
|
||||
treated_units = control_units
|
||||
else:
|
||||
# Set the treated units to the not-control units
|
||||
treated_units = list(set(range(X.shape[0])) - set(control_units))
|
||||
else:
|
||||
if control_units is None:
|
||||
# Set the control units to the not-treated units
|
||||
control_units = list(set(range(X.shape[0])) - set(treated_units))
|
||||
weights = loo_weights(X = X,
|
||||
V = V,
|
||||
L2_PEN_W = L2_PEN_W,
|
||||
treated_units = treated_units,
|
||||
control_units = control_units,
|
||||
**kwargs)
|
||||
Y_tr = Y[treated_units, :]
|
||||
Y_c = Y[control_units, :]
|
||||
Ey = (Y_tr - weights.dot(Y_c)).getA()
|
||||
return (Ey **2).sum() + LAMBDA * V.sum()
|
||||
|
||||
|
||||
|
||||


def fold_v_matrix(Xt, Xc, Yt, Yc, LAMBDA = .1, L2_PEN_W = .1, guess = None, method = 'Newton-CG', **kwargs):
    """ Calculates the V matrix with explicit treatment and control folds,
        where the match is optimized on the error of the outcomes.

        Don't use this: it hasn't been thoroughly checked for bugs (compared
        to loo_v_matrix and ct_v_matrix) and lacks several of the
        optimizations of loo_v_matrix.
    """
    C, K = Xc.shape; N, T = Yt.shape
    two_L2_PEN_W = 0 # 2*L2_PEN_W*diag(ones(C))
    def _score(diagV):
        """ calculates the penalized squared prediction error for a given diagonal of V
        """
        V = diag(diagV)
        A = Xc.dot(2*V).dot(Xc.T) + two_L2_PEN_W # 5
        B = Xc.dot(2*V).dot(Xt.T) # 6
        Pi = A.I.dot(B)
        Ey = (Yt - Pi.T.dot(Yc)).getA()
        return (Ey ** 2).sum() + LAMBDA * absolute(Pi.T).sum()
    def _grad(diagV):
        """ calculates just the diagonal of dGamma0_dV
        """
        V = diag(diagV)
        assert Xc.shape[0] == Yc.shape[0]
        # CALCULATE CONSTANTS
        A = Xc.dot(2*V).dot(Xc.T) + two_L2_PEN_W # 5
        B = Xc.dot(2*V).dot(Xt.T) # 6
        Ai = A.I
        Pi = Ai.dot(B)
        Ey = (Yt - Pi.T.dot(Yc)).getA()
        # INITIALIZE PARTIAL DERIVATIVES
        dGamma0_dV_term2 = zeros((K, K))
        for i in range(K):
            dA = Xc[:, i].dot(Xc[:, i].T) + Xc[:, i].dot(Xc[:, i].T) # 8
            dB = Xc[:, i].dot(Xt[:, i].T) + Xc[:, i].dot(Xt[:, i].T) # 9
            dPI_dV = Ai.dot(dB - dA.dot(Pi)) # 7
            dGamma0_dV_term2[i, i] = (Ey * Yc.T.dot(dPI_dV).T.getA()).sum()
        return diag(LAMBDA * (1 - 2*(V < 0)) - 2 * dGamma0_dV_term2)
    def _weights(V):
        A = Xc.dot(2*V).dot(Xc.T) + two_L2_PEN_W # 5
        B = Xc.dot(2*V).dot(Xt.T) # 6
        #for col in treated_unit:
        #w_col = A[-col,-col].I.dot(B[:,col])
        return A.I.dot(B).T
    # DO THE OPTIMIZATION
    opt = minimize(_score, zeros(Xc.shape[1]) if guess is None else guess,
                   jac = _grad,
                   method = method, **kwargs)
    v_mat = diag(opt.x)
    ts_loss = opt.fun
    # CALCULATE weights AND ts_score
    weights = _weights(v_mat)
    errors = Yt - weights.dot(Yc)
    ts_score = linalg.norm(errors) / sqrt(prod(errors.shape))
    return weights, v_mat, ts_score, ts_loss, opt
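
# Usage sketch for fold_v_matrix (hypothetical fold; Xt/Yt are the treated
# fold's covariates/outcomes, Xc/Yc the control fold's). Note the docstring's
# caveat that this is less tested than ct_v_matrix and loo_v_matrix. A
# strictly positive `guess` keeps A invertible at the starting point, since
# the L2 term above is disabled (two_L2_PEN_W = 0).
#
#   import numpy as np
#   Xc = np.matrix(np.random.random((4, 6)))
#   Xt = np.matrix(np.random.random((2, 6)))
#   Yc = np.matrix(np.random.random((4, 3)))
#   Yt = np.matrix(np.random.random((2, 3)))
#   weights, v_mat, ts_score, ts_loss, opt = fold_v_matrix(
#       Xt, Xc, Yt, Yc, LAMBDA = .1, guess = np.ones(6))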
@ -0,0 +1,79 @@
from fit_loo import loo_v_matrix
from fit_ct import ct_v_matrix
from optimizers.cd_line_search import cdl_search
import numpy as np


def L2_pen_guestimate(X):
    return np.mean(np.var(X, axis = 0))
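
# Usage sketch: the guesstimate is just the mean per-covariate variance,
# a reasonable scale for the ridge penalty on the weights.
#
#   import numpy as np
#   X = np.matrix(np.random.random((20, 5)))
#   l2_pen = L2_pen_guestimate(X)   # == np.mean(np.var(X, axis=0))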


def get_max_lambda(X, Y, L2_PEN_W = None, X_treat = None, Y_treat = None, **kwargs):
    """ returns the maximum value of the L1 penalty for which the elements of the tensor matrix (V) are not all zero.
    """

    # PARAMETER QC
    if not isinstance(X, np.matrix): raise TypeError("X is not a matrix")
    if not isinstance(Y, np.matrix): raise TypeError("Y is not a matrix")
    if (X_treat is None) != (Y_treat is None):
        raise ValueError("parameters `X_treat` and `Y_treat` must both be Matrices or None")
    if X.shape[1] == 0: raise ValueError("X.shape[1] == 0")
    if Y.shape[1] == 0: raise ValueError("Y.shape[1] == 0")
    if X.shape[0] != Y.shape[0]: raise ValueError("X and Y have different number of rows (%s and %s)" % (X.shape[0], Y.shape[0],))
    if L2_PEN_W is None: L2_PEN_W = np.mean(np.var(X, axis = 0))

    if X_treat is not None:

        # PARAMETER QC
        if not isinstance(X_treat, np.matrix): raise TypeError("X_treat is not a matrix")
        if not isinstance(Y_treat, np.matrix): raise TypeError("Y_treat is not a matrix")
        if X_treat.shape[1] == 0: raise ValueError("X_treat.shape[1] == 0")
        if Y_treat.shape[1] == 0: raise ValueError("Y_treat.shape[1] == 0")
        if X_treat.shape[0] != Y_treat.shape[0]:
            raise ValueError("X_treat and Y_treat have different number of rows (%s and %s)" % (X_treat.shape[0], Y_treat.shape[0],))

        control_units = np.arange(X.shape[0])
        treated_units = np.arange(X.shape[0], X.shape[0] + X_treat.shape[0])

        try:
            iter(L2_PEN_W)
        except TypeError:
            # L2_PEN_W is a single value
            return ct_v_matrix(X = np.vstack((X, X_treat)),
                               Y = np.vstack((Y, Y_treat)),
                               L2_PEN_W = L2_PEN_W,
                               control_units = control_units,
                               treated_units = treated_units,
                               max_lambda = True, # this is terrible at least without documentation...
                               **kwargs)
        else:
            # L2_PEN_W is an iterable of values
            return [ ct_v_matrix(X = np.vstack((X, X_treat)),
                                 Y = np.vstack((Y, Y_treat)),
                                 control_units = control_units,
                                 treated_units = treated_units,
                                 max_lambda = True, # this is terrible at least without documentation...
                                 L2_PEN_W = l2_pen,
                                 **kwargs)
                     for l2_pen in L2_PEN_W ]

    else:

        try:
            iter(L2_PEN_W)
        except TypeError:
            # L2_PEN_W is a single value
            return loo_v_matrix(X = X,
                                Y = Y,
                                L2_PEN_W = L2_PEN_W,
                                max_lambda = True, # this is terrible at least without documentation...
                                **kwargs)
        else:
            # L2_PEN_W is an iterable of values
            return [ loo_v_matrix(X = X,
                                  Y = Y,
                                  L2_PEN_W = l2_pen,
                                  max_lambda = True, # this is terrible at least without documentation...
                                  **kwargs)
                     for l2_pen in L2_PEN_W ]
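
# Usage sketch (hypothetical grid): a common pattern is to find the largest
# useful L1 penalty and then cross-validate over a log-spaced grid below it.
#
#   import numpy as np
#   X = np.matrix(np.random.random((20, 5)))
#   Y = np.matrix(np.random.random((20, 8)))
#   max_lam = get_max_lambda(X, Y)
#   grid = max_lam * np.logspace(-3, 0, num = 10)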
@ -0,0 +1,150 @@
import numpy as np
from scipy.optimize import line_search

import locale
locale.setlocale(locale.LC_ALL, '')

class cd_res:
    def __init__(self, x, fun):
        self.x = x
        self.fun = fun

print_stop_iteration = 0

def cdl_search(score,
               guess,
               jac,
               tol = 1e-4,
               aggressiveness = 0.01,
               alpha_mult = .9,
               max_iter = 3000,
               min_iter = 3,
               # TODO: this is a stupid default (I'm using it out of laziness)
               zero_eps = 1e2 * np.finfo(float).eps,
               simplex = False,
               stol = None,
               print_path = False,
               print_path_verbose = False):
    '''
    Implements coordinate descent with a line search satisfying the strong
    Wolfe conditions. Note, this tends to give nearly identical results to
    L-BFGS-B, and is *much* slower than the super-fast 40-year-old Fortran
    code wrapped by scipy.
    '''
    assert 0 < aggressiveness < 1
    assert 0 < alpha_mult < 1
    assert (guess >= 0).all(), "Initial guess (`guess`) should be in the closed positive orthant"
    x_curr = guess
    alpha_t = 1.
    zero_score = val = score(x_curr)

    for _i in range(max_iter):

        grad = jac(x_curr)
        # directions that would step below zero from a coordinate that is already zero
        invalid_directions = np.logical_and(grad > 0, x_curr == 0)

        if (grad[np.logical_not(invalid_directions)] == 0).all():
            # this happens when we're stuck at the origin and the gradient is
            # pointing in the all-negative direction
            if print_stop_iteration: print("[gradient is zero] i: %s" % (_i,))
            return cd_res(x_curr, val)

        # Expected Loss Reduction for a unit step in each of the valid directions
        elr = sum(abs(g) for g in grad[np.logical_not(invalid_directions)])
        naive_step_size_multiplier = aggressiveness * val / elr
        grad *= naive_step_size_multiplier * alpha_t

        # constrain to the positive orthant: zero the gradient on axes where
        # the current guess is already zero and the step would go negative
        grad[invalid_directions] = 0

        if (grad > 0).any():
            constrained = True
            alpha_ratios = grad[grad > 0] / x_curr[grad > 0]
            if (alpha_ratios > 1).any():
                max_alpha = alpha_ratios.max()
            else:
                max_alpha = 1
        else:
            constrained = False
            max_alpha = 1

        res = line_search(f = score, myfprime = jac, xk = x_curr, pk = -grad/max_alpha)
        alpha, _, _, possibly_wrong_new_val, _, _ = res
        if alpha is not None:
            if alpha > 1:
                alpha_t /= alpha_mult
            else:
                alpha_t *= alpha_mult
        elif constrained:
            for j in range(5): # formerly range(17), but that was excessive; in general this succeeds when alpha >= 0.1 (super helpful) or alpha <= 1e-14 (super useless)
                if score(x_curr - (.3**j)*grad/max_alpha) < val:
                    # This can occur when the strong Wolfe condition insists that the
                    # current step size is too small (i.e. the gradient is too
                    # consistent with the function to think that a small step is
                    # optimal for a global (unconstrained) optimization.
                    alpha = (.3**j)

                    # i secretly think this is stupid.
                    if print_stop_iteration: print("[simple line search worked :)] i: %s, alpha: 0.3**%s" % (_i, j))
                    break
            else:
                # moving in the direction of the gradient yielded no improvement: stop
                if print_stop_iteration: print("[simple line search failed] i: %s" % (_i,))
                return cd_res(x_curr, val)
        else:
            # moving in the direction of the gradient yielded no improvement: stop
            if print_stop_iteration: print("[alpha is None] i: %s, grad: %s" % (_i, grad/max_alpha,))
            return cd_res(x_curr, val)

        # iterate
        try:
            if constrained:
                x_old, x_curr, val_old = x_curr, x_curr - min(1, alpha)*grad/max_alpha, val
            else:
                x_old, x_curr, val_old = x_curr, x_curr - alpha*grad/max_alpha, val
        except:
            import pdb; pdb.set_trace()

        val = score(x_curr)
        val_diff = val_old - val

        if print_path:
            try:
                print("i: %s, val: %s, val_diff: %0.2f, alpha: %0.5f, zeros: %s (constrained)" % (_i, locale.format("%d", val, grouping=True), val_diff, alpha, sum(x_curr == 0)))
                if print_path_verbose:
                    print("grad: %s, x_curr %s" % (grad, x_curr,))
            except Exception as err:
                print(err)
                import pdb; pdb.set_trace()

        # rounding error can get us really close to, or even across, the coordinate plane.
        x_curr[abs(x_curr) < zero_eps] = 0
        x_curr[x_curr < zero_eps] = 0

        if (x_curr == 0).all() and (x_old == 0).all():
            # this happens when we were at the origin and the gradient didn't
            # take us out of the range of zero_eps
            if print_stop_iteration: print("stuck at the origin %s" % (_i,))
            return cd_res(x_curr, score(x_curr)) # tricky tricky...

        if (x_curr < 0).any():
            import pdb; pdb.set_trace()
            print("This shouldn't ever happen if max_alpha is specified properly")

        if val_diff/val < tol:
            # this is a heuristic rule, to be sure, but seems to be useful.
            if _i > min_iter:
                if print_stop_iteration: print("[val_diff/val < tol] i: %s, val: %s, val_diff: %s" % (_i, val, val_diff,))
                return cd_res(x_curr, val)
    else:
        raise RuntimeError('Solution did not converge to default tolerance')
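
# Usage sketch (hypothetical objective): minimize a simple quadratic over the
# positive orthant. cdl_search only needs a score function, a starting point
# in the closed positive orthant, and a gradient.
#
#   import numpy as np
#   target = np.array([1.0, 0.0, 2.0])
#   score = lambda x: ((x - target)**2).sum()
#   jac = lambda x: 2*(x - target)
#   res = cdl_search(score, np.zeros(3), jac)
#   # res.x is approximately `target`; res.fun is the attained score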
@ -0,0 +1,48 @@
from fit_loo import loo_v_matrix
from fit_ct import ct_v_matrix
import numpy as np


def tensor(X, Y, X_treat = None, Y_treat = None, **kwargs):
    """ Presents a unified api for ct_v_matrix and loo_v_matrix
    """
    # PARAMETER QC
    if not isinstance(X, np.matrix): raise TypeError("X is not a matrix")
    if not isinstance(Y, np.matrix): raise TypeError("Y is not a matrix")
    if X.shape[1] == 0: raise ValueError("X.shape[1] == 0")
    if Y.shape[1] == 0: raise ValueError("Y.shape[1] == 0")
    if X.shape[0] != Y.shape[0]: raise ValueError("X and Y have different number of rows (%s and %s)" % (X.shape[0], Y.shape[0],))

    if (X_treat is None) != (Y_treat is None):
        raise ValueError("parameters `X_treat` and `Y_treat` must both be Matrices or None")

    if X_treat is not None:
        # Fit the treated units to the control units, assuming that Y contains pre-intervention outcomes

        # PARAMETER QC
        if not isinstance(X_treat, np.matrix): raise TypeError("X_treat is not a matrix")
        if not isinstance(Y_treat, np.matrix): raise TypeError("Y_treat is not a matrix")
        if X_treat.shape[1] == 0: raise ValueError("X_treat.shape[1] == 0")
        if Y_treat.shape[1] == 0: raise ValueError("Y_treat.shape[1] == 0")
        if X_treat.shape[0] != Y_treat.shape[0]: raise ValueError("X_treat and Y_treat have different number of rows (%s and %s)" % (X_treat.shape[0], Y_treat.shape[0],))

        # FIT THE V-MATRIX AND POSSIBLY CALCULATE THE L2_PEN_W
        # note that the weights, score, and loss function value returned here are for the in-sample predictions
        weights, v_mat, ts_score, loss, l2_pen_w, _ = \
            ct_v_matrix(X = np.vstack((X, X_treat)),
                        Y = np.vstack((Y, Y_treat)),
                        control_units = np.arange(X.shape[0]),
                        treated_units = np.arange(X_treat.shape[0]) + X.shape[0],
                        **kwargs)

    else:
        # Fit the control units to themselves; Y may contain post-intervention outcomes

        weights, v_mat, ts_score, loss, l2_pen_w, _ = \
            loo_v_matrix(X = X,
                         Y = Y,
                         control_units = np.arange(X.shape[0]),
                         treated_units = np.arange(X.shape[0]),
                         # treated_units = [X.shape[0] + i for i in range(len(train))],
                         **kwargs)
    return v_mat
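
# Usage sketch (hypothetical shapes and penalty kwarg): fit V on the control
# units only, then inspect the implied covariate weights on its diagonal.
#
#   import numpy as np
#   X = np.matrix(np.random.random((20, 5)))
#   Y = np.matrix(np.random.random((20, 8)))
#   v_mat = tensor(X, Y, LAMBDA = 0.1)
#   # np.diag(v_mat) gives the fitted importance of each covariate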
@ -0,0 +1,168 @@
""" In the leave-one-out method with larger sample sizes most of the time is spent in calculating
|
||||
A.I.dot(B) using np.linalg.solve (which is much faster than the brute force method i.e. `A.I.dot(B)`).
|
||||
For example, with 200 units, about about 95% of the time is spent in this line.
|
||||
|
||||
However we can take advantage of the fact that we're calculating the
|
||||
inverse of N matrices which are all sub-matrices of a common matrix by inverting the common matrix
|
||||
and calculating each subset inverse from there.
|
||||
|
||||
https://math.stackexchange.com/a/208021/252693
|
||||
|
||||
|
||||
TODO: this could be made a bit faster by passing in the indexes (k_rng,
|
||||
k_rng2) instead of re-building them
|
||||
"""
import numpy as np

#-- n = 5
#-- p = 3
#-- a = np.matrix(np.random.random((n,p,)))
#-- v = np.diag(np.random.random((p,)))
#-- x = a.dot(v).dot(a.T)
#-- x.dot(x.I)
#--
#-- x = np.matrix(np.random.random((n,n,)))
#-- x.dot(x.I)
#--
#-- xi = x.I
#--
#-- B = np.matrix(np.random.random((n,n,)))
#--
#-- k = np.arange(2)
#-- N = xi.shape[0]
#-- rng = np.arange(N)
#-- k_rng = rng[np.logical_not(np.isin(rng,k))]
#--
#-- out = xi[np.ix_(k_rng,k_rng)] - xi[np.ix_(k_rng,k)].dot(xi[np.ix_(k,k_rng)])/np.linalg.det(xi[np.ix_(k,k)])
#--
#-- brute = x[np.ix_(k_rng,k_rng)].I

def all_subinverses(x, eps = None):
    """ Given a matrix (x), calculate the inverses of all of its leave-one-out sub-matrices.
        param: x a square matrix for which to find the inverses of all its leave-one-out sub-matrices.
        param: eps If not None, used to assert that each calculated
            sub-matrix-inverse is within eps of the brute force calculation.
            Testing only; this slows the process way down since the inverse of
            each sub-matrix is calculated by the brute force method. Typically
            set to a multiple of `np.finfo(float).eps`
    """
    xi = x.I
    N = x.shape[0]
    rng = np.arange(N) # handy constant for indexing
    out = [None,] * N
    for k in range(N):
        k_rng = rng[rng != k]
        out[k] = xi[np.ix_(k_rng, k_rng)] - xi[k_rng, k].dot(xi[k, k_rng]) / xi[k, k]
        if eps is not None:
            brute = x[np.ix_(k_rng, k_rng)].I
            if not (abs(out[k] - brute) < eps).all():
                raise RuntimeError("Fast and brute force methods were not within epsilon (%s) for sub-matrix k = %s; max difference = %s" % (eps, k, abs(out[k] - brute).max(),))
    return out


def subinv_k(xi, k, eps = None):
    """ Given the inverse (xi) of a matrix (x), calculate the inverse of the
        leave-one-out sub-matrix of x with row and column k removed.
        param: xi the pre-computed inverse of x
        param: k the column and row to leave out
        param: eps If not None, used to assert that the calculated
            sub-matrix-inverse is within eps of the brute force calculation.
            Testing only; this slows the process way down since the inverse of
            the sub-matrix is calculated by the brute force method. Typically
            set to a multiple of `np.finfo(float).eps`
    """
    N = xi.shape[0]
    rng = np.arange(N) # handy constant for indexing
    k_rng = rng[rng != k]
    out = xi[np.ix_(k_rng, k_rng)] - xi[k_rng, k].dot(xi[k, k_rng]) / xi[k, k]
    if eps is not None:
        brute = xi.I[np.ix_(k_rng, k_rng)].I # recover x = xi.I for the brute force check
        if not (abs(out - brute) < eps).all():
            raise RuntimeError("Fast and brute force methods were not within epsilon (%s) for sub-matrix k = %s; max difference = %s" % (eps, k, abs(out - brute).max(),))
    return out
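
# Quick numerical check (a sketch): the fast leave-one-out inverse should
# match the brute-force inverse of the sub-matrix up to rounding error.
#
#   import numpy as np
#   x = np.matrix(np.random.random((6, 6)))
#   xi = x.I
#   k = 2
#   k_rng = np.arange(6)[np.arange(6) != k]
#   fast = subinv_k(xi, k)
#   brute = x[np.ix_(k_rng, k_rng)].I
#   assert (abs(fast - brute) < 1e-8).all()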


# ---------------------------------------------
# single sub-matrix
# ---------------------------------------------

if __name__ == "__main__":

    import time

    n = 200
    eps = 10e-10
    B = np.matrix(np.random.random((n, n,)))

    for i in range(100):
        # create a matrix that meets the matching criteria
        x = np.matrix(np.random.random((n, n,)))
        try:
            zz = all_subinverses(x, eps)
            break
        except RuntimeError:
            pass
    else:
        print("Failed to generate a %sx%s matrix whose leave-one-out inverses are all within %s of the quick method" % (n, n, eps,))

    k = 5
    n_tests = 1000

    # =======================
    t0 = time.time()
    for i in range(n_tests):
        N = x.shape[0]
        rng = np.arange(N)
        k_rng = rng[rng != k]
        k_rng2 = np.ix_(k_rng, k_rng)
        zz = x[k_rng2].I.dot(B[k_rng2])

    t1 = time.time()
    slow_time = t1 - t0
    print("A.I.dot(B): brute force time (N = %s): %s" % (n, t1 - t0))

    # =======================
    t0 = time.time()
    for i in range(n_tests):
        # make the comparison fair: periodically recompute the full inverse
        if i % n == 0:
            xi = x.I
        zz = subinv_k(xi, k).dot(B[k_rng2])

    t1 = time.time()
    fast_time = t1 - t0
    print("A.I.dot(B): quick time (N = %s): %s" % (n, t1 - t0))

    # =======================
    t0 = time.time()
    for i in range(n_tests):
        N = x.shape[0]
        rng = np.arange(N)
        k_rng = rng[rng != k]
        k_rng2 = np.ix_(k_rng, k_rng)
        zz = np.linalg.solve(x[k_rng2], B[k_rng2])

    t1 = time.time()
    solve_time = t1 - t0
    print("A.I.dot(B): np.linalg.solve time (N = %s): %s" % (n, t1 - t0))

    # ---------------------------------------------
    # ---------------------------------------------

    t0 = time.time()
    for i in range(100): zz = all_subinverses(x, eps)

    t1 = time.time()
    slow_time = t1 - t0
    print("Full set of inverses: brute force time (N = %s): %s" % (n, t1 - t0))

    t0 = time.time()
    for i in range(100): zz = all_subinverses(x)

    t1 = time.time()
    fast_time = t1 - t0
    print("Full set of inverses: quick time (N = %s): %s" % (n, t1 - t0))

    # ---------------------------------------------
    # ---------------------------------------------
@ -0,0 +1,35 @@
from fit_loo import loo_weights
from fit_ct import ct_weights
import numpy as np


def weights(X, X_treat = None, **kwargs):
    """ Calculate synthetic control weights
    """

    # PARAMETER QC
    if not isinstance(X, np.matrix): raise TypeError("X is not a matrix")
    if X.shape[1] == 0: raise ValueError("X.shape[1] == 0")

    if X_treat is not None:
        # weight the treated units against the control units

        # PARAMETER QC
        if not isinstance(X_treat, np.matrix): raise TypeError("X_treat is not a matrix")
        if X_treat.shape[1] == 0: raise ValueError("X_treat.shape[1] == 0")

        return ct_weights(X = np.vstack((X, X_treat)),
                          control_units = np.arange(X.shape[0]),
                          treated_units = np.arange(X_treat.shape[0]) + X.shape[0],
                          **kwargs)

    else:
        # weight each control unit against the remaining controls
        return loo_weights(X = X,
                           control_units = np.arange(X.shape[0]),
                           treated_units = np.arange(X.shape[0]),
                           **kwargs)
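
# Usage sketch (hypothetical inputs): compute synthetic-control weights for
# treated units given a fitted V matrix and ridge penalty, both forwarded via
# **kwargs to ct_weights / loo_weights.
#
#   import numpy as np
#   X = np.matrix(np.random.random((20, 5)))      # controls' covariates
#   X_treat = np.matrix(np.random.random((3, 5))) # treated units' covariates
#   W = weights(X, X_treat, V = np.matrix(np.eye(5)), L2_PEN_W = 0.1)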