This commit is contained in:
2018-05-18 14:08:01 -07:00
Commit b112ca5318
14 changed files with 1885 additions and 0 deletions

104
.gitignore vendored Normal file
View file

@ -0,0 +1,104 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/

15
README.md Normal file
View file

@ -0,0 +1,15 @@
# Ridge Synthetic Controls
### Setup
This package requires [numpy](http://www.numpy.org/) and
[scipy](https://www.scipy.org/), and has been tested with Python 2.7.14
(Numpy 1.14.1, Scipy 1.0.0) and Python 3.5.5 (Numpy 1.13.1, Scipy 1.0.1).
### API
For now, refer to the example code in `./example-code.py` for usage
details.
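A minimal end-to-end sketch of the treat/control workflow, adapted from
`example-code.py` (the data below is random, and the shapes and penalty grid
are illustrative only):

```python
import numpy as np
import RidgeSC as SC

# random data; shapes are illustrative only
X       = np.matrix(np.random.normal(0, 1, (100, 10)))  # control units' covariates
X_treat = np.matrix(np.random.normal(0, 1, ( 10, 10)))  # treated units' covariates
Y       = np.matrix(np.random.normal(0, 1, (100,  5)))  # control units' pre-intervention outcomes
Y_treat = np.matrix(np.random.normal(0, 1, ( 10,  5)))  # treated units' pre-intervention outcomes

# starting penalties
L2_pen = SC.L2_pen_guestimate(X)
L1_max = SC.get_max_lambda(X, Y, X_treat=X_treat, Y_treat=Y_treat)
grid   = L1_max * np.exp(np.linspace(np.log(1e-4), np.log(1e-2), 10))

# cross-validated scores, one per LAMBDA in the grid
scores      = SC.CV_score(X=X, Y=Y, X_treat=X_treat, Y_treat=Y_treat,
                          LAMBDA=grid, L2_PEN_W=L2_pen)
best_LAMBDA = grid[np.argmin(scores)]

# two-step fit: estimate the V matrix, then the synthetic control weights
V = SC.tensor(X=X, Y=Y, X_treat=X_treat, Y_treat=Y_treat,
              LAMBDA=best_LAMBDA, L2_PEN_W=L2_pen)
W = SC.weights(X=X, X_treat=X_treat, V=V, L2_PEN_W=L2_pen)
```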

10
__init__.py Normal file
View file

@ -0,0 +1,10 @@
# PRIMARY FITTING FUNCTIONS
from .fit_loo import loo_v_matrix, loo_weights, loo_score
from .fit_ct import ct_v_matrix, ct_weights, ct_score
# Public API
from .cross_validation import score_train_test, score_train_test_sorted_lambdas, CV_score
from .tensor import tensor
from .weights import weights
from .lambda_utils import get_max_lambda, L2_pen_guestimate

299
cross_validation.py Normal file
View file

@ -0,0 +1,299 @@
from .fit_loo import loo_v_matrix, loo_score
from .fit_ct import ct_v_matrix, ct_score
from .optimizers.cd_line_search import cdl_search
import numpy as np
from concurrent import futures
def score_train_test(X,
Y,
train,
test,
X_treat=None,
Y_treat=None,
FoldNumber=None, # for consistency with score_train_test_sorted_lambdas()
**kwargs):
""" presents a unified api for ct_v_matrix and loo_v_matrix
and returns the v_mat, l2_pen_w (possibly calculated, possibly a parameter), and the score
"""
# to use `pdb.set_trace()` here, call CV_score with `parallel = False`
if (X_treat is None) != (Y_treat is None):
raise ValueError("parameters `X_treat` and `Y_treat` must both be Matrices or None")
if X_treat is not None:
">> K-fold validation on the Treated units; assuming that Y and Y_treat are pre-intervention outcomes"
# PARAMETER QC
if not isinstance(X_treat, np.matrix): raise TypeError("X_treat is not a matrix")
if not isinstance(Y_treat, np.matrix): raise TypeError("Y_treat is not a matrix")
if X_treat.shape[1] == 0: raise ValueError("X_treat.shape[1] == 0")
if Y_treat.shape[1] == 0: raise ValueError("Y_treat.shape[1] == 0")
if X_treat.shape[0] != Y_treat.shape[0]:
raise ValueError("X_treat and Y_treat have different number of rows (%s and %s)" % (X_treat.shape[0], Y_treat.shape[0],))
# FIT THE V-MATRIX AND POSSIBLY CALCULATE THE L2_PEN_W
# note that the weights, score, and loss function value returned here are for the in-sample predictions
weights, v_mat, ts_score, loss, l2_pen_w, _ = \
ct_v_matrix(X = np.vstack((X,X_treat[train, :])),
Y = np.vstack((Y,Y_treat[train, :])),
treated_units = [X.shape[0] + i for i in range(len(train))],
method = cdl_search,
**kwargs)
# GET THE OUT-OF-SAMPLE PREDICTION ERROR
s = ct_score(X = np.vstack((X,X_treat[test, :])),
Y = np.vstack((Y,Y_treat[test, :])),
treated_units = [X.shape[0] + i for i in range(len(test))],
V = v_mat,
L2_PEN_W = l2_pen_w)
print("LAMBDA: %0.1f, zeros %s (of %s), Score: %0.1f / %0.1f " % (kwargs["LAMBDA"],
sum(np.diag(v_mat == 0)),
v_mat.shape[0],
s,
np.power(Y_treat[test, :] - np.mean(Y_treat[test, :]),2).sum() ))
else:
">> K-fold validation on the only control units; assuming that Y contains post-intervention outcomes"
# FIT THE V-MATRIX AND POSSIBLY CALCULATE THE L2_PEN_W
# note that the weights, score, and loss function value returned here are for the in-sample predictions
weights, v_mat, ts_score, loss, l2_pen_w, _ = \
loo_v_matrix(X = X[train, :],
Y = Y[train, :],
# treated_units = [X.shape[0] + i for i in range(len(train))],
method = cdl_search,
**kwargs)
# GET THE OUT-OF-SAMPLE PREDICTION ERROR
s = loo_score(X = X, Y = Y,
treated_units = test,
V = v_mat,
L2_PEN_W = l2_pen_w)
return v_mat, l2_pen_w, s
def score_train_test_sorted_lambdas(
LAMBDA,
start=None,
cache=False,
progress=False,
FoldNumber=None,
**kwargs):
""" a wrapper which calls score_train_test() for each element of an
array of `LAMBDA`'s, optionally caching the optimized v_mat and using it
as the start position for the next iteration.
"""
# DEFAULTS
values = [None]*len(LAMBDA)
if progress > 0:
import time
t0 = time.time()
for i,Lam in enumerate(LAMBDA):
v_mat, w, s = values[i] = score_train_test( LAMBDA = Lam, start = start, **kwargs)
if cache:
start = np.diag(v_mat)
if progress > 0 and (i % progress) == 0:
t1 = time.time()
if FoldNumber is None:
print("iteration %s of %s time: %0.4f ,lambda: %0.4f" % (i+1, len(LAMBDA), t1 - t0, Lam,))
#print("iteration %s of %s time: %0.4f ,lambda: %0.4f, diags: %s" % (i+1, len(LAMBDA), t1 - t0, Lam, np.diag(v_mat),))
else:
print("Fold %s, iteration %s of %s, time: %0.4f ,lambda: %0.4f" % (FoldNumber, i+1, len(LAMBDA), t1 - t0, Lam, ))
#print("Fold %s, iteration %s of %s, time: %0.4f ,lambda: %0.4f, diags: %s" % (FoldNumber, i+1, len(LAMBDA), t1 - t0, Lam, np.diag(v_mat),))
t0 = time.time()
return list(zip(*values))
def CV_score(X,Y,LAMBDA,X_treat=None,Y_treat=None,splits=5,sub_splits=None,quiet=False,parallel=False,max_workers=None,**kwargs):
""" Cross fold validation for 1 or more L1 Penalties, holding the L2 penalty fixed.
"""
# PARAMETER QC
if not isinstance(X, np.matrix): raise TypeError("X is not a matrix")
if not isinstance(Y, np.matrix): raise TypeError("Y is not a matrix")
if (X_treat is None) != (Y_treat is None):
raise ValueError("parameters `X_treat` and `Y_treat` must both be Matrices or None")
if X.shape[1] == 0: raise ValueError("X.shape[1] == 0")
if Y.shape[1] == 0: raise ValueError("Y.shape[1] == 0")
if X.shape[0] != Y.shape[0]: raise ValueError("X and Y have different number of rows (%s and %s)" % (X.shape[0], Y.shape[0],))
try:
_LAMBDA = iter(LAMBDA)
except TypeError:
# Lambda is a single value
multi_lambda = False
__score_train_test__ = score_train_test
else:
# Lambda is an iterable of values
multi_lambda = True
__score_train_test__ = score_train_test_sorted_lambdas
if X_treat is not None:
# PARAMETER QC
if not isinstance(X_treat, np.matrix): raise TypeError("X_treat is not a matrix")
if not isinstance(Y_treat, np.matrix): raise TypeError("Y_treat is not a matrix")
if X_treat.shape[1] == 0: raise ValueError("X_treat.shape[1] == 0")
if Y_treat.shape[1] == 0: raise ValueError("Y_treat.shape[1] == 0")
if X_treat.shape[0] != Y_treat.shape[0]:
raise ValueError("X_treat and Y_treat have different number of rows (%s and %s)" % (X_treat.shape[0], Y_treat.shape[0],))
# MESSAGING
if not quiet:
print("K-fold validation with %s control and %s treated units %s predictors and %s outcomes, holding out one fold among Treated units; Assumes that `Y` and `Y_treat` are pre-intervention outcomes" %
(X.shape[0] , X_treat.shape[0],X.shape[1],Y.shape[1],))
if isinstance(splits, (int)) or np.issubdtype(splits, np.integer):
from sklearn.model_selection import KFold
splits = KFold(splits).split(np.arange(X_treat.shape[0]))
train_test_splits = [ x for x in splits ]
n_splits = len(train_test_splits)
if parallel:
if max_workers is None:
# CALCULATE A DEFAULT FOR MAX_WORKERS
import multiprocessing
if n_splits == 1:
print("WARNING: Using parallel options with a single split is expected to reduce performance")
max_workers = min(max(multiprocessing.cpu_count() - 2,1),len(train_test_splits))
if max_workers == 1 and n_splits > 1:
print("WARNING: Defaulting to max_workers = 1 on a machine with %s cores." % multiprocessing.cpu_count())
_initialize_Global_worker_pool(max_workers)
try:
promises = [ _worker_pool.submit( __score_train_test__,
X = X,
Y = Y,
LAMBDA = LAMBDA,
X_treat = X_treat,
Y_treat = Y_treat,
train = train,
test = test,
FoldNumber = fold,
**kwargs)
for fold, (train,test) in enumerate(train_test_splits) ]
results = [ promise.result() for promise in futures.as_completed(promises)]
finally:
_clean_up_worker_pool()
else:
results = [ __score_train_test__(X = X,
Y = Y,
X_treat = X_treat,
Y_treat = Y_treat,
LAMBDA = LAMBDA,
train = train,
test = test,
FoldNumber = fold,
**kwargs)
for fold, (train,test) in enumerate(train_test_splits) ]
else: # X_treat *is* None
# MESSAGING
if not quiet:
print("Leave-one-out Cross Validation with %s control units, %s predictors and %s outcomes; Y may contain post-intervention outcomes" %
(X.shape[0],X.shape[1],Y.shape[1],) )
if isinstance(splits, (int)) or np.issubdtype(splits, np.integer):
from sklearn.model_selection import KFold
splits = KFold(splits).split(np.arange(X.shape[0]))
train_test_splits = [ x for x in splits ]
n_splits = len(train_test_splits)
if parallel:
if max_workers is None:
# CALCULATE A DEFAULT FOR MAX_WORKERS
import multiprocessing
if n_splits == 1:
print("WARNING: Using parallel options with a single split is expected to reduce performance")
max_workers = min(max(multiprocessing.cpu_count() - 2,1),len(train_test_splits))
if max_workers == 1 and n_splits > 1:
print("WARNING: Defaulting to max_workers = 1 on a machine with %s cores." % multiprocessing.cpu_count())
_initialize_Global_worker_pool(max_workers)
try:
promises = [ _worker_pool.submit(__score_train_test__,
X = X,
Y = Y,
LAMBDA = LAMBDA,
train = train,
test = test,
FoldNumber = fold,
**kwargs)
for fold, (train,test) in enumerate(train_test_splits) ]
results = [ promise.result() for promise in futures.as_completed(promises)]
finally:
_clean_up_worker_pool()
else:
results = [ __score_train_test__(X = X,
Y = Y,
LAMBDA = LAMBDA,
train = train,
test = test,
FoldNumber = fold,
**kwargs)
for fold, (train,test) in enumerate(train_test_splits) ]
# extract the score.
v_mats, l2_pens, scores = list(zip(* results))
if multi_lambda:
total_score = [sum(s) for s in zip(*scores)]
else:
total_score = sum(scores)
return total_score
# ------------------------------------------------------------
# utilities for maintaining a worker pool
# ------------------------------------------------------------
_worker_pool = None
def _initialize_Global_worker_pool(n_workers):
global _worker_pool
if _worker_pool is not None:
return # keep it idempotent, please
_worker_pool = futures.ProcessPoolExecutor(max_workers=n_workers)
def _clean_up_worker_pool():
global _worker_pool
if _worker_pool is not None:
_worker_pool.shutdown()
_worker_pool = None
import atexit
atexit.register(_clean_up_worker_pool)

436
example-code.py Normal file
View file

@ -0,0 +1,436 @@
"""
USAGE:
cd path/to/RidgeSC
python example-code.py
"""
import os
import sys
sys.path.append(os.path.join(os.getcwd(),".."))
import time
import RidgeSC as SC
import numpy as np
import random
# def setup(C,N,T,K,g,gs,bs ): # controls (C), treated units (N) , time periods (T), Predictors (K), groups (g), g-scale, b-scale
if __name__ == "__main__":
# ------------------------------------------------------------
# CONTROL PARAMETERS
# ------------------------------------------------------------
random.seed(12345)
np.random.seed(10101)
# Controls (per group), Treated (per group), pre-intervention Time points, post intervention time-periods
C,N,T0,T1 = 5,5,4,4
# Causal Covariates, Confounders , Random Covariates
K,S,R = 7,4,5
# Number of Groups, Scale of the Group Effect, (groups make the time series correlated for reasons other than the observed covariates...)
groups,group_scale = 30,2
# Scale of the Correlation of the Causal Covariates
beta_scale,confounders_scale = 4,1
# ------------------------------------------------------------
# SIMULATE SOME DATA
# ------------------------------------------------------------
# COVARIATE EFFECTS
X_control = np.matrix(np.random.normal(0,1,((C)*groups, K+S+R)))
X_treated = np.matrix(np.random.normal(0,1,((N)*groups, K+S+R)))
# CAUSAL
b_cause = np.random.exponential(1,K)
b_cause *= beta_scale / b_cause.max()
# CONFOUNDERS
b_confound = np.random.exponential(1,S)
b_confound *= confounders_scale / b_confound.max()
beta_control = np.matrix(np.concatenate( ( b_cause ,b_confound, np.zeros(R)) ) ).T
beta_treated = np.matrix(np.concatenate( ( b_cause ,np.zeros(S), np.zeros(R)) ) ).T
# GROUP EFFECTS (hidden)
Y_pre_group_effects = np.random.normal(0,group_scale,(groups,T0))
Y_pre_ge_control = Y_pre_group_effects[np.repeat(np.arange(groups),C)]
Y_pre_ge_treated = Y_pre_group_effects[np.repeat(np.arange(groups),N)]
Y_post_group_effects = np.random.normal(0,group_scale,(groups,T1))
Y_post_ge_control = Y_post_group_effects[np.repeat(np.arange(groups),C)]
Y_post_ge_treated = Y_post_group_effects[np.repeat(np.arange(groups),N)]
# RANDOM ERRORS
Y_pre_err_control = np.matrix(np.random.random( ( C*groups, T0, ) ))
Y_pre_err_treated = np.matrix(np.random.random( ( N*groups, T0, ) ))
Y_post_err_control = np.matrix(np.random.random( ( C*groups, T1, ) ))
Y_post_err_treated = np.matrix(np.random.random( ( N*groups, T1, ) ))
# THE DATA GENERATING PROCESS
model = "full"
if model == "full":
""" In the full model, covariates (X) are correlated with pre and post
outcomes, and variance of the outcomes pre- and post- outcomes is
lower within groups which span both treated and control units.
"""
Y_pre_control = X_control.dot(beta_control) + Y_pre_ge_control + Y_pre_err_control
Y_pre_treated = X_treated.dot(beta_treated) + Y_pre_ge_treated + Y_pre_err_treated
Y_post_control = X_control.dot(beta_control) + Y_post_ge_control + Y_post_err_control
Y_post_treated = X_treated.dot(beta_treated) + Y_post_ge_treated + Y_post_err_treated
elif model == "hidden":
""" In the hidden model outcomes are independent of the covariates, but
variance of the outcomes pre- and post- outcomes is lower within
groups which span both treated and control units.
"""
Y_pre_control = Y_pre_ge_control + Y_pre_err_control
Y_pre_treated = Y_pre_ge_treated + Y_pre_err_treated
Y_post_control = Y_post_ge_control + Y_post_err_control
Y_post_treated = Y_post_ge_treated + Y_post_err_treated
elif model == "null":
"Purely random data"
Y_pre_control = Y_pre_err_control
Y_pre_treated = Y_pre_err_treated
Y_post_control = Y_post_err_control
Y_post_treated = Y_post_err_treated
else:
raise ValueError("Unknown model type: "+model)
# JOIN THE TREAT AND CONTROL DATA
X = np.vstack( (X_control, X_treated,) )
Y_pre = np.vstack( (Y_pre_control, Y_pre_treated, ) )
Y_post = np.vstack( (Y_post_control, Y_post_treated,) )
# in the leave-one-out scenario, the pre-treatment outcomes will be part of the covariates
X_and_Y_pre = np.hstack( ( X, Y_pre,) )
# IDENTIFIERS FOR TREAT AND CONTROL UNITS
# control_units = np.arange( C * groups )
# treated_units = np.arange( N * groups ) + C
# ------------------------------------------------------------
# find default penalties
# ------------------------------------------------------------
# get starting point for the L2 penalty
L2_pen_start_ct = SC.L2_pen_guestimate(X_control)
L2_pen_start_loo = SC.L2_pen_guestimate(X_and_Y_pre)
# get the maximum value for the L1 Penalty parameter conditional on the guestimate for the L2 penalty
L1_max_ct = SC.get_max_lambda(X_control,Y_pre_control,X_treat=X_treated,Y_treat=Y_pre_treated)
L1_max_loo = SC.get_max_lambda(X_and_Y_pre,Y_post)
print("Max L1")
if False:
"Demonstrate relations between the L1 and L2 penalties"
# get the maximum value for the L1 Penalty parameter conditional on several L2 penalty parameter values
L2_grid = (2.** np.arange(-1,2))
L1_max_loo_grid = SC.get_max_lambda(X_and_Y_pre,
Y_post,
L2_PEN_W = L2_pen_start_loo * L2_grid)
L1_max_ct_grid = SC.get_max_lambda(X_control,
Y_pre_control,
X_treat=X_treated,
Y_treat=Y_pre_treated,
L2_PEN_W = L2_pen_start_ct * L2_grid)
assert ( L1_max_loo / L1_max_loo_grid == 1/L2_grid).all()
assert ( L1_max_ct / L1_max_ct_grid == 1/L2_grid).all()
# ------------------------------------------------------------
# create a grid of penalties to try
# ------------------------------------------------------------
n_points = 10 # number of points in the grid
grid_type = "log-linear" # Usually the optimal penalties are quite Small
if grid_type == "simple":
# An equally spaced linear grid that does not include 0 or 1
grid = ( 1 + np.arange(n_points) ) / ( 1 + n_points )
elif grid_type == "linear":
# Another equally spaced linear grid
fmax = 1e-3 # highest point in the grid relative to the max-lambda
fmin = 1e-5 # lowest point in the grid relative to the max-lambda
grid = np.linspace(fmin,fmax,n_points)
elif grid_type == "log-linear":
# An equally spaced grid on the log scale
fmax = 1e-2 # highest point in the grid relative to the max-lambda
fmin = 1e-4 # lowest point in the grid relative to the max-lambda
grid = np.exp(np.linspace(np.log(fmin),np.log(fmax),n_points))
else:
raise ValueError("Unknown grid type: %s" % grid_type)
# ------------------------------------------------------------
# get the Cross Validation Error over a grid of L1 penalty
# parameters using both (a) Treat/Control and (b) the
# leave-one-out controls only methods
# ------------------------------------------------------------
if False:
print("starting grid scoring for treat / control scenario", grid*L1_max_ct)
grid_scores_ct = SC.CV_score(
X = X_control,
Y = Y_pre_control,
X_treat = X_treated,
Y_treat = Y_pre_treated,
# If LAMBDA is a single value, we get a single score; if it's an array of values, we get an array of scores.
LAMBDA = grid * L1_max_ct,
L2_PEN_W = L2_pen_start_ct,
# CACHE THE V MATRIX BETWEEN LAMBDA PARAMETERS (generally faster, but path dependent)
cache = False, # False by Default
# Run each of the Cross-validation folds in parallel? Often slower
# for large sample sizes because numpy.linalg.solve() already runs
# in parallel for large matrices
parallel=False,
# ANNOUNCE COMPLETION OF EACH ITERATION
progress = True)
best_L1_penalty_ct = (grid * L1_max_ct)[np.argmin(grid_scores_ct)]
if False:
# even with smaller data, this takes a while.
print("starting grid scoring for Controls Only scenario", grid*L1_max_ct)
grid_scores_loo = SC.CV_score(
X = X_and_Y_pre [np.arange(100)], # limit the amount of time...
Y = Y_post [np.arange(100)], # limit the amount of time...
# L1 Penalty. If LAMBDA is a single value, we get a single score; if it's an array of values, we get an array of scores.
LAMBDA = grid * L1_max_loo,
# L2 Penalty (float)
L2_PEN_W = L2_pen_start_loo,
# CACHE THE V MATRIX BETWEEN LAMBDA PARAMETERS (generally faster, but path dependent)
#cache = True, # False by Default
# Run each of the Cross-validation folds in parallel? Often slower
# for large sample sizes because numpy.linalg.solve() already runs
# in parallel for large matrices
parallel=False,
# ANNOUNCE COMPLETION OF EACH ITERATION
progress = True)
# ---------------------------------------------------------------------------
# Calculate Synthetic Control weights for a fixed pair of penalty parameters
# ---------------------------------------------------------------------------
# This is a two-step process because in principle, we can estimate a
# tensor matrix (which contains the relative weights of the covariates and
# possibly pre-treatment outcomes) in one population, and apply it to
# another population.
best_L1_penalty_ct = float(1908.9329)
# -----------------------------------
# Treat/Control:
# -----------------------------------
V_ct = SC.tensor(X = X_control,
Y = Y_pre_control,
X_treat = X_treated,
Y_treat = Y_pre_treated,
LAMBDA = best_L1_penalty_ct,
L2_PEN_W = L2_pen_start_ct)
SC_weights_ct = SC.weights(X = X_control,
X_treat = X_treated,
V = V_ct,
L2_PEN_W = L2_pen_start_ct)
Y_post_treated_synthetic_controls_ct = SC_weights_ct.dot(Y_post_control)
ct_prediction_error = Y_post_treated_synthetic_controls_ct - Y_post_treated
null_model_error = Y_post_treated - np.mean(Y_pre_treated)
R_squared_post_ct = 1 - np.power(ct_prediction_error,2).sum() / np.power(null_model_error,2).sum()
print( "C/T: Out of Sample post intervention R squared: %0.2f%% " % (100*R_squared_post_ct,))
# -----------------------------------
# Leave-One-Out with Control Only:
# -----------------------------------
if False:
# this takes a while:
V_loo = SC.tensor(
X = X_and_Y_pre [np.arange(100)], # limit the amount of time...
Y = Y_post [np.arange(100)], # limit the amount of time...
LAMBDA = best_L1_penalty_ct,
L2_PEN_W = L2_pen_start_loo)
SC_weights_loo = SC.weights(X = X_control,
V = V_ct,
L2_PEN_W = L2_pen_start_loo)
# in progress...
import pdb; pdb.set_trace()
Y_post_treated_synthetic_controls_loo = SC_weights_loo.dot(Y_post_control)
loo_prediction_error = Y_post_treated_synthetic_controls_loo - Y_post_treated
null_model_error = Y_post_treated - np.mean(Y_pre_treated)
R_squared_post_loo = 1 - np.power(loo_prediction_error,2).sum() / np.power(null_model_error,2).sum()
print( "LOO: Out of Sample post intervention R squared: %0.2f%% " % (100*R_squared_post_loo,))
import pdb; pdb.set_trace()
# ---------------------------------------------------------------------------
# ---------------------------------------------------------------------------
# Optimization of the L1 and L2 parameters together (second order)
# ---------------------------------------------------------------------------
# ---------------------------------------------------------------------------
from scipy.optimize import fmin_l_bfgs_b, differential_evolution
import time
# -----------------------------------------------------------------
# Optimization of the L2 and L1 Penalties Simultaneously, keeping their product constant
# -----------------------------------------------------------------
# build the objective function to be minimized
# cache for L2_obj_func
n_calls = [0,]
temp_results =[]
SS = np.power(Y_pre_treated - np.mean(Y_pre_treated),2).sum()
best_L1_penalty_ct = float(1908.9329)
def L1_L2_obj_func (x):
n_calls[0] += 1
t1 = time.time();
score = SC.CV_score(X = X_control,
Y = Y_pre_control,
X_treat = X_treated,
Y_treat = Y_pre_treated,
# If LAMBDA is a single value, we get a single score; if it's an array of values, we get an array of scores.
LAMBDA = best_L1_penalty_ct * np.exp(x[0]),
L2_PEN_W = L2_pen_start_ct / np.exp(x[0]),
# suppress the analysis type message
quiet = True)
t2 = time.time();
temp_results.append((n_calls[0],x,score))
print("calls: %s, time: %0.4f, x0: %0.4f, Cross Validation Error: %s, out-of-sample R-Squared: %s" % (n_calls[0], t2 - t1, x[0], score, 1 - score / SS ))
#print("calls: %s, time: %0.4f, x0: %0.4f, x1: %0.4f, Cross Validation Error: %s, R-Squared: %s" % (n_calls[0], t2 - t1, x[0], x[1], score, 1 - score / SS ))
return score
# the actual optimization
print("Starting L2 Penalty optimization")
results = differential_evolution(L1_L2_obj_func, bounds = ((-6,6,),) )
#results = differential_evolution(L1_L2_obj_func, bounds = ((-6,6,),)*2 )
import pdb; pdb.set_trace()
NEW_best_L1_penalty_ct = best_L1_penalty_ct * np.exp(results.x[0])
best_L2_penalty = L2_pen_start_ct * np.exp(results.x[1])
print("DE optimized L2 Penalty: %s, DE optimized L1 penalty: %s" % (NEW_best_L1_penalty_ct, best_L2_penalty,) )
# -----------------------------------------------------------------
# Optimization of the L2 Parameter alone
# -----------------------------------------------------------------
# -----------------------
# build the objective function to be minimized
# -----------------------
# OBJECTIVE FUNCTION(S) TO MINIMIZE USING DE
# cache for L2_obj_func
n_calls = [0,]
temp_results =[]
SS = np.power(Y_pre_treated - np.mean(Y_pre_treated),2).sum()
best_L1_penalty_ct = float(1908.9329)
def L2_obj_func (x):
n_calls[0] += 1
t1 = time.time();
score = SC.CV_score(X = X_control,
Y = Y_pre_control,
X_treat = X_treated,
Y_treat = Y_pre_treated,
# If LAMBDA is a single value, we get a single score; if it's an array of values, we get an array of scores.
LAMBDA = best_L1_penalty_ct,
L2_PEN_W = L2_pen_start_ct * np.exp(x[0]),
# suppress the analysis type message
quiet = True)
t2 = time.time();
temp_results.append((n_calls[0],x,score))
print("calls: %s, time: %0.4f, x0: %0.4f, Cross Validation Error: %s, out-of-sample R-Squared: %s" %
(n_calls[0], t2 - t1, x[0], score, 1 - score / SS ))
return score
# the actual optimization
print("Starting L2 Penalty optimization")
results = differential_evolution(L2_obj_func, bounds = ((-6,6,),))
NEW_L2_pen_start_ct = L2_pen_start_ct * np.exp(results.x[0])
print("DE optimized L2 Penalty: %s, using fixed L1 penalty: %s" % (NEW_L2_pen_start_ct, best_L1_penalty_ct,) )
# -----------------------------------------------------------------
# Optimization of the L2 and L1 Penalties Simultaneously
# -----------------------------------------------------------------
# build the objective function to be minimized
# cache for L2_obj_func
n_calls = [0,]
temp_results =[]
SS = np.power(Y_pre_treated - np.mean(Y_pre_treated),2).sum()
best_L1_penalty_ct = float(1908.9329)
def L1_L2_obj_func (x):
n_calls[0] += 1
t1 = time.time();
score = SC.CV_score(X = X_control,
Y = Y_pre_control,
X_treat = X_treated,
Y_treat = Y_pre_treated,
# If LAMBDA is a single value, we get a single score; if it's an array of values, we get an array of scores.
LAMBDA = best_L1_penalty_ct * np.exp(x[0]),
L2_PEN_W = L2_pen_start_ct * np.exp(x[1]),
# suppress the analysis type message
quiet = True)
t2 = time.time();
temp_results.append((n_calls[0],x,score))
print("calls: %s, time: %0.4f, x0: %0.4f, x1: %0.4f, Cross Validation Error: %s, out-of-sample R-Squared: %s" % (n_calls[0], t2 - t1, x[0], x[1], score, 1 - score / SS ))
return score
# the actual optimization
print("Starting L2 Penalty optimization")
results = differential_evolution(L1_L2_obj_func, bounds = ((-6,6,),)*2 )
import pdb; pdb.set_trace()
NEW_best_L1_penalty_ct = best_L1_penalty_ct * np.exp(results.x[0])
best_L2_penalty = L2_pen_start_ct * np.exp(results.x[1])
print("DE optimized L2 Penalty: %s, DE optimized L1 penalty: %s" % (NEW_best_L1_penalty_ct, best_L2_penalty,) )

175
fit_ct.py Normal file
View file

@ -0,0 +1,175 @@
from numpy import ones, diag, matrix, zeros, absolute, mean, var, linalg, prod, sqrt
import numpy as np
import warnings
from .optimizers.cd_line_search import cdl_search
warnings.filterwarnings('ignore')
def ct_v_matrix(X,
Y,
LAMBDA = 0,
treated_units = None,
control_units = None,
start = None,
L2_PEN_W = None,
method = cdl_search,
intercept = True,
max_lambda = False, # this is terrible at least without documentation...
**kwargs):
'''
Computes and sets the optimal v_matrix for the given moments and
penalty parameter.
:param X: Matrix of Covariates
:param Y: Matrix of Outcomes
:param LAMBDA: penalty parameter used to shrink L1 norm of v/v.max() toward zero
:param treated_units: a list containing the position (rows) of the treated units within X and Y
:param control_units: a list containing the position (rows) of the control units within X and Y
:param start: initial values for the diagonals of the tensor matrix
:param L2_PEN_W: L2 penalty on the magnitude of the deviance of the weight vector from null. Optional.
:param method: The name of a method to be used by scipy.optimize.minimize, or a callable with the same API as scipy.optimize.minimize
:param intercept: If True, weights are penalized toward 1 / the number of controls, else weights are penalized toward zero
:param max_lambda: if True, the return value is the maximum L1 penalty for which at least one element of the tensor matrix is non-zero
:param **kwargs: additional arguments passed to the optimizer
'''
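# Hedged example (values illustrative): fit V with the last 10 rows of X/Y as
# the treated units and a fixed L1 penalty:
#   weights, v_mat, ts_score, ts_loss, l2_pen_w, opt = ct_v_matrix(
#       X, Y, LAMBDA=100.0, treated_units=list(range(X.shape[0] - 10, X.shape[0])))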
# DEFAULTS
if treated_units is None:
if control_units is None:
raise ValueError("At least on of treated_units or control_units is required")
# Set the treated units to the not-control units
treated_units = list(set(range(X.shape[0])) - set(control_units))
if control_units is None:
control_units = list(set(range(X.shape[0])) - set(treated_units))
# Parameter QC
if len(set(treated_units).intersection(control_units)):
raise ValueError("Treated and Control units must be exclusive")
if not isinstance(X, matrix): raise TypeError("X is not a matrix")
if not isinstance(Y, matrix): raise TypeError("Y is not a matrix")
if X.shape[1] == 0: raise ValueError("X.shape[1] == 0")
if Y.shape[1] == 0: raise ValueError("Y.shape[1] == 0")
if X.shape[0] != Y.shape[0]: raise ValueError("X and Y have different number of rows (%s and %s)" % (X.shape[0], Y.shape[0],))
if not isinstance(LAMBDA, (float, int)): raise TypeError( "LAMBDA is not a number")
if L2_PEN_W is None: L2_PEN_W = mean(var(X, axis = 0))
if not isinstance(L2_PEN_W, (float, int)): raise TypeError( "L2_PEN_W is not a number")
# CONSTANTS
C, N, K = len(control_units), len(treated_units), X.shape[1]
if start is None: start = zeros(K) # formerly: .1 * ones(K)
Y_treated = Y[treated_units,:]
Y_control = Y[control_units,:]
X_treated = X[treated_units,:]
X_control = X[control_units,:]
if intercept:
Y_treated -= Y_control.mean(axis=0)
# INITIALIZE PARTIAL DERIVATIVES
dA_dV_ki = [ X_control[:, k ].dot(X_control[:, k ].T) + # i,j are on the diagonal (both equal to k)
X_control[:, k ].dot(X_control[:, k ].T) for k in range(K)] # 8
dB_dV_ki = [ X_control[:, k ].dot(X_treated[:, k ].T) + # i,j are on the diagonal (both equal to k)
X_control[:, k ].dot(X_treated[:, k ].T) for k in range(K)] # 9
def _score(V):
dv = diag(V)
weights, _, _ ,_ = _weights(dv)
Ey = (Y_treated - weights.T.dot(Y_control)).getA()
return ((Ey **2).sum() + LAMBDA * absolute(V).sum()).copy() # (...).copy() assures that x.flags.writeable is True
def _grad(V):
""" Calculates just the diagonal of dGamma0_dV
There is an implementation that allows for all elements of V to be varied...
"""
dv = diag(V)
weights, A, B, AinvB = _weights(dv)
Ey = (Y_treated - weights.T.dot(Y_control)).getA()
dGamma0_dV_term2 = zeros(K)
dPI_dV = zeros((C, N))
#Ai = A.I
for k in range(K):
dPI_dV.fill(0) # faster than re-allocating the memory each loop.
dA = dA_dV_ki[k]
dB = dB_dV_ki[k]
dPI_dV = linalg.solve(A,(dB - dA.dot(AinvB)))
#dPI_dV = Ai.dot(dB - dA.dot(AinvB))
dGamma0_dV_term2[k] = (Ey * Y_control.T.dot(dPI_dV).T.getA()).sum()
return LAMBDA - 2 * dGamma0_dV_term2
L2_PEN_W_mat = 2 * L2_PEN_W * diag(ones(X_control.shape[0]))
def _weights(V):
weights = zeros((C, N))
A = X_control.dot(2*V).dot(X_control.T) + L2_PEN_W_mat # 5
B = X_treated.dot(2*V).dot(X_control.T).T # 6
b = linalg.solve(A,B)
if intercept:
weights = b + 1/C
else:
weights = b
return weights, A, B,b
if max_lambda:
grad0 = _grad(zeros(K))
return -grad0[grad0 < 0].min()
# DO THE OPTIMIZATION
if isinstance(method, str):
from scipy.optimize import minimize
opt = minimize(_score, start.copy(), jac = _grad, method = method, **kwargs)
else:
assert callable(method), "Method must be a valid method name for scipy.optimize.minimize or a minimizer"
opt = method(_score, start.copy(), jac = _grad, **kwargs)
v_mat = diag(opt.x)
# CALCULATE weights AND ts_score
weights, _, _ ,_ = _weights(v_mat)
errors = Y_treated - weights.T.dot(Y_control)
ts_loss = opt.fun
ts_score = linalg.norm(errors) / sqrt(prod(errors.shape))
#if True:
# _do_gradient_check()
if intercept:
# not above, b/c Y_treated was already offset at the start
weights += 1/C
return weights, v_mat, ts_score, ts_loss, L2_PEN_W, opt
def ct_weights(X, V, L2_PEN_W, treated_units = None, control_units = None, intercept = True):
if treated_units is None:
if control_units is None:
raise ValueError("At least on of treated_units or control_units is required")
# Set the treated units to the not-control units
treated_units = list(set(range(X.shape[0])) - set(control_units))
if control_units is None:
control_units = list(set(range(X.shape[0])) - set(treated_units))
C, N, K = len(control_units), len(treated_units), X.shape[1]
X_treated = X[treated_units,:]
X_control = X[control_units,:]
A = X_control.dot(2*V).dot(X_control.T) + 2 * L2_PEN_W * diag(ones(X_control.shape[0])) # 5
B = X_treated.dot(2*V).dot(X_control.T).T # 6
weights = linalg.solve(A,B)
if intercept:
weights += 1/C
return weights.T
def ct_score(Y, X, V, L2_PEN_W, LAMBDA = 0, treated_units = None, control_units = None,**kwargs):
if treated_units is None:
if control_units is None:
raise ValueError("At least on of treated_units or control_units is required")
# Set the treated units to the not-control units
treated_units = list(set(range(X.shape[0])) - set(control_units))
if control_units is None:
control_units = list(set(range(X.shape[0])) - set(treated_units))
weights = ct_weights(X = X, V = V, L2_PEN_W = L2_PEN_W, treated_units = treated_units, control_units = control_units,**kwargs)
Y_tr = Y[treated_units, :]
Y_c = Y[control_units, :]
Ey = (Y_tr - weights.dot(Y_c)).getA()
return (Ey **2).sum() + LAMBDA * V.sum()

366
fit_loo.py Normal file
View file

@ -0,0 +1,366 @@
from numpy import ones, diag, matrix, zeros, absolute, mean, var, linalg, prod, sqrt
import numpy as np
import itertools
import warnings
from .utils.sub_matrix_inverse import subinv_k, all_subinverses
from .optimizers.cd_line_search import cdl_search
warnings.filterwarnings('ignore')
def loo_v_matrix(X,
Y,
LAMBDA = 0,
treated_units = None,
control_units = None,
non_neg_weights = False,
start = None,
L2_PEN_W = None,
method = cdl_search,
intercept = True,
max_lambda = False, # this is terrible at least without documentation...
solve_method = "standard",
**kwargs):
'''
Computes and sets the optimal v_matrix for the given moments and
penalty parameter.
:param X: Matrix of Covariates
:param Y: Matrix of Outcomes
:param LAMBDA: penalty parameter used to shrink L1 norm of v/v.max() toward zero
:param treated_units: a list containing the position (rows) of the treated units within X and Y
:param control_units: a list containing the position (rows) of the control units within X and Y
:param start: initial values for the diagonals of the tensor matrix
:param L2_PEN_W: L2 penalty on the magnitude of the deviance of the weight vector from null. Optional.
:param method: The name of a method to be used by scipy.optimize.minimize, or a callable with the same API as scipy.optimize.minimize
:param intercept: If True, weights are penalized toward 1 / the number of controls, else weights are penalized toward zero
:param max_lambda: if True, the return value is the maximum L1 penalty for which at least one element of the tensor matrix is non-zero
:param solve_method: Method for solving A.I.dot(B). Either "standard" or "step-down".
:param **kwargs: additional arguments passed to the optimizer
'''
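# Hedged example (values illustrative): the typical controls-only call fits V
# with every unit serving as both treated and control:
#   weights, v_mat, ts_score, ts_loss, l2_pen_w, opt = loo_v_matrix(X, Y, LAMBDA=100.0)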
# (by default all the units are treated and all are controls)
if treated_units is None:
if control_units is None:
# Neither provided; INCLUDE ALL SAMPLES AS BOTH TREAT AND CONTROL UNIT.
# (this is the typical controls-only Loo V-matrix estimation)
control_units = list(range(X.shape[0]))
treated_units = control_units
else:
# Set the treated units to the not-control units
treated_units = list(set(range(X.shape[0])) - set(control_units))
else:
if control_units is None:
# Set the control units to the not-treated units
control_units = list(set(range(X.shape[0])) - set(treated_units))
control_units = np.array(control_units)
treated_units = np.array(treated_units)
# parameter QC
if not isinstance(X, matrix): raise TypeError("X is not a matrix")
if not isinstance(Y, matrix): raise TypeError("Y is not a matrix")
if X.shape[1] == 0: raise ValueError("X.shape[1] == 0")
if Y.shape[1] == 0: raise ValueError("Y.shape[1] == 0")
if X.shape[0] != Y.shape[0]: raise ValueError("X and Y have different number of rows (%s and %s)" % (X.shape[0], Y.shape[0],))
if not isinstance(LAMBDA, (float, int)): raise TypeError( "LAMBDA is not a number")
if L2_PEN_W is None: L2_PEN_W = mean(var(X, axis = 0))
if not isinstance(L2_PEN_W, (float, int)): raise TypeError( "L2_PEN_W is not a number")
assert not non_neg_weights, "Bounds not implemented"
# CONSTANTS
C, N, K = len(control_units), len(treated_units), X.shape[1]
if start is None: start = zeros(K) # formerly: .1 * ones(K)
assert N > 0, "No treated units"
assert C > 0, "No control units"
assert K > 0, "No variables to fit (X.shape[1] == 0)"
# CREATE THE INDEX THAT INDICATES THE ELIGIBLE CONTROLS FOR EACH TREATED UNIT
in_controls = [list(set(control_units) - set([trt_unit])) for trt_unit in treated_units]
in_controls2 = [np.ix_(i,i) for i in in_controls] # this is a much faster alternative to A[:,index][index,:]
ctrl_rng = np.arange(len(control_units))
out_controls = [ctrl_rng[control_units != trt_unit] for trt_unit in treated_units]
out_treated = [ctrl_rng[control_units == trt_unit] for trt_unit in treated_units] # this is non-trivial when the control units are also being predicted.
if intercept:
for i, trt_unit in enumerate(treated_units):
Y[trt_unit,:] -= Y[in_controls[i],:].mean(axis=0)
# handy constants (for speed purposes):
Y_treated = Y[treated_units,:]
Y_control = Y[control_units,:]
X_treated = X[treated_units,:]
X_control = X[control_units,:]
# INITIALIZE PARTIAL DERIVATIVES
dA_dV_ki = [ [None,] *N for i in range(K)]
dB_dV_ki = [ [None,] *N for i in range(K)]
b_i = [None,] *N
for i, k in itertools.product(range(N), range(K)): # TREATED unit i, moment k
Xc = X[in_controls[i], : ]
Xt = X[treated_units[i], : ]
dA_dV_ki [k][i] = Xc[:, k ].dot(Xc[:, k ].T) + Xc[:, k ].dot(Xc[:, k ].T) # 8
dB_dV_ki [k][i] = Xc[:, k ].dot(Xt[:, k ].T) + Xc[:, k ].dot(Xt[:, k ].T) # 9
del Xc, Xt, i, k
#assert (dA_dV_ki [k][i] == X[index, k ].dot(X[index, k ].T) + X[index, k ].dot(X[index, k ].T)).all()
# https://math.stackexchange.com/a/1471836/252693
def _score(V):
dv = diag(V)
weights, _, _ = _weights(dv)
Ey = (Y_treated - weights.T.dot(Y_control)).getA()
return ((Ey **2).sum() + LAMBDA * absolute(V).sum()).copy() # (...).copy() assures that x.flags.writeable is True
def _grad(V):
""" Calculates just the diagonal of dGamma0_dV
There is an implementation that allows for all elements of V to be varied...
"""
dv = diag(V)
weights, A, B = _weights(dv)
Ey = (Y_treated - weights.T.dot(Y_control)).getA()
dGamma0_dV_term2 = zeros(K)
dPI_dV = zeros((C, N))
if solve_method == "step-down":
Ai_cache = all_subinverses(A)
for k in range(K):
dPI_dV.fill(0) # faster than re-allocating the memory each loop.
for i, index in enumerate(in_controls):
dA = dA_dV_ki[k][i]
dB = dB_dV_ki[k][i]
if solve_method == "step-down":
b = Ai_cache[i].dot(dB - dA.dot(b_i[i]))
else:
b = linalg.solve(A[in_controls2[i]],dB - dA.dot(b_i[i]))
dPI_dV[index, i] = b.flatten() # TODO: is the Transpose an error???
dGamma0_dV_term2[k] = (Ey * Y_control.T.dot(dPI_dV).T.getA()).sum()
return LAMBDA - 2 * dGamma0_dV_term2 # (...).copy() assures that x.flags.writeable is True // * (1 - 2*(V < 0)) was dropped b/c we're restricting to positive V's
L2_PEN_W_mat = 2 * L2_PEN_W * diag(ones(X_control.shape[0]))
def _weights(V):
weights = zeros((C, N))
if solve_method == "step-down":
A = X_control.dot(V + V.T).dot(X_control.T) + 2 * L2_PEN_W * diag(ones(X_control.shape[0])) # 5
B = X_treated.dot(V + V.T).dot(X_control.T) # 6
Ai = A.I
for i, trt_unit in enumerate(treated_units):
if trt_unit in control_units:
b = subinv_k(Ai, out_treated[i][0]).dot(B[out_controls[i],i]) # out_treated[i][0] is the position of trt_unit within the control set
else:
b = Ai.dot(B[:, i])
b_i[i] = b
weights[out_controls[i], i] = b.flatten()
elif solve_method == "standard":
A = X.dot(V + V.T).dot(X.T) + 2 * L2_PEN_W * diag(ones(X.shape[0])) # 5
B = X.dot(V + V.T).dot(X.T).T # 6
for i, trt_unit in enumerate(treated_units):
b = b_i[i] = linalg.solve(A[in_controls2[i]], B[in_controls[i], trt_unit])
weights[out_controls[i], i] = b.flatten()
else:
raise ValueError("Unknown Solve Method: " + solve_method)
return weights, A, B
if max_lambda:
grad0 = _grad(zeros(K))
return -grad0[grad0 < 0].min()
# DO THE OPTIMIZATION
if isinstance(method, str):
from scipy.optimize import minimize
opt = minimize(_score, start.copy(), jac = _grad, method = method, **kwargs)
else:
assert callable(method), "Method must be a valid method name for scipy.optimize.minimize or a minimizer"
opt = method(_score, start.copy(), jac = _grad, **kwargs)
v_mat = diag(opt.x)
# CALCULATE weights AND ts_score
weights, _, _ = _weights(v_mat)
errors = Y_treated - weights.T.dot(Y_control)
ts_loss = opt.fun
ts_score = linalg.norm(errors) / sqrt(prod(errors.shape))
#if True:
# _do_gradient_check()
if intercept:
for i, trt_unit in enumerate(treated_units):
weights[out_controls[i], i] += 1/len(out_controls[i])
return weights, v_mat, ts_score, ts_loss, L2_PEN_W, opt
def loo_weights(X, V, L2_PEN_W, treated_units = None, control_units = None, intercept = True, solve_method = "standard"):
if treated_units is None:
if control_units is None:
# both not provided, include all samples as both treat and control unit.
control_units = list(range(X.shape[0]))
treated_units = control_units
else:
# Set the treated units to the not-control units
treated_units = list(set(range(X.shape[0])) - set(control_units))
else:
if control_units is None:
# Set the control units to the not-treated units
control_units = list(set(range(X.shape[0])) - set(treated_units))
control_units = np.array(control_units)
treated_units = np.array(treated_units)
[C, N, K] = [len(control_units), len(treated_units), X.shape[1]]
# index with positions of the controls relative to the incoming data
in_controls = [list(set(control_units) - set([trt_unit])) for trt_unit in treated_units]
in_controls2 = [np.ix_(i,i) for i in in_controls] # this is a much faster alternative to A[:,index][index,:]
# index of the controls relative to the rows of the outgoing C x N matrix of weights
ctrl_rng = np.arange(len(control_units))
out_controls = [ctrl_rng[control_units != trt_unit] for trt_unit in treated_units]
out_treated = [ctrl_rng[control_units == trt_unit] for trt_unit in treated_units] # this is non-trivial when the control units are also being predicted.
# constants for indexing
X_control = X[control_units,:]
X_treat = X[treated_units,:]
weights = zeros((C, N))
if solve_method == "step-down":
A = X_control.dot(V + V.T).dot(X_control.T) + 2 * L2_PEN_W * diag(ones(X_control.shape[0])) # 5
B = X_treat.dot( V + V.T).dot(X_control.T) # 6
Ai = A.I
for i, trt_unit in enumerate(treated_units):
if trt_unit in control_units:
b = subinv_k(Ai, out_treated[i][0]).dot(B[out_controls[i],i]) # out_treated[i][0] is the position of trt_unit within the control set
else:
b = Ai.dot(B[:, i])
weights[out_controls[i], i] = b.flatten()
if intercept:
weights[out_controls[i], i] += 1/len(out_controls[i])
elif solve_method == "standard":
A = X.dot(V + V.T).dot(X.T) + 2 * L2_PEN_W * diag(ones(X.shape[0])) # 5
B = X.dot(V + V.T).dot(X.T).T # 6
for i, trt_unit in enumerate(treated_units):
b = linalg.solve(A[in_controls2[i]], B[in_controls[i], trt_unit])
weights[out_controls[i], i] = b.flatten()
if intercept:
weights[out_controls[i], i] += 1/len(out_controls[i])
else:
raise ValueError("Unknown Solve Method: " + solve_method)
return weights.T
def __old__loo_weights(X, V, L2_PEN_W, treated_units = None, control_units = None, intercept = True):
assert False
if treated_units is None:
if control_units is None:
# both not provided, include all samples as both treat and control unit.
control_units = list(range(X.shape[0]))
treated_units = control_units
else:
# Set the treated units to the not-control units
treated_units = list(set(range(X.shape[0])) - set(control_units))
else:
if control_units is None:
# Set the control units to the not-treated units
control_units = list(set(range(X.shape[0])) - set(treated_units))
[C, N, K] = [len(control_units), len(treated_units), X.shape[1]]
index_i = [list(set(list(range(200))) - set([col])) for col in treated_units]
index_i2 = [np.ix_(i,i) for i in index_i] # this is a much faster alternative to A[:,index][index,:]
index_map = {c_unit : row_at for row_at, c_unit in enumerate(control_units)}
weights = zeros((C, N))
A = X.dot(V + V.T).dot(X.T) + 2 * L2_PEN_W * diag(ones(X.shape[0])) # 5
B = X.dot(V + V.T).dot(X.T).T # 6
for col, index in enumerate(index_i):
b = linalg.solve(A[index_i2[col]], B[index, col])
weights[np.vectorize(index_map.get)(index), col] = b.T
if intercept:
weights[index, col] += 1/len(index)
return weights
def loo_score(Y, X, V, L2_PEN_W, LAMBDA = 0, treated_units = None, control_units = None,**kwargs):
if treated_units is None:
if control_units is None:
# both not provided, include all samples as both treat and control unit.
control_units = list(range(X.shape[0]))
treated_units = control_units
else:
# Set the treated units to the not-control units
treated_units = list(set(range(X.shape[0])) - set(control_units))
else:
if control_units is None:
# Set the control units to the not-treated units
control_units = list(set(range(X.shape[0])) - set(treated_units))
weights = loo_weights(X = X,
V = V,
L2_PEN_W = L2_PEN_W,
treated_units = treated_units,
control_units = control_units,
**kwargs)
Y_tr = Y[treated_units, :]
Y_c = Y[control_units, :]
Ey = (Y_tr - weights.dot(Y_c)).getA()
return (Ey **2).sum() + LAMBDA * V.sum()
def fold_v_matrix(Xt,Xc,Yt,Yc,LAMBDA = .1,L2_PEN_W = .1,guess = None,method = 'Newton-CG',**kwargs):
""" Calculates the V matrix with an explicit Treatment and Control folds,
where the match is optimized on the error of the outcomes.
don't use this. it hasn't been thoroughly been checked for bugs (compared
to loo_v_matrix and ct_v_matrix) and lacks several of the optimizations of loo_v_matrix
"""
from scipy.optimize import minimize # used below; not imported at module level
C,K = Xc.shape; N,T = Yt.shape
two_L2_PEN_W = 0 # formerly: 2*L2_PEN_W*diag(ones(C))
def _score(diagV):
""" calculates just the diagonal of dGamma0_dV
"""
V = diag(diagV)
A = Xc.dot(2*V).dot(Xc.T) + two_L2_PEN_W # 5
B = Xc.dot(2*V).dot(Xt.T) # 6
Pi = A.I.dot(B)
Ey = (Yt - Pi.T.dot(Yc)).getA()
return (Ey **2).sum() + LAMBDA * absolute(Pi.T).sum()
def _grad(diagV):
""" calculates just the diagonal of dGamma0_dV
"""
V = diag(diagV)
assert Xc.shape[0] == Yc.shape[0]
# CALCULATE CONSTANTS
A = Xc.dot(2*V).dot(Xc.T) + two_L2_PEN_W # 5
B = Xc.dot(2*V).dot(Xt.T) # 6
Ai = A.I
Pi = Ai.dot(B)
Ey = (Yt - Pi.T.dot(Yc)).getA()
# INITIALIZE PARTIAL DERIVATIVES
dGamma0_dV_term2 = zeros((K,K))
for i in range(K):
dA = Xc[:, i ].dot(Xc[:, i ].T) + Xc[:, i ].dot(Xc[:, i ].T) # 8
dB = Xc[:, i ].dot(Xt [:, i ].T) + Xc[:, i ].dot(Xt [:, i ].T) # 9
dPI_dV = Ai.dot(dB - dA.dot(Pi)) # 7
dGamma0_dV_term2[i,i] = (Ey * Yc.T.dot(dPI_dV).T.getA()).sum()
return diag(LAMBDA * (1 - 2*(V<0)) - 2 * dGamma0_dV_term2)
def _weights(V):
A = Xc.dot(2*V).dot(Xc.T) + two_L2_PEN_W # 5
B = Xc.dot(2*V).dot(Xt.T) # 6
#for col in treated_unit:
#w_col = A[-col,-col].I.dot(B[:,col])
return A.I.dot(B).T
# DO THE OPTIMIZATION
opt = minimize(_score, zeros(Xc.shape[1]) if guess is None else guess,
jac = _grad,
method = method, **kwargs)
v_mat = diag(opt.x)
ts_loss = opt.fun
# CALCULATE weights AND ts_score
weights = _weights(v_mat)
errors = Yt - weights.dot(Yc)
ts_score = linalg.norm(errors) / sqrt(prod(errors.shape))
return weights, v_mat, ts_score, ts_loss, opt

79
lambda_utils.py Normal file
View file

@ -0,0 +1,79 @@
from .fit_loo import loo_v_matrix
from .fit_ct import ct_v_matrix
import numpy as np
def L2_pen_guestimate(X):
return np.mean(np.var(X, axis = 0))
def get_max_lambda(X,Y,L2_PEN_W=None,X_treat=None,Y_treat=None,**kwargs):
""" returns the maximum value of the L1 penalty for which the elements of tensor matrix (V) are not all zero.
"""
# PARAMETER QC
if not isinstance(X, np.matrix): raise TypeError("X is not a matrix")
if not isinstance(Y, np.matrix): raise TypeError("Y is not a matrix")
if (X_treat is None) != (Y_treat is None):
raise ValueError("parameters `X_treat` and `Y_treat` must both be Matrices or None")
if X.shape[1] == 0: raise ValueError("X.shape[1] == 0")
if Y.shape[1] == 0: raise ValueError("Y.shape[1] == 0")
if X.shape[0] != Y.shape[0]: raise ValueError("X and Y have different number of rows (%s and %s)" % (X.shape[0], Y.shape[0],))
if L2_PEN_W is None: L2_PEN_W = np.mean(np.var(X, axis = 0))
if X_treat is not None:
# PARAMETER QC
if not isinstance(X_treat, np.matrix): raise TypeError("X_treat is not a matrix")
if not isinstance(Y_treat, np.matrix): raise TypeError("Y_treat is not a matrix")
if X_treat.shape[1] == 0: raise ValueError("X_treat.shape[1] == 0")
if Y_treat.shape[1] == 0: raise ValueError("Y_treat.shape[1] == 0")
if X_treat.shape[0] != Y_treat.shape[0]:
raise ValueError("X_treat and Y_treat have different number of rows (%s and %s)" % (X.shape[0], Y.shape[0],))
control_units = np.arange(X.shape[0])
treated_units = np.arange(X.shape[0],X.shape[0] + X_treat.shape[0])
try:
_L2_PEN_W = iter(L2_PEN_W)
except TypeError:
# L2_PEN_W is a single value
return ct_v_matrix(X = np.vstack((X,X_treat)),
Y = np.vstack((Y,Y_treat)),
L2_PEN_W = L2_PEN_W,
control_units = control_units,
treated_units = treated_units,
max_lambda = True, # this is terrible at least without documentation...
**kwargs)
else:
# L2_PEN_W is an iterable of values
return [ ct_v_matrix(X = np.vstack((X,X_treat)),
Y = np.vstack((Y,Y_treat)),
control_units = control_units,
treated_units = treated_units,
max_lambda = True, # this is terrible at least without documentation...
L2_PEN_W = l2_pen,
**kwargs)
for l2_pen in L2_PEN_W ]
else:
try:
_L2_PEN_W = iter(L2_PEN_W)
except TypeError:
# L2_PEN_W is a single value
return loo_v_matrix(X = X,
Y = Y,
L2_PEN_W = L2_PEN_W,
max_lambda = True, # this is terrible at least without documentation...
**kwargs)
else:
# L2_PEN_W is an iterable of values
return [ loo_v_matrix(X = X,
Y = Y,
L2_PEN_W = l2_pen,
max_lambda = True, # this is terrible at least without documentation...
**kwargs)
for l2_pen in L2_PEN_W ]

0
optimizers/__init__.py Normal file
View file

150
optimizers/cd_line_search.py Normal file
View file

@ -0,0 +1,150 @@
import numpy as np
from scipy.optimize import line_search
import locale
locale.setlocale(locale.LC_ALL, '')
class cd_res:
def __init__(self, x, fun):
self.x = x
self.fun = fun
print_stop_iteration = 0
def cdl_search(score,
guess,
jac,
tol = 1e-4,
aggressiveness = 0.01, # scales the naive first step relative to the current objective value
alpha_mult = .9,
max_iter = 3000,
min_iter = 3,
# TODO: this is a stupid default (I'm using it out of laziness)
zero_eps = 1e2 * np.finfo(float).eps,
simplex=False,
stol = None,
print_path=False,
print_path_verbose = False):
'''
Implements coordinate descent with a line search satisfying the strong
Wolfe conditions. Note, this tends to give nearly identical results to
L-BFGS-B, and is *much* slower than the super-fast 40 year old Fortran
code wrapped by scipy.
'''
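# Hedged usage sketch (toy objective, not from this package): minimize a convex
# quadratic over the non-negative orthant; `score` and `jac` mirror the
# scipy.optimize.minimize callables:
#   res = cdl_search(score=lambda x: ((x - 1.)**2).sum(),
#                    guess=np.zeros(3),
#                    jac=lambda x: 2.*(x - 1.))
#   res.x  # approximately np.ones(3)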
assert 0 < aggressiveness < 1
assert 0 < alpha_mult < 1
assert (guess >=0).all(), "Initial guess (`guess`) should be in the closed positive orthant"
x_curr = guess
alpha_t = 1.
zero_score = val = score(x_curr)
for _i in range(max_iter):
grad = jac(x_curr)
invalid_directions = np.logical_and(grad > 0,x_curr == 0)
if (grad[np.logical_not(invalid_directions)] == 0).all():
# this happens when we're stuck at the origin and the gradient is
# pointing in the all-negative direction
if print_stop_iteration: print("[gradient is zero] i: %s" % (_i,))
return cd_res(x_curr, val)
# Expected Loss Reduction for a unit step in each of the valid directions
elr = sum( abs(g) for g in grad[np.logical_not(invalid_directions)])
naive_step_size_multiplier = aggressiveness*val/elr
grad *= naive_step_size_multiplier * alpha_t
# constrain the gradient to being non-negative on axis where the
# current guess is already zero
# constrain to the positive orthant
grad[invalid_directions] = 0
if (grad>0).any():
constrained = True
alpha_ratios = grad[ grad >0 ] / x_curr[ grad >0 ]
if (alpha_ratios > 1).any():
max_alpha = alpha_ratios.max()
else:
max_alpha = 1
else:
constrained = False
max_alpha = 1
res = line_search(f=score, myfprime=jac, xk=x_curr, pk=-grad/max_alpha)
alpha, _, _, possibly_wrong_new_val, _, _ = res
if alpha is not None:
if alpha > 1:
alpha_t /= alpha_mult
else:
alpha_t *= alpha_mult
elif constrained:
for j in range(5): # formerly range(17), but that was excessive; in general this succeeds when alpha >= 0.1 (super helpful) or alpha <= 1e-14 (super useless)
if score(x_curr - (.3**j)*grad/max_alpha) < val:
# This can occur when the strong wolf condition insists that the
# current step size is too small (i.e. the gradient is too
# consistent with the function to think that a small step is
# optimal for a global (unconstrained) optimization.
alpha = (.3**j)
# i secretly think this is stupid.
if print_stop_iteration: print("[simple line search worked :)] i: %s, alpha: 1e-%s" % (_i,j))
break
else:
# moving in the direction of the gradient yielded no improvement: stop
if print_stop_iteration: print("[simple line search failed] i: %s" % (_i,))
return cd_res(x_curr, val)
else:
# moving in the direction of the gradient yielded no improvement: stop
if print_stop_iteration: print("[alpha is None] i: %s, grad: %s" % (_i, grad/max_alpha, ))
return cd_res(x_curr, val)
# iterate
try:
if constrained:
x_old, x_curr, val_old = x_curr, x_curr - min(1, alpha)*grad/max_alpha, val
else:
x_old, x_curr, val_old = x_curr, x_curr - alpha*grad/max_alpha, val
except:
import pdb; pdb.set_trace()
val = score(x_curr)
val_diff = val_old - val
if print_path:
try:
#if constrained:
print("i: %s, val: %s, val_diff: %0.2f, alpha: %0.5f, zeros: %s (constrained)" % (_i, locale.format("%d", val, grouping=True), val_diff, alpha, sum( x_curr == 0)))
#else:
#print("i: %s, val: %s, val_diff: %0.2f, alpha: %0.5f,ELR: %0.6f, multiplier: %0.6f, aggressiveness: %0.6f, constrained: %s, zeros: %s" % (_i, locale.format("%d", val, grouping=True), val_diff, alpha, elr, naiive_step_size_multiplier * alpha_t, agressiveness*alpha_t ,constrained,sum( x_curr == 0)))
if print_path_verbose:
print("grad: %s,x_curr %s" % (grad, x_curr, ))
except Exception as err:
print(err)
import pdb; pdb.set_trace()
# rounding error can get us really close or even across the coordinate plane.
x_curr[abs(x_curr) < zero_eps] = 0
x_curr[x_curr < zero_eps] = 0
if (x_curr == 0).all() and (x_old == 0).all():
# this happens when we were at the origin and the gradient didn't
# take us out of the range of zero_eps
if print_stop_iteration: print("stuck at the origin %s"% (_i,))
return cd_res(x_curr, score(x_curr)) # tricky tricky...
if (x_curr < 0).any():
import pdb; pdb.set_trace()
print("This shouldn't ever happen if max_alpha is specified properly")
if val_diff/val < tol:
# this a heuristic rule, to be sure, but seems to be useful.
if _i > min_iter:
if print_stop_iteration: print("[val_diff/val < tol] i: %s, val: %s, val_diff: %s" % (_i, val, val_diff, ))
return cd_res(x_curr, val)
else:
raise RuntimeError('Solution did not converge within the specified tolerance')

48
tensor.py Normal file
View file

@ -0,0 +1,48 @@
from .fit_loo import loo_v_matrix
from .fit_ct import ct_v_matrix
import numpy as np
def tensor(X, Y, X_treat=None, Y_treat=None, **kwargs):
""" Presents a unified api for ct_v_matrix and loo_v_matrix
"""
# PARAMETER QC
if not isinstance(X, np.matrix): raise TypeError("X is not a matrix")
if not isinstance(Y, np.matrix): raise TypeError("Y is not a matrix")
if X.shape[1] == 0: raise ValueError("X.shape[1] == 0")
if Y.shape[1] == 0: raise ValueError("Y.shape[1] == 0")
if X.shape[0] != Y.shape[0]: raise ValueError("X and Y have different number of rows (%s and %s)" % (X.shape[0], Y.shape[0],))
if (X_treat is None) != (Y_treat is None):
raise ValueError("parameters `X_treat` and `Y_treat` must both be Matrices or None")
if X_treat is not None:
"Fit the Treated units to the control units; assuming that Y contains pre-intervention outcomes"
# PARAMETER QC
if not isinstance(X_treat, np.matrix): raise TypeError("X_treat is not a matrix")
if not isinstance(Y_treat, np.matrix): raise TypeError("Y_treat is not a matrix")
if X_treat.shape[1] == 0: raise ValueError("X_treat.shape[1] == 0")
if Y_treat.shape[1] == 0: raise ValueError("Y_treat.shape[1] == 0")
if X_treat.shape[0] != Y_treat.shape[0]: raise ValueError("X_treat and Y_treat have different number of rows (%s and %s)" % (X_treat.shape[0], Y_treat.shape[0],))
# FIT THE V-MATRIX AND POSSIBLY CALCULATE THE L2_PEN_W
# note that the weights, score, and loss function value returned here are for the in-sample predictions
weights, v_mat, ts_score, loss, l2_pen_w, _ = \
ct_v_matrix(X = np.vstack((X,X_treat)),
Y = np.vstack((Y,Y_treat)),
control_units = np.arange(X.shape[0]),
treated_units = np.arange(X_treat.shape[0]) + X.shape[0],
**kwargs)
else:
"Fit the control units to themselves; Y may contain post-intervention outcomes"
weights, v_mat, ts_score, loss, l2_pen_w, _ = \
loo_v_matrix(X = X,
Y = Y,
control_units = np.arange(X.shape[0]),
treated_units = np.arange(X.shape[0]),
# treated_units = [X.shape[0] + i for i in range(len(train))],
**kwargs)
return v_mat

0
utils/__init__.py Normal file
View file

168
utils/sub_matrix_inverse.py Normal file
View file

@ -0,0 +1,168 @@
""" In the leave-one-out method with larger sample sizes most of the time is spent in calculating
A.I.dot(B) using np.linalg.solve (which is much faster than the brute force method i.e. `A.I.dot(B)`).
For example, with 200 units, about about 95% of the time is spent in this line.
However we can take advantage of the fact that we're calculating the
inverse of N matrices which are all sub-matrices of a common matrix by inverting the common matrix
and calculating each subset inverse from there.
https://math.stackexchange.com/a/208021/252693
TODO: this could be made a bit faster by passing in the indexes (k_rng,
k_rng2) instead of re-building them
"""
import numpy as np
#-- n = 5
#-- p = 3
#-- a = np.matrix(np.random.random((n,p,)))
#-- v = np.diag(np.random.random((p,)))
#-- x = a.dot(v).dot(a.T)
#-- x.dot(x.I)
#--
#-- x = np.matrix(np.random.random((n,n,)))
#-- x.dot(x.I)
#--
#-- xi = x.I
#--
#--
#-- B = np.matrix(np.random.random((n,n,)))
#--
#-- k = np.arange(2)
#-- N = xi.shape[0]
#-- rng = np.arange(N)
#-- k_rng = rng[np.logical_not(np.isin(rng,k))]
#--
#-- out = xi[np.ix_(k_rng,k_rng)] - xi[np.ix_(k_rng,k)].dot(xi[np.ix_(k,k_rng)])/np.linalg.det(xi[np.ix_(k,k)])
#--
#-- brute_force = x[np.ix_(k_rng,k_rng)].I
def all_subinverses(x,eps=None):
""" Given an matrix (x), calculate all the inverses of leave-one-out sub-matrices.
param: x a square matrix for which to find the inverses of all it's leave one out sub-matrices.
param: eps If not None, used to assert that the each calculated
sub-matrix-inverse is within eps of the brute force calculation.
Testing only, this slows the process way down since the inverse of
each sub-matrix is calculated by the brute force method. Typically
set to a multiple of `np.finfo(float).eps`
"""
# handy constant for indexing
xi = x.I
N = x.shape[0]
rng = np.arange(N)
out = [None,] * N
for k in range(N):
k_rng = rng[rng != k]
out[k] = xi[np.ix_(k_rng,k_rng)] - xi[k_rng,k].dot(xi[k,k_rng])/xi[k,k]
if eps is not None:
if not (abs(out[k] - x[np.ix_(k_rng,k_rng)].I) < eps).all():
raise RuntimeError("Fast and brute force methods were not within epsilon (%s) for sub-matrix k = %s; max difference = %s" % (eps, k, abs(xi_k - x[k_rng2].I).max(), ) )
return out
def subinv_k(xi,k,eps=None):
""" Given an matrix (x), calculate all the inverses of leave-one-out sub-matrices.
param: x a square matrix for which to find the inverses of all it's leave one out sub-matrices.
param: k the column and row to leave out
param: eps If not None, used to assert that the each calculated
sub-matrix-inverse is within eps of the brute force calculation.
Testing only, this slows the process way down since the inverse of
each sub-matrix is calculated by the brute force method. Typically
set to a multiple of `np.finfo(float).eps`
"""
# handy constant for indexing
N = xi.shape[0]
rng = np.arange(N)
k_rng = rng[rng != k]
out = xi[np.ix_(k_rng,k_rng)] - xi[k_rng,k].dot(xi[k,k_rng])/xi[k,k]
if eps is not None:
x = xi.I # recover the original matrix for the brute-force comparison
if not (abs(out - x[np.ix_(k_rng,k_rng)].I) < eps).all():
raise RuntimeError("Fast and brute force methods were not within epsilon (%s) for sub-matrix k = %s; max difference = %s" % (eps, k, abs(out - x[np.ix_(k_rng,k_rng)].I).max(), ) )
return out
# ---------------------------------------------
# single sub-matrix
# ---------------------------------------------
if __name__ == "__main__":
import time
n = 200
B = np.matrix(np.random.random((n,n,)))
for i in range(100):
# create a sub-matrix that meets the matching criteria
x = np.matrix(np.random.random((n,n,)))
try:
zz = all_subinverses(x, 1e-9)
break
except RuntimeError:
pass
else:
print("Failed to generate a %sx%s matrix whose inverses are all within %s of the quick method")
k = 5
n_tests = 1000
# =======================
t0 = time.time()
for i in range(n_tests):
N = x.shape[0]
rng = np.arange(N)
k_rng = rng[rng != k]
k_rng2 = np.ix_(k_rng,k_rng)
zz = x[k_rng2].I.dot(B[k_rng2])
t1 = time.time()
slow_time = t1 - t0
print("A.I.dot(B): brute force time (N = %s): %s"% (n,t1 - t0))
# =======================
t0 = time.time()
for i in range(n_tests):
# make the comparison fair
if i % n == 0:
xi = x.I
zz = subinv_k(xi,k).dot(B[k_rng2])
t1 = time.time()
fast_time = t1 - t0
print("A.I.dot(B): quick time (N = %s): %s"% (n,t1 - t0))
# =======================
t0 = time.time()
for i in range(n_tests):
N = xi.shape[0]
rng = np.arange(N)
k_rng = rng[rng != k]
k_rng2 = np.ix_(k_rng,k_rng)
zz = np.linalg.solve(x[k_rng2],B[k_rng2])
t1 = time.time()
fast_time = t1 - t0
print("A.I.dot(B): np.linalg.solve time (N = %s): %s"% (n,t1 - t0))
# ---------------------------------------------
# ---------------------------------------------
t0 = time.time()
for i in range(100): zz = all_subinverses(x, 1e-9)
t1 = time.time()
slow_time = t1 - t0
print("Full set of inverses: brute force time (N = %s): %s", (n,t1 - t0))
t0 = time.time()
for i in range(100): zz = all_subinverses(x)
t1 = time.time()
fast_time = t1 - t0
print("Full set of inverses: quick time (N = %s): %s", (n,t1 - t0))
# ---------------------------------------------
# ---------------------------------------------

35
weights.py Normal file
View file

@ -0,0 +1,35 @@
from .fit_loo import loo_weights
from .fit_ct import ct_weights
import numpy as np
def weights(X, X_treat=None, **kwargs):
""" Calculate synthetic control weights
"""
# PARAMETER QC
if not isinstance(X, np.matrix): raise TypeError("X is not a matrix")
if X.shape[1] == 0: raise ValueError("X.shape[1] == 0")
if X_treat is not None:
"weight for the control units against the remaining controls"
# PARAMETER QC
if not isinstance(X_treat, np.matrix): raise TypeError("X_treat is not a matrix")
if X_treat.shape[1] == 0: raise ValueError("X_treat.shape[1] == 0")
# FIT THE V-MATRIX AND POSSIBLY CALCULATE THE L2_PEN_W
# note that the weights, score, and loss function value returned here are for the in-sample predictions
return ct_weights(X = np.vstack((X,X_treat)),
control_units = np.arange(X.shape[0]),
treated_units = np.arange(X_treat.shape[0]) + X.shape[0],
**kwargs)
else:
"weight for the control units against the remaining controls"
return loo_weights(X = X,
control_units = np.arange(X.shape[0]),
treated_units = np.arange(X.shape[0]),
**kwargs)