adding dual penalty optimization

This commit is contained in:
Jason Thorpe 2019-04-02 13:43:51 -07:00
Родитель 6d89d186b6
Коммит f2d29e43d2
7 изменённых файлов: 335 добавлений и 140 удалений

Просмотреть файл

@ -55,8 +55,11 @@ confidence=
# no Warning level messages displayed, use"--disable=all --enable=classes
# --disable=W"
disable=print-statement,
line-too-long, # handing this over to black
bad-continuation, # handing this over to black
useless-object-inheritance, # trying to keep compatibility with python 2.7
missing-type-doc,
missing-return-type-doc,
parameter-unpacking,
unpacking-in-except,
old-raise-syntax,

Просмотреть файл

@ -10,6 +10,7 @@ from SparseSC.fit_fold import fold_v_matrix
from SparseSC.fit_loo import loo_v_matrix
from SparseSC.fit_ct import ct_v_matrix, ct_score
def score_train_test(
X,
Y,
@ -67,7 +68,6 @@ def score_train_test(
weights penalty, and the out-of-sample score
:rtype: tuple
"""
# to use `pdb.set_trace()` here, set `parallel = False` above
if (X_treat is None) != (Y_treat is None):
raise ValueError(
"parameters `X_treat` and `Y_treat` must both be Matrices or None"
@ -178,40 +178,43 @@ def score_train_test(
return v_mat, w_pen, s
def score_train_test_sorted_w_pens(w_pen,
start=None,
cache=False,
progress=False,
FoldNumber=None,
**kwargs):
def score_train_test_sorted_w_pens(
w_pen, start=None, cache=False, progress=False, FoldNumber=None, **kwargs
):
""" a wrapper which calls score_train_test() for each element of an
array of `w_pen`'s, optionally caching the optimized v_mat and using it
as the start position for the next iteration.
"""
# DEFAULTS
values = [None]*len(w_pen)
values = [None] * len(w_pen)
if progress > 0:
import time
t0 = time.time()
for i,_w_pen in enumerate(w_pen):
v_mat, _, _ = values[i] = score_train_test( w_pen = _w_pen, start = start, **kwargs)
for i, _w_pen in enumerate(w_pen):
v_mat, _, _ = values[i] = score_train_test(w_pen=_w_pen, start=start, **kwargs)
if cache:
start = np.diag(v_mat)
if progress > 0 and (i % progress) == 0:
t1 = time.time()
if FoldNumber is None:
print("w_pen: %0.4f, value %s of %s, time elapsed: %0.4f sec." %
(_w_pen, i+1, len(w_pen), t1 - t0, ))
#print("iteration %s of %s time: %0.4f ,w_pen: %0.4f, diags: %s" %
print(
"w_pen: %0.4f, value %s of %s, time elapsed: %0.4f sec."
% (_w_pen, i + 1, len(w_pen), t1 - t0)
)
# print("iteration %s of %s time: %0.4f ,w_pen: %0.4f, diags: %s" %
# (i+1, len(w_pen), t1 - t0, _w_pen, np.diag(v_mat),))
else:
print("Fold %s,w_pen: %0.4f, value %s of %s, time elapsed: %0.4f sec." %
(FoldNumber, _w_pen, i+1, len(w_pen), t1 - t0, ))
#print("Fold %s, iteration %s of %s, time: %0.4f ,w_pen: %0.4f, diags: %s" %
print(
"Fold %s,w_pen: %0.4f, value %s of %s, time elapsed: %0.4f sec."
% (FoldNumber, _w_pen, i + 1, len(w_pen), t1 - t0)
)
# print("Fold %s, iteration %s of %s, time: %0.4f ,w_pen: %0.4f, diags: %s" %
# (FoldNumber, i+1, len(w_pen), t1 - t0, _w_pen, np.diag(v_mat),))
t0 = time.time()
@ -268,12 +271,12 @@ def CV_score(
X_treat=None,
Y_treat=None,
splits=5,
# sub_splits=None,
# sub_splits=None,
quiet=False,
parallel=False,
max_workers=None,
# this is here for API consistency:
progress=None, # pylint: disable=unused-argument
progress=None, # pylint: disable=unused-argument
**kwargs
):
"""
@ -308,7 +311,7 @@ def CV_score(
iter(w_pen)
except TypeError:
w_pen_is_iterable = False
else :
else:
w_pen_is_iterable = True
__score_train_test__ = score_train_test_sorted_w_pens
@ -316,7 +319,7 @@ def CV_score(
iter(v_pen)
except TypeError:
v_pen_is_iterable = False
else :
else:
v_pen_is_iterable = True
__score_train_test__ = score_train_test_sorted_v_pens
@ -349,7 +352,7 @@ def CV_score(
except TypeError:
from sklearn.model_selection import KFold
splits = KFold(splits, True).split(np.arange(X_treat.shape[0]))
splits = KFold(splits, shuffle=True).split(np.arange(X_treat.shape[0]))
train_test_splits = list(splits)
n_splits = len(train_test_splits)
@ -441,7 +444,7 @@ def CV_score(
except TypeError:
from sklearn.model_selection import KFold
splits = KFold(splits).split(np.arange(X.shape[0]))
splits = KFold(splits, shuffle=True).split(np.arange(X.shape[0]))
train_test_splits = [x for x in splits]
n_splits = len(train_test_splits)
@ -472,7 +475,7 @@ def CV_score(
if max_workers == 1 and n_splits > 1:
print(
"WARNING: Default for max_workers is 1 on a machine with %s cores is 1."
) # pylint: disable=line-too-long
)
_initialize_Global_worker_pool(max_workers)

Просмотреть файл

@ -15,8 +15,6 @@ def estimate_effects(
ret_pl=False,
ret_CI=False,
level=0.95,
w_pen=None,
v_pen=None,
**kwargs
):
r"""
@ -33,8 +31,9 @@ def estimate_effects(
the controls when N1>1)
:param ret_CI:
:param level:
:param w_pen:
:param v_pen:
:param kwargs: Additional parameters passed to fit()
:returns: An instance of SparseSCEstResults with the fitted results
:Keyword Args: Passed on to fit()
"""
@ -62,8 +61,7 @@ def estimate_effects(
verbose=0,
min_iter=-1,
tol=1,
w_pen=w_pen,
v_pen=v_pen,
**kwargs
)
Y_sc = fit_res.predict(Y[control_units, :])
diffs = Y - Y_sc
@ -130,6 +128,7 @@ class SparseSCEstResults(object):
Holds estimation info
"""
# pylint: disable=redefined-outer-name
def __init__(self, fit, pl_res_pre, pl_res_post, pl_res_post_scaled, ind_CI=None):
"""
:param fit: The fit() return object

Просмотреть файл

@ -3,6 +3,7 @@
Implements round-robin fitting of Sparse Synthetic Controls Model for DGP based analysis
"""
import numpy as np
from warnings import warn
from sklearn.model_selection import KFold
# From the Public API
@ -26,15 +27,9 @@ def fit(
grid=None, # USER SUPPLIED GRID OF COVARIATE PENALTIES
grid_min=1e-6,
grid_max=1,
grid_points=20,
choice="min",
cv_folds=10,
grid_length=20,
grid_iterations=2,
gradient_folds=10,
gradient_seed=10101,
model_type="retrospective",
custom_donor_pool=None,
# VERBOSITY
progress=True,
**kwargs
):
r"""
@ -57,7 +52,7 @@ def fit(
:param w_pen: Penalty applied to the difference
between the current weights and the null weights (1/n). default
provided by :func:``w_pen_guestimate``.
:type w_pen: float, Optional
:type w_pen: float | float[], optional
:param v_pen: penalty
(penalties) applied to the magnitude of the covariate weights.
@ -66,7 +61,7 @@ def fit(
:type v_pen: float | float[], optional
:param grid: only used when `v_pen` is not provided.
Defaults to ``np.exp(np.linspace(np.log(grid_min),np.log(grid_max),grid_points))``
Defaults to ``np.exp(np.linspace(np.log(grid_min),np.log(grid_max),grid_length))``
:type grid: float | float[], optional
:param grid_min: Lower bound for ``grid`` when
@ -79,9 +74,12 @@ def fit(
range ``(0,1]``
:type grid_max: float, default = 1
:param grid_points: number of points in the ``grid`` parameter when
:param grid_length: number of points in the ``grid`` parameter when
``v_pen`` and ``grid`` are not provided
:type grid_points: int, default = 20
:type grid_length: int, default = 20
:param grid_iterations: number of times the grid should be alternated between ``v_pen`` and ``w_pen``
:type grid_iterations: int, default = 20
:param choice: Method for choosing from among the
v_pen. Only used when v_pen is an
@ -164,9 +162,153 @@ def fit(
``iterable``, or when model_type is not one of the allowed values
"""
assert X.shape[0] == Y.shape[0]
w_pen_is_iterable = False
try:
iter(w_pen)
except TypeError:
pass
else:
if v_pen is None:
raise ValueError("When v_pen is an iterable, v_pen must be provided")
w_pen_is_iterable = True
v_pen_is_iterable = False
try:
iter(v_pen)
except TypeError:
pass
else:
v_pen_is_iterable = True
if w_pen is None:
raise ValueError("When v_pen is an iterable, w_pen must be provided")
if v_pen_is_iterable and w_pen_is_iterable:
raise ValueError("Features and Weights penalties are both iterables")
if v_pen_is_iterable or w_pen_is_iterable:
return _fit(X, Y, treated_units, w_pen, v_pen, **kwargs)
if v_pen is not None and w_pen is not None:
return _fit(X, Y, treated_units, w_pen, v_pen, **kwargs)
# Here, either v_pen or w_pen is None (possibly both)
verbose = kwargs.get("verbose", 1)
if grid is None:
grid = np.exp(np.linspace(np.log(grid_min), np.log(grid_max), grid_length))
if treated_units is not None:
control_units = [u for u in range(Y.shape[0]) if u not in treated_units]
_X, _Y = X[control_units, :], Y[control_units, :]
else:
_X, _Y = X, Y
model_fits = []
last_axis = None
while grid_iterations > 0:
grid_iterations -= 1
v_pen, w_pen, axis = _build_penalties(
_X, _Y, v_pen, w_pen, grid, gradient_folds, verbose
)
if last_axis:
assert axis != last_axis
model_fit = _fit(X, Y, treated_units, w_pen, v_pen, **kwargs)
model_fits.append(model_fit)
if axis == "v_pen":
v_pen, w_pen = model_fit.fitted_v_pen, None
else:
v_pen, w_pen = None, model_fit.fitted_w_pen
last_axis = axis
# RETURN THE FINAL MODEL AND ATTACH PREVIOUS MODEL FITS TO THE RESULT
out = model_fits.pop()
out.model_fits = model_fits
return out
def _build_penalties(X, Y, v_pen, w_pen, grid, gradient_folds, verbose):
""" Build (sensible?) defaults for the v_pen and w_pen
"""
if w_pen is None:
if v_pen is None:
# use the guestimate for w_pen and generate a grid based sequence for v_pen
w_pen = w_pen_guestimate(X)
v_pen_max = get_max_v_pen(
X, Y, w_pen=w_pen, grad_splits=gradient_folds, verbose=verbose
)
axis = "v_pen"
v_pen = grid * v_pen_max
else:
w_pen_max = get_max_w_pen(
X, Y, v_pen=v_pen, grad_splits=gradient_folds, verbose=verbose
)
axis = "w_pen"
w_pen = grid * w_pen_max
else: # w_pen is not None:
v_pen_max = get_max_v_pen(
X, Y, w_pen=w_pen, grad_splits=gradient_folds, verbose=verbose
)
axis = "v_pen"
v_pen = grid * v_pen_max
return v_pen, w_pen, axis
def _fit(
X,
Y,
treated_units=None,
w_pen=None, # Float
v_pen=None, # Float or an array of floats
# PARAMETERS USED TO CONSTRUCT DEFAULT GRID COVARIATE_PENALTIES
choice="min",
cv_folds=10,
gradient_folds=10,
gradient_seed=10101,
model_type="retrospective",
custom_donor_pool=None,
# VERBOSITY
progress=True,
**kwargs
):
assert X.shape[0] == Y.shape[0]
# -- verbose = kwargs.get("verbose", 1)
if (not callable(choice)) and (choice not in ("min",)):
raise ValueError("Unexpected value for choice parameter: %s" % choice)
w_pen_is_iterable = False
try:
iter(w_pen)
except TypeError:
pass
else:
if v_pen is None:
raise ValueError("When v_pen is an iterable, v_pen must be provided")
w_pen_is_iterable = True
v_pen_is_iterable = False
try:
iter(v_pen)
except TypeError:
pass
else:
v_pen_is_iterable = True
if w_pen is None:
raise ValueError("When v_pen is an iterable, w_pen must be provided")
if v_pen_is_iterable and w_pen_is_iterable:
raise ValueError("Features and Weights penalties are both iterables")
if treated_units is not None:
# --------------------------------------------------
@ -194,21 +336,8 @@ def fit(
Ytest = Y[treated_units, :]
# --------------------------------------------------
# (sensible?) defaults
# Actual work
# --------------------------------------------------
# Get the weight penalty guestimate: very quick ( milliseconds )
if w_pen is None:
w_pen = w_pen_guestimate(Xtrain)
if v_pen is None:
if grid is None:
grid = np.exp(
np.linspace(np.log(grid_min), np.log(grid_max), grid_points)
)
# GET THE MAXIMUM v_penS: quick ~ ( seconds to tens of seconds )
v_pen_max = get_max_v_pen(
Xtrain, Ytrain, w_pen=w_pen, grad_splits=gradient_folds, verbose=verbose
)
v_pen = grid * v_pen_max
if model_type == "retrospective":
# Retrospective Treatment Effects: ( *model_type = "prospective"*)
@ -223,8 +352,8 @@ def fit(
Y=Ytrain,
splits=cv_folds,
v_pen=v_pen,
progress=progress,
w_pen=w_pen,
progress=progress,
grad_splits=gradient_folds,
random_state=gradient_seed, # TODO: Cleanup Task 1
quiet=not progress,
@ -232,7 +361,15 @@ def fit(
)
# GET THE INDEX OF THE BEST SCORE
best_v_pen = __choose(scores, v_pen, choice)
if w_pen_is_iterable:
best_v_pen = v_pen
best_w_pen = __choose(scores, w_pen, choice)
elif v_pen_is_iterable:
best_w_pen = w_pen
best_v_pen = __choose(scores, v_pen, choice)
else:
best_v_pen = v_pen
best_w_pen = w_pen
# --------------------------------------------------
# Phase 2: extract V and weights: slow ( tens of seconds to minutes )
@ -241,6 +378,7 @@ def fit(
best_V = tensor(
X=Xtrain,
Y=Ytrain,
w_pen=best_w_pen,
v_pen=best_v_pen,
grad_splits=gradient_folds,
random_state=gradient_seed, # TODO: Cleanup Task 1
@ -304,8 +442,8 @@ def fit(
Y=Y,
splits=cv_folds,
v_pen=v_pen,
progress=progress,
w_pen=w_pen,
progress=progress,
grad_splits=gradient_folds,
random_state=gradient_seed, # TODO: Cleanup Task 1
quiet=not progress,
@ -313,7 +451,15 @@ def fit(
)
# GET THE INDEX OF THE BEST SCORE
best_v_pen = __choose(scores, v_pen, choice)
if w_pen_is_iterable:
best_v_pen = v_pen
best_w_pen = __choose(scores, w_pen, choice)
elif v_pen_is_iterable:
best_w_pen = w_pen
best_v_pen = __choose(scores, v_pen, choice)
else:
best_v_pen = v_pen
best_w_pen = w_pen
# --------------------------------------------------
# Phase 2: extract V and weights: slow ( tens of seconds to minutes )
@ -322,6 +468,7 @@ def fit(
best_V = tensor(
X=X,
Y=Y,
w_pen=best_w_pen,
v_pen=best_v_pen,
grad_splits=gradient_folds,
random_state=gradient_seed, # TODO: Cleanup Task 1
@ -353,7 +500,15 @@ def fit(
)
# GET THE INDEX OF THE BEST SCORE
best_v_pen = __choose(scores, v_pen, choice)
if w_pen_is_iterable:
best_v_pen = v_pen
best_w_pen = __choose(scores, w_pen, choice)
elif v_pen_is_iterable:
best_w_pen = w_pen
best_v_pen = __choose(scores, v_pen, choice)
else:
best_v_pen = v_pen
best_w_pen = w_pen
# --------------------------------------------------
# Phase 2: extract V and weights: slow ( tens of seconds to minutes )
@ -364,6 +519,7 @@ def fit(
Y=Ytrain,
X_treat=Xtest,
Y_treat=Ytest,
w_pen=best_w_pen,
v_pen=best_v_pen,
**kwargs
)
@ -382,10 +538,10 @@ def fit(
custom_donor_pool_t = custom_donor_pool[treated_units, :]
custom_donor_pool_c = custom_donor_pool[control_units, :]
sc_weights[treated_units, :] = weights(
Xtrain, Xtest, V=best_V, w_pen=w_pen, custom_donor_pool=custom_donor_pool_t
Xtrain, Xtest, V=best_V, w_pen=best_w_pen, custom_donor_pool=custom_donor_pool_t
)
sc_weights[control_units, :] = weights(
Xtrain, V=best_V, w_pen=w_pen, custom_donor_pool=custom_donor_pool_c
Xtrain, V=best_V, w_pen=best_w_pen, custom_donor_pool=custom_donor_pool_c
)
else:
@ -396,24 +552,6 @@ def fit(
control_units = None
# --------------------------------------------------
# (sensible?) defaults
# --------------------------------------------------
if v_pen is None:
if grid is None:
grid = np.exp(
np.linspace(np.log(grid_min), np.log(grid_max), grid_points)
)
# GET THE MAXIMUM v_penS: quick ~ ( seconds to tens of seconds )
v_pen_max = get_max_v_pen(
X, Y, w_pen=w_pen, grad_splits=gradient_folds, verbose=verbose
)
v_pen = grid * v_pen_max
# Get the weight penalty guestimate: very quick ( milliseconds )
if w_pen is None:
w_pen = w_pen_guestimate(X)
# --------------------------------------------------
# Phase 1: extract cross fold residual errors for each v_pen
# --------------------------------------------------
@ -433,7 +571,15 @@ def fit(
)
# GET THE INDEX OF THE BEST SCORE
best_v_pen = __choose(scores, v_pen, choice)
if w_pen_is_iterable:
best_v_pen = v_pen
best_w_pen = __choose(scores, w_pen, choice)
elif v_pen_is_iterable:
best_w_pen = w_pen
best_v_pen = __choose(scores, v_pen, choice)
else:
best_v_pen = v_pen
best_w_pen = w_pen
# --------------------------------------------------
# Phase 2: extract V and weights: slow ( tens of seconds to minutes )
@ -442,6 +588,7 @@ def fit(
best_V = tensor(
X=X,
Y=Y,
w_pen=best_w_pen,
v_pen=best_v_pen,
grad_splits=gradient_folds,
random_state=gradient_seed, # TODO: Cleanup Task 1
@ -450,22 +597,24 @@ def fit(
# GET THE BEST SET OF WEIGHTS
sc_weights = weights(
X, V=best_V, w_pen=w_pen, custom_donor_pool=custom_donor_pool
X, V=best_V, w_pen=best_w_pen, custom_donor_pool=custom_donor_pool
)
return SparseSCFit(
X,
Y,
control_units,
treated_units,
model_type,
X=X,
Y=Y,
control_units=control_units,
treated_units=treated_units,
model_type=model_type,
# fitting parameters
best_v_pen,
w_pen,
v_pen,
best_V,
fitted_v_pen=best_v_pen,
fitted_w_pen=best_w_pen,
initial_w_pen=w_pen,
initial_v_pen=v_pen,
V=best_V,
# Fitted Synthetic Controls
sc_weights,
sc_weights=sc_weights,
scores=scores,
)
@ -476,18 +625,15 @@ def __choose(scores, penalties, choice):
try:
iter(penalties)
except TypeError:
best_penalty_parameter = penalties
return penalties
else:
if choice == "min":
best_i = np.argmin(scores)
best_penalty_parameter = penalties[best_i]
elif callable(choice):
best_penalty_parameter = choice(scores)
else:
# TODO: this is a terrible place to throw this error
raise ValueError("Unexpected value for choice parameter: %s" % choice)
return penalties[np.argmin(scores)]
return best_penalty_parameter
if callable(choice):
return choice(scores)
raise ValueError("Unexpected value for choice parameter: %s" % choice)
class SparseSCFit(object):
@ -504,12 +650,14 @@ class SparseSCFit(object):
treated_units,
model_type,
# fitting parameters:
V_penalty,
w_pen,
v_pen,
fitted_v_pen,
fitted_w_pen,
initial_v_pen,
initial_w_pen,
V,
# Fitted Synthetic Controls:
sc_weights,
scores,
):
# DATA
@ -520,16 +668,19 @@ class SparseSCFit(object):
self.model_type = model_type
# FITTING PARAMETERS
self.V_penalty = V_penalty
self.w_pen = w_pen
self.v_pen = v_pen
self.fitted_w_pen = fitted_w_pen
self.fitted_v_pen = fitted_v_pen
self.initial_w_pen = initial_w_pen
self.initial_v_pen = initial_v_pen
self.V = V
self.scores = scores
# FITTED SYNTHETIC CONTROLS
self.sc_weights = sc_weights
def predict(self, Ydonor=None):
""" predict method
"""
predict method
"""
if Ydonor is None:
if self.model_type != "full":
@ -544,22 +695,11 @@ class SparseSCFit(object):
"""
return _SparseFit_string_template % (
self.model_type,
self.V_penalty,
self.w_pen,
self.fitted_v_pen,
self.fitted_w_pen,
np.diag(self.V),
)
# TODO: CALCULATE ERRORS AND R-SQUARED'S
# ct_prediction_error = Y_SC_test - Ytest
# null_model_error = Ytest - np.mean(Xtest)
# betternull_model_error = (Ytest.T - np.mean(Xtest,1)).T
# print("#--------------------------------------------------")
# print("OUTER FOLD %s OF %s: Group Mean R-squared: %0.3f%%; Individual Mean R-squared: %0.3f%%" % (
# i + 1,
# 100*(1 - np.power(ct_prediction_error,2).sum() / np.power(null_model_error,2).sum()) ,
# 100*(1 - np.power(ct_prediction_error,2).sum() /np.power(betternull_model_error,2).sum() )))
# print("#--------------------------------------------------")
def show(self):
""" display goodness of figures illustrating goodness of fit
"""
@ -571,3 +711,15 @@ V penalty: %s
W penalty: %s
V: %s
"""
# TODO: CALCULATE ERRORS AND R-SQUARED'S
# ct_prediction_error = Y_SC_test - Ytest
# null_model_error = Ytest - np.mean(Xtest)
# betternull_model_error = (Ytest.T - np.mean(Xtest,1)).T
# print("#--------------------------------------------------")
# print("OUTER FOLD %s OF %s: Group Mean R-squared: %0.3f%%; Individual Mean R-squared: %0.3f%%" % (
# i + 1,
# 100*(1 - np.power(ct_prediction_error,2).sum() / np.power(null_model_error,2).sum()) ,
# 100*(1 - np.power(ct_prediction_error,2).sum() /np.power(betternull_model_error,2).sum() )))
# print("#--------------------------------------------------")

Просмотреть файл

@ -130,15 +130,15 @@ def cdl_search(
else:
val0 = score(np.zeros(x_curr.shape[0]))
# -- if (x_curr == 0).all():
# -- # Force a single step away form the origin if it is at least a little
# -- # useful. Intuition: the curvature at the origin is typically
# -- # exceedingly sharp (becasue we're going from a state with "no
# -- # information" to "some information" in the covariate space, and as
# -- # result the strong wolf conditions will have a strong tendency to
# -- # fail. However, the origin is rarely optimal so forcing a step away
# -- # form the origin will be necessary in most cases.
# -- x_curr, val = cdl_step (score, guess, jac, val, learning_rate, zero_eps, print_path)
# if (x_curr == 0).all():
# # Force a single step away form the origin if it is at least a little
# # useful. Intuition: the curvature at the origin is typically
# # exceedingly sharp (becasue we're going from a state with "no
# # information" to "some information" in the covariate space, and as
# # result the strong wolf conditions will have a strong tendency to
# # fail. However, the origin is rarely optimal so forcing a step away
# # form the origin will be necessary in most cases.
# x_curr, val = cdl_step (score, guess, jac, val, learning_rate, zero_eps, print_path)
for _i in range(max_iter):

Просмотреть файл

@ -71,6 +71,13 @@ def tensor(X, Y, X_treat=None, Y_treat=None, grad_splits=None, **kwargs):
else:
# Fit the control units to themselves; Y may contain post-intervention outcomes:
adjusted=False
if kwargs["w_pen"] < 1:
adjusted=True
#-- new_pen = 1.5
#-- print("w_pen: %s -> %s"% (kwargs["w_pen"], new_pen))
#-- kwargs["w_pen"] = new_pen
if grad_splits is not None:
_, v_mat, _, _, _, _ = fold_v_matrix(
X=X,
@ -82,6 +89,9 @@ def tensor(X, Y, X_treat=None, Y_treat=None, grad_splits=None, **kwargs):
**kwargs
)
if adjusted:
print("vmat: %s" % (np.diag(v_mat)))
else:
_, v_mat, _, _, _, _ = loo_v_matrix(
X=X,

Просмотреть файл

@ -4,19 +4,25 @@ Tests for model fitness
import unittest
import numpy as np
import random
import SparseSC as SC
from os.path import join, abspath, dirname
from dgp.factor_model import factor_dgp
# import matplotlib.pyplot as plt
exec(
exec(# pylint: disable=exec-used
open(join(dirname(abspath(__file__)), "..", "examples", "example_graphs.py")).read()
) # if we don't want an __init__.py
# pylint: disable=no-self-use
class TestDGPs(unittest.TestCase):
"""
testing fixture
"""
def testSimpleTrendDGP(self):
"""
No X, just Y; half the donors are great, other half are bad
@ -30,23 +36,35 @@ class TestDGPs(unittest.TestCase):
T = T0 + T1
proto_sim = np.array(range(0, T, 1), ndmin=2)
proto_not = np.array(range(0, 2 * T, 2), ndmin=2)
proto_not[0,2] += 1
te = np.hstack((np.zeros((1, T0)), np.full((1, T0), 2)))
proto_tr = proto_sim + te
Y1 = np.matmul(np.ones((N1, 1)), proto_tr)
Y0_sim = np.matmul(np.ones((N0_sim, 1)), proto_sim)
Y0_not = np.matmul(np.ones((N0_not, 1)), proto_not)
Y = np.vstack((Y1, Y0_sim, Y0_not))
ret_full = SC.estimate_effects(
Y[:, :T0], Y[:, T0:], treated_units, ret_CI=True, max_n_pl=200
) # just getting V_pen
V_penalty = ret_full.fit.V_penalty
#Y += np.random.normal(0, 0.01, Y.shape)
# OPTIMIZE OVER THE V_PEN'S
ret_full = SC.estimate_effects(
Y[:, :T0],
Y[:, T0:],
treated_units,
ret_CI=True,
max_n_pl=200,
grid_min=1e-2,
w_pen=1e6, # 1e-11,
) # just getting V_pen
V_penalty = ret_full.fit.fitted_v_pen
# OPTIMIZE OVER THE W_PEN'S
ret = SC.estimate_effects(
Y[:, :T0],
Y[:, T0:],
treated_units,
v_pen=[V_penalty],
w_pen=0.00000000001,
v_pen=V_penalty,
w_pen=1e-6, # 1e-11,
ret_CI=True,
)
Y_sc = ret.fit.predict(Y[control_units, :])
@ -55,10 +73,15 @@ class TestDGPs(unittest.TestCase):
# print(weight_sums[0])
# print(np.mean(weight_sums[0]))
print(ret_full.fit.scores)
p_value = ret.p_value
print("p-value: %s" % p_value)
print( ret.CI)
import pdb; pdb.set_trace()
# print(ret)
assert 2 in ret.CI, "Confidence interval does not include the true effect"
p_value = ret.p_value
assert p_value is not None
# import pdb; pdb.set_trace()
assert p_value < 0.001, "P-value is larger than expected"
# [sc_raw, sc_diff] = ind_sc_plots(Y[0, :], Y_sc[0, :], T0, ind_ci=ret.ind_CI)
@ -75,6 +98,9 @@ class TestDGPs(unittest.TestCase):
# # plt.show()
def testFactorDGP(self):
"""
factor dbp based test
"""
N1, N0 = 2, 100
treated_units = [0, 1]
T0, T1 = 20, 10
@ -109,6 +135,8 @@ class TestDGPs(unittest.TestCase):
if __name__ == "__main__":
import random
random.seed(12345)
np.random.seed(10101)