CATE validation - uplift uniform confidence bands (#840)

Add support for multiplier bootstrap uniform confidence band error bars for uplift curves
This commit is contained in:
Amar Venugopal 2024-03-19 09:54:10 -07:00 committed by GitHub
Parent ed4fe33b2b
Commit 27d3101e23
No known key found for this signature
GPG key ID: B5690EEEBB952194
7 changed files with 381 additions and 149 deletions

View File

@@ -147,6 +147,20 @@ CATE Interpreters
econml.cate_interpreter.SingleTreeCateInterpreter
econml.cate_interpreter.SingleTreePolicyInterpreter
.. _validation_api:
CATE Validation
---------------
.. autosummary::
:toctree: _autosummary
econml.validate.DRTester
econml.validate.BLPEvaluationResults
econml.validate.CalibrationEvaluationResults
econml.validate.UpliftEvaluationResults
econml.validate.EvaluationResults
.. _scorers_api:
CATE Scorers

View File

@@ -5,7 +5,7 @@ import pandas as pd
import scipy.stats as st
from sklearn.ensemble import RandomForestClassifier, GradientBoostingRegressor
from econml.validate.drtester import DRtester
from econml.validate.drtester import DRTester
from econml.dml import DML
@@ -70,7 +70,7 @@ class TestDRTester(unittest.TestCase):
).fit(Y=Ytrain, T=Dtrain, X=Xtrain)
# test the DR outcome difference
my_dr_tester = DRtester(
my_dr_tester = DRTester(
model_regression=reg_y,
model_propensity=reg_t,
cate=cate
@@ -123,7 +123,7 @@ class TestDRTester(unittest.TestCase):
).fit(Y=Ytrain, T=Dtrain, X=Xtrain)
# test the DR outcome difference
my_dr_tester = DRtester(
my_dr_tester = DRTester(
model_regression=reg_y,
model_propensity=reg_t,
cate=cate
@@ -148,8 +148,8 @@ class TestDRTester(unittest.TestCase):
self.assertRaises(ValueError, res.plot_toc, k)
else: # real treatment, k = 1
self.assertTrue(res.plot_cal(k) is not None)
self.assertTrue(res.plot_qini(k) is not None)
self.assertTrue(res.plot_toc(k) is not None)
self.assertTrue(res.plot_qini(k, 'ucb2') is not None)
self.assertTrue(res.plot_toc(k, 'ucb1') is not None)
self.assertLess(res_df.blp_pval.values[0], 0.05) # heterogeneity
self.assertGreater(res_df.cal_r_squared.values[0], 0) # good R2
@@ -171,7 +171,7 @@ class TestDRTester(unittest.TestCase):
).fit(Y=Ytrain, T=Dtrain, X=Xtrain)
# test the DR outcome difference
my_dr_tester = DRtester(
my_dr_tester = DRTester(
model_regression=reg_y,
model_propensity=reg_t,
cate=cate
@@ -193,8 +193,8 @@ class TestDRTester(unittest.TestCase):
for kwargs in [{}, {'Xval': Xval}]:
with self.assertRaises(Exception) as exc:
my_dr_tester.evaluate_cal(**kwargs)
self.assertTrue(
str(exc.exception) == "Must fit nuisance models on training sample data to use calibration test"
self.assertEqual(
str(exc.exception), "Must fit nuisance models on training sample data to use calibration test"
)
def test_exceptions(self):
@@ -212,7 +212,7 @@ class TestDRTester(unittest.TestCase):
).fit(Y=Ytrain, T=Dtrain, X=Xtrain)
# test the DR outcome difference
my_dr_tester = DRtester(
my_dr_tester = DRTester(
model_regression=reg_y,
model_propensity=reg_t,
cate=cate
@@ -223,11 +223,11 @@ class TestDRTester(unittest.TestCase):
with self.assertRaises(Exception) as exc:
func()
if func.__name__ == 'evaluate_cal':
self.assertTrue(
str(exc.exception) == "Must fit nuisance models on training sample data to use calibration test"
self.assertEqual(
str(exc.exception), "Must fit nuisance models on training sample data to use calibration test"
)
else:
self.assertTrue(str(exc.exception) == "Must fit nuisances before evaluating")
self.assertEqual(str(exc.exception), "Must fit nuisances before evaluating")
my_dr_tester = my_dr_tester.fit_nuisance(
Xval, Dval, Yval, Xtrain, Dtrain, Ytrain
@@ -242,12 +242,12 @@ class TestDRTester(unittest.TestCase):
with self.assertRaises(Exception) as exc:
func()
if func.__name__ == 'evaluate_blp':
self.assertTrue(
str(exc.exception) == "CATE predictions not yet calculated - must provide Xval"
self.assertEqual(
str(exc.exception), "CATE predictions not yet calculated - must provide Xval"
)
else:
self.assertTrue(str(exc.exception) ==
"CATE predictions not yet calculated - must provide both Xval, Xtrain")
self.assertEqual(str(exc.exception),
"CATE predictions not yet calculated - must provide both Xval, Xtrain")
for func in [
my_dr_tester.evaluate_cal,
@@ -256,19 +256,19 @@ class TestDRTester(unittest.TestCase):
]:
with self.assertRaises(Exception) as exc:
func(Xval=Xval)
self.assertTrue(
str(exc.exception) == "CATE predictions not yet calculated - must provide both Xval, Xtrain")
self.assertEqual(
str(exc.exception), "CATE predictions not yet calculated - must provide both Xval, Xtrain")
cal_res = my_dr_tester.evaluate_cal(Xval, Xtrain)
self.assertGreater(cal_res.cal_r_squared[0], 0) # good R2
with self.assertRaises(Exception) as exc:
my_dr_tester.evaluate_uplift(metric='blah')
self.assertTrue(
str(exc.exception) == "Unsupported metric - must be one of ['toc', 'qini']"
self.assertEqual(
str(exc.exception), "Unsupported metric 'blah' - must be one of ['toc', 'qini']"
)
my_dr_tester = DRtester(
my_dr_tester = DRTester(
model_regression=reg_y,
model_propensity=reg_t,
cate=cate
@@ -278,5 +278,11 @@ class TestDRTester(unittest.TestCase):
qini_res = my_dr_tester.evaluate_uplift(Xval, Xtrain)
self.assertLess(qini_res.pvals[0], 0.05)
with self.assertRaises(Exception) as exc:
qini_res.plot_uplift(tmt=1, err_type='blah')
self.assertEqual(
str(exc.exception), "Invalid error type 'blah'; must be one of [None, 'ucb2', 'ucb1']"
)
autoc_res = my_dr_tester.evaluate_uplift(Xval, Xtrain, metric='toc')
self.assertLess(autoc_res.pvals[0], 0.05)
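As a usage note, the workflow these tests exercise looks roughly like the following sketch. The data arrays and the fitted `cate` estimator are assumed to exist as in the test setup above, and the nuisance model choices here are placeholders rather than the exact configurations used in the tests.

from sklearn.ensemble import RandomForestClassifier, GradientBoostingRegressor
from econml.validate.drtester import DRTester

# Placeholder nuisance models; the tests configure their own reg_y / reg_t
my_dr_tester = DRTester(
    model_regression=GradientBoostingRegressor(),
    model_propensity=RandomForestClassifier(),
    cate=cate,
)

# Fitting on both samples makes the calibration test available
my_dr_tester.fit_nuisance(Xval, Dval, Yval, Xtrain, Dtrain, Ytrain)
res = my_dr_tester.evaluate_all(Xval, Xtrain)

# New in this commit: pass an error type to the uplift plots
res.plot_qini(1, 'ucb2')  # two-sided uniform confidence band
res.plot_toc(1, 'ucb1')   # one-sided uniform confidence band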

View File

@@ -5,7 +5,9 @@
A suite of validation methods for CATE models.
"""
from .drtester import DRtester
from .drtester import DRTester
from .results import BLPEvaluationResults, CalibrationEvaluationResults, UpliftEvaluationResults, EvaluationResults
__all__ = ['DRtester']
__all__ = ['DRTester',
'BLPEvaluationResults', 'CalibrationEvaluationResults', 'UpliftEvaluationResults', 'EvaluationResults']

View File

@@ -8,12 +8,13 @@ from sklearn.model_selection import cross_val_predict, StratifiedKFold, KFold
from statsmodels.api import OLS
from statsmodels.tools import add_constant
from econml.utilities import deprecated
from .results import CalibrationEvaluationResults, BLPEvaluationResults, UpliftEvaluationResults, EvaluationResults
from .utils import calculate_dr_outcomes, calc_uplift
class DRtester:
class DRTester:
"""
Validation tests for CATE models. Includes the best linear predictor (BLP) test as in Chernozhukov et al. (2022),
the calibration test in Dwivedi et al. (2020), and the QINI coefficient as in Radcliffe (2007).
@@ -46,47 +47,62 @@ class DRtester:
The calibration r-squared metric is similar to the standard R-square score in that it can take any value
less than or equal to 1, with scores closer to 1 indicating a better calibrated CATE model.
**QINI**
**Uplift Modeling**
Units are ordered by predicted CATE values and a running measure of the average treatment effect in each cohort is
kept as we progress through ranks. The QINI coefficient is then the area under the resulting curve, with a value
of 0 interpreted as corresponding to a model with randomly assigned CATE coefficients. All calculations are
performed on validation dataset results, using the training set as input.
kept as we progress through ranks. The resulting TOC curve can then be plotted and its integral calculated and used
as a measure of true heterogeneity captured by the CATE model; this integral is referred to as the AUTOC (area
under TOC). The QINI curve is a variant of this curve that also incorporates treatment probability; its integral is
referred to as the QINI coefficient.
More formally, the QINI curve is given by the following function:
More formally, the TOC and QINI curves are given by the following functions:
.. math::
\\tau_{TOC}(q) = \\mathrm{Cov}(
Y^{DR}(g,p),
\\frac{
\\mathbb{1}\\{\\hat{\\tau}(Z) \\geq \\hat{\\mu}(q)\\}
}{
\\mathrm{Pr}(\\hat{\\tau}(Z) \\geq \\hat{\\mu}(q))
}
)
\\tau_{QINI}(q) = \\mathrm{Cov}(Y^{DR}(g,p), \\mathbb{1}\\{\\hat{\\tau}(Z) \\geq \\hat{\\mu}(q)\\})
Where :math:`q` is the desired quantile, :math:`\\hat{\\mu}` is the quantile function, and :math:`\\hat{\\tau}` is
the predicted CATE function.
:math:`Y^{DR}(g,p)` refers to the doubly robust outcome difference (relative to control) for the given observation.
The QINI coefficient is then given by:
The AUTOC and QINI coefficient are then given by:
.. math::
AUTOC = \\int_0^1 \\tau_{TOC}(q) dq
QINI = \\int_0^1 \\tau_{QINI}(q) dq
Parameters
----------
model_regression: estimator
Nuisance model estimator used to fit the outcome to features. Must be able to implement `fit' and `predict'
Nuisance model estimator used to fit the outcome to features. Must be able to implement `fit` and `predict`
methods
model_propensity: estimator
Nuisance model estimator used to fit the treatment assignment to features. Must be able to implement `fit'
method and either `predict' (in the case of binary treatment) or `predict_proba' methods (in the case of
Nuisance model estimator used to fit the treatment assignment to features. Must be able to implement `fit`
method and either `predict` (in the case of binary treatment) or `predict_proba` methods (in the case of
multiple categorical treatments).
n_splits: integer, default 5
Number of splits used to generate cross-validated predictions
cate: estimator
Fitted conditional average treatment effect (CATE) estimator to be validated.
cv: int or list, default 5
Splitter used for cross-validation. Can be either an integer (corresponding to the number of desired folds)
or a list of indices corresponding to membership in each fold.
References
----------
[Chernozhukov2022] V. Chernozhukov et al.
Generic Machine Learning Inference on Heterogeneous Treatment Effects in Randomized Experiments
arXiv preprint arXiv:1712.04802, 2022.
@@ -97,7 +113,6 @@ class DRtester:
arXiv preprint arXiv:2008.10109, 2020.
`<https://arxiv.org/abs/2008.10109>`_
[Radcliffe2007] N. Radcliffe
Using control groups to target on predicted lift: Building and assessing uplift models.
Direct Marketing Analytics Journal (2007), pages 14-21.
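As a concrete illustration of the TOC and QINI definitions above, here is a self-contained numpy sketch on simulated inputs; tau_hat and y_dr are stand-ins for the predicted CATEs and the estimated DR outcome differences, not values produced by the class.

import numpy as np

rng = np.random.default_rng(0)
n = 5000
tau_hat = rng.normal(size=n)         # predicted CATEs, tau_hat(Z)
y_dr = tau_hat + rng.normal(size=n)  # simulated DR outcome differences Y^DR

ate = y_dr.mean()
qs = np.linspace(5, 95, 50)          # percentile grid, matching the default
toc = np.zeros(len(qs))
qini = np.zeros(len(qs))
for i, q in enumerate(qs):
    cutoff = np.percentile(tau_hat, q)   # mu_hat(q), the quantile function
    inds = tau_hat >= cutoff
    # Cov(Y_DR, 1{.}/Pr(.)) reduces to E[Y_DR | tau_hat >= cutoff] - ATE
    toc[i] = y_dr[inds].mean() - ate
    qini[i] = inds.mean() * toc[i]       # Cov(Y_DR, 1{.}) = Pr(.) * TOC(q)

# Riemann sums over quantiles, as in the integrals above
autoc = np.sum(toc[:-1] * np.diff(qs) / 100)
qini_coeff = np.sum(qini[:-1] * np.diff(qs) / 100)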
@@ -175,7 +190,7 @@ class DRtester:
Generates nuisance predictions and calculates doubly robust (DR) outcomes either by (1) cross-fitting in the
validation sample, or (2) fitting in the training sample and applying to the validation sample. If Xtrain,
Dtrain, and ytrain are all not None, then option (2) will be implemented, otherwise, option (1) will be
implemented. In order to use the `evaluate_cal' method then Xtrain, Dtrain, and ytrain must all be specified.
implemented. In order to use the `evaluate_cal` method then Xtrain, Dtrain, and ytrain must all be specified.
Parameters
----------
@@ -186,12 +201,12 @@ class DRtester:
the control status be equal to 0, and all other treatments integers starting at 1.
yval: vector of length n_val
Outcomes for the validation sample
Xtrain: (n_train x k) matrix or vector of length n, default ``None``
Xtrain: (n_train x k) matrix or vector of length n, optional
Features used in nuisance models for training sample
Dtrain: vector of length n_train, default ``None''
Dtrain: vector of length n_train, optional
Treatment assignment of training sample. Control status must be minimum value. It is recommended to have
the control status be equal to 0, and all other treatments integers starting at 1.
ytrain: vector of length n_train, defaul ``None``
ytrain: vector of length n_train, optional
Outcomes for the training sample
Returns
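Schematically, the two fitting modes described above look like this; the array names are the same hypothetical samples used throughout.

# Option (1): cross-fit nuisances within the validation sample alone;
# the calibration test will not be available in this mode
my_dr_tester.fit_nuisance(Xval, Dval, Yval)

# Option (2): fit nuisances on the training sample and apply them to the
# validation sample; required before calling evaluate_cal
my_dr_tester.fit_nuisance(Xval, Dval, Yval, Xtrain, Dtrain, Ytrain)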
@@ -332,7 +347,7 @@ class DRtester:
----------
Xval: (n_val x n_treatment) matrix
Validation set features to be used to predict (and potentially fit) DR outcomes in CATE model
Xtrain (n_train x n_treatment) matrix, defaul ``None``
Xtrain: (n_train x n_treatment) matrix, optional
Training set features used to fit CATE model
Returns
@@ -359,11 +374,11 @@ class DRtester:
Parameters
----------
Xval: (n_val x n_treatment) matrix, default ``None``
Validation sample features for CATE model. If not specified, then `fit_cate' method must already have been
Xval: (n_val x n_treatment) matrix, optional
Validation sample features for CATE model. If not specified, then `fit_cate` method must already have been
implemented
Xtrain: (n_train x n_treatment) matrix, default ``None``
Training sample features for CATE model. If not specified, then `fit cate' method must already have been
Xtrain: (n_train x n_treatment) matrix, optional
Training sample features for CATE model. If not specified, then `fit_cate` method must already have been
implemented (with Xtrain specified)
n_groups: integer, default 4
Number of quantile-based groups used to calculate calibration score.
@@ -433,17 +448,17 @@ class DRtester:
Xtrain: np.array = None
) -> BLPEvaluationResults:
"""
Implements the best linear predictor (BLP) test as in [Chernozhukov2022]. `fit_nusiance' method must already
Implements the best linear predictor (BLP) test as in [Chernozhukov2022]. `fit_nuisance` method must already
be implemented.
Parameters
----------
Xval: (n_val x k) matrix, default ``None''
Validation sample features for CATE model. If not specified, then `fit_cate' method must already have been
Xval: (n_val x k) matrix, optional
Validation sample features for CATE model. If not specified, then `fit_cate` method must already have been
implemented
Xtrain: (n_train x k) matrix, default ``None''
Xtrain: (n_train x k) matrix, optional
Training sample features for CATE model. If specified, then CATE is fitted on training sample and applied
to Xval. If specified, then Xtrain, Dtrain, Ytrain must have been specified in `fit_nuisance' method (and
to Xval. If specified, then Xtrain, Dtrain, Ytrain must have been specified in `fit_nuisance` method (and
vice-versa)
Returns
@@ -489,29 +504,32 @@ class DRtester:
Xval: np.array = None,
Xtrain: np.array = None,
percentiles: np.array = np.linspace(5, 95, 50),
metric: str = 'qini'
metric: str = 'qini',
n_bootstrap: int = 1000
) -> UpliftEvaluationResults:
"""
Calculates QINI coefficient for the given model as in Radcliffe (2007), where units are ordered by predicted
Calculates uplift curves and coefficients for the given model, where units are ordered by predicted
CATE values and a running measure of the average treatment effect in each cohort is kept as we progress
through ranks. The QINI coefficient is then the area under the resulting curve, with a value of 0 interpreted
through ranks. The uplift coefficient is then the area under the resulting curve, with a value of 0 interpreted
as corresponding to a model with randomly assigned CATE coefficients. All calculations are performed on
validation dataset results, using the training set as input.
Parameters
----------
Xval: (n_val x k) matrix, default ``None''
Validation sample features for CATE model. If not specified, then `fit_cate' method must already have been
Xval: (n_val x k) matrix, optional
Validation sample features for CATE model. If not specified, then `fit_cate` method must already have been
implemented
Xtrain: (n_train x k) matrix, default ``None''
Xtrain: (n_train x k) matrix, optional
Training sample features for CATE model. If specified, then CATE is fitted on training sample and applied
to Xval. If specified, then Xtrain, Dtrain, Ytrain must have been specified in `fit_nuisance' method (and
to Xval. If specified, then Xtrain, Dtrain, Ytrain must have been specified in `fit_nuisance` method (and
vice-versa)
percentiles: one-dimensional array, default ``np.linspace(5, 95, 50)''
percentiles: one-dimensional array, default ``np.linspace(5, 95, 50)``
Array of percentiles over which the QINI curve should be constructed. Defaults to 5%-95% in intervals of
5%.
metric: string, default 'qini'
Which type of uplift curve to evaluate. Must be one of ['toc', 'qini']
n_bootstrap: integer, default 1000
Number of bootstrap samples to run when calculating uniform confidence bands.
Returns
-------
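For instance, a call evaluating the AUTOC on a coarser grid with fewer bootstrap draws might look like the sketch below; the argument values are illustrative only, not recommendations.

import numpy as np

autoc_res = my_dr_tester.evaluate_uplift(
    Xval, Xtrain,
    percentiles=np.linspace(10, 90, 9),  # deciles instead of the default grid
    metric='toc',
    n_bootstrap=500,
)
print(autoc_res.pvals)  # one p-value per non-control treatment level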
@@ -532,7 +550,8 @@ class DRtester:
self.cate_preds_val_,
self.dr_val_,
percentiles,
metric
metric,
n_bootstrap
)
coeffs = [coeff]
errs = [err]
@@ -546,7 +565,8 @@ class DRtester:
self.cate_preds_val_[:, k],
self.dr_val_[:, k],
percentiles,
metric
metric,
n_bootstrap
)
coeffs.append(coeff)
errs.append(err)
@@ -568,20 +588,25 @@ class DRtester:
self,
Xval: np.array = None,
Xtrain: np.array = None,
n_groups: int = 4
n_groups: int = 4,
n_bootstrap: int = 1000
) -> EvaluationResults:
"""
Implements the best linear prediction (`evaluate_blp'), calibration (`evaluate_cal'), uplift curve
('evaluate_uplift') methods
Implements the best linear prediction (`evaluate_blp`), calibration (`evaluate_cal`), and uplift curve
(`evaluate_uplift`) methods.
Parameters
----------
Xval: (n_cal x k) matrix, default ``None''
Validation sample features for CATE model. If not specified, then `fit_cate' method must already have been
Xval: (n_val x k) matrix, optional
Validation sample features for CATE model. If not specified, then `fit_cate` method must already have been
implemented
Xtrain: (n_train x k) matrix, default ``None''
Training sample features for CATE model. If not specified, then `fit_cate' method must already have been
Xtrain: (n_train x k) matrix, optional
Training sample features for CATE model. If not specified, then `fit_cate` method must already have been
implemented
n_groups: integer, default 4
Number of quantile-based groups used to calculate calibration score.
n_bootstrap: integer, default 1000
Number of bootstrap samples to run when calculating uniform confidence bands for uplift curves.
Returns
-------
@@ -595,8 +620,8 @@ class DRtester:
blp_res = self.evaluate_blp()
cal_res = self.evaluate_cal(n_groups=n_groups)
qini_res = self.evaluate_uplift(metric='qini')
toc_res = self.evaluate_uplift(metric='toc')
qini_res = self.evaluate_uplift(metric='qini', n_bootstrap=n_bootstrap)
toc_res = self.evaluate_uplift(metric='toc', n_bootstrap=n_bootstrap)
self.res = EvaluationResults(
blp_res=blp_res,
@@ -606,3 +631,9 @@ class DRtester:
)
return self.res
@deprecated("DRtester has been renamed 'DRTester' and the old name has been deprecated and will be removed "
"in a future release. Please use 'DRTester' instead.")
class DRtester(DRTester):
pass

View File

@@ -20,6 +20,7 @@ class CalibrationEvaluationResults:
treatments: list or numpy array of floats
Sequence of treatment labels
"""
def __init__(
self,
cal_r_squared: np.array,
@@ -99,6 +100,7 @@ class BLPEvaluationResults:
treatments: list or numpy array of floats
Sequence of treatment labels
"""
def __init__(
self,
params: List[float],
@@ -154,6 +156,7 @@ class UpliftEvaluationResults:
Dictionary mapping treatment levels to dataframes containing
necessary data for plotting uplift curves
"""
def __init__(
self,
params: List[float],
@@ -188,7 +191,7 @@ class UpliftEvaluationResults:
}).round(3)
return res
def plot_uplift(self, tmt: Any):
def plot_uplift(self, tmt: Any, err_type: str = None):
"""
Plots uplift curves.
@@ -197,6 +200,10 @@ class UpliftEvaluationResults:
tmt: any (sortable)
Name of treatment to plot.
err_type: str, optional
Type of error to plot. Accepted values are pointwise normal error bars (None), a two-sided uniform
confidence band ('ucb2'), or a one-sided uniform confidence band ('ucb1').
Returns
-------
matplotlib plot with percentage treated on x-axis and uplift metric (and 95% CI) on y-axis
@@ -205,18 +212,38 @@ class UpliftEvaluationResults:
raise ValueError(f'Invalid treatment; must be one of {self.treatments[1:]}')
df = self.curves[tmt].copy()
df['95_err'] = 1.96 * df['err']
if err_type is None:
df['95_err'] = 1.96 * df['err']
elif err_type == 'ucb2':
df['95_err'] = df['uniform_critical_value'] * df['err']
elif err_type == 'ucb1':
df['95_err'] = df['uniform_one_side_critical_value'] * df['err']
else:
raise ValueError(f"Invalid error type {err_type!r}; must be one of [None, 'ucb2', 'ucb1']")
res = self.summary()
coeff = round(res.loc[res['treatment'] == tmt]['est'].values[0], 3)
err = round(res.loc[res['treatment'] == tmt]['se'].values[0], 3)
fig = df.plot(
kind='scatter',
x='Percentage treated',
y='value',
yerr='95_err',
ylabel='Gain over Random',
title=f"Treatment = {tmt}, Integral = {coeff} +/- {err}"
)
if err_type == 'ucb1':
fig = df.plot(
kind='scatter',
x='Percentage treated',
y='value',
yerr=[[df['95_err'], np.zeros(len(df))]],
ylabel='Gain over Random',
title=f"Treatment = {tmt}, Integral = {coeff} +/- {err}"
)
else:
fig = df.plot(
kind='scatter',
x='Percentage treated',
y='value',
yerr='95_err',
ylabel='Gain over Random',
title=f"Treatment = {tmt}, Integral = {coeff} +/- {err}"
)
return fig
@@ -239,6 +266,7 @@ class EvaluationResults:
toc_res: UpliftEvaluationResults object
Results object for TOC test
"""
def __init__(
self,
cal_res: CalibrationEvaluationResults,
@@ -290,7 +318,7 @@ class EvaluationResults:
"""
return self.cal.plot_cal(tmt)
def plot_qini(self, tmt: int):
def plot_qini(self, tmt: int, err_type: str = None):
"""
Plots QINI curves.
@@ -299,13 +327,17 @@ class EvaluationResults:
tmt: integer
Treatment level to plot
err_type: str, optional
Type of error to plot. Accepted values are pointwise normal error bars (None), a two-sided uniform
confidence band ('ucb2'), or a one-sided uniform confidence band ('ucb1').
Returns
-------
matplotlib plot with percentage treated on x-axis and QINI value (and 95% CI) on y-axis
"""
return self.qini.plot_uplift(tmt)
return self.qini.plot_uplift(tmt, err_type)
def plot_toc(self, tmt: int):
def plot_toc(self, tmt: int, err_type: str = None):
"""
Plots TOC curves.
@@ -314,8 +346,12 @@ class EvaluationResults:
tmt: integer
Treatment level to plot
err_type: str, optional
Type of error to plot. Accepted values are pointwise normal error bars (None), a two-sided uniform
confidence band ('ucb2'), or a one-sided uniform confidence band ('ucb1').
Returns
-------
matplotlib plot with percentage treated on x-axis and TOC value (and 95% CI) on y-axis
"""
return self.toc.plot_uplift(tmt)
return self.toc.plot_uplift(tmt, err_type)
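The three err_type options differ only in the critical-value multiplier applied to the same pointwise standard errors. A sketch, assuming df is one of the dataframes stored in self.curves (the two critical-value columns are constants added by calc_uplift in this commit):

half_normal = 1.96 * df['err']                                 # err_type=None
half_ucb2 = df['uniform_critical_value'] * df['err']           # err_type='ucb2'
half_ucb1 = df['uniform_one_side_critical_value'] * df['err']  # err_type='ucb1'

The uniform multipliers typically exceed their pointwise counterparts, so the bands are wider but are designed to cover the whole curve simultaneously; the one-sided band only bounds the curve from below, which is why plot_uplift draws asymmetric error bars for 'ucb1'.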

View File

@@ -53,11 +53,14 @@ def calc_uplift(
cate_preds_val: np.array,
dr_val: np.array,
percentiles: np.array,
metric: str
metric: str,
n_bootstrap: int = 1000
) -> Tuple[float, float, pd.DataFrame]:
"""
Helper function for QINI curve generation and QINI coefficient calculation.
See documentation for "evaluate_qini" method for more details.
Helper function for uplift curve generation and coefficient calculation.
Calculates uplift curve points, integral, and errors on both points and integral.
Also calculates appropriate critical value multipliers for confidence intervals (via multiplier bootstrap).
See documentation for "drtester.evaluate_uplift" method for more details.
Parameters
----------
@@ -72,6 +75,8 @@ def calc_uplift(
Array of percentiles over which the QINI curve should be constructed. Defaults to 5%-95% in intervals of 5%.
metric: string
String indicating whether to calculate TOC or QINI; should be one of ['toc', 'qini']
n_bootstrap: integer, default 1000
Number of bootstrap samples to run when calculating uniform confidence bands.
Returns
-------
@@ -94,10 +99,19 @@ def calc_uplift(
toc[it] = np.mean(dr_val[inds]) - ate # tau(q) := E[Y(1) - Y(0) | tau(X) >= q[it]] - E[Y(1) - Y(0)]
toc_psi[it, :] = np.squeeze((dr_val - ate) * (inds / group_prob - 1) - toc[it])
else:
raise ValueError("Unsupported metric - must be one of ['toc', 'qini']")
raise ValueError(f"Unsupported metric {metric!r} - must be one of ['toc', 'qini']")
toc_std[it] = np.sqrt(np.mean(toc_psi[it] ** 2) / n) # standard error of tau(q)
# Multiplier bootstrap: perturb the studentized influence functions with
# i.i.d. standard normal weights
w = np.random.normal(0, 1, size=(n, n_bootstrap))
mboot = (toc_psi / toc_std.reshape(-1, 1)) @ w / n
# 95th percentile of the maximal absolute deviation across the percentile
# grid gives the two-sided uniform critical value
max_mboot = np.max(np.abs(mboot), axis=0)
uniform_critical_value = np.percentile(max_mboot, 95)
# the 5th percentile of the minimal deviation gives the one-sided value
min_mboot = np.min(mboot, axis=0)
uniform_one_side_critical_value = np.abs(np.percentile(min_mboot, 5))
coeff_psi = np.sum(toc_psi[:-1] * np.diff(percentiles).reshape(-1, 1) / 100, 0)
coeff = np.sum(toc[:-1] * np.diff(percentiles) / 100)
coeff_stderr = np.sqrt(np.mean(coeff_psi ** 2) / n)
@@ -105,7 +119,9 @@ def calc_uplift(
curve_df = pd.DataFrame({
'Percentage treated': 100 - percentiles,
'value': toc,
'err': toc_std
'err': toc_std,
'uniform_critical_value': uniform_critical_value,
'uniform_one_side_critical_value': uniform_one_side_critical_value
})
return coeff, coeff_stderr, curve_df
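A standalone sketch of the multiplier bootstrap used above, with simulated influence functions standing in for toc_psi; all names and sizes here are illustrative.

import numpy as np

rng = np.random.default_rng(0)
n, n_points, n_bootstrap = 2000, 50, 1000

# Simulated influence functions, one row per point on the curve grid
psi = rng.normal(size=(n_points, n))
se = np.sqrt(np.mean(psi ** 2, axis=1) / n)  # pointwise standard errors

# Perturb the influence functions with i.i.d. standard normal weights and
# record the extreme studentized deviations across the grid
w = rng.normal(size=(n, n_bootstrap))
mboot = (psi / se[:, None]) @ w / n          # shape (n_points, n_bootstrap)
crit_two_sided = np.percentile(np.max(np.abs(mboot), axis=0), 95)
crit_one_sided = abs(np.percentile(np.min(mboot, axis=0), 5))

# crit_two_sided plays the role of 1.96 in a band intended to cover all
# grid points simultaneously with roughly 95% probability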

File diffs are hidden because one or more lines are too long