зеркало из https://github.com/py-why/EconML.git
Enable support for sklearn 1.4
Signed-off-by: Keith Battocchi <kebatt@microsoft.com>
This commit is contained in:
Родитель
b8a5e2f3c8
Коммит
ab572dea46
|
@ -663,7 +663,7 @@ To add fixed effect heterogeneity, we can create one-hot encodings of the id, wh
|
|||
from econml.dml import LinearDML
|
||||
from sklearn.preprocessing import OneHotEncoder
|
||||
# removing one id to avoid colinearity, as is standard for fixed effects
|
||||
X_oh = OneHotEncoder(sparse=False).fit_transform(X)[:, 1:]
|
||||
X_oh = OneHotEncoder(sparse_output=False).fit_transform(X)[:, 1:]
|
||||
|
||||
est = LinearDML(model_y=RandomForestRegressor(),
|
||||
model_t=RandomForestRegressor())
|
||||
|
|
|
@ -44,7 +44,7 @@ from ._cate_estimator import (BaseCateEstimator, LinearCateEstimator,
|
|||
TreatmentExpansionMixin)
|
||||
from .inference import BootstrapInference
|
||||
from .utilities import (_deprecate_positional, check_input_arrays,
|
||||
cross_product, filter_none_kwargs, strata_from_discrete_arrays,
|
||||
cross_product, filter_none_kwargs, one_hot_encoder, strata_from_discrete_arrays,
|
||||
inverse_onehot, jacify_featurizer, ndim, reshape, shape, transpose)
|
||||
from .sklearn_extensions.model_selection import ModelSelector
|
||||
|
||||
|
@ -780,7 +780,7 @@ class _OrthoLearner(TreatmentExpansionMixin, LinearCateEstimator):
|
|||
categories = self.categories
|
||||
if categories != 'auto':
|
||||
categories = [categories] # OneHotEncoder expects a 2D array with features per column
|
||||
self.transformer = OneHotEncoder(categories=categories, sparse=False, drop='first')
|
||||
self.transformer = one_hot_encoder(categories=categories, drop='first')
|
||||
self.transformer.fit(reshape(T, (-1, 1)))
|
||||
self._d_t = (len(self.transformer.categories_[0]) - 1,)
|
||||
elif self.treatment_featurizer:
|
||||
|
@ -792,7 +792,7 @@ class _OrthoLearner(TreatmentExpansionMixin, LinearCateEstimator):
|
|||
self.transformer = None
|
||||
|
||||
if self.discrete_instrument:
|
||||
self.z_transformer = OneHotEncoder(categories='auto', sparse=False, drop='first')
|
||||
self.z_transformer = one_hot_encoder(categories='auto', drop='first')
|
||||
self.z_transformer.fit(reshape(Z, (-1, 1)))
|
||||
else:
|
||||
self.z_transformer = None
|
||||
|
|
|
@ -16,7 +16,7 @@ from sklearn.pipeline import Pipeline
|
|||
from sklearn.utils import check_array, check_X_y
|
||||
from sklearn.preprocessing import OneHotEncoder, FunctionTransformer
|
||||
from ..utilities import (check_inputs, check_models, broadcast_unit_treatments, reshape_treatmentwise_effects,
|
||||
inverse_onehot, transpose, _deprecate_positional)
|
||||
one_hot_encoder, inverse_onehot, transpose, _deprecate_positional)
|
||||
from .._shap import _shap_explain_model_cate
|
||||
|
||||
|
||||
|
@ -109,7 +109,7 @@ class TLearner(TreatmentExpansionMixin, LinearCateEstimator):
|
|||
categories = self.categories
|
||||
if categories != 'auto':
|
||||
categories = [categories] # OneHotEncoder expects a 2D array with features per column
|
||||
self.transformer = OneHotEncoder(categories=categories, sparse=False, drop='first')
|
||||
self.transformer = one_hot_encoder(categories=categories, drop='first')
|
||||
T = self.transformer.fit_transform(T.reshape(-1, 1))
|
||||
self._d_t = T.shape[1:]
|
||||
T = inverse_onehot(T)
|
||||
|
@ -232,7 +232,7 @@ class SLearner(TreatmentExpansionMixin, LinearCateEstimator):
|
|||
categories = self.categories
|
||||
if categories != 'auto':
|
||||
categories = [categories] # OneHotEncoder expects a 2D array with features per column
|
||||
self.transformer = OneHotEncoder(categories=categories, sparse=False, drop='first')
|
||||
self.transformer = one_hot_encoder(categories=categories, drop='first')
|
||||
T = self.transformer.fit_transform(T.reshape(-1, 1))
|
||||
self._d_t = (T.shape[1], )
|
||||
# Note: unlike other Metalearners, we need the controls' encoded column for training
|
||||
|
@ -375,7 +375,7 @@ class XLearner(TreatmentExpansionMixin, LinearCateEstimator):
|
|||
categories = self.categories
|
||||
if categories != 'auto':
|
||||
categories = [categories] # OneHotEncoder expects a 2D array with features per column
|
||||
self.transformer = OneHotEncoder(categories=categories, sparse=False, drop='first')
|
||||
self.transformer = one_hot_encoder(categories=categories, drop='first')
|
||||
T = self.transformer.fit_transform(T.reshape(-1, 1))
|
||||
self._d_t = T.shape[1:]
|
||||
T = inverse_onehot(T)
|
||||
|
@ -537,7 +537,7 @@ class DomainAdaptationLearner(TreatmentExpansionMixin, LinearCateEstimator):
|
|||
categories = self.categories
|
||||
if categories != 'auto':
|
||||
categories = [categories] # OneHotEncoder expects a 2D array with features per column
|
||||
self.transformer = OneHotEncoder(categories=categories, sparse=False, drop='first')
|
||||
self.transformer = one_hot_encoder(categories=categories, drop='first')
|
||||
T = self.transformer.fit_transform(T.reshape(-1, 1))
|
||||
self._d_t = T.shape[1:]
|
||||
T = inverse_onehot(T)
|
||||
|
|
|
@ -39,7 +39,7 @@ from .._cate_estimator import BaseCateEstimator, LinearCateEstimator, TreatmentE
|
|||
from ._causal_tree import CausalTree
|
||||
from ..inference import NormalInferenceResults
|
||||
from ..inference._inference import Inference
|
||||
from ..utilities import (reshape, reshape_Y_T, MAX_RAND_SEED, check_inputs, _deprecate_positional,
|
||||
from ..utilities import (one_hot_encoder, reshape, reshape_Y_T, MAX_RAND_SEED, check_inputs, _deprecate_positional,
|
||||
cross_product, inverse_onehot, check_input_arrays, jacify_featurizer,
|
||||
_RegressionWrapper, deprecated, ndim)
|
||||
from sklearn.model_selection import check_cv
|
||||
|
@ -676,7 +676,7 @@ class DMLOrthoForest(BaseOrthoForest):
|
|||
categories = self.categories
|
||||
if categories != 'auto':
|
||||
categories = [categories] # OneHotEncoder expects a 2D array with features per column
|
||||
self.transformer = OneHotEncoder(categories=categories, sparse=False, drop='first')
|
||||
self.transformer = one_hot_encoder(categories=categories, drop='first')
|
||||
d_t_in = T.shape[1:]
|
||||
T = self.transformer.fit_transform(T.reshape(-1, 1))
|
||||
self._d_t = T.shape[1:]
|
||||
|
@ -1030,7 +1030,7 @@ class DROrthoForest(BaseOrthoForest):
|
|||
categories = self.categories
|
||||
if categories != 'auto':
|
||||
categories = [categories] # OneHotEncoder expects a 2D array with features per column
|
||||
self.transformer = OneHotEncoder(categories=categories, sparse=False, drop='first')
|
||||
self.transformer = one_hot_encoder(categories=categories, drop='first')
|
||||
d_t_in = T.shape[1:]
|
||||
T = self.transformer.fit_transform(T.reshape(-1, 1))
|
||||
self._d_t = T.shape[1:]
|
||||
|
|
|
@ -108,7 +108,7 @@ class WeightedModelMixin:
|
|||
|
||||
# Normalize inputs
|
||||
X, y, X_offset, y_offset, X_scale = _preprocess_data(
|
||||
X, y, fit_intercept=self.fit_intercept, normalize=False,
|
||||
X, y, fit_intercept=self.fit_intercept,
|
||||
copy=self.copy_X, check_input=check_input if check_input is not None else True,
|
||||
sample_weight=sample_weight)
|
||||
# Weight inputs
|
||||
|
@ -737,7 +737,7 @@ class DebiasedLasso(WeightedLasso):
|
|||
super().fit(X, y, sample_weight, check_input)
|
||||
# Center X, y
|
||||
X, y, X_offset, y_offset, X_scale = _preprocess_data(
|
||||
X, y, fit_intercept=self.fit_intercept, normalize=False,
|
||||
X, y, fit_intercept=self.fit_intercept,
|
||||
copy=self.copy_X, check_input=check_input, sample_weight=sample_weight)
|
||||
|
||||
# Calculate quantities that will be used later on. Account for centered data
|
||||
|
|
|
@ -842,13 +842,17 @@ def _cross_val_predict(estimator, X, y=None, *, groups=None, cv=None,
|
|||
# independent, and that it is pickle-able.
|
||||
parallel = Parallel(n_jobs=n_jobs, verbose=verbose,
|
||||
pre_dispatch=pre_dispatch)
|
||||
predictions = parallel(delayed(_fit_and_predict)(
|
||||
clone(estimator, safe=safe), X, y, train, test, verbose, fit_params, method)
|
||||
for train, test in splits)
|
||||
|
||||
from pkg_resources import parse_version
|
||||
if parse_version(sklearn.__version__) < parse_version("0.24.0"):
|
||||
# Prior to 0.24.0, this private scikit-learn method returned a tuple of two values
|
||||
predictions = [p[0] for p in predictions]
|
||||
# verbose was removed from sklearn's non-public _fit_and_predict method in 1.4
|
||||
if parse_version(sklearn.__version__) < parse_version("1.4"):
|
||||
predictions = parallel(delayed(_fit_and_predict)(
|
||||
clone(estimator, safe=safe), X, y, train, test, verbose, fit_params, method)
|
||||
for train, test in splits)
|
||||
else:
|
||||
predictions = parallel(delayed(_fit_and_predict)(
|
||||
clone(estimator, safe=safe), X, y, train, test, fit_params, method)
|
||||
for train, test in splits)
|
||||
|
||||
inv_test_indices = np.empty(len(test_indices), dtype=int)
|
||||
inv_test_indices[test_indices] = np.arange(len(test_indices))
|
||||
|
|
|
@ -25,7 +25,7 @@ from ...dml import LinearDML, CausalForestDML
|
|||
from ...inference import NormalInferenceResults
|
||||
from ...sklearn_extensions.linear_model import WeightedLasso
|
||||
from ...sklearn_extensions.model_selection import GridSearchCVList
|
||||
from ...utilities import _RegressionWrapper, get_feature_names_or_default, inverse_onehot
|
||||
from ...utilities import _RegressionWrapper, get_feature_names_or_default, inverse_onehot, one_hot_encoder
|
||||
|
||||
# TODO: this utility is documented but internal; reimplement?
|
||||
from sklearn.utils import _safe_indexing
|
||||
|
@ -203,8 +203,7 @@ class _ColumnTransformer(TransformerMixin):
|
|||
if cat_cols.shape[1] > 0:
|
||||
self.has_cats = True
|
||||
# NOTE: set handle_unknown to 'ignore' so that we don't throw at runtime if given a novel value
|
||||
self.one_hot_encoder = OneHotEncoder(sparse=False,
|
||||
handle_unknown='ignore').fit(cat_cols)
|
||||
self.one_hot_encoder = one_hot_encoder(handle_unknown='ignore').fit(cat_cols)
|
||||
else:
|
||||
self.has_cats = False
|
||||
self.d_x = X.shape[1]
|
||||
|
@ -335,12 +334,12 @@ def _process_feature(name, feat_ind, verbose, categorical_inds, categories, hete
|
|||
# we achieve this by pipelining the X scaling with the Y and T models (with fixed scaling, not refitting)
|
||||
|
||||
hinds = heterogeneity_inds[feat_ind]
|
||||
WX_transformer = ColumnTransformer([('encode', OneHotEncoder(drop='first', sparse=False),
|
||||
WX_transformer = ColumnTransformer([('encode', one_hot_encoder(drop='first'),
|
||||
[ind for ind in categorical_inds
|
||||
if ind != feat_ind]),
|
||||
('drop', 'drop', feat_ind)],
|
||||
remainder=StandardScaler())
|
||||
W_transformer = ColumnTransformer([('encode', OneHotEncoder(drop='first', sparse=False),
|
||||
W_transformer = ColumnTransformer([('encode', one_hot_encoder(drop='first'),
|
||||
[ind for ind in categorical_inds
|
||||
if ind != feat_ind and ind not in hinds]),
|
||||
('drop', 'drop', hinds),
|
||||
|
@ -732,8 +731,7 @@ class CausalAnalysis:
|
|||
if train_y_model:
|
||||
# perform model selection for the Y model using all X, not on a per-column basis
|
||||
allX = ColumnTransformer([('encode',
|
||||
OneHotEncoder(
|
||||
drop='first', sparse=False),
|
||||
one_hot_encoder(drop='first'),
|
||||
self.categorical)],
|
||||
remainder=StandardScaler()).fit_transform(X)
|
||||
|
||||
|
@ -757,7 +755,7 @@ class CausalAnalysis:
|
|||
|
||||
# note that this needs to happen after wrapping to generalize to the multi-class case,
|
||||
# since otherwise we'll have too many columns to be able to train a classifier
|
||||
y = OneHotEncoder(drop='first', sparse=False).fit_transform(y)
|
||||
y = one_hot_encoder(drop='first').fit_transform(y)
|
||||
|
||||
assert y.ndim == 1 or y.shape[1] == 1, ("Multiclass classification isn't supported" if self.classification
|
||||
else "Only a single outcome is supported")
|
||||
|
|
|
@ -235,7 +235,7 @@ class TestDeepIV(unittest.TestCase):
|
|||
|
||||
def one_hot(col, **kwargs):
|
||||
z = col.reshape(-1, 1)
|
||||
enc = OneHotEncoder(sparse=False, **kwargs)
|
||||
enc = OneHotEncoder(sparse_output=False, **kwargs)
|
||||
return enc.fit_transform(z)
|
||||
|
||||
def sensf(x):
|
||||
|
@ -375,7 +375,7 @@ Response:{y}".format(**{'x': x.shape, 'z': z.shape,
|
|||
|
||||
def one_hot(col, **kwargs):
|
||||
z = col.reshape(-1, 1)
|
||||
enc = OneHotEncoder(sparse=False, **kwargs)
|
||||
enc = OneHotEncoder(sparse_output=False, **kwargs)
|
||||
return enc.fit_transform(z)
|
||||
|
||||
def sensf(x):
|
||||
|
|
|
@ -910,7 +910,7 @@ class TestDML(unittest.TestCase):
|
|||
[1, 4, 5, 7, 9, 10, 12, 14, 17])
|
||||
|
||||
dml = LinearDML(model_y=LinearRegression(), model_t=LinearRegression(),
|
||||
fit_cate_intercept=False, featurizer=OneHotEncoder(sparse=False),
|
||||
fit_cate_intercept=False, featurizer=OneHotEncoder(sparse_output=False),
|
||||
cv=[splits, splits[::-1]])
|
||||
|
||||
T = np.tile([1, 2, 3], 6)
|
||||
|
|
|
@ -99,7 +99,7 @@ class TestUtilities(unittest.TestCase):
|
|||
|
||||
def test_inverse_onehot(self):
|
||||
T = np.random.randint(4, size=100)
|
||||
T_oh = OneHotEncoder(categories='auto', sparse=False).fit_transform(T.reshape(-1, 1))[:, 1:]
|
||||
T_oh = OneHotEncoder(categories='auto', sparse_output=False).fit_transform(T.reshape(-1, 1))[:, 1:]
|
||||
T_inv = inverse_onehot(T_oh)
|
||||
np.testing.assert_array_equal(T, T_inv)
|
||||
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
import numpy as np
|
||||
import pandas as pd
|
||||
import scipy.sparse
|
||||
import sklearn
|
||||
import sparse as sp
|
||||
import itertools
|
||||
import inspect
|
||||
|
@ -18,7 +19,7 @@ from sklearn.linear_model import LassoCV, MultiTaskLassoCV, Lasso, MultiTaskLass
|
|||
from functools import reduce, wraps
|
||||
from sklearn.utils import check_array, check_X_y
|
||||
from sklearn.utils.validation import assert_all_finite
|
||||
from sklearn.preprocessing import PolynomialFeatures, LabelEncoder
|
||||
from sklearn.preprocessing import OneHotEncoder, PolynomialFeatures, LabelEncoder
|
||||
import warnings
|
||||
from warnings import warn
|
||||
from collections.abc import Iterable
|
||||
|
@ -1508,3 +1509,15 @@ def strata_from_discrete_arrays(arrs):
|
|||
curr_array = temp + curr_array * len(enc.classes_)
|
||||
|
||||
return curr_array
|
||||
|
||||
|
||||
def one_hot_encoder(sparse=False, **kwargs):
    """
    Create a scikit-learn OneHotEncoder using whichever sparsity keyword the installed version accepts.

    scikit-learn renamed the constructor argument `sparse` to `sparse_output` between versions 1.1 and 1.2.
    This wrapper lets callers always write `sparse=...` and forwards the value under the supported name.

    Parameters
    ----------
    sparse : bool, default False
        Whether the encoder should return sparse output. Passed as `sparse_output` when the installed
        OneHotEncoder accepts that argument, and as `sparse` otherwise.
    **kwargs
        Any other keyword arguments, passed through to OneHotEncoder unchanged.

    Returns
    -------
    OneHotEncoder
        A new, unfitted encoder.
    """
    # Look at the constructor signature rather than parsing sklearn.__version__: this avoids the
    # deprecated pkg_resources API and keeps working for development or vendored builds whose
    # version string does not reflect the rename.
    if 'sparse_output' in inspect.signature(OneHotEncoder).parameters:
        return OneHotEncoder(sparse_output=sparse, **kwargs)
    return OneHotEncoder(sparse=sparse, **kwargs)
|
||||
|
|
|
@ -22,7 +22,7 @@ classifiers = [
|
|||
dependencies = [
|
||||
"numpy",
|
||||
"scipy > 1.4.0",
|
||||
"scikit-learn >= 1.0, < 1.4",
|
||||
"scikit-learn >= 1.0, < 1.5",
|
||||
"sparse",
|
||||
"joblib >= 0.13.0",
|
||||
"statsmodels >= 0.10",
|
||||
|
|
Загрузка…
Ссылка в новой задаче