Add polynom regressor and classifier to gcm

This replaces the ProductRegressor.

Signed-off-by: Patrick Bloebaum <bloebp@amazon.com>

Parent: fb5b4d5260
Commit: 2ed7cf4e93
dowhy/gcm/auto.py

@@ -16,7 +16,6 @@ from dowhy.gcm.cms import ProbabilisticCausalModel
 from dowhy.gcm.fcms import AdditiveNoiseModel, ClassificationModel, ClassifierFCM, PredictionModel
 from dowhy.gcm.graph import CAUSAL_MECHANISM, get_ordered_predecessors, is_root_node, validate_causal_model_assignment
 from dowhy.gcm.ml import (
     create_elastic_net_regressor,
     create_hist_gradient_boost_classifier,
     create_hist_gradient_boost_regressor,
     create_lasso_regressor,
@@ -31,14 +30,16 @@ from dowhy.gcm.ml.classification import (
     create_extra_trees_classifier,
     create_gaussian_nb_classifier,
     create_knn_classifier,
+    create_polynom_logistic_regression_classifier,
     create_random_forest_classifier,
     create_support_vector_classifier,
 )
 from dowhy.gcm.ml.regression import (
     create_ada_boost_regressor,
     create_elastic_net_regressor,
     create_extra_trees_regressor,
     create_knn_regressor,
-    create_product_regressor,
+    create_polynom_regressor,
 )
 from dowhy.gcm.stochastic_models import EmpiricalDistribution
 from dowhy.gcm.util.general import (
@@ -51,6 +52,7 @@ from dowhy.gcm.util.general import (
 _LIST_OF_POTENTIAL_CLASSIFIERS = [
     partial(create_logistic_regression_classifier, max_iter=1000),
+    partial(create_polynom_logistic_regression_classifier, max_iter=1000),
     create_random_forest_classifier,
     create_hist_gradient_boost_classifier,
     create_extra_trees_classifier,
@@ -62,6 +64,7 @@ _LIST_OF_POTENTIAL_CLASSIFIERS = [
 _LIST_OF_POTENTIAL_REGRESSORS = [
     create_linear_regressor,
     create_ridge_regressor,
+    create_polynom_regressor,
     partial(create_lasso_regressor, max_iter=5000),
     partial(create_elastic_net_regressor, max_iter=5000),
     create_random_forest_regressor,
@@ -70,7 +73,6 @@ _LIST_OF_POTENTIAL_REGRESSORS = [
     create_extra_trees_regressor,
     create_knn_regressor,
     create_ada_boost_regressor,
-    create_product_regressor,
 ]

@@ -149,11 +151,11 @@ def select_model(
     else:
         if use_linear_prediction_models:
             return find_best_model(
-                [create_linear_regressor, create_product_regressor], X, Y, model_selection_splits=2
+                [create_linear_regressor, create_polynom_regressor], X, Y, model_selection_splits=2
             )()
         else:
             return find_best_model(
-                [create_hist_gradient_boost_regressor, create_product_regressor], X, Y, model_selection_splits=2
+                [create_hist_gradient_boost_regressor, create_polynom_regressor], X, Y, model_selection_splits=2
             )()
     elif model_selection_quality == AssignmentQuality.BETTER:
         if target_is_categorical:
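Note on usage: with this change, automatic mechanism assignment exercises the new polynom models. With GOOD quality, select_model races create_polynom_regressor against the linear (or gradient-boosting) baseline via find_best_model; BETTER sweeps the fuller candidate lists above. A minimal sketch (toy data and variable names are illustrative, not from this commit):

    import networkx as nx
    import numpy as np
    import pandas as pd
    from dowhy import gcm

    # A quadratic relationship that a plain linear regressor underfits.
    data = pd.DataFrame({"X": np.random.normal(size=1000)})
    data["Y"] = data["X"] ** 2 + np.random.normal(scale=0.1, size=1000)

    causal_model = gcm.StructuralCausalModel(nx.DiGraph([("X", "Y")]))
    # quality=GOOD triggers the find_best_model comparison shown in the hunk above.
    gcm.auto.assign_causal_mechanisms(causal_model, data, quality=gcm.auto.AssignmentQuality.GOOD)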
dowhy/gcm/ml/__init__.py

@@ -8,6 +8,7 @@ from .classification import (
     create_gaussian_process_classifier,
     create_hist_gradient_boost_classifier,
     create_logistic_regression_classifier,
+    create_polynom_logistic_regression_classifier,
     create_random_forest_classifier,
 )
 from .regression import (
@@ -19,6 +20,7 @@ from .regression import (
     create_lasso_regressor,
     create_linear_regressor,
     create_linear_regressor_with_given_parameters,
+    create_polynom_regressor,
     create_random_forest_regressor,
     create_ridge_regressor,
     create_support_vector_regressor,
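With the re-exports above, both new factories are importable from the package root; for example:

    from dowhy.gcm.ml import (
        create_polynom_logistic_regression_classifier,
        create_polynom_regressor,
    )

    # degree is forwarded to PolynomialFeatures; remaining kwargs go to the sklearn model.
    mdl = create_polynom_regressor(degree=2)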
dowhy/gcm/ml/classification.py

@@ -7,6 +7,8 @@ from typing import List
 import numpy as np
 import sklearn
 from packaging import version
+from sklearn.pipeline import make_pipeline
+from sklearn.preprocessing import PolynomialFeatures

 if version.parse(sklearn.__version__) < version.parse("1.0"):
     from sklearn.experimental import enable_hist_gradient_boosting  # noqa
@@ -74,3 +76,13 @@ def create_knn_classifier(**kwargs) -> SklearnClassificationModel:

 def create_gaussian_nb_classifier(**kwargs) -> SklearnClassificationModel:
     return SklearnClassificationModel(GaussianNB(**kwargs))
+
+
+def create_polynom_logistic_regression_classifier(
+    degree: int = 3, **kwargs_logistic_regression
+) -> SklearnClassificationModel:
+    return SklearnClassificationModel(
+        make_pipeline(
+            PolynomialFeatures(degree=degree, include_bias=False), LogisticRegression(**kwargs_logistic_regression)
+        )
+    )
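Both new factories lean on the same trick: expand the inputs with sklearn's PolynomialFeatures, then fit a plain linear model on the expanded features. A quick illustration of what the degree-3 expansion produces:

    import numpy as np
    from sklearn.preprocessing import PolynomialFeatures

    X = np.array([[2.0, 3.0]])
    # (x0, x1) -> [x0, x1, x0^2, x0*x1, x1^2, x0^3, x0^2*x1, x0*x1^2, x1^3]
    print(PolynomialFeatures(degree=3, include_bias=False).fit_transform(X))
    # [[ 2.  3.  4.  6.  9.  8. 12. 18. 27.]]

Because the interaction term x0*x1 appears explicitly as a feature, label rules like "sign of x0*x1" (used in the new classifier test below) become linearly separable for the downstream LogisticRegression.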
dowhy/gcm/ml/regression.py

@@ -7,6 +7,8 @@ from typing import Any
 import numpy as np
 import sklearn
 from packaging import version
+from sklearn.pipeline import make_pipeline
+from sklearn.preprocessing import PolynomialFeatures

 if version.parse(sklearn.__version__) < version.parse("1.0"):
     from sklearn.experimental import enable_hist_gradient_boosting  # noqa
@@ -115,8 +117,10 @@ def create_ada_boost_regressor(**kwargs) -> SklearnRegressionModel:
     return SklearnRegressionModel(AdaBoostRegressor(**kwargs))


-def create_product_regressor() -> PredictionModel:
-    return ProductRegressor()
+def create_polynom_regressor(degree: int = 3, **kwargs_linear_model) -> SklearnRegressionModel:
+    return SklearnRegressionModel(
+        make_pipeline(PolynomialFeatures(degree=degree, include_bias=False), LinearRegression(**kwargs_linear_model))
+    )


 class InvertibleIdentityFunction(InvertibleFunction):
@@ -141,18 +145,3 @@ class InvertibleLogarithmicFunction(InvertibleFunction):
     def evaluate_inverse(self, X: np.ndarray) -> np.ndarray:
         return np.exp(X)
-
-
-class ProductRegressor(PredictionModel):
-    def __init__(self):
-        self._one_hot_encoders = {}
-
-    def fit(self, X, Y):
-        self._one_hot_encoders = fit_one_hot_encoders(X)
-
-    def predict(self, X):
-        X = apply_one_hot_encoding(X, self._one_hot_encoders)
-        return np.prod(X, axis=1).reshape(-1, 1)
-
-    def clone(self):
-        return ProductRegressor()
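The regressor follows the same pattern with LinearRegression. Since a pure product like Y = x0*x1 is itself one of the expanded features, the fit is exact up to numerical noise, which is what the tightened test tolerance below relies on. A small sketch:

    import numpy as np
    from dowhy.gcm.ml import create_polynom_regressor

    X = np.random.normal(0, 1, (100, 2))
    Y = X[:, 0] * X[:, 1]

    mdl = create_polynom_regressor()  # unlike the old ProductRegressor, this needs a fit step
    mdl.fit(X, Y)
    print(np.max(np.abs(mdl.predict(X).reshape(-1) - Y)))  # ~0, up to numerical noise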
tests/gcm/ml/test_classification.py (new file)

@@ -0,0 +1,26 @@
+import numpy as np
+from flaky import flaky
+
+from dowhy.gcm.ml import create_polynom_logistic_regression_classifier
+
+
+@flaky(max_runs=3)
+def test_when_fit_and_predict_polynom_classifier_then_returns_accurate_results():
+    def _generate_data():
+        X = np.random.normal(0, 1, (1000, 2))
+        Y = []
+
+        for x in X:
+            if x[0] * x[1] > 0:
+                Y.append("Class 0")
+            else:
+                Y.append("Class 1")
+
+        return X, np.array(Y)
+
+    X_training, Y_training = _generate_data()
+    X_test, Y_test = _generate_data()
+    mdl = create_polynom_logistic_regression_classifier()
+    mdl.fit(X_training, Y_training)
+
+    assert np.sum(mdl.predict(X_test).reshape(-1) == Y_test) > 950
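For contrast, a plain logistic regression sits near chance on these XOR-style labels, while the polynom variant separates them almost perfectly. A side-by-side sketch (training accuracy only, for illustration):

    import numpy as np
    from dowhy.gcm.ml import (
        create_logistic_regression_classifier,
        create_polynom_logistic_regression_classifier,
    )

    X = np.random.normal(0, 1, (1000, 2))
    Y = np.where(X[:, 0] * X[:, 1] > 0, "Class 0", "Class 1")

    for factory in (create_logistic_regression_classifier, create_polynom_logistic_regression_classifier):
        mdl = factory()
        mdl.fit(X, Y)
        print(factory.__name__, np.mean(mdl.predict(X).reshape(-1) == Y))
    # create_logistic_regression_classifier          ~0.5 (no linear boundary exists)
    # create_polynom_logistic_regression_classifier  ~1.0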
tests/gcm/ml/test_regression.py

@@ -1,22 +1,39 @@
 import numpy as np
 from _pytest.python_api import approx
+from flaky import flaky

-from dowhy.gcm.ml.regression import create_product_regressor
+from dowhy.gcm.ml.regression import create_polynom_regressor


-def test_when_use_product_regressor_then_computes_correct_values():
-    X = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
+@flaky(max_runs=3)
+def test_when_fit_and_predict_polynom_regressor_then_returns_accurate_results():
+    X = np.random.normal(0, 1, (100, 2))
+    Y = X[:, 0] * X[:, 1]

-    mdl = create_product_regressor()
-    # No fit needed
+    mdl = create_polynom_regressor()
+    mdl.fit(X, Y)

-    assert mdl.predict(X).reshape(-1) == approx(np.array([6, 120, 504]))
+    X_test = np.random.normal(0, 1, (100, 2))
+    Y_test = X_test[:, 0] * X_test[:, 1]
+
+    assert mdl.predict(X_test).reshape(-1) == approx(Y_test, abs=1e-10)


-def test_when_input_is_categorical_when_use_product_regressor_then_computes_correct_values():
-    X = np.column_stack([np.array(["Class 1", "Class 2"]).astype(object), np.array([1, 2])]).astype(object)
+@flaky(max_runs=3)
+def test_when_given_categorical_training_data_when_fit_and_predict_polynom_regressor_then_returns_accurate_results():
+    def _generate_data():
+        X = np.column_stack(
+            [np.random.choice(2, 100, replace=True).astype(str), np.random.normal(0, 1, (100, 2)).astype(object)]
+        ).astype(object)
+        Y = []
+        for i in range(X.shape[0]):
+            Y.append(X[i, 1] * X[i, 2] if X[i, 0] == "0" else X[i, 1] + X[i, 2])

-    mdl = create_product_regressor()
-    mdl.fit(X, np.zeros(2))  # Need to fit one-hot-encoder
+        return X, np.array(Y)

-    assert mdl.predict(X).reshape(-1) == approx(np.array([0, 2]))
+    X_training, Y_training = _generate_data()
+    X_test, Y_test = _generate_data()
+    mdl = create_polynom_regressor()
+    mdl.fit(X_training, Y_training)
+
+    assert mdl.predict(X_test).reshape(-1) == approx(Y_test, abs=1e-10)
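Why abs=1e-10 is attainable even in the categorical test: the string column is one-hot encoded before reaching the pipeline (the string-valued test input implies dowhy's SklearnRegressionModel wrapper handles this internally), and with an indicator c in {0, 1} the target is exactly a degree-3 polynomial in the encoded features. A self-contained check of that identity:

    import numpy as np

    c = np.random.choice(2, 100)
    x1, x2 = np.random.normal(size=100), np.random.normal(size=100)
    y = np.where(c == 0, x1 * x2, x1 + x2)  # mirrors the test's label rule

    # identical to a single degree-3 polynomial in (c, x1, x2)
    assert np.allclose(y, c * (x1 + x2) + (1 - c) * (x1 * x2))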
tests/gcm/test_auto.py

@@ -19,7 +19,7 @@ def _generate_linear_regression_data():

 def _generate_non_linear_regression_data():
     X = np.random.normal(0, 1, (1000, 5))
-    Y = np.sum(X**2, axis=1)
+    Y = np.sum(np.log(abs(X)), axis=1)

     return X, Y
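A plausible reading of this last tweak (the commit message doesn't spell it out): the old target sum(x_i^2) is itself a polynomial, so the new create_polynom_regressor fits it exactly and the test would no longer discriminate the non-linear selection path; sum(log|x_i|) lies outside the polynomial family. A quick demonstration that the old target is now trivial:

    import numpy as np
    from dowhy.gcm.ml import create_polynom_regressor

    X = np.random.normal(0, 1, (1000, 5))
    Y_old = np.sum(X**2, axis=1)      # the previous "non-linear" target
    mdl = create_polynom_regressor()  # degree-3 features include every x_i^2
    mdl.fit(X, Y_old)
    print(np.max(np.abs(mdl.predict(X).reshape(-1) - Y_old)))  # ~0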