Migrate responsibleai tests to use utilities in rai_test_utils - Part 2 (#1956)
* Update requirements-dev.txt
* Update requirements-dev.txt
* Code changes with new rai_test_utils

Signed-off-by: Gaurav Gupta <gaugup@microsoft.com>

---------

Signed-off-by: Gaurav Gupta <gaugup@microsoft.com>
This commit is contained in:
Родитель
46f31058c4
Коммит
23cc0d8a4a
|
@ -4,4 +4,4 @@ pytest-mock==3.6.1
|
|||
requests==2.25.1
|
||||
|
||||
requirements-parser==0.2.0
|
||||
rai_test_utils==0.0.0
|
||||
rai_test_utils==0.1.0
|
||||
|
|
|
@ -309,7 +309,7 @@ class TestMatrixFilter(object):
|
|||
# Test quantile binning on CRIM feature in california housing dataset,
|
||||
# which errored out due to first category not fitting into bins
|
||||
(X_train, X_test, y_train, y_test,
|
||||
feature_names) = create_housing_data(test_size=0.5)
|
||||
feature_names) = create_housing_data()
|
||||
|
||||
model_task = ModelTask.REGRESSION
|
||||
matrix_features = ['Population']
|
||||
|
|
|
@ -7,4 +7,4 @@ pytest-mock==3.6.1
|
|||
# Required for responsibleai package tests
|
||||
deptree~=0.0.10
|
||||
xgboost<=1.0.0
|
||||
rai_test_utils==0.0.0
|
||||
rai_test_utils==0.1.0
|
||||
|
|
|
@ -8,7 +8,9 @@ import pandas as pd
|
|||
import pytest
|
||||
import shap
|
||||
from sklearn.model_selection import train_test_split
|
||||
from tests.common_utils import create_adult_income_dataset, create_housing_data
|
||||
from tests.common_utils import create_adult_income_dataset
|
||||
|
||||
from rai_test_utils.datasets.tabular import create_housing_data
|
||||
|
||||
|
||||
@pytest.fixture(scope='session')
|
||||
|
|
|
@ -8,12 +8,7 @@ import pandas as pd
|
|||
import pytest
|
||||
# Defines common utilities for responsibleai tests
|
||||
from dice_ml.utils import helpers
|
||||
from sklearn.compose import ColumnTransformer
|
||||
from sklearn.datasets import fetch_california_housing, load_breast_cancer
|
||||
from sklearn.ensemble import RandomForestClassifier
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.pipeline import Pipeline
|
||||
from sklearn.preprocessing import OneHotEncoder, StandardScaler
|
||||
|
||||
from rai_test_utils.datasets.tabular import \
|
||||
create_iris_data as _create_iris_data
|
||||
|
@ -35,39 +30,6 @@ def create_iris_data():
|
|||
return X_train, X_test, y_train, y_test, feature_names, classes
|
||||
|
||||
|
||||
def create_housing_data(create_small_dataset=True):
    """Load the California housing dataset and split it into train/test.

    :param create_small_dataset: When True, request a split with
        ``train_size=500`` and ``test_size=50``; otherwise request
        ``test_size=0.2`` and let the remainder be used for training.
    :return: Tuple of (x_train, x_test, y_train, y_test, feature_names).
    """
    dataset = fetch_california_housing()

    # Only the size arguments differ between the small and full variants.
    if create_small_dataset:
        split_sizes = {'train_size': 500, 'test_size': 50}
    else:
        split_sizes = {'test_size': 0.2}

    # A fixed random_state keeps the split identical across test runs.
    splits = train_test_split(
        dataset.data, dataset.target, random_state=7, **split_sizes)
    x_train, x_test, y_train, y_test = splits
    return x_train, x_test, y_train, y_test, dataset.feature_names
|
||||
|
||||
|
||||
def create_cancer_data():
    """Load the breast cancer dataset and split it into train/test.

    The feature matrices are wrapped in pandas DataFrames whose columns
    are the dataset's feature names; the targets are returned exactly as
    produced by ``train_test_split``.

    :return: Tuple of (X_train, X_test, y_train, y_test, feature_names,
        classes), where ``classes`` is the list of target names.
    """
    breast_cancer_data = load_breast_cancer()
    feature_names = breast_cancer_data.feature_names
    # Computed once; the original assigned this same value twice.
    classes = breast_cancer_data.target_names.tolist()

    # Split data into train and test; random_state fixes the split.
    X_train, X_test, y_train, y_test = train_test_split(
        breast_cancer_data.data, breast_cancer_data.target,
        test_size=0.2, random_state=0)

    X_train = pd.DataFrame(X_train, columns=feature_names)
    X_test = pd.DataFrame(X_test, columns=feature_names)
    return X_train, X_test, y_train, y_test, feature_names, classes
|
||||
|
||||
|
||||
class FetchDiceAdultCensusIncomeDataset(object):
|
||||
def __init__(self):
|
||||
pass
|
||||
|
@ -108,28 +70,6 @@ def create_adult_income_dataset(create_small_dataset=True):
|
|||
feature_columns, feature_range_keys
|
||||
|
||||
|
||||
def create_complex_classification_pipeline(
        X_train, y_train, continuous_features, categorical_features):
    """Build and fit a preprocessing + random forest pipeline.

    :param X_train: Training features passed to ``Pipeline.fit``.
    :param y_train: Training labels passed to ``Pipeline.fit``.
    :param continuous_features: Columns routed through the
        ``StandardScaler`` step.
    :param categorical_features: Columns routed through the
        ``OneHotEncoder`` step (``handle_unknown='ignore'``).
    :return: The value returned by ``Pipeline.fit`` on the assembled
        pipeline.
    """
    # Separate preprocessing steps for numeric and categorical columns.
    scaling_steps = [('scaler', StandardScaler())]
    encoding_steps = [('onehot', OneHotEncoder(handle_unknown='ignore'))]

    column_transformers = [
        ('num', Pipeline(steps=scaling_steps), continuous_features),
        ('cat', Pipeline(steps=encoding_steps), categorical_features),
    ]
    preprocessor = ColumnTransformer(transformers=column_transformers)

    # Chain the preprocessor with the classifier to get a single
    # end-to-end prediction pipeline.
    model = Pipeline(steps=[
        ('preprocessor', preprocessor),
        ('classifier', RandomForestClassifier()),
    ])
    return model.fit(X_train, y_train)
|
||||
|
||||
|
||||
def assert_series_and_dict_equal(left: pd.Series, right: dict):
|
||||
left_json = left.to_json(orient="index")
|
||||
left_dict = json.loads(left_json)
|
||||
|
|
|
@ -12,20 +12,20 @@ import numpy as np
|
|||
import pandas as pd
|
||||
import pytest
|
||||
from tests.causal_manager_validator import validate_causal
|
||||
from tests.common_utils import (create_adult_income_dataset,
|
||||
create_cancer_data,
|
||||
create_complex_classification_pipeline,
|
||||
create_housing_data, create_iris_data)
|
||||
from tests.common_utils import create_adult_income_dataset, create_iris_data
|
||||
from tests.counterfactual_manager_validator import validate_counterfactual
|
||||
from tests.error_analysis_validator import (setup_error_analysis,
|
||||
validate_error_analysis)
|
||||
from tests.explainer_manager_validator import (setup_explainer,
|
||||
validate_explainer)
|
||||
|
||||
from rai_test_utils.datasets.tabular import \
|
||||
create_binary_classification_dataset
|
||||
from rai_test_utils.datasets.tabular import (
|
||||
create_binary_classification_dataset, create_cancer_data,
|
||||
create_housing_data)
|
||||
from rai_test_utils.models.model_utils import (create_models_classification,
|
||||
create_models_regression)
|
||||
from rai_test_utils.models.sklearn import \
|
||||
create_complex_classification_pipeline
|
||||
from responsibleai import ModelAnalysis, ModelTask
|
||||
from responsibleai._internal.constants import ManagerNames
|
||||
from responsibleai._tools.shared.state_directory_management import \
|
||||
|
@ -72,7 +72,7 @@ class TestModelAnalysis(object):
|
|||
ManagerNames.EXPLAINER])
|
||||
def test_model_analysis_cancer(self, manager_type):
|
||||
X_train, X_test, y_train, y_test, _, classes = \
|
||||
create_cancer_data()
|
||||
create_cancer_data(return_dataframe=True)
|
||||
models = create_models_classification(X_train, y_train)
|
||||
X_train[LABELS] = y_train
|
||||
X_test[LABELS] = y_test
|
||||
|
|
|
@ -8,11 +8,11 @@ from unittest.mock import MagicMock
|
|||
|
||||
import pandas as pd
|
||||
import pytest
|
||||
from tests.common_utils import (create_cancer_data, create_housing_data,
|
||||
create_iris_data)
|
||||
from tests.common_utils import create_iris_data
|
||||
|
||||
from rai_test_utils.datasets.tabular import \
|
||||
create_binary_classification_dataset
|
||||
from rai_test_utils.datasets.tabular import (
|
||||
create_binary_classification_dataset, create_cancer_data,
|
||||
create_housing_data)
|
||||
from rai_test_utils.models.lightgbm import create_lightgbm_classifier
|
||||
from rai_test_utils.models.sklearn import \
|
||||
create_sklearn_random_forest_regressor
|
||||
|
@ -119,7 +119,7 @@ class TestModelAnalysisValidations:
|
|||
|
||||
def test_validate_serializer(self):
|
||||
X_train, X_test, y_train, y_test, _, _ = \
|
||||
create_cancer_data()
|
||||
create_cancer_data(return_dataframe=True)
|
||||
model = create_lightgbm_classifier(X_train, y_train)
|
||||
|
||||
X_train['target'] = y_train
|
||||
|
@ -189,7 +189,7 @@ class TestModelAnalysisValidations:
|
|||
|
||||
def test_model_predictions_predict(self):
|
||||
X_train, X_test, y_train, y_test, _, _ = \
|
||||
create_cancer_data()
|
||||
create_cancer_data(return_dataframe=True)
|
||||
|
||||
X_train['target'] = y_train
|
||||
X_test['target'] = y_test
|
||||
|
@ -209,7 +209,7 @@ class TestModelAnalysisValidations:
|
|||
|
||||
def test_model_predictions_predict_proba(self):
|
||||
X_train, X_test, y_train, y_test, _, _ = \
|
||||
create_cancer_data()
|
||||
create_cancer_data(return_dataframe=True)
|
||||
|
||||
X_train['target'] = y_train
|
||||
X_test['target'] = y_test
|
||||
|
@ -231,7 +231,7 @@ class TestModelAnalysisValidations:
|
|||
|
||||
def test_model_analysis_incorrect_task_type(self):
|
||||
X_train, X_test, y_train, y_test, _, _ = \
|
||||
create_cancer_data()
|
||||
create_cancer_data(return_dataframe=True)
|
||||
model = create_lightgbm_classifier(X_train, y_train)
|
||||
|
||||
X_train['target'] = y_train
|
||||
|
@ -250,7 +250,7 @@ class TestModelAnalysisValidations:
|
|||
|
||||
def test_mismatch_train_test_features(self):
|
||||
X_train, X_test, y_train, y_test, _, _ = \
|
||||
create_cancer_data()
|
||||
create_cancer_data(return_dataframe=True)
|
||||
model = create_lightgbm_classifier(X_train, y_train)
|
||||
|
||||
X_train['target'] = y_train
|
||||
|
@ -268,7 +268,7 @@ class TestModelAnalysisValidations:
|
|||
|
||||
def test_unsupported_train_test_types(self):
|
||||
X_train, X_test, y_train, y_test, _, _ = \
|
||||
create_cancer_data()
|
||||
create_cancer_data(return_dataframe=True)
|
||||
model = create_lightgbm_classifier(X_train, y_train)
|
||||
|
||||
X_train['target'] = y_train
|
||||
|
@ -286,7 +286,7 @@ class TestModelAnalysisValidations:
|
|||
|
||||
def test_train_labels(self):
|
||||
X_train, X_test, y_train, y_test, _, _ = \
|
||||
create_cancer_data()
|
||||
create_cancer_data(return_dataframe=True)
|
||||
model = create_lightgbm_classifier(X_train, y_train)
|
||||
|
||||
X_train['target'] = y_train
|
||||
|
|
|
@ -11,20 +11,20 @@ import numpy as np
|
|||
import pandas as pd
|
||||
import pytest
|
||||
from tests.causal_manager_validator import validate_causal
|
||||
from tests.common_utils import (create_adult_income_dataset,
|
||||
create_cancer_data,
|
||||
create_complex_classification_pipeline,
|
||||
create_housing_data, create_iris_data)
|
||||
from tests.common_utils import create_adult_income_dataset, create_iris_data
|
||||
from tests.counterfactual_manager_validator import validate_counterfactual
|
||||
from tests.error_analysis_validator import (setup_error_analysis,
|
||||
validate_error_analysis)
|
||||
from tests.explainer_manager_validator import (setup_explainer,
|
||||
validate_explainer)
|
||||
|
||||
from rai_test_utils.datasets.tabular import \
|
||||
create_binary_classification_dataset
|
||||
from rai_test_utils.datasets.tabular import (
|
||||
create_binary_classification_dataset, create_cancer_data,
|
||||
create_housing_data)
|
||||
from rai_test_utils.models.model_utils import (create_models_classification,
|
||||
create_models_regression)
|
||||
from rai_test_utils.models.sklearn import \
|
||||
create_complex_classification_pipeline
|
||||
from responsibleai import ModelTask, RAIInsights
|
||||
from responsibleai._internal.constants import (ManagerNames,
|
||||
SerializationAttributes)
|
||||
|
@ -78,7 +78,7 @@ class TestRAIInsights(object):
|
|||
ManagerNames.EXPLAINER])
|
||||
def test_rai_insights_cancer(self, manager_type):
|
||||
X_train, X_test, y_train, y_test, _, classes = \
|
||||
create_cancer_data()
|
||||
create_cancer_data(return_dataframe=True)
|
||||
models = create_models_classification(X_train, y_train)
|
||||
X_train[LABELS] = y_train
|
||||
X_test[LABELS] = y_test
|
||||
|
|
|
@ -4,11 +4,12 @@
|
|||
import numpy as np
|
||||
import pandas as pd
|
||||
import pytest
|
||||
from tests.common_utils import create_housing_data, create_iris_data
|
||||
from tests.common_utils import create_iris_data
|
||||
|
||||
from erroranalysis._internal.constants import (PRED_Y, ROW_INDEX, TRUE_Y,
|
||||
ModelTask)
|
||||
from rai_test_utils.datasets.tabular import create_simple_titanic_data
|
||||
from rai_test_utils.datasets.tabular import (create_housing_data,
|
||||
create_simple_titanic_data)
|
||||
from rai_test_utils.models.sklearn import (
|
||||
create_sklearn_random_forest_regressor, create_sklearn_svm_classifier,
|
||||
create_titanic_pipeline)
|
||||
|
|
|
@ -6,8 +6,9 @@ from tempfile import TemporaryDirectory
|
|||
|
||||
import pandas as pd
|
||||
import pytest
|
||||
from tests.common_utils import create_housing_data, create_iris_data
|
||||
from tests.common_utils import create_iris_data
|
||||
|
||||
from rai_test_utils.datasets.tabular import create_housing_data
|
||||
from rai_test_utils.models.sklearn import (
|
||||
create_sklearn_random_forest_classifier,
|
||||
create_sklearn_random_forest_regressor)
|
||||
|
|
|
@ -8,13 +8,13 @@ from tempfile import TemporaryDirectory
|
|||
import numpy as np
|
||||
import pandas as pd
|
||||
import pytest
|
||||
from tests.common_utils import (create_adult_income_dataset,
|
||||
create_complex_classification_pipeline,
|
||||
create_iris_data)
|
||||
from tests.common_utils import create_adult_income_dataset, create_iris_data
|
||||
|
||||
from rai_test_utils.datasets.tabular import \
|
||||
create_binary_classification_dataset
|
||||
from rai_test_utils.models.lightgbm import create_lightgbm_classifier
|
||||
from rai_test_utils.models.sklearn import \
|
||||
create_complex_classification_pipeline
|
||||
from responsibleai import ModelTask, RAIInsights
|
||||
from responsibleai._internal.constants import (ManagerNames,
|
||||
SerializationAttributes)
|
||||
|
|
|
@ -8,11 +8,11 @@ import numpy as np
|
|||
import pandas as pd
|
||||
import pytest
|
||||
from lightgbm import LGBMClassifier
|
||||
from tests.common_utils import (create_cancer_data, create_housing_data,
|
||||
create_iris_data)
|
||||
from tests.common_utils import create_iris_data
|
||||
|
||||
from rai_test_utils.datasets.tabular import \
|
||||
create_binary_classification_dataset
|
||||
from rai_test_utils.datasets.tabular import (
|
||||
create_binary_classification_dataset, create_cancer_data,
|
||||
create_housing_data)
|
||||
from rai_test_utils.models.lightgbm import create_lightgbm_classifier
|
||||
from rai_test_utils.models.sklearn import \
|
||||
create_sklearn_random_forest_regressor
|
||||
|
@ -157,7 +157,7 @@ class TestRAIInsightsValidations:
|
|||
|
||||
def test_validate_serializer(self):
|
||||
X_train, X_test, y_train, y_test, _, _ = \
|
||||
create_cancer_data()
|
||||
create_cancer_data(return_dataframe=True)
|
||||
model = create_lightgbm_classifier(X_train, y_train)
|
||||
|
||||
X_train[TARGET] = y_train
|
||||
|
@ -227,7 +227,7 @@ class TestRAIInsightsValidations:
|
|||
|
||||
def test_model_predictions_predict(self):
|
||||
X_train, X_test, y_train, y_test, _, _ = \
|
||||
create_cancer_data()
|
||||
create_cancer_data(return_dataframe=True)
|
||||
|
||||
X_train[TARGET] = y_train
|
||||
X_test[TARGET] = y_test
|
||||
|
@ -247,7 +247,7 @@ class TestRAIInsightsValidations:
|
|||
|
||||
def test_model_predictions_predict_proba(self):
|
||||
X_train, X_test, y_train, y_test, _, _ = \
|
||||
create_cancer_data()
|
||||
create_cancer_data(return_dataframe=True)
|
||||
|
||||
X_train[TARGET] = y_train
|
||||
X_test[TARGET] = y_test
|
||||
|
@ -269,7 +269,7 @@ class TestRAIInsightsValidations:
|
|||
|
||||
def test_incorrect_task_type(self):
|
||||
X_train, X_test, y_train, y_test, _, _ = \
|
||||
create_cancer_data()
|
||||
create_cancer_data(return_dataframe=True)
|
||||
model = create_lightgbm_classifier(X_train, y_train)
|
||||
|
||||
X_train[TARGET] = y_train
|
||||
|
@ -287,7 +287,7 @@ class TestRAIInsightsValidations:
|
|||
|
||||
def test_mismatch_train_test_features(self):
|
||||
X_train, X_test, y_train, y_test, _, _ = \
|
||||
create_cancer_data()
|
||||
create_cancer_data(return_dataframe=True)
|
||||
model = create_lightgbm_classifier(X_train, y_train)
|
||||
|
||||
X_train[TARGET] = y_train
|
||||
|
@ -344,7 +344,7 @@ class TestRAIInsightsValidations:
|
|||
|
||||
def test_unsupported_train_test_types(self):
|
||||
X_train, X_test, y_train, y_test, _, _ = \
|
||||
create_cancer_data()
|
||||
create_cancer_data(return_dataframe=True)
|
||||
model = create_lightgbm_classifier(X_train, y_train)
|
||||
|
||||
X_train[TARGET] = y_train
|
||||
|
@ -363,7 +363,7 @@ class TestRAIInsightsValidations:
|
|||
|
||||
def test_classes_exceptions(self):
|
||||
X_train, X_test, y_train, y_test, _, _ = \
|
||||
create_cancer_data()
|
||||
create_cancer_data(return_dataframe=True)
|
||||
model = create_lightgbm_classifier(X_train, y_train)
|
||||
|
||||
X_train[TARGET] = y_train
|
||||
|
@ -414,7 +414,7 @@ class TestRAIInsightsValidations:
|
|||
|
||||
def test_dataset_exception(self):
|
||||
X_train, X_test, y_train, y_test, _, _ = \
|
||||
create_cancer_data()
|
||||
create_cancer_data(return_dataframe=True)
|
||||
model = create_lightgbm_classifier(X_train, y_train)
|
||||
X_train_feature_names = X_train.columns.tolist()
|
||||
|
||||
|
@ -453,7 +453,7 @@ class TestRAIInsightsValidations:
|
|||
|
||||
def test_classes_passes(self):
|
||||
X_train, X_test, y_train, y_test, _, _ = \
|
||||
create_cancer_data()
|
||||
create_cancer_data(return_dataframe=True)
|
||||
model = create_lightgbm_classifier(X_train, y_train)
|
||||
|
||||
X_train[TARGET] = y_train
|
||||
|
@ -471,7 +471,7 @@ class TestRAIInsightsValidations:
|
|||
|
||||
def test_no_model_but_serializer_provided(self):
|
||||
X_train, X_test, y_train, y_test, _, _ = \
|
||||
create_cancer_data()
|
||||
create_cancer_data(return_dataframe=True)
|
||||
|
||||
X_train[TARGET] = y_train
|
||||
X_test[TARGET] = y_test
|
||||
|
@ -489,7 +489,7 @@ class TestRAIInsightsValidations:
|
|||
|
||||
def test_feature_metadata(self):
|
||||
X_train, X_test, y_train, y_test, _, _ = \
|
||||
create_cancer_data()
|
||||
create_cancer_data(return_dataframe=True)
|
||||
model = create_lightgbm_classifier(X_train, y_train)
|
||||
|
||||
X_train[TARGET] = y_train
|
||||
|
|
|
@ -3,12 +3,11 @@
|
|||
import pickle
|
||||
from pathlib import Path
|
||||
|
||||
from rai_test_utils.datasets.tabular import create_cancer_data
|
||||
from rai_test_utils.models.lightgbm import create_lightgbm_classifier
|
||||
from responsibleai import RAIInsights
|
||||
from responsibleai._internal.constants import SerializationAttributes
|
||||
|
||||
from .common_utils import create_cancer_data
|
||||
|
||||
|
||||
class PickleSerializer:
|
||||
def save(self, model, model_dir):
|
||||
|
@ -35,7 +34,7 @@ class TestModelSerializer:
|
|||
|
||||
def test_init_with_pickle_serializer(self, tmpdir):
|
||||
X_train, X_test, y_train, y_test, _, _ = \
|
||||
create_cancer_data()
|
||||
create_cancer_data(return_dataframe=True)
|
||||
model = create_lightgbm_classifier(X_train, y_train)
|
||||
|
||||
X_train['target'] = y_train
|
||||
|
|
Загрузка…
Ссылка в новой задаче