Migrate responsibleai tests to use utilities in rai_test_utils - Part 2 (#1956)

* Update requirements-dev.txt

* Update requirements-dev.txt

* Code changes with new rai_test_utils

Signed-off-by: Gaurav Gupta <gaugup@microsoft.com>

---------

Signed-off-by: Gaurav Gupta <gaugup@microsoft.com>
This commit is contained in:
Gaurav Gupta 2023-02-10 16:01:35 -08:00 коммит произвёл GitHub
Родитель 46f31058c4
Коммит 23cc0d8a4a
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
13 изменённых файлов: 56 добавлений и 113 удалений

Просмотреть файл

@@ -4,4 +4,4 @@ pytest-mock==3.6.1
 requests==2.25.1
 requirements-parser==0.2.0
-rai_test_utils==0.0.0
+rai_test_utils==0.1.0

Просмотреть файл

@@ -309,7 +309,7 @@ class TestMatrixFilter(object):
 # Test quantile binning on CRIM feature in california housing dataset,
 # which errored out due to first category not fitting into bins
 (X_train, X_test, y_train, y_test,
-feature_names) = create_housing_data(test_size=0.5)
+feature_names) = create_housing_data()
 model_task = ModelTask.REGRESSION
 matrix_features = ['Population']

Просмотреть файл

@@ -7,4 +7,4 @@ pytest-mock==3.6.1
 # Required for responsibleai package tests
 deptree~=0.0.10
 xgboost<=1.0.0
-rai_test_utils==0.0.0
+rai_test_utils==0.1.0

Просмотреть файл

@@ -8,7 +8,9 @@ import pandas as pd
 import pytest
 import shap
 from sklearn.model_selection import train_test_split
-from tests.common_utils import create_adult_income_dataset, create_housing_data
+from tests.common_utils import create_adult_income_dataset
+from rai_test_utils.datasets.tabular import create_housing_data
 @pytest.fixture(scope='session')

Просмотреть файл

@ -8,12 +8,7 @@ import pandas as pd
import pytest
# Defines common utilities for responsibleai tests
from dice_ml.utils import helpers
from sklearn.compose import ColumnTransformer
from sklearn.datasets import fetch_california_housing, load_breast_cancer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from rai_test_utils.datasets.tabular import \
create_iris_data as _create_iris_data
@@ -35,39 +30,6 @@ def create_iris_data():
 return X_train, X_test, y_train, y_test, feature_names, classes
-def create_housing_data(create_small_dataset=True):
-# Import California housing dataset
-housing = fetch_california_housing()
-# Split data into train and test
-if create_small_dataset:
-x_train, x_test, y_train, y_test = train_test_split(housing.data,
-housing.target,
-train_size=500,
-test_size=50,
-random_state=7)
-else:
-x_train, x_test, y_train, y_test = train_test_split(housing.data,
-housing.target,
-test_size=0.2,
-random_state=7)
-return x_train, x_test, y_train, y_test, housing.feature_names
-def create_cancer_data():
-breast_cancer_data = load_breast_cancer()
-classes = breast_cancer_data.target_names.tolist()
-# Split data into train and test
-X_train, X_test, y_train, y_test = train_test_split(
-breast_cancer_data.data, breast_cancer_data.target,
-test_size=0.2, random_state=0)
-feature_names = breast_cancer_data.feature_names
-classes = breast_cancer_data.target_names.tolist()
-X_train = pd.DataFrame(X_train, columns=feature_names)
-X_test = pd.DataFrame(X_test, columns=feature_names)
-return X_train, X_test, y_train, y_test, feature_names, classes
 class FetchDiceAdultCensusIncomeDataset(object):
 def __init__(self):
 pass
@@ -108,28 +70,6 @@ def create_adult_income_dataset(create_small_dataset=True):
 feature_columns, feature_range_keys
-def create_complex_classification_pipeline(
-X_train, y_train, continuous_features, categorical_features):
-# We create the preprocessing pipelines for both
-# numeric and categorical data.
-numeric_transformer = Pipeline(steps=[
-('scaler', StandardScaler())])
-categorical_transformer = Pipeline(steps=[
-('onehot', OneHotEncoder(handle_unknown='ignore'))])
-transformations = ColumnTransformer(
-transformers=[
-('num', numeric_transformer, continuous_features),
-('cat', categorical_transformer, categorical_features)])
-# Append classifier to preprocessing pipeline.
-# Now we have a full prediction pipeline.
-pipeline = Pipeline(steps=[('preprocessor', transformations),
-('classifier', RandomForestClassifier())])
-return pipeline.fit(X_train, y_train)
 def assert_series_and_dict_equal(left: pd.Series, right: dict):
 left_json = left.to_json(orient="index")
 left_dict = json.loads(left_json)

Просмотреть файл

@ -12,20 +12,20 @@ import numpy as np
import pandas as pd
import pytest
from tests.causal_manager_validator import validate_causal
from tests.common_utils import (create_adult_income_dataset,
create_cancer_data,
create_complex_classification_pipeline,
create_housing_data, create_iris_data)
from tests.common_utils import create_adult_income_dataset, create_iris_data
from tests.counterfactual_manager_validator import validate_counterfactual
from tests.error_analysis_validator import (setup_error_analysis,
validate_error_analysis)
from tests.explainer_manager_validator import (setup_explainer,
validate_explainer)
from rai_test_utils.datasets.tabular import \
create_binary_classification_dataset
from rai_test_utils.datasets.tabular import (
create_binary_classification_dataset, create_cancer_data,
create_housing_data)
from rai_test_utils.models.model_utils import (create_models_classification,
create_models_regression)
from rai_test_utils.models.sklearn import \
create_complex_classification_pipeline
from responsibleai import ModelAnalysis, ModelTask
from responsibleai._internal.constants import ManagerNames
from responsibleai._tools.shared.state_directory_management import \
@ -72,7 +72,7 @@ class TestModelAnalysis(object):
ManagerNames.EXPLAINER])
def test_model_analysis_cancer(self, manager_type):
X_train, X_test, y_train, y_test, _, classes = \
create_cancer_data()
create_cancer_data(return_dataframe=True)
models = create_models_classification(X_train, y_train)
X_train[LABELS] = y_train
X_test[LABELS] = y_test

Просмотреть файл

@ -8,11 +8,11 @@ from unittest.mock import MagicMock
import pandas as pd
import pytest
from tests.common_utils import (create_cancer_data, create_housing_data,
create_iris_data)
from tests.common_utils import create_iris_data
from rai_test_utils.datasets.tabular import \
create_binary_classification_dataset
from rai_test_utils.datasets.tabular import (
create_binary_classification_dataset, create_cancer_data,
create_housing_data)
from rai_test_utils.models.lightgbm import create_lightgbm_classifier
from rai_test_utils.models.sklearn import \
create_sklearn_random_forest_regressor
@ -119,7 +119,7 @@ class TestModelAnalysisValidations:
def test_validate_serializer(self):
X_train, X_test, y_train, y_test, _, _ = \
create_cancer_data()
create_cancer_data(return_dataframe=True)
model = create_lightgbm_classifier(X_train, y_train)
X_train['target'] = y_train
@ -189,7 +189,7 @@ class TestModelAnalysisValidations:
def test_model_predictions_predict(self):
X_train, X_test, y_train, y_test, _, _ = \
create_cancer_data()
create_cancer_data(return_dataframe=True)
X_train['target'] = y_train
X_test['target'] = y_test
@ -209,7 +209,7 @@ class TestModelAnalysisValidations:
def test_model_predictions_predict_proba(self):
X_train, X_test, y_train, y_test, _, _ = \
create_cancer_data()
create_cancer_data(return_dataframe=True)
X_train['target'] = y_train
X_test['target'] = y_test
@ -231,7 +231,7 @@ class TestModelAnalysisValidations:
def test_model_analysis_incorrect_task_type(self):
X_train, X_test, y_train, y_test, _, _ = \
create_cancer_data()
create_cancer_data(return_dataframe=True)
model = create_lightgbm_classifier(X_train, y_train)
X_train['target'] = y_train
@ -250,7 +250,7 @@ class TestModelAnalysisValidations:
def test_mismatch_train_test_features(self):
X_train, X_test, y_train, y_test, _, _ = \
create_cancer_data()
create_cancer_data(return_dataframe=True)
model = create_lightgbm_classifier(X_train, y_train)
X_train['target'] = y_train
@ -268,7 +268,7 @@ class TestModelAnalysisValidations:
def test_unsupported_train_test_types(self):
X_train, X_test, y_train, y_test, _, _ = \
create_cancer_data()
create_cancer_data(return_dataframe=True)
model = create_lightgbm_classifier(X_train, y_train)
X_train['target'] = y_train
@ -286,7 +286,7 @@ class TestModelAnalysisValidations:
def test_train_labels(self):
X_train, X_test, y_train, y_test, _, _ = \
create_cancer_data()
create_cancer_data(return_dataframe=True)
model = create_lightgbm_classifier(X_train, y_train)
X_train['target'] = y_train

Просмотреть файл

@ -11,20 +11,20 @@ import numpy as np
import pandas as pd
import pytest
from tests.causal_manager_validator import validate_causal
from tests.common_utils import (create_adult_income_dataset,
create_cancer_data,
create_complex_classification_pipeline,
create_housing_data, create_iris_data)
from tests.common_utils import create_adult_income_dataset, create_iris_data
from tests.counterfactual_manager_validator import validate_counterfactual
from tests.error_analysis_validator import (setup_error_analysis,
validate_error_analysis)
from tests.explainer_manager_validator import (setup_explainer,
validate_explainer)
from rai_test_utils.datasets.tabular import \
create_binary_classification_dataset
from rai_test_utils.datasets.tabular import (
create_binary_classification_dataset, create_cancer_data,
create_housing_data)
from rai_test_utils.models.model_utils import (create_models_classification,
create_models_regression)
from rai_test_utils.models.sklearn import \
create_complex_classification_pipeline
from responsibleai import ModelTask, RAIInsights
from responsibleai._internal.constants import (ManagerNames,
SerializationAttributes)
@ -78,7 +78,7 @@ class TestRAIInsights(object):
ManagerNames.EXPLAINER])
def test_rai_insights_cancer(self, manager_type):
X_train, X_test, y_train, y_test, _, classes = \
create_cancer_data()
create_cancer_data(return_dataframe=True)
models = create_models_classification(X_train, y_train)
X_train[LABELS] = y_train
X_test[LABELS] = y_test

Просмотреть файл

@ -4,11 +4,12 @@
import numpy as np
import pandas as pd
import pytest
from tests.common_utils import create_housing_data, create_iris_data
from tests.common_utils import create_iris_data
from erroranalysis._internal.constants import (PRED_Y, ROW_INDEX, TRUE_Y,
ModelTask)
from rai_test_utils.datasets.tabular import create_simple_titanic_data
from rai_test_utils.datasets.tabular import (create_housing_data,
create_simple_titanic_data)
from rai_test_utils.models.sklearn import (
create_sklearn_random_forest_regressor, create_sklearn_svm_classifier,
create_titanic_pipeline)

Просмотреть файл

@ -6,8 +6,9 @@ from tempfile import TemporaryDirectory
import pandas as pd
import pytest
from tests.common_utils import create_housing_data, create_iris_data
from tests.common_utils import create_iris_data
from rai_test_utils.datasets.tabular import create_housing_data
from rai_test_utils.models.sklearn import (
create_sklearn_random_forest_classifier,
create_sklearn_random_forest_regressor)

Просмотреть файл

@ -8,13 +8,13 @@ from tempfile import TemporaryDirectory
import numpy as np
import pandas as pd
import pytest
from tests.common_utils import (create_adult_income_dataset,
create_complex_classification_pipeline,
create_iris_data)
from tests.common_utils import create_adult_income_dataset, create_iris_data
from rai_test_utils.datasets.tabular import \
create_binary_classification_dataset
from rai_test_utils.models.lightgbm import create_lightgbm_classifier
from rai_test_utils.models.sklearn import \
create_complex_classification_pipeline
from responsibleai import ModelTask, RAIInsights
from responsibleai._internal.constants import (ManagerNames,
SerializationAttributes)

Просмотреть файл

@ -8,11 +8,11 @@ import numpy as np
import pandas as pd
import pytest
from lightgbm import LGBMClassifier
from tests.common_utils import (create_cancer_data, create_housing_data,
create_iris_data)
from tests.common_utils import create_iris_data
from rai_test_utils.datasets.tabular import \
create_binary_classification_dataset
from rai_test_utils.datasets.tabular import (
create_binary_classification_dataset, create_cancer_data,
create_housing_data)
from rai_test_utils.models.lightgbm import create_lightgbm_classifier
from rai_test_utils.models.sklearn import \
create_sklearn_random_forest_regressor
@ -157,7 +157,7 @@ class TestRAIInsightsValidations:
def test_validate_serializer(self):
X_train, X_test, y_train, y_test, _, _ = \
create_cancer_data()
create_cancer_data(return_dataframe=True)
model = create_lightgbm_classifier(X_train, y_train)
X_train[TARGET] = y_train
@ -227,7 +227,7 @@ class TestRAIInsightsValidations:
def test_model_predictions_predict(self):
X_train, X_test, y_train, y_test, _, _ = \
create_cancer_data()
create_cancer_data(return_dataframe=True)
X_train[TARGET] = y_train
X_test[TARGET] = y_test
@ -247,7 +247,7 @@ class TestRAIInsightsValidations:
def test_model_predictions_predict_proba(self):
X_train, X_test, y_train, y_test, _, _ = \
create_cancer_data()
create_cancer_data(return_dataframe=True)
X_train[TARGET] = y_train
X_test[TARGET] = y_test
@ -269,7 +269,7 @@ class TestRAIInsightsValidations:
def test_incorrect_task_type(self):
X_train, X_test, y_train, y_test, _, _ = \
create_cancer_data()
create_cancer_data(return_dataframe=True)
model = create_lightgbm_classifier(X_train, y_train)
X_train[TARGET] = y_train
@ -287,7 +287,7 @@ class TestRAIInsightsValidations:
def test_mismatch_train_test_features(self):
X_train, X_test, y_train, y_test, _, _ = \
create_cancer_data()
create_cancer_data(return_dataframe=True)
model = create_lightgbm_classifier(X_train, y_train)
X_train[TARGET] = y_train
@ -344,7 +344,7 @@ class TestRAIInsightsValidations:
def test_unsupported_train_test_types(self):
X_train, X_test, y_train, y_test, _, _ = \
create_cancer_data()
create_cancer_data(return_dataframe=True)
model = create_lightgbm_classifier(X_train, y_train)
X_train[TARGET] = y_train
@ -363,7 +363,7 @@ class TestRAIInsightsValidations:
def test_classes_exceptions(self):
X_train, X_test, y_train, y_test, _, _ = \
create_cancer_data()
create_cancer_data(return_dataframe=True)
model = create_lightgbm_classifier(X_train, y_train)
X_train[TARGET] = y_train
@ -414,7 +414,7 @@ class TestRAIInsightsValidations:
def test_dataset_exception(self):
X_train, X_test, y_train, y_test, _, _ = \
create_cancer_data()
create_cancer_data(return_dataframe=True)
model = create_lightgbm_classifier(X_train, y_train)
X_train_feature_names = X_train.columns.tolist()
@ -453,7 +453,7 @@ class TestRAIInsightsValidations:
def test_classes_passes(self):
X_train, X_test, y_train, y_test, _, _ = \
create_cancer_data()
create_cancer_data(return_dataframe=True)
model = create_lightgbm_classifier(X_train, y_train)
X_train[TARGET] = y_train
@ -471,7 +471,7 @@ class TestRAIInsightsValidations:
def test_no_model_but_serializer_provided(self):
X_train, X_test, y_train, y_test, _, _ = \
create_cancer_data()
create_cancer_data(return_dataframe=True)
X_train[TARGET] = y_train
X_test[TARGET] = y_test
@ -489,7 +489,7 @@ class TestRAIInsightsValidations:
def test_feature_metadata(self):
X_train, X_test, y_train, y_test, _, _ = \
create_cancer_data()
create_cancer_data(return_dataframe=True)
model = create_lightgbm_classifier(X_train, y_train)
X_train[TARGET] = y_train

Просмотреть файл

@ -3,12 +3,11 @@
import pickle
from pathlib import Path
from rai_test_utils.datasets.tabular import create_cancer_data
from rai_test_utils.models.lightgbm import create_lightgbm_classifier
from responsibleai import RAIInsights
from responsibleai._internal.constants import SerializationAttributes
from .common_utils import create_cancer_data
class PickleSerializer:
def save(self, model, model_dir):
@ -35,7 +34,7 @@ class TestModelSerializer:
def test_init_with_pickle_serializer(self, tmpdir):
X_train, X_test, y_train, y_test, _, _ = \
create_cancer_data()
create_cancer_data(return_dataframe=True)
model = create_lightgbm_classifier(X_train, y_train)
X_train['target'] = y_train