From cd2502a459925337e5020806f96c0d3d286eab91 Mon Sep 17 00:00:00 2001 From: Gaurav Gupta <47334368+gaugup@users.noreply.github.com> Date: Fri, 11 Aug 2023 11:01:56 -0700 Subject: [PATCH] Move missing value validations before unique value validations (#2228) Signed-off-by: Gaurav Gupta --- responsibleai/responsibleai/rai_insights/rai_insights.py | 8 ++++---- .../tests/rai_insights/test_rai_insights_validations.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/responsibleai/responsibleai/rai_insights/rai_insights.py b/responsibleai/responsibleai/rai_insights/rai_insights.py index 66bc170ea..1d0a999a0 100644 --- a/responsibleai/responsibleai/rai_insights/rai_insights.py +++ b/responsibleai/responsibleai/rai_insights/rai_insights.py @@ -534,6 +534,10 @@ class RAIInsights(RAIBaseInsights): raise UserConfigValidationException( f'Target name {target_column} not present in train/test data') + # Check if any of the data is missing in test and train data + self._validate_data_is_not_missing(test, "test") + self._validate_data_is_not_missing(train, "train") + categorical_features = feature_metadata.categorical_features if (categorical_features is not None and len(categorical_features) > 0): @@ -597,10 +601,6 @@ class RAIInsights(RAIBaseInsights): "identified as categorical features: " f"{non_categorical_or_time_string_columns}") - # Check if any of the data is missing in test and train data - self._validate_data_is_not_missing(test, "test") - self._validate_data_is_not_missing(train, "train") - self._validate_feature_metadata( feature_metadata, train, task_type, model, target_column) diff --git a/responsibleai/tests/rai_insights/test_rai_insights_validations.py b/responsibleai/tests/rai_insights/test_rai_insights_validations.py index baac6db2c..cf8a0aa49 100644 --- a/responsibleai/tests/rai_insights/test_rai_insights_validations.py +++ b/responsibleai/tests/rai_insights/test_rai_insights_validations.py @@ -398,7 +398,7 @@ class TestRAIInsightsValidations: str(ucve.value) def test_dirty_train_test_data(self): - X_train = pd.DataFrame(data=[['1', np.nan], ['2', '3']], + X_train = pd.DataFrame(data=[['1', 1], ['2', '3']], columns=['c1', 'c2']) y_train = np.array([1, 0]) X_test = pd.DataFrame(data=[['1', '2'], ['2', '3']],