Add `create_complex_regression_pipeline` to `sklearn_model_utils.py` (#2318)

* Add `create_complex_regression_pipeline` to `sklearn_model_utils.py`

Signed-off-by: Gaurav Gupta <gaugup@microsoft.com>

* Add version update

Signed-off-by: Gaurav Gupta <gaugup@microsoft.com>

* add pandas timestamps and test

* Update test_data_processing_utils.py

* Update test_data_processing_utils.py

---------

Signed-off-by: Gaurav Gupta <gaugup@microsoft.com>
Co-authored-by: Roman Lutz <romanlutz13@gmail.com>
This commit is contained in:
Gaurav Gupta 2023-09-07 15:48:19 -07:00 коммит произвёл GitHub
Родитель 61c70d09be
Коммит 6eea36c28b
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
4 изменённых файлов: 52 добавлений и 4 удалений

Просмотреть файл

@ -4,6 +4,7 @@
"""Namespace for sklearn models."""
from .sklearn_model_utils import (create_complex_classification_pipeline,
create_complex_regression_pipeline,
create_kneighbors_classifier,
create_sklearn_logistic_regressor,
create_sklearn_random_forest_classifier,
@ -18,5 +19,6 @@ __all__ = [
"create_sklearn_random_forest_regressor",
"create_sklearn_svm_classifier",
"create_titanic_pipeline",
"create_complex_classification_pipeline"
"create_complex_classification_pipeline",
"create_complex_regression_pipeline"
]

Просмотреть файл

@ -181,3 +181,39 @@ def create_complex_classification_pipeline(
pipeline = Pipeline(steps=[('preprocessor', transformations),
('classifier', RandomForestClassifier())])
return pipeline.fit(X_train, y_train)
def create_complex_regression_pipeline(
        X_train, y_train, continuous_features, categorical_features):
    """Build a preprocessing + random forest regression pipeline and fit it.

    The columns listed in ``continuous_features`` are median-imputed and
    standardized; the columns listed in ``categorical_features`` are
    one-hot encoded with ``handle_unknown='ignore'``. The combined output
    is fed to a ``RandomForestRegressor``.

    :param X_train: The training data.
    :type X_train: numpy.ndarray or pandas.DataFrame
    :param y_train: The training labels.
    :type y_train: numpy.ndarray or pandas.DataFrame
    :param continuous_features: The continuous features.
    :type continuous_features: list
    :param categorical_features: The categorical features.
    :type categorical_features: list
    :return: The result of fitting the pipeline on the training data.
    :rtype: sklearn.pipeline.Pipeline
    """
    # Per-column-group preprocessing steps: numeric columns first,
    # categorical columns second.
    numeric_steps = [
        ("imputer", SimpleImputer(strategy='median')),
        ('scaler', StandardScaler()),
    ]
    categorical_steps = [
        ('onehot', OneHotEncoder(handle_unknown='ignore')),
    ]

    # Route each group of columns through its own sub-pipeline.
    preprocessor = ColumnTransformer(
        transformers=[
            ('num', Pipeline(steps=numeric_steps), continuous_features),
            ('cat', Pipeline(steps=categorical_steps), categorical_features),
        ])

    # Chain the regressor after preprocessing to get the full prediction
    # pipeline. The final step keeps the name 'classifier' even though the
    # estimator is a regressor, so lookups by step name are unaffected.
    model = Pipeline(steps=[
        ('preprocessor', preprocessor),
        ('classifier', RandomForestRegressor()),
    ])
    return model.fit(X_train, y_train)

Просмотреть файл

@ -4,5 +4,5 @@
name = 'rai_test_utils'
_major = '0'
_minor = '4'
_patch = '0'
_patch = '1'
version = '{}.{}.{}'.format(_major, _minor, _patch)

Просмотреть файл

@ -2,6 +2,7 @@
# Licensed under the MIT License.
import numpy as np
import pandas as pd
import pytest
from ml_wrappers import wrap_model
@ -13,8 +14,8 @@ from rai_test_utils.datasets.vision import (
from rai_test_utils.models import (create_models_classification,
create_models_object_detection,
create_models_regression)
from rai_test_utils.models.sklearn import \
create_complex_classification_pipeline
from rai_test_utils.models.sklearn import (
create_complex_classification_pipeline, create_complex_regression_pipeline)
try:
import torch # noqa: F401
@ -47,6 +48,15 @@ class TestModelUtils:
X_train, y_train, num_feature_names, cat_feature_names)
assert pipeline.predict(X_test) is not None
def test_create_complex_regression_pipeline(self):
    """Fit the regression pipeline on the housing data and predict."""
    (train_data, test_data, train_target, _test_target,
     feature_names) = create_housing_data()

    # Wrap the raw splits in DataFrames so the feature names can be used
    # as column selectors by the pipeline.
    train_frame = pd.DataFrame(train_data, columns=feature_names)
    test_frame = pd.DataFrame(test_data, columns=feature_names)

    # Every feature is passed as continuous; no categorical features.
    no_categorical_features = []
    fitted = create_complex_regression_pipeline(
        train_frame, train_target, feature_names, no_categorical_features)

    predictions = fitted.predict(test_frame)
    assert predictions is not None
@pytest.mark.skipif(not pytorch_installed,
reason="requires torch/torchvision")
def test_object_detection_models(self):