Add `create_complex_regression_pipeline` to `sklearn_model_utils.py` (#2318)
* Add `create_complex_regression_pipeline` to `sklearn_model_utils.py` Signed-off-by: Gaurav Gupta <gaugup@microsoft.com> * Add version update Signed-off-by: Gaurav Gupta <gaugup@microsoft.com> * add pandas timestamps and test * Update test_data_processing_utils.py * Update test_data_processing_utils.py --------- Signed-off-by: Gaurav Gupta <gaugup@microsoft.com> Co-authored-by: Roman Lutz <romanlutz13@gmail.com>
This commit is contained in:
Родитель
61c70d09be
Коммит
6eea36c28b
|
@ -4,6 +4,7 @@
|
|||
"""Namespace for sklearn models."""
|
||||
|
||||
from .sklearn_model_utils import (create_complex_classification_pipeline,
|
||||
create_complex_regression_pipeline,
|
||||
create_kneighbors_classifier,
|
||||
create_sklearn_logistic_regressor,
|
||||
create_sklearn_random_forest_classifier,
|
||||
|
@ -18,5 +19,6 @@ __all__ = [
|
|||
"create_sklearn_random_forest_regressor",
|
||||
"create_sklearn_svm_classifier",
|
||||
"create_titanic_pipeline",
|
||||
"create_complex_classification_pipeline"
|
||||
"create_complex_classification_pipeline",
|
||||
"create_complex_regression_pipeline"
|
||||
]
|
||||
|
|
|
@ -181,3 +181,39 @@ def create_complex_classification_pipeline(
|
|||
pipeline = Pipeline(steps=[('preprocessor', transformations),
|
||||
('classifier', RandomForestClassifier())])
|
||||
return pipeline.fit(X_train, y_train)
|
||||
|
||||
|
||||
def create_complex_regression_pipeline(
        X_train, y_train, continuous_features, categorical_features):
    """Build and fit an sklearn preprocessing + regression pipeline.

    Continuous columns go through a median SimpleImputer followed by a
    StandardScaler; categorical columns go through a OneHotEncoder
    configured with handle_unknown='ignore'. The combined output is fed
    to a RandomForestRegressor.

    :param X_train: The training data.
    :type X_train: numpy.ndarray or pandas.DataFrame
    :param y_train: The training labels.
    :type y_train: numpy.ndarray or pandas.DataFrame
    :param continuous_features: The continuous features.
    :type continuous_features: list
    :param categorical_features: The categorical features.
    :type categorical_features: list
    :return: The result of fitting the pipeline on X_train and y_train.
    :rtype: sklearn.pipeline.Pipeline
    """
    # Steps applied to the continuous columns: impute, then scale.
    continuous_steps = [
        ("imputer", SimpleImputer(strategy='median')),
        ('scaler', StandardScaler()),
    ]

    # Steps applied to the categorical columns: one-hot encoding only.
    categorical_steps = [
        ('onehot', OneHotEncoder(handle_unknown='ignore')),
    ]

    # Route each group of columns through its own sub-pipeline.
    preprocessor = ColumnTransformer(transformers=[
        ('num', Pipeline(steps=continuous_steps), continuous_features),
        ('cat', Pipeline(steps=categorical_steps), categorical_features),
    ])

    # NOTE: the final step is registered under the name 'classifier'
    # even though the estimator is a regressor.
    regression_pipeline = Pipeline(steps=[
        ('preprocessor', preprocessor),
        ('classifier', RandomForestRegressor()),
    ])
    return regression_pipeline.fit(X_train, y_train)
|
||||
|
|
|
@ -4,5 +4,5 @@
|
|||
name = 'rai_test_utils'
|
||||
_major = '0'
|
||||
_minor = '4'
|
||||
_patch = '0'
|
||||
_patch = '1'
|
||||
version = '{}.{}.{}'.format(_major, _minor, _patch)
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
# Licensed under the MIT License.
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import pytest
|
||||
from ml_wrappers import wrap_model
|
||||
|
||||
|
@ -13,8 +14,8 @@ from rai_test_utils.datasets.vision import (
|
|||
from rai_test_utils.models import (create_models_classification,
|
||||
create_models_object_detection,
|
||||
create_models_regression)
|
||||
from rai_test_utils.models.sklearn import \
|
||||
create_complex_classification_pipeline
|
||||
from rai_test_utils.models.sklearn import (
|
||||
create_complex_classification_pipeline, create_complex_regression_pipeline)
|
||||
|
||||
try:
|
||||
import torch # noqa: F401
|
||||
|
@ -47,6 +48,15 @@ class TestModelUtils:
|
|||
X_train, y_train, num_feature_names, cat_feature_names)
|
||||
assert pipeline.predict(X_test) is not None
|
||||
|
||||
def test_create_complex_regression_pipeline(self):
    """Fit the regression pipeline on the housing data and predict."""
    (X_train, X_test, y_train, y_test,
     num_feature_names) = create_housing_data()

    # Wrap both splits in DataFrames labelled with the feature names.
    train_frame = pd.DataFrame(X_train, columns=num_feature_names)
    test_frame = pd.DataFrame(X_test, columns=num_feature_names)

    # Every column is passed as continuous; the categorical list is empty.
    fitted_pipeline = create_complex_regression_pipeline(
        train_frame, y_train, num_feature_names, [])
    assert fitted_pipeline.predict(test_frame) is not None
|
||||
|
||||
@pytest.mark.skipif(not pytorch_installed,
|
||||
reason="requires torch/torchvision")
|
||||
def test_object_detection_models(self):
|
||||
|
|
Загрузка…
Ссылка в новой задаче