Remove constraint on dependencies version (#523)

* remove constraint on dependencies version

Signed-off-by: xavier dupré <xavier.dupre@gmail.com>

* fix attributeerror

Signed-off-by: xavier dupré <xavier.dupre@gmail.com>

* uninstall hummingbird-ml

Signed-off-by: xavier dupré <xavier.dupre@gmail.com>

* uninstall hummingbird-ml

Signed-off-by: xavier dupré <xavier.dupre@gmail.com>

* newer version of scikit-learn

Signed-off-by: xavier dupré <xavier.dupre@gmail.com>

* fix xgboost

Signed-off-by: xavier dupré <xavier.dupre@gmail.com>

* lint

Signed-off-by: xavier dupré <xavier.dupre@gmail.com>

* fix xgb

Signed-off-by: xavier dupré <xavier.dupre@gmail.com>

* lint

Signed-off-by: xavier dupré <xavier.dupre@gmail.com>

* add ONNXMLSub

Signed-off-by: xavier dupré <xavier.dupre@gmail.com>

* update

Signed-off-by: xavier dupré <xavier.dupre@gmail.com>

* fix index

Signed-off-by: xavier dupré <xavier.dupre@gmail.com>

* fix missing import

Signed-off-by: xavier dupré <xavier.dupre@gmail.com>

* tr

Signed-off-by: xavier dupré <xavier.dupre@gmail.com>

* none

Signed-off-by: xavier dupré <xavier.dupre@gmail.com>

* discrepancies

Signed-off-by: xavier dupré <xavier.dupre@gmail.com>

* ci

Signed-off-by: xavier dupré <xavier.dupre@gmail.com>

* yml

Signed-off-by: xavier dupré <xavier.dupre@gmail.com>

* lint

Signed-off-by: xavier dupré <xavier.dupre@gmail.com>

* replace by string.format

Signed-off-by: xavier dupré <xavier.dupre@gmail.com>

* comment

Signed-off-by: xavier dupré <xavier.dupre@gmail.com>

Co-authored-by: xavier dupré <xavier.dupre@gmail.com>
This commit is contained in:
Xavier Dupré 2021-06-18 21:00:19 +02:00 коммит произвёл GitHub
Родитель 0c9a71e32a
Коммит 5081f6acfa
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
14 изменённых файлов: 208 добавлений и 57 удалений

1
.github/workflows/pythonapp.yml поставляемый
Просмотреть файл

@ -53,6 +53,7 @@ jobs:
- name: Install basic dependencies
run: |
pip install .[tests] -f https://download.pytorch.org/whl/torch_stable.html
pip install git+https://github.com/onnx/sklearn-onnx.git
- name: Run basic tests without extra
run: pytest
- name: Coverage on basic tests without extra

Просмотреть файл

@ -624,6 +624,8 @@ def _parse_onnx_single_operator(topology, operator):
# Add the operator in the topology.
alias = get_onnxml_api_operator_name(operator.op_type)
if alias is None:
alias = "ONNXML" + operator.op_type
this_operator = topology.declare_logical_operator(alias, operator)
# Register the operator's inputs.

Просмотреть файл

@ -372,24 +372,37 @@ def convert(topology, backend, test_input, device, extra_config={}):
):
idx -= 1
assert idx >= 0, "Cannot detect container type. Please fill an issue at https://github.com/microsoft/hummingbird."
force_transformer = False
if idx < 0:
force_transformer = True
# If is a transformer, we need to check whether there is another operator type before.
# E.g., normalization after classification.
tmp_idx = idx
if operator_map[operators[idx].full_name].transformer:
while (
idx >= 0
and not operator_map[operators[idx].full_name].regression
and not operator_map[operators[idx].full_name].classification
and not operator_map[operators[idx].full_name].anomaly_detection
):
idx -= 1
if idx < 0:
idx = tmp_idx
if not force_transformer:
tmp_idx = idx
if operator_map[operators[idx].full_name].transformer:
while (
idx >= 0
and not operator_map[operators[idx].full_name].regression
and not operator_map[operators[idx].full_name].classification
and not operator_map[operators[idx].full_name].anomaly_detection
):
idx -= 1
if idx < 0:
idx = tmp_idx
# Get the proper container type.
if operator_map[operators[idx].full_name].regression:
if force_transformer or operator_map[operators[idx].full_name].transformer:
# We are just transforming the input data.
if backend == torch.jit.__name__:
container = TorchScriptSklearnContainerTransformer
elif backend == onnx.__name__:
container = ONNXSklearnContainerTransformer
elif backend == tvm_backend:
container = TVMSklearnContainerTransformer
else:
container = PyTorchSklearnContainerTransformer
elif operator_map[operators[idx].full_name].regression:
# We are doing a regression task.
if backend == torch.jit.__name__:
container = TorchScriptSklearnContainerRegression
@ -409,16 +422,6 @@ def convert(topology, backend, test_input, device, extra_config={}):
container = TVMSklearnContainerAnomalyDetection
else:
container = PyTorchSklearnContainerAnomalyDetection
elif operator_map[operators[idx].full_name].transformer:
# We are just transforming the input data.
if backend == torch.jit.__name__:
container = TorchScriptSklearnContainerTransformer
elif backend == onnx.__name__:
container = ONNXSklearnContainerTransformer
elif backend == tvm_backend:
container = TVMSklearnContainerTransformer
else:
container = PyTorchSklearnContainerTransformer
else:
# We are doing a classification task.
if backend == torch.jit.__name__:

Просмотреть файл

@ -127,7 +127,10 @@ def _convert_xgboost(model, backend, test_input, device, extra_config={}):
# XGBoostRegressor and Classifier have different APIs for extracting the number of features.
# In the former case we need to infer them from the test_input.
if "_features_count" in dir(model):
booster = model.get_booster() if hasattr(model, "get_booster") else model
if hasattr(booster, "num_features"):
extra_config[constants.N_FEATURES] = booster.num_features()
elif "_features_count" in dir(model):
extra_config[constants.N_FEATURES] = model._features_count
elif test_input is not None:
if type(test_input) is np.ndarray and len(test_input.shape) == 2:

Просмотреть файл

@ -7,7 +7,7 @@
"""
Base classes for scaler implementations.
"""
import numpy
import torch
from ._physical_operator import PhysicalOperator
@ -21,6 +21,11 @@ class Scaler(PhysicalOperator, torch.nn.Module):
def __init__(self, logical_operator, offset, scale, device):
super(Scaler, self).__init__(logical_operator, transformer=True)
if offset is None or len(offset.shape) == 0 or offset.shape == (0, ):
offset = numpy.array([0], dtype=numpy.float32)
if scale is None or len(scale.shape) == 0 or scale.shape == (0, ):
scale = numpy.array([1], dtype=numpy.float32)
self.offset = offset
self.scale = scale

Просмотреть файл

@ -66,6 +66,10 @@ def convert_sklearn_lgbm_classifier(operator, device, extra_config):
A PyTorch model
"""
assert operator is not None, "Cannot convert None operator"
if operator.raw_operator.boosting_type == 'rf':
raise RuntimeError(
"Unable to directly convert this model. "
"It should be converted into ONNX first.")
n_features = operator.raw_operator._n_features
tree_infos = operator.raw_operator.booster_.dump_model()["tree_info"]

Просмотреть файл

@ -84,6 +84,20 @@ class Add(PhysicalOperator, torch.nn.Module):
return torch.add(*x)
class Sub(PhysicalOperator, torch.nn.Module):
    """
    Element-wise subtraction operator backed by `torch.sub`.

    The subtrahend is either a constant captured at construction time (`val`)
    or the second tensor passed to `forward`.
    """

    def __init__(self, logical_operator, val):
        """
        Args:
            logical_operator: The logical operator this module implements
            val: Optional sequence of numbers used as the constant subtrahend;
                `None` when both operands are supplied at run time
        """
        super().__init__(logical_operator)

        if val is None:
            # No constant operand: `forward` must then receive both tensors.
            return

        # NOTE(review): `self.inputs` is not set in this class; presumably the
        # PhysicalOperator base provides it - confirm.
        assert len(self.inputs) == 1, "Unexpected input length for Sub val"
        # Stored as a frozen parameter so that it follows the module across devices.
        self.val = torch.nn.Parameter(torch.FloatTensor(val), requires_grad=False)

    def forward(self, *x):
        """
        Subtract the stored constant from the single input, or the second
        input from the first one when two tensors are given.
        """
        operands = (x[0], self.val) if len(x) == 1 else x
        return torch.sub(*operands)
class Less(PhysicalOperator, torch.nn.Module):
def __init__(self, logical_operator, val):
super(Less, self).__init__(logical_operator)
@ -135,10 +149,16 @@ class MatMul(PhysicalOperator, torch.nn.Module):
class Div(PhysicalOperator, torch.nn.Module):
def __init__(self, logical_operator):
def __init__(self, logical_operator, val):
super(Div, self).__init__(logical_operator)
if val is not None:
assert len(self.inputs) == 1, "Unexpected input length for Div val"
self.val = torch.nn.Parameter(torch.FloatTensor(val), requires_grad=False)
def forward(self, *x):
if len(x) == 1:
return torch.div(*x, self.val)
return torch.div(*x)
@ -272,6 +292,35 @@ def convert_onnx_add(operator, device=None, extra_config={}):
return Add(operator, val)
def convert_onnx_sub(operator, device=None, extra_config={}):
    """
    Converter for `ai.onnx.Sub`.

    When the second input of the node is a graph initializer, its values are
    extracted and handed to `Sub` as a constant; otherwise `Sub` is built
    without a constant and expects both operands at run time.

    Args:
        operator: An operator wrapping a `ai.onnx.Sub` model
        device: String defining the type of device the converted operator should be run on
        extra_config: Extra configuration used to select the best conversion strategy

    Returns:
        A PyTorch model

    Raises:
        TypeError: if the initializer data type is neither 1 nor 11
    """
    assert operator is not None

    # Initializer data type -> name of the field read for the values.
    # NOTE(review): 1 and 11 look like the ONNX TensorProto FLOAT and DOUBLE codes - confirm.
    field_by_data_type = {11: "double_data", 1: "float_data"}

    initializers = extra_config[constants.ONNX_INITIALIZERS]
    second_input = operator.raw_operator.origin.input[1]

    val = None
    if second_input in initializers:
        init = initializers[second_input]
        field_name = field_by_data_type.get(init.data_type)
        if field_name is None:
            raise TypeError("Data type %r not supported for initializer %r." % (init.data_type, init))
        val = list(getattr(init, field_name))

    # Generate the model.
    return Sub(operator, val)
def convert_onnx_neg(operator, device=None, extra_config={}):
"""
Converter for `ai.onnx.Neg`.
@ -366,8 +415,19 @@ def convert_onnx_div(operator, device=None, extra_config={}):
"""
assert operator is not None
initializers = extra_config[constants.ONNX_INITIALIZERS]
val = None
if operator.raw_operator.origin.input[1] in initializers:
init = initializers[operator.raw_operator.origin.input[1]]
if init.data_type == 11:
val = list(init.double_data)
elif init.data_type == 1:
val = list(init.float_data)
else:
raise TypeError("Data type %r not supported for initializer %r." % (init.data_type, init))
# Generate the model.
return Div(operator)
return Div(operator, val)
def convert_onnx_less(operator, device=None, extra_config={}):
@ -402,4 +462,5 @@ register_converter("ONNXMLMatMul", convert_onnx_mat_mul)
register_converter("ONNXMLMul", convert_onnx_mul)
register_converter("ONNXMLNeg", convert_onnx_neg)
register_converter("ONNXMLReshape", convert_onnx_reshape)
register_converter("ONNXMLSub", convert_onnx_sub)
register_converter("ONNXMLSum", convert_onnx_sum)

Просмотреть файл

@ -40,6 +40,9 @@ def _get_tree_infos_from_onnx_ml_operator(model):
threshold = attr.floats
elif attr.name == "class_weights" or attr.name == "target_weights":
values = attr.floats
if len(values) == 0:
raise TypeError(
"Type mismatch with attribute {}.".format(attr))
elif attr.name == "class_nodeids" or attr.name == "target_nodeids":
target_node_ids = attr.ints
elif attr.name == "class_treeids" or attr.name == "target_treeids":
@ -62,6 +65,7 @@ def _get_tree_infos_from_onnx_ml_operator(model):
raise AssertionError("Modality {} not supported".format(mode))
is_decision_tree = post_transform == "NONE"
# Order values based on target node and tree ids.
new_values = []
n_classes = 1 if classes is None or not is_decision_tree else len(classes)

Просмотреть файл

@ -16,6 +16,14 @@ from .._gbdt_commons import convert_gbdt_common, convert_gbdt_classifier_common
from .._tree_commons import get_parameters_for_sklearn_common, get_parameters_for_tree_trav_sklearn, TreeParameters
def _get_n_features(model):
    """
    Return the number of input features recorded on a fitted scikit-learn model.

    The attribute holding this value depends on the estimator and on the
    scikit-learn release, so the known names are probed in order:
    `n_features_`, then `_n_features` (HistGradientBoosting), then
    `n_features_in_` (the name exposed by newer scikit-learn releases).

    Args:
        model: A fitted scikit-learn estimator

    Returns:
        The value of the first attribute found on `model`

    Raises:
        AttributeError: if `model` exposes none of the probed attributes
    """
    # A private sentinel lets a legitimately falsy attribute value (e.g. 0) be returned.
    missing = object()
    for name in ("n_features_", "_n_features", "n_features_in_"):
        value = getattr(model, name, missing)
        if value is not missing:
            return value
    raise AttributeError(
        "{!r} object has no attribute 'n_features_', '_n_features' or 'n_features_in_'".format(
            type(model).__name__
        )
    )
def _get_parameters_hist_gbdt(trees):
"""
Extract the tree parameters from SklearnHistGradientBoostingClassifier trees
@ -24,7 +32,11 @@ def _get_parameters_hist_gbdt(trees):
Returns: The tree parameters wrapped into an instance of `operator_converters._tree_commons_TreeParameters`
"""
features = [n["feature_idx"] for n in trees.nodes]
thresholds = [n["threshold"] if n["threshold"] != 0 else -1 for n in trees.nodes]
try:
thresholds = [n["threshold"] if n["threshold"] != 0 else -1 for n in trees.nodes]
except ValueError:
# newer version of scikit-learn
thresholds = [n["num_threshold"] if n["num_threshold"] != 0 else -1 for n in trees.nodes]
lefts = [n["left"] if n["left"] != 0 else -1 for n in trees.nodes]
rights = [n["right"] if n["right"] != 0 else -1 for n in trees.nodes]
values = [[n["value"]] if n["value"] != 0 else [-1] for n in trees.nodes]
@ -54,7 +66,7 @@ def convert_sklearn_gbdt_classifier(operator, device, extra_config):
# GBDT does not normalize values upfront, we have to do it.
extra_config[constants.GET_PARAMETERS_FOR_TREE_TRAVERSAL] = get_parameters_for_tree_trav_sklearn
n_features = operator.raw_operator.n_features_
n_features = _get_n_features(operator.raw_operator)
classes = operator.raw_operator.classes_.tolist()
n_classes = len(classes)
@ -111,7 +123,7 @@ def convert_sklearn_gbdt_regressor(operator, device, extra_config):
# Get tree information out of the operator.
tree_infos = operator.raw_operator.estimators_.ravel().tolist()
n_features = operator.raw_operator.n_features_
n_features = _get_n_features(operator.raw_operator)
extra_config[constants.LEARNING_RATE] = operator.raw_operator.learning_rate
# For sklearn models we need to massage the parameters a bit before generating the parameters for tree_trav.
extra_config[constants.GET_PARAMETERS_FOR_TREE_TRAVERSAL] = get_parameters_for_tree_trav_sklearn
@ -145,7 +157,7 @@ def convert_sklearn_hist_gbdt_classifier(operator, device, extra_config):
# Get tree information out of the operator.
tree_infos = operator.raw_operator._predictors
n_features = operator.raw_operator.n_features_
n_features = _get_n_features(operator.raw_operator)
classes = operator.raw_operator.classes_.tolist()
n_classes = len(classes)
@ -189,7 +201,7 @@ def convert_sklearn_hist_gbdt_regressor(operator, device, extra_config):
# Get tree information out of the operator.
tree_infos = operator.raw_operator._predictors
tree_infos = [tree_infos[i][0] for i in range(len(tree_infos))]
n_features = operator.raw_operator.n_features_
n_features = _get_n_features(operator.raw_operator)
extra_config[constants.BASE_PREDICTION] = [[operator.raw_operator._baseline_prediction]]
return convert_gbdt_common(operator, tree_infos, _get_parameters_hist_gbdt, n_features, None, extra_config)

Просмотреть файл

@ -22,23 +22,23 @@ with open(README) as f:
long_description = long_description[start_pos:]
install_requires = [
"numpy>=1.15,<=1.20.*",
"onnxconverter-common>=1.6.0,<=1.7.0",
"scipy<=1.5.4",
"scikit-learn>=0.21.3,<=0.23.2",
"torch>=1.4.*,<=1.8.1",
"numpy>=1.15",
"onnxconverter-common>=1.6.0",
"scipy",
"scikit-learn>=0.21.3",
"torch>=1.4",
"psutil",
"dill",
]
onnx_requires = [
"onnxruntime>=1.0.0,<1.7.0",
"onnxruntime>=1.0.0",
"onnxmltools>=1.6.0",
"skl2onnx<=1.7.0",
"skl2onnx>=1.7.0",
]
extra_requires = [
# The need each for these depends on which libraries you plan to convert from
"xgboost>=0.90,<1.4.0",
"lightgbm>=2.2,<3",
"xgboost>=0.90",
"lightgbm>=2.2",
]
setup(
name="hummingbird-ml",

Просмотреть файл

@ -14,6 +14,7 @@ from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from onnxconverter_common.data_types import (
FloatTensorType,
DoubleTensorType,
@ -34,6 +35,10 @@ from hummingbird.ml.exceptions import MissingBackend
if onnx_ml_tools_installed():
from onnxmltools.convert import convert_sklearn
try:
from skl2onnx.sklapi import CastTransformer
except ImportError:
CastTransformer = None
if sparkml_installed():
import pyspark
@ -514,7 +519,13 @@ class TestBackends(unittest.TestCase):
warnings.filterwarnings("ignore")
max_depth = 10
num_classes = 2
model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
if CastTransformer is None:
model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
else:
# newer version of sklearn-onnx
model = make_pipeline(
CastTransformer(dtype=np.float32),
GradientBoostingClassifier(n_estimators=10, max_depth=max_depth))
np.random.seed(0)
X = np.random.rand(100, 200)
y = np.random.randint(num_classes, size=100)
@ -536,7 +547,13 @@ class TestBackends(unittest.TestCase):
warnings.filterwarnings("ignore")
max_depth = 10
num_classes = 2
model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
if CastTransformer is None:
model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
else:
# newer version of sklearn-onnx
model = make_pipeline(
CastTransformer(dtype=np.float32),
GradientBoostingClassifier(n_estimators=10, max_depth=max_depth))
np.random.seed(0)
X = np.random.rand(100, 200)
y = np.random.randint(num_classes, size=100)
@ -545,7 +562,7 @@ class TestBackends(unittest.TestCase):
# Create ONNX-ML model
onnx_ml_model = convert_sklearn(
model, initial_types=[("input", DoubleTensorType([X.shape[0], X.shape[1]]))], target_opset=11
model, initial_types=[("input", DoubleTensorType([None, X.shape[1]]))], target_opset=11
)
# Test onnx requires no test_data

Просмотреть файл

@ -2,12 +2,11 @@
Tests extra configurations.
"""
from distutils.version import LooseVersion
import psutil
import unittest
import warnings
import sys
import numpy as np
import psutil
from onnxconverter_common.data_types import FloatTensorType, DoubleTensorType
from sklearn import datasets
from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor, IsolationForest
@ -31,6 +30,10 @@ if lightgbm_installed():
if onnx_ml_tools_installed():
from onnxmltools.convert import convert_sklearn, convert_lightgbm
try:
from skl2onnx.sklapi import CastTransformer
except ImportError:
CastTransformer = None
class TestExtraConf(unittest.TestCase):
@ -696,12 +699,22 @@ class TestExtraConf(unittest.TestCase):
columns = ["vA", "vB", "vC"]
X_train = pandas.DataFrame(X, columns=columns)
pipeline = Pipeline(
steps=[
("preprocessor", ColumnTransformer(transformers=[], remainder="passthrough",)),
("classifier", GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)),
]
)
if CastTransformer is None:
pipeline = Pipeline(
steps=[
("preprocessor", ColumnTransformer(transformers=[], remainder="passthrough",)),
("classifier", GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)),
]
)
else:
# newer version of sklearn-onnx
pipeline = Pipeline(
steps=[
("preprocessor", ColumnTransformer(transformers=[], remainder="passthrough",)),
('cast', CastTransformer(dtype=np.float32)),
("classifier", GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)),
]
)
pipeline.fit(X_train, y)

Просмотреть файл

@ -225,9 +225,16 @@ class TestLGBMConverter(unittest.TestCase):
def test_float64_lgbm_perf_tree_trav_regressor_converter(self):
self._run_float64_lgbm_regressor_converter(1000, extra_config={"tree_implementation": "perf_tree_trav"})
# Random forest in lgbm
# Random forest in lgbm, the conversion fails with the latest
# version of lightgbm. The direct converter to pytorch should be
# updated or the model could be converted into ONNX then
# converted into pytorch.
# For more details, see ONNX converter at https://github.com/onnx/
# onnxmltools/blob/master/onnxmltools/convert/lightgbm/
# operator_converters/LightGbm.py#L313.
@unittest.skipIf(True, reason="boosting_type=='rf' produces different probabilites.")
@unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed")
def test_lgbm_classifier_random_forest(self):
def test_lgbm_classifier_random_forest_rf(self):
warnings.filterwarnings("ignore")
model = lgb.LGBMClassifier(boosting_type="rf", n_estimators=128, max_depth=5, subsample=0.3, bagging_freq=1)
@ -242,6 +249,23 @@ class TestLGBMConverter(unittest.TestCase):
self.assertIsNotNone(torch_model)
np.testing.assert_allclose(model.predict_proba(X), torch_model.predict_proba(X), rtol=1e-06, atol=1e-06)
# Random forest in lgbm
@unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed")
def test_lgbm_classifier_random_forest_gbdt(self):
    """
    Convert an LGBMClassifier trained with boosting_type="gbdt" to the torch
    backend and check that the predicted probabilities match the original model.
    """
    warnings.filterwarnings("ignore")
    classifier = lgb.LGBMClassifier(
        boosting_type="gbdt", n_estimators=128, max_depth=5, subsample=0.3, bagging_freq=1
    )

    # Fixed seed so that the random training data is reproducible.
    np.random.seed(0)
    features = np.array(np.random.rand(100, 200), dtype=np.float32)
    labels = np.random.randint(2, size=100)
    classifier.fit(features, labels)

    converted = hummingbird.ml.convert(classifier, "torch")
    self.assertIsNotNone(converted)

    expected = classifier.predict_proba(features)
    actual = converted.predict_proba(features)
    np.testing.assert_allclose(expected, actual, rtol=1e-06, atol=1e-06)
# Test Tweedie loss in lgbm
@unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed")
def test_lgbm_tweedie(self):

Просмотреть файл

@ -26,7 +26,9 @@ class TestONNXScaler(unittest.TestCase):
model.fit(X)
# Create ONNX-ML model
onnx_ml_model = convert_sklearn(model, initial_types=[("float_input", FloatTensorType(X.shape))])
onnx_ml_model = convert_sklearn(
model, initial_types=[("float_input", FloatTensorType([None, X.shape[1]]))]
)
# Create ONNX model by calling converter
onnx_model = convert(onnx_ml_model, "onnx", X)
@ -132,7 +134,6 @@ class TestONNXScaler(unittest.TestCase):
# Generate test input
onnx_ml_model = convert_sklearn(model, initial_types=[("float_input", FloatTensorType(X.shape))])
print(onnx_ml_model.graph.node[0].attribute[0].name)
onnx_ml_model.graph.node[0].attribute[0].name = "".encode()
self.assertRaises(RuntimeError, convert, onnx_ml_model, "onnx", X)
@ -150,7 +151,8 @@ class TestONNXScaler(unittest.TestCase):
model.fit(X)
# Generate test input
onnx_ml_model = convert_sklearn(model, initial_types=[("double_input", DoubleTensorType(X.shape))])
onnx_ml_model = convert_sklearn(
model, initial_types=[("double_input", DoubleTensorType([None, X.shape[1]]))])
# Create ONNX model by calling converter
onnx_model = convert(onnx_ml_model, "onnx", X)