Add basic support for scikit-learn pipeline (#251)
* Add support for SKL pipelines
This commit is contained in:
Parent 905f8f36b0
Commit 39a79be15c
@@ -62,6 +62,7 @@ jobs:
      - name: Install extra dependencies
        run: |
          pip install .[extra,onnx]
+         pip install pandas
      - name: Lint with flake8
        run: |
          # stop the build if there are Python syntax errors or undefined names
@@ -26,7 +26,7 @@
<details class="source">
<summary>
<span>Expand source code</span>
-<a href="https://github.com/microsoft/hummingbird/blob/master/hummingbird/ml/convert.py#L0-L238" class="git-link">Browse git</a>
+<a href="https://github.com/microsoft/hummingbird/blob/master/hummingbird/ml/convert.py#L0-L246" class="git-link">Browse git</a>
</summary>
<pre><code class="python"># -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
@@ -193,9 +193,9 @@ def _convert_onnxml(model, backend, test_input, device, extra_config={}):
                    type(initial_types[0][1])
                )
            )
        else:
            extra_config[constants.N_FEATURES] = np.array(test_input).shape[1]
            extra_config[constants.TEST_INPUT] = test_input
            extra_config[constants.TEST_INPUT] = test_input
    elif constants.N_FEATURES not in extra_config:
        extra_config[constants.N_FEATURES] = test_input.shape[1]

    # Set the initializers. Some converter requires the access to initializers.
    initializers = {} if model.graph.initializer is None else {in_.name: in_ for in_ in model.graph.initializer}
@@ -249,6 +249,14 @@ def convert(model, backend, test_input=None, device="cpu", extra_config={}):
    if test_input is not None and constants.TEST_INPUT not in extra_config:
        extra_config[constants.TEST_INPUT] = test_input
+
+   # Fix the test_input type
+   if constants.TEST_INPUT in extra_config:
+       if type(extra_config[constants.TEST_INPUT]) == list:
+           extra_config[constants.TEST_INPUT] = np.array(extra_config[constants.TEST_INPUT])
+       elif type(extra_config[constants.TEST_INPUT]) == tuple:
+           extra_config[constants.N_FEATURES] = len(extra_config[constants.TEST_INPUT])
+       test_input = extra_config[constants.TEST_INPUT]

    # We do some normalization on backends.
    backend = backend.lower()
    backend = backends[backend]
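The added block normalizes `test_input` before conversion, so lists and tuples are accepted alongside arrays. A hedged sketch of what this enables at the API surface (illustrative data; assumes hummingbird-ml and scikit-learn are installed):

# Hedged sketch: a plain Python list now works as test_input, since it is
# converted to np.array inside convert().
import numpy as np
from sklearn.linear_model import LogisticRegression
from hummingbird.ml import convert

X = np.random.rand(20, 3).astype(np.float32)
y = np.random.randint(2, size=20)
model = LogisticRegression().fit(X, y)

hb = convert(model, "pytorch", test_input=[[0.1, 0.2, 0.3]])
print(hb.predict(X[:2]))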
@@ -310,7 +318,7 @@ The set of supported extra configurations can be found at <code><a title="hummin
<details class="source">
<summary>
<span>Expand source code</span>
-<a href="https://github.com/microsoft/hummingbird/blob/master/hummingbird/ml/convert.py#L182-L239" class="git-link">Browse git</a>
+<a href="https://github.com/microsoft/hummingbird/blob/master/hummingbird/ml/convert.py#L182-L247" class="git-link">Browse git</a>
</summary>
<pre><code class="python">def convert(model, backend, test_input=None, device="cpu", extra_config={}):
    """
@@ -352,6 +360,14 @@ The set of supported extra configurations can be found at <code><a title="hummin
    if test_input is not None and constants.TEST_INPUT not in extra_config:
        extra_config[constants.TEST_INPUT] = test_input
+
+   # Fix the test_input type
+   if constants.TEST_INPUT in extra_config:
+       if type(extra_config[constants.TEST_INPUT]) == list:
+           extra_config[constants.TEST_INPUT] = np.array(extra_config[constants.TEST_INPUT])
+       elif type(extra_config[constants.TEST_INPUT]) == tuple:
+           extra_config[constants.N_FEATURES] = len(extra_config[constants.TEST_INPUT])
+       test_input = extra_config[constants.TEST_INPUT]

    # We do some normalization on backends.
    backend = backend.lower()
    backend = backends[backend]
@@ -26,7 +26,7 @@
<details class="source">
<summary>
<span>Expand source code</span>
-<a href="https://github.com/microsoft/hummingbird/blob/master/hummingbird/ml/operator_converters/__init__.py#L0-L48" class="git-link">Browse git</a>
+<a href="https://github.com/microsoft/hummingbird/blob/master/hummingbird/ml/operator_converters/__init__.py#L0-L49" class="git-link">Browse git</a>
</summary>
<pre><code class="python"># -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
@@ -62,6 +62,7 @@ from .sklearn import iforest  # noqa: E402
from .sklearn import linear as sklearn_linear  # noqa: E402
from .sklearn import normalizer as sklearn_normalizer  # noqa: E402
from .sklearn import one_hot_encoder as sklearn_ohe  # noqa: E402
+from .sklearn import pipeline  # noqa: E402
from .sklearn import scaler as sklearn_scaler  # noqa: E402
from .sklearn import sv  # noqa: E402
from . import lightgbm  # noqa: E402
@@ -26,7 +26,7 @@
<details class="source">
<summary>
<span>Expand source code</span>
-<a href="https://github.com/microsoft/hummingbird/blob/master/hummingbird/ml/operator_converters/onnx/onnx_operator.py#L0-L115" class="git-link">Browse git</a>
+<a href="https://github.com/microsoft/hummingbird/blob/master/hummingbird/ml/operator_converters/onnx/onnx_operator.py#L0-L120" class="git-link">Browse git</a>
</summary>
<pre><code class="python"># -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
@@ -55,6 +55,8 @@ class Cast(BaseOperator, torch.nn.Module):
        self.to_type = to_type

    def forward(self, x):
        if self.to_type == 1:  # Cast to float
            return x.float()
+       if self.to_type == 7:  # Cast to long
+           return x.long()

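The `to_type` codes follow the ONNX `TensorProto` data-type enum, where 1 is FLOAT and 7 is INT64. A minimal standalone sketch of the extended cast, written outside Hummingbird's `BaseOperator` hierarchy:

# Minimal sketch (assumes only PyTorch): the new branch lets Cast produce
# integer tensors, e.g. for use as index inputs downstream.
import torch

class Cast(torch.nn.Module):
    def __init__(self, to_type):
        super().__init__()
        self.to_type = to_type

    def forward(self, x):
        if self.to_type == 1:  # Cast to float
            return x.float()
        if self.to_type == 7:  # Cast to long
            return x.long()

ids = torch.tensor([0.0, 2.0, 1.0])
print(Cast(7)(ids).dtype)  # torch.int64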
@@ -64,7 +66,10 @@ class Concat(BaseOperator, torch.nn.Module):
        super(Concat, self).__init__()

    def forward(self, *x):
-       return torch.cat(x, dim=1)
+       if len(x[0].shape) > 1:
+           return torch.cat(x, dim=1)
+       else:
+           return torch.stack(x, dim=1)


class Reshape(BaseOperator, torch.nn.Module):
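The branch matters because pipeline stages can emit 1-D per-column outputs: `torch.cat` along `dim=1` is out of range for 1-D tensors, while `torch.stack` turns a tuple of 1-D columns into a (batch, features) matrix. A small illustration:

# Why Concat now switches between cat and stack (PyTorch only, illustrative).
import torch

a, b = torch.tensor([1.0, 2.0]), torch.tensor([3.0, 4.0])  # 1-D per-column outputs
print(torch.stack((a, b), dim=1))    # shape (2, 2): each input becomes a column
print(torch.cat((a, b), dim=0))      # shape (4,): cat would only flatten them

m, n = torch.ones(2, 1), torch.zeros(2, 3)                 # 2-D feature blocks
print(torch.cat((m, n), dim=1).shape)  # torch.Size([2, 4]): unchanged behavior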
@@ -175,7 +180,7 @@ register_converter("ONNXMLReshape", convert_onnx_reshape)</code></pre>
<details class="source">
<summary>
<span>Expand source code</span>
-<a href="https://github.com/microsoft/hummingbird/blob/master/hummingbird/ml/operator_converters/onnx/onnx_operator.py#L50-L71" class="git-link">Browse git</a>
+<a href="https://github.com/microsoft/hummingbird/blob/master/hummingbird/ml/operator_converters/onnx/onnx_operator.py#L55-L76" class="git-link">Browse git</a>
</summary>
<pre><code class="python">def convert_onnx_cast(operator, device=None, extra_config={}):
    """
@@ -223,7 +228,7 @@ register_converter("ONNXMLReshape", convert_onnx_reshape)</code></pre>
<details class="source">
<summary>
<span>Expand source code</span>
-<a href="https://github.com/microsoft/hummingbird/blob/master/hummingbird/ml/operator_converters/onnx/onnx_operator.py#L74-L89" class="git-link">Browse git</a>
+<a href="https://github.com/microsoft/hummingbird/blob/master/hummingbird/ml/operator_converters/onnx/onnx_operator.py#L79-L94" class="git-link">Browse git</a>
</summary>
<pre><code class="python">def convert_onnx_concat(operator, device=None, extra_config={}):
    """
@@ -265,7 +270,7 @@ register_converter("ONNXMLReshape", convert_onnx_reshape)</code></pre>
<details class="source">
<summary>
<span>Expand source code</span>
-<a href="https://github.com/microsoft/hummingbird/blob/master/hummingbird/ml/operator_converters/onnx/onnx_operator.py#L92-L111" class="git-link">Browse git</a>
+<a href="https://github.com/microsoft/hummingbird/blob/master/hummingbird/ml/operator_converters/onnx/onnx_operator.py#L97-L116" class="git-link">Browse git</a>
</summary>
<pre><code class="python">def convert_onnx_reshape(operator, device=None, extra_config={}):
    """
@@ -303,7 +308,7 @@ register_converter("ONNXMLReshape", convert_onnx_reshape)</code></pre>
<details class="source">
<summary>
<span>Expand source code</span>
-<a href="https://github.com/microsoft/hummingbird/blob/master/hummingbird/ml/operator_converters/onnx/onnx_operator.py#L19-L29" class="git-link">Browse git</a>
+<a href="https://github.com/microsoft/hummingbird/blob/master/hummingbird/ml/operator_converters/onnx/onnx_operator.py#L19-L31" class="git-link">Browse git</a>
</summary>
<pre><code class="python">class Cast(BaseOperator, torch.nn.Module):
    def __init__(self, to_type):
@@ -314,6 +319,8 @@ register_converter("ONNXMLReshape", convert_onnx_reshape)</code></pre>
        self.to_type = to_type

    def forward(self, x):
        if self.to_type == 1:  # Cast to float
            return x.float()
+       if self.to_type == 7:  # Cast to long
+           return x.long()</code></pre>
</details>
@@ -326,24 +333,18 @@ register_converter("ONNXMLReshape", convert_onnx_reshape)</code></pre>
<h3>Methods</h3>
<dl>
<dt id="hummingbird.ml.operator_converters.onnx.onnx_operator.Cast.forward"><code class="name flex">
-<span>def <span class="ident">forward</span></span>(<span>self, x)</span>
+<span>def <span class="ident">forward</span></span>(<span>self, x) -> Callable[..., Any]</span>
</code></dt>
<dd>
-<div class="desc"><p>Defines the computation performed at every call.</p>
-<p>Should be overridden by all subclasses.</p>
-<div class="admonition note">
-<p class="admonition-title">Note</p>
-<p>Although the recipe for forward pass needs to be defined within
-this function, one should call the :class:<code>Module</code> instance afterwards
-instead of this since the former takes care of running the
-registered hooks while the latter silently ignores them.</p>
-</div></div>
+<div class="desc"></div>
<details class="source">
<summary>
<span>Expand source code</span>
-<a href="https://github.com/microsoft/hummingbird/blob/master/hummingbird/ml/operator_converters/onnx/onnx_operator.py#L27-L29" class="git-link">Browse git</a>
+<a href="https://github.com/microsoft/hummingbird/blob/master/hummingbird/ml/operator_converters/onnx/onnx_operator.py#L27-L31" class="git-link">Browse git</a>
</summary>
<pre><code class="python">def forward(self, x):
    if self.to_type == 1:  # Cast to float
        return x.float()
+   if self.to_type == 7:  # Cast to long
+       return x.long()</code></pre>
</details>
@@ -358,14 +359,17 @@ registered hooks while the latter silently ignores them.</p>
<details class="source">
<summary>
<span>Expand source code</span>
-<a href="https://github.com/microsoft/hummingbird/blob/master/hummingbird/ml/operator_converters/onnx/onnx_operator.py#L32-L37" class="git-link">Browse git</a>
+<a href="https://github.com/microsoft/hummingbird/blob/master/hummingbird/ml/operator_converters/onnx/onnx_operator.py#L34-L42" class="git-link">Browse git</a>
</summary>
<pre><code class="python">class Concat(BaseOperator, torch.nn.Module):
    def __init__(self):
        super(Concat, self).__init__()

    def forward(self, *x):
-       return torch.cat(x, dim=1)</code></pre>
+       if len(x[0].shape) > 1:
+           return torch.cat(x, dim=1)
+       else:
+           return torch.stack(x, dim=1)</code></pre>
</details>
<h3>Ancestors</h3>
<ul class="hlist">
@@ -376,25 +380,20 @@ registered hooks while the latter silently ignores them.</p>
<h3>Methods</h3>
<dl>
<dt id="hummingbird.ml.operator_converters.onnx.onnx_operator.Concat.forward"><code class="name flex">
-<span>def <span class="ident">forward</span></span>(<span>self, *x)</span>
+<span>def <span class="ident">forward</span></span>(<span>self, *x) -> Callable[..., Any]</span>
</code></dt>
<dd>
-<div class="desc"><p>Defines the computation performed at every call.</p>
-<p>Should be overridden by all subclasses.</p>
-<div class="admonition note">
-<p class="admonition-title">Note</p>
-<p>Although the recipe for forward pass needs to be defined within
-this function, one should call the :class:<code>Module</code> instance afterwards
-instead of this since the former takes care of running the
-registered hooks while the latter silently ignores them.</p>
-</div></div>
+<div class="desc"></div>
<details class="source">
<summary>
<span>Expand source code</span>
-<a href="https://github.com/microsoft/hummingbird/blob/master/hummingbird/ml/operator_converters/onnx/onnx_operator.py#L36-L37" class="git-link">Browse git</a>
+<a href="https://github.com/microsoft/hummingbird/blob/master/hummingbird/ml/operator_converters/onnx/onnx_operator.py#L38-L42" class="git-link">Browse git</a>
</summary>
<pre><code class="python">def forward(self, *x):
-   return torch.cat(x, dim=1)</code></pre>
+   if len(x[0].shape) > 1:
+       return torch.cat(x, dim=1)
+   else:
+       return torch.stack(x, dim=1)</code></pre>
</details>
</dd>
</dl>
@@ -408,7 +407,7 @@ registered hooks while the latter silently ignores them.</p>
<details class="source">
<summary>
<span>Expand source code</span>
-<a href="https://github.com/microsoft/hummingbird/blob/master/hummingbird/ml/operator_converters/onnx/onnx_operator.py#L40-L47" class="git-link">Browse git</a>
+<a href="https://github.com/microsoft/hummingbird/blob/master/hummingbird/ml/operator_converters/onnx/onnx_operator.py#L45-L52" class="git-link">Browse git</a>
</summary>
<pre><code class="python">class Reshape(BaseOperator, torch.nn.Module):
    def __init__(self, shape):
@@ -428,22 +427,14 @@ registered hooks while the latter silently ignores them.</p>
<h3>Methods</h3>
<dl>
<dt id="hummingbird.ml.operator_converters.onnx.onnx_operator.Reshape.forward"><code class="name flex">
-<span>def <span class="ident">forward</span></span>(<span>self, x)</span>
+<span>def <span class="ident">forward</span></span>(<span>self, x) -> Callable[..., Any]</span>
</code></dt>
<dd>
-<div class="desc"><p>Defines the computation performed at every call.</p>
-<p>Should be overridden by all subclasses.</p>
-<div class="admonition note">
-<p class="admonition-title">Note</p>
-<p>Although the recipe for forward pass needs to be defined within
-this function, one should call the :class:<code>Module</code> instance afterwards
-instead of this since the former takes care of running the
-registered hooks while the latter silently ignores them.</p>
-</div></div>
+<div class="desc"></div>
<details class="source">
<summary>
<span>Expand source code</span>
-<a href="https://github.com/microsoft/hummingbird/blob/master/hummingbird/ml/operator_converters/onnx/onnx_operator.py#L46-L47" class="git-link">Browse git</a>
+<a href="https://github.com/microsoft/hummingbird/blob/master/hummingbird/ml/operator_converters/onnx/onnx_operator.py#L51-L52" class="git-link">Browse git</a>
</summary>
<pre><code class="python">def forward(self, x):
    return torch.reshape(x, self.shape)</code></pre>
@@ -158,18 +158,10 @@ register_converter("SklearnBinarizer", convert_sklearn_binarizer)</code>
<h3>Methods</h3>
<dl>
<dt id="hummingbird.ml.operator_converters.sklearn.binarizer.Binarizer.forward"><code class="name flex">
-<span>def <span class="ident">forward</span></span>(<span>self, x)</span>
+<span>def <span class="ident">forward</span></span>(<span>self, x) -> Callable[..., Any]</span>
</code></dt>
<dd>
-<div class="desc"><p>Defines the computation performed at every call.</p>
-<p>Should be overridden by all subclasses.</p>
-<div class="admonition note">
-<p class="admonition-title">Note</p>
-<p>Although the recipe for forward pass needs to be defined within
-this function, one should call the :class:<code>Module</code> instance afterwards
-instead of this since the former takes care of running the
-registered hooks while the latter silently ignores them.</p>
-</div></div>
+<div class="desc"></div>
<details class="source">
<summary>
<span>Expand source code</span>
@@ -74,6 +74,10 @@ All scikit-learn operators converters are stored under this package.
<dd>
<div class="desc"><p>Converter for scikit-learn one hot encoder.</p></div>
</dd>
+<dt><code class="name"><a title="hummingbird.ml.operator_converters.sklearn.pipeline" href="pipeline.html">hummingbird.ml.operator_converters.sklearn.pipeline</a></code></dt>
+<dd>
+<div class="desc"><p>Converters for operators necessary for supporting scikit-learn Pipelines.</p></div>
+</dd>
<dt><code class="name"><a title="hummingbird.ml.operator_converters.sklearn.scaler" href="scaler.html">hummingbird.ml.operator_converters.sklearn.scaler</a></code></dt>
<dd>
<div class="desc"><p>Converters for scikit-learn scalers: RobustScaler, MaxAbsScaler, MinMaxScaler, StandardScaler.</p></div>

@@ -117,6 +121,7 @@ All scikit-learn operators converters are stored under this package.
<li><code><a title="hummingbird.ml.operator_converters.sklearn.linear" href="linear.html">hummingbird.ml.operator_converters.sklearn.linear</a></code></li>
<li><code><a title="hummingbird.ml.operator_converters.sklearn.normalizer" href="normalizer.html">hummingbird.ml.operator_converters.sklearn.normalizer</a></code></li>
<li><code><a title="hummingbird.ml.operator_converters.sklearn.one_hot_encoder" href="one_hot_encoder.html">hummingbird.ml.operator_converters.sklearn.one_hot_encoder</a></code></li>
+<li><code><a title="hummingbird.ml.operator_converters.sklearn.pipeline" href="pipeline.html">hummingbird.ml.operator_converters.sklearn.pipeline</a></code></li>
<li><code><a title="hummingbird.ml.operator_converters.sklearn.scaler" href="scaler.html">hummingbird.ml.operator_converters.sklearn.scaler</a></code></li>
<li><code><a title="hummingbird.ml.operator_converters.sklearn.sv" href="sv.html">hummingbird.ml.operator_converters.sklearn.sv</a></code></li>
</ul>
@@ -60,7 +60,7 @@ def convert_sklearn_linear_model(operator, device, extra_config):
    """
    classes = [0] if not hasattr(operator.raw_operator, "classes_") else operator.raw_operator.classes_

-   if not all([type(x) in [int, np.int32, np.int64] for x in classes]):
+   if not all(["int" in str(type(x)) for x in classes]):
        raise RuntimeError(
            "Hummingbird currently supports only integer labels for class labels. Please file an issue at https://github.com/microsoft/hummingbird."
        )

@@ -153,7 +153,7 @@ register_converter("SklearnLogisticRegressionCV", convert_sklearn_linear
    """
    classes = [0] if not hasattr(operator.raw_operator, "classes_") else operator.raw_operator.classes_

-   if not all([type(x) in [int, np.int32, np.int64] for x in classes]):
+   if not all(["int" in str(type(x)) for x in classes]):
        raise RuntimeError(
            "Hummingbird currently supports only integer labels for class labels. Please file an issue at https://github.com/microsoft/hummingbird."
        )
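The relaxed predicate accepts any NumPy integer dtype instead of only the three explicitly listed types. A sketch comparing the two checks (illustrative labels):

# Why the new check matters: np.int8 labels were rejected by the old list.
import numpy as np

classes = np.array([0, 1, 2], dtype=np.int8)

old_ok = all([type(x) in [int, np.int32, np.int64] for x in classes])
new_ok = all(["int" in str(type(x)) for x in classes])
print(old_ok, new_ok)  # False True: "<class 'numpy.int8'>" contains "int"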
@@ -0,0 +1,416 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1, minimum-scale=1" />
<meta name="generator" content="pdoc 0.8.1" />
<title>hummingbird.ml.operator_converters.sklearn.pipeline API documentation</title>
<meta name="description" content="Converters for operators necessary for supporting scikit-learn Pipelines." />
<link href='https://cdnjs.cloudflare.com/ajax/libs/normalize/8.0.0/normalize.min.css' rel='stylesheet'>
<link href='https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/8.0.0/sanitize.min.css' rel='stylesheet'>
<link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/github.min.css" rel="stylesheet">
<style>.flex{display:flex !important}body{line-height:1.5em}#content{padding:20px}#sidebar{padding:30px;overflow:hidden}#sidebar > *:last-child{margin-bottom:2cm}.http-server-breadcrumbs{font-size:130%;margin:0 0 15px 0}#footer{font-size:.75em;padding:5px 30px;border-top:1px solid #ddd;text-align:right}#footer p{margin:0 0 0 1em;display:inline-block}#footer p:last-child{margin-right:30px}h1,h2,h3,h4,h5{font-weight:300}h1{font-size:2.5em;line-height:1.1em}h2{font-size:1.75em;margin:1em 0 .50em 0}h3{font-size:1.4em;margin:25px 0 10px 0}h4{margin:0;font-size:105%}a{color:#058;text-decoration:none;transition:color .3s ease-in-out}a:hover{color:#e82}.title code{font-weight:bold}h2[id^="header-"]{margin-top:2em}.ident{color:#900}pre code{background:#f8f8f8;font-size:.8em;line-height:1.4em}code{background:#f2f2f1;padding:1px 4px;overflow-wrap:break-word}h1 code{background:transparent}pre{background:#f8f8f8;border:0;border-top:1px solid #ccc;border-bottom:1px solid #ccc;margin:1em 0;padding:1ex}#http-server-module-list{display:flex;flex-flow:column}#http-server-module-list div{display:flex}#http-server-module-list dt{min-width:10%}#http-server-module-list p{margin-top:0}.toc ul,#index{list-style-type:none;margin:0;padding:0}#index code{background:transparent}#index h3{border-bottom:1px solid #ddd}#index ul{padding:0}#index h4{margin-top:.6em;font-weight:bold}@media (min-width:200ex){#index .two-column{column-count:2}}@media (min-width:300ex){#index .two-column{column-count:3}}dl{margin-bottom:2em}dl dl:last-child{margin-bottom:4em}dd{margin:0 0 1em 3em}#header-classes + dl > dd{margin-bottom:3em}dd dd{margin-left:2em}dd p{margin:10px 0}.name{background:#eee;font-weight:bold;font-size:.85em;padding:5px 10px;display:inline-block;min-width:40%}.name:hover{background:#e0e0e0}.name > span:first-child{white-space:nowrap}.name.class > span:nth-child(2){margin-left:.4em}.inherited{color:#999;border-left:5px solid #eee;padding-left:1em}.inheritance em{font-style:normal;font-weight:bold}.desc h2{font-weight:400;font-size:1.25em}.desc h3{font-size:1em}.desc dt code{background:inherit}.source summary,.git-link-div{color:#666;text-align:right;font-weight:400;font-size:.8em;text-transform:uppercase}.source summary > *{white-space:nowrap;cursor:pointer}.git-link{color:inherit;margin-left:1em}.source pre{max-height:500px;overflow:auto;margin:0}.source pre code{font-size:12px;overflow:visible}.hlist{list-style:none}.hlist li{display:inline}.hlist li:after{content:',\2002'}.hlist li:last-child:after{content:none}.hlist .hlist{display:inline;padding-left:1em}img{max-width:100%}.admonition{padding:.1em .5em;margin-bottom:1em}.admonition-title{font-weight:bold}.admonition.note,.admonition.info,.admonition.important{background:#aef}.admonition.todo,.admonition.versionadded,.admonition.tip,.admonition.hint{background:#dfd}.admonition.warning,.admonition.versionchanged,.admonition.deprecated{background:#fd4}.admonition.error,.admonition.danger,.admonition.caution{background:lightpink}</style>
<style media="screen and (min-width: 700px)">@media screen and (min-width:700px){#sidebar{width:30%;height:100vh;overflow:auto;position:sticky;top:0}#content{width:70%;max-width:100ch;padding:3em 4em;border-left:1px solid #ddd}pre code{font-size:1em}.item .name{font-size:1em}main{display:flex;flex-direction:row-reverse;justify-content:flex-end}.toc ul ul,#index ul{padding-left:1.5em}.toc > ul > li{margin-top:.5em}}</style>
<style media="print">@media print{#sidebar h1{page-break-before:always}.source{display:none}}@media print{*{background:transparent !important;color:#000 !important;box-shadow:none !important;text-shadow:none !important}a[href]:after{content:" (" attr(href) ")";font-size:90%}a[href][title]:after{content:none}abbr[title]:after{content:" (" attr(title) ")"}.ir a:after,a[href^="javascript:"]:after,a[href^="#"]:after{content:""}pre,blockquote{border:1px solid #999;page-break-inside:avoid}thead{display:table-header-group}tr,img{page-break-inside:avoid}img{max-width:100% !important}@page{margin:0.5cm}p,h2,h3{orphans:3;widows:3}h1,h2,h3,h4,h5,h6{page-break-after:avoid}}</style>
<style>.homelink{display:block;font-size:2em;font-weight:bold;color:#555;padding-bottom:.5em;border-bottom:1px solid silver}.homelink:hover{color:inherit}.homelink img{max-width:20%;max-height:5em;margin:auto;margin-bottom:.3em}</style>
<link rel="canonical" href="https://microsoft.github.io/hummingbird/ml/operator_converters/sklearn/pipeline.html">
</head>
<body>
<main>
<article id="content">
<header>
<h1 class="title">Module <code>hummingbird.ml.operator_converters.sklearn.pipeline</code></h1>
</header>
<section id="section-intro">
<p>Converters for operators necessary for supporting scikit-learn Pipelines.</p>
<details class="source">
<summary>
<span>Expand source code</span>
<a href="https://github.com/microsoft/hummingbird/blob/master/hummingbird/ml/operator_converters/sklearn/pipeline.py#L0-L101" class="git-link">Browse git</a>
</summary>
<pre><code class="python"># -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See License.txt in the project root for
# license information.
# --------------------------------------------------------------------------

"""
Converters for operators necessary for supporting scikit-learn Pipelines.
"""

import numpy as np
from onnxconverter_common.registration import register_converter
import torch

from .. import constants
from .._array_feature_extractor_implementations import ArrayFeatureExtractor
from .._base_operator import BaseOperator


class Concat(BaseOperator, torch.nn.Module):
    """
    Module used to concatenate tensors into a single tensor.
    """

    def __init__(self):
        super(Concat, self).__init__()

    def forward(self, *x):
        return torch.cat(x, dim=1)


class Multiply(BaseOperator, torch.nn.Module):
    """
    Module used to multiply features in a pipeline by a score.
    """

    def __init__(self, score):
        super(Multiply, self).__init__()

        self.score = score

    def forward(self, x):
        return x * self.score


def convert_sklearn_array_feature_extractor(operator, device, extra_config):
    """
    Converter for ArrayFeatureExtractor.

    Args:
        operator: An operator wrapping an ArrayFeatureExtractor operator
        device: String defining the type of device the converted operator should be run on
        extra_config: Extra configuration used to select the best conversion strategy

    Returns:
        A PyTorch model
    """
    assert operator is not None

    indices = operator.column_indices
    return ArrayFeatureExtractor(np.ascontiguousarray(indices), device)


def convert_sklearn_concat(operator, device=None, extra_config={}):
    """
    Converter for concat operators injected when parsing Sklearn pipelines.

    Args:
        operator: An empty operator
        device: String defining the type of device the converted operator should be run on
        extra_config: Extra configuration used to select the best conversion strategy

    Returns:
        A PyTorch model
    """
    return Concat()


def convert_sklearn_multiply(operator, device=None, extra_config={}):
    """
    Converter for multiply operators injected when parsing Sklearn pipelines.

    Args:
        operator: An empty operator
        device: String defining the type of device the converted operator should be run on
        extra_config: Extra configuration used to select the best conversion strategy

    Returns:
        A PyTorch model
    """
    assert operator is not None
    assert hasattr(operator, "operand")

    score = operator.operand

    # Generate the model.
    return Multiply(score)


register_converter("SklearnArrayFeatureExtractor", convert_sklearn_array_feature_extractor)
register_converter("SklearnConcat", convert_sklearn_concat)
register_converter("SklearnMultiply", convert_sklearn_multiply)</code></pre>
</details>
</section>
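A hedged end-to-end sketch of what this new module enables (assumes hummingbird-ml, scikit-learn, and numpy are installed; API as documented for convert() above). Each pipeline stage is parsed in order and its output variables feed the next stage:

import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from hummingbird.ml import convert

X = np.random.rand(100, 4).astype(np.float32)
y = np.random.randint(2, size=100)

pipe = Pipeline([("scaler", StandardScaler()), ("clf", LogisticRegression())])
pipe.fit(X, y)

hb_model = convert(pipe, "pytorch")  # scaler and classifier become one torch model
print(hb_model.predict(X[:5]))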
<section>
</section>
<section>
</section>
<section>
<h2 class="section-title" id="header-functions">Functions</h2>
<dl>
<dt id="hummingbird.ml.operator_converters.sklearn.pipeline.convert_sklearn_array_feature_extractor"><code class="name flex">
<span>def <span class="ident">convert_sklearn_array_feature_extractor</span></span>(<span>operator, device, extra_config)</span>
</code></dt>
<dd>
<div class="desc"><p>Converter for ArrayFeatureExtractor.</p>
<h2 id="args">Args</h2>
<dl>
<dt><strong><code>operator</code></strong></dt>
<dd>An operator wrapping an ArrayFeatureExtractor operator</dd>
<dt><strong><code>device</code></strong></dt>
<dd>String defining the type of device the converted operator should be run on</dd>
<dt><strong><code>extra_config</code></strong></dt>
<dd>Extra configuration used to select the best conversion strategy</dd>
</dl>
<h2 id="returns">Returns</h2>
<dl>
<dt><code>A PyTorch model</code></dt>
<dd> </dd>
</dl></div>
<details class="source">
<summary>
<span>Expand source code</span>
<a href="https://github.com/microsoft/hummingbird/blob/master/hummingbird/ml/operator_converters/sklearn/pipeline.py#L46-L61" class="git-link">Browse git</a>
</summary>
<pre><code class="python">def convert_sklearn_array_feature_extractor(operator, device, extra_config):
    """
    Converter for ArrayFeatureExtractor.

    Args:
        operator: An operator wrapping an ArrayFeatureExtractor operator
        device: String defining the type of device the converted operator should be run on
        extra_config: Extra configuration used to select the best conversion strategy

    Returns:
        A PyTorch model
    """
    assert operator is not None

    indices = operator.column_indices
    return ArrayFeatureExtractor(np.ascontiguousarray(indices), device)</code></pre>
</details>
</dd>
<dt id="hummingbird.ml.operator_converters.sklearn.pipeline.convert_sklearn_concat"><code class="name flex">
<span>def <span class="ident">convert_sklearn_concat</span></span>(<span>operator, device=None, extra_config={})</span>
</code></dt>
<dd>
<div class="desc"><p>Converter for concat operators injected when parsing Sklearn pipelines.</p>
<h2 id="args">Args</h2>
<dl>
<dt><strong><code>operator</code></strong></dt>
<dd>An empty operator</dd>
<dt><strong><code>device</code></strong></dt>
<dd>String defining the type of device the converted operator should be run on</dd>
<dt><strong><code>extra_config</code></strong></dt>
<dd>Extra configuration used to select the best conversion strategy</dd>
</dl>
<h2 id="returns">Returns</h2>
<dl>
<dt><code>A PyTorch model</code></dt>
<dd> </dd>
</dl></div>
<details class="source">
<summary>
<span>Expand source code</span>
<a href="https://github.com/microsoft/hummingbird/blob/master/hummingbird/ml/operator_converters/sklearn/pipeline.py#L64-L76" class="git-link">Browse git</a>
</summary>
<pre><code class="python">def convert_sklearn_concat(operator, device=None, extra_config={}):
    """
    Converter for concat operators injected when parsing Sklearn pipelines.

    Args:
        operator: An empty operator
        device: String defining the type of device the converted operator should be run on
        extra_config: Extra configuration used to select the best conversion strategy

    Returns:
        A PyTorch model
    """
    return Concat()</code></pre>
</details>
</dd>
<dt id="hummingbird.ml.operator_converters.sklearn.pipeline.convert_sklearn_multiply"><code class="name flex">
<span>def <span class="ident">convert_sklearn_multiply</span></span>(<span>operator, device=None, extra_config={})</span>
</code></dt>
<dd>
<div class="desc"><p>Converter for multiply operators injected when parsing Sklearn pipelines.</p>
<h2 id="args">Args</h2>
<dl>
<dt><strong><code>operator</code></strong></dt>
<dd>An empty operator</dd>
<dt><strong><code>device</code></strong></dt>
<dd>String defining the type of device the converted operator should be run on</dd>
<dt><strong><code>extra_config</code></strong></dt>
<dd>Extra configuration used to select the best conversion strategy</dd>
</dl>
<h2 id="returns">Returns</h2>
<dl>
<dt><code>A PyTorch model</code></dt>
<dd> </dd>
</dl></div>
<details class="source">
<summary>
<span>Expand source code</span>
<a href="https://github.com/microsoft/hummingbird/blob/master/hummingbird/ml/operator_converters/sklearn/pipeline.py#L79-L97" class="git-link">Browse git</a>
</summary>
<pre><code class="python">def convert_sklearn_multiply(operator, device=None, extra_config={}):
    """
    Converter for multiply operators injected when parsing Sklearn pipelines.

    Args:
        operator: An empty operator
        device: String defining the type of device the converted operator should be run on
        extra_config: Extra configuration used to select the best conversion strategy

    Returns:
        A PyTorch model
    """
    assert operator is not None
    assert hasattr(operator, "operand")

    score = operator.operand

    # Generate the model.
    return Multiply(score)</code></pre>
</details>
</dd>
</dl>
</section>
<section>
<h2 class="section-title" id="header-classes">Classes</h2>
<dl>
<dt id="hummingbird.ml.operator_converters.sklearn.pipeline.Concat"><code class="flex name class">
<span>class <span class="ident">Concat</span></span>
</code></dt>
<dd>
<div class="desc"><p>Module used to concatenate tensors into a single tensor.</p></div>
<details class="source">
<summary>
<span>Expand source code</span>
<a href="https://github.com/microsoft/hummingbird/blob/master/hummingbird/ml/operator_converters/sklearn/pipeline.py#L20-L29" class="git-link">Browse git</a>
</summary>
<pre><code class="python">class Concat(BaseOperator, torch.nn.Module):
    """
    Module used to concatenate tensors into a single tensor.
    """

    def __init__(self):
        super(Concat, self).__init__()

    def forward(self, *x):
        return torch.cat(x, dim=1)</code></pre>
</details>
<h3>Ancestors</h3>
<ul class="hlist">
<li>hummingbird.ml.operator_converters._base_operator.BaseOperator</li>
<li>abc.ABC</li>
<li>torch.nn.modules.module.Module</li>
</ul>
<h3>Methods</h3>
<dl>
<dt id="hummingbird.ml.operator_converters.sklearn.pipeline.Concat.forward"><code class="name flex">
<span>def <span class="ident">forward</span></span>(<span>self, *x) -> Callable[..., Any]</span>
</code></dt>
<dd>
<div class="desc"></div>
<details class="source">
<summary>
<span>Expand source code</span>
<a href="https://github.com/microsoft/hummingbird/blob/master/hummingbird/ml/operator_converters/sklearn/pipeline.py#L28-L29" class="git-link">Browse git</a>
</summary>
<pre><code class="python">def forward(self, *x):
    return torch.cat(x, dim=1)</code></pre>
</details>
</dd>
</dl>
</dd>
<dt id="hummingbird.ml.operator_converters.sklearn.pipeline.Multiply"><code class="flex name class">
<span>class <span class="ident">Multiply</span></span>
<span>(</span><span>score)</span>
</code></dt>
<dd>
<div class="desc"><p>Module used to multiply features in a pipeline by a score.</p></div>
<details class="source">
<summary>
<span>Expand source code</span>
<a href="https://github.com/microsoft/hummingbird/blob/master/hummingbird/ml/operator_converters/sklearn/pipeline.py#L32-L43" class="git-link">Browse git</a>
</summary>
<pre><code class="python">class Multiply(BaseOperator, torch.nn.Module):
    """
    Module used to multiply features in a pipeline by a score.
    """

    def __init__(self, score):
        super(Multiply, self).__init__()

        self.score = score

    def forward(self, x):
        return x * self.score</code></pre>
</details>
<h3>Ancestors</h3>
<ul class="hlist">
<li>hummingbird.ml.operator_converters._base_operator.BaseOperator</li>
<li>abc.ABC</li>
<li>torch.nn.modules.module.Module</li>
</ul>
<h3>Methods</h3>
<dl>
<dt id="hummingbird.ml.operator_converters.sklearn.pipeline.Multiply.forward"><code class="name flex">
<span>def <span class="ident">forward</span></span>(<span>self, x) -> Callable[..., Any]</span>
</code></dt>
<dd>
<div class="desc"></div>
<details class="source">
<summary>
<span>Expand source code</span>
<a href="https://github.com/microsoft/hummingbird/blob/master/hummingbird/ml/operator_converters/sklearn/pipeline.py#L42-L43" class="git-link">Browse git</a>
</summary>
<pre><code class="python">def forward(self, x):
    return x * self.score</code></pre>
</details>
</dd>
</dl>
</dd>
</dl>
</section>
</article>
<nav id="sidebar">
<header>
<a class="homelink" rel="home" title="Hummingbird Home" href="https://github.com/microsoft/hummingbird"> Hummingbird
</a>
</header>
<h1>Index</h1>
<div class="toc">
<ul></ul>
</div>
<ul id="index">
<li><h3>Super-module</h3>
<ul>
<li><code><a title="hummingbird.ml.operator_converters.sklearn" href="index.html">hummingbird.ml.operator_converters.sklearn</a></code></li>
</ul>
</li>
<li><h3><a href="#header-functions">Functions</a></h3>
<ul class="">
<li><code><a title="hummingbird.ml.operator_converters.sklearn.pipeline.convert_sklearn_array_feature_extractor" href="#hummingbird.ml.operator_converters.sklearn.pipeline.convert_sklearn_array_feature_extractor">convert_sklearn_array_feature_extractor</a></code></li>
<li><code><a title="hummingbird.ml.operator_converters.sklearn.pipeline.convert_sklearn_concat" href="#hummingbird.ml.operator_converters.sklearn.pipeline.convert_sklearn_concat">convert_sklearn_concat</a></code></li>
<li><code><a title="hummingbird.ml.operator_converters.sklearn.pipeline.convert_sklearn_multiply" href="#hummingbird.ml.operator_converters.sklearn.pipeline.convert_sklearn_multiply">convert_sklearn_multiply</a></code></li>
</ul>
</li>
<li><h3><a href="#header-classes">Classes</a></h3>
<ul>
<li>
<h4><code><a title="hummingbird.ml.operator_converters.sklearn.pipeline.Concat" href="#hummingbird.ml.operator_converters.sklearn.pipeline.Concat">Concat</a></code></h4>
<ul class="">
<li><code><a title="hummingbird.ml.operator_converters.sklearn.pipeline.Concat.forward" href="#hummingbird.ml.operator_converters.sklearn.pipeline.Concat.forward">forward</a></code></li>
</ul>
</li>
<li>
<h4><code><a title="hummingbird.ml.operator_converters.sklearn.pipeline.Multiply" href="#hummingbird.ml.operator_converters.sklearn.pipeline.Multiply">Multiply</a></code></h4>
<ul class="">
<li><code><a title="hummingbird.ml.operator_converters.sklearn.pipeline.Multiply.forward" href="#hummingbird.ml.operator_converters.sklearn.pipeline.Multiply.forward">forward</a></code></li>
</ul>
</li>
</ul>
</li>
</ul>
</nav>
</main>
<footer id="footer">
<p>Generated by <a href="https://pdoc3.github.io/pdoc"><cite>pdoc</cite> 0.8.1</a>.</p>
</footer>
<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/highlight.min.js"></script>
<script>hljs.initHighlightingOnLoad()</script>
</body>
</html>
@@ -303,18 +303,10 @@ register_converter("SklearnNuSVC", convert_sklearn_svc_model)</code></pr
<h3>Methods</h3>
<dl>
<dt id="hummingbird.ml.operator_converters.sklearn.sv.SVC.forward"><code class="name flex">
-<span>def <span class="ident">forward</span></span>(<span>self, x)</span>
+<span>def <span class="ident">forward</span></span>(<span>self, x) -> Callable[..., Any]</span>
</code></dt>
<dd>
-<div class="desc"><p>Defines the computation performed at every call.</p>
-<p>Should be overridden by all subclasses.</p>
-<div class="admonition note">
-<p class="admonition-title">Note</p>
-<p>Although the recipe for forward pass needs to be defined within
-this function, one should call the :class:<code>Module</code> instance afterwards
-instead of this since the former takes care of running the
-registered hooks while the latter silently ignores them.</p>
-</div></div>
+<div class="desc"></div>
<details class="source">
<summary>
<span>Expand source code</span>
@@ -64,7 +64,7 @@ XGBRegressor</p>
<details class="source">
<summary>
<span>Expand source code</span>
-<a href="https://github.com/microsoft/hummingbird/blob/master/hummingbird/ml/supported.py#L0-L295" class="git-link">Browse git</a>
+<a href="https://github.com/microsoft/hummingbird/blob/master/hummingbird/ml/supported.py#L0-L305" class="git-link">Browse git</a>
</summary>
<pre><code class="python"># -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
@@ -288,7 +288,17 @@ def _build_sklearn_api_operator_name_map():
    Associate Sklearn with the operator class names.
    If two scikit-learn (API) models share a single name, it means they are equivalent in terms of conversion.
    """
-   return {k: "Sklearn" + k.__name__ for k in sklearn_operator_list + xgb_operator_list + lgbm_operator_list}
+   # Pipeline ops. These are ops injected by the parser, not "real" sklearn operators.
+   pipeline_operator_list = [
+       "ArrayFeatureExtractor",
+       "Concat",
+       "Multiply",
+   ]
+
+   return {
+       k: "Sklearn" + k.__name__ if hasattr(k, "__name__") else k
+       for k in sklearn_operator_list + pipeline_operator_list + xgb_operator_list + lgbm_operator_list
+   }


def _build_onnxml_api_operator_name_map():
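The conditional expression binds as `("Sklearn" + k.__name__) if hasattr(k, "__name__") else k`, so classes get the `Sklearn` prefix while the injected pipeline ops, which are plain strings, map to themselves. A sketch with a stand-in operator list (the real lists live in supported.py):

# Illustrative only: LogisticRegression stands in for sklearn_operator_list.
from sklearn.linear_model import LogisticRegression

sklearn_operator_list = [LogisticRegression]
pipeline_operator_list = ["ArrayFeatureExtractor", "Concat", "Multiply"]

name_map = {
    k: "Sklearn" + k.__name__ if hasattr(k, "__name__") else k
    for k in sklearn_operator_list + pipeline_operator_list
}
print(name_map[LogisticRegression])  # 'SklearnLogisticRegression'
print(name_map["Concat"])            # 'Concat' (string keys pass through unchanged)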
@@ -417,7 +427,7 @@ CONTAINER = "container"
<details class="source">
<summary>
<span>Expand source code</span>
-<a href="https://github.com/microsoft/hummingbird/blob/master/hummingbird/ml/supported.py#L250-L263" class="git-link">Browse git</a>
+<a href="https://github.com/microsoft/hummingbird/blob/master/hummingbird/ml/supported.py#L260-L273" class="git-link">Browse git</a>
</summary>
<pre><code class="python">def get_onnxml_api_operator_name(model_type):
    """
@@ -454,7 +464,7 @@ or an object with scikit-learn API (e.g., LightGBM)</dd>
<details class="source">
<summary>
<span>Expand source code</span>
-<a href="https://github.com/microsoft/hummingbird/blob/master/hummingbird/ml/supported.py#L234-L247" class="git-link">Browse git</a>
+<a href="https://github.com/microsoft/hummingbird/blob/master/hummingbird/ml/supported.py#L244-L257" class="git-link">Browse git</a>
</summary>
<pre><code class="python">def get_sklearn_api_operator_name(model_type):
    """
@@ -52,24 +52,54 @@ class PyTorchBackendModel(torch.nn.Module):
        extra_config: Some additional custom configuration parameter
        """
        super(PyTorchBackendModel, self).__init__()
-       self._input_names = input_names
-       self._output_names = output_names
+
+       # Define input \ output names.
+       # This is required because the internal variable names may differ from the original (raw) one.
+       # This may happen, for instance, because we force our internal naming to be unique.
+       def _fix_var_naming(operators, names, mod="input"):
+           new_names = []
+           map = {}
+
+           for op in operators:
+               if mod == "input":
+                   iter = op.inputs
+               else:
+                   iter = op.outputs
+               for i in iter:
+                   for name in names:
+                       if i.raw_name == name and name not in map:
+                           map[i.raw_name] = i.full_name
+               if len(map) == len(names):
+                   break
+           for name in names:
+               new_names.append(map[name])
+           return new_names
+
+       self._input_names = _fix_var_naming(operators, input_names)
+       self._output_names = _fix_var_naming(reversed(operators), output_names, "output")
        self._operator_map = torch.nn.ModuleDict(operator_map)
        self._operators = operators

    def forward(self, *inputs):
        with torch.no_grad():
            assert len(self._input_names) == len(inputs)

            inputs = [*inputs]
            variable_map = {}
            device = _get_device(self)

            # Maps data inputs to the expected variables.
            for i, input_name in enumerate(self._input_names):
                if type(inputs[i]) is list:
                    inputs[i] = np.array(inputs[i])
                if type(inputs[i]) is np.ndarray:
-                   inputs[i] = torch.from_numpy(inputs[i]).float()
+                   inputs[i] = torch.from_numpy(inputs[i])
+                   if inputs[i].dtype == torch.float64:
+                       # We convert double precision arrays into single precision. Sklearn does the same.
+                       inputs[i] = inputs[i].float()
                elif type(inputs[i]) is not torch.Tensor:
                    raise RuntimeError("Input tensor {} of unsupported type {}".format(input_name, type(inputs[i])))
-               if device != "cpu":
+               if device is not None and device.type != "cpu":
                    inputs[i] = inputs[i].to(device)
                variable_map[input_name] = inputs[i]
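Hedged illustration of the new input handling: float64 NumPy arrays are downcast to float32 (as scikit-learn does), while integer inputs are no longer force-cast to float:

import numpy as np
import torch

x = np.random.rand(3, 2)               # NumPy defaults to float64
t = torch.from_numpy(x)
if t.dtype == torch.float64:
    t = t.float()                       # single precision for the converted model
print(t.dtype)                          # torch.float32

ids = torch.from_numpy(np.array([[1, 2]], dtype=np.int64))
print(ids.dtype)                        # torch.int64: the old unconditional .float() would have destroyed this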
@@ -312,16 +342,19 @@ class ONNXSklearnContainer(ABC):
    def model(self):
        return self._model

-   def _get_named_inputs(self, *inputs):
+   def _get_named_inputs(self, inputs):
        """
        Retrieve the inputs names from the session object.
        """
+       if len(inputs) < len(self.input_names):
+           inputs = inputs[0]
+
        assert len(inputs) == len(self.input_names)

        named_inputs = {}

        for i in range(len(inputs)):
-           named_inputs[self.input_names[i]] = inputs[i]
+           named_inputs[self.input_names[i]] = np.array(inputs[i])

        return named_inputs
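A sketch of the new unpacking rule (illustrative names and values): `predict(X)` forwards its varargs as a single tuple, so when that outer tuple is shorter than the expected input list it is unwrapped first:

import numpy as np

input_names = ["input_0", "input_1"]

def get_named_inputs(inputs):
    # Unwrap the outer tuple when all model inputs were packed into one argument.
    if len(inputs) < len(input_names):
        inputs = inputs[0]
    assert len(inputs) == len(input_names)
    return {input_names[i]: np.array(inputs[i]) for i in range(len(inputs))}

X0, X1 = [[1.0, 2.0]], [[3.0, 4.0]]
print(get_named_inputs(((X0, X1),)).keys())  # packed form
print(get_named_inputs((X0, X1)).keys())     # already-unpacked form, same result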
@@ -341,7 +374,7 @@ class ONNXSklearnContainerTransformer(ONNXSklearnContainer):
        Utility functions used to emulate the behavior of the Sklearn API.
        On data transformers it returns transformed output data
        """
-       named_inputs = self._get_named_inputs(*inputs)
+       named_inputs = self._get_named_inputs(inputs)

        return self._session.run(self._output_names, named_inputs)


@@ -368,7 +401,7 @@ class ONNXSklearnContainerRegression(ONNXSklearnContainer):
        On classification tasks returns the predicted class labels for the input data.
        On anomaly detection (e.g. isolation forest) returns the predicted classes (-1 or 1).
        """
-       named_inputs = self._get_named_inputs(*inputs)
+       named_inputs = self._get_named_inputs(inputs)

        if self._is_regression:
            return self._session.run(self._output_names, named_inputs)

@@ -393,7 +426,7 @@ class ONNXSklearnContainerClassification(ONNXSklearnContainerRegression):
        Utility functions used to emulate the behavior of the Sklearn API.
        On classification tasks returns the probability estimates.
        """
-       named_inputs = self._get_named_inputs(*inputs)
+       named_inputs = self._get_named_inputs(inputs)

        return self._session.run([self._output_names[1]], named_inputs)[0]


@@ -415,7 +448,7 @@ class ONNXSklearnContainerAnomalyDetection(ONNXSklearnContainerRegression):
        Utility functions used to emulate the behavior of the Sklearn API.
        On anomaly detection (e.g. isolation forest) returns the decision function scores.
        """
-       named_inputs = self._get_named_inputs(*inputs)
+       named_inputs = self._get_named_inputs(inputs)

        return np.array(self._session.run([self._output_names[1]], named_inputs)[0]).flatten()
@@ -6,13 +6,18 @@

"""
All functions used for parsing input models are listed here.
+Some code here has been copied from https://github.com/onnx/sklearn-onnx/.
"""
from collections import OrderedDict
+from copy import deepcopy
from uuid import uuid4

from onnxconverter_common.container import CommonSklearnModelContainer
from onnxconverter_common.optimizer import LinkedNode, _topological_sort
from onnxconverter_common.topology import Topology
+from sklearn import pipeline
+from sklearn.compose import ColumnTransformer
+from sklearn.preprocessing import OneHotEncoder

from . import constants
from ._container import CommonONNXModelContainer
|
|||
|
||||
def _parse_sklearn_pipeline(scope, model, inputs):
|
||||
"""
|
||||
The basic ideas of scikit-learn pipeline parsing:
|
||||
The basic ideas of scikit-learn parsing:
|
||||
1. Sequentially go though all stages defined in the considered
|
||||
scikit-learn pipeline
|
||||
2. The output `onnxconverter_common.topology.Variable`s of one stage will be fed into its next
|
||||
2. The output variables of one stage will be fed into its next
|
||||
stage as the inputs.
|
||||
|
||||
Args:
|
||||
scope: The ``onnxconverter_common.topology.Scope`` for the model
|
||||
model: A `sklearn.pipeline.Pipeline` object
|
||||
inputs: A list of `onnxconverter_common.topology.Variable` objects
|
||||
|
||||
Returns:
|
||||
A list of output `onnxconverter_common.topology.Variable`s produced by the input pipeline
|
||||
:param scope: Scope object defined in _topology.py
|
||||
:param model: scikit-learn pipeline object
|
||||
:param inputs: A list of Variable objects
|
||||
:return: A list of output variables produced by the input pipeline
|
||||
"""
|
||||
for step in model.steps:
|
||||
inputs = _parse_sklearn_api(scope, step[1], inputs)
|
||||
return inputs
|
||||
|
||||
|
||||
def _build_sklearn_api_parsers_map():
|
||||
from sklearn import pipeline
|
||||
+def _parse_sklearn_feature_union(scope, model, inputs):
+   """
+   Taken from https://github.com/onnx/sklearn-onnx/blob/9939c089a467676f4ffe9f3cb91098c4841f89d8/skl2onnx/_parse.py#L199.
+   :param scope: Scope object
+   :param model: A scikit-learn FeatureUnion object
+   :param inputs: A list of Variable objects
+   :return: A list of output variables produced by feature union
+   """
+   # Output variable name of each transform. It's a list of strings.
+   transformed_result_names = []
+   # Encode each transform as our IR object
+   for name, transform in model.transformer_list:
+       transformed_result_names.append(_parse_sklearn_single_model(scope, transform, inputs)[0])
+       if model.transformer_weights is not None and name in model.transformer_weights:
+           transform_result = [transformed_result_names.pop()]
+           # Create a Multiply node
+           multiply_operator = scope.declare_local_operator("SklearnMultiply")
+           multiply_operator.inputs = transform_result
+           multiply_operator.operand = model.transformer_weights[name]
+           multiply_output = scope.declare_local_variable("multiply_output")
+           multiply_operator.outputs.append(multiply_output)
+           transformed_result_names.append(multiply_operator.outputs[0])
+
+   # Create a Concat operator
+   concat_operator = scope.declare_local_operator("SklearnConcat")
+   concat_operator.inputs = transformed_result_names
+
+   # Declare output name of scikit-learn FeatureUnion
+   union_name = scope.declare_local_variable("union")
+   concat_operator.outputs.append(union_name)
+
+   return concat_operator.outputs

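A hedged usage sketch: a FeatureUnion with `transformer_weights` exercises both injected operators above, since each weighted transform gets a `SklearnMultiply` node and the union ends in a `SklearnConcat` (assumes hummingbird-ml and scikit-learn are installed):

import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import FeatureUnion, Pipeline
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from hummingbird.ml import convert

X = np.random.rand(50, 3).astype(np.float32)
y = np.random.randint(2, size=50)

union = FeatureUnion(
    [("std", StandardScaler()), ("minmax", MinMaxScaler())],
    transformer_weights={"minmax": 0.5},  # parsed into a SklearnMultiply op
)
pipe = Pipeline([("union", union), ("clf", LogisticRegression())]).fit(X, y)

hb = convert(pipe, "pytorch")
print(hb.predict(X[:3]))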
def _parse_sklearn_column_transformer(scope, model, inputs):
    """
    Taken from https://github.com/onnx/sklearn-onnx/blob/9939c089a467676f4ffe9f3cb91098c4841f89d8/skl2onnx/_parse.py#L238.
    :param scope: Scope object
    :param model: A *scikit-learn* *ColumnTransformer* object
    :param inputs: A list of Variable objects
    :return: A list of output variables produced by the column transformer
    """
    assert (
        len(inputs) < 2
    ), "Hummingbird currently supports ColumnTransformer over a single input. Please file an issue at https://github.com/microsoft/hummingbird."
    # Output variable names of each transform. It's a list of strings.
    transformed_result_names = []
    # Encode each transform as our IR object
    for name, op, column_indices in model.transformers_:
        if op == "drop":
            continue
        if isinstance(column_indices, slice):
            column_indices = list(
                range(
                    column_indices.start if column_indices.start is not None else 0,
                    column_indices.stop,
                    column_indices.step if column_indices.step is not None else 1,
                )
            )
        elif isinstance(column_indices, (int, str)):
            column_indices = [column_indices]
        pt_var, pt_is = _get_column_indices(column_indices, inputs)
        transform_inputs = []
        tr_inputs = _fetch_input_slice(scope, [inputs[pt_var]], pt_is)
        transform_inputs.extend(tr_inputs)

        model_obj = model.named_transformers_[name]
        if isinstance(model_obj, str):
            if model_obj == "passthrough":
                var_out = transform_inputs[0]
            elif model_obj == "drop":
                var_out = None
            else:
                raise RuntimeError("Unknown operator alias '{0}'. These are specified in supported.py.".format(model_obj))
        else:
            var_out = _parse_sklearn_api(scope, model_obj, transform_inputs)[0]
            if model.transformer_weights is not None and name in model.transformer_weights:
                # Create a Multiply node
                multiply_operator = scope.declare_local_operator("SklearnMultiply")
                multiply_operator.inputs.append(var_out)
                multiply_operator.operand = model.transformer_weights[name]
                var_out = scope.declare_local_variable("multiply_output")
                multiply_operator.outputs.append(var_out)
        if var_out:
            transformed_result_names.append(var_out)

    # Create a Concat node
    if len(transformed_result_names) > 1:
        concat_operator = scope.declare_local_operator("SklearnConcat")
        concat_operator.inputs = transformed_result_names

        # Declare output name of scikit-learn ColumnTransformer
        transformed_column_name = scope.declare_local_variable("transformed_column")
        concat_operator.outputs.append(transformed_column_name)
        return concat_operator.outputs
    return transformed_result_names

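For reference, a small sketch (not part of the commit) of the (name, op, column_indices) triples the loop above iterates over; a fitted ColumnTransformer appends a remainder entry when some columns are left over:

import numpy as np
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler

X = np.array([[0.0, 1.0, 2.0, 3.0], [4.0, 5.0, 6.0, 7.0]], dtype=np.float32)
ct = ColumnTransformer(
    transformers=[("num", StandardScaler(), [0, 1]), ("cat", OneHotEncoder(), [2])],
    remainder="drop",
).fit(X)
for name, op, column_indices in ct.transformers_:
    print(name, op if isinstance(op, str) else type(op).__name__, column_indices)
# num StandardScaler [0, 1]
# cat OneHotEncoder [2]
# remainder drop [3]   <- skipped by the op == "drop" check above
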
def _build_sklearn_api_parsers_map():
    # Parsers for edge cases go here.
    map_parser = {
        ColumnTransformer: _parse_sklearn_column_transformer,
        pipeline.Pipeline: _parse_sklearn_pipeline,
        pipeline.FeatureUnion: _parse_sklearn_feature_union,
        # More parsers will go here
    }

    return map_parser

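A dispatch sketch (illustrative, assuming the module-level sklearn_api_parsers_map built below is in scope): parsers are keyed on the model's exact type, so a Pipeline picks _parse_sklearn_pipeline while a plain estimator falls back to single-operator parsing:

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

model = Pipeline([("scaler", StandardScaler())])
parser = sklearn_api_parsers_map.get(type(model))  # -> _parse_sklearn_pipeline
if parser is None:
    pass  # fall back to parsing the model as a single operator
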
@ -236,9 +338,7 @@ def _parse_onnx_single_operator(scope, operator):
    this_operator = scope.declare_local_operator(alias, operator)

    # Register the operator's inputs.
    input_names = list(operator.origin.input)
    this_operator.inputs = [scope.variables[in_] for in_ in input_names if in_ in scope.variables]

    # Register the operator's outputs.

@ -283,6 +383,81 @@ def _remove_zipmap(node_list):
    return output_node_list


def _fetch_input_slice(scope, inputs, column_indices):
    """
    Taken from https://github.com/onnx/sklearn-onnx/blob/9939c089a467676f4ffe9f3cb91098c4841f89d8/skl2onnx/_parse.py#L53.
    """
    if not isinstance(inputs, list):
        raise TypeError("Parameter inputs must be a list.")
    if len(inputs) == 0:
        raise RuntimeError("Operator ArrayFeatureExtractor requires at least one input.")
    if len(inputs) != 1:
        raise RuntimeError("Operator ArrayFeatureExtractor does not support multiple input tensors.")

    array_feature_extractor_operator = scope.declare_local_operator("SklearnArrayFeatureExtractor")
    array_feature_extractor_operator.inputs = inputs
    array_feature_extractor_operator.column_indices = column_indices
    output_variable_name = scope.declare_local_variable("extracted_feature_columns", inputs[0].type)
    array_feature_extractor_operator.outputs.append(output_variable_name)
    return array_feature_extractor_operator.outputs
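At runtime the injected SklearnArrayFeatureExtractor boils down to column selection; a NumPy sketch of the same computation (illustrative only):

import numpy as np

X = np.arange(12, dtype=np.float32).reshape(3, 4)
column_indices = [0, 2]
print(X[:, column_indices])  # keeps only the requested feature columns
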

def _get_column_index(i, inputs):
    """
    Taken from https://github.com/onnx/sklearn-onnx/blob/9939c089a467676f4ffe9f3cb91098c4841f89d8/skl2onnx/common/utils.py#L50.
    Returns a tuple (variable index, column index in that variable).
    The function has two different behaviours, one when *i* (column index)
    is an integer, another one when *i* is a string (column name).
    If *i* is a string, the function looks for the input with this name and returns (index, 0).
    If *i* is an integer, let's assume first we have two inputs
    *I0 = FloatTensorType([None, 2])* and *I1 = FloatTensorType([None, 3])*,
    in this case, here are the results:
    ::
        get_column_index(0, inputs) -> (0, 0)
        get_column_index(1, inputs) -> (0, 1)
        get_column_index(2, inputs) -> (1, 0)
        get_column_index(3, inputs) -> (1, 1)
        get_column_index(4, inputs) -> (1, 2)
    """
    if isinstance(i, int):
        if i == 0:
            # Useful shortcut, skips the case when end is None
            # (unknown dimension)
            return 0, 0
        vi = 0
        return (vi, i)
    else:
        raise RuntimeError("Hummingbird currently supports only int columns, {} is not supported.".format(i))

def _get_column_indices(indices, inputs):
    """
    Taken from https://github.com/onnx/sklearn-onnx/blob/9939c089a467676f4ffe9f3cb91098c4841f89d8/skl2onnx/common/utils.py#L105.
    Returns the requested graph inputs based on their indices or names. See `_parse._get_column_index`.
    Args:
        indices: variables indices or names
        inputs: model inputs

    Returns:
        a tuple *(variable index, list of requested column indices within that variable)*
    """
    pt_var = None
    pt_is = []
    for p in indices:
        pt_v, pt_i = _get_column_index(p, inputs)
        pt_is.append(pt_i)
        if pt_var is None:
            pt_var = pt_v
        elif pt_var != pt_v:
            raise NotImplementedError(
                "Hummingbird is not able to merge multiple columns from "
                "multiple variables ({0}). You should think about merging "
                "initial types.".format([pt_var, pt_v])
            )
    return pt_var, pt_is

# Registered API parsers.
if sklearn_installed():
    sklearn_api_parsers_map = _build_sklearn_api_parsers_map()

@ -93,10 +93,17 @@ def convert(topology, backend, device, extra_config={}):
    if output_model_name is None:
        output_model_name = str(uuid4().hex) + ".onnx"

    # Put the tracing test input into the right format.
    trace_input = extra_config[constants.TEST_INPUT]
    if type(trace_input) is tuple:
        trace_input = tuple([torch.from_numpy(i) for i in trace_input])
    else:
        trace_input = torch.from_numpy(trace_input)

    # Generate the ONNX model
    torch.onnx.export(
        torch_model,
        trace_input,
        output_model_name,
        input_names=topology.raw_model.input_names,
        output_names=topology.raw_model.output_names,
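A small sketch (not part of the commit) of the normalization above: tuples of arrays become tuples of tensors for multi-input tracing, while a single array becomes one tensor:

import numpy as np
import torch

trace_input = (np.random.rand(2, 3).astype(np.float32), np.random.rand(2, 1).astype(np.float32))
if type(trace_input) is tuple:
    trace_input = tuple([torch.from_numpy(i) for i in trace_input])
else:
    trace_input = torch.from_numpy(trace_input)
print([t.shape for t in trace_input])  # [torch.Size([2, 3]), torch.Size([2, 1])]
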
@ -100,6 +100,17 @@ def xgboost_installed():
    return True


def pandas_installed():
    """
    Checks that *Pandas* is available.
    """
    try:
        import pandas
    except ImportError:
        return False
    return True


class _Constants(object):
    """
    Class enabling the proper definition of constants.
@ -163,9 +163,9 @@ def _convert_onnxml(model, backend, test_input, device, extra_config={}):
                    type(initial_types[0][1])
                )
            )
        extra_config[constants.TEST_INPUT] = test_input
    elif constants.N_FEATURES not in extra_config:
        extra_config[constants.N_FEATURES] = test_input.shape[1]

    # Set the initializers. Some converters require access to the initializers.
    initializers = {} if model.graph.initializer is None else {in_.name: in_ for in_ in model.graph.initializer}
@ -219,6 +219,14 @@ def convert(model, backend, test_input=None, device="cpu", extra_config={}):
    if test_input is not None and constants.TEST_INPUT not in extra_config:
        extra_config[constants.TEST_INPUT] = test_input

    # Fix the test_input type
    if constants.TEST_INPUT in extra_config:
        if type(extra_config[constants.TEST_INPUT]) == list:
            extra_config[constants.TEST_INPUT] = np.array(extra_config[constants.TEST_INPUT])
        elif type(extra_config[constants.TEST_INPUT]) == tuple:
            extra_config[constants.N_FEATURES] = len(extra_config[constants.TEST_INPUT])
        test_input = extra_config[constants.TEST_INPUT]

    # We do some normalization on backends.
    backend = backend.lower()
    backend = backends[backend]
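The effect of the branch above, sketched with plain values (illustrative only): lists are promoted to ndarrays so `.shape` works downstream, while a tuple marks a multi-input model and only its input count is recorded:

import numpy as np

test_input = [[0.0, 1.0], [2.0, 3.0]]
if type(test_input) == list:
    test_input = np.array(test_input)
print(test_input.shape)  # (2, 2)

multi_input = (np.zeros((2, 3)), np.zeros((2, 1)))
print(len(multi_input))  # 2 inputs -> recorded as N_FEATURES above
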
@ -32,6 +32,7 @@ from .sklearn import iforest  # noqa: E402
from .sklearn import linear as sklearn_linear  # noqa: E402
from .sklearn import normalizer as sklearn_normalizer  # noqa: E402
from .sklearn import one_hot_encoder as sklearn_ohe  # noqa: E402
from .sklearn import pipeline  # noqa: E402
from .sklearn import scaler as sklearn_scaler  # noqa: E402
from .sklearn import sv  # noqa: E402
from . import lightgbm  # noqa: E402

@ -25,6 +25,8 @@ class Cast(BaseOperator, torch.nn.Module):
        self.to_type = to_type

    def forward(self, x):
        if self.to_type == 1:  # Cast to float
            return x.float()
        if self.to_type == 7:  # Cast to long
            return x.long()
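The to_type codes are ONNX TensorProto dtypes (1 = FLOAT, 7 = INT64); a quick check of the two casts (illustrative only):

import torch

x = torch.tensor([1, 2, 3])
print(x.float().dtype)  # torch.float32 (to_type == 1)
print(x.long().dtype)   # torch.int64   (to_type == 7)
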
@ -34,7 +36,10 @@ class Concat(BaseOperator, torch.nn.Module):
        super(Concat, self).__init__()

    def forward(self, *x):
        if len(x[0].shape) > 1:
            return torch.cat(x, dim=1)
        else:
            return torch.stack(x, dim=1)


class Reshape(BaseOperator, torch.nn.Module):
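Why the new branch is needed, sketched below (illustrative only): 2-D inputs are joined column-wise with cat, while 1-D inputs must be stacked to gain the column dimension first:

import torch

a2d, b2d = torch.zeros(4, 2), torch.ones(4, 3)
print(torch.cat((a2d, b2d), dim=1).shape)    # torch.Size([4, 5])

a1d, b1d = torch.zeros(4), torch.ones(4)
print(torch.stack((a1d, b1d), dim=1).shape)  # torch.Size([4, 2])
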
@ -30,7 +30,7 @@ def convert_sklearn_linear_model(operator, device, extra_config):
    """
    classes = [0] if not hasattr(operator.raw_operator, "classes_") else operator.raw_operator.classes_

    if not all(["int" in str(type(x)) for x in classes]):
        raise RuntimeError(
            "Hummingbird currently supports only integer labels for class labels. Please file an issue at https://github.com/microsoft/hummingbird."
        )

@ -0,0 +1,102 @@
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See License.txt in the project root for
# license information.
# --------------------------------------------------------------------------

"""
Converters for operators necessary for supporting scikit-learn Pipelines.
"""

import numpy as np
from onnxconverter_common.registration import register_converter
import torch

from .. import constants
from .._array_feature_extractor_implementations import ArrayFeatureExtractor
from .._base_operator import BaseOperator


class Concat(BaseOperator, torch.nn.Module):
    """
    Module used to concatenate tensors into a single tensor.
    """

    def __init__(self):
        super(Concat, self).__init__()

    def forward(self, *x):
        return torch.cat(x, dim=1)


class Multiply(BaseOperator, torch.nn.Module):
    """
    Module used to multiply features in a pipeline by a score.
    """

    def __init__(self, score):
        super(Multiply, self).__init__()

        self.score = score

    def forward(self, x):
        return x * self.score


def convert_sklearn_array_feature_extractor(operator, device, extra_config):
    """
    Converter for ArrayFeatureExtractor.

    Args:
        operator: An operator wrapping an ArrayFeatureExtractor operator
        device: String defining the type of device the converted operator should be run on
        extra_config: Extra configuration used to select the best conversion strategy

    Returns:
        A PyTorch model
    """
    assert operator is not None

    indices = operator.column_indices
    return ArrayFeatureExtractor(np.ascontiguousarray(indices), device)


def convert_sklearn_concat(operator, device=None, extra_config={}):
    """
    Converter for concat operators injected when parsing Sklearn pipelines.

    Args:
        operator: An empty operator
        device: String defining the type of device the converted operator should be run on
        extra_config: Extra configuration used to select the best conversion strategy

    Returns:
        A PyTorch model
    """
    return Concat()


def convert_sklearn_multiply(operator, device=None, extra_config={}):
    """
    Converter for multiply operators injected when parsing Sklearn pipelines.

    Args:
        operator: An empty operator
        device: String defining the type of device the converted operator should be run on
        extra_config: Extra configuration used to select the best conversion strategy

    Returns:
        A PyTorch model
    """
    assert operator is not None
    assert hasattr(operator, "operand")

    score = operator.operand

    # Generate the model.
    return Multiply(score)


register_converter("SklearnArrayFeatureExtractor", convert_sklearn_array_feature_extractor)
register_converter("SklearnConcat", convert_sklearn_concat)
register_converter("SklearnMultiply", convert_sklearn_multiply)
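A minimal use of the modules defined above (illustrative, assuming the Multiply and Concat classes from this file are in scope): Multiply rescales every feature by the transformer weight captured at parse time:

import torch

mul = Multiply(2.0)
print(mul(torch.ones(2, 3)))  # every entry becomes 2.0

cat = Concat()
print(cat(torch.zeros(2, 1), torch.ones(2, 2)).shape)  # torch.Size([2, 3])
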
@ -220,7 +220,17 @@ def _build_sklearn_api_operator_name_map():
    Associate Sklearn with the operator class names.
    If two scikit-learn (API) models share a single name, it means they are equivalent in terms of conversion.
    """
    # Pipeline ops. These are ops injected by the parser, not "real" sklearn operators.
    pipeline_operator_list = [
        "ArrayFeatureExtractor",
        "Concat",
        "Multiply",
    ]

    return {
        k: "Sklearn" + k.__name__ if hasattr(k, "__name__") else k
        for k in sklearn_operator_list + pipeline_operator_list + xgb_operator_list + lgbm_operator_list
    }


def _build_onnxml_api_operator_name_map():
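A sketch of the comprehension's behavior with a stand-in class (illustrative only; FakeOp is hypothetical): class keys get the "Sklearn" prefix, while the string pipeline ops pass through unchanged and are registered under their prefixed aliases directly by the parser:

class FakeOp:  # hypothetical stand-in for a real operator class
    pass

op_map = {k: "Sklearn" + k.__name__ if hasattr(k, "__name__") else k for k in [FakeOp, "Concat"]}
print(op_map)  # {<class 'FakeOp'>: 'SklearnFakeOp', 'Concat': 'Concat'}
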
@ -0,0 +1,50 @@
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See License.txt in the project root for
# license information.
# --------------------------------------------------------------------------

import unittest
from distutils.version import StrictVersion

import numpy as np
from sklearn.datasets import load_digits, load_iris
from sklearn.model_selection import train_test_split
from sklearn.pipeline import FeatureUnion
from sklearn.preprocessing import StandardScaler, MinMaxScaler

import hummingbird.ml


class TestSklearnFeatureUnion(unittest.TestCase):
    def test_feature_union_default(self):
        data = load_iris()
        X, y = data.data, data.target
        X = X.astype(np.float32)
        X_train, X_test, *_ = train_test_split(X, y, test_size=0.5, random_state=42)
        model = FeatureUnion([("standard", StandardScaler()), ("minmax", MinMaxScaler())]).fit(X_train)

        torch_model = hummingbird.ml.convert(model, "torch")

        np.testing.assert_allclose(
            model.transform(X_test), torch_model.transform(X_test), rtol=1e-06, atol=1e-06,
        )

    def test_feature_union_transformer_weights(self):
        data = load_iris()
        X, y = data.data, data.target
        X = X.astype(np.float32)
        X_train, X_test, *_ = train_test_split(X, y, test_size=0.5, random_state=42)
        model = FeatureUnion(
            [("standard", StandardScaler()), ("minmax", MinMaxScaler())], transformer_weights={"standard": 2, "minmax": 4}
        ).fit(X_train)

        torch_model = hummingbird.ml.convert(model, "torch")

        np.testing.assert_allclose(
            model.transform(X_test), torch_model.transform(X_test), rtol=1e-06, atol=1e-06,
        )


if __name__ == "__main__":
    unittest.main()

@ -0,0 +1,417 @@
import unittest
import numpy as np
from sklearn import datasets

from sklearn.compose import ColumnTransformer
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.preprocessing import OneHotEncoder, StandardScaler, MinMaxScaler

import hummingbird.ml
from hummingbird.ml._utils import pandas_installed

if pandas_installed():
    import pandas

class TestSklearnPipeline(unittest.TestCase):
    def test_pipeline(self):
        data = np.array([[0, 0], [0, 0], [1, 1], [1, 1]], dtype=np.float32)
        scaler = StandardScaler()
        scaler.fit(data)
        model = Pipeline([("scaler1", scaler), ("scaler2", scaler)])

        torch_model = hummingbird.ml.convert(model, "torch")

        self.assertTrue(torch_model is not None)

        np.testing.assert_allclose(
            model.transform(data), torch_model.transform(data), rtol=1e-06, atol=1e-06,
        )

    def test_pipeline2(self):
        data = np.array([[0.0, 0.0], [0.0, 0.0], [1.0, 1.0], [1.0, 1.0]], dtype=np.float32)
        scaler = StandardScaler()
        scaler.fit(data)
        model = Pipeline([("scaler1", scaler), ("scaler2", scaler)])

        torch_model = hummingbird.ml.convert(model, "torch")

        self.assertTrue(torch_model is not None)

        np.testing.assert_allclose(
            model.transform(data), torch_model.transform(data), rtol=1e-06, atol=1e-06,
        )

    def test_combine_inputs_union_in_pipeline(self):
        from sklearn.preprocessing import StandardScaler
        from sklearn.pipeline import Pipeline

        data = np.array([[0.0, 0.0], [0.0, 0.0], [1.0, 1.0], [1.0, 1.0]], dtype=np.float32)
        model = Pipeline(
            [
                ("scaler1", StandardScaler()),
                ("union", FeatureUnion([("scaler2", StandardScaler()), ("scaler3", MinMaxScaler())])),
            ]
        )
        model.fit(data)

        torch_model = hummingbird.ml.convert(model, "torch")

        self.assertTrue(torch_model is not None)

        np.testing.assert_allclose(
            model.transform(data), torch_model.transform(data), rtol=1e-06, atol=1e-06,
        )

    def test_combine_inputs_floats_ints(self):
        data = [[0, 0.0], [0, 0.0], [1, 1.0], [1, 1.0]]
        scaler = StandardScaler()
        scaler.fit(data)
        model = Pipeline([("scaler1", scaler), ("scaler2", scaler)])

        torch_model = hummingbird.ml.convert(model, "torch")

        self.assertTrue(torch_model is not None)

        np.testing.assert_allclose(
            model.transform(data), torch_model.transform(data), rtol=1e-06, atol=1e-06,
        )

    @unittest.skipIf(not pandas_installed(), reason="Test requires pandas installed")
    def test_pipeline_column_transformer_1(self):
        iris = datasets.load_iris()
        X = iris.data[:, :3]
        y = iris.target
        X_train = pandas.DataFrame(X, columns=["vA", "vB", "vC"])
        X_train["vcat"] = X_train["vA"].apply(lambda x: 1 if x > 0.5 else 2)
        X_train["vcat2"] = X_train["vB"].apply(lambda x: 3 if x > 0.5 else 4)
        y_train = y % 2
        numeric_features = [0, 1, 2]  # ["vA", "vB", "vC"]

        classifier = LogisticRegression(
            C=0.01, class_weight=dict(zip([False, True], [0.2, 0.8])), n_jobs=1, max_iter=10, solver="liblinear", tol=1e-3,
        )

        numeric_transformer = Pipeline(steps=[("scaler", StandardScaler())])

        preprocessor = ColumnTransformer(transformers=[("num", numeric_transformer, numeric_features)])

        model = Pipeline(steps=[("preprocessor", preprocessor), ("classifier", classifier)])

        model.fit(X_train, y_train)

        X_test = X_train[:11]

        torch_model = hummingbird.ml.convert(model, "torch")

        self.assertTrue(torch_model is not None)

        np.testing.assert_allclose(
            model.predict_proba(X_test), torch_model.predict_proba(X_test.values), rtol=1e-06, atol=1e-06,
        )

    @unittest.skipIf(not pandas_installed(), reason="Test requires pandas installed")
    def test_pipeline_column_transformer(self):
        iris = datasets.load_iris()
        X = iris.data[:, :3]
        y = iris.target
        X_train = pandas.DataFrame(X, columns=["vA", "vB", "vC"])
        X_train["vcat"] = X_train["vA"].apply(lambda x: 1 if x > 0.5 else 2)
        X_train["vcat2"] = X_train["vB"].apply(lambda x: 3 if x > 0.5 else 4)
        y_train = y % 2
        numeric_features = [0, 1, 2]  # ["vA", "vB", "vC"]
        categorical_features = [3, 4]  # ["vcat", "vcat2"]

        classifier = LogisticRegression(
            C=0.01, class_weight=dict(zip([False, True], [0.2, 0.8])), n_jobs=1, max_iter=10, solver="liblinear", tol=1e-3,
        )

        numeric_transformer = Pipeline(steps=[("scaler", StandardScaler())])

        categorical_transformer = Pipeline(steps=[("onehot", OneHotEncoder(sparse=True, handle_unknown="ignore"))])

        preprocessor = ColumnTransformer(
            transformers=[
                ("num", numeric_transformer, numeric_features),
                ("cat", categorical_transformer, categorical_features),
            ]
        )

        model = Pipeline(steps=[("preprocessor", preprocessor), ("classifier", classifier)])

        model.fit(X_train, y_train)

        X_test = X_train[:11]

        torch_model = hummingbird.ml.convert(model, "torch")

        self.assertTrue(torch_model is not None)

        np.testing.assert_allclose(
            model.predict_proba(X_test), torch_model.predict_proba(X_test.values), rtol=1e-06, atol=1e-06,
        )

    @unittest.skipIf(not pandas_installed(), reason="Test requires pandas installed")
    def test_pipeline_column_transformer_weights(self):
        iris = datasets.load_iris()
        X = iris.data[:, :3]
        y = iris.target
        X_train = pandas.DataFrame(X, columns=["vA", "vB", "vC"])
        X_train["vcat"] = X_train["vA"].apply(lambda x: 1 if x > 0.5 else 2)
        X_train["vcat2"] = X_train["vB"].apply(lambda x: 3 if x > 0.5 else 4)
        y_train = y % 2
        numeric_features = [0, 1, 2]  # ["vA", "vB", "vC"]
        categorical_features = [3, 4]  # ["vcat", "vcat2"]

        classifier = LogisticRegression(
            C=0.01, class_weight=dict(zip([False, True], [0.2, 0.8])), n_jobs=1, max_iter=10, solver="liblinear", tol=1e-3,
        )

        numeric_transformer = Pipeline(steps=[("scaler", StandardScaler())])

        categorical_transformer = Pipeline(steps=[("onehot", OneHotEncoder(sparse=True, handle_unknown="ignore"))])

        preprocessor = ColumnTransformer(
            transformers=[
                ("num", numeric_transformer, numeric_features),
                ("cat", categorical_transformer, categorical_features),
            ],
            transformer_weights={"num": 2, "cat": 3},
        )

        model = Pipeline(steps=[("preprocessor", preprocessor), ("classifier", classifier)])

        model.fit(X_train, y_train)

        X_test = X_train[:11]

        torch_model = hummingbird.ml.convert(model, "torch")

        self.assertTrue(torch_model is not None)

        np.testing.assert_allclose(
            model.predict_proba(X_test), torch_model.predict_proba(X_test.values), rtol=1e-06, atol=1e-06,
        )

    @unittest.skipIf(not pandas_installed(), reason="Test requires pandas installed")
    def test_pipeline_column_transformer_drop(self):
        iris = datasets.load_iris()
        X = iris.data[:, :3]
        y = iris.target
        X_train = pandas.DataFrame(X, columns=["vA", "vB", "vC"])
        X_train["vcat"] = X_train["vA"].apply(lambda x: 1 if x > 0.5 else 2)
        X_train["vcat2"] = X_train["vB"].apply(lambda x: 3 if x > 0.5 else 4)
        y_train = y % 2
        numeric_features = [0, 1]  # ["vA", "vB"]
        categorical_features = [3, 4]  # ["vcat", "vcat2"]

        classifier = LogisticRegression(
            C=0.01, class_weight=dict(zip([False, True], [0.2, 0.8])), n_jobs=1, max_iter=10, solver="liblinear", tol=1e-3,
        )

        numeric_transformer = Pipeline(steps=[("scaler", StandardScaler())])

        categorical_transformer = Pipeline(steps=[("onehot", OneHotEncoder(sparse=True, handle_unknown="ignore"))])

        preprocessor = ColumnTransformer(
            transformers=[
                ("num", numeric_transformer, numeric_features),
                ("cat", categorical_transformer, categorical_features),
            ],
            transformer_weights={"num": 2, "cat": 3},
            remainder="drop",
        )

        model = Pipeline(steps=[("preprocessor", preprocessor), ("classifier", classifier)])

        model.fit(X_train, y_train)

        X_test = X_train[:11]

        torch_model = hummingbird.ml.convert(model, "torch")

        self.assertTrue(torch_model is not None)

        np.testing.assert_allclose(
            model.predict_proba(X_test), torch_model.predict_proba(X_test.values), rtol=1e-06, atol=1e-06,
        )

    @unittest.skipIf(not pandas_installed(), reason="Test requires pandas installed")
    def test_pipeline_column_transformer_drop_noweights(self):
        iris = datasets.load_iris()
        X = iris.data[:, :3]
        y = iris.target
        X_train = pandas.DataFrame(X, columns=["vA", "vB", "vC"])
        X_train["vcat"] = X_train["vA"].apply(lambda x: 1 if x > 0.5 else 2)
        X_train["vcat2"] = X_train["vB"].apply(lambda x: 3 if x > 0.5 else 4)
        y_train = y % 2
        numeric_features = [0, 1]  # ["vA", "vB"]
        categorical_features = [3, 4]  # ["vcat", "vcat2"]

        classifier = LogisticRegression(
            C=0.01, class_weight=dict(zip([False, True], [0.2, 0.8])), n_jobs=1, max_iter=10, solver="liblinear", tol=1e-3,
        )

        numeric_transformer = Pipeline(steps=[("scaler", StandardScaler())])

        categorical_transformer = Pipeline(steps=[("onehot", OneHotEncoder(sparse=True, handle_unknown="ignore"))])

        preprocessor = ColumnTransformer(
            transformers=[
                ("num", numeric_transformer, numeric_features),
                ("cat", categorical_transformer, categorical_features),
            ],
            remainder="drop",
        )

        model = Pipeline(steps=[("preprocessor", preprocessor), ("classifier", classifier)])

        model.fit(X_train, y_train)

        X_test = X_train[:11]

        torch_model = hummingbird.ml.convert(model, "torch")

        self.assertTrue(torch_model is not None)

        np.testing.assert_allclose(
            model.predict_proba(X_test), torch_model.predict_proba(X_test.values), rtol=1e-06, atol=1e-06,
        )

    @unittest.skipIf(ColumnTransformer is None, reason="ColumnTransformer not available in 0.19")
    @unittest.skipIf(not pandas_installed(), reason="Test requires pandas installed")
    def test_pipeline_column_transformer_passthrough(self):
        iris = datasets.load_iris()
        X = iris.data[:, :3]
        y = iris.target
        X_train = pandas.DataFrame(X, columns=["vA", "vB", "vC"])
        X_train["vcat"] = X_train["vA"].apply(lambda x: 1 if x > 0.5 else 2)
        X_train["vcat2"] = X_train["vB"].apply(lambda x: 3 if x > 0.5 else 4)
        y_train = y % 2
        numeric_features = [0, 1]  # ["vA", "vB"]
        categorical_features = [3, 4]  # ["vcat", "vcat2"]

        classifier = LogisticRegression(
            C=0.01, class_weight=dict(zip([False, True], [0.2, 0.8])), n_jobs=1, max_iter=10, solver="liblinear", tol=1e-3,
        )

        numeric_transformer = Pipeline(steps=[("scaler", StandardScaler())])

        categorical_transformer = Pipeline(steps=[("onehot", OneHotEncoder(sparse=True, handle_unknown="ignore"))])

        preprocessor = ColumnTransformer(
            transformers=[
                ("num", numeric_transformer, numeric_features),
                ("cat", categorical_transformer, categorical_features),
            ],
            transformer_weights={"num": 2, "cat": 3},
            remainder="passthrough",
        )

        model = Pipeline(steps=[("preprocessor", preprocessor), ("classifier", classifier)])

        model.fit(X_train, y_train)

        X_test = X_train[:11]

        torch_model = hummingbird.ml.convert(model, "torch")

        self.assertTrue(torch_model is not None)

        np.testing.assert_allclose(
            model.predict_proba(X_test), torch_model.predict_proba(X_test.values), rtol=1e-06, atol=1e-06,
        )

    @unittest.skipIf(ColumnTransformer is None, reason="ColumnTransformer not available in 0.19")
    @unittest.skipIf(not pandas_installed(), reason="Test requires pandas installed")
    def test_pipeline_column_transformer_passthrough_noweights(self):
        iris = datasets.load_iris()
        X = iris.data[:, :3]
        y = iris.target
        X_train = pandas.DataFrame(X, columns=["vA", "vB", "vC"])
        X_train["vcat"] = X_train["vA"].apply(lambda x: 1 if x > 0.5 else 2)
        X_train["vcat2"] = X_train["vB"].apply(lambda x: 3 if x > 0.5 else 4)
        y_train = y % 2
        numeric_features = [0, 1]  # ["vA", "vB"]
        categorical_features = [3, 4]  # ["vcat", "vcat2"]

        classifier = LogisticRegression(
            C=0.01, class_weight=dict(zip([False, True], [0.2, 0.8])), n_jobs=1, max_iter=10, solver="liblinear", tol=1e-3,
        )

        numeric_transformer = Pipeline(steps=[("scaler", StandardScaler())])

        categorical_transformer = Pipeline(steps=[("onehot", OneHotEncoder(sparse=True, handle_unknown="ignore"))])

        preprocessor = ColumnTransformer(
            transformers=[
                ("num", numeric_transformer, numeric_features),
                ("cat", categorical_transformer, categorical_features),
            ],
            remainder="passthrough",
        )

        model = Pipeline(steps=[("preprocessor", preprocessor), ("classifier", classifier)])

        model.fit(X_train, y_train)

        X_test = X_train[:11]

        torch_model = hummingbird.ml.convert(model, "torch")

        self.assertTrue(torch_model is not None)

        np.testing.assert_allclose(
            model.predict_proba(X_test), torch_model.predict_proba(X_test.values), rtol=1e-06, atol=1e-06,
        )

    @unittest.skipIf(ColumnTransformer is None, reason="ColumnTransformer not available in 0.19")
    @unittest.skipIf(not pandas_installed(), reason="Test requires pandas installed")
    def test_pipeline_column_transformer_passthrough_slice(self):
        iris = datasets.load_iris()
        X = iris.data[:, :3]
        y = iris.target
        X_train = pandas.DataFrame(X, columns=["vA", "vB", "vC"])
        X_train["vcat"] = X_train["vA"].apply(lambda x: 1 if x > 0.5 else 2)
        X_train["vcat2"] = X_train["vB"].apply(lambda x: 3 if x > 0.5 else 4)
        y_train = y % 2
        numeric_features = slice(0, 1)  # ["vA"]
        categorical_features = slice(3, 4)  # ["vcat"]

        classifier = LogisticRegression(
            C=0.01, class_weight=dict(zip([False, True], [0.2, 0.8])), n_jobs=1, max_iter=10, solver="liblinear", tol=1e-3,
        )

        numeric_transformer = Pipeline(steps=[("scaler", StandardScaler())])

        categorical_transformer = Pipeline(steps=[("onehot", OneHotEncoder(sparse=True, handle_unknown="ignore"))])

        preprocessor = ColumnTransformer(
            transformers=[
                ("num", numeric_transformer, numeric_features),
                ("cat", categorical_transformer, categorical_features),
            ],
            transformer_weights={"num": 2, "cat": 3},
            remainder="passthrough",
        )

        model = Pipeline(steps=[("preprocessor", preprocessor), ("classifier", classifier)])

        model.fit(X_train, y_train)

        X_test = X_train[:11]

        torch_model = hummingbird.ml.convert(model, "torch")

        self.assertTrue(torch_model is not None)

        np.testing.assert_allclose(
            model.predict_proba(X_test), torch_model.predict_proba(X_test.values), rtol=1e-06, atol=1e-06,
        )


if __name__ == "__main__":
    unittest.main()