[Relay][Op] Dropout and batch_norm (#1870)
Parent: a5be8fd3d1
Commit: b4946e770c
@@ -38,6 +38,8 @@ This level enables fully connected multi-layer perceptron.
   tvm.relay.tanh
   tvm.relay.sigmoid
   tvm.relay.nn.relu
   tvm.relay.nn.dropout
   tvm.relay.nn.batch_norm


**Level 2: Convolutions**
@@ -237,6 +237,41 @@ struct PadAttrs : public tvm::AttrsNode<PadAttrs> {
  }
};

/*! \brief Attributes used in dropout operator */
struct DropoutAttrs : public tvm::AttrsNode<DropoutAttrs> {
  double rate;
  TVM_DECLARE_ATTRS(DropoutAttrs, "relay.attrs.DropoutAttrs") {
    TVM_ATTR_FIELD(rate)
      .describe("Fraction of the input that gets dropped out during training time")
      .set_default(0.5);
  }
};  // struct DropoutAttrs

/*! \brief Attributes used in batch_norm operator */
struct BatchNormAttrs : public tvm::AttrsNode<BatchNormAttrs> {
  int axis;
  double epsilon;
  bool center;
  bool scale;

  TVM_DECLARE_ATTRS(BatchNormAttrs, "relay.attrs.BatchNormAttrs") {
    TVM_ATTR_FIELD(axis)
      .describe("Specify which shape axis denotes the channel.")
      .set_default(1);
    TVM_ATTR_FIELD(epsilon)
      .describe("Small float added to variance to avoid dividing by zero")
      .set_default(1e-5);
    TVM_ATTR_FIELD(center)
      .describe("If True, add offset of beta to normalized tensor. If False, beta is ignored")
      .set_default(true);
    TVM_ATTR_FIELD(scale)
      .describe("If True, multiply by gamma. If False, gamma is not used. "
                "When the next layer is piecewise linear (e.g., nn.relu), "
                "this can be disabled since the scaling will be done by the next layer.")
      .set_default(true);
  }
};  // struct BatchNormAttrs
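Both attrs nodes are registered with TVM's node system, so the fields and defaults declared above surface in Python on the call node that the op constructors build. A minimal sketch, assuming the usual reflection-based attribute access (variable names are illustrative)::

    from tvm import relay

    x = relay.Var("x")
    # relay.nn.dropout returns a TupleWrapper; tuple_value holds the CallNode
    call = relay.nn.dropout(x).tuple_value
    assert call.attrs.rate == 0.5  # the default declared in DropoutAttrs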

/*! \brief Attributes for LRN operator */
struct LRNAttrs : public tvm::AttrsNode<LRNAttrs> {
  IndexExpr size;
@@ -11,6 +11,32 @@ from .expr import Expr, Constant, Let, Var, Function, If
from .env import Environment


class TupleWrapper(tvm._ffi.node.NodeGeneric):
    """TupleWrapper.

    This class is a Python wrapper for a Relay tuple of known size.
    It allows for accessing the fields of the Relay tuple as though
    it were a Python tuple.
    """

    def __init__(self, tuple_value, size):
        self.tuple_value = tuple_value
        self.size = size

    def asnode(self):
        """Returns the underlying Relay tuple if this wrapper is passed
        as an argument to an FFI function."""
        return self.tuple_value

    def __getitem__(self, key):
        return self.tuple_value.fields[key]

    def __len__(self):
        return len(self.tuple_value.fields)
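A minimal sketch of the wrapper in use, assuming ``relay.Var`` and ``relay.Tuple`` as imported above (names are illustrative)::

    from tvm import relay
    from tvm.relay.ir_builder import TupleWrapper

    a, b = relay.Var("a"), relay.Var("b")
    wrapped = TupleWrapper(relay.Tuple([a, b]), 2)
    assert len(wrapped) == 2
    first = wrapped[0]        # indexes the underlying tuple's fields, i.e. a
    node = wrapped.asnode()   # the raw relay.Tuple, as FFI functions will see it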


def _convert_to_value(arg, ctxt=tvm.cpu(0)):
    # type: (Any, tvm.Context) -> tvm.nd.NDArray
    """Convert Python values into the appropriate types
@@ -61,6 +87,8 @@ def convert(arg):
        return relay.Tuple([convert(el) for el in arg])
    elif isinstance(arg, PartialFunc):
        return arg.to_func()
    elif isinstance(arg, tvm._ffi.node.NodeGeneric):
        return arg.asnode()
    else:
        value = _convert_to_value(arg)
        return Constant(value)
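The new ``NodeGeneric`` branch is what lets a ``TupleWrapper`` be passed anywhere an expression is expected: ``convert`` unwraps it via ``asnode()``. A hedged sketch, assuming ``convert`` is importable from ``tvm.relay.ir_builder`` at this revision::

    from tvm import relay
    from tvm.relay.ir_builder import TupleWrapper, convert

    tw = TupleWrapper(relay.Tuple([relay.Var("a"), relay.Var("b")]), 2)
    expr = convert(tw)            # dispatches through the NodeGeneric branch
    assert expr is tw.tuple_value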
@@ -1,5 +1,6 @@
"""Neural network operations."""
from __future__ import absolute_import as _abs
from tvm.relay.ir_builder import TupleWrapper
from . import _make
@@ -484,6 +485,7 @@ def lrn(data, size=5, axis=1, bias=2, alpha=.00001, beta=0.75):

    .. math::
        (data / (bias + (alpha * sum_data^2 / size))^beta)

    Parameters
    ----------
    data : relay.Expr
@@ -535,3 +537,103 @@ def l2_normalize(data, eps, axis=None):
        The computed result.
    """
    return _make.l2_normalize(data, eps, axis)

def dropout(data, rate=0.5):
    """Applies the dropout operation to the input array.

    During training, each element of the input is set to zero with
    probability ``rate``. The whole array is rescaled by ``1/(1-rate)``
    to keep the expected sum of the input unchanged.

    Parameters
    ----------
    data : relay.Expr
        The input data to the operator.

    rate : float, optional (default=0.5)
        The probability for an element to be reset to 0.

    Returns
    -------
    result : relay.Tuple([relay.Expr, relay.Expr])
        The first member of the tuple is the result of dropping elements from
        ``data`` and rescaling. The second member is a "mask" tensor, which is
        of the same shape and data type as ``data`` and, for each element in
        ``data``, is 1.0 if the element was not dropped and 0.0 if it was.
    """
    result = _make.dropout(data, rate)
    return TupleWrapper(result, 2)
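A usage sketch mirroring the test added in this PR; the shapes are hypothetical, and the ``IRBuilder`` API is the one the tests below use::

    import tvm
    from tvm import relay

    ib = relay.ir_builder.IRBuilder()
    x = ib.param("x", relay.ty.TensorType((3, 4, 5), "float32"))
    with ib.function(x) as func:
        ib.ret(relay.nn.dropout(x, rate=0.25))  # TupleWrapper of (output, mask)
    ib.ret(func)

    func = relay.ir_pass.infer_type(ib.env, func.to_func())
    # ret_type is TupleType([TensorType((3, 4, 5)), TensorType((3, 4, 5))])
    print(func.checked_type.ret_type)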

def batch_norm(data, gamma, beta, moving_mean, moving_var,
               axis=1, epsilon=1e-5, center=True, scale=True):
    r"""
    Batch normalization layer (Ioffe and Szegedy, 2014).
    Normalizes the input at each batch, i.e. applies a transformation
    that maintains the mean activation close to 0 and the activation
    standard deviation close to 1.

    .. math::

        data\_mean[i] = mean(data[:,i,:,...]) \\
        data\_var[i] = var(data[:,i,:,...])

    Then compute the normalized output, which has the same shape as the input, as follows:

    .. math::

        out[:,i,:,...] = \frac{data[:,i,:,...] - data\_mean[i]}{\sqrt{data\_var[i]+\epsilon}}
            * gamma[i] + beta[i]

    Both *mean* and *var* return a scalar by treating the input as a vector.

    Assume the input has size *k* on axis 1, then both ``gamma`` and ``beta``
    have shape *(k,)*.

    Besides the inputs and the outputs, this operator accepts two auxiliary
    states, ``moving_mean`` and ``moving_var``, which are *k*-length
    vectors. They are global statistics for the whole dataset, which are updated by::

        moving_mean = moving_mean * momentum + data_mean * (1 - momentum)
        moving_var = moving_var * momentum + data_var * (1 - momentum)

    The parameter ``axis`` specifies which axis of the input shape denotes
    the 'channel' (separately normalized groups). The default is 1.
    Specifying -1 sets the channel axis to be the last item in the input shape.

    .. note::

        This operator can be optimized away for inference.

    Parameters
    ----------
    data : relay.Expr
        Input to which batch_norm will be applied.

    gamma : relay.Expr
        The gamma scale factor.

    beta : relay.Expr
        The beta offset factor.

    moving_mean : relay.Expr
        Running mean of input.

    moving_var : relay.Expr
        Running variance of input.

    axis : int, optional, default=1
        Specify along which shape axis the channel is specified.

    epsilon : double, optional, default=1e-5
        Small float added to variance to avoid dividing by zero.

    center : boolean, optional, default=True
        If True, add offset of beta to the normalized tensor. If False,
        beta is ignored.

    scale : boolean, optional, default=True
        If True, multiply by gamma. If False, gamma is not used.
        When the next layer is piecewise linear (e.g., nn.relu),
        this can be disabled since the scaling will be done by the next layer.

    Returns
    -------
    result : relay.Tuple([relay.Expr, relay.Expr, relay.Expr])
        Tuple of normed data (same shape as input), new running mean
        (k-length vector), and new running variance (k-length vector).
    """
    result = _make.batch_norm(data, gamma, beta, moving_mean, moving_var,
                              axis, epsilon, center, scale)
    return TupleWrapper(result, 3)
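A usage sketch following the tests added in this PR; shapes are hypothetical (channel axis 1 has size 16, so all four statistics are 16-vectors)::

    import tvm
    from tvm import relay

    ib = relay.ir_builder.IRBuilder()
    data = ib.param("data", relay.ty.TensorType((8, 16, 32, 32), "float32"))
    gamma = ib.param("gamma", relay.ty.TensorType((16,), "float32"))
    beta = ib.param("beta", relay.ty.TensorType((16,), "float32"))
    mean = ib.param("moving_mean", relay.ty.TensorType((16,), "float32"))
    var = ib.param("moving_var", relay.ty.TensorType((16,), "float32"))
    with ib.function(data, gamma, beta, mean, var) as func:
        ib.ret(relay.nn.batch_norm(data, gamma, beta, mean, var, axis=1))
    ib.ret(func)

    func = relay.ir_pass.infer_type(ib.env, func.to_func())
    # ret_type is TupleType([TensorType((8, 16, 32, 32)),
    #                        TensorType((16,)), TensorType((16,))])
    print(func.checked_type.ret_type)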
@@ -217,5 +217,177 @@ Normalizes along dimension axis using an L2 norm
.set_support_level(2)
.add_type_rel("Identity", IdentityRel);

// Dropout
TVM_REGISTER_NODE_TYPE(DropoutAttrs);

bool DropoutRel(const Array<Type>& types,
                int num_inputs,
                const Attrs& attrs,
                const TypeReporter& reporter) {
  CHECK_EQ(types.size(), 2);
  const auto* data = types[0].as<TensorTypeNode>();
  if (data == nullptr) return false;

  // dropout returns the original tensor with dropout applied
  // and a mask tensor (1.0 where element not dropped, 0.0 where dropped)
  auto ret_type = TensorTypeNode::make(data->shape, data->dtype);
  reporter->Assign(types[1], TupleTypeNode::make(Array<Type>({ret_type, ret_type})));
  return true;
}

Expr MakeDropout(Expr data, double rate) {
  auto attrs = make_node<DropoutAttrs>();
  attrs->rate = rate;
  static const Op& op = Op::Get("nn.dropout");
  return CallNode::make(op, {data}, Attrs(attrs), {});
}

TVM_REGISTER_API("relay.op.nn._make.dropout")
.set_body([](const TVMArgs& args, TVMRetValue* rv) {
    runtime::detail::unpack_call<Expr, 2>(MakeDropout, args, rv);
  });

RELAY_REGISTER_OP("nn.dropout")
.describe(R"code(Applies the dropout operation to the input array.

During training, each element of the input is set to zero with probability ``rate``.
The whole array is rescaled by ``1/(1-rate)`` to keep the expected sum of the input unchanged.

)code" TVM_ADD_FILELINE)
.set_num_inputs(1)
.add_argument("data", "Tensor", "Input to which dropout will be applied.")
.set_support_level(1)
.add_type_rel("Dropout", DropoutRel);

// batch_norm
TVM_REGISTER_NODE_TYPE(BatchNormAttrs);

bool CheckVectorLength(int64_t dim, const DataType& dtype, Type vector, const char* name) {
  const auto* candidate = vector.as<TensorTypeNode>();
  CHECK(candidate != nullptr)
    << name << " should be a vector but is not a tensor type.";
  CHECK_EQ(dtype, candidate->dtype)
    << name << " should be of the same data type as the original but it is not.";
  CHECK_EQ(candidate->shape.size(), 1)
    << name << " should be a vector but has a shape of "
    << candidate->shape.size() << " dimensions instead of 1.";

  const int64_t* length = as_const_int(candidate->shape[0]);
  if (length == nullptr) return false;
  CHECK(*length == dim)
    << name << " should be as long as the channel but has length "
    << *length << " instead of " << dim << ".";
  return true;
}
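A hedged sketch of what this helper rejects: with the default ``scale=True``, a ``gamma`` whose length does not match the channel dimension trips the CHECK during type inference. (The first test below passes a mismatched gamma only because ``center=False, scale=False`` skips these checks.) Shapes here are hypothetical::

    import tvm
    from tvm import relay

    ib = relay.ir_builder.IRBuilder()
    data = ib.param("data", relay.ty.TensorType((3, 2, 1), "float32"))
    gamma = ib.param("gamma", relay.ty.TensorType((5,), "float32"))  # channel is 2
    beta = ib.param("beta", relay.ty.TensorType((2,), "float32"))
    mean = ib.param("moving_mean", relay.ty.TensorType((2,), "float32"))
    var = ib.param("moving_var", relay.ty.TensorType((2,), "float32"))
    with ib.function(data, gamma, beta, mean, var) as func:
        ib.ret(relay.nn.batch_norm(data, gamma, beta, mean, var))  # scale=True
    ib.ret(func)

    try:
        relay.ir_pass.infer_type(ib.env, func.to_func())
    except tvm.TVMError:
        pass  # "The gamma scale factor should be as long as the channel ..."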

bool BatchNormRel(const Array<Type>& types,
                  int num_inputs,
                  const Attrs& attrs,
                  const TypeReporter& reporter) {
  CHECK_EQ(types.size(), 6);
  const auto* data = types[0].as<TensorTypeNode>();
  if (data == nullptr) return false;
  if (data->shape.size() == 0) return false;

  const BatchNormAttrs* param = attrs.as<BatchNormAttrs>();

  // axis of -1 means use the last dimension
  CHECK(param->axis >= -1 && param->axis < (int)data->shape.size());
  int axis = (param->axis != -1) ? param->axis : data->shape.size() - 1;

  auto dim = as_const_int(data->shape[axis]);
  if (dim == nullptr) return false;

  // if we are using beta and gamma, they need to be of shape (dim,)
  if (param->scale && !CheckVectorLength(*dim, data->dtype, types[1], "The gamma scale factor")) {
    return false;
  }

  if (param->center && !CheckVectorLength(*dim, data->dtype, types[2], "The beta offset factor")) {
    return false;
  }

  // the two running averages must also be vectors of length dim
  if (!CheckVectorLength(*dim, data->dtype, types[3], "The moving mean")) {
    return false;
  }
  if (!CheckVectorLength(*dim, data->dtype, types[4], "The moving variance")) {
    return false;
  }

  // output is a tuple of the normed data (same shape as input), new running mean,
  // and new running variance (the latter two are both vectors of length dim)
  std::vector<Type> fields;
  auto vec_ty = TensorTypeNode::make(Array<IndexExpr>({data->shape[axis]}),
                                     data->dtype);
  fields.push_back(TensorTypeNode::make(data->shape, data->dtype));
  fields.push_back(vec_ty);
  fields.push_back(vec_ty);
  reporter->Assign(types[5], TupleTypeNode::make(Array<Type>(fields)));
  return true;
}

Expr MakeBatchNorm(Expr data, Expr gamma, Expr beta, Expr moving_mean, Expr moving_var,
                   int axis, double epsilon, bool center, bool scale) {
  auto attrs = make_node<BatchNormAttrs>();
  attrs->axis = axis;
  attrs->epsilon = epsilon;
  attrs->center = center;
  attrs->scale = scale;
  static const Op& op = Op::Get("nn.batch_norm");
  return CallNode::make(op, {data, gamma, beta, moving_mean, moving_var}, Attrs(attrs), {});
}

TVM_REGISTER_API("relay.op.nn._make.batch_norm")
.set_body([](const TVMArgs& args, TVMRetValue* rv) {
    runtime::detail::unpack_call<Expr, 9>(MakeBatchNorm, args, rv);
  });

RELAY_REGISTER_OP("nn.batch_norm")
.describe(R"code(Batch normalization layer (Ioffe and Szegedy, 2014).
Normalizes the input at each batch, i.e. applies a transformation
that maintains the mean activation close to 0 and the activation
standard deviation close to 1.

.. math::

    data\_mean[i] = mean(data[:,i,:,...]) \\
    data\_var[i] = var(data[:,i,:,...])

Then compute the normalized output, which has the same shape as the input, as follows:

.. math::

    out[:,i,:,...] = \frac{data[:,i,:,...] - data\_mean[i]}{\sqrt{data\_var[i]+\epsilon}} \
        * gamma[i] + beta[i]

Both *mean* and *var* return a scalar by treating the input as a vector.

Assume the input has size *k* on axis 1, then both ``gamma`` and ``beta`` have shape *(k,)*.

Besides the inputs and the outputs, this operator accepts two auxiliary
states, ``moving_mean`` and ``moving_var``, which are *k*-length
vectors. They are global statistics for the whole dataset, which are updated
by::

    moving_mean = moving_mean * momentum + data_mean * (1 - momentum)
    moving_var = moving_var * momentum + data_var * (1 - momentum)

The parameter ``axis`` specifies which axis of the input shape denotes
the 'channel' (separately normalized groups). The default is 1. Specifying -1 sets the channel
axis to be the last item in the input shape.

.. note::
    This operator can be optimized away for inference.
)code" TVM_ADD_FILELINE)
.set_num_inputs(5)
.add_argument("data", "Tensor", "Input to which batch_norm will be applied.")
.add_argument("gamma", "Tensor", "The gamma scale factor.")
.add_argument("beta", "Tensor", "The beta offset factor.")
.add_argument("moving_mean", "Tensor", "Running mean of input.")
.add_argument("moving_var", "Tensor", "Running variance of input.")
.set_support_level(1)
.add_type_rel("BatchNorm", BatchNormRel);

}  // namespace relay
}  // namespace tvm
@@ -196,6 +196,93 @@ def test_l2_normalize():
    ftype = func.checked_type
    assert ftype.ret_type == relay.ty.TensorType((n, c, h, w), "float32")

def test_dropout():
    ib = relay.ir_builder.IRBuilder()
    input_ty = relay.ty.TensorType((3, 4, 5), "int8")
    x = ib.param("x", input_ty)
    with ib.function(x) as func:
        ib.ret(relay.nn.dropout(x))
    ib.ret(func)

    func = relay.ir_pass.infer_type(ib.env, func.to_func())
    ftype = func.checked_type
    assert ftype.ret_type == relay.ty.TupleType([input_ty, input_ty])

    ib = relay.ir_builder.IRBuilder()
    n, t, d = tvm.var("n"), tvm.var("t"), tvm.var("d")
    input_ty = relay.ty.TensorType((n, t, d), "float32")
    x = ib.param("x", input_ty)
    with ib.function(x) as func:
        ib.ret(relay.nn.dropout(x, rate=0.75))
    ib.ret(func)

    func = relay.ir_pass.infer_type(ib.env, func.to_func())
    ftype = func.checked_type
    assert ftype.ret_type == relay.ty.TupleType([input_ty, input_ty])


def test_batch_norm():
    # beta and gamma ignored
    ib = relay.ir_builder.IRBuilder()
    data = ib.param("data", relay.ty.TensorType((3, 2, 1), "float32"))
    gamma = ib.param("gamma", relay.ty.TensorType((5,), "int8"))
    beta = ib.param("beta", relay.ty.TensorType((12, 16), "int64"))
    moving_mean = ib.param("moving_mean", relay.ty.TensorType((2,), "float32"))
    moving_var = ib.param("moving_var", relay.ty.TensorType((2,), "float32"))
    with ib.function(data, gamma, beta, moving_mean, moving_var) as func:
        ib.ret(relay.nn.batch_norm(data, gamma, beta, moving_mean, moving_var,
                                   center=False, scale=False))
    ib.ret(func)

    func = relay.ir_pass.infer_type(ib.env, func.to_func())
    ftype = func.checked_type
    assert ftype.ret_type == relay.ty.TupleType(tvm.convert([
        relay.ty.TensorType((3, 2, 1), "float32"),
        relay.ty.TensorType((2,), "float32"),
        relay.ty.TensorType((2,), "float32")
    ]))

    # with beta and gamma, different axis
    ib = relay.ir_builder.IRBuilder()
    data = ib.param("data", relay.ty.TensorType((3, 2, 1), "float32"))
    gamma = ib.param("gamma", relay.ty.TensorType((3,), "float32"))
    beta = ib.param("beta", relay.ty.TensorType((3,), "float32"))
    moving_mean = ib.param("moving_mean", relay.ty.TensorType((3,), "float32"))
    moving_var = ib.param("moving_var", relay.ty.TensorType((3,), "float32"))
    with ib.function(data, gamma, beta, moving_mean, moving_var) as func:
        ib.ret(relay.nn.batch_norm(data, gamma, beta, moving_mean, moving_var,
                                   axis=0, center=False, scale=False))
    ib.ret(func)

    func = relay.ir_pass.infer_type(ib.env, func.to_func())
    ftype = func.checked_type
    assert ftype.ret_type == relay.ty.TupleType(tvm.convert([
        relay.ty.TensorType((3, 2, 1), "float32"),
        relay.ty.TensorType((3,), "float32"),
        relay.ty.TensorType((3,), "float32")
    ]))

    # axis=-1
    ib = relay.ir_builder.IRBuilder()
    data = ib.param("data", relay.ty.TensorType((1, 2, 3), "float32"))
    gamma = ib.param("gamma", relay.ty.TensorType((3,), "float32"))
    beta = ib.param("beta", relay.ty.TensorType((3,), "float32"))
    moving_mean = ib.param("moving_mean", relay.ty.TensorType((3,), "float32"))
    moving_var = ib.param("moving_var", relay.ty.TensorType((3,), "float32"))
    with ib.function(data, gamma, beta, moving_mean, moving_var) as func:
        ib.ret(relay.nn.batch_norm(data, gamma, beta, moving_mean, moving_var,
                                   axis=-1, center=False, scale=False))
    ib.ret(func)

    func = relay.ir_pass.infer_type(ib.env, func.to_func())
    ftype = func.checked_type
    assert ftype.ret_type == relay.ty.TupleType(tvm.convert([
        relay.ty.TensorType((1, 2, 3), "float32"),
        relay.ty.TensorType((3,), "float32"),
        relay.ty.TensorType((3,), "float32")
    ]))


if __name__ == "__main__":
    test_unary_op()
    test_single_op()

@@ -207,3 +294,5 @@ if __name__ == "__main__":
    test_binary_broadcast_op()
    test_lrn()
    test_l2_normalize()
    test_dropout()
    test_batch_norm()