Adding mean_variance_normalization CNTK and ONNX op, and LayerNormalization ONNX support.
Parent: 5f1d710709
Commit: 4017a1664a
@@ -4505,6 +4505,11 @@ namespace CNTK
     ///
     CNTK_API FunctionPtr PerDimMeanVarianceNormalize(const Variable& operand, const Variable& mean, const Variable& invStdDev, const std::wstring& name = L"");

+    ///
+    /// Mean-variance normalization of the specified input operand.
+    ///
+    CNTK_API FunctionPtr MeanVarianceNormalization(const Variable& operand, const bool useStatsAcrossChannels = false, const bool doVarianceScaling = true, const std::wstring& name = L"");
+
     ///
     /// Per dimension mean-variance normalization of the specified input operand.
     ///
@@ -2340,7 +2340,7 @@ namespace CNTK

         size_t channels = operand.Shape()[2];
         if (channels != biases.size())
-            LogicError("ImageScaler: number of biase (%d) does not equal channels of the image (%d)", (int)biases.size(), (int)(channels));
+            LogicError("ImageScaler: number of biases (%d) does not equal channels of the image (%d)", (int)biases.size(), (int)(channels));

         auto additionalProperties = Dictionary();
         additionalProperties[L"Scaler"] = scale;
@@ -2375,6 +2375,32 @@ namespace CNTK
         return AsBlock(std::move(ElementTimes(Minus(operandPlaceholder, meanPlaceholder), invStdDevPlaceholder)), { { operandPlaceholder, operand },{ meanPlaceholder, mean },{ invStdDevPlaceholder, invStdDev } }, L"PerDimMeanVarianceNormalize", name);
     }

+    FunctionPtr MeanVarianceNormalization(const Variable& operand, const bool useStatsAcrossChannels, const bool doVarianceScaling, const std::wstring& name)
+    {
+        Dictionary additionalAttributes;
+        additionalAttributes[PrimitiveFunction::AttributeNameUseStatsAcrossChannels] = useStatsAcrossChannels;
+        additionalAttributes[PrimitiveFunction::AttributeNameDoVarianceScaling] = doVarianceScaling;
+
+        auto operandPlaceholder = PlaceholderVariable(L"operand");
+        size_t operandRank = operand.Shape().Rank();
+        if (operandRank < 2 && !useStatsAcrossChannels)
+            InvalidArgument("When rank of the operand is < 2, useStatsAcrossChannels must be set to false, because there is no channel dimension.");
+        auto numAxesToReduce = useStatsAcrossChannels ? operandRank : operandRank - 1; // Assuming last dim to be the channel dim.
+        std::vector<Axis> axesToReduce(numAxesToReduce);
+        for (size_t i = 0; i < numAxesToReduce; ++i)
+            axesToReduce[i] = Axis(i);
+        FunctionPtr operandMeanRemoved = Minus(operandPlaceholder, ReduceMean(operandPlaceholder, axesToReduce));
+        if (!doVarianceScaling)
+        {
+            return AsBlock(std::move(operandMeanRemoved), { { operandPlaceholder, operand } }, std::move(additionalAttributes), L"MeanVarianceNormalization", name);
+        }
+        else
+        {
+            return AsBlock(std::move(ElementDivide(operandMeanRemoved, Sqrt(ReduceMean(Square(operandMeanRemoved), axesToReduce)))),
+                { { operandPlaceholder, operand } }, std::move(additionalAttributes), L"MeanVarianceNormalization", name);
+        }
+    }
+
     FunctionPtr Convolution(const Variable& convolutionMap,
         const Variable& operand,
         const NDShape& strides,
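For reference, the block assembled above boils down to subtracting the mean and, optionally, dividing by the standard deviation taken over the same axes. The following standalone NumPy sketch is an illustration added here, not part of the commit; the helper name mvn_reference is made up, and it assumes, as the C++ comment does, that the channel is the last CNTK static axis, which corresponds to the first NumPy axis.

import numpy as np

def mvn_reference(x, use_stats_across_channels=False, do_variance_scaling=True):
    # In NumPy layout the CNTK channel axis is axis 0, so per-channel statistics
    # are taken over all remaining axes; with use_stats_across_channels=True the
    # statistics cover the whole tensor.
    axes = tuple(range(x.ndim)) if use_stats_across_channels else tuple(range(1, x.ndim))
    centered = x - x.mean(axis=axes, keepdims=True)
    if not do_variance_scaling:
        return centered
    # Biased variance (mean of squared deviations), no epsilon, matching the block above.
    return centered / np.sqrt((centered ** 2).mean(axis=axes, keepdims=True))

x = np.array([[[0., 2.], [4., 6.]],
              [[0., 4.], [8., 12.]]])
print(mvn_reference(x, do_variance_scaling=False))  # per-channel means 3 and 6 removed
print(mvn_reference(x))                             # roughly [-1.3416, -0.4472, 0.4472, 1.3416] per channel

The same numbers appear in the Python doctest and the unit-test expectations further down in this commit.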
@@ -312,6 +312,8 @@ namespace CNTK
         static const std::wstring AttributeNameCustomAttributes;
         static const std::wstring AttributeNameNumItems;
         static const std::wstring AttributeNameFillValue;
+        static const std::wstring AttributeNameUseStatsAcrossChannels;
+        static const std::wstring AttributeNameDoVarianceScaling;

     protected:
         PrimitiveFunction(PrimitiveOpType op, const std::vector<Variable>& inputs, Dictionary&& functionConfig, const std::wstring& functionName, const std::wstring& uid)
@@ -115,4 +115,6 @@ namespace CNTK
     /*static*/ const std::wstring PrimitiveFunction::AttributeNameCustomAttributes = L"customAttributes";
     /*static*/ const std::wstring PrimitiveFunction::AttributeNameNumItems = L"numItems";
     /*static*/ const std::wstring PrimitiveFunction::AttributeNameFillValue = L"fillValue";
+    /*static*/ const std::wstring PrimitiveFunction::AttributeNameUseStatsAcrossChannels = L"useStatsAcrossChannels";
+    /*static*/ const std::wstring PrimitiveFunction::AttributeNameDoVarianceScaling = L"doVarianceScaling";
 }
@@ -1276,6 +1276,13 @@ void CNTKToONNXHelper::CopyAttributes(const FunctionPtr& src, ONNXIR::Node* node
             node->AddAttribute("scale", scale);
             node->AddAttribute("bias", biases);
         }
+        else if (src->OpName() == L"MeanVarianceNormalization")
+        {
+            auto useStatsAcrossChannels = (int64_t)(src->Attributes()[L"useStatsAcrossChannels"].Value<bool>());
+            auto doVarianceScaling = (int64_t)(src->Attributes()[L"doVarianceScaling"].Value<bool>());
+            node->AddAttribute(attributesMap[L"useStatsAcrossChannels"], useStatsAcrossChannels);
+            node->AddAttribute(attributesMap[L"doVarianceScaling"], doVarianceScaling);
+        }
     }
     else
     {
@@ -1468,6 +1475,7 @@ ONNXIR::Node* CNTKToONNXHelper::AddNode(const FunctionPtr& src, ONNXIR::Graph* g
             }
         }
         else
+        {
             //
             // CNTK Times OP is way more flexible for ONNX, so depend on the inputs and output shape,
             // we will need to insert some reshapes.
@@ -1505,8 +1513,37 @@ ONNXIR::Node* CNTKToONNXHelper::AddNode(const FunctionPtr& src, ONNXIR::Graph* g
             else
                 node = graph->AddNode(nodeName, ToOPName(src), "", orderedInputs, outputs);
         }
+        else if (src->OpName() == L"LayerNormalization")
+        {
+            // Special handling of LayerNormalization to use MeanVarianceNormalization (and not reduce* ops).
+
+            // This assumes that the orderedInputs are in the order:
+            // [0]: tensor operand, [1]: scale constant, [2]: bias constant.
+            // Also assumes that tensor operand is index [2] in src->Inputs().
+            auto input0 = orderedInputs[0];
+            onnx::TypeProto input0ArgType = ToTypeProto(src->Inputs()[2].Shape(), src->Inputs()[2].HasBatchAxis());
+            UpdateONNXType(src->Inputs()[2].GetDataType(), input0ArgType);
+            ONNXIR::NodeArg mvnTensorOutputArg(nodeName + string("_mvn_output0"), &input0ArgType);
+            ONNXIR::Node* mvnNode = graph->AddNode(nodeName + string("_MVN"), "MeanVarianceNormalization",
+                "", { input0 }, { mvnTensorOutputArg });
+            mvnNode->AddAttribute("across_channels", static_cast<int64_t>(1));
+            mvnNode->AddAttribute("normalize_variance", static_cast<int64_t>(1));
+
+            auto input1 = orderedInputs[1];
+            ONNXIR::NodeArg mulTensorOutputArg(nodeName + string("_mul_output0"), &input0ArgType);
+            ONNXIR::Node* mulNode = graph->AddNode(nodeName + string("_mul"), "Mul",
+                "", { mvnTensorOutputArg, input1 }, { mulTensorOutputArg });
+            mulNode->AddAttribute("broadcast", static_cast<int64_t>(1));
+
+            auto input2 = orderedInputs[2];
+            ONNXIR::NodeArg addTensorOutputArg(nodeName + string("_add_output0"), &input0ArgType);
+            node = graph->AddNode(nodeName + string("_add"), "Add",
+                "", { mulTensorOutputArg, input2 }, { addTensorOutputArg });
+            node->AddAttribute("broadcast", static_cast<int64_t>(1));
+        }
         else
             node = graph->AddNode(nodeName, ToOPName(src), "", orderedInputs, outputs);
+    }

     //
     // Copy and validate attributes.
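Since the targeted ONNX op set has no single LayerNormalization node, the exporter above rewrites it as a three-node chain: MeanVarianceNormalization over the whole sample, a broadcast Mul with the scale constant, and a broadcast Add of the bias. A NumPy sketch of the intended equivalence follows; the function name is invented for this illustration, and epsilon is ignored, matching the fact that the emitted MeanVarianceNormalization node carries no epsilon attribute.

import numpy as np

def layer_norm_via_mvn(x, scale, bias):
    # MeanVarianceNormalization node: statistics across the whole sample
    # (across_channels=1) with variance normalization (normalize_variance=1).
    centered = x - x.mean()
    mvn = centered / np.sqrt((centered ** 2).mean())
    # Broadcast Mul with the scale constant, then broadcast Add of the bias.
    return mvn * scale + bias

x = np.arange(12, dtype=np.float64).reshape(3, 4)
print(layer_norm_via_mvn(x, scale=2.0, bias=0.5))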
@@ -1752,6 +1752,12 @@ FunctionPtr ONNXToCNTKHelper::CreateFunction(const Node *node, const std::vector
             return ImageScaler(inputs[0], scale, bias, ToWString(node->Name()));
         }
     }
+    else if (onnxOpName == "MeanVarianceNormalization")
+    {
+        size_t acrossChannels = GetNamedAttributeAsInt64(node, "across_channels", 0);
+        size_t normalizeVariance = GetNamedAttributeAsInt64(node, "normalize_variance", 1);
+        return MeanVarianceNormalization(inputs[0], !!acrossChannels, !!normalizeVariance, ToWString(node->Name()));
+    }
     else
     {
         LogicError("ONNX (%s) is not supported in CNTK", onnxOpName.c_str());
@@ -70,7 +70,7 @@ namespace ONNX
         { L"epsilon", "epsilon" },
         // { L"", "momentum" },
     } } },
-    // from ONNX experiament, added to test Caffe models
+    // from ONNX experiment, added to test Caffe models
     // TODO: set key as BatchNormalization instead of BatchNormalizationCaffe
     { L"BatchNormalizationCaffe",{ {
         { L"BatchNormalization", "SpatialBN" },
@@ -78,7 +78,13 @@ namespace ONNX
         // { L"", "is_test" },
         { L"epsilon", "epsilon" },
         // { L"", "momentum" },
     } } },
+    { L"LayerNormalization",{ {
+        { L"LayerNormalization", "LayerNormalization" },
+        { L"initial_scale", "initial_scale" },
+        { L"initial_bias", "initial_bias" },
+        { L"epsilon", "epsilon" },
+    } } },
     { L"LocalResponseNormalization",{ {
         { L"LocalResponseNormalization", "LRN" },
         { L"size", "size" },
@@ -359,6 +365,11 @@ namespace ONNX
     { L"ImageScaler",{ {
         { L"ImageScaler", "ImageScaler" },
     } } },
+    { L"MeanVarianceNormalization",{ {
+        { L"MeanVarianceNormalization", "MeanVarianceNormalization" },
+        { L"useStatsAcrossChannels", "across_channels" },
+        { L"doVarianceScaling", "normalize_variance" },
+    } } },
     };

     // given a cntkOpName and cntk attribute OpName which is saved in CNTK::Function's attribute,
@@ -442,6 +453,7 @@ namespace ONNX
     { L"Times",{ 1, 0 } },
     { L"Gather",{ 1, 0 } },
     { L"PReLU",{ 1, 0 } },
+    { L"LayerNormalization",{ 1, 2, 0 } },
     };

     //
@@ -428,6 +428,18 @@ namespace ONNXIR {
         .Attr("normalize_variance", "If false, normalize the mean only. Default is true.",
              AttrType::AttributeProto_AttributeType_INT, int64_t(1));

+    // Manually added on 2/14/2018.
+    REGISTER_OPERATOR_SCHEMA(MeanVarianceNormalization)
+        .Description("Perform mean variance normalization.")
+        .Input("input", "Input tensor of any shape", "T")
+        .Output("output", "Output tensor of same shape and type as input X.", "T")
+        .TypeConstraint("T", { "tensor(float16)", "tensor(float)", "tensor(double)" }, "Constrain input and output "
+            "types to float tensors.")
+        .Attr("across_channels", "If true, mean and variance are computed across channels. "
+            "Default is false.", AttrType::AttributeProto_AttributeType_INT, int64_t(0))
+        .Attr("normalize_variance", "If false, normalize the mean only. Default is true.",
+            AttrType::AttributeProto_AttributeType_INT, int64_t(1));
+
     REGISTER_OPERATOR_SCHEMA(LpNormalization)
         .Description("Given a matrix, apply Lp-normalization along the provided axis. "
             "For RS4 default of p = 2 and it will perform L2 normalization. Divide each "
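To see what a graph node conforming to this manually added schema looks like, here is a small sketch using the standard onnx Python helper; this is an assumption of the illustration, since the schema above lives in the bundled ONNXIR copy and a stock ONNX checker may not recognize the op. The attribute values shown are the schema defaults.

from onnx import helper

# Node with the two attributes defined by the schema above; across_channels
# defaults to 0 (per-channel statistics) and normalize_variance to 1.
mvn_node = helper.make_node(
    "MeanVarianceNormalization",
    inputs=["input"],     # "Input tensor of any shape"
    outputs=["output"],   # same shape and type as the input
    across_channels=0,
    normalize_variance=1,
)
print(mvn_node)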
@@ -3912,4 +3912,47 @@ def cast(node_input, dtype, name=''):
     from cntk.cntk_py import cast
     arg_node_input = sanitize_input(node_input, get_data_type(node_input))
     arg_dtype = sanitize_dtype_cntk(dtype)
     return cast(arg_node_input, arg_dtype, name)
+
+@typemap
+def mean_variance_normalization(operand, use_stats_across_channels=False, do_variance_scaling=True, name=''):
+    '''
+    Computes mean-variance normalization of the specified input operand.
+
+    This operation computes the mean and variance for the entire tensor if use_stats_across_channels is True.
+    If use_stats_across_channels is False, it computes the mean and variance per channel and normalizes each
+    channel with its own mean and variance. If do_variance_scaling is False, only the mean is subtracted,
+    and the variance scaling is omitted.
+
+    Example:
+        >>> data = np.array([[[0., 2], [4., 6.]], [[0., 4], [8., 12.]]]).astype(np.float32)
+        >>> data
+        array([[[ 0., 2.],
+                [ 4., 6.]],
+        <BLANKLINE>
+               [[ 0., 4.],
+                [ 8., 12.]]], dtype=float32)
+        >>> saved_precision = np.get_printoptions()['precision']
+        >>> np.set_printoptions(precision=4) # For consistent display up to 4 decimals.
+        >>> C.mean_variance_normalization(data).eval()
+        array([[[-1.3416, -0.4472],
+                [ 0.4472, 1.3416]],
+        <BLANKLINE>
+               [[-1.3416, -0.4472],
+                [ 0.4472, 1.3416]]], dtype=float32)
+        >>> np.set_printoptions(precision=saved_precision) # Resetting the display precision.
+
+    Args:
+        operand: Input tensor, with dimensions :math:`[C \\times H \\times W]`.
+        use_stats_across_channels (bool): If False, mean and variance are computed per channel.
+            If True, mean and variance are computed over the entire tensor (all axes).
+        do_variance_scaling (bool): If False, only the mean is subtracted. If True, it is also
+            scaled by the inverse of the standard deviation.
+        name (str, optional): the name of the Function instance in the network
+    Returns:
+        :class:`~cntk.ops.functions.Function`
+
+    '''
+    from cntk.cntk_py import mean_variance_normalization
+    operand = sanitize_input(operand, get_data_type(operand))
+    return mean_variance_normalization(operand, use_stats_across_channels, do_variance_scaling, name)
@@ -462,3 +462,52 @@ def test_auto_broadcast_reconcile_issue():
     # check does the reconcile_dynamic_axes call trigger the auto broadcast
     assert len(inputs) == 2
     assert inputs[0].name == 'y' and inputs[1].name == 'x'
+
+MEAN_VARIANCE_NORMALIZATION_DATA = [
+    (np.array([[[0., 2],        # Input tensor
+                [4., 6.]],
+               [[0., 4],
+                [8., 12.]]]),
+     False,                     # use_stats_across_channels
+     False,                     # do_variance_scaling
+     np.array([[[-3., -1.],     # Output tensor
+                [1., 3.]],
+               [[-6., -2],
+                [2., 6.]]])
+    ),
+    (np.array([[[0., 2],        # Input tensor
+                [4., 6.]],
+               [[0., 4],
+                [8., 12.]]]),
+     False,                     # use_stats_across_channels
+     True,                      # do_variance_scaling
+     np.array([[[-1.34163487, -0.44721162],
+                [ 0.44721162, 1.34163487]],
+               [[-1.34163785, -0.44721264],
+                [ 0.44721264, 1.34163785]]])
+    ),
+    (np.array([[[0., 2],        # Input tensor
+                [4., 6.]],
+               [[8., 10],
+                [12., 14.]]]),
+     True,                      # use_stats_across_channels
+     True,                      # do_variance_scaling
+     np.array([[[-1.52752209, -1.0910871],
+                [-0.6546523, -0.21821743]],
+               [[ 0.21821743, 0.6546523],
+                [ 1.0910871, 1.52752209]]])
+    ),
+]
+
+@pytest.mark.parametrize("input_operand, use_stats_across_channels, do_variance_scaling, output_ref", MEAN_VARIANCE_NORMALIZATION_DATA)
+def test_op_mean_variance_normalization(input_operand, use_stats_across_channels, do_variance_scaling, output_ref, device_id, precision):
+    dt_precision = PRECISION_TO_TYPE[precision]
+    input_ref = AA(input_operand, dtype=dt_precision)
+    a = C.input_variable(shape=input_ref.shape,
+                         dtype=sanitize_dtype_cntk(precision),
+                         needs_gradient=False,
+                         name='a')
+    norm_op = C.mean_variance_normalization(a, use_stats_across_channels=use_stats_across_channels, do_variance_scaling=do_variance_scaling)
+    output_test = norm_op.eval({a: input_ref}, device=cntk_device(device_id))
+
+    assert np.allclose(output_test, output_ref, atol=1e-4)
@@ -458,6 +458,15 @@ def test_ImageScaler(tmpdir):
     model = C.image_scaler(x, scalar, bias);
     verify_one_input(model, image, tmpdir, 'ImageScaler_1')

+#LayerNormalization
+def test_LayerNormalization(tmpdir):
+    test_shapes = [(3, 5, 7), (10, ), (20, 31)]
+    for shape in test_shapes:
+        data = np.reshape(np.arange(np.prod(shape), dtype=np.float32), shape)
+        input_operand = C.input_variable(shape=shape)
+        model0 = C.layers.LayerNormalization(epsilon=0.0)(input_operand)
+        verify_one_input(model0, data, tmpdir, 'LayerNormalization_0')
+
 #LeakyRelu
 def test_LeakyRelu(tmpdir):
     data = np.asarray([[-1, -0.5, 0, 1, 2]], dtype=np.float32)
@@ -552,6 +561,22 @@ def test_Mean(tmpdir):

     verify_two_input(model, in1_data, in2_data, tmpdir, 'Mean_2')

+#MeanVarianceNormalization
+def test_MeanVarianceNormalization(tmpdir):
+    shape = (3, 5, 7)
+    data = np.reshape(np.arange(np.prod(shape), dtype=np.float32), shape)
+
+    input_operand = C.input_variable(shape=shape)
+
+    model0 = C.mean_variance_normalization(input_operand, use_stats_across_channels=False, do_variance_scaling=True)
+    verify_one_input(model0, data, tmpdir, 'MeanVarianceNormalization_0')
+
+    model1 = C.mean_variance_normalization(input_operand, use_stats_across_channels=False, do_variance_scaling=False)
+    verify_one_input(model1, data, tmpdir, 'MeanVarianceNormalization_1')
+
+    model2 = C.mean_variance_normalization(input_operand, use_stats_across_channels=True, do_variance_scaling=True)
+    verify_one_input(model2, data, tmpdir, 'MeanVarianceNormalization_2')
+
 #Min
 def test_Min(tmpdir):
     data0 = np.asarray([1., 1., 1., 1.], dtype=np.float32)
@@ -583,7 +608,7 @@ def test_Pad(tmpdir):
     x = C.input_variable(shape)
     model = C.pad(x, pattern=[(1,1),(2,2)], mode=C.ops.REFLECT_PAD)

     verify_one_input(model, data, tmpdir, 'Pad_1')

 #PRelu
 #def test_PRelu(tmpdir):