Overhauling ConvTranspose ONNX op to match ONNX 1.2.2 spec.

This commit is contained in:
Spandan Tiwari 2018-08-23 10:41:55 -07:00
Родитель 28ada9657b
Коммит 6866f0c888
8 изменённых файлов: 227 добавлений и 49 удалений

Просмотреть файл

@ -4606,7 +4606,7 @@ namespace CNTK
const std::wstring& name = L"");
///
/// Convolution transpose
/// Convolution transpose with auto padding
///
CNTK_API FunctionPtr ConvolutionTranspose(const Variable& convolutionMap,
const Variable& operand,
@ -4618,6 +4618,20 @@ namespace CNTK
size_t reductionRank = 1,
size_t maxTempMemSizeInSamples = 0,
const std::wstring& name = L"");
///
/// Convolution transpose with explicit lower and upper pad values
///
CNTK_API FunctionPtr ConvolutionTranspose(const Variable& convolutionMap,
const Variable& operand,
const NDShape& strides,
const std::vector<bool>& sharing,
const std::vector<size_t>& lowerPad,
const std::vector<size_t>& upperPad,
const NDShape& outputShape,
const NDShape& dilation,
size_t maxTempMemSizeInSamples,
const std::wstring& name = L"");
///
/// Pooling type.

Просмотреть файл

@ -254,6 +254,8 @@ namespace CNTK
CNTK_API FunctionPtr CosineDistanceWithNegativeSamples(const Variable& leftOperand, const Variable& rightOperand, const Variable& shiftWindow, const Variable& numberOfNegativeSamples, const std::wstring& name = L"");
CNTK_API FunctionPtr Convolution(const Variable& convolutionMap, const Variable& operand, const NDShape& strides, const std::vector<bool>& sharing, const std::vector<bool>& autoPadding,
const NDShape& dilation, bool transpose, const NDShape& outputShape, size_t groups, size_t maxTempMemSizeInSamples, const std::wstring& name = L"");
CNTK_API FunctionPtr Convolution(const Variable& convolutionMap, const Variable& operand, const NDShape& strides, const std::vector<bool>& sharing, const std::vector<size_t>& lowerPad,
const std::vector<size_t>& upperPad, const NDShape& dilation, bool transpose, const NDShape& outputShape, size_t groups, size_t maxTempMemSizeInSamples, const std::wstring& name = L"");
CNTK_API FunctionPtr ConvolutionSequenceShape(const Variable& convolutionMap, const Variable& operand, const NDShape& strides, const std::vector<bool>& sharing, const std::vector<bool>& autoPadding,
const NDShape& dilation, bool transpose, const NDShape& outputShape, size_t groups, size_t maxTempMemSizeInSamples, const std::wstring& name = L"");
CNTK_API FunctionPtr SpatialConvolution(const Variable& convolutionMap, const Variable& operand, const NDShape& strides, const std::vector<bool>& sharing,

Просмотреть файл

@ -2635,6 +2635,32 @@ namespace CNTK
}
}
///
/// Convolution transpose with explicit lower/upper padding values.
/// Thin wrapper that forwards to Internal::Convolution with transposition
/// enabled and a single filter group.
///
FunctionPtr ConvolutionTranspose(const Variable& convolutionMap,
                                 const Variable& operand,
                                 const NDShape& strides,
                                 const std::vector<bool>& sharing,
                                 const std::vector<size_t>& lowerPad,
                                 const std::vector<size_t>& upperPad,
                                 const NDShape& outputShape,
                                 const NDShape& dilation,
                                 size_t maxTempMemSizeInSamples,
                                 const std::wstring& name)
{
    // Grouped (depthwise) transposed convolution is not exposed through this
    // overload; always use a single group.
    const size_t numGroups = 1;
    const bool isTranspose = true;
    return Internal::Convolution(convolutionMap,
                                 operand,
                                 strides,
                                 sharing,
                                 lowerPad,
                                 upperPad,
                                 dilation,
                                 isTranspose,
                                 outputShape,
                                 numGroups,
                                 maxTempMemSizeInSamples,
                                 name);
}
FunctionPtr ROIPooling(const Variable& operand,
const Variable& rois,
PoolingType poolingType,
@ -3409,6 +3435,26 @@ namespace CNTK
return ReduceElements(operand, reductionOpName, axes, keepReducedDimensions, name);
}
///
/// Convolution with explicit lower/upper padding values. Packs every
/// configuration knob into the primitive function's attribute dictionary and
/// emits a Convolution primitive over (convolutionMap, operand).
///
FunctionPtr Convolution(const Variable& convolutionMap,
                        const Variable& operand,
                        const NDShape& strides,
                        const std::vector<bool>& sharing,
                        const std::vector<size_t>& lowerPad,
                        const std::vector<size_t>& upperPad,
                        const NDShape& dilation,
                        bool transpose,
                        const NDShape& outputShape,
                        size_t groups,
                        size_t maxTempMemSizeInSamples,
                        const std::wstring& name)
{
    // Explicit pads are in effect, so auto-padding is disabled on every axis.
    const std::vector<bool> noAutoPadding({ false });
    auto attributes = Dictionary();
    SetConvolutionProperties(attributes, strides, sharing, noAutoPadding, lowerPad, upperPad, dilation, /*sequential =*/false, transpose, outputShape, groups, maxTempMemSizeInSamples);
    return BinaryOp(PrimitiveOpType::Convolution, convolutionMap, operand, std::move(attributes), name);
}
FunctionPtr Convolution(const Variable& convolutionMap,
const Variable& operand,
const NDShape& strides,
@ -3422,7 +3468,8 @@ namespace CNTK
const std::wstring& name)
{
auto additionalProperties = Dictionary();
SetConvolutionProperties(additionalProperties, strides, sharing, autoPadding, dilation, /*sequential =*/false, transpose, outputShape, groups, maxTempMemSizeInSamples);
auto defaultPadVector = std::vector<size_t>({ 0 });
SetConvolutionProperties(additionalProperties, strides, sharing, autoPadding, defaultPadVector, defaultPadVector, dilation, /*sequential =*/false, transpose, outputShape, groups, maxTempMemSizeInSamples);
return BinaryOp(PrimitiveOpType::Convolution, convolutionMap, operand, std::move(additionalProperties), name);
}
@ -3446,7 +3493,8 @@ namespace CNTK
LogicError("Convolution currently requires the main operand to have dynamic axes");
auto additionalProperties = Dictionary();
SetConvolutionProperties(additionalProperties, strides, sharing, autoPadding, dilation, /*sequential =*/true, transpose, outputShape, groups, maxTempMemSizeInSamples);
auto defaultPadVector = std::vector<size_t>({ 0 });
SetConvolutionProperties(additionalProperties, strides, sharing, autoPadding, defaultPadVector, defaultPadVector, dilation, /*sequential =*/true, transpose, outputShape, groups, maxTempMemSizeInSamples);
return BinaryOp(PrimitiveOpType::ConvolutionSequenceShape, convolutionMap, operand, std::move(additionalProperties), name);
}

Просмотреть файл

@ -238,16 +238,17 @@ namespace CNTK
return !(*this == other);
}
void SetConvolutionProperties(Dictionary& additionalProperties, const NDShape& strides, const std::vector<bool>& sharing, const std::vector<bool>& autoPadding,
const NDShape& dilation, bool sequential, bool transpose, const NDShape& outputShape, size_t groups, size_t maxTempMemSizeInSamples)
void SetConvolutionProperties(Dictionary& additionalProperties, const NDShape& strides, const std::vector<bool>& sharing,
const std::vector<bool>& autoPadding, const std::vector<size_t>& lowerPad, const std::vector<size_t>& upperPad,
const NDShape& dilation, bool sequential, bool transpose, const NDShape& outputShape, size_t groups, size_t maxTempMemSizeInSamples)
{
additionalProperties[PrimitiveFunctionAttribute::AttributeNameStrides] = strides;
additionalProperties[PrimitiveFunctionAttribute::AttributeNameDilation] = dilation;
additionalProperties[PrimitiveFunctionAttribute::AttributeNameSharing] = AsDictionaryValueVector(sharing);
additionalProperties[PrimitiveFunctionAttribute::AttributeNameAutoPadding] = AsDictionaryValueVector(autoPadding);
additionalProperties[PrimitiveFunctionAttribute::AttributeNameSequential] = sequential;
additionalProperties[PrimitiveFunctionAttribute::AttributeNameLowerPad] = NDShape({0});
additionalProperties[PrimitiveFunctionAttribute::AttributeNameUpperPad] = NDShape({0});
additionalProperties[PrimitiveFunctionAttribute::AttributeNameLowerPad] = NDShape(lowerPad);
additionalProperties[PrimitiveFunctionAttribute::AttributeNameUpperPad] = NDShape(upperPad);
additionalProperties[PrimitiveFunctionAttribute::AttributeNameTranspose] = transpose;
additionalProperties[PrimitiveFunctionAttribute::AttributeNameOutputShape] = outputShape;
additionalProperties[PrimitiveFunctionAttribute::AttributeNameKernelShape] = NDShape({0});

Просмотреть файл

@ -357,8 +357,9 @@ namespace CNTK
return{ paddedOutputMapCount, kernelShape };
}
void SetConvolutionProperties(Dictionary& additionalProperties, const NDShape& strides, const std::vector<bool>& sharing, const std::vector<bool>& autoPadding,
const NDShape& dilation, bool sequential, bool transpose, const NDShape& outputShape, size_t groups, size_t maxTempMemSizeInSamples);
void SetConvolutionProperties(Dictionary& additionalProperties, const NDShape& strides, const std::vector<bool>& sharing,
const std::vector<bool>& autoPadding, const std::vector<size_t>& lowerPad, const std::vector<size_t>& upperPad,
const NDShape& dilation, bool sequential, bool transpose, const NDShape& outputShape, size_t groups, size_t maxTempMemSizeInSamples);
template <typename SourceElementType, typename TargetElementType>

Просмотреть файл

@ -3368,6 +3368,7 @@ void CNTKToONNXHelper::CopyAttributes(const FunctionPtr& src, LotusIR::Node* nod
auto dilations = (NDShape)src->Attributes()[L"dilation"].Value<NDShape>();
auto transpose = (bool)src->Attributes()[L"transpose"].Value<bool>();
size_t groups = (src->Attributes().Contains(L"groups")) ? (size_t)src->Attributes()[L"groups"].Value<size_t>() : 1u;
bool ceilOutDim = (src->Attributes().Contains(L"ceilOutDim")) ? (bool)src->Attributes()[L"ceilOutDim"].Value<bool>() : false;
//
// Remove the channel part for ONNX. This is because ONNX, unlike CNTK, does
@ -3385,9 +3386,10 @@ void CNTKToONNXHelper::CopyAttributes(const FunctionPtr& src, LotusIR::Node* nod
if (transpose)
{
auto outputShape = (NDShape)src->Attributes()[L"outputShape"].Value<NDShape>();
node->AddAttribute("output_shape", ToINTS(outputShape, src->Inputs()[1].HasBatchAxis()));
if(outputShape != NDShape({ 0 }))
node->AddAttribute("output_shape", ToINTS(outputShape, src->Inputs()[1].HasBatchAxis()));
}
PutAutopadOrPadAttrInNode(node, autoPadding, kernelShape);
PutAutopadOrPadAttrInNode(node, autoPadding, kernelShape, ceilOutDim);
}
else if (src->OpName() == L"Pooling")
{

Просмотреть файл

@ -139,6 +139,7 @@ private:
NDShape &strides, const Node *node, const Variable& dataOperand, const double padValue = 0.0);
static std::pair<std::vector<size_t>, std::vector<size_t>> CalcPaddingForSameLowerAutoPad(
const Variable &input, NDShape kernelShape, NDShape strides);
static std::tuple<bool, bool, bool> ConfigureConvTransposeNodePaddingOption(const Node *node);
//
// CNTK convolution/pooling operations do not support ONNX same_low padding.
// This method does padding accordingly before invoking
@ -148,6 +149,10 @@ private:
const Variable &input, NDShape kernelShape, NDShape strides, const double padValue);
static FunctionPtr CreateCNTKConvNode(const Node *node, const std::vector<Variable> &inputs);
static FunctionPtr CreateCNTKConvTransposeNode(const Node *node, const std::vector<Variable> &inputs);
static FunctionPtr CreateCNTKConvTransposeNode(const Variable& inputOperand, const Variable& convolutionMap, bool useAutoPadForCntkConvApi,
const NDShape& strides, const std::vector<bool>& sharing, const std::vector<bool>& cntkConvAutoPadding, const std::vector<size_t>& lowerPad,
const std::vector<size_t>& upperPad, const NDShape& outputShape, const NDShape& dilation, size_t reductionRank,
size_t maxTempMemSizeInSamples, const std::string& name);
static FunctionPtr CreateCNTKFCNode(const std::wstring &nodeName, const std::vector<Variable> &inputs);
//
@ -3083,6 +3088,21 @@ std::pair<std::vector<size_t>, std::vector<size_t>> ONNXToCNTKHelper::CalcPaddin
return std::make_pair(begins, ends);
}
// Decides which ONNX ConvTranspose padding mechanism to honor, based on which
// attributes are present on the node.
//
// Returns a tuple <useOutputShape, usePads, useAutoPad> where:
//   useOutputShape - the node carries an "output_shape" attribute.
//   usePads        - the node carries a meaningful "pads" attribute.
//   useAutoPad     - the node carries an "auto_pad" attribute other than "NOTSET".
//
// When "pads" is present but all-zero AND "auto_pad" is also specified, the
// all-zero pads are treated as an uninformative default and "auto_pad" wins.
std::tuple<bool, bool, bool> ONNXToCNTKHelper::ConfigureConvTransposeNodePaddingOption(const Node *node)
{
    // NOTE: locals renamed from ALL_CAPS — that style is conventionally
    // reserved for macros and reads as preprocessor identifiers.
    bool useOutputShape = HasNamedAttribute(node, "output_shape");
    bool usePads = HasNamedAttribute(node, "pads");
    bool useAutoPad = HasNamedAttribute(node, "auto_pad") && GetNamedAttributeAsString(node, "auto_pad") != "NOTSET";
    if (usePads)
    {
        auto pads = GetNamedAttributeAsInt64Vec(node, "pads");
        bool isAllZeros = std::all_of(pads.begin(), pads.end(), [](int64_t i) { return i == 0; });
        // All-zero explicit pads carry no information; defer to auto_pad if given.
        if (isAllZeros && useAutoPad)
            usePads = false;
    }
    return std::make_tuple(useOutputShape, usePads, useAutoPad);
}
FunctionPtr ONNXToCNTKHelper::CreatePadOpForSameLowAutoPad(
const Variable &input, NDShape kernelShape, NDShape strides, const double padValue)
{
@ -3135,7 +3155,10 @@ FunctionPtr ONNXToCNTKHelper::CreateCNTKConvTransposeNode(const Node *node, cons
{
Variable inputOperand = inputs[0];
Variable convolutionMap = inputs[1];
size_t numSpatialDim = convolutionMap.Shape().Rank() - 1; // This is conv op dimension, i.e. 2 for 2D conv, 3 for 3D conv.
size_t numSpatialDim = convolutionMap.Shape().Rank() - 2; // This is conv op dimension, i.e. 2 for 2D conv, 3 for 3D conv.
size_t groups = GetNamedAttributeAsInt64(node, "group", 1);
if (groups > 1)
NOT_IMPLEMENTED;
NDShape strides = GetNamedAttributeAsShape(node, "strides", false, NDShape(std::vector<size_t>(numSpatialDim, 1u)));
NDShape dilation = GetNamedAttributeAsShape(node, "dilations", false, NDShape(std::vector<size_t>(numSpatialDim, 1u)));
@ -3143,34 +3166,112 @@ FunctionPtr ONNXToCNTKHelper::CreateCNTKConvTransposeNode(const Node *node, cons
std::vector<bool> sharing({true});
size_t reductionRank = 1;
size_t maxTempMemSizeInSamples = 0;
std::vector<bool> cntkConvAutoPadding;
NDShape inputShape = inputOperand.Shape();
NDShape kernelShape = convolutionMap.Shape();
NDShape outputShape;
std::vector<int64_t> pads;
std::pair<std::vector<size_t>, std::vector<size_t>> padsPair;
bool useAutoPadForCntkConvApi(true);
if (HasNamedAttribute(node, "output_shape"))
bool USE_OUTPUT_SHAPE, USE_PADS, USE_AUTO_PAD;
std::tie(USE_OUTPUT_SHAPE, USE_PADS, USE_AUTO_PAD) = ConfigureConvTransposeNodePaddingOption(node);
pads = GetNamedAttributeAsInt64Vec(node, "pads", std::vector<int64_t>(2 * numSpatialDim, 0));
// One of the three attributes output_shape, pads, or auto_pad should be specified.
// If not, then we use default value (all zeros) for pads attribute below.
if (!(USE_OUTPUT_SHAPE || USE_PADS || USE_AUTO_PAD))
{
std::vector<bool> cntkConvAutoPadding;
NDShape outputShape = GetNamedAttributeAsShape(node, "output_shape", true);
NDShape inputShape = inputOperand.Shape();
NDShape kernelShape = convolutionMap.Shape();
fprintf(stderr, "Warning: ConvTranpose - None of the three attributes, output_shape, pads, or auto_pad are specified. Assuming the default value (all zeros) for 'pads' attribute.");
USE_PADS = true;
pads = std::vector<int64_t>(2 * numSpatialDim, 0);
}
for (int axis = 0; axis < outputShape.Rank(); axis++)
// If both "output_shape" and "pads" are specified, we give preference to "output_shape",
// and create CNTK node using "output_shape". This may need to be changed if ONNX
// specified explicitly that "pads" has preference.
if (USE_OUTPUT_SHAPE)
{
outputShape = GetNamedAttributeAsShape(node, "output_shape", true);
if (outputShape.Rank() != numSpatialDim + 1)
LogicError("ConvTranspose node's output shape attribute is of unexpected length.");
for (int axis = 0; axis < numSpatialDim; axis++)
{
if (axis != outputShape.Rank() - 1)
{
int pads = (inputShape[axis] - 1) * strides[axis] + kernelShape[axis] - outputShape[axis];
cntkConvAutoPadding.push_back(pads > 0);
}
else
{
// We assume this is the channel dimension and since ONNX does not support
// padding (also strides, dilation) for channel dimension, we set this to
// false when creating CNTK node.
cntkConvAutoPadding.push_back(false);
}
int pads = (inputShape[axis] - 1) * strides[axis] + kernelShape[axis] - outputShape[axis];
cntkConvAutoPadding.push_back(pads > 0);
}
useAutoPadForCntkConvApi = true;
}
else if (USE_PADS)
{
padsPair = SplitAndReverseVec(pads);
auto outputPadding = (HasNamedAttribute(node, "output_padding")) ? GetNamedAttributeAsInt64Vec(node, "output_padding") : std::vector<int64_t>(numSpatialDim, 0);
std::vector<size_t> outputShapeVect(numSpatialDim + 1, 0);
for (int axis = 0; axis < numSpatialDim; axis++)
{
outputShapeVect[axis] = (inputShape[axis] - 1) * strides[axis] + kernelShape[axis] +
static_cast<size_t>(outputPadding[axis] - padsPair.first[axis] - padsPair.second[axis]);
}
outputShapeVect[numSpatialDim] = kernelShape[kernelShape.Rank() - 2]; // Because kernel in C++ is in [HxWxOxI] format
outputShape = outputShape.AppendShape(NDShape(outputShapeVect));
useAutoPadForCntkConvApi = false;
}
else if (USE_AUTO_PAD)
{
ConvAutoPadType auto_pad = ConvertStrToConvAutoPadType(GetNamedAttributeAsString(node, "auto_pad", "SAME_UPPER"));
switch (auto_pad)
{
case ConvAutoPadType::SAME_UPPER:
cntkConvAutoPadding.insert(cntkConvAutoPadding.begin(), strides.Rank(), true);
break;
case ConvAutoPadType::VALID:
cntkConvAutoPadding.insert(cntkConvAutoPadding.begin(), strides.Rank(), false);
break;
case ConvAutoPadType::SAME_LOWER:
default:
NOT_IMPLEMENTED;
}
outputShape = NDShape({ 0 });
useAutoPadForCntkConvApi = true;
}
auto operandPlaceholder = PlaceholderVariable(inputOperand.Shape(), L"operand", {});
auto convmapPlaceholder = PlaceholderVariable(convolutionMap.Shape(), L"convolutionMap", {});
FunctionPtr operandWithBatchAxis = ToBatch(operandPlaceholder);
FunctionPtr convResultWithBatchAxis = ConvolutionTranspose(
// At this point length of vectors strides, dilation, padsPair, and cntkConvAutoPadding must be equal to
// number of spatial dimensions (2 for 2D conv, 3 for 3D conv). In order to match the expected input for
// CNTK Convolution API we will append one more element in each for the "channel" axis.
strides = strides.AppendShape({ 1 });
dilation = dilation.AppendShape({ 1 });
if (useAutoPadForCntkConvApi)
cntkConvAutoPadding.push_back(false);
else
{
padsPair.first.push_back(0);
padsPair.second.push_back(0);
}
FunctionPtr cntkConvFunction = CreateCNTKConvTransposeNode(inputOperand, convolutionMap, useAutoPadForCntkConvApi,
strides, sharing, cntkConvAutoPadding, padsPair.first, padsPair.second, outputShape,
dilation, reductionRank, maxTempMemSizeInSamples, node->Name());
// If Bias is specified in the ONNX node.
if (inputs.size() == 3)
{
NDShape shape({ 1, 1, inputs[2].Shape()[0] });
return Plus(cntkConvFunction, Reshape(inputs[2], shape));
}
else
return cntkConvFunction;
}
FunctionPtr ONNXToCNTKHelper::CreateCNTKConvTransposeNode(const Variable& inputOperand, const Variable& convolutionMap, bool useAutoPadForCntkConvApi,
const NDShape& strides, const std::vector<bool>& sharing, const std::vector<bool>& cntkConvAutoPadding, const std::vector<size_t>& lowerPad,
const std::vector<size_t>& upperPad, const NDShape& outputShape, const NDShape& dilation, size_t reductionRank, size_t maxTempMemSizeInSamples, const std::string& name)
{
auto operandPlaceholder = PlaceholderVariable(inputOperand.Shape(), L"operand", {});
auto convmapPlaceholder = PlaceholderVariable(convolutionMap.Shape(), L"convolutionMap", {});
FunctionPtr operandWithBatchAxis = ToBatch(operandPlaceholder);
FunctionPtr convResultWithBatchAxis;
if (useAutoPadForCntkConvApi)
{
convResultWithBatchAxis = ConvolutionTranspose(
convmapPlaceholder,
operandWithBatchAxis,
strides,
@ -3180,20 +3281,23 @@ FunctionPtr ONNXToCNTKHelper::CreateCNTKConvTransposeNode(const Node *node, cons
dilation,
reductionRank,
maxTempMemSizeInSamples);
FunctionPtr convResultWithStaticAxis = UnpackBatch(convResultWithBatchAxis, ToFixedWStringFromMultiByte(node->Name()));
return AsBlock(std::move(convResultWithStaticAxis), { { operandPlaceholder, inputOperand },{ convmapPlaceholder, convolutionMap } },
L"ConvolutionTranspose", ToFixedWStringFromMultiByte(node->Name()));
}
else if (HasNamedAttribute(node, "pads"))
else
{
NOT_IMPLEMENTED;
convResultWithBatchAxis = ConvolutionTranspose(
convmapPlaceholder,
operandWithBatchAxis,
strides,
sharing,
lowerPad,
upperPad,
outputShape,
dilation,
maxTempMemSizeInSamples);
}
else if (HasNamedAttribute(node, "auto_pad"))
{
NOT_IMPLEMENTED;
}
return nullptr;
FunctionPtr convResultWithStaticAxis = UnpackBatch(convResultWithBatchAxis, ToFixedWStringFromMultiByte(name));
return AsBlock(std::move(convResultWithStaticAxis), { { operandPlaceholder, inputOperand },{ convmapPlaceholder, convolutionMap } },
L"ConvolutionTranspose", ToFixedWStringFromMultiByte(name));
}
FunctionPtr ONNXToCNTKHelper::CreateCNTKConvNode(const Node *node, const std::vector<Variable> &inputs)

Просмотреть файл

@ -469,17 +469,23 @@ def test_ConvTranspose(tmpdir, dtype, device_id):
device = cntk_device(device_id)
with C.default_options(dtype=dtype):
# Keep the shapes below as they are, because this tests an earlier bug.
input_shape = (48, 16, 16)
input_shape = (24, 8, 8)
img = np.reshape(np.arange(np.prod(input_shape), dtype = dtype), input_shape)
x = C.input_variable(input_shape)
kernel_shape = (48, 32, 3, 3) # For convolution_transpose the shape is (I x O x W x H)
kernel_shape = (24, 16, 3, 3) # For convolution_transpose the shape is (I x O x W x H)
kernel = C.constant(value = np.ones(shape=(kernel_shape), dtype = dtype))
conv_trans_model = C.convolution_transpose(kernel, x, strides=(2, 2), output_shape=(32, 32, 32), auto_padding = [False, True, True])
conv_trans_model_with_output_shape = C.convolution_transpose(kernel, x, strides=(2, 2), auto_padding = [False, True, True], output_shape=(16, 16, 16))
verify_one_input(conv_trans_model_with_output_shape, img, tmpdir, 'ConvTranspose_0', device)
verify_one_input(conv_trans_model, img, tmpdir, 'ConvTranspose_0', device)
conv_trans_model_without_output_shape = C.convolution_transpose(kernel, x, strides=(2, 1), dilation=(1, 1), auto_padding = [False, True, True])
verify_one_input(conv_trans_model_without_output_shape, img, tmpdir, 'ConvTranspose_1', device)
if device_id >= 0: # Dilated convolution is not supported on CPU, hence the following test is run only on GPU.
conv_trans_model_with_dilation = C.convolution_transpose(kernel, x, strides=(2, 1), dilation=(2, 1), auto_padding = [False, True, True])
verify_one_input(conv_trans_model_with_dilation, img, tmpdir, 'ConvTranspose_2', device)
# DepthToSpace
@pytest.mark.parametrize("dtype", DType_Config)