CNTK splice allows broadcast of its inputs; ONNX Concat does not. This case is handled in this change.
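In CNTK, splice implicitly broadcasts inputs of mismatched shapes before concatenating; ONNX Concat requires all input dimensions to match except along the concat axis, so the exporter must expand such inputs explicitly. A minimal sketch of the CNTK-side behavior, adapted from test_Concat_With_Broadcast below (assumes a working CNTK install):

import numpy as np
import cntk as C

x = C.input_variable((2, 3, 1, 1, 3))
y = C.constant(value=np.ones((1, 3, 4, 1), dtype=np.float32))
model = C.splice(x, y, axis=2)  # y is broadcast against x before the concat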

This commit is contained in:
liqfu 2018-08-26 08:41:20 -07:00
Parent 4a6238d979
Commit 0e208365be
5 changed files: 444 additions and 207 deletions

View file

@ -944,6 +944,7 @@ namespace CNTK
for (size_t i = 0; i < replacementShape.Rank(); ++i)
{
if (replacementShape[i] == NDShape::InferredDimension)
// TODO: shall NDShape::FreeDimension be considered here instead?
replacementShape[i] = 0;
}

View file

@ -63,6 +63,11 @@ private:
std::unordered_map<Variable, LotusIR::Node*>& variableNodes,
const std::unordered_map<Variable, Variable>& compositeOutputsMap);
// Create an ONNX NodeArg of desired shape with constant 0s as initial values.
// The NodeArg is used to expand inputs of a CNTK splice op to a desired shape via broadcast.
static LotusIR::NodeArg &AddZerosConstantNodeArg(Graph *graph, const string &nodeArgName,
const std::vector<int64_t> &shape, CNTK::DataType dataType);
static LotusIR::Node *AddReshapeNodeAccordingToONNXVersion(Graph *graph, const string &nodeName, NodeArg *input, NodeArg *output, const std::vector<int64_t>& newShape);
@ -90,9 +95,13 @@ private:
LotusIR::Graph* graph);
static LotusIR::Node *AddMatMulNode(LotusIR::NodeArg &nodeArg1, LotusIR::NodeArg &nodeArg2, LotusIR::Graph* graph,
const std::string &out_arg_name);
static LotusIR::Node *AddAddNode(LotusIR::NodeArg &nodeArg1, LotusIR::NodeArg &nodeArg2, LotusIR::Graph* graph,
const std::string &out_arg_name);
static LotusIR::Node *AddArgMaxNode(LotusIR::NodeArg &nodeArg, LotusIR::Graph* graph, int axis);
static LotusIR::Node *AddCastNode(LotusIR::NodeArg &nodeArg, LotusIR::Graph* graph, const std::string &toType);
static void BroadcastInputsIfNeeded(std::vector<LotusIR::NodeArg *> &orderedInputs, const FunctionPtr& src, LotusIR::Graph* graph);
//
// Insert a reshape node in front of a given node and its output node arg
//
@ -151,6 +160,8 @@ private:
std::set<FunctionPtr>& visited,
std::unordered_map<Variable, Variable>& compositeOutputsMap);
static void SetTensorType(onnx::TensorProto& dst, CNTK::DataType dataType);
//
// Copy the content of NDArrayView to TensorProto, and do the needed
// conversions.
@ -236,6 +247,12 @@ private:
//
static bool FilterInput(const FunctionPtr& src, const CNTK::Variable& input, size_t inputIndex);
//
// Converts an axis (in the CNTK C++ API sense) to an ONNX axis index, assuming the op may broadcast
// across multiple inputs. In that case, the highest axis index among the inputs is taken.
//
static int64_t ConvertAxisToOnnxBroadcastOfOp(const Axis &axis, const FunctionPtr &src);
//
// Converts axis (in CNTK C++ API sense) to index in ONNX sense
//
@ -256,6 +273,11 @@ private:
//
static LotusIR::Node* AddNode(const FunctionPtr& src, LotusIR::Graph* graph, const std::vector<LotusIR::NodeArg*>& inputs, const std::vector<LotusIR::NodeArg* >& outputs);
//
// Set node attributes for ReduceElements ops
//
static void SetReduceElementsAttributes(const FunctionPtr src, Node *node);
//
// Get ONNX 'pads' attribute value based on CNTK node's autoPadding attribute value.
//
@ -616,7 +638,7 @@ void AppendCNTKWeightToONNXTensor(DType *data, const NDShape &shape, onnx::Tenso
}
}
void SetTensorType(onnx::TensorProto& dst, CNTK::DataType dataType)
void CNTKToONNXHelper::SetTensorType(onnx::TensorProto& dst, CNTK::DataType dataType)
{
switch (dataType)
{
@ -1196,18 +1218,35 @@ bool IsUnSupportedLayerNormalization(const FunctionPtr src)
return cntkOpName == "LayerNormalization" && src->Output().HasSequenceAxis();
}
bool MatchOpSequence(const FunctionPtr src, std::vector<wstring> opSequence, FunctionPtr &op)
{
FunctionPtr currentOp = src;
for (auto opName : opSequence)
{
if (currentOp == nullptr || currentOp->OpName() != opName)
{
return false;
}
currentOp = currentOp->Inputs().size() == 1 ? currentOp->Inputs()[0].Owner() : nullptr;
}
op = currentOp;
return true;
}
// When importing ONNX models, we insert a sequence of ops to pack/unpack the batch/sequence axes.
// Those ops shall be removed here to create an equivalent ONNX model on export.
FunctionPtr SkipBatchAndSequenceAxisOp(const FunctionPtr src)
{
if ((src->OpName() == L"ToSequenceOp" && src->Inputs()[0].Owner() &&
src->Inputs()[0].Owner()->OpName() == L"ToBatchAxis") ||
(src->OpName() == L"UnpackBatchAxis" && src->Inputs()[0].Owner() &&
src->Inputs()[0].Owner()->OpName() == L"UnpackSequenceOp"))
return src->Inputs()[0].Owner()->Inputs()[0].Owner();
else if (src->OpName() == L"UnpackBatchAxis" && src->Inputs()[0].Owner() &&
src->Inputs()[0].Owner()->OpName() == L"Sequence::Slice")
return src->Inputs()[0].Owner();
else
return src;
std::vector<wstring> toSequenceBatchOps({ L"ToSequenceOp", L"ToBatchAxis", L"TransposeAxes" });
std::vector<wstring> unpackSequenceBatchOps({ L"TransposeAxes", L"UnpackBatchAxis", L"UnpackSequenceOp" });
// std::vector<wstring> unpackBatchSequenceSliceOps({ L"UnpackBatchAxis", L"Sequence::Slice" });
FunctionPtr op = src;
while (MatchOpSequence(op, toSequenceBatchOps, op) ||
MatchOpSequence(op, unpackSequenceBatchOps, op))
// || MatchOpSequence(op, unpackBatchSequenceSliceOps, op))
;
return op;
}
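For clarity, the chain-stripping loop above (note the empty-bodied while: MatchOpSequence both tests and advances op) can be sketched in Python; op_name, inputs, and owner are hypothetical stand-ins for the CNTK C++ accessors:

def match_op_sequence(op, op_sequence):
    # Walk down a chain of single-input ops; return (True, op_below_chain)
    # on a full match, else (False, original op). The C++ version writes the
    # result through an out-parameter and may end with a null op.
    current = op
    for name in op_sequence:
        if current is None or current.op_name != name:
            return False, op
        inputs = current.inputs
        current = inputs[0].owner if len(inputs) == 1 else None
    return True, current

def skip_batch_and_sequence_axis_ops(op):
    to_seq = ["ToSequenceOp", "ToBatchAxis", "TransposeAxes"]
    unpack = ["TransposeAxes", "UnpackBatchAxis", "UnpackSequenceOp"]
    matched = True
    while matched:  # keep stripping until neither chain matches
        matched, op = match_op_sequence(op, to_seq)
        if not matched:
            matched, op = match_op_sequence(op, unpack)
    return op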
bool IsBatchAxisOp(const FunctionPtr src)
@ -1236,7 +1275,7 @@ bool IsBatchAxisOp(const FunctionPtr src)
bool OpNeedONNXTypeMap(const std::string &cntkType)
{
const vector<string> ops({"And", "Equal", "Greater", "Less", "Not", "Or", "Xor", "Gather", "ArgMax", "ArgMin", "TopK"});
const vector<string> ops({"And", "Equal", "Greater", "Less", "Not", "Or", "Xor", "Gather", "ArgMax", "ArgMin", "TopK" });
for (auto o : ops)
{
if (cntkType == o)
@ -1365,6 +1404,16 @@ bool CNTKToONNXHelper::FilterInput(const FunctionPtr& src, const CNTK::Variable&
return false;
}
int64_t CNTKToONNXHelper::ConvertAxisToOnnxBroadcastOfOp(const Axis &axis, const FunctionPtr &src)
{
int64_t onnx_axis = 0;
for (int i = 0; i < src->Inputs().size(); i++)
{
onnx_axis = std::max(onnx_axis, ConvertAxisToOnnx(axis, src->Inputs()[i]));
}
return onnx_axis;
}
/*
CNTK Python static axes are zero based. The batch and sequence axes are not static axes.
The CNTK C++ API gets static axes in a sanitized form (e.g. -axis - 1, via sanitize_axis).
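To make the mapping concrete, here is a hedged Python sketch of the conversion as described above: CNTK stores static dims in reverse of ONNX order, and the exported tensor gains the dynamic (sequence/batch) axes in front. ConvertAxisToOnnxBroadcastOfOp then takes the max of this index across inputs, since ONNX broadcasting right-aligns lower-rank inputs. The exact formula is an assumption inferred from the surrounding code:

def convert_axis_to_onnx(static_axis, static_rank, num_dynamic_axes):
    # Assumed mapping: reverse the static axis order, then shift past the
    # dynamic axes that lead the exported ONNX tensor.
    return (static_rank - 1 - static_axis) + num_dynamic_axes

def convert_axis_to_onnx_broadcast_of_op(static_axis, inputs):
    # inputs: list of (static_rank, num_dynamic_axes) pairs, one per input
    return max(convert_axis_to_onnx(static_axis, r, d) for r, d in inputs)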
@ -2321,6 +2370,44 @@ LotusIR::Node *CNTKToONNXHelper::CreateRNNNode(const FunctionPtr &src,
return squeezedRNNNode;
}
// Create an ONNX NodeArg of desired shape with constant 0s as initial values.
LotusIR::NodeArg &CNTKToONNXHelper::AddZerosConstantNodeArg(Graph *graph, const string &nodeArgName,
const std::vector<int64_t> &shape, CNTK::DataType dataType)
{
onnx::TypeProto shapeInputArgType = ToTypeProto(shape, false);
shapeInputArgType.mutable_tensor_type()->set_elem_type(ConvertDataTypeCNTKToTensorProto(dataType));
LotusIR::NodeArg &shapeInputArg = graph->GetOrCreateNodeArg(nodeArgName, &shapeInputArgType);
onnx::TensorProto dstTensor;
dstTensor.set_name(shapeInputArg.Name());
dstTensor.set_data_type(ConvertDataTypeCNTKToTensorProto(dataType));
if (std::any_of(shape.begin(), shape.end(), [](int64_t dim) {return dim <= 0; }))
LogicError("Invalid splice inputs shape");
int64_t totalSize = std::accumulate(shape.begin(), shape.end(), (int64_t)1, std::multiplies<int64_t>());
switch (dataType)
{
case CNTK::DataType::Float16:
dstTensor.mutable_int32_data()->Resize((int)totalSize, 0);
break;
case CNTK::DataType::Float:
dstTensor.mutable_float_data()->Resize((int)totalSize, (float)0);
break;
case CNTK::DataType::Double:
dstTensor.mutable_double_data()->Resize((int)totalSize, 0);
break;
default:
NOT_IMPLEMENTED;
}
for (int index = 0; index < shape.size(); index++)
*(dstTensor.mutable_dims()->Add()) = shape[index];
graph->AddInitializedTensor(dstTensor);
return shapeInputArg;
}
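Why a zeros initializer works as the broadcaster: ONNX Add uses numpy-style broadcasting, so adding a zeros tensor of the target shape expands an input without changing its values (a TODO further down notes switching to the ONNX Expand op once available). A numpy sketch of the identity being relied on:

import numpy as np

x = np.arange(6, dtype=np.float32).reshape(1, 3, 2)
target = (4, 3, 2)
expanded = x + np.zeros(target, dtype=np.float32)  # broadcast, values unchanged
assert expanded.shape == target
assert np.array_equal(expanded[0], x[0])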
LotusIR::Node *CNTKToONNXHelper::AddReshapeNodeAccordingToONNXVersion(Graph *graph, const string &nodeName, NodeArg *input, NodeArg *output, const std::vector<int64_t> &newShape)
{
if (IsONNX1_2Supported())
@ -2384,6 +2471,16 @@ LotusIR::Node *CNTKToONNXHelper::AddMatMulNode(LotusIR::NodeArg &nodeArg1, Lotus
return argMatMulNode;
}
LotusIR::Node *CNTKToONNXHelper::AddAddNode(LotusIR::NodeArg &nodeArg1, LotusIR::NodeArg &nodeArg2, LotusIR::Graph* graph,
const std::string &out_arg_name)
{
LotusIR::NodeArg &outputArg = graph->GetOrCreateNodeArg(out_arg_name, nullptr);
LotusIR::Node* addNode = graph->AddNode(
nodeArg1.Name() + string("_add"), "Add", "", { &nodeArg1, &nodeArg2 }, { &outputArg });
return addNode;
}
LotusIR::Node *CNTKToONNXHelper::AddArgMaxNode(LotusIR::NodeArg &nodeArg, LotusIR::Graph* graph, int axis)
{
// LotusIR::NodeArg inputArg(nodeArg.Name(), nullptr);
@ -2583,13 +2680,16 @@ LotusIR::Node* CNTKToONNXHelper::CreateNode(const FunctionPtr& initialSrc,
std::unordered_map<Variable, LotusIR::Node*>& variableNodes,
const std::unordered_map<Variable, Variable>& compositeOutputsMap)
{
auto iter = functionNodes.find(initialSrc);
// Try to skip batch and sequence pack/unpack ops.
FunctionPtr src = SkipBatchAndSequenceAxisOp(initialSrc);
if (!src)
// TODO: it could be an input NodeArg.
return nullptr;
auto iter = functionNodes.find(src);
if (iter != functionNodes.end())
return iter->second;
// Try to skip batch and sequence pack/unpack ops.
FunctionPtr src = SkipBatchAndSequenceAxisOp(initialSrc);
LotusIR::Node* functionNode = nullptr;
std::string cntkOpName = ToLegacyString(ToUTF8(src->OpName()));
std::string onnxOpName = ToOPName(src);
@ -2652,7 +2752,23 @@ LotusIR::Node* CNTKToONNXHelper::CreateNode(const FunctionPtr& initialSrc,
if (IsBatchAxisOp(src))
return CreateNodeForBatchAxisOp(src, graph, functionNodes, variableNodes, compositeOutputsMap);
else
LogicError("Node '%S': Unsupported outside the context of batch axis ops.", src->AsString().c_str());
{
// This is a normal use of UnpackBatchAxis. ONNX does not treat the batch axis specially, so
// we simply skip the op.
auto blockMapping = src->Inputs()[0].BlockFunctionVariableMapping();
if (blockMapping.IsInitialized())
return CreateNode(blockMapping.Owner(),
graph,
functionNodes,
variableNodes,
compositeOutputsMap);
else if (src->Inputs()[0].Owner())
return CreateNode(src->Inputs()[0].Owner(),
graph,
functionNodes,
variableNodes,
compositeOutputsMap);
}
}
//
@ -2696,6 +2812,30 @@ LotusIR::Node* CNTKToONNXHelper::CreateNode(const FunctionPtr& initialSrc,
return functionNode;
}
Variable SkipBatchPackUnpack(Variable input)
{
if (input.Owner() &&
(input.Owner()->OpName() == L"UnpackBatchAxis" || input.Owner()->OpName() == L"ToBatchAxis"))
{
return input.Owner()->Inputs()[0];
}
else
return input;
}
bool TryMatchNodeArgType(onnx::TypeProto &argType, LotusIR::Graph* graph, const std::string &nodeArgName)
{
const NodeArg* inputNodeArg = graph->FindNodeArg(nodeArgName);
if (inputNodeArg)
{
onnx::TensorProto_DataType inputType = inputNodeArg->TypeAsProto()->tensor_type().elem_type();
argType.mutable_tensor_type()->set_elem_type(inputType);
return true;
}
return false;
}
void CNTKToONNXHelper::ProcessInputs(const FunctionPtr& src,
LotusIR::Graph* graph,
std::unordered_map<FunctionPtr, LotusIR::Node*>& functionNodes,
@ -2717,6 +2857,9 @@ void CNTKToONNXHelper::ProcessInputs(const FunctionPtr& src,
LogicError("Node '%S': Placeholder isn't supported currently.", src->AsString().c_str());
}
// UnpackBatchAxis and ToBatchAxis are no-ops in ONNX
input = SkipBatchPackUnpack(input);
// Special case handling of LayerNormalization layer because it changes
// ops dynamically based on value of inputs. If more such cases ops are seen,
// this should be abstracted out from here.
@ -2743,6 +2886,13 @@ void CNTKToONNXHelper::ProcessInputs(const FunctionPtr& src,
bool isConstant = (input.IsParameter() || input.IsConstant()) &&
!Operators::IgnoreConstantAndParameter(src->OpName(), inputIndex);
//
// If this input is an output, then it is the output of an upstream node. Recursively add all upstream nodes;
// in effect, we are doing a DFS.
//
if (input.IsOutput())
CreateNode(input.Owner(), graph, functionNodes, variableNodes, compositeOutputsMap);
onnx::TypeProto inputArgType;
if (cntkOpName == "Splice")
@ -2770,7 +2920,16 @@ void CNTKToONNXHelper::ProcessInputs(const FunctionPtr& src,
(*inputArgType.mutable_tensor_type()->mutable_shape()->mutable_dim())[0].set_dim_param(FreeSequenceDimParam);
}
if (OpNeedONNXTypeMap(cntkOpName))
// TODO: if it is an identity op, we shall peek at its input node to find the correct tensor element type.
if (onnxOpName == "Identity")
{
// Shall match the type of the NodeArg of the same name from upstream.
string inputNodeArgName = ToLegacyString(ToUTF8(input.Uid()));
if (!TryMatchNodeArgType(inputArgType, graph, inputNodeArgName))
UpdateONNXType(src->Inputs()[0].GetDataType(), inputArgType);
}
else if (OpNeedONNXTypeMap(cntkOpName))
{
MapAndUpdateONNXType(onnxOpName, true, inputIndex, input.GetDataType(), inputArgType);
}
@ -2779,6 +2938,26 @@ void CNTKToONNXHelper::ProcessInputs(const FunctionPtr& src,
UpdateONNXType(input.GetDataType(), inputArgType);
}
//
// Leaf nodes are data entry to the graph and need their own node with only output arg.
//
if (isConstant)
{
if (variableNodes.find(input) == variableNodes.end())
{
if (input.IsParameter() || input.IsConstant())
{
auto srcTensor = input.IsParameter() ? Parameter(input).Value() : Constant(input).Value();
onnx::TensorProto dstTensor;
dstTensor.set_name(inputName);
CopyTensor(srcTensor, dstTensor, &inputArgType);
graph->AddInitializedTensor(dstTensor);
}
}
}
LotusIR::NodeArg &inputArg = graph->GetOrCreateNodeArg(inputName, &inputArgType);
inputs.push_back(&inputArg);
@ -2824,32 +3003,6 @@ void CNTKToONNXHelper::ProcessInputs(const FunctionPtr& src,
*(dstTensor.mutable_dims()->Add()) = newShapeVec.size();
graph->AddInitializedTensor(dstTensor);
}
//
// Leaf nodes are data entry to the graph and need their own node with only output arg.
//
if (isConstant)
{
if (variableNodes.find(input) == variableNodes.end())
{
if (input.IsParameter() || input.IsConstant())
{
auto srcTensor = input.IsParameter() ? Parameter(input).Value() : Constant(input).Value();
onnx::TensorProto dstTensor;
dstTensor.set_name(inputName);
CopyTensor(srcTensor, dstTensor, &inputArgType);
graph->AddInitializedTensor(dstTensor);
}
}
}
//
// If this input is an output, then it is the output of an upstream node. Recursively add all upstream nodes;
// in effect, we are doing a DFS.
//
else if (input.IsOutput())
CreateNode(input.Owner(), graph, functionNodes, variableNodes, compositeOutputsMap);
}
}
@ -2861,7 +3014,14 @@ void CNTKToONNXHelper::ProcessOutputs(const FunctionPtr& src,
for (const auto& output : src->Outputs())
{
auto outputArgType = ToTypeProto(output.Shape(), output.HasBatchAxis(), output.HasSequenceAxis());
if (OpNeedONNXTypeMap(onnxOpName))
if (onnxOpName == "Identity")
{
// Shall match the type of this Identity node's input NodeArg.
string inputNodeArgName = ToLegacyString(ToUTF8(src->Inputs()[0].Uid()));
if (!TryMatchNodeArgType(outputArgType, graph, inputNodeArgName))
UpdateONNXType(src->Inputs()[0].GetDataType(), outputArgType);
}
else if (OpNeedONNXTypeMap(onnxOpName))
{
MapAndUpdateONNXType(onnxOpName, false, outputIndex, output.GetDataType(), outputArgType);
}
@ -3036,28 +3196,9 @@ void CNTKToONNXHelper::CopyAttributes(const FunctionPtr& src, LotusIR::Node* nod
node->AddAttribute(attributesMap[L"newShape"], ToINTS(shape));
}
}
else if ((src->OpName() == L"ReduceL1") || (src->OpName() == L"ReduceL2") || (src->OpName() == L"ReduceSumSquare"))
if (src->OpName() == L"ReduceL1" || src->OpName() == L"ReduceL2" || src->OpName() == L"ReduceSumSquare")
{
auto keepReducedDimensions = (int64_t)((bool) src->Attributes()[L"reductionKeepDimensions"].Value<bool>() ? 1 : 0);
std::vector<Axis> reductionAxes;
if (src->Attributes().Contains(L"axisVec"))
reductionAxes = AsVector<Axis>(src->Attributes()[L"axisVec"].Value<std::vector<DictionaryValue>>());
else if (src->Attributes().Contains(L"axis"))
reductionAxes.push_back((Axis)(src->Attributes()[L"axis"].Value<Axis>()));
// Reduction on batch axis in CNTK removes the batch axis, even if keepdims is true.
// For ONNX export we need to make sure we export keepdims as 0 (false).
// The same applies for AllStaticAxes.
if (reductionAxes.size() == 1
&& (reductionAxes[0] == Axis::DefaultBatchAxis()
|| reductionAxes[0] == Axis::AllStaticAxes()
|| reductionAxes[0] == Axis::AllAxes()))
keepReducedDimensions = 0;
node->AddAttribute(attributesMap[L"keepdims"], keepReducedDimensions);
std::vector<int64_t> axes = ConvertAxesToOnnx(reductionAxes, src->Inputs()[0]);
node->AddAttribute("axes", axes);
SetReduceElementsAttributes(src, node);
}
else if (src->OpName() == L"TransposeAxes")
{
@ -3117,7 +3258,7 @@ void CNTKToONNXHelper::CopyAttributes(const FunctionPtr& src, LotusIR::Node* nod
else if (src->OpName() == L"Splice")
{
Axis axis = (Axis)(src->Attributes()[L"axis"].Value<Axis>());
int64_t axisIndex = ConvertAxisToOnnx(axis, src->Inputs()[0]);
int64_t axisIndex = ConvertAxisToOnnxBroadcastOfOp(axis, src);
node->AddAttribute(attributesMap[L"axis"], axisIndex);
}
else if (src->OpName() == L"Slice")
@ -3432,58 +3573,71 @@ void CNTKToONNXHelper::CopyAttributes(const FunctionPtr& src, LotusIR::Node* nod
}
else if (src->OpName() == L"ReduceElements")
{
wstring cntkAttributeOpName = (wstring)src->Attributes()[PrimitiveFunctionAttribute::AttributeNameReductionOpName].Value<wstring>();
const AttributesMapping& attributeMap = Operators::FindAttributeMap(src->OpName(), cntkAttributeOpName);
auto keepReducedDimensions = (int64_t)((bool)src->Attributes()[L"reductionKeepDimensions"].Value<bool>() ? 1 : 0);
// hack to make reduction with sequence axis pass bi-directional broadcast
if (node->OpType() == "ReduceMean" && src->Inputs()[0].HasSequenceAxis())
{
keepReducedDimensions = 1;
}
if (src->Attributes().Contains(L"axisVec"))
{
std::vector<Axis> reductionAxes;
reductionAxes = AsVector<Axis>(src->Attributes()[L"axisVec"].Value<std::vector<DictionaryValue>>());
// Reduction on batch axis in CNTK removes the batch axis, even if keepdims is true.
// For ONNX export we need to make sure we export keepdims as 0 (false).
// The same applies for AllStaticAxes.
if (reductionAxes.size() == 1
&& (reductionAxes[0] == Axis::DefaultBatchAxis()
|| reductionAxes[0] == Axis::AllStaticAxes()
|| reductionAxes[0] == Axis::AllAxes()))
keepReducedDimensions = 0;
std::vector<int64_t> axes = ConvertAxesToOnnx(reductionAxes, src->Inputs()[0]);
node->AddAttribute("axes", axes);
}
else if (src->Attributes().Contains(L"axis"))
{
// py axis -> cpp (-axis -1) -> normalize (rank + axis)
Axis axis = (Axis)(src->Attributes()[L"axis"].Value<Axis>());
// Reduction on batch axis in CNTK removes the batch axis, even if keepdims is true.
// For ONNX export we need to make sure we export keepdims as 0 (false).
// The same applies for All axes
if (axis == Axis::DefaultBatchAxis() || axis == Axis::AllAxes() || axis == Axis::AllStaticAxes())
keepReducedDimensions = 0;
if (node->OpType() != "ArgMax" && node->OpType() != "ArgMin")
{
std::vector<int64_t> axes = ConvertAxesToOnnx(std::vector<Axis>({ axis }), src->Inputs()[0]);
node->AddAttribute("axes", axes);
}
else
{
int64_t ax = ConvertAxisToOnnx(axis, src->Inputs()[0]);
node->AddAttribute("axis", ax);
}
}
node->AddAttribute("keepdims", keepReducedDimensions);
SetReduceElementsAttributes(src, node);
}
}
}
void CNTKToONNXHelper::SetReduceElementsAttributes(const FunctionPtr src, Node *node)
{
std::wstring reductionOpName = src->OpName();
if (reductionOpName == L"ReduceElements")
{
reductionOpName = src->Attributes()[L"reductionOpName"].Value<wstring>();
}
auto keepReducedDimensions = (int64_t)((bool)src->Attributes()[L"reductionKeepDimensions"].Value<bool>() ? 1 : 0);
bool forceKeepReducedDimensions = false;
if (src->Inputs()[0].HasSequenceAxis())
{
// TODO: IMPORTANT. This is a workaround related to how batch/sequence axes are unpacked and broadcast.
// In general, batch/sequence axes are moved to static axis positions during unpacking and broadcasting.
// As a result, a tensor may end up with duplicated batch/sequence axes.
// This happens most often when there is a sequence axis. Setting keepdims to 1 avoids
// some of the cases, but it is not a complete solution.
// Roughly, here is test code that would fail without this workaround:
// shape = (2, )
// batch_size = 1
// seq_len = 1
// data = generate_sequential_data((batch_size, seq_len, *shape))
// x1 = C.sequence.input_variable(shape)
// x1_reduced = C.reduce_mean(x1, 0, keepdims = False)
// model = x1 + x1_reduced
// model = C.reduce_mean(model, 0, keepdims = False)
// model.save(tmpdir + "/broadcast_sequence.onnx", format = C.ModelFormat.ONNX)
// loaded_model = C.Function.load(tmpdir + "/broadcast_sequence.onnx", format = C.ModelFormat.ONNX)
// o1 = loaded_model.eval({ loaded_model.arguments[0]: data })
keepReducedDimensions = 1;
forceKeepReducedDimensions = true;
}
std::vector<Axis> reductionAxes;
if (src->Attributes().Contains(L"axisVec"))
reductionAxes = AsVector<Axis>(src->Attributes()[L"axisVec"].Value<std::vector<DictionaryValue>>());
else if (src->Attributes().Contains(L"axis"))
reductionAxes.push_back((Axis)(src->Attributes()[L"axis"].Value<Axis>()));
// Reduction on batch axis in CNTK removes the batch axis, even if keepdims is true.
// For ONNX export we need to make sure we export keepdims as 0 (false).
// The same applies for AllStaticAxes.
if (!forceKeepReducedDimensions &&
(reductionAxes.size() == 1
&& (reductionAxes[0] == Axis::DefaultBatchAxis()
|| reductionAxes[0] == Axis::AllStaticAxes()
|| reductionAxes[0] == Axis::AllAxes())))
keepReducedDimensions = 0;
std::vector<int64_t> axes = ConvertAxesToOnnx(reductionAxes, src->Inputs()[0]);
if (reductionOpName == L"Argmax" || reductionOpName == L"Argmin")
node->AddAttribute("axis", axes[0]);
else if (reductionAxes[0] != Axis::AllAxes())
node->AddAttribute("axes", axes);
node->AddAttribute("keepdims", keepReducedDimensions);
}
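The keepdims fix-up above exists because CNTK drops the batch axis when reducing over it even with keepdims enabled, while an ONNX reducer with keepdims=1 would retain a size-1 dimension. numpy follows the ONNX semantics, which makes the required export value easy to see:

import numpy as np

a = np.ones((4, 3, 2), dtype=np.float32)  # think [batch, d1, d2]
assert np.mean(a, axis=0, keepdims=True).shape == (1, 3, 2)  # ONNX keepdims=1
assert np.mean(a, axis=0, keepdims=False).shape == (3, 2)    # matches CNTK; export keepdims=0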
void CNTKToONNXHelper::PutAutopadOrPadAttrInNode(LotusIR::Node* node,
const std::vector<bool>& autoPadding, const NDShape& kernelShape, bool ceilOutDim)
{
@ -3550,6 +3704,86 @@ LotusIR::Node* FindByName(LotusIR::Graph* graph, const std::string &name)
return nullptr;
}
std::vector<int64_t> GetShapeFromNodeArg(LotusIR::NodeArg *nodeArg)
{
std::vector<int64_t> shape;
const TypeProto *typeProto = nodeArg->TypeAsProto();
for (int dim = 0; dim < typeProto->tensor_type().shape().dim_size(); dim++)
{
shape.push_back(typeProto->tensor_type().shape().dim()[dim].dim_value());
}
return shape;
}
// CNTK splice allows broadcast of inputs before applying concatenation.
// ONNX Concat is limited to the matching-shape case,
// i.e. input dimensions must be equal except along the concatenation axis.
// For an example, see test_Concat_With_Broadcast in onnx_op_test.py.
void CNTKToONNXHelper::BroadcastInputsIfNeeded(std::vector<LotusIR::NodeArg *> &orderedInputs, const FunctionPtr& src, LotusIR::Graph* graph)
{
if (src->OpName() != L"Splice")
return;
Axis axis = (Axis)(src->Attributes()[L"axis"].Value<Axis>());
int64_t concatAxis = ConvertAxisToOnnxBroadcastOfOp(axis, src);
std::vector<std::vector<int64_t>> shapes;
int max_rank = 0;
for (auto nodeArg : orderedInputs)
{
shapes.push_back(GetShapeFromNodeArg(nodeArg));
max_rank = std::max(max_rank, (int)shapes.rbegin()->size());
}
std::vector<int64_t> broadcast_shape(max_rank, 1);
for (int i = 0; i < shapes.size(); i++)
{
std::vector<int64_t> &shape_i = shapes[i];
for (int index_to_shape_i = 0; index_to_shape_i < shape_i.size(); index_to_shape_i++)
{
int onnx_axis = index_to_shape_i + (max_rank - shape_i.size());
if (onnx_axis == concatAxis)
// only check and update non-concat-axis dimensions
continue;
else if (broadcast_shape[onnx_axis] == 1)
broadcast_shape[onnx_axis] = shape_i[index_to_shape_i];
else if (broadcast_shape[onnx_axis] != shape_i[index_to_shape_i] && shape_i[index_to_shape_i] != 1)
LogicError("Invalid splice inputs shape");
}
}
// TODO: use the ONNX Expand op once ONNX version 7 is supported.
// Without the Expand op, we create a zeros constant of the expected shape and apply a broadcast Add
// to get the input to the right shape for concatenation.
for (int i = 0; i < orderedInputs.size(); i++)
{
std::vector<int64_t> &shape_i = shapes[i];
bool need_broadcast = shape_i.size() < max_rank;
while (shape_i.size() < max_rank)
shape_i.insert(shape_i.begin(), 1);
for (int onnx_axis = 0; onnx_axis < shape_i.size(); onnx_axis++)
{
if (onnx_axis != concatAxis && shape_i[onnx_axis] != broadcast_shape[onnx_axis])
{
shape_i[onnx_axis] = broadcast_shape[onnx_axis];
need_broadcast = true;
}
}
if (!need_broadcast)
continue;
LotusIR::NodeArg *nodeArg = orderedInputs[i];
// We insert an "Add" with broadcast to get desired shape that can be accepted by ONNX Concat.
LotusIR::NodeArg &nodeArg2 = AddZerosConstantNodeArg(graph, nodeArg->Name() + "_braodcast_for_desired_shape",
shape_i, src->Inputs()[i].GetDataType());
const std::string out_arg_name = nodeArg->Name() + "_post_braodcasted_with_desired_shape";
LotusIR::Node *node = AddAddNode(*nodeArg, nodeArg2, graph, out_arg_name);
orderedInputs[i] = const_cast<NodeArg*>(node->OutputDefs()[0]);
}
}
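The shape arithmetic in BroadcastInputsIfNeeded can be summarized in a small Python sketch: right-align all ranks, take the non-1 dimension everywhere except the concat axis, and reject true mismatches. This mirrors the loops above as an illustration, not the exporter code:

def broadcast_shape_for_concat(shapes, concat_axis):
    max_rank = max(len(s) for s in shapes)
    out = [1] * max_rank
    for s in shapes:
        offset = max_rank - len(s)
        for i, d in enumerate(s):
            axis = i + offset
            if axis == concat_axis:
                continue  # inputs may differ along the concat axis
            if out[axis] == 1:
                out[axis] = d
            elif d != 1 and d != out[axis]:
                raise ValueError("Invalid splice inputs shape")
    return out

# e.g. two inputs concatenated along the last axis; the concat-axis entry
# stays 1 because each input keeps its own dimension there:
print(broadcast_shape_for_concat([[2, 1, 3], [4, 3]], concat_axis=2))  # [2, 4, 1]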
LotusIR::Node* CNTKToONNXHelper::AddNode(const FunctionPtr& src, LotusIR::Graph* graph, const std::vector<LotusIR::NodeArg *>& inputs, const std::vector<LotusIR::NodeArg *>& outputs)
{
LotusIR::Node* node = nullptr;
@ -3672,6 +3906,11 @@ LotusIR::Node* CNTKToONNXHelper::AddNode(const FunctionPtr& src, LotusIR::Graph*
node = graph->AddNode(nodeName + string("_add"), "Add",
"", { &mulTensorOutputArg, input2 }, { &addTensorOutputArg });
}
else if (src->OpName() == L"Splice")
{
BroadcastInputsIfNeeded(orderedInputs, src, graph);
node = graph->AddNode(nodeName, ToOPName(src), "", orderedInputs, outputs);
}
else
node = graph->AddNode(nodeName, ToOPName(src), "", orderedInputs, outputs);
}

View file

@ -59,7 +59,7 @@ private:
static bool FixConstantShapeForConstantVariableInputPair(const std::vector<Variable> &inputs,
std::vector<Variable> &fixedInputs);
static const Node *GetChildNode(const Node *parentNode, const NodeArg *nodeArg);
static const Node *GetChildNode(const Node *parentNode, const NodeArg *nodeArg, int &nodeArgIndex);
static std::vector<Axis> AttributeProtoToAxes(const AttributeProto &attributeProto);
static Axis AttributeProtoToAxis(const AttributeProto &attributeProto);
@ -132,6 +132,8 @@ private:
static std::pair<std::vector<size_t>, std::vector<size_t>> AdjustONNXPadsVecForCNTKPadOp(const Variable &operand, std::vector<int64_t> &pads);
static NDShape ReverseShape(const NDShape &shape);
static std::pair<std::vector<Axis>, bool> GetReduceElementsAttributes(const Node *node, const Variable &input);
static std::pair<Variable, Variable> BroadcastElementWiseInput(const Node *node,
const Variable &input0, const Variable &input1);
@ -601,14 +603,16 @@ const CNTK::Constant CNTK::ONNXToCNTKHelper::CreateConstantWithTensorData(CNTK::
}
}
const Node *ONNXToCNTKHelper::GetChildNode(const Node *parentNode, const NodeArg *nodeArg)
const Node *ONNXToCNTKHelper::GetChildNode(const Node *parentNode, const NodeArg *nodeArg, int &nodeArgIndex)
{
Node::NodeConstIterator itChildNode = parentNode->InputNodesBegin();
for (; itChildNode != parentNode->InputNodesEnd(); ++itChildNode)
{
const Node *childNode = *itChildNode;
const ConstPointerContainer<std::vector<NodeArg *>> &childOutputDefs = childNode->OutputDefs();
for (ConstPointerContainer<std::vector<NodeArg *>>::ConstIterator itChildOutput = childOutputDefs.begin(); itChildOutput != childOutputDefs.end(); ++itChildOutput)
nodeArgIndex = 0;
for (ConstPointerContainer<std::vector<NodeArg *>>::ConstIterator itChildOutput = childOutputDefs.begin();
itChildOutput != childOutputDefs.end(); ++itChildOutput, nodeArgIndex++)
{
const NodeArg *childOutput = *itChildOutput;
if (childOutput == nodeArg)
@ -1838,6 +1842,29 @@ std::pair<Variable, Variable> ONNXToCNTKHelper::BroadcastElementWiseInput(
}
}
std::pair<std::vector<Axis>, bool> ONNXToCNTKHelper::GetReduceElementsAttributes(const Node *node, const Variable &input)
{
bool keepdims = GetNamedAttributeAsInt64(node, "keepdims", 1) == 1;
std::vector<Axis> axes = ConvertONNXAxesToCNTKCppApi(GetNamedAttributeAsInt64Vec(node, "axes", vector<int64_t>({})), input);
// use default of all axes according to ONNX
if (axes.empty())
{
if (keepdims)
axes = vector<Axis>({ Axis::AllAxes() });
else
{
// In the case of keepdims being false, CNTK does not allow reduce on Axis::AllAxes().
// We have to list out all axes instead.
if (input.DynamicAxes().size() != 0)
LogicError("ReduceElements with default on all axes is not supported with input of dynamic axis.");
axes.resize(input.Shape().Rank());
std::generate(axes.begin(), axes.end(), [static_axis = 0]() mutable { return Axis(static_axis++); });
}
}
return std::make_pair(axes, keepdims);
}
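Per ONNX, a reducer with no axes attribute reduces over all axes; since CNTK cannot combine Axis.AllAxes() with keepdims=False, the import path enumerates every static axis instead. A numpy check of the equivalence the code relies on:

import numpy as np

a = np.random.rand(2, 3, 4).astype(np.float32)
all_axes = tuple(range(a.ndim))  # the explicit list the importer generates
assert np.sum(a, axis=all_axes, keepdims=False).shape == ()
assert np.isclose(np.sum(a, axis=all_axes), np.sum(a))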
Axis ONNXToCNTKHelper::ConvertONNXAxisToCNTKCppApi(int64_t axis, const Variable &operand)
{
// reverse CNTKToONNXHelper::ConvertAxisToOnnx
@ -2487,127 +2514,91 @@ FunctionPtr ONNXToCNTKHelper::CreateFunction(const Node *node, const std::vector
}
else if (onnxOpName == "ReduceMax")
{
std::vector<Axis> axes = ConvertONNXAxesToCNTKCppApi(GetNamedAttributeAsInt64Vec(node, "axes", vector<int64_t>({})), inputs[0]);
bool keepdims;
std::vector<Axis> axes;
// use default of all axes according to ONNX
if (axes.empty())
{
axes = vector<Axis>({ Axis::AllAxes() });
}
std::tie<std::vector<Axis>, bool>(axes, keepdims) = GetReduceElementsAttributes(node, inputs[0]);
bool keepdims = GetNamedAttributeAsInt64(node, "keepdims", 1) == 1;
FunctionPtr cntkFunction = ReduceMax(inputs[0], axes, keepdims, ToFixedWStringFromMultiByte(node->Name()));
return cntkFunction;
}
else if (onnxOpName == "ReduceMin")
{
std::vector<Axis> axes = ConvertONNXAxesToCNTKCppApi(GetNamedAttributeAsInt64Vec(node, "axes", vector<int64_t>({})), inputs[0]);
bool keepdims;
std::vector<Axis> axes;
// use default of all axes according to ONNX
if (axes.empty())
{
axes = vector<Axis>({ Axis::AllAxes() });
}
std::tie<std::vector<Axis>, bool>(axes, keepdims) = GetReduceElementsAttributes(node, inputs[0]);
bool keepdims = GetNamedAttributeAsInt64(node, "keepdims", 1) == 1;
FunctionPtr cntkFunction = ReduceMin(inputs[0], axes, keepdims, ToFixedWStringFromMultiByte(node->Name()));
return cntkFunction;
}
else if (onnxOpName == "ReduceSum")
{
std::vector<Axis> axes = ConvertONNXAxesToCNTKCppApi(GetNamedAttributeAsInt64Vec(node, "axes", vector<int64_t>({})), inputs[0]);
bool keepdims;
std::vector<Axis> axes;
// use default of all axes according to ONNX
if (axes.empty())
{
axes = vector<Axis>({ Axis::AllAxes() });
}
std::tie<std::vector<Axis>, bool>(axes, keepdims) = GetReduceElementsAttributes(node, inputs[0]);
bool keepdims = GetNamedAttributeAsInt64(node, "keepdims", 1) == 1;
FunctionPtr cntkFunction = ReduceSum(inputs[0], axes, keepdims, ToFixedWStringFromMultiByte(node->Name()));
return cntkFunction;
}
else if (onnxOpName == "ReduceMean")
{
std::vector<Axis> axes = ConvertONNXAxesToCNTKCppApi(GetNamedAttributeAsInt64Vec(node, "axes", vector<int64_t>({})), inputs[0]);
bool keepdims;
std::vector<Axis> axes;
// use default of all axes according to ONNX
if (axes.empty())
{
axes = vector<Axis>({ Axis::AllAxes() });
}
std::tie<std::vector<Axis>, bool>(axes, keepdims) = GetReduceElementsAttributes(node, inputs[0]);
bool keepdims = GetNamedAttributeAsInt64(node, "keepdims", 1) == 1;
FunctionPtr cntkFunction = ReduceMean(inputs[0], axes, keepdims, ToFixedWStringFromMultiByte(node->Name()));
return cntkFunction;
}
else if (onnxOpName == "ReduceProd")
{
std::vector<Axis> axes = ConvertONNXAxesToCNTKCppApi(GetNamedAttributeAsInt64Vec(node, "axes", vector<int64_t>({})), inputs[0]);
bool keepdims;
std::vector<Axis> axes;
std::tie<std::vector<Axis>, bool>(axes, keepdims) = GetReduceElementsAttributes(node, inputs[0]);
// use default of all axes according to ONNX
if (axes.empty())
{
axes = vector<Axis>({ Axis::AllAxes() });
}
bool keepdims = GetNamedAttributeAsInt64(node, "keepdims", 1) == 1;
FunctionPtr cntkFunction = ReduceProd(inputs[0], axes, keepdims, ToFixedWStringFromMultiByte(node->Name()));
return cntkFunction;
}
else if (onnxOpName == "ReduceLogSumExp" || onnxOpName == "ReduceLogSum")
{
std::vector<Axis> axes = ConvertONNXAxesToCNTKCppApi(GetNamedAttributeAsInt64Vec(node, "axes", vector<int64_t>({})), inputs[0]);
bool keepdims;
std::vector<Axis> axes;
std::tie<std::vector<Axis>, bool>(axes, keepdims) = GetReduceElementsAttributes(node, inputs[0]);
// use default of all axes according to ONNX
if (axes.empty())
{
axes = vector<Axis>({ Axis::AllAxes() });
}
bool keepdims = GetNamedAttributeAsInt64(node, "keepdims", 1) == 1;
FunctionPtr cntkFunction = ReduceLogSum(inputs[0], axes, keepdims, ToFixedWStringFromMultiByte(node->Name()));
return cntkFunction;
}
else if (onnxOpName == "ReduceL1")
{
std::vector<Axis> axes = ConvertONNXAxesToCNTKCppApi(GetNamedAttributeAsInt64Vec(node, "axes", vector<int64_t>({})), inputs[0]);
bool keepdims;
std::vector<Axis> axes;
// use default of all axes according to ONNX
if (axes.empty())
{
axes = vector<Axis>({ Axis::AllAxes() });
}
std::tie<std::vector<Axis>, bool>(axes, keepdims) = GetReduceElementsAttributes(node, inputs[0]);
bool keepdims = GetNamedAttributeAsInt64(node, "keepdims", 1) == 1;
FunctionPtr cntkFunction = ReduceL1(inputs[0], axes, keepdims, ToFixedWStringFromMultiByte(node->Name()));
return cntkFunction;
}
else if (onnxOpName == "ReduceL2")
{
std::vector<Axis> axes = ConvertONNXAxesToCNTKCppApi(GetNamedAttributeAsInt64Vec(node, "axes", vector<int64_t>({})), inputs[0]);
bool keepdims;
std::vector<Axis> axes;
// use default of all axes according to ONNX
if (axes.empty())
{
axes = vector<Axis>({ Axis::AllAxes() });
}
std::tie<std::vector<Axis>, bool>(axes, keepdims) = GetReduceElementsAttributes(node, inputs[0]);
bool keepdims = GetNamedAttributeAsInt64(node, "keepdims", 1) == 1;
FunctionPtr cntkFunction = ReduceL2(inputs[0], axes, keepdims, ToFixedWStringFromMultiByte(node->Name()));
return cntkFunction;
}
else if (onnxOpName == "ReduceSumSquare")
{
std::vector<Axis> axes = ConvertONNXAxesToCNTKCppApi(GetNamedAttributeAsInt64Vec(node, "axes", vector<int64_t>({})), inputs[0]);
bool keepdims;
std::vector<Axis> axes;
// use default of all axes according to ONNX
if (axes.empty())
{
axes = vector<Axis>({ Axis::AllAxes() });
}
std::tie<std::vector<Axis>, bool>(axes, keepdims) = GetReduceElementsAttributes(node, inputs[0]);
bool keepdims = GetNamedAttributeAsInt64(node, "keepdims", 1) == 1;
FunctionPtr cntkFunction = ReduceSumSquare(inputs[0], axes, keepdims, ToFixedWStringFromMultiByte(node->Name()));
return cntkFunction;
}
@ -2636,7 +2627,8 @@ FunctionPtr ONNXToCNTKHelper::CreateFunction(const Node *node, const std::vector
newShape = GetShapeFromInput(node->InputDefs()[1], graph);
}
const Node *childNode = GetChildNode(node, node->InputDefs()[0]);
int nodeArgIndexDummy = 0;
const Node *childNode = GetChildNode(node, node->InputDefs()[0], nodeArgIndexDummy);
if (childNode != nullptr && Operators::IsRNNOp(childNode->OpType()))
{
// Adjust for batch and sequence axes swap between CNTK and ONNX.
@ -2883,26 +2875,6 @@ FunctionPtr ONNXToCNTKHelper::CreateFunction(const Node *node, const std::vector
}
}
std::pair<const Node *, int> FindParent(const Node *node)
{
Node::NodeConstIterator it = node->OutputNodesBegin();
if (it != node->OutputNodesEnd())
{
const Node *parent = *it;
int index = 0;
for (auto nodeArg : parent->InputDefs())
{
// TODO: Check whether we should use node output arg name for the check below.
if (nodeArg->Name() == node->Name())
{
return std::make_pair(parent, index);
}
index++;
}
}
return std::make_pair(nullptr, -1);
}
std::pair<const Node *, int> FindParentAndChildIndex(const Node *node)
{
Node::NodeConstIterator it = node->OutputNodesBegin();
@ -3329,13 +3301,20 @@ std::vector<Variable> ONNXToCNTKHelper::CreateCNTKInputsStartingFromIndex(const
for (int i = startIndex; i < inputDefs.size(); i++)
{
const NodeArg *nodeArg = inputDefs[i];
const Node *inputNode = GetChildNode(node, nodeArg);
// nodeArg may be one of the outputDefs of another node, inputNode.
// In case there are multiple outputDefs, we need to know the index of nodeArg.
int nodeArgIndex = 0;
const Node *inputNode = GetChildNode(node, nodeArg, nodeArgIndex);
if (inputNode != nullptr)
{
ONNXToCNTKMap::iterator itNodeMap = constructedNodeMap.find(const_cast<Node *>(inputNode));
if (itNodeMap != constructedNodeMap.end())
{
inputs.insert(inputs.end(), itNodeMap->second.begin(), itNodeMap->second.end());
std::vector<FunctionPtr> inputCNTKFunctionPtrs = itNodeMap->second;
for (auto f : inputCNTKFunctionPtrs)
{
inputs.insert(inputs.end(), f->Outputs()[nodeArgIndex]);
}
}
else
{

View file

@ -508,6 +508,7 @@ namespace ONNX
{ L"Softsign",{ 0 } },
{ L"ImageScaler",{ 0, 1, 2, 3 } },
{ L"MeanVarianceNormalization",{ 0 } },
{ L"Sequence::Slice",{ 0, 1 } },
};
std::unordered_map<std::wstring, std::vector<int>> Operators::_cntkToONNXInputIndices = {

View file

@ -83,7 +83,8 @@ def verify_one_input(model, data, tmpdir, name, device=None, loaded_model=None,
# TODO: it is better to compare data.shape with model.arguments[0] and
# to pad batch dimension as needed.
if model.arguments[0].has_batch_axis():
# Some tests (e.g. the reduction tests) have already expanded the batch axis in the data.
if model.arguments[0].has_batch_axis() and type(data)!=list:
data.shape = (1, ) + data.shape
assert len(model.outputs) == len(loaded_model.outputs)
@ -444,6 +445,22 @@ def test_Concat(tmpdir, dtype):
verify_one_input(model, data1, tmpdir, 'Concat_1')
@pytest.mark.parametrize("dtype", DType_Config)
def test_Concat_With_Broadcast(tmpdir, dtype):
with C.default_options(dtype = dtype):
shape1 = [2,3,1,1,3]
shape2 = [1,3,4,1]
shape3 = [3,4,1]
axis = 2
data1 = np.random.uniform(-10, 10, shape1).astype(dtype)
data2 = np.random.uniform(-10, 10, shape2).astype(dtype)
data3 = np.random.uniform(-10, 10, shape3).astype(dtype)
x = C.input_variable(shape1)
y = C.constant(value=data2)
z = C.constant(value=data3)
model = C.splice(x, y, z, axis=axis)
verify_one_input(model, data1, tmpdir, 'Concat_Braodcast')
@pytest.mark.parametrize("dtype", DType_Config)
def test_Conv(tmpdir, dtype, device_id):
if device_id == -1 and dtype == np.float16: