CNTK V2 library: Enabled non-scalar initial value for Past/Future value nodes and other bug fixes
This commit is contained in:
Parent
dfcade2d8c
Commit
867ace750c
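The most visible public API change in the hunks below is the Splice overload, which now takes an Axis object instead of a raw size_t axis index. A minimal usage sketch follows; it is not part of this commit's diff, the function name, variable names, shapes, and output name are illustrative, and the included header is assumed to be the CNTKLibraryExperimental.h listed in this commit's project filters.

// Sketch only (not from the commit): exercising the new Axis-based Splice overload.
#include "CNTKLibraryExperimental.h" // header name taken from the project filters changed in this commit
using namespace CNTK;

FunctionPtr SpliceExample()
{
    // Two illustrative 1D input variables with 3 elements each.
    auto a = InputVariable({ 3 }, DataType::Float, L"a");
    auto b = InputVariable({ 3 }, DataType::Float, L"b");

    // The splice axis is now passed as an Axis object rather than a size_t;
    // per the check added in Splice below, only static axes are currently supported.
    return Splice({ a, b }, Axis(0), L"splicedOutput");
}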
@@ -286,7 +286,8 @@ namespace CNTK
///
std::wstring AsString() const
{
std::wstringstream wStrStream(L"{");
std::wstringstream wStrStream;
wStrStream << L"{";
for (size_t i = 0; i < Rank(); i++)
{
if (i != 0)
@@ -2491,7 +2492,7 @@ namespace CNTK
///
/// Create an instance of the CNTK built-in splice operation to splice together all the specified tensor operands into a single output tensor
///
CNTK_API FunctionPtr Splice(const std::vector<Variable>& operands, size_t axis, const std::wstring& name = L"");
CNTK_API FunctionPtr Splice(const std::vector<Variable>& operands, const Axis& axis, const std::wstring& name = L"");

///
/// Create a new Function instance which just combines the outputs of the specified list of 'operands' Functions such that the 'Outputs' of the
@@ -195,8 +195,6 @@ namespace CNTK
auto initialStateVar = Constant::Scalar(node->As<PastValueNode<ElementType>>()->InitialActivationValue(), AsDeviceDescriptor(node->GetDeviceId()));
inputVars.push_back(initialStateVar);
}
else
LogicError("LoadLegacyModel: Currently loading models with non-scalar initial value for PastValueNode/FutureValueNode is unsupported");

primitiveFunctionConfigParameters[PrimitiveFunction::AttributeNameOffset] = (size_t)node->As<PastValueNode<ElementType>>()->TimeStep();
opType = PrimitiveOpType::PastValue;
@@ -208,8 +206,6 @@ namespace CNTK
auto initialStateVar = Constant::Scalar(node->As<FutureValueNode<ElementType>>()->InitialActivationValue(), AsDeviceDescriptor(node->GetDeviceId()));
inputVars.push_back(initialStateVar);
}
else
LogicError("LoadLegacyModel: Currently loading models with non-scalar initial value for PastValueNode/FutureValueNode is unsupported");

primitiveFunctionConfigParameters[PrimitiveFunction::AttributeNameOffset] = (size_t)node->As<FutureValueNode<ElementType>>()->TimeStep();
opType = PrimitiveOpType::FutureValue;
@@ -134,6 +134,7 @@
<ClInclude Include="Utils.h" />
<ClInclude Include="stdafx.h" />
<ClInclude Include="targetver.h" />
<ClInclude Include="Value.h" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="BackCompat.cpp" />
@@ -32,6 +32,7 @@
<ClInclude Include="API\CNTKLibraryExperimental.h">
<Filter>API</Filter>
</ClInclude>
<ClInclude Include="Value.h" />
</ItemGroup>
<ItemGroup>
<Filter Include="API">
@@ -474,9 +474,8 @@ namespace CNTK
if (!axis1.IsStaticAxis() || !axis2.IsStaticAxis())
LogicError("TransposeAxes operation currently does not support transposing dynamic axes");

auto transposedTensorShape = AsTensorShape(inputs[0].Shape());
transposedTensorShape.SwapDimsInPlace(axis1.StaticAxisIndex(), axis2.StaticAxisIndex());
outputShape = AsNDShape(transposedTensorShape);
outputShape = inputs[0].Shape();
std::swap(outputShape[axis1.StaticAxisIndex()], outputShape[axis2.StaticAxisIndex()]);
break;
}
case PrimitiveOpType::Slice:
@@ -507,7 +506,7 @@ namespace CNTK
if ((axis.StaticAxisIndex() < outputTensorShape.GetRank()) && (0 <= realBeginIndex) && (realBeginIndex <= realEndIndex) && (realEndIndex <= sliceAxisDim))
outputTensorShape.NarrowTo(axis.StaticAxisIndex(), realBeginIndex, realEndIndex);

outputShape = AsNDShape(outputTensorShape);
outputShape = AsNDShape(outputTensorShape, /*allowNonFlattenableTensorShapes = */ true);
break;
}
case PrimitiveOpType::Reshape:
@@ -611,15 +610,11 @@ namespace CNTK
Variable inputOperandVar = inputs[0];
Variable initialStateVar = inputs[1];

// TODO: Currently we only support a scalar initial state
if (!initialStateVar.IsConstant() || (initialStateVar.Shape().Rank() > 0))
LogicError("Currently PastValue/FutureValue Function only supports scalar initial state");

// TODO: We currently only support input operand with 1 dynamic axis for PastValue/FutureValue
if (inputOperandVar.DynamicAxes().size() != 2)
LogicError("Currently PastValue/FutureValue Function only supports input operand with 2 dynamic axes (1 sequence-axis and 1 batch-axis)");

outputShape = UnaryElementwiseOpOutputShape(inputs[0].Shape());
outputShape = BinaryElementwiseOpOutputShape(op, inputs[0].Shape(), inputs[1].Shape());
break;
}
case PrimitiveOpType::ReduceElements:
@@ -975,16 +970,11 @@ namespace CNTK
Variable inputOperandVar = functionInputs[0];
Variable initialStateVar = functionInputs[1];

// Get the initial state of the PastValue/FutureValue operation
ElementType initStateValue;
NDArrayView tempView({}, &initStateValue, 1, DeviceDescriptor::CPUDevice());
tempView.CopyFrom(*(Constant(initialStateVar).Value()));

size_t offset = primitiveFunction->Attributes()[PrimitiveFunction::AttributeNameOffset].Value<size_t>();
if (op == PrimitiveOpType::PastValue)
computationNodePtr = New<PastValueNode<ElementType>>(network->GetDeviceId(), functionName, (float)initStateValue, AsTensorShape(inputOperandVar.Shape()), offset);
computationNodePtr = New<PastValueNode<ElementType>>(network->GetDeviceId(), functionName, AsTensorShape(inputOperandVar.Shape()), offset);
else
computationNodePtr = New<FutureValueNode<ElementType>>(network->GetDeviceId(), functionName, (float)initStateValue, AsTensorShape(inputOperandVar.Shape()), offset);
computationNodePtr = New<FutureValueNode<ElementType>>(network->GetDeviceId(), functionName, AsTensorShape(inputOperandVar.Shape()), offset);

break;
}
@@ -1043,9 +1033,14 @@ namespace CNTK
// Let's reorder inputNodesBasePtrs properly since the ordering of inputs of CNTK internal ComputationNode may be different from the PrimitiveFunction inputs ordering
ReorderAsCNTKComputationNodeInputs(op, inputNodesBasePtrs);
if (computationNodePtr->Is<INumInputs>())
inputNodesBasePtrs.resize(computationNodePtr->As<INumInputs>()->GetExpectedNumInputs());
else if ((op == PrimitiveOpType::PastValue) || (op == PrimitiveOpType::FutureValue)) // TODO: Temporary hack to be replaced with support for non-scalar initial state value operands
inputNodesBasePtrs.resize(1);
{
auto computationNodeExpectedInputCount = computationNodePtr->As<INumInputs>()->GetExpectedNumInputs();
if (computationNodeExpectedInputCount != inputNodesBasePtrs.size())
LogicError("Input count mismatch: The Primitive function for op %s has %d inputs while the corresponding ComputationNode has %d inputs",
PrimitiveOpTypeName(op),
inputNodesBasePtrs.size(),
computationNodeExpectedInputCount);
}

network->AddNodeToNetAndAttachInputs(computationNodePtr, inputNodesBasePtrs);
@@ -1185,6 +1180,9 @@ namespace CNTK
}
}

#ifdef _DEBUG
m_computationNetwork->SetTraceLevel(1);
#endif
m_computationNetwork->CompileNetwork();

// Verify that the shapes of the output Variables that we computed match the corresponding nodes in the ComputationNetwork
@@ -1237,6 +1235,14 @@ namespace CNTK
if (var.DynamicAxes().size() > 2)
LogicError("More than 2 dynamic axes for a variable is currently unsupported");

//if (value->Data()->Shape().SubShape(0, var.Shape().Rank()) != var.Shape())
//{
// InvalidArgument("The %s dimensions of the Value shape (%s) do not match the shape of the variable (%s) that it corresponds to!",
// Internal::IsReversingTensorShapesInErrorMessagesEnabled() ? "trailing" : "leading",
// AsStringForErrorReporting(value->Data()->Shape()).c_str()),
// AsStringForErrorReporting(var.Shape()).c_str()));
//}

size_t maxNumTimeSteps = value->Data()->Shape()[var.Shape().Rank()];
size_t numSequences = value->Data()->Shape()[var.Shape().Rank() + 1];
@@ -1280,9 +1286,7 @@ namespace CNTK
currentSequenceLength++;
}
else
{
currentSequenceEndAlreadyFound = true;
}
}

sequenceLengths[i] = currentSequenceLength;
@@ -1595,13 +1599,36 @@ namespace CNTK
const DeviceDescriptor& computeDevice,
const std::unordered_set<Variable>& outputsToRetainBackwardStateFor)
{
// TODO: How about zero argument functions?
// Validate arguments and outputs
if (outputs.empty())
InvalidArgument("CompositeFunction::Forward: At least one output has to be specified!");

// Make sure that the DataType of the variables and corresponding values match
// TODO: We need a better way to determine the ElementType for the network
auto dataType = arguments.begin()->second->Data()->GetDataType();
auto dataType = DataType::Unknown;
for (auto variableValuePair : arguments)
{
if (dataType == DataType::Unknown)
dataType = variableValuePair.first.GetDataType();
else if (dataType != variableValuePair.first.GetDataType())
LogicError("CompositeFunction::Forward: The DataType of all arguments of the Function must be the same");
}

if (dataType == DataType::Unknown)
{
for (auto variableValuePair : outputs)
{
if (dataType == DataType::Unknown)
dataType = variableValuePair.first.GetDataType();
}
}

if (dataType == DataType::Float)
GetComputationNetwork<float>(computeDevice, outputsToRetainBackwardStateFor, true);
else
else if (dataType == DataType::Double)
GetComputationNetwork<double>(computeDevice, outputsToRetainBackwardStateFor, true);
else
InvalidArgument("Unsupported DataType %s", DataTypeName(dataType));

// TODO: Avoid copying the data when possible
@@ -2075,10 +2102,13 @@ namespace CNTK
return CompositeFunction::Create(MakeSharedObject<PrimitiveFunction>(PrimitiveOpType::Select, std::vector<Variable>({ condition, leftOperand, rightOperand }), Dictionary(), name), name);
}

FunctionPtr Splice(const std::vector<Variable>& operands, size_t axis, const std::wstring& name /*= L""*/)
FunctionPtr Splice(const std::vector<Variable>& operands, const Axis& axis, const std::wstring& name /*= L""*/)
{
if (!axis.IsStaticAxis())
LogicError("Splice: Currently only splicing along a static axis is supported");

auto additionalProperties = Dictionary();
additionalProperties[PrimitiveFunction::AttributeNameAxis] = Axis(axis);
additionalProperties[PrimitiveFunction::AttributeNameAxis] = axis;

return CompositeFunction::Create(MakeSharedObject<PrimitiveFunction>(PrimitiveOpType::Splice, operands, std::move(additionalProperties), name), name);
}
@@ -2209,15 +2239,23 @@ namespace CNTK

FunctionPtr ZeroesLike(const Variable& operand)
{
if (operand.Shape().Rank() > 1)
LogicError("Internal::ZeroesLike: Currently only 1D inputs are supported!");

if (operand.IsSparse())
{
if (operand.Shape().Rank() > 1)
LogicError("Internal::ZeroesLike: Currently only 1D sparse inputs are supported!");

return Times(Constant({ 1, operand.Shape()[0] }, operand.GetDataType(), 0.0), operand);
}
else
{
auto rowSliceFunc = Internal::Slice(operand, Axis(0), 0, 1);
return Minus(rowSliceFunc, rowSliceFunc);
auto output = Minus(rowSliceFunc, rowSliceFunc);

// Reduce away all but the static axis 0
for (size_t i = 1; i < output->Output().Shape().Rank(); ++i)
output = ReduceSum(output, Axis(i));

return output;
}
}
@@ -71,13 +71,16 @@ namespace CNTK
return DeviceDescriptor::GPUDevice(deviceId);
}

inline NDShape AsNDShape(const Microsoft::MSR::CNTK::TensorShape& tensorShape)
inline NDShape AsNDShape(const Microsoft::MSR::CNTK::TensorShape& tensorShape, bool allowNonFlattenableTensorShapes = false)
{
// The TensorShape should be flattenable to 1D
for (size_t i = 1; i < tensorShape.GetRank(); ++i)
if (!allowNonFlattenableTensorShapes)
{
if (!tensorShape.CanFlatten(i))
InvalidArgument("AsNDShape() can only be called for TensorShapes that can be flattened to 1D");
// The TensorShape should be flattenable to 1D
for (size_t i = 1; i < tensorShape.GetRank(); ++i)
{
if (!tensorShape.CanFlatten(i))
InvalidArgument("AsNDShape() can only be called for TensorShapes that can be flattened to 1D");
}
}

return std::vector<size_t>(tensorShape.GetDims().begin(), tensorShape.GetDims().end());
@@ -193,6 +193,23 @@ std::pair<CNTK::FunctionPtr, CNTK::FunctionPtr> LSTMPComponentWithSelfStabilizat
return { LSTMCell.first, LSTMCell.second };
}

// This is currently unused
inline CNTK::FunctionPtr SimpleRecurrentLayer(const CNTK::Variable& input, const CNTK::NDShape& outputDim, const std::function<CNTK::FunctionPtr(const CNTK::Variable&)>& recurrenceHook, const CNTK::DeviceDescriptor& device)
{
auto dh = CNTK::PlaceholderVariable(outputDim, input.DynamicAxes());

unsigned long seed = 1;
auto createProjectionParam = [device, &seed](size_t outputDim, size_t inputDim) {
return CNTK::Parameter(CNTK::NDArrayView::RandomUniform<float>({ outputDim, inputDim }, -0.5, 0.5, seed++, device));
};

auto hProjWeights = createProjectionParam(outputDim[0], outputDim[0]);
auto inputProjWeights = createProjectionParam(outputDim[0], input.Shape()[0]);

auto output = Times(hProjWeights, recurrenceHook(dh)) + Times(inputProjWeights, input);
return output->ReplacePlaceholders({ { dh, output } });
}

inline std::vector<size_t> GenerateSequenceLengths(size_t numSequences, size_t maxAllowedSequenceLength)
{
std::vector<size_t> sequenceLengths(numSequences);
@@ -208,13 +225,13 @@ inline std::vector<size_t> GenerateSequenceLengths(size_t numSequences, size_t m
}

template <typename ElementType>
inline std::vector<std::vector<ElementType>> GenerateSequences(const std::vector<size_t>& sequenceLengths, size_t dim)
inline std::vector<std::vector<ElementType>> GenerateSequences(const std::vector<size_t>& sequenceLengths, const CNTK::NDShape& sampleShape)
{
size_t numSequences = sequenceLengths.size();
std::vector<std::vector<ElementType>> sequences;
for (size_t i = 0; i < numSequences; ++i)
{
std::vector<ElementType> currentSequence(dim * sequenceLengths[i]);
std::vector<ElementType> currentSequence(sampleShape.TotalSize() * sequenceLengths[i]);
for (size_t j = 0; j < currentSequence.size(); ++j)
currentSequence[j] = ((ElementType)rand()) / RAND_MAX;
@@ -244,17 +261,21 @@ inline std::vector<std::vector<size_t>> GenerateOneHotSequences(const std::vecto
}

template <typename ElementType>
inline CNTK::ValuePtr GenerateSequences(const std::vector<size_t>& sequenceLengths, size_t dim, const CNTK::DeviceDescriptor& device, bool oneHot)
inline CNTK::ValuePtr GenerateSequences(const std::vector<size_t>& sequenceLengths, const CNTK::NDShape& sampleShape, const CNTK::DeviceDescriptor& device, bool oneHot)
{
if (!oneHot)
{
std::vector<std::vector<ElementType>> sequences = GenerateSequences<ElementType>(sequenceLengths, dim);
return CNTK::Value::Create({ dim }, sequences, device, true);
std::vector<std::vector<ElementType>> sequences = GenerateSequences<ElementType>(sequenceLengths, sampleShape);
return CNTK::Value::Create(sampleShape, sequences, device, true);
}
else
{
std::vector<std::vector<size_t>> oneHotSequences = GenerateOneHotSequences(sequenceLengths, dim);
return CNTK::Value::Create<ElementType>({ dim }, oneHotSequences, device, true);
if (sampleShape.Rank() != 1)
throw std::runtime_error("GenerateSequences can generate one hot sequences only for 1D sample shapes");

size_t vocabularySize = sampleShape[0];
std::vector<std::vector<size_t>> oneHotSequences = GenerateOneHotSequences(sequenceLengths, vocabularySize);
return CNTK::Value::Create<ElementType>(vocabularySize, oneHotSequences, device, true);
}
}
@@ -296,3 +317,39 @@ inline void PrintTrainingProgress(const CNTK::Trainer& trainer, size_t minibatch
printf("Minibatch %d: CrossEntropy loss = %.8g, Evaluation criterion = %.8g\n", (int)minibatchIdx, trainLossValue, evaluationValue);
}
}

inline std::vector<size_t> GetStrides(const CNTK::NDShape& shape)
{
std::vector<size_t> strides(shape.Rank() - 1);
size_t totalSize = 1;
for (size_t i = 0; i < shape.Rank() - 1; ++i)
{
totalSize *= shape[i];
strides[i] = totalSize;
}

return strides;
}

inline CNTK::NDShape UnflattenedShape(size_t flatennedIdx, const std::vector<size_t>& strides)
{
CNTK::NDShape unflattenedShape(strides.size() + 1);
size_t remainder = flatennedIdx;
for (int i = (int)strides.size() - 1; i >= 0; --i)
{
unflattenedShape[i + 1] = remainder / strides[i];
remainder = remainder % strides[i];
}
unflattenedShape[0] = remainder;

return unflattenedShape;
}

inline size_t FlattenedIndex(const CNTK::NDShape& shape, const std::vector<size_t>& strides)
{
size_t flattenedIdx = shape[0];
for (int i = 0; i < strides.size(); ++i)
flattenedIdx += shape[i + 1] * strides[i];

return flattenedIdx;
};
@@ -10,7 +10,7 @@ void TestReduceSum(const DeviceDescriptor& device)
size_t dim = 23;

auto sequenceLengths = GenerateSequenceLengths(numSequences, maxAllowedSequenceLength);
auto sequences = GenerateSequences<float>(sequenceLengths, dim);
auto sequences = GenerateSequences<float>(sequenceLengths, { dim });
ValuePtr sequencesValue = Value::Create({ dim }, sequences, device, true);

// Test ReduceSum along a static axis
@@ -113,67 +113,88 @@ void TestReduceSum(const DeviceDescriptor& device)
}
}

void TestSlice(const DeviceDescriptor& device)
void TestSlice(size_t sampleRank, const DeviceDescriptor& device)
{
size_t numSequences = 7;
size_t maxAllowedSequenceLength = 11;
size_t dim = 23;
size_t maxDimSize = 23;
NDShape inputShape(sampleRank);
for (size_t i = 0; i < sampleRank; ++i)
inputShape[i] = (rand() % maxDimSize) + 1;

auto sequenceLengths = GenerateSequenceLengths(numSequences, maxAllowedSequenceLength);
auto sequences = GenerateSequences<float>(sequenceLengths, dim);
ValuePtr sequencesValue = Value::Create({ dim }, sequences, device, true);
auto sequences = GenerateSequences<float>(sequenceLengths, inputShape);
ValuePtr sequencesValue = Value::Create(inputShape, sequences, device, true);

// Test slice along a static axis
{
auto testStaticAxisSlice = [&sequences, &sequenceLengths, dim, sequencesValue, device](int beginOffset, int endOffset)
auto testStaticAxisSlice = [&sequences, &sequenceLengths, inputShape, sequencesValue, device](size_t sliceAxis, int beginOffset, int endOffset)
{
size_t maxActualSequenceLength = sequencesValue->Data()->Shape()[1];
size_t numSequences = sequencesValue->Data()->Shape()[2];
size_t maxActualSequenceLength = sequencesValue->Data()->Shape()[inputShape.Rank()];
size_t numSequences = sequencesValue->Data()->Shape()[inputShape.Rank() + 1];

auto inputVar = InputVariable({ dim }, DataType::Float, L"input");
auto rowSliceFunc = Slice(inputVar, Axis(0), beginOffset, endOffset);
auto inputVar = InputVariable(inputShape, DataType::Float, L"input");
auto sliceFunc = Slice(inputVar, Axis(sliceAxis), beginOffset, endOffset);

NDShape outputShape = rowSliceFunc->Output().Shape().AppendShape({ maxActualSequenceLength, numSequences });
std::vector<float> outputData(outputShape.TotalSize());
ValuePtr outputValue = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(outputShape, outputData, false), sequencesValue->Mask()->DeepClone());
NDShape outputShape = sliceFunc->Output().Shape();
auto outputDataShape = outputShape.AppendShape({ maxActualSequenceLength, numSequences });
std::vector<float> outputData(outputDataShape.TotalSize());
ValuePtr outputValue = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(outputDataShape, outputData, false), sequencesValue->Mask()->DeepClone());

std::unordered_map<Variable, ValuePtr> outputs = { { rowSliceFunc->Output(), outputValue } };
rowSliceFunc->Forward({ { inputVar, sequencesValue } }, outputs, device);
std::unordered_map<Variable, ValuePtr> outputs = { { sliceFunc->Output(), outputValue } };
sliceFunc->Forward({ { inputVar, sequencesValue } }, outputs, device);

size_t rowSliceStartOffset = (beginOffset >= 0) ? beginOffset : (dim + beginOffset);
size_t sliceLength = endOffset - beginOffset;
std::vector<float> expectedOutputValues(sliceLength * maxActualSequenceLength * numSequences);
std::vector<size_t> inputShapeStrides = GetStrides(inputShape);
std::vector<size_t> outputShapeStrides = GetStrides(outputShape);

size_t sliceStartOffset = (beginOffset >= 0) ? beginOffset : (inputShape[sliceAxis] + beginOffset);
std::vector<float> expectedOutputValues(outputShape.TotalSize() * maxActualSequenceLength * numSequences);
for (size_t i = 0; i < numSequences; ++i)
{
size_t currentSequenceLength = sequenceLengths[i];
for (size_t j = 0; j < currentSequenceLength; ++j)
{
for (size_t k = 0; k < sliceLength; ++k)
expectedOutputValues[(((i * maxActualSequenceLength) + j) * sliceLength) + k] = sequences[i][(j * dim) + k + rowSliceStartOffset];
for (size_t k = 0; k < outputShape.TotalSize(); ++k)
{
auto outputIdx = UnflattenedShape(k, outputShapeStrides);
auto inputIdx = outputIdx;
inputIdx[sliceAxis] += sliceStartOffset;
auto flatInputIdx = FlattenedIndex(inputIdx, inputShapeStrides);
expectedOutputValues[(((i * maxActualSequenceLength) + j) * outputShape.TotalSize()) + k] = sequences[i][(j * inputShape.TotalSize()) + flatInputIdx];
}
}
}

FloatingPointVectorCompare(outputData, expectedOutputValues, "testStaticAxisSlice: Forward prop results do not match expected results");
};

testStaticAxisSlice(3, 5);
testStaticAxisSlice(-1, 0);
testStaticAxisSlice(-3, -1);
size_t sliceAxis = 0;
testStaticAxisSlice(sliceAxis, 3, 5);

if (sliceAxis < (inputShape.Rank() - 1))
sliceAxis++;

testStaticAxisSlice(sliceAxis, -1, 0);

if (sliceAxis < (inputShape.Rank() - 1))
sliceAxis++;

testStaticAxisSlice(sliceAxis, -3, -1);
}

// Test slice along a dynamic axis
{
auto testDynamicAxisSlice = [&sequences, &sequenceLengths, dim, sequencesValue, device](const Axis& axis, int beginOffset, int endOffset)
auto testDynamicAxisSlice = [&sequences, &sequenceLengths, inputShape, sequencesValue, device](const Axis& axis, int beginOffset, int endOffset)
{
if (axis.IsStaticAxis())
RuntimeError("Called the dynamic axis slice test with a static axis");

size_t maxActualSequenceLength = sequencesValue->Data()->Shape()[1];
size_t numSequences = sequencesValue->Data()->Shape()[2];
size_t maxActualSequenceLength = sequencesValue->Data()->Shape()[inputShape.Rank()];
size_t numSequences = sequencesValue->Data()->Shape()[inputShape.Rank() + 1];

size_t sliceLength = endOffset - beginOffset;

auto inputVar = InputVariable({ dim }, DataType::Float, L"input");
auto inputVar = InputVariable(inputShape, DataType::Float, L"input");
auto sliceFunc = Slice(inputVar, axis, beginOffset, endOffset);

size_t outputSequenceAxisLength = (axis == Axis::DefaultDynamicAxis()) ? sliceLength : maxActualSequenceLength;
@@ -188,7 +209,7 @@ void TestSlice(const DeviceDescriptor& device)
size_t startSequenceIdx = (axis == Axis::DefaultBatchAxis()) ? ((beginOffset >= 0) ? beginOffset : (numSequences + beginOffset)) : 0;
size_t endSequenceIdx = (axis == Axis::DefaultBatchAxis()) ? ((endOffset > 0) ? endOffset : (numSequences + endOffset)) : numSequences;

std::vector<float> expectedOutputValues(dim * outputSequenceAxisLength * outputBatchAxisLength);
std::vector<float> expectedOutputValues(inputShape.TotalSize() * outputSequenceAxisLength * outputBatchAxisLength);
for (size_t i = startSequenceIdx; i < endSequenceIdx; ++i)
{
size_t currentSequenceLength = sequenceLengths[i];
@@ -196,8 +217,8 @@ void TestSlice(const DeviceDescriptor& device)
size_t endFrameIdx = (axis == Axis::DefaultDynamicAxis()) ? ((endOffset > 0) ? endOffset : (currentSequenceLength + endOffset)) : currentSequenceLength;
for (size_t j = startFrameIdx; j < endFrameIdx; ++j)
{
for (size_t k = 0; k < dim; ++k)
expectedOutputValues[((((i - startSequenceIdx) * outputSequenceAxisLength) + (j - startFrameIdx)) * dim) + k] = sequences[i][(j * dim) + k];
for (size_t k = 0; k < inputShape.TotalSize(); ++k)
expectedOutputValues[((((i - startSequenceIdx) * outputSequenceAxisLength) + (j - startFrameIdx)) * inputShape.TotalSize()) + k] = sequences[i][(j * inputShape.TotalSize()) + k];
}
}
@@ -344,13 +365,59 @@ void TestRecurrentFunctionCloning()
CompareFunctions(clonedFunctionWithParametersShared, clonedFunctionWithParametersFrozen, ParameterCloningMethod::Freeze, cloningReplacements, visitedFunctions);
}

void TestTranspose(size_t numAxes, size_t axis1, size_t axis2, const DeviceDescriptor& device)
{
srand(1);

size_t maxDimSize = 15;
NDShape inputShape(numAxes);
for (size_t i = 0; i < numAxes; ++i)
inputShape[i] = (rand() % maxDimSize) + 1;

auto inputVar = InputVariable(inputShape, DataType::Float, false, L"leftInput");
auto transposeFunc = TransposeAxes(inputVar, Axis(axis1), Axis(axis2));

std::vector<float> inputData(inputShape.TotalSize());
for (size_t i = 0; i < inputData.size(); ++i)
inputData[i] = ((float)rand()) / RAND_MAX;

auto inputValueShape = inputShape.AppendShape({ 1, 1 });
ValuePtr inputValue = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(inputValueShape, inputData, true));

NDShape outputShape = transposeFunc->Output().Shape();
NDShape outputValueShape = outputShape.AppendShape({ 1, 1 });
std::vector<float> outputData(outputValueShape.TotalSize());
ValuePtr outputValue = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(outputValueShape, outputData, false));

std::unordered_map<Variable, ValuePtr> outputs = { { transposeFunc->Output(), outputValue } };
transposeFunc->Forward({ { inputVar, inputValue } }, outputs, device);

std::vector<size_t> inputShapeStrides = GetStrides(inputShape);
std::vector<size_t> outputShapeStrides = GetStrides(outputShape);

// Verify forward prop results
std::vector<float> expectedOutputValues(outputShape.TotalSize());
for (size_t i = 0; i < expectedOutputValues.size(); ++i)
{
auto unflattenedShape = UnflattenedShape(i, outputShapeStrides);
std::swap(unflattenedShape[axis1], unflattenedShape[axis2]);
size_t flattenedIndex = FlattenedIndex(unflattenedShape, inputShapeStrides);
expectedOutputValues[i] = inputData[flattenedIndex];
}

FloatingPointVectorCompare(outputData, expectedOutputValues, "TestTranspose: Forward prop results do not match expected results");
}

void FunctionTests()
{
TestSlice(DeviceDescriptor::CPUDevice());
TestSlice(DeviceDescriptor::GPUDevice(0));
TestSlice(2, DeviceDescriptor::CPUDevice());
TestSlice(1, DeviceDescriptor::GPUDevice(0));

TestReduceSum(DeviceDescriptor::CPUDevice());
TestReduceSum(DeviceDescriptor::GPUDevice(0));

TestRecurrentFunctionCloning();

TestTranspose(2, 0, 1, DeviceDescriptor::CPUDevice());
TestTranspose(3, 1, 2, DeviceDescriptor::GPUDevice(0));
}
@@ -80,7 +80,7 @@ void TestRecurrentNetworkCreation(const DeviceDescriptor& device, bool testSaveA
{
std::vector<size_t> sequenceLengths = GenerateSequenceLengths(numSequences, maxAllowedSequenceLength);

ValuePtr inputValue = GenerateSequences<ElementType>(sequenceLengths, inputDim, device, false);
ValuePtr inputValue = GenerateSequences<ElementType>(sequenceLengths, { inputDim }, device, false);

std::vector<std::vector<ElementType>> labelsData;
for (size_t i = 0; i < numSequences; ++i)
@@ -59,6 +59,9 @@ void TrainSequenceToSequenceTranslator(const DeviceDescriptor& device, bool useS
{
thoughtVectorH = Reshape(thoughtVectorH, thoughtVectorH->Output().Shape().AppendShape({ 1 }));
thoughtVectorC = Reshape(thoughtVectorC, thoughtVectorC->Output().Shape().AppendShape({ 1 }));

labelEmbedding = Reshape(labelEmbedding, labelEmbedding->Output().Shape().AppendShape({ 1 }));
labelSentenceStartEmbeddedScattered = Reshape(labelSentenceStartEmbeddedScattered, labelSentenceStartEmbeddedScattered->Output().Shape().AppendShape({ 1 }));
}

auto thoughtVectorBroadcastH = Sequence::BroadcastAs(thoughtVectorH, labelEmbedding);
@@ -179,6 +182,6 @@ void TrainSequenceToSequenceTranslator(const DeviceDescriptor& device, bool useS
void TrainSequenceToSequenceTranslator()
{
// TODO: Also test with sparse input variables in the graph
TrainSequenceToSequenceTranslator(DeviceDescriptor::GPUDevice(0), false, false, true, false);
TrainSequenceToSequenceTranslator(DeviceDescriptor::CPUDevice(), false, true, false, true);
TrainSequenceToSequenceTranslator(DeviceDescriptor::GPUDevice(0), false, false, true, false);
}