CNTK V2 library: Enabled non-scalar initial values for PastValue/FutureValue nodes and other bug fixes

Author: Amit Agarwal 2016-09-17 14:27:27 -07:00
Parent: dfcade2d8c
Commit: 867ace750c
10 changed files: 249 additions and 82 deletions
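The headline change allows the initial state of a PastValue/FutureValue operation to be a non-scalar operand. Below is a minimal sketch of the newly permitted usage, not code from this commit; it assumes a factory along the lines of PastValue(operand, initialState, offset, name), as suggested by the shape-inference code further down, and uses illustrative names and shapes.

// Hedged sketch: the initial state may now be a non-scalar Constant rather than a scalar.
const size_t dim = 5;
auto input = CNTK::InputVariable({ dim }, CNTK::DataType::Float, L"input");
auto initialState = CNTK::Constant({ dim }, CNTK::DataType::Float, 0.0);
auto delayed = CNTK::PastValue(input, initialState, /*offset =*/ 1, L"pastValue");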

View file

@@ -286,7 +286,8 @@ namespace CNTK
///
std::wstring AsString() const
{
std::wstringstream wStrStream(L"{");
std::wstringstream wStrStream;
wStrStream << L"{";
for (size_t i = 0; i < Rank(); i++)
{
if (i != 0)
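Note on the change above: constructing a std::wstringstream with an initial L"{" payload leaves the stream's write position at the beginning, so subsequent << insertions overwrite the brace; streaming the opening brace explicitly after default-constructing the stream appends it as intended.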
@@ -2491,7 +2492,7 @@ namespace CNTK
///
/// Create an instance of the CNTK built-in splice operation to splice together all the specified tensor operands into a single output tensor
///
CNTK_API FunctionPtr Splice(const std::vector<Variable>& operands, size_t axis, const std::wstring& name = L"");
CNTK_API FunctionPtr Splice(const std::vector<Variable>& operands, const Axis& axis, const std::wstring& name = L"");
///
/// Create a new Function instance which just combines the outputs of the specified list of 'operands' Functions such that the 'Outputs' of the

View file

@@ -195,8 +195,6 @@ namespace CNTK
auto initialStateVar = Constant::Scalar(node->As<PastValueNode<ElementType>>()->InitialActivationValue(), AsDeviceDescriptor(node->GetDeviceId()));
inputVars.push_back(initialStateVar);
}
else
LogicError("LoadLegacyModel: Currently loading models with non-scalar initial value for PastValueNode/FutureValueNode is unsupported");
primitiveFunctionConfigParameters[PrimitiveFunction::AttributeNameOffset] = (size_t)node->As<PastValueNode<ElementType>>()->TimeStep();
opType = PrimitiveOpType::PastValue;
@@ -208,8 +206,6 @@ namespace CNTK
auto initialStateVar = Constant::Scalar(node->As<FutureValueNode<ElementType>>()->InitialActivationValue(), AsDeviceDescriptor(node->GetDeviceId()));
inputVars.push_back(initialStateVar);
}
else
LogicError("LoadLegacyModel: Currently loading models with non-scalar initial value for PastValueNode/FutureValueNode is unsupported");
primitiveFunctionConfigParameters[PrimitiveFunction::AttributeNameOffset] = (size_t)node->As<FutureValueNode<ElementType>>()->TimeStep();
opType = PrimitiveOpType::FutureValue;

View file

@@ -134,6 +134,7 @@
<ClInclude Include="Utils.h" />
<ClInclude Include="stdafx.h" />
<ClInclude Include="targetver.h" />
<ClInclude Include="Value.h" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="BackCompat.cpp" />

View file

@@ -32,6 +32,7 @@
<ClInclude Include="API\CNTKLibraryExperimental.h">
<Filter>API</Filter>
</ClInclude>
<ClInclude Include="Value.h" />
</ItemGroup>
<ItemGroup>
<Filter Include="API">

View file

@@ -474,9 +474,8 @@ namespace CNTK
if (!axis1.IsStaticAxis() || !axis2.IsStaticAxis())
LogicError("TransposeAxes operation currently does not support transposing dynamic axes");
auto transposedTensorShape = AsTensorShape(inputs[0].Shape());
transposedTensorShape.SwapDimsInPlace(axis1.StaticAxisIndex(), axis2.StaticAxisIndex());
outputShape = AsNDShape(transposedTensorShape);
outputShape = inputs[0].Shape();
std::swap(outputShape[axis1.StaticAxisIndex()], outputShape[axis2.StaticAxisIndex()]);
break;
}
case PrimitiveOpType::Slice:
@@ -507,7 +506,7 @@ namespace CNTK
if ((axis.StaticAxisIndex() < outputTensorShape.GetRank()) && (0 <= realBeginIndex) && (realBeginIndex <= realEndIndex) && (realEndIndex <= sliceAxisDim))
outputTensorShape.NarrowTo(axis.StaticAxisIndex(), realBeginIndex, realEndIndex);
outputShape = AsNDShape(outputTensorShape);
outputShape = AsNDShape(outputTensorShape, /*allowNonFlattenableTensorShapes = */ true);
break;
}
case PrimitiveOpType::Reshape:
@@ -611,15 +610,11 @@ namespace CNTK
Variable inputOperandVar = inputs[0];
Variable initialStateVar = inputs[1];
// TODO: Currently we only support a scalar initial state
if (!initialStateVar.IsConstant() || (initialStateVar.Shape().Rank() > 0))
LogicError("Currently PastValue/FutureValue Function only supports scalar initial state");
// TODO: We currently only support input operand with 1 dynamic axis for PastValue/FutureValue
if (inputOperandVar.DynamicAxes().size() != 2)
LogicError("Currently PastValue/FutureValue Function only supports input operand with with 2 dynamic axis (1 sequence-axis and 1 batch-axis)");
outputShape = UnaryElementwiseOpOutputShape(inputs[0].Shape());
outputShape = BinaryElementwiseOpOutputShape(op, inputs[0].Shape(), inputs[1].Shape());
break;
}
case PrimitiveOpType::ReduceElements:
@@ -975,16 +970,11 @@ namespace CNTK
Variable inputOperandVar = functionInputs[0];
Variable initialStateVar = functionInputs[1];
// Get the initial state of the PastValue/FutureValue operation
ElementType initStateValue;
NDArrayView tempView({}, &initStateValue, 1, DeviceDescriptor::CPUDevice());
tempView.CopyFrom(*(Constant(initialStateVar).Value()));
size_t offset = primitiveFunction->Attributes()[PrimitiveFunction::AttributeNameOffset].Value<size_t>();
if (op == PrimitiveOpType::PastValue)
computationNodePtr = New<PastValueNode<ElementType>>(network->GetDeviceId(), functionName, (float)initStateValue, AsTensorShape(inputOperandVar.Shape()), offset);
computationNodePtr = New<PastValueNode<ElementType>>(network->GetDeviceId(), functionName, AsTensorShape(inputOperandVar.Shape()), offset);
else
computationNodePtr = New<FutureValueNode<ElementType>>(network->GetDeviceId(), functionName, (float)initStateValue, AsTensorShape(inputOperandVar.Shape()), offset);
computationNodePtr = New<FutureValueNode<ElementType>>(network->GetDeviceId(), functionName, AsTensorShape(inputOperandVar.Shape()), offset);
break;
}
@@ -1043,9 +1033,14 @@ namespace CNTK
// Let's reorder inputNodesBasePtrs properly since the ordering of inputs of CNTK internal ComputationNode may be different from the PrimitiveFunction inputs ordering
ReorderAsCNTKComputationNodeInputs(op, inputNodesBasePtrs);
if (computationNodePtr->Is<INumInputs>())
inputNodesBasePtrs.resize(computationNodePtr->As<INumInputs>()->GetExpectedNumInputs());
else if ((op == PrimitiveOpType::PastValue) || (op == PrimitiveOpType::FutureValue)) // TODO: Temporary hack to be replaced with support for non-scalar initial state value operands
inputNodesBasePtrs.resize(1);
{
auto computationNodeExpectedInputCount = computationNodePtr->As<INumInputs>()->GetExpectedNumInputs();
if (computationNodeExpectedInputCount != inputNodesBasePtrs.size())
LogicError("Input count mismatch: The Primitive function for op %s has %d inputs while the corresponding ComputationNode has %d inputs",
PrimitiveOpTypeName(op),
inputNodesBasePtrs.size(),
computationNodeExpectedInputCount);
}
network->AddNodeToNetAndAttachInputs(computationNodePtr, inputNodesBasePtrs);
@@ -1185,6 +1180,9 @@ namespace CNTK
}
}
#ifdef _DEBUG
m_computationNetwork->SetTraceLevel(1);
#endif
m_computationNetwork->CompileNetwork();
// Verify that the shapes of the output Variables that we computed match the corresponding nodes in the ComputationNetwork
@@ -1237,6 +1235,14 @@ namespace CNTK
if (var.DynamicAxes().size() > 2)
LogicError("More than 2 dynamic axis for a variable is currently unsupported");
//if (value->Data()->Shape().SubShape(0, var.Shape().Rank()) != var.Shape())
//{
// InvalidArgument("The %s dimensions of the Value shape (%s) do not match the shape of the variable (%s) that it corresponds to!",
// Internal::IsReversingTensorShapesInErrorMessagesEnabled() ? "trailing" : "leading",
// AsStringForErrorReporting(value->Data()->Shape()).c_str()),
// AsStringForErrorReporting(var.Shape()).c_str()));
//}
size_t maxNumTimeSteps = value->Data()->Shape()[var.Shape().Rank()];
size_t numSequences = value->Data()->Shape()[var.Shape().Rank() + 1];
@@ -1280,9 +1286,7 @@ namespace CNTK
currentSequenceLength++;
}
else
{
currentSequenceEndAlreadyFound = true;
}
}
sequenceLengths[i] = currentSequenceLength;
@@ -1595,13 +1599,36 @@ namespace CNTK
const DeviceDescriptor& computeDevice,
const std::unordered_set<Variable>& outputsToRetainBackwardStateFor)
{
// TODO: How about zero argument functions?
// Validate arguments and outputs
if (outputs.empty())
InvalidArgument("CompositeFunction::Forward: At least one output has to be specified!");
// Make sure that the DataType of the variables and corresponding values match
// TODO: We need a better way to determine the ElementType for the network
auto dataType = arguments.begin()->second->Data()->GetDataType();
auto dataType = DataType::Unknown;
for (auto variableValuePair : arguments)
{
if (dataType == DataType::Unknown)
dataType = variableValuePair.first.GetDataType();
else if (dataType != variableValuePair.first.GetDataType())
LogicError("CompositeFunction::Forward: The DataType of all arguments of the Function must be same");
}
if (dataType == DataType::Unknown)
{
for (auto variableValuePair : outputs)
{
if (dataType == DataType::Unknown)
dataType = variableValuePair.first.GetDataType();
}
}
if (dataType == DataType::Float)
GetComputationNetwork<float>(computeDevice, outputsToRetainBackwardStateFor, true);
else
else if (dataType == DataType::Double)
GetComputationNetwork<double>(computeDevice, outputsToRetainBackwardStateFor, true);
else
InvalidArgument("Unsupported DataType %s", DataTypeName(dataType));
// TODO: Avoid copying the data when possible
@@ -2075,10 +2102,13 @@ namespace CNTK
return CompositeFunction::Create(MakeSharedObject<PrimitiveFunction>(PrimitiveOpType::Select, std::vector<Variable>({ condition, leftOperand, rightOperand }), Dictionary(), name), name);
}
FunctionPtr Splice(const std::vector<Variable>& operands, size_t axis, const std::wstring& name /*= L""*/)
FunctionPtr Splice(const std::vector<Variable>& operands, const Axis& axis, const std::wstring& name /*= L""*/)
{
if (!axis.IsStaticAxis())
LogicError("Splice: Currently only splicing along a static axis is supported");
auto additionalProperties = Dictionary();
additionalProperties[PrimitiveFunction::AttributeNameAxis] = Axis(axis);
additionalProperties[PrimitiveFunction::AttributeNameAxis] = axis;
return CompositeFunction::Create(MakeSharedObject<PrimitiveFunction>(PrimitiveOpType::Splice, operands, std::move(additionalProperties), name), name);
}
@@ -2209,15 +2239,23 @@ namespace CNTK
FunctionPtr ZeroesLike(const Variable& operand)
{
if (operand.Shape().Rank() > 1)
LogicError("Internal::ZeroesLike: Currently only 1D inputs are supported!");
if (operand.IsSparse())
{
if (operand.Shape().Rank() > 1)
LogicError("Internal::ZeroesLike: Currently only 1D sparse inputs are supported!");
return Times(Constant({ 1, operand.Shape()[0] }, operand.GetDataType(), 0.0), operand);
}
else
{
auto rowSliceFunc = Internal::Slice(operand, Axis(0), 0, 1);
return Minus(rowSliceFunc, rowSliceFunc);
auto output = Minus(rowSliceFunc, rowSliceFunc);
// Reduce away all but the static axis 0
for (size_t i = 1; i < output->Output().Shape().Rank(); ++i)
output = ReduceSum(output, Axis(i));
return output;
}
}

View file

@@ -71,13 +71,16 @@ namespace CNTK
return DeviceDescriptor::GPUDevice(deviceId);
}
inline NDShape AsNDShape(const Microsoft::MSR::CNTK::TensorShape& tensorShape)
inline NDShape AsNDShape(const Microsoft::MSR::CNTK::TensorShape& tensorShape, bool allowNonFlattenableTensorShapes = false)
{
// The TensorShape should be flattenable to 1D
for (size_t i = 1; i < tensorShape.GetRank(); ++i)
if (!allowNonFlattenableTensorShapes)
{
if (!tensorShape.CanFlatten(i))
InvalidArgument("AsNDShape() can only be called for TensorShapes that can be flattened to 1D");
// The TensorShape should be flattenable to 1D
for (size_t i = 1; i < tensorShape.GetRank(); ++i)
{
if (!tensorShape.CanFlatten(i))
InvalidArgument("AsNDShape() can only be called for TensorShapes that can be flattened to 1D");
}
}
return std::vector<size_t>(tensorShape.GetDims().begin(), tensorShape.GetDims().end());
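The new allowNonFlattenableTensorShapes flag exists for call sites such as the Slice shape inference above, where NarrowTo can leave a TensorShape that is no longer flattenable to 1D. A hedged sketch of such a call, with someShape standing in for an arbitrary NDShape:

auto tensorShape = AsTensorShape(someShape);
tensorShape.NarrowTo(0, /*beginIndex =*/ 1, /*endIndex =*/ 3); // may leave a non-flattenable strided shape
auto ndShape = AsNDShape(tensorShape, /*allowNonFlattenableTensorShapes =*/ true);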

View file

@@ -193,6 +193,23 @@ std::pair<CNTK::FunctionPtr, CNTK::FunctionPtr> LSTMPComponentWithSelfStabilizat
return { LSTMCell.first, LSTMCell.second };
}
// This is currently unused
inline CNTK::FunctionPtr SimpleRecurrentLayer(const CNTK::Variable& input, const CNTK::NDShape& outputDim, const std::function<CNTK::FunctionPtr(const CNTK::Variable&)>& recurrenceHook, const CNTK::DeviceDescriptor& device)
{
auto dh = CNTK::PlaceholderVariable(outputDim, input.DynamicAxes());
unsigned long seed = 1;
auto createProjectionParam = [device, &seed](size_t outputDim, size_t inputDim) {
return CNTK::Parameter(CNTK::NDArrayView::RandomUniform<float>({ outputDim, inputDim }, -0.5, 0.5, seed++, device));
};
auto hProjWeights = createProjectionParam(outputDim[0], outputDim[0]);
auto inputProjWeights = createProjectionParam(outputDim[0], input.Shape()[0]);
auto output = Times(hProjWeights, recurrenceHook(dh)) + Times(inputProjWeights, input);
return output->ReplacePlaceholders({ { dh, output } });
}
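Although noted as unused, a hypothetical call to SimpleRecurrentLayer would pass a delay-based recurrence hook; the exact PastValue and Constant::Scalar overloads below are assumed rather than taken from this commit:

auto device = CNTK::DeviceDescriptor::CPUDevice();
auto features = CNTK::InputVariable({ 10 }, CNTK::DataType::Float, L"features");
// Hypothetical hook: delay the recurrent input by one step with a scalar zero initial state.
auto recurrenceHook = [device](const CNTK::Variable& x) { return CNTK::PastValue(x, CNTK::Constant::Scalar(0.0f, device), 1); };
auto rnnOutput = SimpleRecurrentLayer(features, { 20 }, recurrenceHook, device);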
inline std::vector<size_t> GenerateSequenceLengths(size_t numSequences, size_t maxAllowedSequenceLength)
{
std::vector<size_t> sequenceLengths(numSequences);
@@ -208,13 +225,13 @@ inline std::vector<size_t> GenerateSequenceLengths(size_t numSequences, size_t m
}
template <typename ElementType>
inline std::vector<std::vector<ElementType>> GenerateSequences(const std::vector<size_t>& sequenceLengths, size_t dim)
inline std::vector<std::vector<ElementType>> GenerateSequences(const std::vector<size_t>& sequenceLengths, const CNTK::NDShape& sampleShape)
{
size_t numSequences = sequenceLengths.size();
std::vector<std::vector<ElementType>> sequences;
for (size_t i = 0; i < numSequences; ++i)
{
std::vector<ElementType> currentSequence(dim * sequenceLengths[i]);
std::vector<ElementType> currentSequence(sampleShape.TotalSize() * sequenceLengths[i]);
for (size_t j = 0; j < currentSequence.size(); ++j)
currentSequence[j] = ((ElementType)rand()) / RAND_MAX;
@@ -244,17 +261,21 @@ inline std::vector<std::vector<size_t>> GenerateOneHotSequences(const std::vecto
}
template <typename ElementType>
inline CNTK::ValuePtr GenerateSequences(const std::vector<size_t>& sequenceLengths, size_t dim, const CNTK::DeviceDescriptor& device, bool oneHot)
inline CNTK::ValuePtr GenerateSequences(const std::vector<size_t>& sequenceLengths, const CNTK::NDShape& sampleShape, const CNTK::DeviceDescriptor& device, bool oneHot)
{
if (!oneHot)
{
std::vector<std::vector<ElementType>> sequences = GenerateSequences<ElementType>(sequenceLengths, dim);
return CNTK::Value::Create({ dim }, sequences, device, true);
std::vector<std::vector<ElementType>> sequences = GenerateSequences<ElementType>(sequenceLengths, sampleShape);
return CNTK::Value::Create(sampleShape, sequences, device, true);
}
else
{
std::vector<std::vector<size_t>> oneHotSequences = GenerateOneHotSequences(sequenceLengths, dim);
return CNTK::Value::Create<ElementType>({ dim }, oneHotSequences, device, true);
if (sampleShape.Rank() != 1)
throw std::runtime_error("GenerateSequences can generate one hot sequences only for 1D sample shapes");
size_t vocabularySize = sampleShape[0];
std::vector<std::vector<size_t>> oneHotSequences = GenerateOneHotSequences(sequenceLengths, vocabularySize);
return CNTK::Value::Create<ElementType>(vocabularySize, oneHotSequences, device, true);
}
}
@@ -296,3 +317,39 @@ inline void PrintTrainingProgress(const CNTK::Trainer& trainer, size_t minibatch
printf("Minibatch %d: CrossEntropy loss = %.8g, Evaluation criterion = %.8g\n", (int)minibatchIdx, trainLossValue, evaluationValue);
}
}
inline std::vector<size_t> GetStrides(const CNTK::NDShape& shape)
{
std::vector<size_t> strides(shape.Rank() - 1);
size_t totalSize = 1;
for (size_t i = 0; i < shape.Rank() - 1; ++i)
{
totalSize *= shape[i];
strides[i] = totalSize;
}
return strides;
}
inline CNTK::NDShape UnflattenedShape(size_t flatennedIdx, const std::vector<size_t>& strides)
{
CNTK::NDShape unflattenedShape(strides.size() + 1);
size_t remainder = flatennedIdx;
for (int i = (int)strides.size() - 1; i >= 0; --i)
{
unflattenedShape[i + 1] = remainder / strides[i];
remainder = remainder % strides[i];
}
unflattenedShape[0] = remainder;
return unflattenedShape;
}
inline size_t FlattenedIndex(const CNTK::NDShape& shape, const std::vector<size_t>& strides)
{
size_t flattenedIdx = shape[0];
for (int i = 0; i < strides.size(); ++i)
flattenedIdx += shape[i + 1] * strides[i];
return flattenedIdx;
};
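A worked illustration of these helpers (not code from the commit): for a shape of { 2, 3, 4 }, GetStrides returns { 2, 6 }; UnflattenedShape(17, { 2, 6 }) yields the index { 1, 2, 2 }, since 17 = 1 + 2*2 + 2*6; and FlattenedIndex({ 1, 2, 2 }, { 2, 6 }) maps that index back to 17. Note that FlattenedIndex interprets its first argument as a multi-dimensional index rather than a shape, despite the parameter name.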

View file

@@ -10,7 +10,7 @@ void TestReduceSum(const DeviceDescriptor& device)
size_t dim = 23;
auto sequenceLengths = GenerateSequenceLengths(numSequences, maxAllowedSequenceLength);
auto sequences = GenerateSequences<float>(sequenceLengths, dim);
auto sequences = GenerateSequences<float>(sequenceLengths, { dim });
ValuePtr sequencesValue = Value::Create({ dim }, sequences, device, true);
// Test ReduceSum along a static axis
@@ -113,67 +113,88 @@ void TestReduceSum(const DeviceDescriptor& device)
}
}
void TestSlice(const DeviceDescriptor& device)
void TestSlice(size_t sampleRank, const DeviceDescriptor& device)
{
size_t numSequences = 7;
size_t maxAllowedSequenceLength = 11;
size_t dim = 23;
size_t maxDimSize = 23;
NDShape inputShape(sampleRank);
for (size_t i = 0; i < sampleRank; ++i)
inputShape[i] = (rand() % maxDimSize) + 1;
auto sequenceLengths = GenerateSequenceLengths(numSequences, maxAllowedSequenceLength);
auto sequences = GenerateSequences<float>(sequenceLengths, dim);
ValuePtr sequencesValue = Value::Create({ dim }, sequences, device, true);
auto sequences = GenerateSequences<float>(sequenceLengths, inputShape);
ValuePtr sequencesValue = Value::Create(inputShape, sequences, device, true);
// Test slice along a static axis
{
auto testStaticAxisSlice = [&sequences, &sequenceLengths, dim, sequencesValue, device](int beginOffset, int endOffset)
auto testStaticAxisSlice = [&sequences, &sequenceLengths, inputShape, sequencesValue, device](size_t sliceAxis, int beginOffset, int endOffset)
{
size_t maxActualSequenceLength = sequencesValue->Data()->Shape()[1];
size_t numSequences = sequencesValue->Data()->Shape()[2];
size_t maxActualSequenceLength = sequencesValue->Data()->Shape()[inputShape.Rank()];
size_t numSequences = sequencesValue->Data()->Shape()[inputShape.Rank() + 1];
auto inputVar = InputVariable({ dim }, DataType::Float, L"input");
auto rowSliceFunc = Slice(inputVar, Axis(0), beginOffset, endOffset);
auto inputVar = InputVariable(inputShape, DataType::Float, L"input");
auto sliceFunc = Slice(inputVar, Axis(sliceAxis), beginOffset, endOffset);
NDShape outputShape = rowSliceFunc->Output().Shape().AppendShape({ maxActualSequenceLength, numSequences });
std::vector<float> outputData(outputShape.TotalSize());
ValuePtr outputValue = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(outputShape, outputData, false), sequencesValue->Mask()->DeepClone());
NDShape outputShape = sliceFunc->Output().Shape();
auto outputDataShape = outputShape.AppendShape({ maxActualSequenceLength, numSequences });
std::vector<float> outputData(outputDataShape.TotalSize());
ValuePtr outputValue = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(outputDataShape, outputData, false), sequencesValue->Mask()->DeepClone());
std::unordered_map<Variable, ValuePtr> outputs = { { rowSliceFunc->Output(), outputValue } };
rowSliceFunc->Forward({ { inputVar, sequencesValue } }, outputs, device);
std::unordered_map<Variable, ValuePtr> outputs = { { sliceFunc->Output(), outputValue } };
sliceFunc->Forward({ { inputVar, sequencesValue } }, outputs, device);
size_t rowSliceStartOffset = (beginOffset >= 0) ? beginOffset : (dim + beginOffset);
size_t sliceLength = endOffset - beginOffset;
std::vector<float> expectedOutputValues(sliceLength * maxActualSequenceLength * numSequences);
std::vector<size_t> inputShapeStrides = GetStrides(inputShape);
std::vector<size_t> outputShapeStrides = GetStrides(outputShape);
size_t sliceStartOffset = (beginOffset >= 0) ? beginOffset : (inputShape[sliceAxis] + beginOffset);
std::vector<float> expectedOutputValues(outputShape.TotalSize() * maxActualSequenceLength * numSequences);
for (size_t i = 0; i < numSequences; ++i)
{
size_t currentSequenceLength = sequenceLengths[i];
for (size_t j = 0; j < currentSequenceLength; ++j)
{
for (size_t k = 0; k < sliceLength; ++k)
expectedOutputValues[(((i * maxActualSequenceLength) + j) * sliceLength) + k] = sequences[i][(j * dim) + k + rowSliceStartOffset];
for (size_t k = 0; k < outputShape.TotalSize(); ++k)
{
auto outputIdx = UnflattenedShape(k, outputShapeStrides);
auto inputIdx = outputIdx;
inputIdx[sliceAxis] += sliceStartOffset;
auto flatInputIdx = FlattenedIndex(inputIdx, inputShapeStrides);
expectedOutputValues[(((i * maxActualSequenceLength) + j) * outputShape.TotalSize()) + k] = sequences[i][(j * inputShape.TotalSize()) + flatInputIdx];
}
}
}
FloatingPointVectorCompare(outputData, expectedOutputValues, "testStaticAxisSlice: Forward prop results do not match expected results");
};
testStaticAxisSlice(3, 5);
testStaticAxisSlice(-1, 0);
testStaticAxisSlice(-3, -1);
size_t sliceAxis = 0;
testStaticAxisSlice(sliceAxis, 3, 5);
if (sliceAxis < (inputShape.Rank() - 1))
sliceAxis++;
testStaticAxisSlice(sliceAxis, -1, 0);
if (sliceAxis < (inputShape.Rank() - 1))
sliceAxis++;
testStaticAxisSlice(sliceAxis, -3, -1);
}
// Test slice along a dynamic axis
{
auto testDynamicAxisSlice = [&sequences, &sequenceLengths, dim, sequencesValue, device](const Axis& axis, int beginOffset, int endOffset)
auto testDynamicAxisSlice = [&sequences, &sequenceLengths, inputShape, sequencesValue, device](const Axis& axis, int beginOffset, int endOffset)
{
if (axis.IsStaticAxis())
RuntimeError("Called the dynamic axis slice test with a static axis");
size_t maxActualSequenceLength = sequencesValue->Data()->Shape()[1];
size_t numSequences = sequencesValue->Data()->Shape()[2];
size_t maxActualSequenceLength = sequencesValue->Data()->Shape()[inputShape.Rank()];
size_t numSequences = sequencesValue->Data()->Shape()[inputShape.Rank() + 1];
size_t sliceLength = endOffset - beginOffset;
auto inputVar = InputVariable({ dim }, DataType::Float, L"input");
auto inputVar = InputVariable(inputShape, DataType::Float, L"input");
auto sliceFunc = Slice(inputVar, axis, beginOffset, endOffset);
size_t outputSequenceAxisLength = (axis == Axis::DefaultDynamicAxis()) ? sliceLength : maxActualSequenceLength;
@@ -188,7 +209,7 @@ void TestSlice(const DeviceDescriptor& device)
size_t startSequenceIdx = (axis == Axis::DefaultBatchAxis()) ? ((beginOffset >= 0) ? beginOffset : (numSequences + beginOffset)) : 0;
size_t endSequenceIdx = (axis == Axis::DefaultBatchAxis()) ? ((endOffset > 0) ? endOffset : (numSequences + endOffset)) : numSequences;
std::vector<float> expectedOutputValues(dim * outputSequenceAxisLength * outputBatchAxisLength);
std::vector<float> expectedOutputValues(inputShape.TotalSize() * outputSequenceAxisLength * outputBatchAxisLength);
for (size_t i = startSequenceIdx; i < endSequenceIdx; ++i)
{
size_t currentSequenceLength = sequenceLengths[i];
@@ -196,8 +217,8 @@ void TestSlice(const DeviceDescriptor& device)
size_t endFrameIdx = (axis == Axis::DefaultDynamicAxis()) ? ((endOffset > 0) ? endOffset : (currentSequenceLength + endOffset)) : currentSequenceLength;
for (size_t j = startFrameIdx; j < endFrameIdx; ++j)
{
for (size_t k = 0; k < dim; ++k)
expectedOutputValues[((((i - startSequenceIdx) * outputSequenceAxisLength) + (j - startFrameIdx)) * dim) + k] = sequences[i][(j * dim) + k];
for (size_t k = 0; k < inputShape.TotalSize(); ++k)
expectedOutputValues[((((i - startSequenceIdx) * outputSequenceAxisLength) + (j - startFrameIdx)) * inputShape.TotalSize()) + k] = sequences[i][(j * inputShape.TotalSize()) + k];
}
}
@@ -344,13 +365,59 @@ void TestRecurrentFunctionCloning()
CompareFunctions(clonedFunctionWithParametersShared, clonedFunctionWithParametersFrozen, ParameterCloningMethod::Freeze, cloningReplacements, visitedFunctions);
}
void TestTranspose(size_t numAxes, size_t axis1, size_t axis2, const DeviceDescriptor& device)
{
srand(1);
size_t maxDimSize = 15;
NDShape inputShape(numAxes);
for (size_t i = 0; i < numAxes; ++i)
inputShape[i] = (rand() % maxDimSize) + 1;
auto inputVar = InputVariable(inputShape, DataType::Float, false, L"leftInput");
auto transposeFunc = TransposeAxes(inputVar, Axis(axis1), Axis(axis2));
std::vector<float> inputData(inputShape.TotalSize());
for (size_t i = 0; i < inputData.size(); ++i)
inputData[i] = ((float)rand()) / RAND_MAX;
auto inputValueShape = inputShape.AppendShape({ 1, 1 });
ValuePtr inputValue = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(inputValueShape, inputData, true));
NDShape outputShape = transposeFunc->Output().Shape();
NDShape outputValueShape = outputShape.AppendShape({ 1, 1 });
std::vector<float> outputData(outputValueShape.TotalSize());
ValuePtr outputValue = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(outputValueShape, outputData, false));
std::unordered_map<Variable, ValuePtr> outputs = { { transposeFunc->Output(), outputValue } };
transposeFunc->Forward({ { inputVar, inputValue } }, outputs, device);
std::vector<size_t> inputShapeStrides = GetStrides(inputShape);
std::vector<size_t> outputShapeStrides = GetStrides(outputShape);
// Verify forward prop results
std::vector<float> expectedOutputValues(outputShape.TotalSize());
for (size_t i = 0; i < expectedOutputValues.size(); ++i)
{
auto unflattenedShape = UnflattenedShape(i, outputShapeStrides);
std::swap(unflattenedShape[axis1], unflattenedShape[axis2]);
size_t flattenedIndex = FlattenedIndex(unflattenedShape, inputShapeStrides);
expectedOutputValues[i] = inputData[flattenedIndex];
}
FloatingPointVectorCompare(outputData, expectedOutputValues, "TestTranspose: Forward prop results do not match expected results");
}
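As an illustrative trace of the verification loop (not part of the test code): with inputShape { 2, 3 } and axes 0 and 1 swapped, the output shape is { 3, 2 }; output flat index 4 unflattens with output strides { 3 } to { 1, 1 }, swapping the two axis indices leaves { 1, 1 }, which flattens with input strides { 2 } to 3, so expectedOutputValues[4] is read from inputData[3].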
void FunctionTests()
{
TestSlice(DeviceDescriptor::CPUDevice());
TestSlice(DeviceDescriptor::GPUDevice(0));
TestSlice(2, DeviceDescriptor::CPUDevice());
TestSlice(1, DeviceDescriptor::GPUDevice(0));
TestReduceSum(DeviceDescriptor::CPUDevice());
TestReduceSum(DeviceDescriptor::GPUDevice(0));
TestRecurrentFunctionCloning();
TestTranspose(2, 0, 1, DeviceDescriptor::CPUDevice());
TestTranspose(3, 1, 2, DeviceDescriptor::GPUDevice(0));
}

View file

@@ -80,7 +80,7 @@ void TestRecurrentNetworkCreation(const DeviceDescriptor& device, bool testSaveA
{
std::vector<size_t> sequenceLengths = GenerateSequenceLengths(numSequences, maxAllowedSequenceLength);
ValuePtr inputValue = GenerateSequences<ElementType>(sequenceLengths, inputDim, device, false);
ValuePtr inputValue = GenerateSequences<ElementType>(sequenceLengths, { inputDim }, device, false);
std::vector<std::vector<ElementType>> labelsData;
for (size_t i = 0; i < numSequences; ++i)

View file

@@ -59,6 +59,9 @@ void TrainSequenceToSequenceTranslator(const DeviceDescriptor& device, bool useS
{
thoughtVectorH = Reshape(thoughtVectorH, thoughtVectorH->Output().Shape().AppendShape({ 1 }));
thoughtVectorC = Reshape(thoughtVectorC, thoughtVectorC->Output().Shape().AppendShape({ 1 }));
labelEmbedding = Reshape(labelEmbedding, labelEmbedding->Output().Shape().AppendShape({ 1 }));
labelSentenceStartEmbeddedScattered = Reshape(labelSentenceStartEmbeddedScattered, labelSentenceStartEmbeddedScattered->Output().Shape().AppendShape({ 1 }));
}
auto thoughtVectorBroadcastH = Sequence::BroadcastAs(thoughtVectorH, labelEmbedding);
@@ -179,6 +182,6 @@ void TrainSequenceToSequenceTranslator()
void TrainSequenceToSequenceTranslator()
{
// TODO: Also test with sparse input variables in the graph
TrainSequenceToSequenceTranslator(DeviceDescriptor::GPUDevice(0), false, false, true, false);
TrainSequenceToSequenceTranslator(DeviceDescriptor::CPUDevice(), false, true, false, true);
TrainSequenceToSequenceTranslator(DeviceDescriptor::GPUDevice(0), false, false, true, false);
}