From 933adb275a10b8a379bd11f896fcaa250a683d17 Mon Sep 17 00:00:00 2001 From: Amit Agarwal Date: Sat, 4 Jun 2016 18:26:52 -0700 Subject: [PATCH] Add recurrence and variable length sequence support for the 'Function' abstraction --- CNTK.sln | 3 + Source/CNTKv2LibraryDll/API/CNTKLibrary.h | 524 +++++++++++++++-- .../API/CNTKLibraryInternals.h | 8 + .../CNTKv2LibraryDll/CNTKv2LibraryDll.vcxproj | 1 + .../CNTKv2LibraryDll.vcxproj.filters | 1 + Source/CNTKv2LibraryDll/Common.cpp | 4 + Source/CNTKv2LibraryDll/Function.cpp | 555 +++++++++++++++--- Source/CNTKv2LibraryDll/Function.h | 128 +++- .../LibraryTests/FeedForwardTests.cpp | 10 +- .../LibraryTests/LibraryTests.vcxproj | 2 + .../LibraryTests/LibraryTests.vcxproj.filters | 3 + Source/CNTKv2LibraryDll/LibraryTests/Main.cpp | 2 + .../LibraryTests/RecurrentFunctionTests.cpp | 440 ++++++++++++++ .../LibraryTests/TensorTests.cpp | 6 +- Source/CNTKv2LibraryDll/NDArrayView.cpp | 66 +-- Source/CNTKv2LibraryDll/NDMask.cpp | 109 ++++ Source/CNTKv2LibraryDll/Utils.cpp | 12 + Source/CNTKv2LibraryDll/Utils.h | 27 +- Source/CNTKv2LibraryDll/Value.cpp | 114 +++- Source/Common/Include/Sequences.h | 7 +- Source/Math/CPUMatrix.cpp | 2 + Source/Math/GPUMatrix.cu | 1 + Source/Math/GPUSparseMatrix.cu | 1 + Source/Math/Matrix.cpp | 2 + 24 files changed, 1819 insertions(+), 209 deletions(-) create mode 100644 Source/CNTKv2LibraryDll/LibraryTests/RecurrentFunctionTests.cpp create mode 100644 Source/CNTKv2LibraryDll/NDMask.cpp diff --git a/CNTK.sln b/CNTK.sln index 3be62b637..340ce7299 100644 --- a/CNTK.sln +++ b/CNTK.sln @@ -1287,6 +1287,9 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CNTKv2LibraryDll", "Source\ EndProjectSection EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "LibraryTests", "Source\CNTKv2LibraryDll\LibraryTests\LibraryTests.vcxproj", "{F4CC3AB2-0DB2-4281-929A-2E68E30F0F6E}" + ProjectSection(ProjectDependencies) = postProject + {E5606ECE-48CA-4464-BB12-09D81D02B9EF} = {E5606ECE-48CA-4464-BB12-09D81D02B9EF} + EndProjectSection EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution diff --git a/Source/CNTKv2LibraryDll/API/CNTKLibrary.h b/Source/CNTKv2LibraryDll/API/CNTKLibrary.h index fcf1fdb2d..912d7379e 100644 --- a/Source/CNTKv2LibraryDll/API/CNTKLibrary.h +++ b/Source/CNTKv2LibraryDll/API/CNTKLibrary.h @@ -16,6 +16,7 @@ #include #include #include +#include namespace CNTK { @@ -24,6 +25,7 @@ namespace CNTK /// enum class DataType { + Unknown, Float, Double, @@ -413,13 +415,13 @@ namespace CNTK /// Static method to construct a new NDArrayView object whose contents are drawn from a normal distribution with the specified mean and standard deviation.. /// template - static NDArrayViewPtr RandomNormal(const NDShape& shape, double mean, double stdDev, unsigned long seed, const DeviceDescriptor& device = DeviceDescriptor::DefaultDevice()); + static NDArrayViewPtr RandomNormal(const NDShape& shape, double mean, double stdDev, unsigned long seed = 1, const DeviceDescriptor& device = DeviceDescriptor::DefaultDevice()); /// /// Static method to construct a new NDArrayView object whose contents are drawn from a uniform distribution in the specified value range. 
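/// e.g. RandomUniform<float>({ 2, 3 }, -1.0, 1.0) would yield a 2x3 view with values drawn uniformly from [-1, 1] (illustrative shape and range); note the seed now defaults to 1.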
///
template 
- static NDArrayViewPtr RandomUniform(const NDShape& shape, double rangeStart, double rangeEnd, unsigned long seed, const DeviceDescriptor& device = DeviceDescriptor::DefaultDevice());
+ static NDArrayViewPtr RandomUniform(const NDShape& shape, double rangeStart, double rangeEnd, unsigned long seed = 1, const DeviceDescriptor& device = DeviceDescriptor::DefaultDevice());

private:
// Disallow copy construction and assignment
NDArrayView(const NDArrayView&) = delete;
NDArrayView& operator=(const NDArrayView&) = delete;

// Disallow move construction and assignment
NDArrayView& operator=(NDArrayView&&) = delete;
NDArrayView(NDArrayView&& other) = delete;

+ private:
+ static const size_t AutoSelectRowColSplitPoint = SIZE_MAX;
+
private:
NDArrayView(CNTK::DataType dataType, const DeviceDescriptor& device, CNTK::StorageFormat storageType, const NDShape& viewShape, bool readOnly, void* tensorView);

template 
- std::shared_ptr> GetMatrixImpl() const;
+ std::shared_ptr> GetMatrixImpl(size_t rowColSplitPoint) const;

template 
- std::shared_ptr> GetMatrix() const;
+ std::shared_ptr> GetMatrix(size_t rowColSplitPoint = AutoSelectRowColSplitPoint) const;

template 
- std::shared_ptr> GetWritableMatrix();
+ std::shared_ptr> GetWritableMatrix(size_t rowColSplitPoint = AutoSelectRowColSplitPoint);

template 
const Microsoft::MSR::CNTK::TensorView* GetTensorView() const;
@@ -461,9 +466,91 @@
void* m_tensorView;
};

+ ///
+ /// Denotes a multi-dimensional mask used for specifying specific sections of a NDArrayView object as masked/invalid.
+ /// This type denotes a view and there may be multiple simultaneous views of the data underlying a NDMask instance.
+ ///
+ class CNTK_API NDMask final : public _Internal::_ReferenceCounter
+ {
+ friend class CompositeFunction;
+
+ public:
+ ///
+ /// Construct a new Mask object of the specified shape
+ ///
+ explicit NDMask(const NDShape& shape, const DeviceDescriptor& device = DeviceDescriptor::DefaultDevice());
+
+ ///
+ /// Destruct 'this' mask object
+ ///
+ ~NDMask();
+
+ ///
+ /// Mask out the specified sub-section of 'this' mask
+ ///
+ void MaskSection(const std::vector& sectionOffset, const NDShape& sectionShape);
+
+ ///
+ /// Clear the mask; i.e. unmask all currently masked values
+ ///
+ void Clear();
+
+ ///
+ /// Returns the descriptor of the device that 'this' mask resides on
+ ///
+ DeviceDescriptor Device() const
+ {
+ return m_device;
+ }
+
+ ///
+ /// Returns the shape of 'this' mask.
+ ///
+ NDShape Shape() const
+ {
+ return m_maskShape;
+ }
+
+ ///
+ /// Creates a new NDMask with newly allocated storage on the same device as 'this' mask and copies 'this' mask's contents into the newly allocated mask.
+ ///
+ NDMaskPtr DeepClone() const;
+
+ ///
+ /// Creates a new NDMask which is an alias of 'this' mask.
+ ///
+ NDMaskPtr Alias() const;
+
+ ///
+ /// Copies the contents of the 'source' NDMask to 'this' mask.
+ /// The shapes of the 'source' mask and 'this' mask must be identical.
+ /// + void CopyFrom(const NDMask& source); + + private: + NDMask(const NDShape& shape, Microsoft::MSR::CNTK::Matrix* matrix); + Microsoft::MSR::CNTK::Matrix* GetMatrix() const; + + // Disallow copy construction and assignment + NDMask(const NDMask&) = delete; + NDMask& operator=(const NDMask&) = delete; + + // Disallow move construction and assignment + NDMask& operator=(NDMask&&) = delete; + NDMask(NDMask&& other) = delete; + + private: + DeviceDescriptor m_device; + NDShape m_maskShape; + + Microsoft::MSR::CNTK::Matrix* m_matrixView; + }; + /// - /// Denotes a multi-dimensional array with an optional mask and is - /// the actual data fed into or produced from a computation. + /// Denotes a multi-dimensional array with an optional mask and is the actual data fed into or produced from a computation. + /// The mask is typically lower dimensionailty than the data, meaning data is masked in coarse individual sample units where + /// sample shape is data.Shape().SubShape(0, data.Shape().NumAxes() - mask.Shape().NumAxes) + /// Also, note that the size of the data's trailing mask.Shape().NumAxes() dimensions must match the mask shape dimensions. /// class CNTK_API Value : public _Internal::_ReferenceCounter { @@ -473,6 +560,18 @@ namespace CNTK /// Value(const NDArrayViewPtr& data); + /// + /// A multi-dimensional value with an associated mask. + /// + Value(const NDArrayViewPtr& data, const NDMaskPtr& mask); + + /// + /// Create a new Value object containing a collection of variable length sequences. + /// The created Value object contains a copy of the specified 'sequences' data. + /// + template + static ValuePtr Create(const NDShape& sampleShape, const std::vector>& sequences, const DeviceDescriptor& device, bool readOnly = false); + /// /// Destruct 'this' Value object. /// @@ -483,6 +582,27 @@ namespace CNTK /// virtual NDArrayViewPtr Data() const; + /// + /// Returns the NDMask object corresponding to the mask associated with 'this value object. + /// + virtual NDMaskPtr Mask() const; + + /// + /// Creates a new Value with newly allocated storage on the same device as 'this' Value and copies 'this' Value's contents into the newly allocated Value. + /// + virtual ValuePtr DeepClone(bool readOnly = false) const; + + /// + /// Creates a new Value which is an alias of 'this' Value. + /// + virtual ValuePtr Alias(bool readOnly = false) const; + + /// + /// Copies the contents of the 'source' Value to 'this' Value. + /// The shapes of the 'source' Value's data and mask must be identical to 'this' Value's data and mask. + /// + virtual void CopyFrom(const Value& source); + private: // Disallow copy construction and assignment Value(const Value&) = delete; @@ -494,8 +614,170 @@ namespace CNTK private: NDArrayViewPtr m_data; + NDMaskPtr m_mask; }; + /// + /// Denotes an Axis of a Variable and is used for specifying the axes parameters of certain Functions such as reductions. + /// Besides the static axes corresponding to each of the axes of the Variable's shape, Input and Output Variables + /// also have one or more dynamic axes (corresponding to the sequence dimensions) and one implicit batch axis denoting the axes + /// along which multiple sequences are batched in the Values corresponding to the variable when performing computations. + /// + class Axis final + { + public: + /// + /// Construct an Axis object denoting a static axis with the specified index. 
+ /// + Axis(size_t staticAxisIdx) + : m_staticAxisIdx(staticAxisIdx) + { + const wchar_t* staticAxisNamePrefix = L"staticAxis_"; + std::wstring tempName = staticAxisNamePrefix; + tempName = tempName + std::to_wstring(staticAxisIdx); + m_name = CopyString(tempName.c_str()); + } + + /// + /// Construct a dynamic axis with the specified name. + /// + Axis(const std::wstring& name) + : m_staticAxisIdx(SIZE_MAX) + { + m_name = CopyString(name.c_str()); + } + + /// + /// Copy constructor. + /// + Axis(const Axis& other) + : m_staticAxisIdx(SIZE_MAX), m_name(nullptr) + { + *this = other; + } + + /// + /// Copy assignment. + /// + Axis& operator=(const Axis& other) + { + if (this != &other) + { + delete[] m_name; + + m_staticAxisIdx = other.m_staticAxisIdx; + m_name = (other.m_name != nullptr) ? CopyString(other.m_name) : other.m_name; + } + + return *this; + } + + /// + /// Move constructor. + /// + Axis(Axis&& other) + : m_staticAxisIdx(SIZE_MAX), m_name(nullptr) + { + *this = std::move(other); + } + + /// + /// Move assignment. + /// + Axis& operator=(Axis&& other) + { + assert(this != &other); + + delete[] m_name; + + m_staticAxisIdx = other.m_staticAxisIdx; + m_name = other.m_name; + + other.m_staticAxisIdx = SIZE_MAX; + other.m_name = nullptr; + + return *this; + } + + /// + /// Returns a boolean indicating if 'this' Axis corresponds to a static axis + /// + bool IsStaticAxis() const + { + return m_staticAxisIdx == SIZE_MAX; + } + + /// + /// Returns the axis index if 'this' Axis is a static axis. Throws an exception otherwise. + /// + size_t StaticAxisIndex() const + { + if (!IsStaticAxis()) + InvalidArgument("Cannot query the static axis index for a non-static axis"); + + return m_staticAxisIdx; + } + + /// + /// Static Axis object representing the default dynamic axis. + /// + static Axis DefaultDynamicAxis; + + /// + /// Static Axis object representing the batch axis. + /// + static Axis BatchAxis; + + /// + /// Special Axis object denoting all the axes of the Value object in whose context it is used. + /// + static Axis AllAxes; + + /// + /// Name of 'this' axis + /// + std::wstring Name() const + { + return m_name; + } + + /// + /// Destructor + /// + ~Axis() + { + delete[] m_name; + } + + /// + /// Default constructor; results in an invalid axis object. + /// + Axis() + : m_staticAxisIdx(SIZE_MAX), m_name(nullptr) + { + } + + private: + size_t m_staticAxisIdx; + wchar_t* m_name; + }; + + inline bool operator==(const Axis& first, const Axis& second) + { + if (first.IsStaticAxis() != second.IsStaticAxis()) + return false; + + if (first.IsStaticAxis()) + return first.StaticAxisIndex() == second.StaticAxisIndex(); + else + return first.Name() == second.Name(); + } + + inline bool operator!=(const Axis& first, const Axis& second) + { + return !(first == second); + } + /// /// Enumeration type denoting the kind of a symbolic Variable object /// @@ -504,7 +786,8 @@ namespace CNTK Input, Output, Parameter, - Constant + Constant, + Placeholder }; /// @@ -524,7 +807,7 @@ namespace CNTK /// Create an 'Input' Variable. 
/// Variable(const NDShape& shape, CNTK::DataType dataType, const std::wstring& name = L"") - : Variable(shape, VariableKind::Input, dataType, nullptr, nullptr, false, name) + : Variable(shape, VariableKind::Input, dataType, nullptr, nullptr, false, { Axis::DefaultDynamicAxis }, name) { } @@ -532,15 +815,15 @@ namespace CNTK /// Create an 'Input' Variable and specify if gradients are to be computed for this input /// Variable(const NDShape& shape, CNTK::DataType dataType, bool needsGradient, const std::wstring& name = L"") - : Variable(shape, VariableKind::Input, dataType, nullptr, nullptr, needsGradient, name) + : Variable(shape, VariableKind::Input, dataType, nullptr, nullptr, needsGradient, { Axis::DefaultDynamicAxis }, name) { } /// /// Create an 'Output' variable /// - Variable(const NDShape& shape, CNTK::DataType dataType, Function* ownerFunction, const std::wstring& name = L"") - : Variable(shape, VariableKind::Output, dataType, ownerFunction, nullptr, false, name) + Variable(const NDShape& shape, CNTK::DataType dataType, Function* ownerFunction, const std::vector& dynamicAxes, const std::wstring& name = L"") + : Variable(shape, VariableKind::Output, dataType, ownerFunction, nullptr, false, dynamicAxes, name) { } @@ -558,6 +841,14 @@ namespace CNTK return m_dataFields->m_shape; } + /// + /// Returns the dynamic axes of 'this' variable + /// + std::vector DynamicAxes() const + { + return m_dataFields->m_dynamicAxes; + } + /// /// Returns the VariableKind of 'this' variable /// @@ -567,7 +858,7 @@ namespace CNTK } /// - /// Returns a boolean value indicating if 'this' variable is a parameter + /// Returns a boolean value indicating if 'this' variable is a Parameter /// bool IsParameter() const { @@ -575,13 +866,21 @@ namespace CNTK } /// - /// Returns a boolean value indicating if 'this' variable is a constant + /// Returns a boolean value indicating if 'this' variable is a Constant /// bool IsConstant() const { return Kind() == VariableKind::Constant; } + /// + /// Returns a boolean value indicating if 'this' variable is a Placeholder + /// + bool IsPlaceholder() const + { + return Kind() == VariableKind::Placeholder; + } + /// /// Returns the name of 'this' variable /// @@ -624,9 +923,8 @@ namespace CNTK } protected: - // Create an 'Input' Variable - Variable(const NDShape& shape, VariableKind varType, CNTK::DataType dataType, const NDArrayViewPtr& value, bool needsGradient, const std::wstring& name) - : Variable(shape, varType, dataType, nullptr, value, needsGradient, name) + Variable(const NDShape& shape, VariableKind varType, CNTK::DataType dataType, const NDArrayViewPtr& value, bool needsGradient, const std::vector& dynamicAxes, const std::wstring& name) + : Variable(shape, varType, dataType, nullptr, value, needsGradient, dynamicAxes, name) { } @@ -637,14 +935,14 @@ namespace CNTK } private: - Variable(const NDShape& shape, VariableKind varType, CNTK::DataType dataType, Function* ownerFunction, const NDArrayViewPtr& value, bool needsGradient, const std::wstring& name) - : m_dataFields(new _VariableFields(shape, varType, dataType, ownerFunction, value, needsGradient, (name == L"") ? 
nullptr : name.c_str()), [](_Internal::_ReferenceCounter* ptr) { delete ptr; })
+ Variable(const NDShape& shape, VariableKind varType, CNTK::DataType dataType, Function* ownerFunction, const NDArrayViewPtr& value, bool needsGradient, const std::vector& dynamicAxes, const std::wstring& name)
+ : m_dataFields(new _VariableFields(shape, varType, dataType, ownerFunction, value, needsGradient, dynamicAxes, (name == L"") ? nullptr : name.c_str()), [](_Internal::_ReferenceCounter* ptr) { delete ptr; })
{
}

private:
- struct _VariableFields : public _Internal::_ReferenceCounter
+ struct _VariableFields final : public _Internal::_ReferenceCounter
{
NDShape m_shape;
VariableKind m_varKind;
CNTK::DataType m_dataType;
Function* m_ownerFunction;
NDArrayViewPtr m_value;
bool m_needsGradient;
wchar_t* m_name;
+ _Internal::_SimpleVector m_dynamicAxes;

- _VariableFields(const NDShape& shape, VariableKind varType, CNTK::DataType type, Function* ownerFunction, const NDArrayViewPtr& value, bool needsGradient, const wchar_t* name)
- : m_shape(shape), m_varKind(varType), m_dataType(type), m_ownerFunction(ownerFunction), m_value(value), m_needsGradient(needsGradient), m_name(nullptr)
+ _VariableFields(const NDShape& shape, VariableKind varType, CNTK::DataType type, Function* ownerFunction, const NDArrayViewPtr& value, bool needsGradient, const std::vector& dynamicAxes, const wchar_t* name)
+ : m_shape(shape), m_varKind(varType), m_dataType(type), m_ownerFunction(ownerFunction), m_value(value), m_needsGradient(needsGradient), m_dynamicAxes(_Internal::_SimpleVector::CreateSimpleVector(dynamicAxes)), m_name(nullptr)
{
if (name != nullptr)
m_name = CopyString(name);
@@ -665,6 +964,15 @@
{
delete[] m_name;
}
+
+ private:
+ // Disallow copy construction and assignment
+ _VariableFields(const _VariableFields&) = delete;
+ _VariableFields& operator=(const _VariableFields& other) = delete;
+
+ // Disallow move construction and assignment
+ _VariableFields(_VariableFields&&) = delete;
+ _VariableFields& operator=(_VariableFields&&) = delete;
};
typedef _Internal::_ReferenceCounterSharedPtr<_VariableFields> _VariableFieldsPtr;
@@ -689,7 +997,7 @@
/// Construct a parameter whose initial contents are a copy of the specified 'value'
///
explicit Parameter(const NDArrayViewPtr& value, const std::wstring& name = L"")
- : Variable(value->Shape(), VariableKind::Parameter, value->DataType(), value->DeepClone(), true, name)
+ : Variable(value->Shape(), VariableKind::Parameter, value->DataType(), value->DeepClone(), true, {}, name)
{
}
@@ -700,7 +1008,7 @@
///
template 
Parameter(const NDShape& shape, ElemType initValue, const DeviceDescriptor& device = DeviceDescriptor::DefaultDevice(), const std::wstring& name = L"")
- : Variable(shape, VariableKind::Parameter, GetDataType(), new NDArrayView(initValue, shape, device), true, name)
+ : Variable(shape, VariableKind::Parameter, GetDataType(), new NDArrayView(initValue, shape, device), true, {}, name)
{
}
@@ -738,7 +1046,7 @@
/// Construct a Constant whose initial contents are a copy of the specified value
///
Constant(const NDArrayViewPtr& value, const std::wstring& name = L"")
- : Variable(value->Shape(), VariableKind::Constant, value->DataType(), value->DeepClone(true), false, name)
+ : Variable(value->Shape(), VariableKind::Constant, value->DataType(), value->DeepClone(true), false, {}, name)
{
}
@@ -749,7 +1057,7 @@
///
template 
Constant(const NDShape& shape, ElemType initValue, const DeviceDescriptor& device =
DeviceDescriptor::DefaultDevice(), const std::wstring& name = L"")
- : Variable(shape, VariableKind::Constant, GetDataType(), new NDArrayView(initValue, shape, device), false, name)
+ : Variable(shape, VariableKind::Constant, GetDataType(), new NDArrayView(initValue, shape, device), false, {}, name)
{
}
@@ -774,6 +1082,38 @@
static_assert(sizeof(Constant) == sizeof(Variable), "The Constant type should not have any data fields beyond what its base type 'Variable' has.");

+ ///
+ /// Denotes a Placeholder input to a Function.
+ /// All placeholder inputs of a Function must be replaced with non-placeholder Variables before Forward evaluation of the Function.
+ ///
+ class CNTK_API Placeholder final : public Variable
+ {
+ template 
+ friend struct std::hash;
+
+ friend class Function;
+
+ public:
+ ///
+ /// Construct a Placeholder with the specified NDShape
+ ///
+ explicit Placeholder(const NDShape& shape, const std::wstring& name = L"")
+ : Variable(shape, VariableKind::Placeholder, DataType::Unknown, nullptr, false, {Axis::DefaultDynamicAxis}, name)
+ {
+ }
+
+ ///
+ /// Downcast a Variable to a Placeholder. Only allowed if the VariableKind is Placeholder and throws an exception otherwise.
+ ///
+ explicit Placeholder(const Variable& variable)
+ : Variable(variable)
+ {
+ if (!IsPlaceholder())
+ InvalidArgument("A non-placeholder Variable being converted to a Placeholder");
+ }
+ };
+
+ static_assert(sizeof(Placeholder) == sizeof(Variable), "The Placeholder type should not have any data fields beyond what its base type 'Variable' has.");
#pragma warning(pop)
}
@@ -801,6 +1141,14 @@
{
return std::hash()(x);
}
};
+
+ template <> struct hash
+ {
+ size_t operator()(const CNTK::Placeholder& x) const
+ {
+ return std::hash()(x);
+ }
+ };
}

namespace CNTK
{
@@ -822,7 +1170,7 @@
BackPropState(const FunctionPtr& function) : m_function(function) {}

private:
- virtual void _ForceRTTIGeneration()
+ virtual void _ForceRTTIGeneration() final
{
LogicError("This is an internal method that is never supposed to be called");
}
@@ -832,6 +1180,9 @@
};
typedef _Internal::_ReferenceCounterSharedPtr BackPropStatePtr;

+#pragma warning(push)
+#pragma warning(disable : 4251 4275)
+
///
/// Represents a function (optionally differentiable)
/// A Function is a symbolic entity with zero or more input arguments and one or more outputs.
/// A Function effectively is an arbitrary computation graph composed of other primitive Functions, where Variable objects
/// form the edges and leaves of the graph.
///
- class Function : public _Internal::_ReferenceCounter
+ class CNTK_API Function : public _Internal::_ReferenceCounter
{
+ friend class CompositeFunction;
+
public:
///
/// Computes and stores the values of specified variables in the 'outputs' map, using provided 'inputs' values corresponding
@@ -944,7 +1297,7 @@
///
std::vector Inputs() const
{
- return m_inputs;
+ return _Inputs();
}

///
@@ -971,7 +1324,9 @@
///
std::unordered_set Arguments() const
{
- return m_arguments;
+ return FilteredInputs([](const Variable& var) {
+ return ((var.Kind() == VariableKind::Input) || (var.Kind() == VariableKind::Output));
+ });
}

///
@@ -979,7 +1334,9 @@
///
std::unordered_set Parameters() const
{
- return m_parameters;
+ return FilteredInputs([](const Variable& var) {
+ return (var.Kind() == VariableKind::Parameter);
+ });
}

///
@@ -987,11 +1344,58 @@
///
std::unordered_set Constants() const
{
- return m_constants;
+ return FilteredInputs([](const Variable& var) {
+ return (var.Kind() == VariableKind::Constant);
+ });
+ }
+
+ ///
+ /// Returns the set of all Placeholder variables of 'this' Function.
+ ///
+ std::unordered_set Placeholders() const
+ {
+ return FilteredInputs([](const Variable& var) {
+ return (var.Kind() == VariableKind::Placeholder);
+ });
+ }
+
+ FunctionPtr ReplacePlaceholders(const std::unordered_map& placeholderReplacements)
+ {
+ // Cannot be called on primitive functions
+ if (RootFunction() == nullptr)
+ InvalidArgument("ReplacePlaceholders should never be called on primitive functions");
+
+ _Internal::_SimpleSet visitedFunctions;
+ _Internal::_SimpleSet replacedPlaceholders;
+ auto abiSafePlaceholderReplacementsMap = _Internal::_SimpleMap::CreateSimpleMap(placeholderReplacements);
+ _ReplacePlaceholders(abiSafePlaceholderReplacementsMap, visitedFunctions, replacedPlaceholders);
+
+ if (abiSafePlaceholderReplacementsMap.Keys() != replacedPlaceholders)
+ InvalidArgument("At least one of the placeholders specified for replacement was not found in the function");
+
+ return this;
}

private:
+ template 
+ std::unordered_set FilteredInputs(FilterFunction&& filterFunc) const
+ {
+ std::unordered_set filteredInputs;
+ auto inputs = Inputs();
+ for (size_t i = 0; i < inputs.size(); ++i)
+ {
+ if (filterFunc(inputs[i]))
+ filteredInputs.insert(VariableType(inputs[i]));
+ }
+
+ return filteredInputs;
+ }
+
+ _Internal::_SimpleVector _Inputs() const;
+ virtual void _ReplacePlaceholders(const _Internal::_SimpleMap& placeholderReplacements, _Internal::_SimpleSet& visitedFunctions, _Internal::_SimpleSet& replacedPlaceholders);
+
// Disallow copy and move construction and assignment
Function(const Function&) = delete;
Function(Function&&) = delete;
@@ -1010,21 +1414,13 @@
{
m_inputs.PushBack(inputs[i]);

- switch (inputs[i].Kind())
+ if ((inputs[i].Kind() != VariableKind::Input) &&
+ (inputs[i].Kind() != VariableKind::Output) &&
+ (inputs[i].Kind() != VariableKind::Parameter) &&
+ (inputs[i].Kind() != VariableKind::Constant) &&
+ (inputs[i].Kind() != VariableKind::Placeholder))
{
- case VariableKind::Input:
- case VariableKind::Output:
- m_arguments.Insert(inputs[i]);
- break;
- case VariableKind::Parameter:
- m_parameters.Insert(*(reinterpret_cast(&(inputs[i]))));
- break;
- case VariableKind::Constant:
- m_constants.Insert(*(reinterpret_cast(&(inputs[i]))));
- break;
- default:
InvalidArgument("Function input has invalid VariableKind!");
- break;
}
}
@@ -1054,13 +1450,11 @@
_Internal::_SimpleVector
m_inputs;
_Internal::_SimpleVector m_outputs;
- _Internal::_SimpleSet m_arguments;
- _Internal::_SimpleSet m_parameters;
- _Internal::_SimpleSet m_constants;

FunctionPtr m_rootFunction;
wchar_t* m_name;
};
+#pragma warning(pop)

CNTK_API FunctionPtr _Combine(const _Internal::_SimpleVector& operands, const std::wstring& name = L"");
@@ -1080,6 +1474,11 @@
///
CNTK_API FunctionPtr Sigmoid(const Variable& operand, const std::wstring& name = L"");

+ ///
+ /// Create an instance of the CNTK built-in elementwise tanh operation with the specified input operand.
+ ///
+ CNTK_API FunctionPtr Tanh(const Variable& operand, const std::wstring& name = L"");
+
///
/// Create an instance of the CNTK built-in operation to compute cross-entropy with softmax for specified input operands.
///
@@ -1090,6 +1489,35 @@
///
CNTK_API FunctionPtr PredictionError(const Variable& prediction, const Variable& labels, const std::wstring& name = L"");

+ ///
+ /// Create an instance of the CNTK built-in elementwise exp operation with the specified input operand.
+ ///
+ CNTK_API FunctionPtr Exp(const Variable& operand, const std::wstring& name = L"");
+
+ ///
+ /// Create an instance of the CNTK built-in operation for getting the past value along the lone dynamic axis of the specified operand.
+ /// Throws an exception if the operand has more than one dynamic axis.
+ ///
+ CNTK_API FunctionPtr PastValue(const Variable& initialState, const Variable& operand, size_t stepSize, const std::wstring& name = L"");
+
+ //CNTK_API FunctionPtr PastValue(const Variable& initialState, const Variable& operand, Axis axis, const std::wstring& name = L"");
+
+ ///
+ /// Create an instance of the CNTK built-in operation for getting the future value along the lone dynamic axis of the specified operand.
+ /// Throws an exception if the operand has more than one dynamic axis.
+ ///
+ CNTK_API FunctionPtr FutureValue(const Variable& initialState, const Variable& operand, size_t stepSize, const std::wstring& name = L"");
+
+ ///
+ /// Create an instance of the CNTK built-in elementwise multiplication operation on the specified tensor input operands.
+ ///
+ CNTK_API FunctionPtr ElementTimes(const Variable& leftOperand, const Variable& rightOperand, const std::wstring& name = L"");
+
+ ///
+ /// Create an instance of the CNTK built-in sum reduction operation on the specified tensor input operand along all the axes.
+ ///
+ CNTK_API FunctionPtr ReduceSum(const Variable& operand, const std::wstring& name = L"");
+
///
/// Create a new Function instance which just combines the outputs of the specified list of 'operands' Functions such that the 'Outputs' of the
/// new 'Function' are the union of the 'Outputs' of each of the specified 'operands' Functions.
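Taken together, the API additions above let a recurrence be written symbolically and then closed over itself. A minimal sketch follows (illustrative only, not part of the patch; it assumes the pre-existing Plus and Times factories, the Function::Output() accessor, and arbitrary dimensions):

    // Express h(t) = Sigmoid(W * x(t) + H * h(t-1)), with a Placeholder standing in for h(t-1).
    const size_t inputDim = 5, hiddenDim = 4;                    // hypothetical sizes
    Variable x({ inputDim }, DataType::Float, L"x");             // sequence input (default dynamic axis)
    Parameter W(NDArrayView::RandomUniform<float>({ hiddenDim, inputDim }, -0.5, 0.5));
    Parameter H(NDArrayView::RandomUniform<float>({ hiddenDim, hiddenDim }, -0.5, 0.5));
    Placeholder prevH({ hiddenDim });
    auto h = Sigmoid(Plus(Times(W, x)->Output(), Times(H, prevH)->Output())->Output());

    // Close the recurrence: h(t-1) is the past value of h itself, seeded with a scalar
    // Constant initial state (the only initial-state form currently supported below).
    auto pastH = PastValue(Constant({}, 0.0f), h->Output(), 1);
    h->ReplacePlaceholders({ { prevH, pastH->Output() } });

Variable-length input for such a graph is supplied via Value::Create, which copies the per-sequence data into the (sample x time x sequence)-shaped packed form together with an accompanying NDMask for the shorter sequences.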
diff --git a/Source/CNTKv2LibraryDll/API/CNTKLibraryInternals.h b/Source/CNTKv2LibraryDll/API/CNTKLibraryInternals.h index 05bef668a..35dd498fa 100644 --- a/Source/CNTKv2LibraryDll/API/CNTKLibraryInternals.h +++ b/Source/CNTKv2LibraryDll/API/CNTKLibraryInternals.h @@ -51,6 +51,7 @@ namespace CNTK { // Forward declarations class CompositeFunction; + class Function; namespace _Internal { @@ -233,6 +234,8 @@ namespace CNTK template friend CNTK_API bool operator==(const _SimpleVector& first, const _SimpleVector& second); + friend class Function; + public: _SimpleVector(); _SimpleVector(size_t numElements, const T& initVal = T()); @@ -361,6 +364,8 @@ namespace CNTK class CNTK_API _SimpleMap final { friend class CompositeFunction; + friend class Function; + public: _SimpleMap(); ~_SimpleMap(); @@ -398,6 +403,9 @@ namespace CNTK class NDArrayView; typedef _Internal::_ReferenceCounterSharedPtr NDArrayViewPtr; + class NDMask; + typedef _Internal::_ReferenceCounterSharedPtr NDMaskPtr; + class Value; typedef _Internal::_ReferenceCounterSharedPtr ValuePtr; diff --git a/Source/CNTKv2LibraryDll/CNTKv2LibraryDll.vcxproj b/Source/CNTKv2LibraryDll/CNTKv2LibraryDll.vcxproj index f575a30b6..cd89f858c 100644 --- a/Source/CNTKv2LibraryDll/CNTKv2LibraryDll.vcxproj +++ b/Source/CNTKv2LibraryDll/CNTKv2LibraryDll.vcxproj @@ -141,6 +141,7 @@ + Create diff --git a/Source/CNTKv2LibraryDll/CNTKv2LibraryDll.vcxproj.filters b/Source/CNTKv2LibraryDll/CNTKv2LibraryDll.vcxproj.filters index fc3b52ee6..0dcd63972 100644 --- a/Source/CNTKv2LibraryDll/CNTKv2LibraryDll.vcxproj.filters +++ b/Source/CNTKv2LibraryDll/CNTKv2LibraryDll.vcxproj.filters @@ -9,6 +9,7 @@ + diff --git a/Source/CNTKv2LibraryDll/Common.cpp b/Source/CNTKv2LibraryDll/Common.cpp index e3060cc67..07e163cb7 100644 --- a/Source/CNTKv2LibraryDll/Common.cpp +++ b/Source/CNTKv2LibraryDll/Common.cpp @@ -13,4 +13,8 @@ namespace CNTK // TODO: Should return the global default device. 
return GPUDevice(0);
}
+
+ /*static*/ Axis Axis::DefaultDynamicAxis = Axis(L"defaultDynamicAxis");
+ /*static*/ Axis Axis::BatchAxis = Axis(L"batchAxis");
+ /*static*/ Axis Axis::AllAxes = Axis(L"allAxes");
}
diff --git a/Source/CNTKv2LibraryDll/Function.cpp b/Source/CNTKv2LibraryDll/Function.cpp
index fdcf0e78d..05c253f86 100644
--- a/Source/CNTKv2LibraryDll/Function.cpp
+++ b/Source/CNTKv2LibraryDll/Function.cpp
@@ -9,6 +9,7 @@
#include "ComputationNetworkBuilder.h"
#include "Utils.h"
#include "ComputationNode.h"
+#include "ReshapingNodes.h"

using namespace Microsoft::MSR::CNTK;

@@ -16,12 +17,44 @@ bool g_shareNodeValueMatrices = true;

namespace CNTK
{
+ _Internal::_SimpleVector Function::_Inputs() const
+ {
+ const CompositeFunction* compositeFunction = dynamic_cast(this);
+ if (compositeFunction == nullptr)
+ return m_inputs;
+ else
+ return _Internal::_SimpleVector::CreateSimpleVector(compositeFunction->DetermineInputs());
+ }
+
+ /*virtual*/ void Function::_ReplacePlaceholders(const _Internal::_SimpleMap& placeholderReplacements, _Internal::_SimpleSet& visitedFunctions, _Internal::_SimpleSet& replacedPlaceholders)
+ {
+ visitedFunctions.Insert(this);
+
+ for (auto iter = m_inputs.m_vector->begin(); iter != m_inputs.m_vector->end(); ++iter)
+ {
+ if (iter->IsPlaceholder())
+ {
+ Placeholder placeholder(*iter);
+ if (placeholderReplacements.Contains(placeholder))
+ {
+ *iter = placeholderReplacements[placeholder];
+ replacedPlaceholders.Insert(placeholder);
+ }
+ }
+ else if ((iter->Kind() == VariableKind::Output) && !visitedFunctions.Contains(iter->Owner()))
+ iter->Owner()->_ReplacePlaceholders(placeholderReplacements, visitedFunctions, replacedPlaceholders);
+ }
+ }
+
template 
- /*static*/ ComputationNodeBasePtr CompositeFunction::GetNode(const Variable& variable, ComputationNetworkBuilder& builder, std::unordered_map& variableToNodeMap, std::unordered_map& isVariableRootMap)
+ /*static*/ ComputationNodeBasePtr CompositeFunction::GetNode(const Variable& variable, Microsoft::MSR::CNTK::ComputationNetworkPtr& network, ComputationNetworkBuilder& builder, std::unordered_map& variableToNodeMap, std::unordered_map& isVariableRootMap)
{
if (variableToNodeMap.find(variable) != variableToNodeMap.end())
return variableToNodeMap[variable];

+ // Let's add a null entry in the map for this variable, to break infinite recursion when processing recurrent graphs
+ variableToNodeMap[variable] = nullptr;
+
std::shared_ptr> computationNodePtr;
if (variable.IsParameter() || variable.IsConstant())
{
@@ -47,7 +80,7 @@
else
{
assert(variable.Kind() == VariableKind::Output);
- computationNodePtr = GetOutputVariableNode(variable, builder, variableToNodeMap, isVariableRootMap)->As>()->shared_from_this();
+ computationNodePtr = GetOutputVariableNode(variable, network, builder, variableToNodeMap, isVariableRootMap)->As>()->shared_from_this();
}

variableToNodeMap[variable] = computationNodePtr;
@@ -56,9 +89,10 @@
}

template 
- /*static*/ ComputationNodeBasePtr CompositeFunction::GetOutputVariableNode(const Variable& variable, ComputationNetworkBuilder& builder, std::unordered_map& variableToNodeMap, std::unordered_map& isVariableRootMap)
+ /*static*/ ComputationNodeBasePtr CompositeFunction::GetOutputVariableNode(const Variable& variable, Microsoft::MSR::CNTK::ComputationNetworkPtr& network, ComputationNetworkBuilder& builder, std::unordered_map& variableToNodeMap, std::unordered_map& isVariableRootMap)
{
assert(variable.Kind() == VariableKind::Output);
+
Function* function =
variable.Owner();
ComputationNodeBasePtr computationNodePtr;
if (dynamic_cast(function) != nullptr)
{
// Create the nodes corresponding to the inputs
auto functionInputs = primitiveFunction->Inputs();
- std::shared_ptr> input0Node = GetNode(functionInputs[0], builder, variableToNodeMap, isVariableRootMap)->As>()->shared_from_this();
+ auto input0BaseNodePtr = GetNode(functionInputs[0], network, builder, variableToNodeMap, isVariableRootMap);
+ std::shared_ptr> input0Node = (input0BaseNodePtr != nullptr) ? input0BaseNodePtr->As>()->shared_from_this() : nullptr;

std::shared_ptr> input1Node;
if (functionInputs.size() > 1)
- input1Node = GetNode(functionInputs[1], builder, variableToNodeMap, isVariableRootMap)->As>()->shared_from_this();
+ {
+ auto input1BaseNodePtr = GetNode(functionInputs[1], network, builder, variableToNodeMap, isVariableRootMap);
+ input1Node = (input1BaseNodePtr != nullptr) ? input1BaseNodePtr->As>()->shared_from_this() : nullptr;
+ }

PrimitiveOpType op = primitiveFunction->OpType();
switch (op)
@@ -86,15 +124,60 @@
case PrimitiveOpType::Sigmoid:
computationNodePtr = builder.Sigmoid(input0Node, function->Name());
break;
+ case PrimitiveOpType::Tanh:
+ computationNodePtr = builder.Tanh(input0Node, function->Name());
+ break;
case PrimitiveOpType::CrossEntropyWithSoftmax:
computationNodePtr = builder.CrossEntropyWithSoftmax(input1Node, input0Node, function->Name());
break;
case PrimitiveOpType::PredictionError:
computationNodePtr = builder.ErrorPrediction(input1Node, input0Node, function->Name());
break;
+ case PrimitiveOpType::Exp:
+ computationNodePtr = builder.Exp(input0Node, function->Name());
+ break;
+ case PrimitiveOpType::PastValue:
+ case PrimitiveOpType::FutureValue:
+ {
+ Variable initialStateVar = functionInputs[0];
+ Variable inputOperandVar = functionInputs[1];
+ // TODO: Currently we only support a scalar initial state
+ if (!initialStateVar.IsConstant() || (initialStateVar.Shape().NumAxes() > 0))
+ LogicError("Currently PastValue/FutureValue Function only supports scalar initial state");
+
+ // TODO: We currently only support input operand with 1 static axis for PastValue/FutureValue
+ if (inputOperandVar.Shape().NumAxes() != 1)
+ LogicError("Currently PastValue/FutureValue Function only supports input operand with 1 static axis");
+
+ // TODO: We currently only support input operand with 1 dynamic axis for PastValue/FutureValue
+ if (inputOperandVar.DynamicAxes().size() != 1)
+ LogicError("Currently PastValue/FutureValue Function only supports input operand with 1 dynamic axis");
+
+ // Get the initial state of the PastValue/FutureValue operation
+ ElementType initStateValue;
+ NDArrayView tempView({}, &initStateValue, 1, DeviceDescriptor::CPUDevice());
+ tempView.CopyFrom(*Constant(initialStateVar).Value());
+
+ if (op == PrimitiveOpType::PastValue)
+ computationNodePtr = builder.PastValue(input1Node, (float)initStateValue, inputOperandVar.Shape()[0], primitiveFunction->FunctionConfig()[L"stepSize"].GetValue(), function->Name());
+ else
+ computationNodePtr = builder.FutureValue(input1Node, (float)initStateValue, inputOperandVar.Shape()[0], primitiveFunction->FunctionConfig()[L"stepSize"].GetValue(), function->Name());
+
+ break;
+ }
+ case PrimitiveOpType::ElementTimes:
+ computationNodePtr = builder.ElementTimes(input0Node, input1Node, function->Name());
+ break;
+ case PrimitiveOpType::ReduceSum:
+ {
+ // TODO: Use the new ReduceElements node instead of the legacy SumElements node for reduction.
Currently ReduceElements has incorrect MBLayout inference.
+ //computationNodePtr = network->AddNodeToNetAndAttachInputs(New>(network->GetDeviceId(), function->Name(), L"Sum", 0), { input0Node });
+ computationNodePtr = builder.Sum(input0Node, function->Name());
+ break;
+ }
case PrimitiveOpType::Combine:
for (size_t i = 0; i < functionInputs.size(); ++i)
- GetNode(functionInputs[i], builder, variableToNodeMap, isVariableRootMap);
+ GetNode(functionInputs[i], network, builder, variableToNodeMap, isVariableRootMap);

computationNodePtr = variableToNodeMap[variable];
@@ -152,7 +235,7 @@
std::vector forwardRootNodes;
for (size_t i = 0; i < rootFunctionOutputs.size(); ++i)
{
- auto currentRootNode = GetNode(rootFunctionOutputs[i], builder, m_variableToNodeMap, m_isVariableRootMap);
+ auto currentRootNode = GetNode(rootFunctionOutputs[i], m_computationNetwork, builder, m_variableToNodeMap, m_isVariableRootMap);
forwardRootNodes.push_back(currentRootNode);

if (backpropRoots.Contains(rootFunctionOutputs[i]))
@@ -168,6 +251,26 @@
m_currentBackpropRoots = backpropRoots;

+ // In case of recurrence, the inputs of some of the ComputationNodes are not attached due to cycles.
+ // Now attach those after we have created all ComputationNodes in the network
+ for (auto iter = m_variableToNodeMap.begin(); iter != m_variableToNodeMap.end(); ++iter)
+ {
+ auto currentComputationNodeInputs = iter->second->GetInputs();
+
+ // TODO: Can any node other than a PastValue/FutureValue Function's node have a null input attached after the first pass is finished?
+ if (std::find(currentComputationNodeInputs.begin(), currentComputationNodeInputs.end(), nullptr) != currentComputationNodeInputs.end())
+ {
+ // We found a null input; this variable must correspond to a PastValue or FutureValue function
+ const PrimitiveFunction* primitiveFunc = dynamic_cast(iter->first.Owner().GetPtr());
+ if ((primitiveFunc == nullptr) || ((primitiveFunc->OpType() != PrimitiveOpType::PastValue) && (primitiveFunc->OpType() != PrimitiveOpType::FutureValue)))
+ InvalidArgument("Invalid Function graph detected; recurrence found at a Function that is not a PastValue/FutureValue function");
+
+ // The 2nd input of the PastValue/FutureValue function denotes the recurrent input
+ auto actualInput = m_variableToNodeMap[primitiveFunc->Inputs()[1]];
+ iter->second->AttachInputs({ actualInput });
+ }
+ }
+
m_computationNetwork->CompileNetwork();

// Verify that the shapes of the output Variables that we computed match the corresponding nodes in the ComputationNetwork
@@ -193,42 +296,195 @@
return m_computationNetwork;
}

- /*static*/ void CompositeFunction::CopyNDArrayViewToComputationNodeValue(const NDArrayViewPtr& arrayView, ComputationNodeBasePtr node)
+ template 
+ /*static*/ std::pair>, MBLayoutPtr> CompositeFunction::GetCNTKImplMatrixAndMBLayoutFromValueObject(Variable var, const ValuePtr& value)
{
- switch (arrayView->DataType())
+ if (var.DataType() != value->Data()->DataType())
+ LogicError("The Variable's DataType %s does not match the corresponding Value's DataType %s", DataTypeName(var.DataType()), DataTypeName(value->Data()->DataType()));
+
+ if (GetDataType() != value->Data()->DataType())
+ LogicError("The specified ElementType %s does not match the DataType %s", typeid(ElementType).name(), DataTypeName(value->Data()->DataType()));
+
+ if (value->Data()->Shape().NumAxes() == var.Shape().NumAxes())
+ return{ value->Data()->GetMatrix(), nullptr };
+
+ if (value->Data()->Shape().NumAxes() !=
(var.Shape().NumAxes() + var.DynamicAxes().size() + 1))
+ InvalidArgument("Value's number of axes should be larger than the Variable's number of axes by 1 + number of dynamic axes");
+
+ if (var.DynamicAxes().size() > 1)
+ LogicError("More than one dynamic axis for a variable is currently unsupported");
+
+ size_t maxNumTimeSteps = value->Data()->Shape()[var.Shape().NumAxes()];
+ size_t numSequences = value->Data()->Shape()[var.Shape().NumAxes() + 1];
+
+ auto mask = value->Mask();
+ if ((mask != nullptr) && ((var.Shape().NumAxes() + mask->Shape().NumAxes()) != value->Data()->Shape().NumAxes()))
+ InvalidArgument("Invalid Value object; the sum of the #axes of the mask and the Variable does not equal the #axes of the data");
+
+ if ((numSequences == 1) || (maxNumTimeSteps == 1))
{
- case DataType::Float:
- {
- auto& nodeData = node->As>()->Value();
- nodeData.AssignValuesOf(*(arrayView->GetMatrix()));
- break;
+ // The data need not be shuffled
+ std::shared_ptr> matrixData = value->Data()->GetMatrix(var.Shape().NumAxes());
+ auto layout = std::make_shared();
+ if (maxNumTimeSteps == 1)
+ layout->InitAsFrameMode(numSequences);
+ else
+ {
+ layout->Init(1, maxNumTimeSteps);
+ layout->AddSequence(0, 0, 0, maxNumTimeSteps);
+ }
+
+ return{ matrixData, layout };
}
- case DataType::Double:
+ else
{
- auto& nodeData = node->As>()->Value();
- nodeData.AssignValuesOf(*(arrayView->GetMatrix()));
- break;
- }
- default:
- LogicError("Unsupported DataType %s", DataTypeName(arrayView->DataType()));
- break;
+ std::vector sequenceLengths(numSequences, maxNumTimeSteps);
+ if (mask != nullptr)
+ {
+ // Determine the sequence lengths from the mask
+ std::unique_ptr maskData(mask->GetMatrix()->CopyToArray());
+ for (size_t i = 0; i < numSequences; ++i)
+ {
+ size_t currentSequenceLength = 0;
+ bool currentSequenceEndAlreadyFound = false;
+ for (size_t j = 0; j < maxNumTimeSteps; ++j)
+ {
+ if (maskData[(i * maxNumTimeSteps) + j] == 1)
+ {
+ if (currentSequenceEndAlreadyFound)
+ InvalidArgument("Invalid Value object; only trailing steps of a sequence can be masked");
+
+ currentSequenceLength++;
+ }
+ else
+ {
+ currentSequenceEndAlreadyFound = true;
+ }
+ }
+
+ sequenceLengths[i] = currentSequenceLength;
+ }
+ }
+
+ // The data needs to be rearranged since CNTK requires sequences to be interleaved across timesteps
+ std::vector sequences;
+ for (size_t i = 0; i < numSequences; ++i)
+ sequences.push_back({ i, SIZE_MAX, 0, sequenceLengths[i] });
+
+ auto layout = std::make_shared();
+ std::vector> placement;
+ std::vector rowAllocations;
+ layout->InitAsPackedSequences(sequences, placement, rowAllocations);
+ if (maxNumTimeSteps != layout->GetNumTimeSteps())
+ LogicError("The number of time steps in the packed MBLayout does not match the longest sequence's length in the Value object");
+
+ if (numSequences != layout->GetNumSequences())
+ LogicError("The number of sequences in the packed MBLayout does not match the sequence count in the Value object");
+
+ // Now generate the gather indices
+ auto matrixData = std::make_shared>(var.Shape().TotalSize(), layout->GetNumCols(), AsCNTKImplDeviceId(value->Data()->Device()));
+ std::vector gatherIndicesVector(layout->GetNumCols(), 0);
+ for (size_t i = 0; i < numSequences; ++i)
+ {
+ size_t targetParallelStreamIdx = placement[i].first;
+ size_t targetStartIdxInParallelStream = placement[i].second;
+ for (size_t j = 0; j < sequenceLengths[i]; ++j)
+ gatherIndicesVector[((targetStartIdxInParallelStream + j) *
layout->GetNumParallelSequences()) + targetParallelStreamIdx] = (ElementType)((i * maxNumTimeSteps) + j);
+ }
+
+ auto gatherIdxMatrix = std::make_shared>(1, layout->GetNumCols(), gatherIndicesVector.data(), AsCNTKImplDeviceId(value->Data()->Device()));
+ matrixData->DoGatherColumnsOf(0, *gatherIdxMatrix, *(value->Data()->GetMatrix(var.Shape().NumAxes())), 1);
+ return{ matrixData, layout };
}
}

- /*static*/ void CompositeFunction::CopyNDArrayViewToComputationNodeGradient(const NDArrayViewPtr& arrayView, ComputationNodeBasePtr node)
+ template 
+ /*static*/ ValuePtr CompositeFunction::GetValueObjectFromCNTKImplMatrixAndMBLayout(Variable var, const Matrix& matrix, const MBLayoutPtr& layout)
{
- switch (arrayView->DataType())
+ if (var.DynamicAxes().size() > 1)
+ LogicError("More than one dynamic axis for a variable is currently unsupported");
+
+ if (GetDataType() != var.DataType())
+ LogicError("The specified ElementType %s does not match the DataType %s", typeid(ElementType).name(), DataTypeName(var.DataType()));
+
+ if ((layout != nullptr) && (matrix.GetNumRows() != var.Shape().TotalSize()))
+ LogicError("Unexpected matrix layout: The number of rows in the matrix does not match the sample size of the Variable");
+
+ NDShape valueDataShape = var.Shape();
+ if (layout != nullptr)
+ valueDataShape = valueDataShape.AppendShape({ layout->GetNumTimeSteps(), layout->GetNumSequences() });
+
+ // No data shuffling needed if no layout or the layout has just one time-step or just one sequence
+ if ((layout == nullptr) || (layout->GetNumTimeSteps() == 1) || (layout->GetNumSequences() == 1))
{
- case DataType::Float:
- node->As>()->ResetGradient(*(arrayView->GetMatrix()));
- break;
- case DataType::Double:
- node->As>()->ResetGradient(*(arrayView->GetMatrix()));
- break;
- default:
- LogicError("Unsupported DataType %s", DataTypeName(arrayView->DataType()));
- break;
+ // Just create a view over the existing matrix itself
+
+ if ((matrix.GetFormat() != matrixFormatDense) || (matrix.GetFormat() != matrixFormatColMajor))
+ LogicError("Only dense and column-major data storage format is currently supported");
+
+ auto tensorView = new TensorView(std::make_shared>(matrix.AsReference()), AsTensorShape(valueDataShape));
+ auto data = NDArrayViewPtr(new NDArrayView(GetDataType(), AsDeviceDescriptor(matrix.GetDeviceId()), StorageFormat::Dense, valueDataShape, true, tensorView), [](_ReferenceCounter* ptr) { delete ptr; });
+ return ValuePtr(new Value(data), [](_ReferenceCounter* ptr) { delete ptr; });
}
+
+ if (layout->GetNumCols() != matrix.GetNumCols())
+ LogicError("Bad MBLayout: The number of columns in the MBLayout does not match the number of columns in the data matrix!");
+
+ size_t maxNumTimeSteps = layout->GetNumTimeSteps();
+ size_t numSequences = layout->GetNumSequences();
+
+ std::vector sequenceLengths;
+ auto& layoutSequences = layout->GetAllSequences();
+ for (auto iter = layoutSequences.begin(); iter != layoutSequences.end(); ++iter)
+ {
+ if (iter->seqId != GAP_SEQUENCE_ID)
+ sequenceLengths.push_back(iter->GetNumTimeSteps());
+ }
+
+ // Reshuffle the data to unpack and uninterleave the CNTK-format data
+ // Now generate the scatter indices
+ auto shuffledMatrixData = std::make_shared>(matrix.GetNumRows(), maxNumTimeSteps * numSequences, matrix.GetDeviceId());
+
+ std::vector sequencesShorterThanLongestSequence;
+ for (size_t i = 0; i < numSequences; ++i)
+ if (sequenceLengths[i] != maxNumTimeSteps)
+ sequencesShorterThanLongestSequence.push_back(i);
+
+ // Set the target location of all gaps to be
the last step of the first sequence that is shorter than the longest sequence in the batch + size_t targetColIdxForInvalidColumns = sequencesShorterThanLongestSequence.empty() ? 0 : (((sequencesShorterThanLongestSequence[0] + 1) * maxNumTimeSteps) - 1); + std::vector scatterIndicesVector(layout->GetNumCols(), (ElementType)targetColIdxForInvalidColumns); + size_t i = 0; + for (auto iter = layoutSequences.begin(); iter != layoutSequences.end(); ++iter) + { + if (iter->seqId != GAP_SEQUENCE_ID) + { + size_t targetParallelStreamIdx = iter->s; + size_t targetStartIdxInParallelStream = iter->tBegin; + for (size_t j = 0; j < iter->GetNumTimeSteps(); ++j) + scatterIndicesVector[((targetStartIdxInParallelStream + j) * layout->GetNumParallelSequences()) + targetParallelStreamIdx] = (ElementType)((i * maxNumTimeSteps) + j); + + i++; + } + } + + auto scatterIdxMatrix = std::make_shared>(1, layout->GetNumCols(), scatterIndicesVector.data(), matrix.GetDeviceId()); + shuffledMatrixData->DoScatterColumnsOf(0, *scatterIdxMatrix, matrix, 1); + + // Create the mask if needed + NDMaskPtr mask; + if (!sequencesShorterThanLongestSequence.empty()) + { + mask = NDMaskPtr(new NDMask({ maxNumTimeSteps, numSequences }, AsDeviceDescriptor(matrix.GetDeviceId())), [](_ReferenceCounter* ptr) { delete ptr; }); + for (size_t i = 0; i < sequencesShorterThanLongestSequence.size(); ++i) + { + size_t shorterSequenceIdx = sequencesShorterThanLongestSequence[i]; + mask->MaskSection({ sequenceLengths[shorterSequenceIdx], shorterSequenceIdx }, { NDShape::InferredDimension, 1 }); + } + } + + auto tensorView = new TensorView(shuffledMatrixData, AsTensorShape(valueDataShape)); + auto data = NDArrayViewPtr(new NDArrayView(GetDataType(), AsDeviceDescriptor(matrix.GetDeviceId()), StorageFormat::Dense, valueDataShape, true, tensorView), [](_ReferenceCounter* ptr) { delete ptr; }); + return ValuePtr(new Value(data, mask), [](_ReferenceCounter* ptr) { delete ptr; }); } void CompositeFunction::PopulateNetworkInputs(const _Internal::_SimpleMap& arguments) @@ -245,17 +501,36 @@ namespace CNTK inputNodes.push_back(argumentComputationNode); ValuePtr argumentValue = arguments[*iter]; - CopyNDArrayViewToComputationNodeValue(argumentValue->Data(), argumentComputationNode); - // TODO: No sequence support for now - // The number of axes for argument Value can be at most 1 larger than the number of axes of the variable's shape - if (argumentValue->Data()->Shape().NumAxes() != (iter->Shape().NumAxes() + 1)) - InvalidArgument("Argument value's number of axes should be 1 larger than the argument variable's number of axes"); + MBLayoutPtr layout; + switch (argumentValue->Data()->DataType()) + { + case DataType::Float: + { + auto CNTKMatrixAndMBLayout = GetCNTKImplMatrixAndMBLayoutFromValueObject(*iter, argumentValue); + layout = CNTKMatrixAndMBLayout.second; - size_t numSamples = argumentValue->Data()->Shape()[iter->Shape().NumAxes()]; + auto& nodeData = argumentComputationNode->As>()->Value(); + nodeData.AssignValuesOf(*CNTKMatrixAndMBLayout.first); + break; + } + case DataType::Double: + { + auto CNTKMatrixAndMBLayout = GetCNTKImplMatrixAndMBLayoutFromValueObject(*iter, argumentValue); + layout = CNTKMatrixAndMBLayout.second; - argumentComputationNode->GetMBLayout()->InitAsFrameMode(numSamples); + auto& nodeData = argumentComputationNode->As>()->Value(); + nodeData.AssignValuesOf(*CNTKMatrixAndMBLayout.first); + break; + } + default: + LogicError("Unsupported DataType %s", DataTypeName(argumentValue->Data()->DataType())); + break; + } + + 
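// Transfer the packed layout (time steps x sequences, with gaps for the shorter sequences) computed from the Value object
// onto the input node, so that downstream nodes see the correct per-sequence time-step structure.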
argumentComputationNode->GetMBLayout()->CopyFrom(layout);
}
+
m_computationNetwork->BumpEvalTimeStamp(inputNodes);
}

@@ -270,33 +545,35 @@
InvalidArgument("Gradients cannot be specified for a Variable that is not an Output of the Function");

auto outputComputationNode = m_variableToNodeMap[iter->first];
+ auto nodeLayout = outputComputationNode->GetMBLayout();

ValuePtr gradientValue = iter->second;
- CopyNDArrayViewToComputationNodeGradient(gradientValue->Data(), outputComputationNode);
- }
- }

- /*static*/ void CompositeFunction::CopyComputationNodeDataToNDArrayView(const Microsoft::MSR::CNTK::ComputationNodeBasePtr& node, NDArrayViewPtr arrayView, bool copyGradient)
- {
- switch (arrayView->DataType())
- {
- case DataType::Float:
- {
- auto& outputMatrix = copyGradient ? node->As>()->Gradient() : node->As>()->Value();
- auto arrayViewMatrix = arrayView->GetWritableMatrix();
- arrayViewMatrix->AssignValuesOf(outputMatrix);
- break;
- }
- case DataType::Double:
- {
- auto& outputMatrix = copyGradient ? node->As>()->Gradient() : node->As>()->Value();
- auto arrayViewMatrix = arrayView->GetWritableMatrix();
- arrayViewMatrix->AssignValuesOf(outputMatrix);
- break;
- }
- default:
- LogicError("Unsupported DataType %s", DataTypeName(arrayView->DataType()));
- break;
+ MBLayoutPtr layout;
+ switch (gradientValue->Data()->DataType())
+ {
+ case DataType::Float:
+ {
+ auto CNTKMatrixAndMBLayout = GetCNTKImplMatrixAndMBLayoutFromValueObject(iter->first, gradientValue);
+ layout = CNTKMatrixAndMBLayout.second;
+ if (((layout == nullptr) != (nodeLayout == nullptr)) || ((layout != nullptr) && (*layout != *nodeLayout)))
+ InvalidArgument("The layout of the specified gradient Value is incompatible with the layout of the corresponding Variable computed during the Forward call");
+ outputComputationNode->As>()->ResetGradient(*CNTKMatrixAndMBLayout.first);
+ break;
+ }
+ case DataType::Double:
+ {
+ auto CNTKMatrixAndMBLayout = GetCNTKImplMatrixAndMBLayoutFromValueObject(iter->first, gradientValue);
+ layout = CNTKMatrixAndMBLayout.second;
+ if (((layout == nullptr) != (nodeLayout == nullptr)) || ((layout != nullptr) && (*layout != *nodeLayout)))
+ InvalidArgument("The layout of the specified gradient Value is incompatible with the layout of the corresponding Variable computed during the Forward call");
+ outputComputationNode->As>()->ResetGradient(*CNTKMatrixAndMBLayout.first);
+ break;
+ }
+ default:
+ LogicError("Unsupported DataType %s", DataTypeName(gradientValue->Data()->DataType()));
+ break;
+ }
}
}

@@ -304,14 +581,17 @@
{
size_t outputValueNumAxes = var.Shape().NumAxes();
if (computationNodePtr->GetMBLayout() != nullptr)
- outputValueNumAxes++;
+ outputValueNumAxes += 2;

std::vector outputShapeDims(outputValueNumAxes);
for (size_t i = 0; i < var.Shape().NumAxes(); ++i)
outputShapeDims[i] = computationNodePtr->GetSampleLayout().GetDim(i);

if (computationNodePtr->GetMBLayout() != nullptr)
- outputShapeDims[var.Shape().NumAxes()] = computationNodePtr->GetMBLayout()->GetNumParallelSequences();
+ {
+ outputShapeDims[var.Shape().NumAxes()] = computationNodePtr->GetMBLayout()->GetNumTimeSteps();
+ outputShapeDims[var.Shape().NumAxes() + 1] = computationNodePtr->GetMBLayout()->GetNumSequences();
+ }

return NDShape(outputShapeDims);
}
@@ -331,12 +611,38 @@
if (outputValuePtr->Data()->Shape() != outputShape)
InvalidArgument("The shape %s of the specified Value object for output does not match the actual output shape %s",
AsString(outputValuePtr->Data()->Shape()).c_str(), AsString(outputShape).c_str()); } - else + + switch (iter->first.DataType()) { - outputValuePtr = new Value(new NDArrayView(iter->first.DataType(), outputShape, nullptr, 0, AsDeviceDescriptor(computationNodePtr->ValuePtr()->GetDeviceId()))); + case DataType::Float: + { + auto nodeValue = GetValueObjectFromCNTKImplMatrixAndMBLayout(iter->first, computationNodePtr->As>()->Value(), computationNodePtr->GetMBLayout()); + if (outputValuePtr == nullptr) + { + auto data = NDArrayViewPtr(new NDArrayView(iter->first.DataType(), outputShape, nullptr, 0, AsDeviceDescriptor(computationNodePtr->ValuePtr()->GetDeviceId())), [](_ReferenceCounter* ptr) { delete ptr; }); + auto mask = (nodeValue->Mask() != nullptr) ? NDMaskPtr(new NDMask(nodeValue->Mask()->Shape(), nodeValue->Mask()->Device()), [](_ReferenceCounter* ptr) { delete ptr; }) : nullptr; + outputValuePtr = ValuePtr(new Value(data, mask), [](_ReferenceCounter* ptr) { delete ptr; }); + } + outputValuePtr->CopyFrom(*nodeValue); + break; + } + case DataType::Double: + { + auto nodeValue = GetValueObjectFromCNTKImplMatrixAndMBLayout(iter->first, computationNodePtr->As>()->Value(), computationNodePtr->GetMBLayout()); + if (outputValuePtr == nullptr) + { + auto data = NDArrayViewPtr(new NDArrayView(iter->first.DataType(), outputShape, nullptr, 0, AsDeviceDescriptor(computationNodePtr->ValuePtr()->GetDeviceId())), [](_ReferenceCounter* ptr) { delete ptr; }); + auto mask = (nodeValue->Mask() != nullptr) ? NDMaskPtr(new NDMask(nodeValue->Mask()->Shape(), nodeValue->Mask()->Device()), [](_ReferenceCounter* ptr) { delete ptr; }) : nullptr; + outputValuePtr = ValuePtr(new Value(data, mask), [](_ReferenceCounter* ptr) { delete ptr; }); + } + outputValuePtr->CopyFrom(*nodeValue); + break; + } + default: + LogicError("Unsupported DataType %s", DataTypeName(iter->first.DataType())); + break; } - CopyComputationNodeDataToNDArrayView(computationNodePtr, outputValuePtr->Data(), false); outputs[iter->first] = outputValuePtr; } } @@ -365,15 +671,42 @@ namespace CNTK if (gradientValuePtr->Data()->Shape() != gradientShape) InvalidArgument("The shape %s of the specified Value object for gradient does not match the actual gradient shape %s", AsString(gradientValuePtr->Data()->Shape()).c_str(), AsString(gradientShape).c_str()); } - else - { - gradientValuePtr = new Value(new NDArrayView(iter->first.DataType(), gradientShape, nullptr, 0, AsDeviceDescriptor(computationNodePtr->ValuePtr()->GetDeviceId()))); - } if (!computationNodePtr->NeedsGradient()) LogicError("Backpropagated gradient value cannot be read from a ComputationNode that has NeedsGradient set to false"); - CopyComputationNodeDataToNDArrayView(computationNodePtr, gradientValuePtr->Data(), true); + switch (iter->first.DataType()) + { + case DataType::Float: + { + auto nodeValue = GetValueObjectFromCNTKImplMatrixAndMBLayout(iter->first, computationNodePtr->As>()->Gradient(), computationNodePtr->GetMBLayout()); + if (gradientValuePtr == nullptr) + { + auto data = NDArrayViewPtr(new NDArrayView(iter->first.DataType(), gradientShape, nullptr, 0, AsDeviceDescriptor(computationNodePtr->ValuePtr()->GetDeviceId())), [](_ReferenceCounter* ptr) { delete ptr; }); + auto mask = NDMaskPtr((nodeValue->Mask() != nullptr) ? 
new NDMask(nodeValue->Mask()->Shape(), nodeValue->Mask()->Device()) : nullptr, [](_ReferenceCounter* ptr) { delete ptr; }); + gradientValuePtr = ValuePtr(new Value(data, mask), [](_ReferenceCounter* ptr) { delete ptr; }); + } + gradientValuePtr->CopyFrom(*nodeValue); + break; + } + case DataType::Double: + { + auto nodeValue = GetValueObjectFromCNTKImplMatrixAndMBLayout(iter->first, computationNodePtr->As>()->Gradient(), computationNodePtr->GetMBLayout()); + if (gradientValuePtr == nullptr) + { + auto data = NDArrayViewPtr(new NDArrayView(iter->first.DataType(), gradientShape, nullptr, 0, AsDeviceDescriptor(computationNodePtr->ValuePtr()->GetDeviceId())), [](_ReferenceCounter* ptr) { delete ptr; }); + auto mask = NDMaskPtr((nodeValue->Mask() != nullptr) ? new NDMask(nodeValue->Mask()->Shape(), nodeValue->Mask()->Device()) : nullptr, [](_ReferenceCounter* ptr) { delete ptr; }); + gradientValuePtr = ValuePtr(new Value(data, mask), [](_ReferenceCounter* ptr) { delete ptr; }); + } + gradientValuePtr->CopyFrom(*nodeValue); + break; + } + default: + LogicError("Unsupported DataType %s", DataTypeName(iter->first.DataType())); + break; + } + gradients[iter->first] = gradientValuePtr; } } @@ -422,7 +755,7 @@ namespace CNTK // TODO: How to deal with the specified 'computeDevice' - return (outputsToRetainBackwardStateFor.Size() > 0) ? new CNTKBackPropState(this, m_variableToNodeMap[arguments.m_map->begin()->first]->GetEvalTimeStamp()) : nullptr; + return (outputsToRetainBackwardStateFor.Size() > 0) ? BackPropStatePtr(new CNTKBackPropState(this, { arguments.m_map->begin()->first, m_variableToNodeMap[arguments.m_map->begin()->first]->GetEvalTimeStamp() }), [](_ReferenceCounter* ptr) { delete ptr; }) : nullptr; } /*virtual*/ void CompositeFunction::Backward(const BackPropStatePtr& state, @@ -434,21 +767,21 @@ namespace CNTK // TODO: Support multiple concurrent backprop states auto backpropState = dynamic_cast(state.GetPtr()); - if (backpropState->EvalTimeStamp() != m_variableToNodeMap[*(this->Arguments().begin())]->GetEvalTimeStamp()) + if (backpropState->EvalTimeStamp().second != m_variableToNodeMap[backpropState->EvalTimeStamp().first]->GetEvalTimeStamp()) LogicError("The specified backprop state cannot be used for backpropagation as the Function's internal state was modified by subsequent Forward calls to the function." 
"This is not a user error but a shortcoming of the current implementation where multiple independent backprop states are not simultaneously supported"); - // TODO: Avoid copying the data when possible + if (rootGradientValues.Size() > 1) + LogicError("Currently gradient backprop from only one of the Function Outputs is supported"); - // Feed data into the arguments of the network - PopulateNetworkGradients(rootGradientValues); + // TODO: Avoid copying the data when possible // Zero all gradients of nodes below the root nodes for (auto iter = rootGradientValues.m_map->begin(); iter != rootGradientValues.m_map->end(); ++iter) m_computationNetwork->ZeroInputGradients(m_variableToNodeMap[iter->first]); - if (rootGradientValues.Size() > 1) - LogicError("Currently gradient backprop from only one of the Function Outputs is supported"); + // Feed data into the arguments of the network + PopulateNetworkGradients(rootGradientValues); // Backpropagate through the network auto rootComputationNodePtr = m_variableToNodeMap[rootGradientValues.m_map->begin()->first]; @@ -459,6 +792,26 @@ namespace CNTK // TODO: How to deal with the specified 'computeDevice' } + /*virtual*/ void CompositeFunction::_ReplacePlaceholders(const _Internal::_SimpleMap& placeholderReplacements, _Internal::_SimpleSet& visitedFunctions, _Internal::_SimpleSet& replacedPlaceholders) + { + RootFunction()->_ReplacePlaceholders(placeholderReplacements, visitedFunctions, replacedPlaceholders); + + // If any of the placeholders were replaced with Output variables, let's add the graph of function underneath each of those to 'm_allPrimitiveFunctions' set + for (auto iter = replacedPlaceholders.m_set->begin(); iter != replacedPlaceholders.m_set->end(); ++iter) + { + auto replacingVariable = placeholderReplacements[*iter]; + if (replacingVariable.Kind() == VariableKind::Output) + { + auto ownerFunc = replacingVariable.Owner(); + _Internal::_SimpleSet visitedFunctions; + _DetermineInputs(ownerFunc, visitedFunctions); + + // Add the newly visited functions to 'm_allPrimitiveFunctions' set + m_allPrimitiveFunctions.m_set->insert(visitedFunctions.m_set->begin(), visitedFunctions.m_set->end()); + } + } + } + FunctionPtr Times(const Variable& leftOperand, const Variable& rightOperand, const std::wstring& name/* = L""*/) { return CompositeFunction::Create(new PrimitiveFunction(PrimitiveOpType::Times, { leftOperand, rightOperand }, Dictionary(), name), name); @@ -474,6 +827,11 @@ namespace CNTK return CompositeFunction::Create(new PrimitiveFunction(PrimitiveOpType::Sigmoid, { operand }, Dictionary(), name), name); } + FunctionPtr Tanh(const Variable& operand, const std::wstring& name/* = L""*/) + { + return CompositeFunction::Create(new PrimitiveFunction(PrimitiveOpType::Tanh, { operand }, Dictionary(), name), name); + } + FunctionPtr _Combine(const _Internal::_SimpleVector& operands, const std::wstring& name/* = L""*/) { _Internal::_SimpleSet uniqueOperands; @@ -501,4 +859,39 @@ namespace CNTK { return CompositeFunction::Create(new PrimitiveFunction(PrimitiveOpType::PredictionError, { prediction, labels }, Dictionary(), name), name); } + + FunctionPtr Exp(const Variable& operand, const std::wstring& name/* = L""*/) + { + return CompositeFunction::Create(new PrimitiveFunction(PrimitiveOpType::Exp, { operand }, Dictionary(), name), name); + } + + FunctionPtr PastValue(const Variable& initialState, const Variable& operand, size_t stepSize, const std::wstring& name/* = L""*/) + { + if (operand.DynamicAxes().size() != 1) + InvalidArgument("PastValue 
overload that does not explicitly specify a dynamic axis can only be used for operands with exactly one dynamic axis"); + + auto additionalProperties = Dictionary(); + additionalProperties[L"stepSize"] = DictionaryValue(stepSize); + return CompositeFunction::Create(new PrimitiveFunction(PrimitiveOpType::PastValue, { initialState, operand }, std::move(additionalProperties), name), name); + } + + FunctionPtr FutureValue(const Variable& initialState, const Variable& operand, size_t stepSize, const std::wstring& name/* = L""*/) + { + if (operand.DynamicAxes().size() != 1) + InvalidArgument("FutureValue overload that does not explicitly specify a dynamic axis can only be used for operands with exactly one dynamic axis"); + + auto additionalProperties = Dictionary(); + additionalProperties[L"stepSize"] = DictionaryValue(stepSize); + return CompositeFunction::Create(new PrimitiveFunction(PrimitiveOpType::FutureValue, { initialState, operand }, std::move(additionalProperties), name), name); + } + + FunctionPtr ElementTimes(const Variable& leftOperand, const Variable& rightOperand, const std::wstring& name/* = L""*/) + { + return CompositeFunction::Create(new PrimitiveFunction(PrimitiveOpType::ElementTimes, { leftOperand, rightOperand }, Dictionary(), name), name); + } + + FunctionPtr ReduceSum(const Variable& operand, const std::wstring& name/* = L""*/) + { + return CompositeFunction::Create(new PrimitiveFunction(PrimitiveOpType::ReduceSum, { operand }, Dictionary(), name), name); + } } diff --git a/Source/CNTKv2LibraryDll/Function.h b/Source/CNTKv2LibraryDll/Function.h index 491a21c57..697236133 100644 --- a/Source/CNTKv2LibraryDll/Function.h +++ b/Source/CNTKv2LibraryDll/Function.h @@ -16,9 +16,15 @@ namespace CNTK Plus, Times, Sigmoid, + Tanh, Combine, CrossEntropyWithSoftmax, - PredictionError + PredictionError, + Exp, + PastValue, + FutureValue, + ElementTimes, + ReduceSum }; inline const char* PrimitiveOpTypeName(PrimitiveOpType opType) @@ -29,12 +35,24 @@ namespace CNTK return "Times"; else if (opType == PrimitiveOpType::Sigmoid) return "Sigmoid"; + else if (opType == PrimitiveOpType::Tanh) + return "Tanh"; else if (opType == PrimitiveOpType::Combine) return "Combine"; else if (opType == PrimitiveOpType::CrossEntropyWithSoftmax) return "CrossEntropyWithSoftmax"; else if (opType == PrimitiveOpType::PredictionError) return "PredictionError"; + else if (opType == PrimitiveOpType::Exp) + return "Exp"; + else if (opType == PrimitiveOpType::PastValue) + return "PastValue"; + else if (opType == PrimitiveOpType::FutureValue) + return "FutureValue"; + else if (opType == PrimitiveOpType::ElementTimes) + return "ElementTimes"; + else if (opType == PrimitiveOpType::ReduceSum) + return "ReduceSum"; else LogicError("Unknown PrimitiveOpType"); } @@ -50,14 +68,14 @@ namespace CNTK virtual BackPropStatePtr Forward(const _Internal::_SimpleMap& /*arguments*/, _Internal::_SimpleMap& /*outputs*/, const _Internal::_SimpleSet& /*outputsToRetainBackwardStateFor*/, - const DeviceDescriptor& /*computeDevice*/) + const DeviceDescriptor& /*computeDevice*/) override { NOT_IMPLEMENTED; } virtual void Backward(const BackPropStatePtr& /*state*/, const _Internal::_SimpleMap& /*rootGradientValues*/, - _Internal::_SimpleMap& /*backPropagatedGradientValuesForInputs*/) + _Internal::_SimpleMap& /*backPropagatedGradientValuesForInputs*/) override { NOT_IMPLEMENTED; } @@ -129,7 +147,7 @@ namespace CNTK return NDShape(std::move(outputDims)); } - static NDShape ReductionOpOutputShape(PrimitiveOpType op, const NDShape& operandShape, 
const std::initializer_list& reductionAxes) + static NDShape ReductionOpOutputShape(PrimitiveOpType op, const NDShape& operandShape, const std::vector& reductionAxes) { if (reductionAxes.size() > operandShape.NumAxes()) RuntimeError("The number of reduction axes %d exceeds the number of axes in the operand shape %s of the reduction operation %s", reductionAxes.size(), AsString(operandShape).c_str(), PrimitiveOpTypeName(op)); @@ -155,30 +173,74 @@ namespace CNTK // TODO: We are just using the input[0]'s DataType as output node's DataType. This is not always correct DataType outputDataType = inputs[0].DataType(); + // We currently require that the inputs' dynamic axes if any match + std::vector outputDynamicAxes = inputs[0].DynamicAxes(); + for (size_t i = 1; i < inputs.size(); ++i) + { + auto currentInputDynamicAxes = inputs[i].DynamicAxes(); + if (outputDynamicAxes.empty()) + outputDynamicAxes = currentInputDynamicAxes; + else + { + if (!currentInputDynamicAxes.empty() && (currentInputDynamicAxes != outputDynamicAxes)) + LogicError("Currently if an operand of a binary elementwise operation has any dynamic axes, those must match the dynamic axes of the other operand"); + } + } + switch (op) { case PrimitiveOpType::Sigmoid: + case PrimitiveOpType::Tanh: + case PrimitiveOpType::Exp: assert(inputs.size() == 1); - outputs.push_back(Variable(UnaryElementwiseOpOutputShape(inputs[0].Shape()), outputDataType, owner)); + outputs.push_back(Variable(UnaryElementwiseOpOutputShape(inputs[0].Shape()), outputDataType, owner, outputDynamicAxes)); + break; + case PrimitiveOpType::PastValue: + case PrimitiveOpType::FutureValue: + assert(inputs.size() == 2); + outputs.push_back(Variable(UnaryElementwiseOpOutputShape(inputs[1].Shape()), outputDataType, owner, outputDynamicAxes)); break; case PrimitiveOpType::Plus: + case PrimitiveOpType::ElementTimes: assert(inputs.size() == 2); - outputs.push_back(Variable(BinaryElementwiseOpOutputShape(op, inputs[0].Shape(), inputs[1].Shape()), outputDataType, owner)); + outputs.push_back(Variable(BinaryElementwiseOpOutputShape(op, inputs[0].Shape(), inputs[1].Shape()), outputDataType, owner, outputDynamicAxes)); break; case PrimitiveOpType::Times: assert(inputs.size() == 2); - outputs.push_back(Variable(TimesOpOutputShape(inputs[0].Shape(), inputs[1].Shape()), outputDataType, owner)); + outputs.push_back(Variable(TimesOpOutputShape(inputs[0].Shape(), inputs[1].Shape()), outputDataType, owner, outputDynamicAxes)); break; case PrimitiveOpType::CrossEntropyWithSoftmax: case PrimitiveOpType::PredictionError: { assert(inputs.size() == 2); + + if (inputs[0].Shape().NumAxes() > 1) + InvalidArgument("The shape of input operands for the %s operation should have at most one axis", PrimitiveOpTypeName(op)); + auto predictionShape = inputs[0].Shape(); auto labelsShape = inputs[1].Shape(); if (predictionShape != labelsShape) RuntimeError("Prediction output operand's shape %s is incompatible with label operand's shape %s for the %s operation", AsString(predictionShape).c_str(), AsString(labelsShape).c_str(), PrimitiveOpTypeName(op)); - outputs.push_back(Variable(ReductionOpOutputShape(op, predictionShape, { 0 }), outputDataType, owner)); + std::vector reductionAxes; + for (size_t i = 0; i < inputs[0].Shape().NumAxes(); ++i) + reductionAxes.push_back(i); + + outputs.push_back(Variable(ReductionOpOutputShape(op, predictionShape, reductionAxes), outputDataType, owner, {})); + break; + } + case PrimitiveOpType::ReduceSum: + { + assert(inputs.size() == 1); + + // TODO: For reductions, we 
should remove any of the dynamic axes from 'outputDynamicAxes' that are being reduced over. + // Currently we only support reductions that reduce over all axes + std::vector reductionOutputDynamicAxes = {}; + std::vector reductionAxes; + for (size_t i = 0; i < inputs[0].Shape().NumAxes(); ++i) + reductionAxes.push_back(i); + + outputs.push_back(Variable(ReductionOpOutputShape(op, inputs[0].Shape(), reductionAxes), outputDataType, owner, reductionOutputDynamicAxes)); break; } case PrimitiveOpType::Combine: @@ -200,17 +262,17 @@ namespace CNTK class CNTKBackPropState final : public BackPropState { public: - CNTKBackPropState(const FunctionPtr& function, int64_t evalTimeStamp) + CNTKBackPropState(const FunctionPtr& function, const std::pair& evalTimeStamp) : BackPropState(function), m_evalTimeStamp(evalTimeStamp) {} - int64_t EvalTimeStamp() const + std::pair EvalTimeStamp() const { return m_evalTimeStamp; } private: - int64_t m_evalTimeStamp; + std::pair m_evalTimeStamp; }; typedef _Internal::_ReferenceCounterSharedPtr CNTKBackPropStatePtr; @@ -219,33 +281,44 @@ namespace CNTK class CompositeFunction final : public Function { + friend class Function; + public: static CompositeFunctionPtr Create(const FunctionPtr& rootFunction, const std::wstring& name = L"") { _Internal::_SimpleSet visitedFunctions; - std::vector inputs = DetermineInputs(rootFunction, visitedFunctions); - auto func = new CompositeFunction(inputs, rootFunction->Outputs(), rootFunction, std::move(visitedFunctions), name); - return CompositeFunctionPtr(func, [](_ReferenceCounter* ptr) { - delete ptr; - }); + + // Call _DetermineInputs to get the set of all functions in the graph + _DetermineInputs(rootFunction, visitedFunctions); + + auto func = new CompositeFunction(rootFunction, std::move(visitedFunctions), name); + return CompositeFunctionPtr(func, [](_ReferenceCounter* ptr) { delete ptr; }); } virtual BackPropStatePtr Forward(const _Internal::_SimpleMap& arguments, _Internal::_SimpleMap& outputs, const _Internal::_SimpleSet& outputsToRetainBackwardStateFor, - const DeviceDescriptor& computeDevice); + const DeviceDescriptor& computeDevice) override; virtual void Backward(const BackPropStatePtr& state, const _Internal::_SimpleMap& rootGradientValues, - _Internal::_SimpleMap& backPropagatedGradientValuesForInputs); + _Internal::_SimpleMap& backPropagatedGradientValuesForInputs) override; private: - CompositeFunction(const std::vector& inputs, const std::vector& outputs, const FunctionPtr& rootFunction, _Internal::_SimpleSet&& allPrimitiveFunctions, const std::wstring& name) - : Function(inputs, outputs, rootFunction, name), m_allPrimitiveFunctions(std::move(allPrimitiveFunctions)) + virtual void _ReplacePlaceholders(const _Internal::_SimpleMap& placeholderReplacements, _Internal::_SimpleSet& visitedFunctions, _Internal::_SimpleSet& replacedPlaceholders) override; + + CompositeFunction(const FunctionPtr& rootFunction, _Internal::_SimpleSet&& allPrimitiveFunctions, const std::wstring& name) + : Function({}, rootFunction->Outputs(), rootFunction, name), m_allPrimitiveFunctions(std::move(allPrimitiveFunctions)) { } - static std::vector DetermineInputs(const FunctionPtr& rootFunction, _Internal::_SimpleSet& visitedFunctions) + std::vector DetermineInputs() const + { + _Internal::_SimpleSet visitedFunctions; + return _DetermineInputs(RootFunction(), visitedFunctions); + } + + static std::vector _DetermineInputs(const FunctionPtr& rootFunction, _Internal::_SimpleSet& visitedFunctions) { visitedFunctions.Insert(rootFunction); @@ 
-259,7 +332,7 @@ namespace CNTK else if (!visitedFunctions.Contains(currentInput.Owner())) { FunctionPtr function = currentInput.Owner(); - std::vector functionInputs = DetermineInputs(function, visitedFunctions); + std::vector functionInputs = _DetermineInputs(function, visitedFunctions); std::copy(functionInputs.begin(), functionInputs.end(), std::back_inserter(inputs)); } } @@ -271,10 +344,10 @@ namespace CNTK Microsoft::MSR::CNTK::ComputationNetworkPtr GetComputationNetwork(const DeviceDescriptor& device, const _Internal::_SimpleSet& backpropRoots); template - static Microsoft::MSR::CNTK::ComputationNodeBasePtr GetOutputVariableNode(const Variable& variable, Microsoft::MSR::CNTK::ComputationNetworkBuilder& builder, std::unordered_map& variableToNodeMap, std::unordered_map& isVariableRootMap); + static Microsoft::MSR::CNTK::ComputationNodeBasePtr GetOutputVariableNode(const Variable& variable, Microsoft::MSR::CNTK::ComputationNetworkPtr& network, Microsoft::MSR::CNTK::ComputationNetworkBuilder& builder, std::unordered_map& variableToNodeMap, std::unordered_map& isVariableRootMap); template - static Microsoft::MSR::CNTK::ComputationNodeBasePtr GetNode(const Variable& variable, Microsoft::MSR::CNTK::ComputationNetworkBuilder& builder, std::unordered_map& variableToNodeMap, std::unordered_map& isVariableRootMap); + static Microsoft::MSR::CNTK::ComputationNodeBasePtr GetNode(const Variable& variable, Microsoft::MSR::CNTK::ComputationNetworkPtr& network, Microsoft::MSR::CNTK::ComputationNetworkBuilder& builder, std::unordered_map& variableToNodeMap, std::unordered_map& isVariableRootMap); void PopulateNetworkInputs(const _Internal::_SimpleMap& arguments); void PopulateNetworkGradients(const _Internal::_SimpleMap& gradients); @@ -282,10 +355,11 @@ namespace CNTK void GetNetworkOutputs(std::unordered_map& outputs); void GetNetworkGradients(std::unordered_map& gradients); - static void CopyNDArrayViewToComputationNodeValue(const NDArrayViewPtr& arrayView, Microsoft::MSR::CNTK::ComputationNodeBasePtr node); - static void CopyNDArrayViewToComputationNodeGradient(const NDArrayViewPtr& arrayView, Microsoft::MSR::CNTK::ComputationNodeBasePtr node); + template + static std::pair>, Microsoft::MSR::CNTK::MBLayoutPtr> GetCNTKImplMatrixAndMBLayoutFromValueObject(Variable var, const ValuePtr& value); - static void CopyComputationNodeDataToNDArrayView(const Microsoft::MSR::CNTK::ComputationNodeBasePtr& node, NDArrayViewPtr arrayView, bool copyGradient); + template + static ValuePtr GetValueObjectFromCNTKImplMatrixAndMBLayout(Variable var, const Microsoft::MSR::CNTK::Matrix& matrix, const Microsoft::MSR::CNTK::MBLayoutPtr& layout); private: _Internal::_SimpleSet m_allPrimitiveFunctions; diff --git a/Source/CNTKv2LibraryDll/LibraryTests/FeedForwardTests.cpp b/Source/CNTKv2LibraryDll/LibraryTests/FeedForwardTests.cpp index b09236a20..67c68dfa5 100644 --- a/Source/CNTKv2LibraryDll/LibraryTests/FeedForwardTests.cpp +++ b/Source/CNTKv2LibraryDll/LibraryTests/FeedForwardTests.cpp @@ -62,21 +62,21 @@ void TestFeedForwardNetworkCreation(const DeviceDescriptor& device) size_t iterationCount = 4; unsigned int randSeed = 2; srand(randSeed); - size_t numSamples = 1; + size_t numSamples = 3; for (size_t i = 0; i < iterationCount; ++i) { std::vector inputData(inputDim * numSamples); for (size_t i = 0; i < inputData.size(); ++i) inputData[i] = ((float)rand()) / RAND_MAX; - NDShape inputShape = { inputDim, numSamples }; + NDShape inputShape = { inputDim, 1, numSamples }; ValuePtr inputValue = new Value(new 
NDArrayView(inputShape, inputData.data(), inputData.size(), DeviceDescriptor::CPUDevice(), true)); std::vector labelData(numOutputClasses * numSamples, 0); for (size_t i = 0; i < numSamples; ++i) labelData[(i*numOutputClasses) + (rand() % numOutputClasses)] = 1; - NDShape labelShape = { numOutputClasses, numSamples }; + NDShape labelShape = { numOutputClasses, 1, numSamples }; ValuePtr labelValue = new Value(new NDArrayView(labelShape, labelData.data(), labelData.size(), DeviceDescriptor::CPUDevice(), true)); ValuePtr outputValue, predictionErrorValue; @@ -119,10 +119,10 @@ void TestTimesAndPlus(size_t inputDim, for (size_t i = 0; i < inputData.size(); ++i) inputData[i] = ((ElementType)rand()) / RAND_MAX; - NDShape inputShape = { inputDim, numSamples }; + NDShape inputShape = { inputDim, 1, numSamples }; ValuePtr inputValue = new Value(new NDArrayView(inputShape, inputData.data(), inputData.size(), DeviceDescriptor::CPUDevice(), true)); - NDShape outputShape = { outputDim, numSamples }; + NDShape outputShape = { outputDim, 1, numSamples }; std::vector outputData(outputShape.TotalSize()); ValuePtr outputValue; if (usePreAllocatedOutputs) diff --git a/Source/CNTKv2LibraryDll/LibraryTests/LibraryTests.vcxproj b/Source/CNTKv2LibraryDll/LibraryTests/LibraryTests.vcxproj index f052be556..d1fe15095 100644 --- a/Source/CNTKv2LibraryDll/LibraryTests/LibraryTests.vcxproj +++ b/Source/CNTKv2LibraryDll/LibraryTests/LibraryTests.vcxproj @@ -88,12 +88,14 @@ true true true + CNTKLibrary-2.0.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + diff --git a/Source/CNTKv2LibraryDll/LibraryTests/LibraryTests.vcxproj.filters b/Source/CNTKv2LibraryDll/LibraryTests/LibraryTests.vcxproj.filters index cc77dceb3..32b13c81a 100644 --- a/Source/CNTKv2LibraryDll/LibraryTests/LibraryTests.vcxproj.filters +++ b/Source/CNTKv2LibraryDll/LibraryTests/LibraryTests.vcxproj.filters @@ -27,6 +27,9 @@ Source Files + + Source Files + diff --git a/Source/CNTKv2LibraryDll/LibraryTests/Main.cpp b/Source/CNTKv2LibraryDll/LibraryTests/Main.cpp index 11024a479..b5dd42afd 100644 --- a/Source/CNTKv2LibraryDll/LibraryTests/Main.cpp +++ b/Source/CNTKv2LibraryDll/LibraryTests/Main.cpp @@ -4,10 +4,12 @@ void NDArrayViewTests(); void TensorTests(); void FeedForwardTests(); +void RecurrentFunctionTests(); int main() { NDArrayViewTests(); TensorTests(); FeedForwardTests(); + RecurrentFunctionTests(); } diff --git a/Source/CNTKv2LibraryDll/LibraryTests/RecurrentFunctionTests.cpp b/Source/CNTKv2LibraryDll/LibraryTests/RecurrentFunctionTests.cpp new file mode 100644 index 000000000..982660d82 --- /dev/null +++ b/Source/CNTKv2LibraryDll/LibraryTests/RecurrentFunctionTests.cpp @@ -0,0 +1,440 @@ +#include "CNTKLibrary.h" +#include +#include "Common.h" +#include + +using namespace CNTK; + +static unsigned long seed = 1; + +template +std::pair LSTMPCellWithSelfStabilization(Variable input, Variable prevOutput, Variable prevCellState, const DeviceDescriptor& device) +{ + assert(input.Shape().NumAxes() == 1); + size_t inputDim = input.Shape()[0]; + + size_t outputDim = prevOutput.Shape()[0]; + size_t cellDim = prevCellState.Shape()[0]; + + auto Wxo = Parameter(NDArrayView::RandomUniform({ cellDim, inputDim }, -0.5, 0.5, seed++, device)); + auto Wxi = Parameter(NDArrayView::RandomUniform({ cellDim, inputDim }, -0.5, 0.5, seed++, device)); + auto Wxf = Parameter(NDArrayView::RandomUniform({ cellDim, inputDim }, -0.5, 0.5, seed++, 
device)); + auto Wxc = Parameter(NDArrayView::RandomUniform({ cellDim, inputDim }, -0.5, 0.5, seed++, device)); + + auto Bo = Parameter({ cellDim }, (ElementType)0.0, device); + auto Bc = Parameter({ cellDim }, (ElementType)0.0, device); + auto Bi = Parameter({ cellDim }, (ElementType)0.0, device); + auto Bf = Parameter({ cellDim }, (ElementType)0.0, device); + + auto Whi = Parameter(NDArrayView::RandomUniform({ cellDim, outputDim }, -0.5, 0.5, seed++, device)); + auto Wci = Parameter(NDArrayView::RandomUniform({ cellDim }, -0.5, 0.5, seed++, device)); + + auto Whf = Parameter(NDArrayView::RandomUniform({ cellDim, outputDim }, -0.5, 0.5, seed++, device)); + auto Wcf = Parameter(NDArrayView::RandomUniform({ cellDim }, -0.5, 0.5, seed++, device)); + + auto Who = Parameter(NDArrayView::RandomUniform({ cellDim, outputDim }, -0.5, 0.5, seed++, device)); + auto Wco = Parameter(NDArrayView::RandomUniform({ cellDim }, -0.5, 0.5, seed++, device)); + + auto Whc = Parameter(NDArrayView::RandomUniform({ cellDim, outputDim }, -0.5, 0.5, seed++, device)); + + auto Wmr = Parameter(NDArrayView::RandomUniform({ outputDim, cellDim }, -0.5, 0.5, seed++, device)); + + // Stabilization by routing input through an extra scalar parameter + auto sWxo = Parameter({}, (ElementType)0.0, device); + auto sWxi = Parameter({}, (ElementType)0.0, device); + auto sWxf = Parameter({}, (ElementType)0.0, device); + auto sWxc = Parameter({}, (ElementType)0.0, device); + + auto sWhi = Parameter({}, (ElementType)0.0, device); + auto sWci = Parameter({}, (ElementType)0.0, device); + + auto sWhf = Parameter({}, (ElementType)0.0, device); + auto sWcf = Parameter({}, (ElementType)0.0, device); + auto sWho = Parameter({}, (ElementType)0.0, device); + auto sWco = Parameter({}, (ElementType)0.0, device); + auto sWhc = Parameter({}, (ElementType)0.0, device); + + auto sWmr = Parameter({}, (ElementType)0.0, device); + + auto expsWxo = Exp(sWxo); + auto expsWxi = Exp(sWxi); + auto expsWxf = Exp(sWxf); + auto expsWxc = Exp(sWxc); + + auto expsWhi = Exp(sWhi); + auto expsWci = Exp(sWci); + + auto expsWhf = Exp(sWhf); + auto expsWcf = Exp(sWcf); + auto expsWho = Exp(sWho); + auto expsWco = Exp(sWco); + auto expsWhc = Exp(sWhc); + + auto expsWmr = Exp(sWmr); + + auto Wxix = Times(Wxi, ElementTimes(expsWxi, input)); + auto Whidh = Times(Whi, ElementTimes(expsWhi, prevOutput)); + auto Wcidc = ElementTimes(Wci, ElementTimes(expsWci, prevCellState)); + + auto it = Sigmoid(Plus(Plus(Plus(Wxix, Bi), Whidh), Wcidc)); + + auto Wxcx = Times(Wxc, ElementTimes(expsWxc, input)); + auto Whcdh = Times(Whc, ElementTimes(expsWhc, prevOutput)); + auto bit = ElementTimes(it, Tanh(Plus(Wxcx, Plus(Whcdh, Bc)))); + + auto Wxfx = Times(Wxf, ElementTimes(expsWxf, input)); + auto Whfdh = Times(Whf, ElementTimes(expsWhf, prevOutput)); + auto Wcfdc = ElementTimes(Wcf, ElementTimes(expsWcf, prevCellState)); + + auto ft = Sigmoid(Plus(Plus(Plus(Wxfx, Bf), Whfdh), Wcfdc)); + + auto bft = ElementTimes(ft, prevCellState); + + auto ct = Plus(bft, bit); + + auto Wxox = Times(Wxo, ElementTimes(expsWxo, input)); + auto Whodh = Times(Who, ElementTimes(expsWho, prevOutput)); + auto Wcoct = ElementTimes(Wco, ElementTimes(expsWco, ct)); + + auto ot = Sigmoid(Plus(Plus(Plus(Wxox, Bo), Whodh), Wcoct)); + + auto mt = ElementTimes(ot, Tanh(ct)); + + return{ Times(Wmr, ElementTimes(expsWmr, mt)), ct }; +} + +template +FunctionPtr LSTMPComponentWithSelfStabilization(Variable input, size_t outputDim, size_t cellDim, const DeviceDescriptor& device) +{ + auto dh = Placeholder({ outputDim 
}); + auto dc = Placeholder({ cellDim }); + + auto LSTMCell = LSTMPCellWithSelfStabilization(input, dh, dc, device); + + auto actualDh = PastValue(Constant({}, (ElementType)0.0, device), LSTMCell.first, 1); + auto actualDc = PastValue(Constant({}, (ElementType)0.0, device), LSTMCell.second, 1); + + // Form the recurrence loop by replacing the dh and dc placeholders with the actualDh and actualDc + return LSTMCell.first->ReplacePlaceholders({ { dh, actualDh }, { dc, actualDc } }); +} + +template +FunctionPtr LSTMNet(Variable features, size_t cellDim, size_t hiddenDim, size_t numOutputClasses, size_t numLSTMLayers, const DeviceDescriptor& device) +{ + assert(numLSTMLayers >= 1); + auto classifierRoot = LSTMPComponentWithSelfStabilization(features, hiddenDim, cellDim, device); + for (size_t i = 1; i < numLSTMLayers; ++i) { + classifierRoot = LSTMPComponentWithSelfStabilization(classifierRoot, hiddenDim, cellDim, device); + } + + auto W = Parameter(NDArrayView::RandomUniform({ numOutputClasses, hiddenDim }, -0.5, 0.5, seed++, device)); + auto b = Parameter({ numOutputClasses }, (ElementType)0.0, device); + + auto sW = Parameter({}, (ElementType)0.0, device); + auto expsW = Exp(sW); + + return Plus(Times(W, ElementTimes(expsW, classifierRoot)), b); +} + +template +void TestRecurrentNetworkCreation(const DeviceDescriptor& device) +{ + const size_t inputDim = 937; + const size_t numLSTMLayers = 3; + const size_t cellDim = 1024; + const size_t hiddenDim = 512; + const size_t numOutputClasses = 9304; + + Variable features({ inputDim }, GetDataType(), L"Features"); + auto classifierOutputFunction = LSTMNet(features, cellDim, hiddenDim, numOutputClasses, numLSTMLayers, device); + + Variable labelsVar = Variable({ numOutputClasses }, GetDataType(), L"Labels"); + auto trainingLossFunction = CrossEntropyWithSoftmax(classifierOutputFunction, labelsVar, L"lossFunction"); + auto predictionFunction = PredictionError(classifierOutputFunction, labelsVar, L"predictionError"); + + auto LSTMClassifier = Combine({ trainingLossFunction, predictionFunction, classifierOutputFunction }, L"LSTMClassifier"); + + // Now test the structure + if (LSTMClassifier->Arguments().size() != 2) + throw std::exception("TestRecurrentNetworkCreation: Function does not have expected Argument count"); + + if (LSTMClassifier->Outputs().size() != 3) + throw std::exception("TestRecurrentNetworkCreation: Function does not have expected Output count"); + + if (LSTMClassifier->Parameters().size() != ((numLSTMLayers * 28) + 3)) + throw std::exception("TestRecurrentNetworkCreation: Function does not have expected Parameter count"); + + // Run Forward and backward a few times + size_t iterationCount = 3; + unsigned int randSeed = 2; + srand(randSeed); + size_t numSequences = 7; + size_t maxAllowedSequenceLength = 11; + for (size_t i = 0; i < iterationCount; ++i) + { + std::vector sequenceLengths(numSequences); + size_t maxActualSequenceLength = 0; + for (size_t i = 0; i < numSequences; ++i) + { + sequenceLengths[i] = (rand() % maxAllowedSequenceLength) + 1; + if (sequenceLengths[i] > maxActualSequenceLength) + maxActualSequenceLength = sequenceLengths[i]; + } + + std::vector> inputSequences; + for (size_t i = 0; i < numSequences; ++i) + { + std::vector currentSequence(inputDim * sequenceLengths[i]); + for (size_t j = 0; j < currentSequence.size(); ++j) + currentSequence[j] = ((ElementType)rand()) / RAND_MAX; + + inputSequences.push_back(std::move(currentSequence)); + } + + ValuePtr inputValue = Value::Create({ inputDim }, inputSequences, 
device, true); + + std::vector> labelsData; + for (size_t i = 0; i < numSequences; ++i) + { + std::vector currentSequence(numOutputClasses * sequenceLengths[i]); + for (size_t j = 0; j < sequenceLengths[i]; ++j) + currentSequence[(j * numOutputClasses) + (rand() % numOutputClasses)] = 1; + + labelsData.push_back(std::move(currentSequence)); + } + + ValuePtr labelValue = Value::Create({ numOutputClasses }, labelsData, device, true); + + ValuePtr outputValue, predictionErrorValue; + std::unordered_map outputs = { { classifierOutputFunction->Output(), outputValue }, { predictionFunction->Output(), predictionErrorValue } }; + auto backpropState = LSTMClassifier->Forward({ { features, inputValue }, { labelsVar, labelValue } }, outputs, device, { trainingLossFunction->Output() }); + + // Perform backprop + NDShape outputShape = trainingLossFunction->Output().Shape(); + std::vector rootGradientsData(outputShape.TotalSize(), 1); + ValuePtr rootGradientValue = new Value(new NDArrayView(outputShape, rootGradientsData.data(), rootGradientsData.size(), DeviceDescriptor::CPUDevice(), true)); + std::unordered_map paramGradients; + auto allParams = LSTMClassifier->Parameters(); + for (auto iter = allParams.begin(); iter != allParams.end(); ++iter) + paramGradients[*iter] = nullptr; + + LSTMClassifier->Backward(backpropState, { { trainingLossFunction->Output(), rootGradientValue } }, paramGradients); + } +} + +template +void TestSimpleRecurrence(size_t inputDim, + size_t outputDim, + size_t maxAllowedSequenceLength, + size_t numSequences, + const DeviceDescriptor& device, + size_t numIterations, + bool useFutureValue, + unsigned int seed = 1) +{ + Parameter timesParam(new NDArrayView((ElementType)0.5, { outputDim, inputDim }, device)); + Parameter plusParam(new NDArrayView((ElementType)0.1, { outputDim }, device)); + + Variable inputVar({ inputDim }, GetDataType(), true, L"input"); + + auto placeholder = Placeholder({ outputDim }); + auto plusOutput = Plus(plusParam, Plus(placeholder, Times(timesParam, inputVar))); + FunctionPtr placeholderReplacement; + if (useFutureValue) + placeholderReplacement = FutureValue(Constant({}, (ElementType)0.0, device), plusOutput, 1); + else + placeholderReplacement = PastValue(Constant({}, (ElementType)0.0, device), plusOutput, 1); + + plusOutput = plusOutput->ReplacePlaceholders({ { placeholder, placeholderReplacement } }); + + auto reducedOutput = ReduceSum(plusOutput); + + auto rootFunc = Combine({ reducedOutput, plusOutput }); + + srand(seed); + for (size_t iterIdx = 0; iterIdx < numIterations; ++iterIdx) + { + std::vector sequenceLengths(numSequences); + size_t maxActualSequenceLength = 0; + for (size_t i = 0; i < numSequences; ++i) + { + sequenceLengths[i] = (rand() % maxAllowedSequenceLength) + 1; + if (sequenceLengths[i] > maxActualSequenceLength) + maxActualSequenceLength = sequenceLengths[i]; + } + + size_t totalNumInputSamples = maxActualSequenceLength * numSequences; + std::vector inputData(inputDim * totalNumInputSamples, std::numeric_limits::quiet_NaN()); + for (size_t i = 0; i < numSequences; ++i) + { + for (size_t j = 0; j < maxActualSequenceLength; ++j) + { + size_t sampleIdx = (i * maxActualSequenceLength) + j; + for (size_t k = 0; k < inputDim; ++k) + { + if (j < sequenceLengths[i]) + inputData[(sampleIdx * inputDim) + k] = ((ElementType)rand()) / RAND_MAX; + } + } + } + + NDShape inputShape = { inputDim, maxActualSequenceLength, numSequences }; + + NDMaskPtr inputMask = new NDMask({ maxActualSequenceLength, numSequences }, 
DeviceDescriptor::CPUDevice()); + for (size_t i = 0; i < numSequences; ++i) + inputMask->MaskSection({ sequenceLengths[i], i }, {NDShape::InferredDimension, 1}); + + ValuePtr inputValue = new Value(new NDArrayView(inputShape, inputData.data(), inputData.size(), DeviceDescriptor::CPUDevice(), true), inputMask); + + NDShape reducedOutputShape = {}; + std::vector reducedOutputData(reducedOutputShape.TotalSize()); + ValuePtr reducedOutputValue = new Value(new NDArrayView(reducedOutputShape, reducedOutputData.data(), reducedOutputData.size(), DeviceDescriptor::CPUDevice(), false)); + + NDShape plusOutputShape = plusOutput->Output().Shape().AppendShape({ maxActualSequenceLength, numSequences }); + std::vector plusOutputData(plusOutputShape.TotalSize()); + ValuePtr plusOutputValue = new Value(new NDArrayView(plusOutputShape, plusOutputData.data(), plusOutputData.size(), DeviceDescriptor::CPUDevice(), false), new NDMask(inputMask->Shape(), inputMask->Device())); + + std::unordered_map outputs = { { reducedOutput->Output(), reducedOutputValue }, { plusOutput->Output(), plusOutputValue } }; + auto backpropState = rootFunc->Forward({ { inputVar, inputValue } }, outputs, device, { plusOutput->Output() }); + + // Perform backprop + std::vector rootGradientsData(plusOutputShape.TotalSize(), std::numeric_limits::quiet_NaN()); + for (size_t i = 0; i < numSequences; ++i) + { + for (size_t j = 0; j < maxActualSequenceLength; ++j) + { + size_t sampleIdx = (i * maxActualSequenceLength) + j; + for (size_t k = 0; k < outputDim; ++k) + { + if (j < sequenceLengths[i]) + rootGradientsData[(sampleIdx * outputDim) + k] = 1; + } + } + } + + ValuePtr rootGradientValue = new Value(new NDArrayView(plusOutputShape, rootGradientsData.data(), rootGradientsData.size(), DeviceDescriptor::CPUDevice(), true), inputMask->DeepClone()); + + std::vector plusParameterGradientData(plusParam.Shape().TotalSize()); + std::vector timesParameterGradientData(timesParam.Shape().TotalSize()); + std::vector inputGradientData(inputShape.TotalSize()); + ValuePtr plusParameterGradientValue = new Value(new NDArrayView(plusParam.Shape(), plusParameterGradientData.data(), plusParameterGradientData.size(), DeviceDescriptor::CPUDevice(), false)); + ValuePtr timesParameterGradientValue = new Value(new NDArrayView(timesParam.Shape(), timesParameterGradientData.data(), timesParameterGradientData.size(), DeviceDescriptor::CPUDevice(), false)); + ValuePtr inputGradientValue = new Value(new NDArrayView(inputShape, inputGradientData.data(), inputGradientData.size(), DeviceDescriptor::CPUDevice(), false), inputMask->DeepClone()); + + std::unordered_map outGradients = { { inputVar, inputGradientValue }, { plusParam, plusParameterGradientValue }, { timesParam, timesParameterGradientValue } }; + rootFunc->Backward(backpropState, { { plusOutput->Output(), rootGradientValue } }, outGradients); + + // Verify forward prop results + std::vector expectedPlusOutputData(plusOutputShape.TotalSize(), 0); + ElementType expectedReducedValue = 0; + for (size_t i = 0; i < numSequences; ++i) + { + size_t currentSequenceLength = sequenceLengths[i]; + if (useFutureValue) + { + for (int j = (int)(currentSequenceLength - 1); j >= 0; j--) + { + ElementType value = 0; + for (size_t k = 0; k < inputDim; ++k) + value += (ElementType)(0.5 * inputData[(((i * maxActualSequenceLength) + j) * inputDim) + k]); + + for (size_t k = 0; k < outputDim; ++k) + { + expectedPlusOutputData[(((i * maxActualSequenceLength) + j) * outputDim) + k] = (ElementType)(value + 0.1); + + if (j != 
(currentSequenceLength - 1)) + expectedPlusOutputData[(((i * maxActualSequenceLength) + j) * outputDim) + k] += expectedPlusOutputData[(((i * maxActualSequenceLength) + (j + 1)) * outputDim) + k]; + } + + expectedReducedValue += (outputDim * (ElementType)((value + 0.1) * (j + 1))); + } + } + else + { + for (size_t j = 0; j < currentSequenceLength; j++) + { + ElementType value = 0; + for (size_t k = 0; k < inputDim; ++k) + value += (ElementType)(0.5 * inputData[(((i * maxActualSequenceLength) + j) * inputDim) + k]); + + for (size_t k = 0; k < outputDim; ++k) + { + expectedPlusOutputData[(((i * maxActualSequenceLength) + j) * outputDim) + k] = (ElementType)(value + 0.1); + + if (j != 0) + expectedPlusOutputData[(((i * maxActualSequenceLength) + j) * outputDim) + k] += expectedPlusOutputData[(((i * maxActualSequenceLength) + (j - 1)) * outputDim) + k]; + } + + expectedReducedValue += (outputDim * (ElementType)((value + 0.1) * (currentSequenceLength - j))); + } + } + } + + FloatingPointVectorCompare(reducedOutputData, std::vector({ expectedReducedValue }), "TestSimpleRecurrence: Forward prop results do not match expected results"); + FloatingPointVectorCompare(plusOutputData, expectedPlusOutputData, "TestSimpleRecurrence: Forward prop results do not match expected results"); + + // Verify backward prop results + ElementType expectedPlusParameterGradientValue = 0; + for (size_t i = 0; i < numSequences; ++i) + { + size_t currentSequenceLength = sequenceLengths[i]; + expectedPlusParameterGradientValue += (currentSequenceLength * (currentSequenceLength + 1)) / 2; + } + + for (size_t k = 0; k < plusParam.Shape().TotalSize(); ++k) + if (plusParameterGradientData[k] != expectedPlusParameterGradientValue) + throw std::exception("TestSimpleRecurrence: Backward prop results do not match expected results for Plus params gradients"); + + std::vector expectedTimesParamsGradientValues(timesParam.Shape().TotalSize(), 0); + for (size_t i = 0; i < numSequences; ++i) + { + size_t currentSequenceLength = sequenceLengths[i]; + for (size_t k = 0; k < inputDim; ++k) + { + ElementType gradVal = 0; + for (size_t j = 0; j < currentSequenceLength; j++) + { + if (useFutureValue) + gradVal += (j + 1) * inputData[(((i * maxActualSequenceLength) + j) * inputDim) + k]; + else + gradVal += (currentSequenceLength - j) * inputData[(((i * maxActualSequenceLength) + j) * inputDim) + k]; + } + + for (size_t j = 0; j < outputDim; ++j) + expectedTimesParamsGradientValues[(k * outputDim) + j] += gradVal; + } + } + + FloatingPointVectorCompare(timesParameterGradientData, expectedTimesParamsGradientValues, "TestSimpleRecurrence: Backward prop results do not match expected results for Times params gradients"); + + std::vector expectedInputGradientValues(inputShape.TotalSize(), 0); + for (size_t i = 0; i < numSequences; ++i) + { + size_t currentSequenceLength = sequenceLengths[i]; + for (size_t j = 0; j < currentSequenceLength; j++) + { + ElementType gradVal = 0; + for (size_t k = 0; k < outputDim; ++k) + { + if (useFutureValue) + gradVal += (ElementType)((j + 1) * 0.5); + else + gradVal += (ElementType)((currentSequenceLength - j) * 0.5); + } + + for (size_t k = 0; k < inputDim; ++k) + expectedInputGradientValues[(((i * maxActualSequenceLength) + j) * inputDim) + k] = gradVal; + } + } + + FloatingPointVectorCompare(inputGradientData, expectedInputGradientValues, "TestSimpleRecurrence: Backward prop results do not match expected results for input gradients"); + } +} + +void RecurrentFunctionTests() +{ + TestSimpleRecurrence(2, 1, 4, 1, 
DeviceDescriptor::CPUDevice(), 3, false); + TestSimpleRecurrence(11, 9, 16, 7, DeviceDescriptor::GPUDevice(0), 5, true); + TestRecurrentNetworkCreation(DeviceDescriptor::GPUDevice(0)); + TestRecurrentNetworkCreation(DeviceDescriptor::CPUDevice()); +} diff --git a/Source/CNTKv2LibraryDll/LibraryTests/TensorTests.cpp b/Source/CNTKv2LibraryDll/LibraryTests/TensorTests.cpp index b40d408ca..edd96eae7 100644 --- a/Source/CNTKv2LibraryDll/LibraryTests/TensorTests.cpp +++ b/Source/CNTKv2LibraryDll/LibraryTests/TensorTests.cpp @@ -29,17 +29,17 @@ void TestTensorPlus(size_t numAxesLeftOperand, size_t numAxesRightOperand, const for (size_t i = 0; i < leftInputData.size(); ++i) leftInputData[i] = ((ElementType)rand()) / RAND_MAX; - auto leftInputValueShape = leftInputShape.AppendShape({ 1 }); + auto leftInputValueShape = leftInputShape.AppendShape({ 1, 1 }); ValuePtr leftInputValue = new Value(new NDArrayView(leftInputValueShape, leftInputData, true)); std::vector rightInputData(rightInputShape.TotalSize()); for (size_t i = 0; i < rightInputData.size(); ++i) rightInputData[i] = ((ElementType)rand()) / RAND_MAX; - auto rightInputValueShape = rightInputShape.AppendShape({ 1 }); + auto rightInputValueShape = rightInputShape.AppendShape({ 1, 1 }); ValuePtr rightInputValue = new Value(new NDArrayView(rightInputValueShape, rightInputData, true)); - NDShape outputShape = plusFunc->Output().Shape().AppendShape({ 1 }); + NDShape outputShape = plusFunc->Output().Shape().AppendShape({ 1, 1 }); std::vector outputData(outputShape.TotalSize()); ValuePtr outputValue = new Value(new NDArrayView(outputShape, outputData, false)); diff --git a/Source/CNTKv2LibraryDll/NDArrayView.cpp b/Source/CNTKv2LibraryDll/NDArrayView.cpp index 83fc68b1a..b8daf7031 100644 --- a/Source/CNTKv2LibraryDll/NDArrayView.cpp +++ b/Source/CNTKv2LibraryDll/NDArrayView.cpp @@ -15,18 +15,6 @@ using namespace Microsoft::MSR::CNTK; namespace CNTK { - static std::pair GetMatrixDimensions(const NDShape& viewShape) - { - // Ensure none of the shape dimensions are unknown - if (viewShape.HasInferredDimension()) - InvalidArgument("Cannot create an NDArrayView using a view shape that has unknown dimensions for any of it's axes!"); - - size_t matrixRowSize = (viewShape.NumAxes() > 0) ? viewShape[0] : 1; - size_t matrixColSize = (viewShape.NumAxes() > 0) ? 
viewShape.SubShape(1).TotalSize() : 1; - - return{ matrixRowSize, matrixColSize }; - } - template static void* AllocateTensorView(const NDShape& viewShape, const DeviceDescriptor& device, @@ -103,30 +91,34 @@ namespace CNTK } template - std::shared_ptr> NDArrayView::GetMatrixImpl() const + std::shared_ptr> NDArrayView::GetMatrixImpl(size_t rowColSplitPoint) const { const TensorView* tensorView = GetTensorView(); auto tensorShape = tensorView->GetShape(); if (tensorShape.GetRank() <= 2) return tensorView->AsMatrix(); - // Determine the split point - std::vector dimsToDrop(tensorShape.GetRank(), false); - for (size_t k = 1; k < tensorShape.GetRank(); ++k) - if (tensorShape.CanFlatten(k)) - dimsToDrop[k - 1] = true; + size_t splitPoint = rowColSplitPoint; + if (splitPoint == AutoSelectRowColSplitPoint) + { + // Determine the split point + std::vector dimsToDrop(tensorShape.GetRank(), false); + for (size_t k = 1; k < tensorShape.GetRank(); ++k) + if (tensorShape.CanFlatten(k)) + dimsToDrop[k - 1] = true; - // There should be at most 2 dims we cannot drop - auto numDimsThatCannotBeDropped = std::count_if(dimsToDrop.begin(), dimsToDrop.end(), [](const bool& val) { - return !val; - }); + // There should be at most 2 dims we cannot drop + auto numDimsThatCannotBeDropped = std::count_if(dimsToDrop.begin(), dimsToDrop.end(), [](const bool& val) { + return !val; + }); - if (numDimsThatCannotBeDropped > 2) - LogicError("The TensorView underlying this NDArrayView cannot be flattened to a Matrix"); + if (numDimsThatCannotBeDropped > 2) + LogicError("The TensorView underlying this NDArrayView cannot be flattened to a Matrix"); - size_t splitPoint = 0; - while (dimsToDrop[splitPoint]) - splitPoint++; + splitPoint = 0; + while (dimsToDrop[splitPoint]) + splitPoint++; + } tensorShape.FlattenTo2DInPlace(splitPoint, "NDArrayView::GetMatrix"); @@ -134,18 +126,18 @@ namespace CNTK } template - std::shared_ptr> NDArrayView::GetMatrix() const + std::shared_ptr> NDArrayView::GetMatrix(size_t rowColSplitPoint/* = AutoSelectRowColSplitPoint*/) const { - return GetMatrixImpl(); + return GetMatrixImpl(rowColSplitPoint); } template - std::shared_ptr> NDArrayView::GetWritableMatrix() + std::shared_ptr> NDArrayView::GetWritableMatrix(size_t rowColSplitPoint/* = AutoSelectRowColSplitPoint*/) { if (IsReadOnly()) LogicError("NDArrayView::GetWritableMatrix: Cannot get writable Matrix from a read-only NDArrayView"); - return GetMatrixImpl(); + return GetMatrixImpl(rowColSplitPoint); } template @@ -168,7 +160,7 @@ namespace CNTK NDArrayViewPtr NDArrayView::DeepClone(bool readOnly/* = false*/) const { - NDArrayViewPtr newView = new NDArrayView(this->DataType(), this->Shape(), nullptr, 0, this->Device(), readOnly); + NDArrayViewPtr newView(new NDArrayView(this->DataType(), this->Shape(), nullptr, 0, this->Device(), readOnly), [](_ReferenceCounter* ptr) { delete ptr; }); switch (m_dataType) { case DataType::Float: @@ -242,9 +234,7 @@ namespace CNTK } auto aliasView = new NDArrayView(DataType(), Device(), StorageFormat(), Shape(), IsReadOnly() || readOnly, tensorView);; - return NDArrayViewPtr(aliasView, [](_ReferenceCounter* ptr) { - delete ptr; - }); + return NDArrayViewPtr(aliasView, [](_ReferenceCounter* ptr) { delete ptr; }); } // TODO: This could actually be strided? 
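The split-point logic above decides where an N-dimensional TensorView is cut into the row and column dimensions of a 2D Matrix. A minimal standalone sketch of that flattening rule (illustrative only; FlattenAt and the plain vector shape are assumptions, not the CNTK types):

#include <cstddef>
#include <numeric>
#include <utility>
#include <vector>

// Dims [0, splitPoint) collapse into the row dimension and dims
// [splitPoint, rank) into the column dimension, mirroring what
// GetMatrixImpl achieves via TensorShape::FlattenTo2DInPlace.
std::pair<std::size_t, std::size_t> FlattenAt(const std::vector<std::size_t>& dims, std::size_t splitPoint)
{
    auto mul = [](std::size_t a, std::size_t b) { return a * b; };
    std::size_t rows = std::accumulate(dims.begin(), dims.begin() + splitPoint, std::size_t(1), mul);
    std::size_t cols = std::accumulate(dims.begin() + splitPoint, dims.end(), std::size_t(1), mul);
    return { rows, cols };
}

For example, a value of shape { inputDim, maxSequenceLength, numSequences } flattened at splitPoint = 1 yields an inputDim x (maxSequenceLength * numSequences) matrix, the column-per-sample layout the V1 Matrix code expects; passing AutoSelectRowColSplitPoint instead lets GetMatrixImpl pick the split by flattening every axis that TensorShape::CanFlatten allows.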
@@ -278,18 +268,16 @@ namespace CNTK auto tensorView = new TensorView(randomUniformMatrix, AsTensorShape(shape)); auto view = new NDArrayView(GetDataType(), device, StorageFormat::Dense, shape, false, tensorView); - return NDArrayViewPtr(view, [](_ReferenceCounter* ptr) { - delete ptr; - }); + return NDArrayViewPtr(view, [](_ReferenceCounter* ptr) { delete ptr; }); } // Explicit template instantiations template CNTK_API NDArrayViewPtr NDArrayView::RandomUniform(const NDShape& shape, double rangeStart, double rangeEnd, unsigned long seed, const DeviceDescriptor& device/* = DeviceDescriptor::DefaultDevice()*/); + template CNTK_API NDArrayViewPtr NDArrayView::RandomUniform(const NDShape& shape, double rangeStart, double rangeEnd, unsigned long seed, const DeviceDescriptor& device/* = DeviceDescriptor::DefaultDevice()*/); template CNTK_API const float* NDArrayView::DataBuffer() const; template CNTK_API const double* NDArrayView::DataBuffer() const; template CNTK_API float* NDArrayView::WritableDataBuffer(); template CNTK_API double* NDArrayView::WritableDataBuffer(); - } diff --git a/Source/CNTKv2LibraryDll/NDMask.cpp b/Source/CNTKv2LibraryDll/NDMask.cpp new file mode 100644 index 000000000..244edcebb --- /dev/null +++ b/Source/CNTKv2LibraryDll/NDMask.cpp @@ -0,0 +1,109 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE.md file in the project root for full license information. +// + +#include "stdafx.h" +#include "CNTKLibrary.h" +#include "Utils.h" +#include "Matrix.h" +#include +#include "TensorShape.h" + +using namespace Microsoft::MSR::CNTK; + +namespace CNTK +{ + static Matrix* AllocateMatrix(const NDShape& viewShape, const DeviceDescriptor& device) + { + auto matrixDims = GetMatrixDimensions(viewShape); + auto maskMatrix = new Matrix(matrixDims.first, matrixDims.second, AsCNTKImplDeviceId(device)); + maskMatrix->SetValue(1); + + return maskMatrix; + } + + NDMask::NDMask(const NDShape& shape, Matrix* matrix) + : m_device(AsDeviceDescriptor(matrix->GetDeviceId())), m_maskShape(shape), m_matrixView(matrix) + { + } + + NDMask::NDMask(const NDShape& shape, const DeviceDescriptor& device/* = DeviceDescriptor::DefaultDevice()*/) + : NDMask(shape, AllocateMatrix(shape, device)) + { + if (shape.NumAxes() > 2) + LogicError("NDMask instances with more than 2 axes are currently unsupported"); + } + + NDMask::~NDMask() + { + delete m_matrixView; + } + + void NDMask::MaskSection(const std::vector& sectionOffset, const NDShape& sectionShape) + { + // TODO: Implement batching of masking operation for masks residing on GPUs to avoid making + // GPU invocations for each MaskSection call. + + if (sectionOffset.size() > m_maskShape.NumAxes()) + LogicError("NDMask::MaskSection: The sectionOffset cannot have dimensionality higher than the number of axes of 'this' mask"); + + if (sectionShape.NumAxes() > m_maskShape.NumAxes()) + LogicError("NDMask::MaskSection: The section shape cannot have an axes count higher than the number of axes of 'this' mask"); + + std::vector offset(m_maskShape.NumAxes(), 0); + for (size_t i = 0; i < sectionOffset.size(); ++i) + offset[i] = sectionOffset[i]; + + NDShape shape = sectionShape.AppendShape(NDShape(m_maskShape.NumAxes() - sectionShape.NumAxes(), NDShape::InferredDimension)); + + auto maskMatrix = GetMatrix(); + size_t rowOffset = offset[0]; + size_t colOffset = offset[1]; + size_t sliceRowLength = (shape[0] != NDShape::InferredDimension) ? 
shape[0] : (maskMatrix->GetNumRows() - rowOffset); + size_t sliceColLength = (shape[1] != NDShape::InferredDimension) ? shape[1] : (maskMatrix->GetNumCols() - colOffset); + if ((rowOffset == 0) && (sliceRowLength == maskMatrix->GetNumRows())) + maskMatrix->ColumnSlice(colOffset, sliceColLength).SetValue(0); + else + { + // Since Matrix does not support strides in the row dimension, we will need to create separate slices for each column + for (size_t i = colOffset; i < (colOffset + sliceColLength); ++i) + { + auto column = maskMatrix->ColumnSlice(i, 1); + column.Reshape(1, maskMatrix->GetNumRows()); + column.ColumnSlice(rowOffset, sliceRowLength).SetValue(0); + } + } + } + + void NDMask::Clear() + { + GetMatrix()->SetValue(1); + } + + Matrix* NDMask::GetMatrix() const + { + return m_matrixView; + } + + void NDMask::CopyFrom(const NDMask& source) + { + if (source.Shape() != Shape()) + InvalidArgument("NDMask::CopyFrom: The 'source' mask's shape must be the same as the shape of this NDMask"); + + GetMatrix()->AssignValuesOf(*source.GetMatrix()); + } + + NDMaskPtr NDMask::DeepClone() const + { + NDMaskPtr newMask = new NDMask(this->Shape(), this->Device()); + newMask->CopyFrom(*this); + + return NDMaskPtr(newMask, [](_ReferenceCounter* ptr) { delete ptr; }); + } + + NDMaskPtr NDMask::Alias() const + { + return NDMaskPtr(new NDMask(this->Shape(), new Matrix(GetMatrix()->AsReference())), [](_ReferenceCounter* ptr) { delete ptr; }); + } +} diff --git a/Source/CNTKv2LibraryDll/Utils.cpp b/Source/CNTKv2LibraryDll/Utils.cpp index f8aa8eb61..3e8214b41 100644 --- a/Source/CNTKv2LibraryDll/Utils.cpp +++ b/Source/CNTKv2LibraryDll/Utils.cpp @@ -215,6 +215,7 @@ namespace CNTK // Explicit template instantiations template class _SimpleSet; template class _SimpleSet; + template class _SimpleSet; template bool operator==(const _SimpleSet& first, const _SimpleSet& second); @@ -314,6 +315,7 @@ namespace CNTK // Explicit template instantiations template class _SimpleMap; + template class _SimpleMap; #pragma endregion _SimpleMap @@ -351,4 +353,14 @@ namespace CNTK { return (*m_dictionaryData)[key]; } + + DictionaryValue Dictionary::operator[](const wchar_t* key) const + { + return m_dictionaryData->at(key); + } + + bool Dictionary::Contains(const wchar_t* key) const + { + return (m_dictionaryData->find(key) != m_dictionaryData->end()); + } } diff --git a/Source/CNTKv2LibraryDll/Utils.h b/Source/CNTKv2LibraryDll/Utils.h index a056ebfa7..116fb1a05 100644 --- a/Source/CNTKv2LibraryDll/Utils.h +++ b/Source/CNTKv2LibraryDll/Utils.h @@ -233,9 +233,22 @@ namespace CNTK return operator[](key.c_str()); } - private: DictionaryValue& operator[](const wchar_t* key); + DictionaryValue operator[](const std::wstring& key) const + { + return operator[](key.c_str()); + } + + DictionaryValue operator[](const wchar_t* key) const; + + bool Contains(const std::wstring& key) const + { + return Contains(key.c_str()); + } + + bool Contains(const wchar_t* key) const; + private: std::unordered_map* m_dictionaryData; }; @@ -308,4 +321,16 @@ namespace CNTK return shapeString + "]"; } + + inline std::pair GetMatrixDimensions(const NDShape& viewShape) + { + // Ensure none of the shape dimensions are unknown + if (viewShape.HasInferredDimension()) + InvalidArgument("Cannot create an NDArrayView using a view shape that has unknown dimensions for any of its axes!"); + + size_t matrixRowSize = (viewShape.NumAxes() > 0) ? viewShape[0] : 1; + size_t matrixColSize = (viewShape.NumAxes() > 0) ? 
viewShape.SubShape(1).TotalSize() : 1; + + return{ matrixRowSize, matrixColSize }; + } } diff --git a/Source/CNTKv2LibraryDll/Value.cpp b/Source/CNTKv2LibraryDll/Value.cpp index 98deb8cf2..4a6c0c697 100644 --- a/Source/CNTKv2LibraryDll/Value.cpp +++ b/Source/CNTKv2LibraryDll/Value.cpp @@ -8,19 +8,125 @@ namespace CNTK { Value::Value(const NDArrayViewPtr& data) - : m_data(data) + : Value(data, nullptr) { } + Value::Value(const NDArrayViewPtr& data, const NDMaskPtr& mask) + : m_data(data), m_mask(mask) + { + if ((mask != nullptr) && (mask->Shape().NumAxes() > data->Shape().NumAxes())) + InvalidArgument("The number of axes of the mask of a Value object cannot exceed the number of axes of the data NDArrayView object"); + + if (mask != nullptr) + { + auto dataShape = data->Shape(); + auto maskShape = mask->Shape(); + if (dataShape.SubShape(dataShape.NumAxes() - maskShape.NumAxes()) != maskShape) + InvalidArgument("Invalid Value object; the data and mask are incompatible. The trailing dimensions of the data do not match the dimensions of the mask"); + } + } + + template + /*static*/ ValuePtr Value::Create(const NDShape& sampleShape, const std::vector>& sequences, const DeviceDescriptor& device, bool readOnly/* = false*/) + { + size_t numSequences = sequences.size(); + std::vector sequenceLengths(numSequences); + size_t sampleSize = sampleShape.TotalSize(); + size_t maxSequenceLength = 0; + bool needsMask = false; + for (size_t i = 0; i < numSequences; ++i) + { + sequenceLengths[i] = sequences[i].size() / sampleSize; + if (maxSequenceLength < sequenceLengths[i]) + maxSequenceLength = sequenceLengths[i]; + + if ((i > 0) && (sequenceLengths[i - 1] != sequenceLengths[i])) + needsMask = true; + } + + NDShape valueDataShape = sampleShape.AppendShape({ maxSequenceLength, numSequences }); + NDArrayViewPtr valueData(new NDArrayView(GetDataType(), valueDataShape, DeviceDescriptor::CPUDevice()), [](_ReferenceCounter* ptr) { delete ptr; }); + ElementType* dataBuffer = valueData->WritableDataBuffer(); + for (size_t i = 0; i < numSequences; ++i) + std::copy(sequences[i].data(), sequences[i].data() + sequences[i].size(), dataBuffer + (maxSequenceLength * i * sampleSize)); + + NDArrayViewPtr deviceValueData; + if (device == DeviceDescriptor::CPUDevice()) + { + if (readOnly) + deviceValueData = valueData->Alias(true); + else + deviceValueData = valueData; + } + else + { + deviceValueData = NDArrayViewPtr(new NDArrayView(GetDataType(), valueDataShape, device), [](_ReferenceCounter* ptr) { delete ptr; }); + deviceValueData->CopyFrom(*valueData); + if (readOnly) + deviceValueData = deviceValueData->Alias(true); + } + + NDMaskPtr deviceValueMask; + if (needsMask) + { + NDShape valueMaskShape = { maxSequenceLength, numSequences }; + deviceValueMask = NDMaskPtr(new NDMask(valueMaskShape, device), [](_ReferenceCounter* ptr) {delete ptr; }); + for (size_t i = 0; i < numSequences; ++i) + deviceValueMask->MaskSection({ sequenceLengths[i], i }, { NDShape::InferredDimension, 1 }); + } + + return ValuePtr(new Value(deviceValueData, deviceValueMask), [](_ReferenceCounter* ptr) { delete ptr; }); + } + /*virtual*/ Value::~Value() { } /*virtual*/ NDArrayViewPtr Value::Data() const { - if (m_data == nullptr) - LogicError("The data NDArrayView underlying this 'Value' object is null!"); - + // TODO: Check if this is a derived type and throw an exception in that case return m_data; } + + /*virtual*/ NDMaskPtr Value::Mask() const + { + // TODO: Check if this is a derived type and throw an exception in that case + return m_mask; + } + 
+    /*virtual*/ ValuePtr Value::DeepClone(bool readOnly/* = false*/) const
+    {
+        // TODO: Check if this is a derived type and throw an exception in that case
+        return ValuePtr(new Value(Data()->DeepClone(readOnly), (Mask() != nullptr) ? Mask()->DeepClone() : nullptr), [](_ReferenceCounter* ptr) { delete ptr; });
+    }
+
+    /*virtual*/ ValuePtr Value::Alias(bool readOnly/* = false*/) const
+    {
+        // TODO: Check if this is a derived type and throw an exception in that case
+        return ValuePtr(new Value(Data()->Alias(readOnly), (Mask() != nullptr) ? Mask()->Alias() : nullptr), [](_ReferenceCounter* ptr) { delete ptr; });
+    }
+
+    /*virtual*/ void Value::CopyFrom(const Value& source)
+    {
+        // TODO: Check if this is a derived type and throw an exception in that case
+        Data()->CopyFrom(*source.Data());
+        if ((Mask() == nullptr) && (source.Mask() != nullptr))
+            InvalidArgument("Value::CopyFrom: Invalid source object; Cannot copy a Value with a mask into 'this' Value that does not have a mask.");
+
+        if (source.Mask() != nullptr)
+            Mask()->CopyFrom(*source.Mask());
+        else
+        {
+            if (Mask() != nullptr)
+            {
+                // Clear the mask
+                Mask()->Clear();
+            }
+        }
+    }
+
+    // Explicit template instantiations
+    template /*static*/ CNTK_API ValuePtr Value::Create<float>(const NDShape& sampleShape, const std::vector<std::vector<float>>& sequences, const DeviceDescriptor& device, bool readOnly/* = false*/);
+    template /*static*/ CNTK_API ValuePtr Value::Create<double>(const NDShape& sampleShape, const std::vector<std::vector<double>>& sequences, const DeviceDescriptor& device, bool readOnly/* = false*/);
 }
diff --git a/Source/Common/Include/Sequences.h b/Source/Common/Include/Sequences.h
index f74d30dab..20b25c1d4 100644
--- a/Source/Common/Include/Sequences.h
+++ b/Source/Common/Include/Sequences.h
@@ -196,7 +196,6 @@ public:
     // packing algorithm
     //  - width: maximum width of structure; set to maximum over sequence lengths
     //  - inputSequences: vector of input SequenceInfo records (only seqId and GetNumTimeSteps() are used)
-    //  - [out] *pMBLayout: MBLayout that describes the created packed sequence set
     //  - placement, rowAllocations: temp buffers (passed in to be able to optimize memory allocations)
     template <typename SequenceInfoVector>
     void InitAsPackedSequences(const SequenceInfoVector& inputSequences,
@@ -255,6 +254,12 @@ public:
     size_t GetNumTimeSteps() const { return m_numTimeSteps; }
     size_t GetNumParallelSequences() const { return m_numParallelSequences; }
 
+    size_t GetNumSequences() const
+    {
+        return std::count_if(m_sequences.begin(), m_sequences.end(), [](const SequenceInfo& sequence) {
+            return sequence.seqId != GAP_SEQUENCE_ID;
+        });
+    }
+
     // axis names are for now only a debugging aid
     // In the future, there will be a mechanism to denote that axes are meant to be the same.
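[Note: a minimal sketch, not part of the patch, of how the Value::Create overload added above is meant to be used -- packing two sequences of unequal length over a one-axis sample shape. The function and variable names here are illustrative only.]

    #include "CNTKLibrary.h"
    #include <vector>

    void VariableLengthValueSketch()
    {
        using namespace CNTK;

        // One-axis sample shape of 2 elements; sequence lengths 3 and 1.
        NDShape sampleShape = { 2 };
        std::vector<std::vector<float>> sequences = {
            { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f }, // 3 samples x 2 elements
            { 7.0f, 8.0f }                          // 1 sample  x 2 elements
        };

        // Create packs the data into a [2 x 3 x 2] tensor laid out as
        // (sample x maxSequenceLength x numSequences). Because the lengths
        // differ, a [3 x 2] NDMask is allocated and time steps 1..2 of the
        // second sequence are masked out via MaskSection.
        ValuePtr value = Value::Create<float>(sampleShape, sequences, DeviceDescriptor::CPUDevice());

        NDShape dataShape = value->Data()->Shape(); // { 2, 3, 2 }
        NDMaskPtr mask = value->Mask();             // non-null here, since the lengths differ
    }
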
diff --git a/Source/Math/CPUMatrix.cpp b/Source/Math/CPUMatrix.cpp
index 53ef7927f..3603f0a02 100644
--- a/Source/Math/CPUMatrix.cpp
+++ b/Source/Math/CPUMatrix.cpp
@@ -6313,8 +6313,10 @@ template void CPUMatrix<char>::SetValue(CPUMatrix<char> const&);
 //template void CPUMatrix<char>::SetValue(GPUSparseMatrix<char> const&);
 template void CPUMatrix<char>::RequireSize(const size_t numRows, const size_t numCols, bool growOnly);
 template void CPUMatrix<char>::Resize(const size_t numRows, const size_t numCols, bool growOnly);
+template char* CPUMatrix<char>::CopyToArray(void) const;
 template void CPUMatrix<char>::CopySection(size_t numRows, size_t numCols, char* dst, size_t colStride) const;
+template void CPUMatrix<char>::Reshape(const size_t, const size_t);
 
 template CPUMatrix<int>::CPUMatrix(const size_t, const size_t, int*, const size_t);
 template CPUMatrix<int>::~CPUMatrix();
diff --git a/Source/Math/GPUMatrix.cu b/Source/Math/GPUMatrix.cu
index cd2bd1a80..01d91c9bd 100644
--- a/Source/Math/GPUMatrix.cu
+++ b/Source/Math/GPUMatrix.cu
@@ -4410,6 +4410,7 @@ template void GPUMatrix<char>::SetValue(GPUMatrix<char> const&);
 //template void GPUMatrix<char>::SetValue(GPUSparseMatrix<char> const&);
 template void GPUMatrix<char>::CopySection(size_t numRows, size_t numCols, char* dst, size_t colStride) const;
+template void GPUMatrix<char>::Reshape(const size_t, const size_t);
 
 template GPUMatrix<int>::GPUMatrix(const size_t, const size_t, int, int*, const size_t);
 template GPUMatrix<int>::~GPUMatrix();
diff --git a/Source/Math/GPUSparseMatrix.cu b/Source/Math/GPUSparseMatrix.cu
index f8b3167e0..772a3e067 100644
--- a/Source/Math/GPUSparseMatrix.cu
+++ b/Source/Math/GPUSparseMatrix.cu
@@ -2676,6 +2676,7 @@ template GPUSparseMatrix<char>::~GPUSparseMatrix();
 template GPUSparseMatrix<char> GPUSparseMatrix<char>::ColumnSlice(size_t, size_t) const;
 template GPUMatrix<char> GPUSparseMatrix<char>::CopyColumnSliceToDense(size_t, size_t) const;
 template GPUSparseMatrix<char>& GPUSparseMatrix<char>::operator=(GPUSparseMatrix<char>&&);
+template void GPUSparseMatrix<char>::Reshape(const size_t, const size_t);
 
 template GPUSparseMatrix<int>::GPUSparseMatrix(DEVICEID_TYPE, const MatrixFormat);
 template GPUSparseMatrix<int>::~GPUSparseMatrix();
diff --git a/Source/Math/Matrix.cpp b/Source/Math/Matrix.cpp
index 399d6b3fe..3645b0c88 100644
--- a/Source/Math/Matrix.cpp
+++ b/Source/Math/Matrix.cpp
@@ -5404,6 +5404,8 @@ template void Matrix<char>::SetValue(const Matrix<char>&);
 template void Matrix<char>::AssignValuesOf(const Matrix<char>&);
 template bool Matrix<char>::IsEmpty() const;
 template void Matrix<char>::Resize(const size_t numRows, const size_t numCols, const size_t numNZElemToReserve, bool growOnly);
+template void Matrix<char>::Reshape(const size_t, const size_t);
+template char* Matrix<char>::CopyToArray(void) const;
 
 template Matrix<int>::Matrix(const size_t, const size_t, int*, DEVICEID_TYPE, const size_t, const size_t);
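[Note: the Math-library hunks above all add explicit template instantiations. Reshape and CopyToArray are defined in the .cpp/.cu files rather than in headers, so every element type now used from another translation unit (char here, for the byte-level copies the V2 library performs) must be instantiated by hand or the build fails at link time. A minimal self-contained sketch of the idiom follows; Buffer and the file names are hypothetical, not from this patch.]

    // buffer.h -- declaration only; the definition lives in buffer.cpp.
    #include <cstddef>

    template <typename T>
    class Buffer
    {
    public:
        void Reshape(size_t numRows, size_t numCols);
    };

    // buffer.cpp -- definition plus explicit instantiations. Without the
    // 'template void Buffer<char>::Reshape(...)' line, a caller that uses
    // Buffer<char>::Reshape from another translation unit gets an
    // unresolved-symbol link error -- exactly what the added
    // 'template ... Reshape' lines in this patch avoid.
    template <typename T>
    void Buffer<T>::Reshape(size_t numRows, size_t numCols)
    {
        // ... reinterpret the existing storage with the new dimensions ...
        (void)numRows;
        (void)numCols;
    }

    template void Buffer<float>::Reshape(size_t, size_t);
    template void Buffer<double>::Reshape(size_t, size_t);
    template void Buffer<char>::Reshape(size_t, size_t);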