merged with master. One thing not compiling
This commit is contained in:
Коммит
59ae7b2c77
|
@ -1657,7 +1657,7 @@ void PrintBuiltInfo()
|
|||
fprintf(stderr, "\t\tCUDA_PATH: %s\n", _CUDA_PATH_);
|
||||
#endif
|
||||
#ifdef _CUB_PATH_
|
||||
fprintf(stderr, "\t\tCUDA_PATH: %s\n", _CUB_PATH_);
|
||||
fprintf(stderr, "\t\tCUB_PATH: %s\n", _CUB_PATH_);
|
||||
#endif
|
||||
#ifdef _GIT_EXIST
|
||||
fprintf(stderr, "\t\tBuild Branch: %s\n", _BUILDBRANCH_);
|
||||
|
|
|
@ -294,7 +294,7 @@ public:
|
|||
m_deviceId = EnforceOneGPUOnly(m_deviceId); // see EnforceOneGPUOnly() for comment on what this is
|
||||
}
|
||||
|
||||
DEVICEID_TYPE GetDeviceId() { return m_deviceId; }
|
||||
DEVICEID_TYPE GetDeviceId() const { return m_deviceId; }
|
||||
|
||||
unsigned long GetRandomSeedOffset() { return m_randomSeedOffset; }
|
||||
void SetRandomSeedOffset(unsigned long value) { m_randomSeedOffset = value; }
|
||||
|
|
|
@ -119,6 +119,25 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
virtual ~IComputationNode() { }
|
||||
};
|
||||
|
||||
// =======================================================================
// INodeState / IStateFulNode
// Interface for stateful nodes (e.g., DelayedValueNodeBase) and the base type of the state they exchange.
// The interface allows a node's state to be exported and later imported again.
// It is needed by the sub-minibatch implementation.
// =======================================================================

// Base class of an exported node state; the concrete content is defined by classes deriving from it.
class INodeState : public std::enable_shared_from_this<INodeState>
{
public:
    virtual ~INodeState() {}
};

struct /*interface*/ IStateFulNode
{
    typedef std::shared_ptr<INodeState> NodeStatePtr;

    // Virtual so that an implementation can be destroyed safely through an IStateFulNode pointer.
    virtual ~IStateFulNode() {}

    // Returns the node's current state as a shared INodeState.
    virtual NodeStatePtr ExportState() = 0;

    // Takes over a state previously produced by ExportState().
    virtual void ImportState(const NodeStatePtr& pImportedState) = 0;
};
|
||||
|
||||
// =======================================================================
|
||||
// ComputationNetworkOwnedNodeState -- class to collect ComputationNode members that are really owned by ComputationNetwork
|
||||
// These members are only to be set, changed, and read by ComputationNetwork code.
|
||||
|
@ -1475,6 +1494,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
}
|
||||
};
|
||||
|
||||
|
||||
|
||||
// =======================================================================
|
||||
// helper macro to ease access to base members in presence of C++ two-phase name lookup
|
||||
// =======================================================================
|
||||
|
|
|
@ -24,6 +24,51 @@
|
|||
|
||||
namespace Microsoft { namespace MSR { namespace CNTK {
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// The following defines a state of a delay node which is going to be exported to others (saving for the next minibatch)
|
||||
// -----------------------------------------------------------------------
|
||||
template<class ElemType>
|
||||
class DelayedValueNodeState: public INodeState
|
||||
{
|
||||
|
||||
public:
|
||||
DelayedValueNodeState(int deviceID) :
|
||||
m_cachedActivity((size_t)0, (size_t)0, deviceID), m_delayedActivationMBLayout(nullptr), m_isEmpty(true)
|
||||
{ }
|
||||
void CacheDelayedMBLayout(const MBLayoutPtr& pMBLayout)
|
||||
{
|
||||
m_delayedActivationMBLayout = make_shared<MBLayout>();
|
||||
m_delayedActivationMBLayout->CopyFrom(pMBLayout);
|
||||
}
|
||||
void CacheState(const Matrix<ElemType>& cachedActivity)
|
||||
{
|
||||
m_cachedActivity.SetValue(cachedActivity);
|
||||
m_isEmpty = false;
|
||||
}
|
||||
void ExportDelayedMBLayout(MBLayoutPtr& pMBLayout)
|
||||
{
|
||||
pMBLayout->CopyFrom(m_delayedActivationMBLayout);
|
||||
}
|
||||
bool IsEmpty()
|
||||
{
|
||||
return m_isEmpty;
|
||||
}
|
||||
const Matrix<ElemType>& ExportCachedActivity()
|
||||
{
|
||||
return m_cachedActivity;
|
||||
}
|
||||
|
||||
protected:
|
||||
Matrix<ElemType> m_cachedActivity; // 1 column per parallel sequence
|
||||
// MBLayoutPtr m_shiftedMBLayout;
|
||||
// Currently, we only support saving state for m_timeStep == 1
|
||||
// there is no need for this m_shiftedMBLayout if m_timeStep == 1
|
||||
MBLayoutPtr m_delayedActivationMBLayout;
|
||||
bool m_isEmpty; // in some case
|
||||
// (e.g., at the boundary of sentence end or begin/full utterance mode), we don't need to store state (but we do need to need know m_delayedActivationMBLayout)
|
||||
};
|
||||
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// DelayedValueNodeBase (input) -- abstract base class for PastValueNode and FutureValueNode to hold all shared code
|
||||
// The two differ in the step direction, some loop directions, and sequence-boundary flags.
|
||||
|
@ -31,9 +76,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
|
||||
// TODO: 'direction' is really too general. signOfTimeOffset?
|
||||
template<class ElemType, int direction/*-1 for Past/left-to-right or +1 for Future/right-to-left*/, MinibatchPackingFlags SequenceStart_or_End/*-Start or -End*/>
|
||||
class DelayedValueNodeBase : public ComputationNode<ElemType>, public ILateAttachingNode, public NumInputs<1>
|
||||
class DelayedValueNodeBase : public ComputationNode<ElemType>, public
|
||||
ILateAttachingNode, public IStateFulNode, public NumInputs<1>
|
||||
{
|
||||
typedef ComputationNode<ElemType> Base; UsingComputationNodeMembersBoilerplate;
|
||||
typedef std::shared_ptr<DelayedValueNodeState<ElemType>> DelayedNodeStatePtr;
|
||||
static const std::wstring TypeName() { return L"DelayedValue"; }
|
||||
private:
|
||||
void Init(size_t row_size, size_t col_size, ElemType initialActivationValue = (ElemType)DEFAULT_HIDDEN_ACTIVATION)
|
||||
|
@ -352,6 +399,129 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
}
|
||||
}
|
||||
|
||||
//========================================
|
||||
// implement the IStateFulNode interface
|
||||
//========================================
|
||||
|
||||
// Exports this node's delay state so that it can be re-imported later (see ImportState()).
// The returned state always carries a copy of m_delayedActivationMBLayout. It additionally
// carries a column slice of m_delayedActivation unless every parallel sequence is at a
// boundary (sequence end/start or no feature) at the time step the delay would read from.
// Raises RuntimeError if m_timeStep != 1 or if 'direction' is neither -1 nor +1.
virtual NodeStatePtr ExportState() override
{
    NodeStatePtr pExportedState;
    size_t nT = m_pMBLayout->GetNumTimeSteps();
    size_t nU = m_pMBLayout->GetNumParallelSequences();
    int dir = direction; // copied to a variable so the tests below are not constant expressions
    if (m_timeStep != 1)
    {
        // not supported yet; give user a hint
        RuntimeError("Currently importing/exporting state info for timeStep>1 is not supported. Contact erw@microsoft.com for more detail");
    }

    if (dir == -1 || dir == 1)
    {
        // Looking into the past, the state matters only across the last frame;
        // looking into the future, only across the first frame.
        const bool lookingBack = (dir == -1);
        const size_t tBoundary = lookingBack ? nT - 1 : 0;
        const auto boundaryFlags = lookingBack
            ? (MinibatchPackingFlags::SequenceEnd | MinibatchPackingFlags::NoFeature)
            : (MinibatchPackingFlags::SequenceStart | MinibatchPackingFlags::NoFeature);

        // If all sequences are at a boundary (or have no feature) there is no need to carry state info.
        // The time-step-level check is kept as a cheap pre-test; when it fails, not all sequences
        // are at a boundary. (Previously the future direction had no such 'else' and always
        // ended up dropping the state in that case.)
        bool allAtBoundary = false;
        if (m_pMBLayout->Is(tBoundary, boundaryFlags))
        {
            allAtBoundary = true;
            for (size_t u = 0; u < nU; u++)
            {
                if (!m_pMBLayout->Is(u, tBoundary, boundaryFlags))
                {
                    allAtBoundary = false;
                    break;
                }
            }
        }

        auto pState = make_shared<DelayedValueNodeState<ElemType>>(m_deviceId);
        if (!allAtBoundary)
        {
            // NOTE(review): both directions export the last nU columns, while ImportState() writes
            // the future direction to columns [0, nU) -- confirm this asymmetry is intended.
            pState->CacheState(m_delayedActivation.ColumnSlice((nT - 1)*nU, nU));
        }
        pState->CacheDelayedMBLayout(m_delayedActivationMBLayout);
        // When all sequences are at a boundary this is an "empty" state that still carries the layout.
        // (Previously the past direction built such a state but forgot to return it.)
        pExportedState = pState;
    }
    else
    {
        RuntimeError("Unrecognized direction in DelayedValueNodeBase");
    }
    return pExportedState;
}
|
||||
virtual void ImportState(const NodeStatePtr& pImportedState) override
|
||||
{
|
||||
DelayedNodeStatePtr pState = dynamic_pointer_cast<DelayedValueNodeState<ElemType>> (pImportedState);
|
||||
|
||||
if (!pState)
|
||||
RuntimeError("Expecting DelayValueNodeState after down casting");
|
||||
|
||||
pState->ExportDelayedMBLayout(m_delayedActivationMBLayout); // pstate copy to m_delayedActivationMBLayout
|
||||
if (pState->IsEmpty())
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
const Matrix<ElemType>& delayedActivation = pState->ExportCachedActivity();
|
||||
size_t nT = m_delayedActivationMBLayout->GetNumTimeSteps();
|
||||
size_t nU = m_delayedActivationMBLayout->GetNumParallelSequences();
|
||||
|
||||
int dir = direction;
|
||||
if (dir == -1) // looking backward
|
||||
{
|
||||
m_delayedActivation.SetColumnSlice(delayedActivation, (nT - 1)*nU, nU);
|
||||
}
|
||||
if (dir == 1)
|
||||
{
|
||||
//m_delayedActivation.CopyColumnsStrided(delayedActivation, nU, 1, nT);
|
||||
m_delayedActivation.SetColumnSlice(delayedActivation, 0, nU);
|
||||
}
|
||||
if (dir != -1 && dir == 1)
|
||||
{// it is really a compile error ?
|
||||
RuntimeError("Unrecognized direction in DelayedValueNodeBase");
|
||||
}
|
||||
|
||||
}
|
||||
protected:
|
||||
|
||||
ElemType m_initialActivationValue; // starting value for hidden activation vector at boundary
|
||||
|
|
|
@ -10,6 +10,8 @@
|
|||
#include <map>
|
||||
#include "TrainingCriterionNodes.h"
|
||||
|
||||
//#define SMB_DEBUG
|
||||
|
||||
namespace Microsoft { namespace MSR { namespace CNTK {
|
||||
|
||||
/*static*/ struct DataReaderHelpers
|
||||
|
@ -166,4 +168,404 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
}
|
||||
};
|
||||
|
||||
// SubminibatchHelpers
|
||||
// Helper for sub-minibatch implementation
|
||||
// A sub-minibatch is a part of a minibatch; it helps with computing large minibatches that cannot be loaded into GPU memory in one forward-backward computation
|
||||
// The usage would be :
|
||||
// SubminibatchHelpers sbhelper;
|
||||
// for (;;)
|
||||
// {
|
||||
// size_t nsb=sb.GetMinibatchIntoCache(...);
|
||||
// for (size_t i=0; i<nsb; i++)
|
||||
// {
|
||||
// sbhelper.GetSubMinibatchToNet(i);
|
||||
// net.Evaluate(criterionNodes[0]);
|
||||
// sbhelper.DoneWithCurrentSubMinibatch();
|
||||
// }
|
||||
// UpdateWeights(...);
|
||||
// }
|
||||
|
||||
template<class ElemType>
|
||||
class SubminibatchDispatcher
|
||||
{
|
||||
private:
|
||||
typedef std::vector<shared_ptr<const msra::dbn::latticesource::latticepair>> Lattice;
|
||||
typedef std::vector<size_t> Uid;
|
||||
typedef std::vector<size_t> ExtrauttMap;
|
||||
|
||||
typedef std::vector<shared_ptr<const msra::dbn::latticesource::latticepair>>* LatticePtr;
|
||||
typedef std::vector<size_t>* UidPtr;
|
||||
typedef std::vector<size_t>* ExtrauttMapPtr;
|
||||
typedef std::map<std::wstring, Matrix<ElemType>*> Matrices;
|
||||
|
||||
|
||||
// member variables served as caching space
|
||||
Matrices m_inputMatricesCache;
|
||||
MBLayoutPtr m_MBLayoutCache;
|
||||
LatticePtr m_LatticeCache;
|
||||
UidPtr m_uidCache;
|
||||
ExtrauttMapPtr m_extrauttmapCache;
|
||||
shared_ptr<Matrix<ElemType>> m_NetCriterionAccumulator;
|
||||
shared_ptr<Matrix<ElemType>> m_NetEvaluationAccumulator;
|
||||
std::map<wstring, vector<shared_ptr<INodeState>>> m_NetStates; // m_NetStatefulNodes[node][i] caches the state of i-th subminibatch of node
|
||||
|
||||
|
||||
Matrices m_CachedGraident;
|
||||
// we also need to remember where to put into the net
|
||||
MBLayoutPtr m_NetMBLayoutPtr;
|
||||
std::map<wstring, shared_ptr<ComputationNode<ElemType>>> m_LearnableNodePtr;
|
||||
// followings are lattice-related
|
||||
Matrices m_NetInputMatrixPtr;
|
||||
LatticePtr m_NetLatticePtr;
|
||||
UidPtr m_NetUidPtr;
|
||||
ExtrauttMapPtr m_NetExtrauttMapPtr;
|
||||
// we remember the pointer to the learnable Nodes so that we can accumulate the gradient once a sub-minibatch is done
|
||||
|
||||
|
||||
size_t m_numParallelSequences; // number of paralle sequence in the cached matrix and MBLayout
|
||||
size_t m_numSubminibatches; // how many subminibatches we are going to use ?
|
||||
|
||||
std::vector<shared_ptr<ComputationNode<ElemType>>> m_NetCriterionNodes;
|
||||
std::vector<shared_ptr<ComputationNode<ElemType>>> m_NetEvaluationNodes;
|
||||
std::map<wstring, shared_ptr<IStateFulNode>> m_NetStatefulNodes; // we need to Export/Import states of stateful nodes when we swtich subminibatches
|
||||
|
||||
private:
|
||||
|
||||
// Walks the evaluation order of 'root' and adds every node that implements IStateFulNode
// to 'statefulnode', keyed by node name. Names already present are left untouched.
void EnumerateStatefulNodeWithRoot(ComputationNetwork& net, ComputationNodeBasePtr root, std::map<wstring, shared_ptr<IStateFulNode>>& statefulnode)
{
    std::list<ComputationNodeBasePtr> nodesInEvalOrder = net.GetEvalOrder(root, false);
    for (auto& node : nodesInEvalOrder)
    {
        wstring nodeName = node->GetName();
        if (statefulnode.count(nodeName) != 0)
            continue; // already in the list

        shared_ptr<IStateFulNode> stateful = dynamic_pointer_cast<IStateFulNode>(node);
        if (!stateful)
            continue; // this node carries no state

        statefulnode[nodeName] = stateful;
    }
}
|
||||
// Collects, by name, all stateful nodes found in the evaluation order of any of the given
// criterion or evaluation nodes (criterion nodes are visited first).
std::map<wstring, shared_ptr<IStateFulNode>> EnumerateStatefulNode(ComputationNetwork& net,
                                                                   const std::vector<ComputationNodeBasePtr>& criterionNode,
                                                                   const std::vector<ComputationNodeBasePtr>& evaluationNode)
{
    std::map<wstring, shared_ptr<IStateFulNode>> collected;

    for (size_t i = 0; i < criterionNode.size(); i++)
        EnumerateStatefulNodeWithRoot(net, criterionNode[i], collected);

    for (size_t i = 0; i < evaluationNode.size(); i++)
        EnumerateStatefulNodeWithRoot(net, evaluationNode[i], collected);

    return collected;
}
|
||||
|
||||
public:
|
||||
// Creates a dispatcher with no caches allocated; Init() must be called before use.
// All raw-pointer members and counters are given defined initial values
// (the m_Net*Ptr pointers and the two counters used to be left uninitialized).
SubminibatchDispatcher() :
    m_MBLayoutCache(nullptr), m_LatticeCache(nullptr), m_uidCache(nullptr), m_extrauttmapCache(nullptr),
    m_NetLatticePtr(nullptr), m_NetUidPtr(nullptr), m_NetExtrauttMapPtr(nullptr),
    m_numParallelSequences(0), m_numSubminibatches(0)
{ }
|
||||
|
||||
// Prepares the dispatcher for a network: allocates the layout cache and the criterion/evaluation
// accumulators (on the network's device, zero-initialized), remembers the learnable, criterion
// and evaluation nodes as ComputationNode<ElemType>, and registers every stateful node reachable
// from the criterion/evaluation nodes with an empty per-sub-minibatch state list.
void Init(ComputationNetworkPtr & net,
          const std::list<ComputationNodeBasePtr>& learnableNodes,
          const std::vector<ComputationNodeBasePtr>& criterionNodes,
          const std::vector<ComputationNodeBasePtr>& evaluationNodes)
{
    typedef ComputationNode<ElemType> TypedNode;

    m_MBLayoutCache = make_shared<MBLayout>();
    m_NetCriterionAccumulator = make_shared<Matrix<ElemType>>(1, 1, net->GetDeviceId());
    m_NetEvaluationAccumulator = make_shared<Matrix<ElemType>>(1, evaluationNodes.size(), net->GetDeviceId());

    // remember pointers to the learnable nodes, keyed by node name
    for (const auto& learnable : learnableNodes)
    {
        shared_ptr<TypedNode> typedLearnable = dynamic_pointer_cast<TypedNode>(learnable);
        m_LearnableNodePtr[learnable->NodeName()] = typedLearnable;
    }

    for (size_t i = 0; i < criterionNodes.size(); i++)
        m_NetCriterionNodes.push_back(dynamic_pointer_cast<TypedNode>(criterionNodes[i]));

    for (size_t i = 0; i < evaluationNodes.size(); i++)
        m_NetEvaluationNodes.push_back(dynamic_pointer_cast<TypedNode>(evaluationNodes[i]));

    m_NetCriterionAccumulator->SetValue((ElemType)0);
    m_NetEvaluationAccumulator->SetValue((ElemType)0);

    // enumerate all the nodes, find nodes that have state, and start each with an empty state list
    m_NetStatefulNodes = EnumerateStatefulNode(*net, criterionNodes, evaluationNodes);
    for (const auto& stateful : m_NetStatefulNodes)
        m_NetStates[stateful.first] = vector<shared_ptr<INodeState>>();
}
|
||||
|
||||
~SubminibatchDispatcher()
|
||||
{
|
||||
// TODO: remove these by using shared_ptr
|
||||
delete m_LatticeCache;
|
||||
delete m_uidCache;
|
||||
delete m_extrauttmapCache;
|
||||
|
||||
for (auto x : m_inputMatricesCache)
|
||||
{
|
||||
delete x.second;
|
||||
}
|
||||
|
||||
for (auto x : m_CachedGraident)
|
||||
{
|
||||
delete x.second;
|
||||
}
|
||||
}
|
||||
// Copies the current minibatch (input matrices and MBLayout of 'net') into the dispatcher's cache
// and prepares the per-node gradient accumulators and per-sub-minibatch state slots.
//  - inputMatrices:           name -> matrix map the network reads its input from; remembered so that
//                             GetSubMinibatchToNet() can write sub-minibatches back into it
//  - requestedSubminibatches: desired number of sub-minibatches
// Returns the number of sub-minibatches actually used, which is capped at the number of parallel
// sequences in the cached layout. 'trainSetDataReader' is currently not used.
size_t GetMinibatchIntoCache(IDataReader<ElemType>& trainSetDataReader,
                             ComputationNetwork& net,
                             std::map<std::wstring, Matrix<ElemType>*> & inputMatrices,
                             size_t requestedSubminibatches)
{
    // first, remember interface to the net
    m_NetMBLayoutPtr = net.GetMBLayoutPtr();
    m_NetInputMatrixPtr = inputMatrices;

    // second, get data from reader, store it in cache
    // 1. for each key, allocate the specific matrix on device (first time) or overwrite the cached copy
    for (auto& pa : inputMatrices)
    {
        const wstring& name = pa.first;
        Matrix<ElemType>* M = pa.second;
        auto cached = m_inputMatricesCache.find(name);
        if (cached == m_inputMatricesCache.end())
        {
            m_inputMatricesCache[name] = new Matrix<ElemType>(*M, M->GetDeviceId()); // deep copy from M
        }
        else
        {
            cached->second->SetValue(*M);
        }
    }
    // 2. MBLayout
    m_MBLayoutCache->CopyFrom(net.GetMBLayoutPtr());
    size_t nParallelSequences = m_MBLayoutCache->GetNumParallelSequences();

    // guard against an empty criterion list before looking at its first entry
    if (!m_NetCriterionNodes.empty() && m_NetCriterionNodes[0] != nullptr && (m_NetCriterionNodes[0]->OperationName() == L"SequenceWithSoftmax"))
    {
        // auto node = dynamic_pointer_cast<SequenceWithSoftmaxNode<ElemType>>(criterionNode);
        NOT_IMPLEMENTED;
        // TODO: implement this for Sequence training !!!
    }

    // subminibatches are cut at the parallel sequence level;
    // if #requested subminibatches is larger than #parallel sequences,
    // we cannot split further; instead, each parallel sequence becomes a subminibatch
    size_t actualnumSubminibatches = requestedSubminibatches > nParallelSequences ? nParallelSequences : requestedSubminibatches;

    // 3. third, allocate space for accumulated gradient
    for (auto& n : m_LearnableNodePtr)
    {
        const shared_ptr<ComputationNode<ElemType>>& node = n.second;
        if (!node->IsParameterUpdateRequired())
            continue;

        wstring nodeName = node->GetName();
        if (m_CachedGraident.find(nodeName) != m_CachedGraident.end())
            continue; // already allocated

        // Only the shape/device of the parameter is needed, so bind by reference instead of copying
        // the whole matrix. (The gradient may not be allocated when this function is first called,
        // hence the function value is used as the shape source.)
        auto& funvalue = node->FunctionValues();
        m_CachedGraident[nodeName] = new Matrix<ElemType>(funvalue.GetNumRows(), funvalue.GetNumCols(), funvalue.GetDeviceId());
        m_CachedGraident[nodeName]->SetValue((ElemType)0);
    }

    // 4. for stateful nodes: make sure there is one state slot per sub-minibatch.
    // Slots are allocated on the first minibatch and grown if a later minibatch needs more,
    // so that indexing by sub-minibatch stays in range.
    for (auto& x : m_NetStatefulNodes)
    {
        auto& states = m_NetStates[x.first];
        if (states.size() < actualnumSubminibatches)
        {
            states.resize(actualnumSubminibatches);
        }
    }

    return (m_numSubminibatches = actualnumSubminibatches);
}
|
||||
|
||||
void GetSubMinibatchToNet(size_t iSubminibatch)
|
||||
{
|
||||
Matrices decimatedMatrices;
|
||||
MBLayoutPtr decimatedLayout;
|
||||
DataReaderHelpers::DecimateMinibatch(m_inputMatricesCache, decimatedMatrices, m_MBLayoutCache, decimatedLayout, m_numSubminibatches, iSubminibatch);
|
||||
// NOTE: decimatedMatrices must be released by caller
|
||||
|
||||
//m_NetInputMatrixPtr = decimatedMatrices;
|
||||
for (auto& x : decimatedMatrices)
|
||||
{
|
||||
wstring name = x.first;
|
||||
m_NetInputMatrixPtr[name]->SetValue(*x.second);
|
||||
delete x.second; // TODO: is it safe to delete here ? Yes! SetValue call cuda memcpy so it is a blocking call
|
||||
x.second = nullptr;
|
||||
}
|
||||
|
||||
m_NetMBLayoutPtr->CopyFrom(decimatedLayout);
|
||||
|
||||
for (auto& x : m_NetStatefulNodes)
|
||||
{
|
||||
wstring name = x.first;
|
||||
shared_ptr<IStateFulNode> pNode = x.second;
|
||||
if (m_NetStates[name][iSubminibatch])
|
||||
pNode->ImportState(m_NetStates[name][iSubminibatch]);
|
||||
}
|
||||
|
||||
}
|
||||
// TODO: encapsulate it into a destructor !!! Note: Cannot throw exceptions in destructor.
|
||||
void DoneWithCurrentSubMinibatch(size_t iSubminibatch)
|
||||
{
|
||||
// accumulate gradient here
|
||||
for (auto x : m_CachedGraident)
|
||||
{
|
||||
wstring nodename = x.first;
|
||||
if (m_LearnableNodePtr.find(nodename) == m_LearnableNodePtr.end())
|
||||
{
|
||||
RuntimeError("ERROR: in DoneWithCurrentSubMinibatch: node %ls not found in LeanrableNode", nodename.c_str());
|
||||
}
|
||||
shared_ptr<ComputationNode<ElemType>> pNode = m_LearnableNodePtr[nodename];
|
||||
m_CachedGraident[nodename]->operator+=(pNode->GradientValues());
|
||||
pNode->GradientValues().SetValue((ElemType)0);
|
||||
}
|
||||
// accumulate criterion value
|
||||
Matrix<ElemType>::AddElementToElement(
|
||||
m_NetCriterionNodes[0]->FunctionValues() , 0, 0,
|
||||
*m_NetCriterionAccumulator, 0, 0
|
||||
);
|
||||
m_NetCriterionNodes[0]->FunctionValues().SetValue((ElemType)0);
|
||||
// accumulate evaluation value
|
||||
for (size_t i = 0; i < m_NetEvaluationNodes.size(); i++)
|
||||
{
|
||||
Matrix<ElemType>::AddElementToElement(
|
||||
m_NetEvaluationNodes[i]->FunctionValues(), 0, 0,
|
||||
*m_NetEvaluationAccumulator, 0, i
|
||||
);
|
||||
m_NetEvaluationNodes[i]->FunctionValues().SetValue((ElemType)0);
|
||||
}
|
||||
|
||||
// Export node state
|
||||
for (auto& x : m_NetStatefulNodes)
|
||||
{
|
||||
wstring name = x.first;
|
||||
m_NetStates[name][iSubminibatch] = x.second->ExportState();
|
||||
}
|
||||
}
|
||||
void DoneWithCurrentMinibatch()
|
||||
{
|
||||
for (auto& x : m_CachedGraident)
|
||||
{
|
||||
wstring name = x.first;
|
||||
Matrix<ElemType>* accumulategrad = x.second;
|
||||
|
||||
if (m_LearnableNodePtr.find(name) == m_LearnableNodePtr.end())
|
||||
{
|
||||
// should never happen, remove this code later
|
||||
RuntimeError("ERROR: in DoneWithCurrentSubMinibatch: node %ls not found in LearnableNode", name.c_str());
|
||||
}
|
||||
m_LearnableNodePtr[name]->GradientValues().SetValue(*accumulategrad);
|
||||
x.second->SetValue((ElemType)0);
|
||||
}
|
||||
// also revert net.m_MBLayoutPtr
|
||||
m_NetMBLayoutPtr->CopyFrom(m_MBLayoutCache);
|
||||
|
||||
//m_NetCriterionNodes[0]->FunctionValues().SetValue((ElemType)0);
|
||||
Matrix<ElemType>::AddElementToElement(
|
||||
*m_NetCriterionAccumulator, 0, 0,
|
||||
m_NetCriterionNodes[0]->FunctionValues(), 0, 0
|
||||
);
|
||||
m_NetCriterionAccumulator->SetValue((ElemType)0);
|
||||
|
||||
for (size_t i = 0; i < m_NetEvaluationNodes.size(); i++)
|
||||
{
|
||||
//m_NetEvaluationNodes[i]->FunctionValues().SetValue((ElemType)0);
|
||||
Matrix<ElemType>::AddElementToElement(
|
||||
*m_NetEvaluationAccumulator, 0, i,
|
||||
m_NetEvaluationNodes[i]->FunctionValues(), 0, 0
|
||||
);
|
||||
}
|
||||
m_NetEvaluationAccumulator->SetValue((ElemType)0);
|
||||
}
|
||||
|
||||
#ifdef SMB_DEBUG

// Debug helper: writes 'mat' to the text file 'filename', one matrix row per line.
// The array returned by CopyToArray() is indexed as column-major (element (r,c) at nRows*c + r).
// The template parameters are named differently from the enclosing class's ElemType,
// because re-using a template parameter name of the enclosing template is not valid C++.
template<class MatrixType, class ValueType>
void WriteMatrix(const MatrixType& mat, string filename)
{
    // owned here; released even if opening the file fails
    std::unique_ptr<ValueType[]> pArray(mat.CopyToArray());
    size_t nRows = mat.GetNumRows();
    size_t nCols = mat.GetNumCols();
    FILE* fp = fopenOrDie(filename, "w");
    for (size_t r = 0; r < nRows; r++)
    {
        for (size_t c = 0; c < nCols; c++)
        {
            fprintf(fp, "%.9f ", pArray[nRows*c + r]);
        }
        fprintf(fp, "\n");
    }
    fcloseOrDie(fp);
}

// Debug helper: writes the packing flags of 'pMBLayout' as integers, one parallel sequence per line.
void WriteMBLayout(MBLayoutPtr pMBLayout, wstring filename)
{
    size_t nT = pMBLayout->GetNumTimeSteps();
    size_t nU = pMBLayout->GetNumParallelSequences();

    FILE* fp = fopenOrDie(filename, L"w");
    for (size_t u = 0; u < nU; u++)
    {
        for (size_t t = 0; t < nT; t++)
        {
            MinibatchPackingFlags flag = pMBLayout->Get(u, t);
            fprintf(fp, "%d\t", (int)flag);
        }
        fprintf(fp, "\n");
    }
    fcloseOrDie(fp);
}

// Debug helper: dumps the network's current "features" input (if present) and MBLayout
// for minibatch 'mbID', sub-minibatch 'smbID' into the tmp/ directory.
void WriteInputMatriceAndMBLayout(size_t mbID, size_t smbID)
{
    wstring node = L"features";
    // %ls is the portable conversion for wide strings; size_t values are cast to match %d
    wstring filename = msra::strfun::wstrprintf(L"tmp/%ls.%d.%d", node.c_str(), (int)mbID, (int)smbID);
    auto input = m_NetInputMatrixPtr.find(node);
    if (input != m_NetInputMatrixPtr.end())
    {
        WriteMatrix<Matrix<ElemType>, ElemType>(*input->second, msra::strfun::wcstombs(filename));
    }
    wstring fn = msra::strfun::wstrprintf(L"tmp/Layout.%d.%d", (int)mbID, (int)smbID);
    WriteMBLayout(m_NetMBLayoutPtr, fn);
}

// Debug helper: dumps the "features" matrix of 'm' (if present) and 'pMBLayout'
// for minibatch 'mbID' into the tmp/ directory.
void WriteInputMatriceAndMBLayout(Matrices m, MBLayoutPtr pMBLayout, size_t mbID)
{
    wstring filename = msra::strfun::wstrprintf(L"tmp/features.%d", (int)mbID);
    wstring fn = msra::strfun::wstrprintf(L"tmp/layout.%d", (int)mbID);
    auto features = m.find(L"features");
    if (features != m.end())
    {
        WriteMatrix<Matrix<ElemType>, ElemType>(*features->second, msra::strfun::wcstombs(filename));
    }
    WriteMBLayout(pMBLayout, fn);
}

// Debug helper: dumps the cached gradient of node "LSTMoutput1.bias" (if present) for minibatch 'mbID'.
void WriteGradient(size_t mbID)
{
    wstring node = L"LSTMoutput1.bias";
    wstring filename = msra::strfun::wstrprintf(L"%ls.%d", L"tmp/gradient", (int)mbID);
    auto gradient = m_CachedGraident.find(node);
    if (gradient != m_CachedGraident.end())
    {
        WriteMatrix<Matrix<ElemType>, ElemType>(*gradient->second, msra::strfun::wcstombs(filename));
    }
}

// Debug helper: dumps an arbitrary matrix to the file 'fn'.
void WriteGradient(const Matrix<ElemType>& mat, wstring fn)
{
    WriteMatrix<Matrix<ElemType>, ElemType>(mat, msra::strfun::wcstombs(fn));
}
#endif // SMB_DEBUG
|
||||
};
|
||||
|
||||
}}}
|
||||
|
|
|
@ -110,6 +110,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
// truncated = truncation length
|
||||
m_mbSize = configSGD(L"minibatchSize", ConfigRecordType::Array(intargvector(vector<int>{ 256 })));
|
||||
m_truncated = configSGD(L"truncated", false);
|
||||
m_maxSamplesInRAM = configSGD(L"maxSamplesInRAM", ConfigRecordType::Array(intargvector(vector < int > {0})));
|
||||
|
||||
// the number of samples in each epoch (0 means, use all the samples in each epoch).
|
||||
m_epochSize = configSGD(L"epochSize", (size_t)0);
|
||||
|
@ -1697,6 +1698,22 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
refNet->StartEvaluateMinibatchLoop(refNode);
|
||||
}
|
||||
|
||||
SubminibatchDispatcher<ElemType> smbDisplatcher;
|
||||
size_t samplesInRAM = m_maxSamplesInRAM[epochNumber];
|
||||
// convert it to SubminibatchRequested
|
||||
size_t numSubminibatchRequested = 0;
|
||||
if (samplesInRAM > 0) // if samplesInRAM = 0 , we will not use subminibatch dispatcher
{
    size_t nParallelSequences = trainSetDataReader->GetNumParallelSequences();
    size_t estimatedMBSize = tunedMBSize * nParallelSequences;
    // Round up with integer arithmetic. The previous std::ceil(estimatedMBSize / samplesInRAM)
    // divided two integers first, so the quotient was already truncated and never rounded up.
    numSubminibatchRequested = (estimatedMBSize + samplesInRAM - 1) / samplesInRAM;
}
|
||||
if (numSubminibatchRequested > 1) // only use subminibatch dispatcher if more than 1 subminibatch is required
|
||||
{
|
||||
smbDisplatcher.Init(net, learnableNodes, criterionNodes, evaluationNodes);
|
||||
}
|
||||
size_t actualNumSubminibatch=0;
|
||||
|
||||
// Attemps to compute the error signal for the whole utterance, which will
|
||||
// be fed to the neural network as features. Currently it is a workaround
|
||||
// for the two-forward-pass sequence and ctc training, which allows
|
||||
|
@ -1710,10 +1727,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
fprintf(stderr, ", DataParallelSGD training (MyRank = %d, NumNodes = %d, NumGradientBits = %d)",
|
||||
(int)g_mpi->CurrentNodeRank(), (int)g_mpi->NumNodesInUse(), (int)m_numGradientBits);
|
||||
}
|
||||
|
||||
if (useDistributedMBReading)
|
||||
{
|
||||
fprintf(stderr, ", Distributed reading is ENABLED");
|
||||
fprintf(stderr, ", distributed reading is ENABLED");
|
||||
}
|
||||
if (numSubminibatchRequested > 0)
|
||||
{
|
||||
fprintf(stderr, ", with %d Max Samples in RAM", (int)samplesInRAM);
|
||||
}
|
||||
fprintf(stderr, ".\n");
|
||||
|
||||
|
@ -1735,6 +1755,15 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
|
||||
nSamplesSinceLastModelSync += actualMBSize;
|
||||
|
||||
if (numSubminibatchRequested > 0)
|
||||
{
|
||||
actualNumSubminibatch = smbDisplatcher.GetMinibatchIntoCache(*trainSetDataReader, *net, *inputMatrices, numSubminibatchRequested);
|
||||
}
|
||||
else
|
||||
{
|
||||
actualNumSubminibatch = 0;
|
||||
}
|
||||
|
||||
// node data was changed
|
||||
// TODO: move this to that function as well--just tired to pass everything as arguments
|
||||
// TODO: We should do this right after the GetMinibatch() call, since that's where these changed.
|
||||
|
@ -1772,26 +1801,30 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
|
||||
//compute eval node first since when gradient is computed the forward function values
|
||||
//may be changed and need to be recomputed when gradient and function value share the same matrix
|
||||
net->Evaluate(evaluationNodes);
|
||||
if (actualNumSubminibatch > 0)
|
||||
{
|
||||
for (size_t ismb = 0; ismb < actualNumSubminibatch; ismb++)
|
||||
{
|
||||
smbDisplatcher.GetSubMinibatchToNet(ismb);
|
||||
#ifdef SMB_DEBUG
|
||||
//smbhelper.WriteInputMatriceAndMBLayout(numMBsRun, ismb);
|
||||
#endif
|
||||
ComputationNetwork::UpdateEvalTimeStamps(featureNodes);
|
||||
ComputationNetwork::UpdateEvalTimeStamps(labelNodes);
|
||||
ForwardBackward(*net, evaluationNodes, criterionNodes[0], learnRatePerSample > 0.01 * m_minLearnRate);
|
||||
smbDisplatcher.DoneWithCurrentSubMinibatch(ismb);
|
||||
}
|
||||
#ifdef SMB_DEBUG
|
||||
//smbhelper.WriteGradient(numMBsRun);
|
||||
#endif
|
||||
smbDisplatcher.DoneWithCurrentMinibatch();
|
||||
|
||||
// only compute gradient when learning rate is large enough
|
||||
if (learnRatePerSample > m_minLearnRate * 0.01)
|
||||
{
|
||||
// use only the first criterion. Is there any possibility to use more?
|
||||
// ==============================
|
||||
// forward prop, back-prop --this is where the magic happens baby, what we have all be waiting for!
|
||||
// ==============================
|
||||
net->ComputeGradient<ElemType>(criterionNodes[0]);
|
||||
// TODO: we should split Evaluate() out from ComputeGradient(), then call them ForwardProp() and BackProp(), for clarity
|
||||
}
|
||||
else
|
||||
else
|
||||
{
|
||||
// use only the first criterion. Is there any possibility to use more?
|
||||
// ==============================
|
||||
// forward prop
|
||||
// ==============================
|
||||
net->Evaluate(criterionNodes[0]);
|
||||
ForwardBackward(*net, evaluationNodes, criterionNodes[0], learnRatePerSample > 0.01 * m_minLearnRate);
|
||||
}
|
||||
|
||||
} // if (actualMBSize > 0)
|
||||
|
||||
// Some labels may be missing (e.g. forced alignment failed, or being gaps due to packing parallel sequences).
|
||||
|
|
|
@ -150,6 +150,14 @@ protected:
|
|||
// We really should only read it in SGD and pass it ourselves on to the Reader, instead of it being a Reader parameter.
|
||||
// BUGBUG: If m_truncated, then m_mbSize is interpreted as truncation length; the actual MB size is a combination of that and the #parallel sequences specified in the reader.
|
||||
// TODO: do not specify 'Truncated' but 'TruncatedLength', set m_truncated so given, and let m_mbSize control how many #parallel sequences the reader is allowed to pack into an MB.
|
||||
intargvector m_maxSamplesInRAM;
|
||||
// This is related with subminibatch implementation
|
||||
// maxSamplesInRAM denotes how many samples we used in forward-backward on net.
|
||||
// Due to GPU memory limitations, it is sometimes not possible to hold a full minibatch of m_mbSize samples in RAM.
|
||||
// To mitigate this issue, we adopt the sub-minibatch implementation, where
|
||||
// each minibatch of m_mbSize[epoch] samples is divided into several sub-minibatches, each of size no more than m_maxSamplesInRAM[epoch]
|
||||
// a forward-backward pass is performed for each sub-minibatch; a model update is performed after each minibatch
|
||||
|
||||
|
||||
// the number of samples in each epoch (0 means, use all the samples in each epoch).
|
||||
size_t m_epochSize;
|
||||
|
@ -485,6 +493,28 @@ protected:
|
|||
|
||||
private:
|
||||
int SGDTrace(FILE *__restrict __stream, const char *__restrict __format, ...);
|
||||
// Evaluates evalNodes on 'net', then processes criterionNode:
//  - dobackpropogate == true:  net.ComputeGradient<ElemType>(criterionNode)
//  - dobackpropogate == false: net.Evaluate(criterionNode) only (forward prop, no gradient)
// Callers are expected to pass false when the learning rate is too small for a gradient to matter.
void ForwardBackward(ComputationNetwork& net, const std::vector<ComputationNodeBasePtr>& evalNodes, shared_ptr<ComputationNodeBase> criterionNode, bool dobackpropogate=true)
{
    // the evaluation nodes are forward-propagated unconditionally
    net.Evaluate(evalNodes);

    if (!dobackpropogate)
    {
        // ==============================
        // forward prop only
        // ==============================
        net.Evaluate(criterionNode);
        return;
    }

    // ==============================
    // forward prop, back-prop on the given criterion node
    // ==============================
    // TODO: we should split Evaluate() out from ComputeGradient(), then call them ForwardProp() and BackProp(), for clarity
    net.ComputeGradient<ElemType>(criterionNode);
}
|
||||
};
|
||||
|
||||
}}}
|
||||
|
|
|
@ -0,0 +1,712 @@
|
|||
//
|
||||
// <copyright file="CPUMatrixUnitTests.cpp" company="Microsoft">
|
||||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// </copyright>
|
||||
//
|
||||
#include "stdafx.h"
|
||||
#include "CppUnitTest.h"
|
||||
#include "..\Math\CPUMatrix.h"
|
||||
#define DEBUG_FLAG 1
|
||||
using namespace Microsoft::MSR::CNTK;
|
||||
|
||||
#pragma warning (disable: 4305)
|
||||
|
||||
using namespace Microsoft::VisualStudio::CppUnitTestFramework;
|
||||
|
||||
namespace CNTKMathTest
|
||||
{
|
||||
TEST_CLASS(CPUMatrixUnitTest)
|
||||
{
|
||||
//typedef CPUSingleMatrix Matrix;
|
||||
typedef CPUDoubleMatrix Matrix;
|
||||
|
||||
public:
|
||||
static void DebugPrint(FILE* gpuDebugFile, Matrix M, const char* str, const bool colwiseVec = true)
|
||||
{
|
||||
fprintf(gpuDebugFile, "\n %s\n", str);
|
||||
const size_t matNumCol = M.GetNumCols();
|
||||
const size_t elemNum = M.GetNumElements();
|
||||
Matrix M1 = M.Transpose();
|
||||
double* pArray = M1.GetArray();
|
||||
if (colwiseVec)
|
||||
{
|
||||
for (size_t i = 0; i < elemNum; i++)
|
||||
{
|
||||
|
||||
fprintf(gpuDebugFile, "%3d ", (int)pArray[i]);
|
||||
if ( (i+1)% matNumCol == 0)
|
||||
fprintf(gpuDebugFile, "\n");
|
||||
}
|
||||
}
|
||||
//const size_t matNumRow = M.GetNumRows();
|
||||
//for (int i = 0; i < matNumRow; i++)
|
||||
//{
|
||||
// for (int j = 0; j < matNumCol; j++)
|
||||
// {
|
||||
// fprintf(gpuDebugFile, "%3d ", M(i,j));
|
||||
// //if ( (j+1)% matNumCol == 0)
|
||||
// }
|
||||
// fprintf(gpuDebugFile, "\n");
|
||||
//}
|
||||
}
|
||||
// Exercises the CPUMatrix constructors: default, sized, from a raw buffer
// (default layout and row-major layout), copy construction and copy initialization.
TEST_METHOD(CPUMatrixConsturctors)
{
    // a default-constructed matrix is empty until it is resized
    Matrix M0;
    Assert::IsTrue(M0.IsEmpty());

    M0.Resize(2,3);
    Assert::IsFalse(M0.IsEmpty());
    Assert::AreEqual<size_t>(2,M0.GetNumRows());
    Assert::AreEqual<size_t>(3,M0.GetNumCols());
    Assert::AreEqual<size_t>(6,M0.GetNumElements());

    M0(0,0) = 1; M0(1,2) = 2;
    Assert::IsTrue(M0(0,0) == 1);
    Assert::IsTrue(M0(1,2) == 2);

    Matrix M1(12,53);
    Assert::AreEqual<size_t>(12,M1.GetNumRows());
    Assert::AreEqual<size_t>(53,M1.GetNumCols());

    // The source buffers live on the stack: the previous new[] allocations were never released.
    // NOTE(review): this relies on the matrix copying the buffer for these flags (no
    // "don't own buffer" flag is passed) -- confirm against the CPUMatrix constructor.
    float fArray[6] = { 1, 2, 3, 4, 5, 6 };
    CPUMatrix<float> M2(2, 3, fArray, matrixFlagNormal);
    // with matrixFlagNormal the buffer is read column by column
    Assert::AreEqual<float>(1, M2(0,0));
    Assert::AreEqual<float>(3, M2(0,1));
    Assert::AreEqual<float>(5, M2(0,2));
    Assert::AreEqual<float>(2, M2(1,0));
    Assert::AreEqual<float>(4, M2(1,1));
    Assert::AreEqual<float>(6, M2(1,2));

    double dArray[6] = { 1, 2, 3, 4, 5, 6 };
    CPUMatrix<double> M3(2, 3, dArray, matrixFormatRowMajor);
    // with matrixFormatRowMajor the buffer is read row by row
    Assert::AreEqual<double>(1, M3(0,0));
    Assert::AreEqual<double>(2, M3(0,1));
    Assert::AreEqual<double>(3, M3(0,2));
    Assert::AreEqual<double>(4, M3(1,0));
    Assert::AreEqual<double>(5, M3(1,1));
    Assert::AreEqual<double>(6, M3(1,2));

    // copy construction and copy initialization both reproduce the source
    Matrix M4(M0);
    Assert::IsTrue(M4.IsEqualTo(M0));

    Matrix M5 = M0;
    Assert::IsTrue(M5.IsEqualTo(M0));
}
|
||||
|
||||
// Checks matrix +/- matrix, matrix +/- scalar, and the in-place variants, including
// operands shaped as a column (2x1), a row (1x3) and a 1x1 matrix.
TEST_METHOD(CPUMatrixAddAndSub)
{
    Matrix base(2,3);
    base(0,0) = 1; base(0,1) = 2; base(0,2) = 3;
    base(1,0) = 4; base(1,1) = 5; base(1,2) = 6;

    // base + 10
    Matrix basePlusTen(2,3);
    basePlusTen(0,0) = 11; basePlusTen(0,1) = 12; basePlusTen(0,2) = 13;
    basePlusTen(1,0) = 14; basePlusTen(1,1) = 15; basePlusTen(1,2) = 16;

    // base + basePlusTen
    Matrix total(2,3);
    total(0,0) = 12; total(0,1) = 14; total(0,2) = 16;
    total(1,0) = 18; total(1,1) = 20; total(1,2) = 22;

    Matrix tenColumn(2,1);
    tenColumn(0,0) = 10;
    tenColumn(1,0) = 10;

    Matrix tenRow(1,3);
    tenRow(0,0) = 10; tenRow(0,1) = 10; tenRow(0,2) = 10;

    Matrix tenScalar(1,1);
    tenScalar(0,0) = 10;

    // matrix with matrix
    Matrix result = total - base;
    Assert::IsTrue(result.IsEqualTo(basePlusTen));

    result += base;
    Assert::IsTrue(result.IsEqualTo(total));

    // matrix with scalar
    result = base + 10;
    Assert::IsTrue(result.IsEqualTo(basePlusTen));

    result -= 10;
    Assert::IsTrue(result.IsEqualTo(base));

    result = basePlusTen + base;
    Assert::IsTrue(result.IsEqualTo(total));

    result -= base;
    Assert::IsTrue(result.IsEqualTo(basePlusTen));

    result = basePlusTen - 10;
    Assert::IsTrue(result.IsEqualTo(base));

    result += 10;
    Assert::IsTrue(result.IsEqualTo(basePlusTen));

    // column-shaped operand
    result -= tenColumn;
    Assert::IsTrue(result.IsEqualTo(base));

    result += tenColumn;
    Assert::IsTrue(result.IsEqualTo(basePlusTen));

    // row-shaped operand
    result -= tenRow;
    Assert::IsTrue(result.IsEqualTo(base));

    result += tenRow;
    Assert::IsTrue(result.IsEqualTo(basePlusTen));

    // 1x1 operand
    result.AssignDifferenceOf(result, tenScalar);
    Assert::IsTrue(result.IsEqualTo(base));
}
|
||||
|
||||
// Checks matrix product, scalar multiply/divide (binary and in-place) and
// MultiplyAndWeightedAdd with the different transpose-flag combinations.
TEST_METHOD(CPUMatrixMultiAndDiv)
{
    Matrix lhs(2,3);
    lhs(0,0) = 1; lhs(0,1) = 2; lhs(0,2) = 3;
    lhs(1,0) = 4; lhs(1,1) = 5; lhs(1,2) = 6;

    // lhs * 10
    Matrix lhsTimesTen(2,3);
    lhsTimesTen(0,0) = 10; lhsTimesTen(0,1) = 20; lhsTimesTen(0,2) = 30;
    lhsTimesTen(1,0) = 40; lhsTimesTen(1,1) = 50; lhsTimesTen(1,2) = 60;

    // allocated as 2x3, then reshaped to 3x2 before being filled
    Matrix rhs(2,3);
    rhs.Reshape(3,2);
    rhs(0,0) = 11; rhs(0,1) = 15;
    rhs(1,0) = 14; rhs(1,1) = 13;
    rhs(2,0) = 12; rhs(2,1) = 16;

    Matrix expected(2,2);
    expected(0,0) = 75;  expected(0,1) = 89;
    expected(1,0) = 186; expected(1,1) = 221;

    // plain product
    Matrix result = lhs * rhs;
    Assert::IsTrue(result.IsEqualTo(expected));

    // scalar scaling, binary and in-place forms
    result = lhs * 10;
    Assert::IsTrue(result.IsEqualTo(lhsTimesTen));

    result = result / 10;
    Assert::IsTrue(result.IsEqualTo(lhs));

    result *= 10;
    Assert::IsTrue(result.IsEqualTo(lhsTimesTen));

    result /= 10;
    Assert::IsTrue(result.IsEqualTo(lhs));

    // result = 1 * lhs * rhs + 0 * result
    Matrix::MultiplyAndWeightedAdd(1, lhs, false, rhs, false, 0, result);
    Assert::IsTrue(result.IsEqualTo(expected));

    // result = lhs * rhs' with rhs reshaped back to 2x3
    rhs.Reshape(2,3);
    Matrix::MultiplyAndWeightedAdd(1, lhs, false, rhs, true, 0, result);
    expected(0,0) = 74;  expected(0,1) = 92;
    expected(1,0) = 182; expected(1,1) = 227;
    Assert::IsTrue(result.IsEqualTo(expected));

    // result = 10 * lhs * rhs' + 2 * result
    Matrix::MultiplyAndWeightedAdd(10, lhs, false, rhs, true, 2, result);
    expected(0,0) = 888;  expected(0,1) = 1104;
    expected(1,0) = 2184; expected(1,1) = 2724;
    Assert::IsTrue(result.IsEqualTo(expected));

    // result = lhs' * rhs, a 3x3 product
    Matrix::MultiplyAndWeightedAdd(1, lhs, true, rhs, false, 0, result);
    expected.Resize(3,3);
    expected(0,0) = 67;  expected(0,1) = 72;  expected(0,2) = 77;
    expected(1,0) = 92;  expected(1,1) = 99;  expected(1,2) = 106;
    expected(2,0) = 117; expected(2,1) = 126; expected(2,2) = 135;
    Assert::IsTrue(result.IsEqualTo(expected));
}
|
||||
|
||||
// Checks the element-wise operations (product, power, inverse, division, sigmoid, tanh,
// log-softmax, hardmax, sqrt, exp, log, truncation, cosine, negative sine) against
// hand-computed reference values for the input [1 2 3; 4 5 6].
TEST_METHOD(CPUMatrixElementOps)
{
    Matrix input(2,3);
    input(0,0) = 1; input(0,1) = 2; input(0,2) = 3;
    input(1,0) = 4; input(1,1) = 5; input(1,2) = 6;

    // element-wise reciprocal of input
    Matrix reciprocal(2,3);
    reciprocal(0,0) = 1.0;   reciprocal(0,1) = 1/2.0; reciprocal(0,2) = 1/3.0;
    reciprocal(1,0) = 1/4.0; reciprocal(1,1) = 1/5.0; reciprocal(1,2) = 1/6.0;

    Matrix ones(2,3);
    ones(0,0) = 1; ones(0,1) = 1; ones(0,2) = 1;
    ones(1,0) = 1; ones(1,1) = 1; ones(1,2) = 1;

    // element product: x * (1/x) == 1
    Matrix actual;
    actual.AssignElementProductOf(input, reciprocal);
    Assert::IsTrue(actual.IsEqualTo(ones, 0.0001));

    // element power, binary and in-place forms
    actual = input ^ 4;
    Matrix expected(2,3);
    expected(0,0) = 1;   expected(0,1) = 16;  expected(0,2) = 81;
    expected(1,0) = 256; expected(1,1) = 625; expected(1,2) = 1296;
    Assert::IsTrue(actual.IsEqualTo(expected));

    actual.SetValue(input);
    actual ^= 4;
    Assert::IsTrue(actual.IsEqualTo(expected));

    // in-place element product
    actual.SetValue(input);
    actual.ElementMultiplyWith(reciprocal);
    Assert::IsTrue(actual.IsEqualTo(ones));

    // element inverse
    actual.SetValue(input);
    actual.ElementInverse();
    Assert::IsTrue(actual.IsEqualTo(reciprocal));

    // element division must agree with multiplication by the reciprocal
    expected(0,0) = 0.7311; expected(0,1) = 0.8808; expected(0,2) = 0.9526;
    expected(1,0) = 0.9820; expected(1,1) = 0.9933; expected(1,2) = 0.9975;
    actual.AssignElementDivisionOf(expected, input);
    expected.ElementMultiplyWith(reciprocal);
    Assert::IsTrue(actual.IsEqualTo(expected, 0.0001));

    // sigmoid
    actual.SetValue(input);
    actual.InplaceSigmoid();
    expected(0,0) = 0.7311; expected(0,1) = 0.8808; expected(0,2) = 0.9526;
    expected(1,0) = 0.9820; expected(1,1) = 0.9933; expected(1,2) = 0.9975;
    Assert::IsTrue(actual.IsEqualTo(expected, 0.0001));

    // tanh
    actual.SetValue(input);
    actual.InplaceTanh();
    expected(0,0) = 0.7616; expected(0,1) = 0.9640; expected(0,2) = 0.9951;
    expected(1,0) = 0.9993; expected(1,1) = 0.9999; expected(1,2) = 1.0000;
    Assert::IsTrue(actual.IsEqualTo(expected, 0.0001));

    // softmax (exp of log-softmax); with 'true' each column sums to one
    actual.SetValue(input);
    actual.InplaceLogSoftmax(true);
    actual.InplaceExp();
    expected(0,0) = 0.0474; expected(0,1) = 0.0474; expected(0,2) = 0.0474;
    expected(1,0) = 0.9526; expected(1,1) = 0.9526; expected(1,2) = 0.9526;
    Assert::IsTrue(actual.IsEqualTo(expected, 0.0001));

    // with 'false' each row sums to one
    actual.SetValue(input);
    actual.InplaceLogSoftmax(false);
    actual.InplaceExp();
    expected(0,0) = 0.0900; expected(0,1) = 0.2447; expected(0,2) = 0.6652;
    expected(1,0) = 0.0900; expected(1,1) = 0.2447; expected(1,2) = 0.6652;
    Assert::IsTrue(actual.IsEqualTo(expected, 0.0001));

    // hardmax: one-hot at the maximum of each column ('true') ...
    actual.SetValue(input);
    actual.InplaceHardmax(true);
    expected(0, 0) = 0.0; expected(0, 1) = 0.0; expected(0, 2) = 0.0;
    expected(1, 0) = 1.0; expected(1, 1) = 1.0; expected(1, 2) = 1.0;
    Assert::IsTrue(actual.IsEqualTo(expected, 0.0001));

    // ... or of each row ('false')
    actual.SetValue(input);
    actual.InplaceHardmax(false);
    expected(0, 0) = 0.0; expected(0, 1) = 0.0; expected(0, 2) = 1.0;
    expected(1, 0) = 0.0; expected(1, 1) = 0.0; expected(1, 2) = 1.0;
    Assert::IsTrue(actual.IsEqualTo(expected, 0.0001));

    // square root
    actual.SetValue(input);
    actual.InplaceSqrt();
    expected(0,0) = 1; expected(0,1) = 1.4142; expected(0,2) = 1.7321;
    expected(1,0) = 2; expected(1,1) = 2.2361; expected(1,2) = 2.4495;
    Assert::IsTrue(actual.IsEqualTo(expected, 0.0001));

    // exp
    actual.SetValue(input);
    actual.InplaceExp();
    expected(0,0) = 2.7183;  expected(0,1) = 7.3891;   expected(0,2) = 20.0855;
    expected(1,0) = 54.5982; expected(1,1) = 148.4132; expected(1,2) = 403.4288;
    Assert::IsTrue(actual.IsEqualTo(expected, 0.0001));

    // exp once more, then log must take us back to the input
    actual.SetValue(input);
    actual.InplaceExp();
    expected(0,0) = 2.7183;  expected(0,1) = 7.3891;   expected(0,2) = 20.0855;
    expected(1,0) = 54.5982; expected(1,1) = 148.4132; expected(1,2) = 403.4288;
    Assert::IsTrue(actual.IsEqualTo(expected, 0.0001));

    actual.InplaceLog();
    Assert::IsTrue(actual.IsEqualTo(input, 0.0001));

    // truncation from below at 2
    actual.SetValue(input);
    actual.InplaceTruncateBottom(2);
    expected(0,0) = 2; expected(0,1) = 2; expected(0,2) = 3;
    expected(1,0) = 4; expected(1,1) = 5; expected(1,2) = 6;
    Assert::IsTrue(actual.IsEqualTo(expected));

    // truncation from above at 4
    actual.SetValue(input);
    actual.InplaceTruncateTop(4);
    expected(0,0) = 1; expected(0,1) = 2; expected(0,2) = 3;
    expected(1,0) = 4; expected(1,1) = 4; expected(1,2) = 4;
    Assert::IsTrue(actual.IsEqualTo(expected));

    // trigonometric functions at multiples of pi/2
    double pi = 3.14159265358979323846264338327950288419716939937510;

    Matrix angles(2,3);
    angles(0,0) = 0;          angles(0,1) = pi/2.0; angles(0,2) = pi;
    angles(1,0) = 3.0*pi/2.0; angles(1,1) = 2.0*pi; angles(1,2) = 5.0*pi/2.0;

    Matrix cosine(2,3);
    cosine.SetValue(angles);

    Matrix cosineExpected(2,3);
    cosineExpected(0,0) = 1; cosineExpected(0,1) = 0; cosineExpected(0,2) = -1;
    cosineExpected(1,0) = 0; cosineExpected(1,1) = 1; cosineExpected(1,2) = 0;

    cosine.InplaceCosine();
    Assert::IsTrue(cosine.IsEqualTo(cosineExpected, 0.0001));

    cosine.SetValue(angles);
    cosine.AssignCosineOf(angles);
    Assert::IsTrue(cosine.IsEqualTo(cosineExpected, 0.0001));

    Matrix negSine(2,3);
    negSine.SetValue(angles);

    Matrix negSineExpected(2,3);
    negSineExpected(0,0) = 0; negSineExpected(0,1) = -1; negSineExpected(0,2) = 0;
    negSineExpected(1,0) = 1; negSineExpected(1,1) = 0;  negSineExpected(1,2) = -1;

    negSine.InplaceNegativeSine();
    Assert::IsTrue(negSine.IsEqualTo(negSineExpected, 0.0001));

    negSine.SetValue(angles);
    negSine.AssignNegativeSineOf(angles);
    Assert::IsTrue(negSine.IsEqualTo(negSineExpected, 0.0001));
}
|
||||
|
||||
// Checks the vector norms (1, 2, inf), the Frobenius and inf matrix norms, and
// VectorMax/VectorMin for the input [1 2 3; 4 5 6]. A 'true' flag yields one value
// per column (1x3 result), 'false' one value per row (2x1 result).
TEST_METHOD(CPUMatrixNorms)
{
    Matrix M0(2,3);
    M0(0,0) = 1; M0(0,1) = 2; M0(0,2) = 3;
    M0(1,0) = 4; M0(1,1) = 5; M0(1,2) = 6;

    // 1-norm
    Matrix M3;
    M0.VectorNorm1(M3, true);
    Matrix M2(1, 3);
    M2(0,0) = 5; M2(0,1) = 7; M2(0,2) = 9;
    Assert::IsTrue(M3.IsEqualTo(M2));

    M0.VectorNorm1(M3, false);
    M2.Resize(2,1);
    M2(0,0) = 6;
    M2(1,0) = 15;
    Assert::IsTrue(M3.IsEqualTo(M2));

    // 2-norm
    M0.VectorNorm2(M3, true);
    M2.Resize(1, 3);
    M2(0,0) = 4.1231; M2(0,1) = 5.3852; M2(0,2) = 6.7082;
    Assert::IsTrue(M3.IsEqualTo(M2, 0.0001));

    M0.VectorNorm2(M3, false);
    M2.Resize(2,1);
    M2(0,0) = 3.7417;
    M2(1,0) = 8.7750;
    Assert::IsTrue(M3.IsEqualTo(M2, 0.0001));

    // inf-norm
    M0.VectorNormInf(M3, true);
    M2.Resize(1, 3);
    M2(0,0) = 4; M2(0,1) = 5; M2(0,2) = 6;
    Assert::IsTrue(M3.IsEqualTo(M2, 0.0001));

    M0.VectorNormInf(M3, false);
    M2.Resize(2,1);
    M2(0,0) = 3;
    M2(1,0) = 6;
    Assert::IsTrue(M3.IsEqualTo(M2));

    // Use the tolerance overload rather than an unqualified abs(): depending on which
    // headers are in scope, abs() on a double can bind to the integer overload and
    // truncate the difference to 0, which would make the check vacuous.
    Assert::AreEqual(9.5394, M0.FrobeniusNorm(), 0.0001);
    Assert::AreEqual(6.0, M0.MatrixNormInf(), 0.0001);

    // max/min: only the second output (M3) is verified here; M1 receives the first output
    Matrix M1;
    M0.VectorMax(M1, M3, true);
    M2.Resize(1, 3);
    M2(0,0) = 4; M2(0,1) = 5; M2(0,2) = 6;
    Assert::IsTrue(M3.IsEqualTo(M2, 0.0001));

    M0.VectorMax(M1, M3, false);
    M2.Resize(2,1);
    M2(0,0) = 3;
    M2(1,0) = 6;
    Assert::IsTrue(M3.IsEqualTo(M2, 0.0001));

    M0.VectorMin(M1, M3, true);
    M2.Resize(1, 3);
    M2(0,0) = 1; M2(0,1) = 2; M2(0,2) = 3;
    Assert::IsTrue(M3.IsEqualTo(M2, 0.0001));

    M0.VectorMin(M1, M3, false);
    M2.Resize(2,1);
    M2(0,0) = 1;
    M2(1,0) = 4;
    Assert::IsTrue(M3.IsEqualTo(M2, 0.0001));
}
|
||||
|
||||
// Checks SetDiagonalValue (scalar and vector forms) and that SetUniformRandomValue
// stays inside the requested range; SetGaussianRandomValue is only smoke-tested.
TEST_METHOD(CPUMatrixSetValues)
{
    // reference: 3x3 matrix with 10 on the diagonal
    // (assumes a freshly sized matrix is zero elsewhere -- TODO confirm)
    Matrix M0(3,3);
    M0(0,0) = 10; M0(1,1) = 10; M0(2,2) = 10;

    Matrix M1(3,3);
    M1.SetDiagonalValue(10);
    Assert::IsTrue(M1.IsEqualTo(M0, 0.0001));

    Matrix M2(3,1);
    M2(0,0) = 10; M2(1,0) = 10; M2(2,0) = 10;
    M1.SetDiagonalValue(M2);
    Assert::IsTrue(M1.IsEqualTo(M0, 0.0001));

    M1.SetUniformRandomValue(-0.01, 0.01);
    // size_t indices: the row/column counts are unsigned, so avoid a signed/unsigned comparison
    for (size_t i = 0; i < M1.GetNumRows(); i++)
        for (size_t j = 0; j < M1.GetNumCols(); j++)
            Assert::IsTrue(M1(i,j) >= -0.01 && M1(i,j) < 0.01);

    // no assertion: only verifies that the call completes
    M1.SetGaussianRandomValue(0, 0.01);
}
|
||||
|
||||
// Checks Transpose() and AssignTransposeOf() on a 2x3 matrix and its 3x2 transpose.
TEST_METHOD(CPUMatrixTranspose)
{
    Matrix original(2,3);
    original(0,0) = 1; original(0,1) = 2; original(0,2) = 3;
    original(1,0) = 4; original(1,1) = 5; original(1,2) = 6;

    Matrix flipped(3,2);
    flipped(0,0) = 1; flipped(0,1) = 4;
    flipped(1,0) = 2; flipped(1,1) = 5;
    flipped(2,0) = 3; flipped(2,1) = 6;

    // original -> flipped
    Matrix result = original.Transpose();
    Assert::IsTrue(result.IsEqualTo(flipped, 0.0001));

    // flipped -> original
    result.AssignTransposeOf(flipped);
    Assert::IsTrue(result.IsEqualTo(original, 0.0001));
}
|
||||
|
||||
// Checks ColumnSlice() on a small matrix, then verifies that accumulating a product
// column by column through slices gives the same result as one full MultiplyAndAdd.
TEST_METHOD(CPUMatrixColumnSlice)
{
    Matrix M0(2,3);
    M0(0,0) = 1; M0(0,1) = 2; M0(0,2) = 3;
    M0(1,0) = 4; M0(1,1) = 5; M0(1,2) = 6;

    // columns 0..1
    Matrix M1(2,2);
    M1(0,0) = 1; M1(0,1) = 2;
    M1(1,0) = 4; M1(1,1) = 5;

    Matrix M2 = M0.ColumnSlice(0,2);
    Assert::IsTrue(M2.IsEqualTo(M1, 0.0001));

    // columns 1..2
    M1(0,0) = 2; M1(0,1) = 3;
    M1(1,0) = 5; M1(1,1) = 6;

    M2 = M0.ColumnSlice(1,2);
    Assert::IsTrue(M2.IsEqualTo(M1, 0.0001));

    const size_t k = 100, n = 20, m = 50;

    Matrix AG(k, n);
    AG.SetUniformRandomValue(-1,1);

    Matrix BG(n, m);
    BG.SetUniformRandomValue(-1,1);

    Matrix CG(k, m);
    CG.SetUniformRandomValue(-1,1);
    Matrix DG(k, m);
    DG.SetValue(CG);

    // reference: DG += AG * BG in one call
    Matrix::MultiplyAndAdd(AG, false, BG, false, DG);

    // same accumulation one column at a time; this only matches DG if writes through
    // the column slice of CG reach CG itself
    for (size_t i = 0; i < m; i++)
    {
        Matrix col_BG = BG.ColumnSlice(i,1);
        Matrix col_CG = CG.ColumnSlice(i,1);
        Matrix::MultiplyAndAdd(AG, false, col_BG, false, col_CG);
    }
    Assert::IsTrue(CG.IsEqualTo(DG, 0.0001));
}
|
||||
|
||||
// Checks AssignKhatriRaoProductOf for a 3x4 and a 2x4 operand against a
// precomputed 6x4 reference.
TEST_METHOD(CPUKhatriRaoProduct)
{
    Matrix left(3,4);
    left(0,0) = 0.8147; left(0,1) = 0.9134; left(0,2) = 0.2785; left(0,3) = 0.9649;
    left(1,0) = 0.9058; left(1,1) = 0.6324; left(1,2) = 0.5469; left(1,3) = 0.1576;
    left(2,0) = 0.1270; left(2,1) = 0.0975; left(2,2) = 0.9575; left(2,3) = 0.9706;

    Matrix right(2,4);
    right(0,0) = 0.9572; right(0,1) = 0.8003; right(0,2) = 0.4218; right(0,3) = 0.7922;
    right(1,0) = 0.4854; right(1,1) = 0.1419; right(1,2) = 0.9157; right(1,3) = 0.9595;

    // reference values, accurate to four decimals
    Matrix expected(6,4);
    expected(0,0) = 0.7798; expected(0,1) = 0.7310; expected(0,2) = 0.1175; expected(0,3) = 0.7644;
    expected(1,0) = 0.8670; expected(1,1) = 0.5061; expected(1,2) = 0.2307; expected(1,3) = 0.1249;
    expected(2,0) = 0.1215; expected(2,1) = 0.0781; expected(2,2) = 0.4038; expected(2,3) = 0.7689;
    expected(3,0) = 0.3954; expected(3,1) = 0.1296; expected(3,2) = 0.2550; expected(3,3) = 0.9258;
    expected(4,0) = 0.4396; expected(4,1) = 0.0897; expected(4,2) = 0.5008; expected(4,3) = 0.1512;
    expected(5,0) = 0.0616; expected(5,1) = 0.0138; expected(5,2) = 0.8768; expected(5,3) = 0.9313;

    Matrix product;
    product.AssignKhatriRaoProductOf(left, right);
    Assert::IsTrue(product.IsEqualTo(expected, 0.0001));
}
|
||||
|
||||
// Checks AddColumnReshapeProductOf for both values of its boolean flag against
// precomputed 2x2 references, starting from a zeroed accumulator each time.
TEST_METHOD(CPUAddColumnReshapeProductOf)
{
    Matrix first(6,2);
    first(0,0) = 0.6557; first(0,1) = 0.7431;
    first(1,0) = 0.0357; first(1,1) = 0.3922;
    first(2,0) = 0.8491; first(2,1) = 0.6555;
    first(3,0) = 0.9340; first(3,1) = 0.1712;
    first(4,0) = 0.6787; first(4,1) = 0.7060;
    first(5,0) = 0.7577; first(5,1) = 0.0318;

    Matrix second(3,2);
    second(0,0) = 0.2769; second(0,1) = 0.8235;
    second(1,0) = 0.0462; second(1,1) = 0.6948;
    second(2,0) = 0.0971; second(2,1) = 0.3171;

    // reference for flag == false
    Matrix expectedPlain(2,2);
    expectedPlain(0,0) = 0.2867; expectedPlain(0,1) = 1.2913;
    expectedPlain(1,0) = 0.1266; expectedPlain(1,1) = 0.4520;

    // reference for flag == true
    Matrix expectedFlagged(2,2);
    expectedFlagged(0,0) = 0.2657; expectedFlagged(0,1) = 1.0923;
    expectedFlagged(1,0) = 0.3636; expectedFlagged(1,1) = 0.6416;

    Matrix accumulator(2,2);
    accumulator.SetValue(0);
    accumulator.AddColumnReshapeProductOf(first, second, false);
    Assert::IsTrue(accumulator.IsEqualTo(expectedPlain, 0.0001));

    accumulator.SetValue(0);
    accumulator.AddColumnReshapeProductOf(first, second, true);
    Assert::IsTrue(accumulator.IsEqualTo(expectedFlagged, 0.0001));
}
|
||||
|
||||
// Checks AssignRowSliceValuesOf, AddToRowSliceValuesOf and AddWithRowSliceValuesOf
// on a 5x3 matrix; the row-stack check at the end is currently compiled out.
TEST_METHOD(CPUMatrixRowSliceAndStack)
{
    Matrix full(5,3);
    full(0,0) = 1; full(0,1) = 6;  full(0,2) = 11;
    full(1,0) = 2; full(1,1) = 7;  full(1,2) = 12;
    full(2,0) = 3; full(2,1) = 8;  full(2,2) = 13;
    full(3,0) = 4; full(3,1) = 9;  full(3,2) = 14;
    full(4,0) = 5; full(4,1) = 10; full(4,2) = 15;

    // rows 2..3 of 'full'
    Matrix middleRows(2,3);
    middleRows(0,0) = 3; middleRows(0,1) = 8; middleRows(0,2) = 13;
    middleRows(1,0) = 4; middleRows(1,1) = 9; middleRows(1,2) = 14;

    Matrix sliced;
    sliced.AssignRowSliceValuesOf(full, 2, 2);
    Assert::IsTrue(sliced.IsEqualTo(middleRows, 0.0001));

    // 'middleRows' placed at rows 2..3 of an otherwise zero 5x3 matrix
    Matrix padded(5,3);
    padded(0,0) = 0; padded(0,1) = 0; padded(0,2) = 0;
    padded(1,0) = 0; padded(1,1) = 0; padded(1,2) = 0;
    padded(2,0) = 3; padded(2,1) = 8; padded(2,2) = 13;
    padded(3,0) = 4; padded(3,1) = 9; padded(3,2) = 14;
    padded(4,0) = 0; padded(4,1) = 0; padded(4,2) = 0;

    // adding the padded matrix must equal adding 'middleRows' into rows 2..3
    padded += full;
    full.AddToRowSliceValuesOf(middleRows, 2,2);
    Assert::IsTrue(padded.IsEqualTo(full, 0.0001));

    // sliced (== middleRows) plus rows 0..1 of middleRows doubles every value
    sliced.AddWithRowSliceValuesOf(middleRows, 0, 2);
    Matrix doubled(2, 3);
    doubled(0, 0) = 6; doubled(0, 1) = 16; doubled(0, 2) = 26;
    doubled(1, 0) = 8; doubled(1, 1) = 18; doubled(1, 2) = 28;
    Assert::IsTrue(sliced.IsEqualTo(doubled, 0.0001));

#if 0
    Matrix top, mid, bottom, stacked;
    top.AssignRowSliceValuesOf(full, 0, 2);
    mid.AssignRowSliceValuesOf(full, 2, 1);
    bottom.AssignRowSliceValuesOf(full, 3, 2);

    std::vector<const Matrix*> inputMatrices;
    inputMatrices.resize(3);
    inputMatrices[0] = &top;
    inputMatrices[1] = &mid;
    inputMatrices[2] = &bottom;
    stacked.AssignRowStackValuesOf(inputMatrices, 0, 3);

    Assert::IsTrue(stacked.IsEqualTo(full, 0.0001));
#endif
}
|
||||
|
||||
// Checks AssignRepeatOf: a 1x1 repeat reproduces the source, and a 3x2 repeat of a
// 2x3 tile gives the 6x6 matrix in which the tile recurs every 2 rows and 3 columns.
TEST_METHOD(CPUAssignRepeatOf)
{
    Matrix tile(2, 3);
    tile(0, 0) = 1; tile(0, 1) = 6; tile(0, 2) = 11;
    tile(1, 0) = 2; tile(1, 1) = 7; tile(1, 2) = 12;

    Matrix repeated;
    repeated.AssignRepeatOf(tile, 1, 1);
    Assert::IsTrue(repeated.IsEqualTo(tile, 0.0001));

    // expected 6x6 result: element (r, c) equals tile(r mod 2, c mod 3)
    Matrix tiled(6, 6);
    for (size_t row = 0; row < 6; row++)
    {
        for (size_t col = 0; col < 6; col++)
            tiled(row, col) = tile(row % 2, col % 3);
    }

    repeated.AssignRepeatOf(tile, 3, 2);
    Assert::IsTrue(repeated.IsEqualTo(tiled, 0.0001));
}
|
||||
|
||||
// Round trip: multiplying by a 1x28 row and then dividing by the same row
// must give back the original 20x28 matrix.
TEST_METHOD(CPURowElementOperations)
{
    Matrix original = Matrix::RandomUniform(20, 28, -1, 1);
    // scale factors are drawn from [1, 2), so the division is well defined
    Matrix rowScale = Matrix::RandomUniform(1, 28, 1, 2);

    Matrix roundTrip;
    roundTrip.SetValue(original);
    roundTrip.RowElementMultiplyWith(rowScale);
    roundTrip.RowElementDivideBy(rowScale);

    Assert::IsTrue(original.IsEqualTo(roundTrip, 0.0001));
}
|
||||
// Round trip: multiplying by a 20x1 column and then dividing by the same column
// must give back the original 20x28 matrix.
TEST_METHOD(CPUColumnElementOperations)
{
    Matrix original = Matrix::RandomUniform(20, 28, -1, 1);
    // scale factors are drawn from [1, 2), so the division is well defined
    Matrix columnScale = Matrix::RandomUniform(20, 1, 1, 2);

    Matrix roundTrip;
    roundTrip.SetValue(original);
    roundTrip.ColumnElementMultiplyWith(columnScale);
    roundTrip.ColumnElementDivideBy(columnScale);

    Assert::IsTrue(original.IsEqualTo(roundTrip, 0.0001));
}
|
||||
|
||||
// Checks AssignMatrixByColumnSlice: destination column 'des' must be a copy of
// source column columnrange[des], in the (unordered) order given by columnrange.
TEST_METHOD(CPUAssignMatrixByColumnSlice)
{
    printf("starts here\n");

    const size_t numRows = 400;
    Matrix M0 = Matrix::RandomUniform(numRows, 50, -100, 100);

    std::vector<size_t> columnrange = { 0, 3, 5, 4 };
    Matrix M1;
    try
    {
        M1.AssignMatrixByColumnSlice(M0, columnrange);
    }
    catch (const std::exception& e)
    {
        // report the reason before failing the test
        printf("%s\n", e.what());
        Assert::Fail();
    }

    for (size_t des = 0; des < columnrange.size(); des ++)
    {
        const size_t src = columnrange[des];

        // sum of squared differences between the source and destination column
        double err = 0;
        for (size_t r = 0; r < numRows; r++)
        {
            double diff = (M0(r, src) - M1(r, des));
            diff *= diff;
            err += diff;
        }
        // expected value first, so that a failure message reads correctly
        Assert::AreEqual(0.0, err, 1e-7);
    }
}
|
||||
|
||||
};
|
||||
}
|
|
@ -259,6 +259,19 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
return slice;
|
||||
}
|
||||
|
||||
// BUGBUG: Unlike ColumnSlice(), this does not return a view. Must be renamed.
|
||||
template<class ElemType>
|
||||
CPUMatrix<ElemType> CPUMatrix<ElemType>::RowSlice(size_t startRow, size_t numRows) const
|
||||
{
|
||||
if (startRow + numRows > m_numRows )
|
||||
InvalidArgument("The row slice (%d+%d) is out of range of the source matrix (%d).", (int)startRow, (int)numRows, (int)m_numRows);
|
||||
|
||||
CPUMatrix<ElemType> slice;
|
||||
slice.AssignRowSliceValuesOf(*this, startRow, numRows);
|
||||
|
||||
return slice;
|
||||
}
|
||||
|
||||
// set this(:, 0:numCols-1) = fromMatrix(:, startColumn : startColumn+numCols-1)
|
||||
// TODO: why not say *this = ColumnSlice()?
|
||||
template<class ElemType>
|
||||
|
@ -333,7 +346,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//for each column of a, we add all rows of a to this starting from startIndex
|
||||
template<class ElemType>
|
||||
CPUMatrix<ElemType>& CPUMatrix<ElemType>::AssignToRowSliceValuesOf(const CPUMatrix<ElemType>& a, const size_t startIndex, const size_t numRows)
|
||||
|
|
|
@ -52,6 +52,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
CPUMatrix<ElemType> ColumnSlice(size_t startColumn, size_t numCols) const;
|
||||
CPUMatrix<ElemType>& AssignColumnSlice(const CPUMatrix<ElemType>& fromMatrix, size_t startColumn, size_t numCols);
|
||||
CPUMatrix<ElemType>& SetColumnSlice(const CPUMatrix<ElemType>& fromMatrix, size_t startColumn, size_t numCols);
|
||||
// BUGBUG: Unlike ColumnSlice(), this does not return a view. Must be renamed.
|
||||
CPUMatrix<ElemType> RowSlice(size_t startRow, size_t numRows) const;
|
||||
|
||||
void CopyColumnsStrided(const CPUMatrix<ElemType>& fromMatrix, size_t numCols, size_t srcNumColsStride, size_t destNumColsStride);
|
||||
|
||||
|
|
|
@ -537,6 +537,17 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
return slice;
|
||||
}
|
||||
|
||||
template<class ElemType>
|
||||
GPUMatrix<ElemType> GPUMatrix<ElemType>::RowSlice(size_t startRow, size_t numRows) const
|
||||
{
|
||||
if (startRow + numRows > m_numRows )
|
||||
InvalidArgument("The row slice (%d+%d) is out of range of the source matrix (%d).", (int)startRow, (int)numRows, (int)m_numRows);
|
||||
|
||||
GPUMatrix<ElemType> slice(GetComputeDeviceId());
|
||||
slice.AssignRowSliceValuesOf(*this, startRow, numRows);
|
||||
return slice;
|
||||
}
|
||||
|
||||
template<class ElemType>
|
||||
GPUMatrix<ElemType>& GPUMatrix<ElemType>::AssignColumnSlice(const GPUMatrix<ElemType>& fromMatrix, size_t startColumn, size_t numCols)
|
||||
{
|
||||
|
|
|
@ -142,6 +142,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
GPUMatrix<ElemType> ColumnSlice(size_t startColumn, size_t numCols) const;
|
||||
GPUMatrix<ElemType>& AssignColumnSlice(const GPUMatrix<ElemType>& fromMatrix, size_t startColumn, size_t numCols);
|
||||
GPUMatrix<ElemType>& SetColumnSlice(const GPUMatrix<ElemType>& fromMatrix, size_t startColumn, size_t numCols);
|
||||
// BUGBUG: Unlike ColumnSlice(), this does not return a view. Must be renamed.
|
||||
GPUMatrix<ElemType> RowSlice(size_t startRow, size_t numRows) const;
|
||||
|
||||
void CopyColumnsStrided(const GPUMatrix<ElemType>& fromMatrix, size_t numCols, size_t srcNumColsStride, size_t destNumColsStride);
|
||||
|
||||
|
|
|
@ -766,6 +766,46 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
return slice;
|
||||
}
|
||||
|
||||
// BUGBUG: Unlike ColumnSlice(), this does not return a view. Must be renamed.
|
||||
template<class ElemType>
|
||||
Matrix<ElemType> Matrix<ElemType>::RowSlice(size_t startRow, size_t numRow) const
|
||||
{
|
||||
int devId = GetDeviceId();
|
||||
Matrix<ElemType> slice(devId);
|
||||
slice.m_preferredDeviceId = m_preferredDeviceId;
|
||||
if (GetMatrixType() == MatrixType::DENSE)
|
||||
{
|
||||
if (devId == CPUDEVICE)
|
||||
{
|
||||
if (slice.m_CPUMatrix != nullptr)
|
||||
slice.m_CPUMatrix->operator=(static_cast<CPUMatrix<ElemType>&&> (m_CPUMatrix->RowSlice(startRow, numRow)));
|
||||
else
|
||||
slice.m_CPUMatrix = new CPUMatrix<ElemType>(static_cast<CPUMatrix<ElemType>&&> (m_CPUMatrix->RowSlice(startRow, numRow)));
|
||||
slice.SetDataLocation(CPU, DENSE);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (slice.m_GPUMatrix != nullptr)
|
||||
slice.m_GPUMatrix->operator=(static_cast<GPUMatrix<ElemType>&&>(m_GPUMatrix->RowSlice(startRow, numRow)));
|
||||
else
|
||||
slice.m_GPUMatrix = new GPUMatrix<ElemType>(static_cast<GPUMatrix<ElemType>&&>(m_GPUMatrix->RowSlice(startRow, numRow)));
|
||||
slice.SetDataLocation(GPU, DENSE);
|
||||
}
|
||||
|
||||
}
|
||||
else if (GetMatrixType() == MatrixType::SPARSE)
|
||||
{
|
||||
NOT_IMPLEMENTED;
|
||||
}
|
||||
else
|
||||
{
|
||||
RuntimeError("Unknown matrix type");
|
||||
}
|
||||
return slice;
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<class ElemType>
|
||||
Matrix<ElemType>& Matrix<ElemType>::AssignColumnSlice(const Matrix<ElemType>& fromMatrix, size_t startColumn, size_t numCols)
|
||||
{
|
||||
|
|
|
@ -153,7 +153,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
void CopySection(size_t numRows, size_t numCols, ElemType* dst, size_t colStride) const;
|
||||
|
||||
Matrix<ElemType> ColumnSlice(size_t startColumn, size_t numCols) const;
|
||||
|
||||
// BUGBUG: Unlike ColumnSlice(), this does not return a view. Must be renamed.
|
||||
Matrix<ElemType> RowSlice(size_t startRow, size_t numRows) const;
|
||||
|
||||
// difference between AssignColumnSlice and SetColumnSlice
|
||||
// AssignColumnSlice : this(:, startColumn:startColumn+numCols-1) = fromMatrix(:, startColumn: startColumn+numCols-1)
|
||||
|
|
|
@ -490,7 +490,12 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
// Stub: ignores both arguments and returns a GPUMatrix constructed with argument 0.
template<class ElemType>
GPUMatrix<ElemType> GPUMatrix<ElemType>::ColumnSlice(size_t startColumn, size_t numCols) const
{
    GPUMatrix<ElemType> placeholder(0);
    return placeholder;
}
|
||||
|
||||
// Stub: ignores both arguments and returns a GPUMatrix constructed with argument 0.
template<class ElemType>
GPUMatrix<ElemType> GPUMatrix<ElemType>::RowSlice(size_t startRow, size_t numRows) const
{
    GPUMatrix<ElemType> placeholder(0);
    return placeholder;
}
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче