Merged with master. One thing is not compiling.

This commit is contained in:
Frank Seide 2015-11-30 19:44:47 -08:00
Родитель ea87db0cad d29281b4d7
Коммит 59ae7b2c77
15 изменённых файлов: 1465 добавлений и 23 удалений

Просмотреть файл

@ -1657,7 +1657,7 @@ void PrintBuiltInfo()
fprintf(stderr, "\t\tCUDA_PATH: %s\n", _CUDA_PATH_);
#endif
#ifdef _CUB_PATH_
fprintf(stderr, "\t\tCUDA_PATH: %s\n", _CUB_PATH_);
fprintf(stderr, "\t\tCUB_PATH: %s\n", _CUB_PATH_);
#endif
#ifdef _GIT_EXIST
fprintf(stderr, "\t\tBuild Branch: %s\n", _BUILDBRANCH_);

Просмотреть файл

@ -294,7 +294,7 @@ public:
m_deviceId = EnforceOneGPUOnly(m_deviceId); // see EnforceOneGPUOnly() for comment on what this is
}
DEVICEID_TYPE GetDeviceId() { return m_deviceId; }
DEVICEID_TYPE GetDeviceId() const { return m_deviceId; }
unsigned long GetRandomSeedOffset() { return m_randomSeedOffset; }
void SetRandomSeedOffset(unsigned long value) { m_randomSeedOffset = value; }

Просмотреть файл

@ -119,6 +119,25 @@ namespace Microsoft { namespace MSR { namespace CNTK {
virtual ~IComputationNode() { }
};
// =======================================================================
// INodeState / IStateFulNode -- interface for stateful nodes (e.g. DelayedValueNodeBase)
// and for the opaque state objects they expose.
// This interface allows a node's state to be exported to and imported from elsewhere,
// which is needed by the sub-minibatch implementation.
// =======================================================================
class INodeState : public std::enable_shared_from_this<INodeState>
{
public:
    virtual ~INodeState() {}
};
struct /*interface*/ IStateFulNode
{
    typedef std::shared_ptr<INodeState> NodeStatePtr;
    // BUGFIX: interface with virtual functions needs a virtual destructor so that
    // deleting through an IStateFulNode pointer is well-defined
    virtual ~IStateFulNode() {}
    // returns a snapshot of the node's state (may be an "empty" state object)
    virtual NodeStatePtr ExportState() = 0;
    // restores a previously exported snapshot
    virtual void ImportState(const NodeStatePtr& pImportedState) = 0;
};
// =======================================================================
// ComputationNetworkOwnedNodeState -- class to collect ComputationNode members that are really owned by ComputationNetwork
// These members are only to be set, changed, and read by ComputationNetwork code.
@ -1475,6 +1494,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
};
// =======================================================================
// helper macro to ease access to base members in presence of C++ two-phase name lookup
// =======================================================================

Просмотреть файл

@ -24,6 +24,51 @@
namespace Microsoft { namespace MSR { namespace CNTK {
// -----------------------------------------------------------------------
// The following defines a state of a delay node which is going to be exported to others (saving for the next minibatch)
// -----------------------------------------------------------------------
// State of a delay node that is exported to others (saved for the next minibatch):
// the delayed activation columns plus the MBLayout they were produced under.
template<class ElemType>
class DelayedValueNodeState : public INodeState
{
public:
    // Creates an empty state whose cached matrix lives on the given device.
    DelayedValueNodeState(int deviceID) :
        m_cachedActivity((size_t)0, (size_t)0, deviceID),
        m_delayedActivationMBLayout(nullptr),
        m_isEmpty(true)
    { }
    // Deep-copies the delayed MBLayout into this state object.
    void CacheDelayedMBLayout(const MBLayoutPtr& pMBLayout)
    {
        m_delayedActivationMBLayout = make_shared<MBLayout>();
        m_delayedActivationMBLayout->CopyFrom(pMBLayout);
    }
    // Caches the delayed activation (1 column per parallel sequence) and marks the state non-empty.
    void CacheState(const Matrix<ElemType>& cachedActivity)
    {
        m_cachedActivity.SetValue(cachedActivity);
        m_isEmpty = false;
    }
    // Copies the cached MBLayout out into pMBLayout.
    // (const-correctness added: these accessors do not mutate the state)
    void ExportDelayedMBLayout(MBLayoutPtr& pMBLayout) const
    {
        pMBLayout->CopyFrom(m_delayedActivationMBLayout);
    }
    bool IsEmpty() const
    {
        return m_isEmpty;
    }
    const Matrix<ElemType>& ExportCachedActivity() const
    {
        return m_cachedActivity;
    }
protected:
    Matrix<ElemType> m_cachedActivity; // 1 column per parallel sequence
    // MBLayoutPtr m_shiftedMBLayout;
    // Currently, we only support saving state for m_timeStep == 1;
    // there is no need for this m_shiftedMBLayout if m_timeStep == 1
    MBLayoutPtr m_delayedActivationMBLayout;
    bool m_isEmpty; // true when no activation needed to be stored (e.g. at a sentence
                    // boundary or in full-utterance mode) -- but even then we still
                    // need m_delayedActivationMBLayout
};
// -----------------------------------------------------------------------
// DelayedValueNodeBase (input) -- abstract base class for PastValueNode and FutureValueNode to hold all shared code
// The two differ in the step direction, some loop directions, and sequence-boundary flags.
@ -31,9 +76,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// TODO: 'direction' is really too general. signOfTimeOffset?
template<class ElemType, int direction/*-1 for Past/left-to-right or +1 for Future/right-to-left*/, MinibatchPackingFlags SequenceStart_or_End/*-Start or -End*/>
class DelayedValueNodeBase : public ComputationNode<ElemType>, public ILateAttachingNode, public NumInputs<1>
class DelayedValueNodeBase : public ComputationNode<ElemType>, public
ILateAttachingNode, public IStateFulNode, public NumInputs<1>
{
typedef ComputationNode<ElemType> Base; UsingComputationNodeMembersBoilerplate;
typedef std::shared_ptr<DelayedValueNodeState<ElemType>> DelayedNodeStatePtr;
static const std::wstring TypeName() { return L"DelayedValue"; }
private:
void Init(size_t row_size, size_t col_size, ElemType initialActivationValue = (ElemType)DEFAULT_HIDDEN_ACTIVATION)
@ -352,6 +399,129 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
}
//========================================
// implement the IStateFulNode interface
//========================================
// Exports the delayed activation and its MBLayout so the next (sub-)minibatch can
// pick up where this one left off. If every parallel sequence is at a boundary
// frame, an "empty" state is exported (layout only, no activation columns).
virtual NodeStatePtr ExportState() override
{
    NodeStatePtr pExportedState;
    size_t nT = m_pMBLayout->GetNumTimeSteps();
    size_t nU = m_pMBLayout->GetNumParallelSequences();
    int dir = direction;
    if (m_timeStep != 1)
    {
        // not supported yet; give user a hint
        RuntimeError("Currently importing/exporting state info for timeStep>1 is not supported. Contact erw@microsoft.com for more detail");
    }
    if (dir == -1) // we look into past
    {
        // if the current last frames are all sentence end or no feature, there is no need to carry on state info
        bool allAtBoundary = true;
        if (m_pMBLayout->Is(nT - 1, MinibatchPackingFlags::SequenceEnd | MinibatchPackingFlags::NoFeature))
        {
            for (size_t u = 0; u < nU; u++)
            {
                if (!m_pMBLayout->Is(u, nT - 1, MinibatchPackingFlags::SequenceEnd | MinibatchPackingFlags::NoFeature))
                {
                    allAtBoundary = false;
                    break;
                }
            }
        }
        else
        {
            allAtBoundary = false;
        }
        auto pState = make_shared<DelayedValueNodeState<ElemType>>(m_deviceId);
        pState->CacheDelayedMBLayout(m_delayedActivationMBLayout);
        if (!allAtBoundary)
        {
            // cache the last frame's columns (1 column per parallel sequence)
            //pState->CacheState(FunctionValues().Reshaped(nD*nU, nT).RowSlice(nD*(nT - 1), nD));
            pState->CacheState(m_delayedActivation.ColumnSlice((nT - 1)*nU, nU));
        }
        // BUGFIX: previously, in the all-at-boundary case the state was created but
        // pExportedState was never assigned (returned nullptr); we now always export
        // the state object -- an empty one still carries the delayed MBLayout, which
        // ImportState needs (and which the forward direction already exported)
        pExportedState = pState;
    }
    else if (dir == 1) // we look into future
    {
        // check whether all sequences are at a boundary and don't carry state if so
        bool allAtBoundary = true;
        if (m_pMBLayout->Is(0, MinibatchPackingFlags::NoFeature | MinibatchPackingFlags::SequenceStart))
        {
            for (size_t u = 0; u < nU; u++)
            {
                if (!m_pMBLayout->Is(u, 0, MinibatchPackingFlags::SequenceStart | MinibatchPackingFlags::NoFeature))
                {
                    allAtBoundary = false;
                    break;
                }
            }
        }
        else
        {
            // BUGFIX: without this else, allAtBoundary stayed true when no sequence
            // was at a boundary at t=0, and the activation was silently dropped
            allAtBoundary = false;
        }
        auto pState = make_shared<DelayedValueNodeState<ElemType>>(m_deviceId);
        pState->CacheDelayedMBLayout(m_delayedActivationMBLayout);
        if (!allAtBoundary)
        {
            pState->CacheState(m_delayedActivation.ColumnSlice((nT - 1)*nU, nU));
        }
        pExportedState = pState;
    }
    else
    {
        // 'direction' is a template parameter; anything but +/-1 is a configuration error
        RuntimeError("Unrecognized direction in DelayedValueNodeBase");
    }
    return pExportedState;
}
// Restores the delayed activation and MBLayout that were exported by ExportState().
virtual void ImportState(const NodeStatePtr& pImportedState) override
{
    DelayedNodeStatePtr pState = dynamic_pointer_cast<DelayedValueNodeState<ElemType>> (pImportedState);
    if (!pState)
        RuntimeError("Expecting DelayValueNodeState after down casting");
    pState->ExportDelayedMBLayout(m_delayedActivationMBLayout); // copy from pState into m_delayedActivationMBLayout
    if (pState->IsEmpty())
    {
        // nothing was cached (all sequences were at a boundary) -- only the layout needed restoring
        return;
    }
    const Matrix<ElemType>& delayedActivation = pState->ExportCachedActivity();
    size_t nT = m_delayedActivationMBLayout->GetNumTimeSteps();
    size_t nU = m_delayedActivationMBLayout->GetNumParallelSequences();
    int dir = direction;
    if (dir == -1) // looking backward: restore the last frame's columns
    {
        m_delayedActivation.SetColumnSlice(delayedActivation, (nT - 1)*nU, nU);
    }
    else if (dir == 1) // looking forward: restore the first frame's columns
    {
        //m_delayedActivation.CopyColumnsStrided(delayedActivation, nU, 1, nT);
        m_delayedActivation.SetColumnSlice(delayedActivation, 0, nU);
    }
    else
    {
        // BUGFIX: condition was 'dir != -1 && dir == 1', which is just 'dir == 1' and
        // (wrongly) raised this error after every successful forward-direction import;
        // only a direction other than +/-1 is invalid
        RuntimeError("Unrecognized direction in DelayedValueNodeBase");
    }
}
protected:
ElemType m_initialActivationValue; // starting value for hidden activation vector at boundary

Просмотреть файл

@ -10,6 +10,8 @@
#include <map>
#include "TrainingCriterionNodes.h"
//#define SMB_DEBUG
namespace Microsoft { namespace MSR { namespace CNTK {
/*static*/ struct DataReaderHelpers
@ -166,4 +168,404 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
};
// SubminibatchDispatcher
// Helper for the sub-minibatch implementation.
// A sub-minibatch is a part of a minibatch; it helps computing large minibatches
// that cannot be loaded into GPU memory in one forward-backward computation.
// The usage would be:
// SubminibatchDispatcher sbhelper;
// for (;;)
// {
//     size_t nsb = sbhelper.GetMinibatchIntoCache(...);
//     for (size_t i=0; i<nsb; i++)
//     {
//         sbhelper.GetSubMinibatchToNet(i);
//         net.Evaluate(criterionNodes[0]);
//         sbhelper.DoneWithCurrentSubMinibatch(i);
//     }
//     UpdateWeights(...);
// }
template<class ElemType>
class SubminibatchDispatcher
{
private:
typedef std::vector<shared_ptr<const msra::dbn::latticesource::latticepair>> Lattice;
typedef std::vector<size_t> Uid;
typedef std::vector<size_t> ExtrauttMap;
typedef std::vector<shared_ptr<const msra::dbn::latticesource::latticepair>>* LatticePtr;
typedef std::vector<size_t>* UidPtr;
typedef std::vector<size_t>* ExtrauttMapPtr;
typedef std::map<std::wstring, Matrix<ElemType>*> Matrices;
// member variables serving as caching space for the full minibatch while the
// net is fed one sub-minibatch at a time
Matrices m_inputMatricesCache;
MBLayoutPtr m_MBLayoutCache;
LatticePtr m_LatticeCache;
UidPtr m_uidCache;
ExtrauttMapPtr m_extrauttmapCache;
shared_ptr<Matrix<ElemType>> m_NetCriterionAccumulator;  // 1x1, accumulates the criterion value over sub-minibatches
shared_ptr<Matrix<ElemType>> m_NetEvaluationAccumulator; // 1 x #evalNodes, accumulates evaluation values over sub-minibatches
std::map<wstring, vector<shared_ptr<INodeState>>> m_NetStates; // m_NetStates[node][i] caches the state of i-th subminibatch of node
Matrices m_CachedGraident; // accumulated gradient per learnable node (sic: member name typo predates this review)
// we also need to remember where to put into the net
MBLayoutPtr m_NetMBLayoutPtr;
std::map<wstring, shared_ptr<ComputationNode<ElemType>>> m_LearnableNodePtr;
// followings are lattice-related
Matrices m_NetInputMatrixPtr;
LatticePtr m_NetLatticePtr;
UidPtr m_NetUidPtr;
ExtrauttMapPtr m_NetExtrauttMapPtr;
// we remember the pointer to the learnable Nodes so that we can accumulate the gradient once a sub-minibatch is done
size_t m_numParallelSequences; // number of parallel sequences in the cached matrix and MBLayout
size_t m_numSubminibatches; // how many subminibatches we are going to use ?
std::vector<shared_ptr<ComputationNode<ElemType>>> m_NetCriterionNodes;
std::vector<shared_ptr<ComputationNode<ElemType>>> m_NetEvaluationNodes;
std::map<wstring, shared_ptr<IStateFulNode>> m_NetStatefulNodes; // we need to Export/Import states of stateful nodes when we switch subminibatches
private:
// Walks the evaluation order of 'root' and records every node implementing
// IStateFulNode into 'statefulnode' (keyed by node name; duplicates skipped).
void EnumerateStatefulNodeWithRoot(ComputationNetwork& net, ComputationNodeBasePtr root, std::map<wstring, shared_ptr<IStateFulNode>>& statefulnode)
{
std::list<ComputationNodeBasePtr> evalorder = net.GetEvalOrder(root, false);
for (auto& x : evalorder)
{
wstring name = x->GetName();
if (statefulnode.find(name )!=statefulnode.end()) continue; // already in the list
shared_ptr<IStateFulNode> pNode = dynamic_pointer_cast<IStateFulNode>(x);
if (pNode)
{
statefulnode[name] = pNode;
}
}
}
// Collects all stateful nodes reachable from the criterion and evaluation roots.
std::map<wstring, shared_ptr<IStateFulNode>> EnumerateStatefulNode(ComputationNetwork& net,
const std::vector<ComputationNodeBasePtr>& criterionNode,
const std::vector<ComputationNodeBasePtr>& evaluationNode)
{
std::map<wstring, shared_ptr<IStateFulNode>> statefulnodes;
for (auto& root : criterionNode)
{
EnumerateStatefulNodeWithRoot(net, root, statefulnodes);
}
for (auto& root : evaluationNode)
{
EnumerateStatefulNodeWithRoot(net, root, statefulnodes);
}
return statefulnodes;
}
public:
SubminibatchDispatcher() :
m_MBLayoutCache(nullptr), m_LatticeCache(nullptr), m_uidCache(nullptr), m_extrauttmapCache(nullptr)
{ }
// One-time setup: allocates the criterion/evaluation accumulators, remembers the
// learnable/criterion/evaluation nodes, and enumerates the stateful nodes whose
// state must be exported/imported between sub-minibatches.
void Init(ComputationNetworkPtr & net,
const std::list<ComputationNodeBasePtr>& learnableNodes,
const std::vector<ComputationNodeBasePtr>& criterionNodes,
const std::vector<ComputationNodeBasePtr>& evaluationNodes)
{
m_MBLayoutCache = make_shared<MBLayout>();
m_NetCriterionAccumulator = make_shared<Matrix<ElemType>>(1, 1, net->GetDeviceId());
m_NetEvaluationAccumulator = make_shared<Matrix<ElemType>>(1, evaluationNodes.size(), net->GetDeviceId());
// remember ptr to learnableNode
for (auto x : learnableNodes)
{
shared_ptr<ComputationNode<ElemType>> pLearnableNode = dynamic_pointer_cast<ComputationNode<ElemType>>(x);
wstring nodename = x->NodeName();
m_LearnableNodePtr[nodename] = pLearnableNode;
}
for (auto& x : criterionNodes)
{
m_NetCriterionNodes.push_back(dynamic_pointer_cast<ComputationNode<ElemType>>(x));
}
for (auto& x : evaluationNodes)
{
m_NetEvaluationNodes.push_back(dynamic_pointer_cast<ComputationNode<ElemType>>(x));
}
m_NetCriterionAccumulator->SetValue((ElemType)0);
m_NetEvaluationAccumulator->SetValue((ElemType)0);
// enumerate all the nodes, find nodes that have state
m_NetStatefulNodes = EnumerateStatefulNode(*net, criterionNodes, evaluationNodes);
for (auto x : m_NetStatefulNodes)
{
wstring name = x.first;
m_NetStates[name] = vector<shared_ptr<INodeState>>();
}
}
~SubminibatchDispatcher()
{
// TODO: remove these by using shared_ptr
delete m_LatticeCache;
delete m_uidCache;
delete m_extrauttmapCache;
for (auto x : m_inputMatricesCache)
{
delete x.second;
}
for (auto x : m_CachedGraident)
{
delete x.second;
}
}
// Deep-copies the full minibatch currently held by the net (input matrices +
// MBLayout) into the cache and decides how many sub-minibatches to split it
// into (capped by the number of parallel sequences). Returns that count.
// NOTE(review): trainSetDataReader is not used in this body -- data comes from
// 'inputMatrices'/'net'; confirm with callers before removing the parameter.
size_t GetMinibatchIntoCache( IDataReader<ElemType>& trainSetDataReader,
ComputationNetwork& net,
std::map<std::wstring, Matrix<ElemType>*> & inputMatrices,
size_t requestedSubminibatches)
{
// first, remember interface to the net
m_NetMBLayoutPtr = net.GetMBLayoutPtr();
m_NetInputMatrixPtr = inputMatrices;
// second, get data from reader, stored it in cache
// 1. for each key, allocate the specific matrix on device
for (auto pa : inputMatrices)
{
wstring name = pa.first;
Matrix<ElemType>* M= pa.second;
if (m_inputMatricesCache.find(name) == m_inputMatricesCache.end())
{
m_inputMatricesCache[name] = new Matrix<ElemType>(*M, M->GetDeviceId()); // deep copy from M
}
else
{
m_inputMatricesCache[name]->SetValue(*M);
}
}
// 2. MBlayout
m_MBLayoutCache->CopyFrom(net.GetMBLayoutPtr());
size_t nParallelSequences = m_MBLayoutCache->GetNumParallelSequences();
if (m_NetCriterionNodes[0] != nullptr && (m_NetCriterionNodes[0]->OperationName() == L"SequenceWithSoftmax"))
{
// auto node = dynamic_pointer_cast<SequenceWithSoftmaxNode<ElemType>>(criterionNode);
NOT_IMPLEMENTED;
// TODO: implement this for Sequence training !!!
}
// subminibatches are cut at the parallel sequence level;
// if #requested subminibatch is larger than #parallel sequence,
// we cannot split further; instead, each subsequence becomes a subminibatch
size_t actualnumSubminibatches = requestedSubminibatches > nParallelSequences ? nParallelSequences : requestedSubminibatches;
// 3. third, allocate space for accumulated gradient
for (auto& n: m_LearnableNodePtr)
{
auto node = n.second;
if (node->IsParameterUpdateRequired())
{
wstring nodeName = node->GetName();
shared_ptr<ComputationNode<ElemType>> pLearnableNode = node;
auto funvalue = pLearnableNode->FunctionValues(); // gradient may not be allocated when this function is first called
size_t nrow = funvalue.GetNumRows();
size_t ncol = funvalue.GetNumCols();
if (m_CachedGraident.find(nodeName) == m_CachedGraident.end())
{
// not allocated yet
m_CachedGraident[nodeName] = new Matrix<ElemType>(nrow, ncol, funvalue.GetDeviceId());
m_CachedGraident[nodeName]->SetValue((ElemType)0);
}
}
}
// 4. for stateful node: one state slot per sub-minibatch
for (auto x : m_NetStatefulNodes)
{
wstring name = x.first;
if (m_NetStates[name].empty())
{
// this only happens in the first minibatch in an epoch
m_NetStates[name].resize(actualnumSubminibatches);
}
}
return (m_numSubminibatches = actualnumSubminibatches);
}
// Decimates the cached minibatch down to sub-minibatch 'iSubminibatch' and copies
// it (data + MBLayout + previously exported node states) into the net.
void GetSubMinibatchToNet(size_t iSubminibatch)
{
Matrices decimatedMatrices;
MBLayoutPtr decimatedLayout;
DataReaderHelpers::DecimateMinibatch(m_inputMatricesCache, decimatedMatrices, m_MBLayoutCache, decimatedLayout, m_numSubminibatches, iSubminibatch);
// NOTE: decimatedMatrices must be released by caller
//m_NetInputMatrixPtr = decimatedMatrices;
for (auto& x : decimatedMatrices)
{
wstring name = x.first;
m_NetInputMatrixPtr[name]->SetValue(*x.second);
delete x.second; // TODO: is it safe to delete here ? Yes! SetValue call cuda memcpy so it is a blocking call
x.second = nullptr;
}
m_NetMBLayoutPtr->CopyFrom(decimatedLayout);
for (auto& x : m_NetStatefulNodes)
{
wstring name = x.first;
shared_ptr<IStateFulNode> pNode = x.second;
if (m_NetStates[name][iSubminibatch]) // null on the first pass, before any state was exported
pNode->ImportState(m_NetStates[name][iSubminibatch]);
}
}
// TODO: encapsulate it into a destructor !!! Note: Cannot throw exceptions in destructor.
// After forward-backward on one sub-minibatch: accumulates gradients, criterion
// and evaluation values (then zeroes them in the net), and exports each stateful
// node's state into the slot for this sub-minibatch.
void DoneWithCurrentSubMinibatch(size_t iSubminibatch)
{
// accumulate gradient here
for (auto x : m_CachedGraident)
{
wstring nodename = x.first;
if (m_LearnableNodePtr.find(nodename) == m_LearnableNodePtr.end())
{
RuntimeError("ERROR: in DoneWithCurrentSubMinibatch: node %ls not found in LeanrableNode", nodename.c_str());
}
shared_ptr<ComputationNode<ElemType>> pNode = m_LearnableNodePtr[nodename];
m_CachedGraident[nodename]->operator+=(pNode->GradientValues());
pNode->GradientValues().SetValue((ElemType)0);
}
// accumulate criterion value
Matrix<ElemType>::AddElementToElement(
m_NetCriterionNodes[0]->FunctionValues() , 0, 0,
*m_NetCriterionAccumulator, 0, 0
);
m_NetCriterionNodes[0]->FunctionValues().SetValue((ElemType)0);
// accumulate evaluation value
for (size_t i = 0; i < m_NetEvaluationNodes.size(); i++)
{
Matrix<ElemType>::AddElementToElement(
m_NetEvaluationNodes[i]->FunctionValues(), 0, 0,
*m_NetEvaluationAccumulator, 0, i
);
m_NetEvaluationNodes[i]->FunctionValues().SetValue((ElemType)0);
}
// Export node state
for (auto& x : m_NetStatefulNodes)
{
wstring name = x.first;
m_NetStates[name][iSubminibatch] = x.second->ExportState();
}
}
// After all sub-minibatches are processed: moves the accumulated gradients and
// criterion/evaluation values back into the net and restores the full MBLayout,
// so the caller can perform a normal model update.
void DoneWithCurrentMinibatch()
{
for (auto& x : m_CachedGraident)
{
wstring name = x.first;
Matrix<ElemType>* accumulategrad = x.second;
if (m_LearnableNodePtr.find(name) == m_LearnableNodePtr.end())
{
// should never happen, remove this code later
RuntimeError("ERROR: in DoneWithCurrentSubMinibatch: node %ls not found in LearnableNode", name.c_str());
}
m_LearnableNodePtr[name]->GradientValues().SetValue(*accumulategrad);
x.second->SetValue((ElemType)0);
}
// also revert net.m_MBLayoutPtr
m_NetMBLayoutPtr->CopyFrom(m_MBLayoutCache);
//m_NetCriterionNodes[0]->FunctionValues().SetValue((ElemType)0);
Matrix<ElemType>::AddElementToElement(
*m_NetCriterionAccumulator, 0, 0,
m_NetCriterionNodes[0]->FunctionValues(), 0, 0
);
m_NetCriterionAccumulator->SetValue((ElemType)0);
for (size_t i = 0; i < m_NetEvaluationNodes.size(); i++)
{
//m_NetEvaluationNodes[i]->FunctionValues().SetValue((ElemType)0);
Matrix<ElemType>::AddElementToElement(
*m_NetEvaluationAccumulator, 0, i,
m_NetEvaluationNodes[i]->FunctionValues(), 0, 0
);
}
m_NetEvaluationAccumulator->SetValue((ElemType)0);
}
#ifdef SMB_DEBUG
// Debug-only helpers for dumping matrices, MBLayouts and gradients to text files.
// Note: template parameter 'Matrix' intentionally shadows the Matrix class; call
// sites pass Matrix<ElemType> explicitly.
template<class Matrix, class ElemType>
void WriteMatrix(const Matrix& mat, string filename)
{
ElemType* pArray = mat.CopyToArray();
size_t nRows = mat.GetNumRows();
size_t nCols = mat.GetNumCols();
FILE* fp = fopenOrDie(filename, "w");
for (size_t r = 0; r < nRows; r++)
{
for (size_t c = 0; c < nCols; c++)
{
fprintf(fp, "%.9f ", pArray[nRows*c + r]);
}
fprintf(fp, "\n");
}
fcloseOrDie(fp);
delete[]pArray;
}
void WriteMBLayout(MBLayoutPtr pMBLayout, wstring filename)
{
size_t nT = pMBLayout->GetNumTimeSteps();
size_t nU = pMBLayout->GetNumParallelSequences();
FILE* fp = fopenOrDie(filename, L"w");
for (size_t u = 0; u < nU; u++)
{
for (size_t t = 0; t < nT; t++)
{
MinibatchPackingFlags flag = pMBLayout->Get(u, t);
fprintf(fp, "%d\t", (int)flag);
}
fprintf(fp, "\n");
}
fcloseOrDie(fp);
}
void WriteInputMatriceAndMBLayout(size_t mbID, size_t smbID)
{
wstring node = L"features";
wstring filename = msra::strfun::wstrprintf(L"tmp/%s.%d.%d", node.c_str(), mbID, smbID);
if (m_NetInputMatrixPtr.find(node) != m_NetInputMatrixPtr.end())
{
WriteMatrix<Matrix<ElemType>, ElemType>(*m_NetInputMatrixPtr[node], msra::strfun::wcstombs(filename));
}
wstring fn = msra::strfun::wstrprintf(L"tmp/Layout.%d.%d", mbID, smbID);
WriteMBLayout(m_NetMBLayoutPtr, fn);
}
void WriteInputMatriceAndMBLayout(Matrices m, MBLayoutPtr pMBLayout, size_t mbID)
{
wstring filename = msra::strfun::wstrprintf(L"tmp/features.%d", mbID);
wstring fn = msra::strfun::wstrprintf(L"tmp/layout.%d", mbID);
if (m.find(L"features") != m.end())
{
WriteMatrix<Matrix<ElemType>, ElemType>(*m[L"features"], msra::strfun::wcstombs(filename));
}
WriteMBLayout(pMBLayout, fn);
}
void WriteGradient(size_t mbID)
{
wstring node = L"LSTMoutput1.bias";
wstring filename = msra::strfun::wstrprintf(L"%s.%d", L"tmp/gradient", mbID);
if (m_CachedGraident.find(node) != m_CachedGraident.end())
{
WriteMatrix<Matrix<ElemType>, ElemType>(*m_CachedGraident[node], msra::strfun::wcstombs(filename));
}
}
void WriteGradient(const Matrix<ElemType>& mat, wstring fn)
{
WriteMatrix<Matrix<ElemType>, ElemType>(mat, msra::strfun::wcstombs(fn));
}
#endif // SMB_DEBUG
};
}}}

Просмотреть файл

@ -110,6 +110,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// truncated = truncation length
m_mbSize = configSGD(L"minibatchSize", ConfigRecordType::Array(intargvector(vector<int>{ 256 })));
m_truncated = configSGD(L"truncated", false);
m_maxSamplesInRAM = configSGD(L"maxSamplesInRAM", ConfigRecordType::Array(intargvector(vector < int > {0})));
// the number of samples in each epoch (0 means, use all the samples in each epoch).
m_epochSize = configSGD(L"epochSize", (size_t)0);
@ -1697,6 +1698,22 @@ namespace Microsoft { namespace MSR { namespace CNTK {
refNet->StartEvaluateMinibatchLoop(refNode);
}
SubminibatchDispatcher<ElemType> smbDisplatcher;
size_t samplesInRAM = m_maxSamplesInRAM[epochNumber];
// convert it to SubminibatchRequested
size_t numSubminibatchRequested = 0;
if (samplesInRAM > 0) // if samplesInRAM = 0 , we will not use subminibatch dispatcher
{
size_t nParallelSequences = trainSetDataReader->GetNumParallelSequences();
size_t estimatedMBSize = tunedMBSize * nParallelSequences;
numSubminibatchRequested = (size_t)std::ceil(estimatedMBSize / samplesInRAM);
}
if (numSubminibatchRequested > 1) // only use subminibatch dispatcher if more than 1 subminibatch is required
{
smbDisplatcher.Init(net, learnableNodes, criterionNodes, evaluationNodes);
}
size_t actualNumSubminibatch=0;
// Attemps to compute the error signal for the whole utterance, which will
// be fed to the neural network as features. Currently it is a workaround
// for the two-forward-pass sequence and ctc training, which allows
@ -1710,10 +1727,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
fprintf(stderr, ", DataParallelSGD training (MyRank = %d, NumNodes = %d, NumGradientBits = %d)",
(int)g_mpi->CurrentNodeRank(), (int)g_mpi->NumNodesInUse(), (int)m_numGradientBits);
}
if (useDistributedMBReading)
{
fprintf(stderr, ", Distributed reading is ENABLED");
fprintf(stderr, ", distributed reading is ENABLED");
}
if (numSubminibatchRequested > 0)
{
fprintf(stderr, ", with %d Max Samples in RAM", (int)samplesInRAM);
}
fprintf(stderr, ".\n");
@ -1735,6 +1755,15 @@ namespace Microsoft { namespace MSR { namespace CNTK {
nSamplesSinceLastModelSync += actualMBSize;
if (numSubminibatchRequested > 0)
{
actualNumSubminibatch = smbDisplatcher.GetMinibatchIntoCache(*trainSetDataReader, *net, *inputMatrices, numSubminibatchRequested);
}
else
{
actualNumSubminibatch = 0;
}
// node data was changed
// TODO: move this to that function as well--just tired to pass everything as arguments
// TODO: We should do this right after the GetMinibatch() call, since that's where these changed.
@ -1772,26 +1801,30 @@ namespace Microsoft { namespace MSR { namespace CNTK {
//compute eval node first since when gradient is computed the forward function values
//may be changed and need to be recomputed when gradient and function value share the same matrix
net->Evaluate(evaluationNodes);
if (actualNumSubminibatch > 0)
{
for (size_t ismb = 0; ismb < actualNumSubminibatch; ismb++)
{
smbDisplatcher.GetSubMinibatchToNet(ismb);
#ifdef SMB_DEBUG
//smbhelper.WriteInputMatriceAndMBLayout(numMBsRun, ismb);
#endif
ComputationNetwork::UpdateEvalTimeStamps(featureNodes);
ComputationNetwork::UpdateEvalTimeStamps(labelNodes);
ForwardBackward(*net, evaluationNodes, criterionNodes[0], learnRatePerSample > 0.01 * m_minLearnRate);
smbDisplatcher.DoneWithCurrentSubMinibatch(ismb);
}
#ifdef SMB_DEBUG
//smbhelper.WriteGradient(numMBsRun);
#endif
smbDisplatcher.DoneWithCurrentMinibatch();
// only compute gradient when learning rate is large enough
if (learnRatePerSample > m_minLearnRate * 0.01)
{
// use only the first criterion. Is there any possibility to use more?
// ==============================
// forward prop, back-prop --this is where the magic happens baby, what we have all be waiting for!
// ==============================
net->ComputeGradient<ElemType>(criterionNodes[0]);
// TODO: we should split Evaluate() out from ComputeGradient(), then call them ForwardProp() and BackProp(), for clarity
}
else
else
{
// use only the first criterion. Is there any possibility to use more?
// ==============================
// forward prop
// ==============================
net->Evaluate(criterionNodes[0]);
ForwardBackward(*net, evaluationNodes, criterionNodes[0], learnRatePerSample > 0.01 * m_minLearnRate);
}
} // if (actualMBSize > 0)
// Some labels may be missing (e.g. forced alignment failed, or being gaps due to packing parallel sequences).

Просмотреть файл

@ -150,6 +150,14 @@ protected:
// We really should only read it in SGD and pass it ourselves on to the Reader, instead of it being a Reader parameter.
// BUGBUG: If m_truncated, then m_mbSize is interpreted as truncation length; the actual MB size is a combination of that and the #parallel sequences specified in the reader.
// TODO: do not specify 'Truncated' but 'TruncatedLength', set m_truncated so given, and let m_mbSize control how many #parallel sequences the reader is allowed to pack into an MB.
intargvector m_maxSamplesInRAM;
// This is related with subminibatch implementation
// maxSamplesInRAM denotes how many samples we used in forward-backward on net.
// Due to the GPU memory limitations, it is sometime not possible to hold the m_mbSize in RAM.
// To mitigate this issue, we adopt the sub-minibatch implementation, where
// each m_mbSize[epoch] is divided by a few sub-minibatch of which size will be no more than m_maxSamplesInRAM[epoch]
// a forward-backward is performed for each sub-minibathch; a model update is performed after each minibatch
// the number of samples in each epoch (0 means, use all the samples in each epoch).
size_t m_epochSize;
@ -485,6 +493,28 @@ protected:
private:
int SGDTrace(FILE *__restrict __stream, const char *__restrict __format, ...);
// Runs the forward pass over the evaluation nodes first (gradient computation may
// overwrite shared function values), then either runs forward + back-prop through
// the first criterion node (dobackpropogate == true) or forward prop only.
void ForwardBackward(ComputationNetwork& net, const std::vector<ComputationNodeBasePtr>& evalNodes, shared_ptr<ComputationNodeBase> criterionNode, bool dobackpropogate=true)
{
    // evaluation nodes first, so their values are computed before anything is clobbered
    net.Evaluate(evalNodes);
    if (!dobackpropogate)
    {
        // ==============================
        // forward prop only (e.g. learning rate too small to warrant a gradient)
        // ==============================
        net.Evaluate(criterionNode);
        return;
    }
    // ==============================
    // forward prop + back-prop -- this is where the magic happens
    // ==============================
    // TODO: we should split Evaluate() out from ComputeGradient(), then call them ForwardProp() and BackProp(), for clarity
    net.ComputeGradient<ElemType>(criterionNode);
}
};
}}}

Просмотреть файл

@ -0,0 +1,712 @@
//
// <copyright file="CPUMatrixUnitTests.cpp" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
#include "stdafx.h"
#include "CppUnitTest.h"
#include "..\Math\CPUMatrix.h"
#define DEBUG_FLAG 1
using namespace Microsoft::MSR::CNTK;
#pragma warning (disable: 4305)
using namespace Microsoft::VisualStudio::CppUnitTestFramework;
namespace CNTKMathTest
{
TEST_CLASS(CPUMatrixUnitTest)
{
//typedef CPUSingleMatrix Matrix;
typedef CPUDoubleMatrix Matrix;
public:
// Debug helper: prints matrix M to gpuDebugFile as integers, preceded by label 'str'.
// M is taken by value (full copy) and transposed so the element array can be printed
// row-by-row; acceptable for a debug-only helper.
// NOTE(review): assumes GetArray() returns the matrix's element buffer without
// transferring ownership (nothing is freed here) -- confirm against CPUMatrix.
static void DebugPrint(FILE* gpuDebugFile, Matrix M, const char* str, const bool colwiseVec = true)
{
fprintf(gpuDebugFile, "\n %s\n", str);
const size_t matNumCol = M.GetNumCols();
const size_t elemNum = M.GetNumElements();
Matrix M1 = M.Transpose();
double* pArray = M1.GetArray();
if (colwiseVec)
{
// print elemNum values, breaking the line every matNumCol elements
for (size_t i = 0; i < elemNum; i++)
{
fprintf(gpuDebugFile, "%3d ", (int)pArray[i]);
if ( (i+1)% matNumCol == 0)
fprintf(gpuDebugFile, "\n");
}
}
// else: nothing is printed -- only the column-wise layout is implemented
//const size_t matNumRow = M.GetNumRows();
//for (int i = 0; i < matNumRow; i++)
//{
//    for (int j = 0; j < matNumCol; j++)
//    {
//        fprintf(gpuDebugFile, "%3d ", M(i,j));
//        //if ( (j+1)% matNumCol == 0)
//    }
//    fprintf(gpuDebugFile, "\n");
//}
}
// Exercises the CPUMatrix constructors: default + Resize, sized, from raw buffer
// (column-major and row-major), copy construction and copy assignment.
TEST_METHOD(CPUMatrixConsturctors)
{
    // default ctor -> empty; Resize allocates storage and element access works
    Matrix M0;
    Assert::IsTrue(M0.IsEmpty());
    M0.Resize(2,3);
    Assert::IsFalse(M0.IsEmpty());
    Assert::AreEqual<size_t>(2,M0.GetNumRows());
    Assert::AreEqual<size_t>(3,M0.GetNumCols());
    Assert::AreEqual<size_t>(6,M0.GetNumElements());
    M0(0,0) = 1; M0(1,2) = 2;
    Assert::IsTrue(M0(0,0) == 1);
    Assert::IsTrue(M0(1,2) == 2);
    // sized ctor
    Matrix M1(12,53);
    Assert::AreEqual<size_t>(12,M1.GetNumRows());
    Assert::AreEqual<size_t>(53,M1.GetNumCols());
    // ctor from a raw buffer with the default (column-major) layout
    float *fArray = new float[6];
    fArray[0] = 1; fArray[1] = 2; fArray[2] = 3;
    fArray[3] = 4; fArray[4] = 5; fArray[5] = 6;
    CPUMatrix<float> M2(2, 3, fArray, matrixFlagNormal);
    Assert::AreEqual<float>(M2(0,0), 1);
    Assert::AreEqual<float>(M2(0,1), 3);
    Assert::AreEqual<float>(M2(0,2), 5);
    Assert::AreEqual<float>(M2(1,0), 2);
    Assert::AreEqual<float>(M2(1,1), 4);
    Assert::AreEqual<float>(M2(1,2), 6);
    // BUGFIX: buffer was leaked; with matrixFlagNormal the matrix keeps its own copy,
    // so the caller owns (and must free) the input buffer -- confirm flag semantics
    delete[] fArray;
    // ctor from a raw buffer with row-major layout
    double *dArray = new double[6];
    dArray[0] = 1; dArray[1] = 2; dArray[2] = 3;
    dArray[3] = 4; dArray[4] = 5; dArray[5] = 6;
    CPUMatrix<double> M3(2, 3, dArray, matrixFormatRowMajor);
    Assert::AreEqual<double>(M3(0,0), 1);
    Assert::AreEqual<double>(M3(0,1), 2);
    Assert::AreEqual<double>(M3(0,2), 3);
    Assert::AreEqual<double>(M3(1,0), 4);
    Assert::AreEqual<double>(M3(1,1), 5);
    Assert::AreEqual<double>(M3(1,2), 6);
    delete[] dArray; // BUGFIX: buffer was leaked
    // copy ctor and copy assignment
    Matrix M4(M0);
    Assert::IsTrue(M4.IsEqualTo(M0));
    Matrix M5 = M0;
    Assert::IsTrue(M5.IsEqualTo(M0));
}
TEST_METHOD(CPUMatrixAddAndSub)
{
Matrix M0(2,3);
M0(0,0) = 1; M0(0,1) = 2; M0(0,2) = 3;
M0(1,0) = 4; M0(1,1) = 5; M0(1,2) = 6;
Matrix M1(2,3);
M1(0,0) = 11; M1(0,1) = 12; M1(0,2) = 13;
M1(1,0) = 14; M1(1,1) = 15; M1(1,2) = 16;
Matrix M2(2,3);
M2(0,0) = 12; M2(0,1) = 14; M2(0,2) = 16;
M2(1,0) = 18; M2(1,1) = 20; M2(1,2) = 22;
Matrix MC(2,1);
MC(0,0) = 10;
MC(1,0) = 10;
Matrix MR(1,3);
MR(0,0) = 10; MR(0,1) = 10; MR(0,2) = 10;
Matrix MS(1,1);
MS(0,0) = 10;
Matrix M3 = M2 - M0;
Assert::IsTrue(M3.IsEqualTo(M1));
M3 += M0;
Assert::IsTrue(M3.IsEqualTo(M2));
M3 = M0 + 10;
Assert::IsTrue(M3.IsEqualTo(M1));
M3 -= 10;
Assert::IsTrue(M3.IsEqualTo(M0));
M3 = M1 + M0;
Assert::IsTrue(M3.IsEqualTo(M2));
M3 -= M0;
Assert::IsTrue(M3.IsEqualTo(M1));
M3 = M1 - 10;
Assert::IsTrue(M3.IsEqualTo(M0));
M3 += 10;
Assert::IsTrue(M3.IsEqualTo(M1));
M3 -= MC;
Assert::IsTrue(M3.IsEqualTo(M0));
M3 += MC;
Assert::IsTrue(M3.IsEqualTo(M1));
M3 -= MR;
Assert::IsTrue(M3.IsEqualTo(M0));
M3 += MR;
Assert::IsTrue(M3.IsEqualTo(M1));
M3.AssignDifferenceOf(M3, MS);
Assert::IsTrue(M3.IsEqualTo(M0));
}
TEST_METHOD(CPUMatrixMultiAndDiv)
{
Matrix M0(2,3);
M0(0,0) = 1; M0(0,1) = 2; M0(0,2) = 3;
M0(1,0) = 4; M0(1,1) = 5; M0(1,2) = 6;
Matrix M00(2,3);
M00(0,0) = 10; M00(0,1) = 20; M00(0,2) = 30;
M00(1,0) = 40; M00(1,1) = 50; M00(1,2) = 60;
Matrix M1(2,3);
M1.Reshape(3,2);
M1(0,0) = 11; M1(0,1) = 15;
M1(1,0) = 14; M1(1,1) = 13;
M1(2,0) = 12; M1(2,1) = 16;
Matrix M2(2,2);
M2(0,0) = 75; M2(0,1) = 89;
M2(1,0) = 186; M2(1,1) = 221;
Matrix M3 = M0 * M1;
Assert::IsTrue(M3.IsEqualTo(M2));
M3 = M0 * 10;
Assert::IsTrue(M3.IsEqualTo(M00));
M3 = M3 / 10;
Assert::IsTrue(M3.IsEqualTo(M0));
M3 *= 10;
Assert::IsTrue(M3.IsEqualTo(M00));
M3 /= 10;
Assert::IsTrue(M3.IsEqualTo(M0));
Matrix::MultiplyAndWeightedAdd(1, M0, false, M1, false, 0, M3);
Assert::IsTrue(M3.IsEqualTo(M2));
M1.Reshape(2,3);
Matrix::MultiplyAndWeightedAdd(1, M0, false, M1, true, 0, M3);
M2(0,0) = 74; M2(0,1) = 92;
M2(1,0) = 182; M2(1,1) = 227;
Assert::IsTrue(M3.IsEqualTo(M2));
Matrix::MultiplyAndWeightedAdd(10, M0, false, M1, true, 2, M3);
M2(0,0) = 888; M2(0,1) = 1104;
M2(1,0) = 2184; M2(1,1) = 2724;
Assert::IsTrue(M3.IsEqualTo(M2));
Matrix::MultiplyAndWeightedAdd(1, M0, true, M1, false, 0, M3);
M2.Resize(3,3);
M2(0,0) = 67; M2(0,1) = 72; M2(0,2) = 77;
M2(1,0) = 92; M2(1,1) = 99; M2(1,2) = 106;
M2(2,0) = 117; M2(2,1) = 126; M2(2,2) = 135;
Assert::IsTrue(M3.IsEqualTo(M2));
}
TEST_METHOD(CPUMatrixElementOps)
{
Matrix M0(2,3);
M0(0,0) = 1; M0(0,1) = 2; M0(0,2) = 3;
M0(1,0) = 4; M0(1,1) = 5; M0(1,2) = 6;
Matrix M00(2,3);
M00(0,0) = 1.0; M00(0,1) = 1/2.0; M00(0,2) = 1/3.0;
M00(1,0) = 1/4.0; M00(1,1) = 1/5.0; M00(1,2) = 1/6.0;
Matrix M1(2,3);
M1(0,0) = 1; M1(0,1) = 1; M1(0,2) = 1;
M1(1,0) = 1; M1(1,1) = 1; M1(1,2) = 1;
Matrix M3;
M3.AssignElementProductOf(M0, M00);
Assert::IsTrue(M3.IsEqualTo(M1, 0.0001));
M3 = M0 ^ 4;
Matrix M2(2,3);
M2(0,0) = 1; M2(0,1) = 16; M2(0,2) = 81;
M2(1,0) = 256; M2(1,1) = 625; M2(1,2) = 1296;
Assert::IsTrue(M3.IsEqualTo(M2));
M3.SetValue(M0);
M3 ^= 4;
Assert::IsTrue(M3.IsEqualTo(M2));
M3.SetValue(M0);
M3.ElementMultiplyWith(M00);
Assert::IsTrue(M3.IsEqualTo(M1));
M3.SetValue(M0);
M3.ElementInverse();
Assert::IsTrue(M3.IsEqualTo(M00));
M2(0,0) = 0.7311; M2(0,1) = 0.8808; M2(0,2) = 0.9526;
M2(1,0) = 0.9820; M2(1,1) = 0.9933; M2(1,2) = 0.9975;
M3.AssignElementDivisionOf(M2, M0);
M2.ElementMultiplyWith(M00);
Assert::IsTrue(M3.IsEqualTo(M2, 0.0001));
M3.SetValue(M0);
M3.InplaceSigmoid();
M2(0,0) = 0.7311; M2(0,1) = 0.8808; M2(0,2) = 0.9526;
M2(1,0) = 0.9820; M2(1,1) = 0.9933; M2(1,2) = 0.9975;
Assert::IsTrue(M3.IsEqualTo(M2, 0.0001));
M3.SetValue(M0);
M3.InplaceTanh();
M2(0,0) = 0.7616; M2(0,1) = 0.9640; M2(0,2) = 0.9951;
M2(1,0) = 0.9993; M2(1,1) = 0.9999; M2(1,2) = 1.0000;
Assert::IsTrue(M3.IsEqualTo(M2, 0.0001));
M3.SetValue(M0);
M3.InplaceLogSoftmax(true);
M3.InplaceExp();
M2(0,0) = 0.0474; M2(0,1) = 0.0474; M2(0,2) = 0.0474;
M2(1,0) = 0.9526; M2(1,1) = 0.9526; M2(1,2) = 0.9526;
Assert::IsTrue(M3.IsEqualTo(M2, 0.0001));
M3.SetValue(M0);
M3.InplaceLogSoftmax(false);
M3.InplaceExp();
M2(0,0) = 0.0900; M2(0,1) = 0.2447; M2(0,2) = 0.6652;
M2(1,0) = 0.0900; M2(1,1) = 0.2447; M2(1,2) = 0.6652;
Assert::IsTrue(M3.IsEqualTo(M2, 0.0001));
M3.SetValue(M0);
M3.InplaceHardmax(true);
M2(0, 0) = 0.0; M2(0, 1) = 0.0; M2(0, 2) = 0.0;
M2(1, 0) = 1.0; M2(1, 1) = 1.0; M2(1, 2) = 1.0;
Assert::IsTrue(M3.IsEqualTo(M2, 0.0001));
M3.SetValue(M0);
M3.InplaceHardmax(false);
M2(0, 0) = 0.0; M2(0, 1) = 0.0; M2(0, 2) = 1.0;
M2(1, 0) = 0.0; M2(1, 1) = 0.0; M2(1, 2) = 1.0;
Assert::IsTrue(M3.IsEqualTo(M2, 0.0001));
M3.SetValue(M0);
M3.InplaceSqrt();
M2(0,0) = 1; M2(0,1) = 1.4142; M2(0,2) = 1.7321;
M2(1,0) = 2; M2(1,1) = 2.2361; M2(1,2) = 2.4495;
Assert::IsTrue(M3.IsEqualTo(M2, 0.0001));
M3.SetValue(M0);
M3.InplaceExp();
M2(0,0) = 2.7183; M2(0,1) = 7.3891; M2(0,2) = 20.0855;
M2(1,0) = 54.5982; M2(1,1) = 148.4132; M2(1,2) = 403.4288;
Assert::IsTrue(M3.IsEqualTo(M2, 0.0001));
M3.SetValue(M0);
M3.InplaceExp();
M2(0,0) = 2.7183; M2(0,1) = 7.3891; M2(0,2) = 20.0855;
M2(1,0) = 54.5982; M2(1,1) = 148.4132; M2(1,2) = 403.4288;
Assert::IsTrue(M3.IsEqualTo(M2, 0.0001));
M3.InplaceLog();
Assert::IsTrue(M3.IsEqualTo(M0, 0.0001));
M3.SetValue(M0);
M3.InplaceTruncateBottom(2);
M2(0,0) = 2; M2(0,1) = 2; M2(0,2) = 3;
M2(1,0) = 4; M2(1,1) = 5; M2(1,2) = 6;
Assert::IsTrue(M3.IsEqualTo(M2));
M3.SetValue(M0);
M3.InplaceTruncateTop(4);
M2(0,0) = 1; M2(0,1) = 2; M2(0,2) = 3;
M2(1,0) = 4; M2(1,1) = 4; M2(1,2) = 4;
Assert::IsTrue(M3.IsEqualTo(M2));
double pi = 3.14159265358979323846264338327950288419716939937510;
Matrix M_Trig(2,3);
M_Trig(0,0) = 0; M_Trig(0,1) = pi/2.0; M_Trig(0,2) = pi;
M_Trig(1,0) = 3.0*pi/2.0; M_Trig(1,1) = 2.0*pi; M_Trig(1,2) = 5.0*pi/2.0;
Matrix M_Cos(2,3);
M_Cos.SetValue(M_Trig);
Matrix M_Cos_expected(2,3);
M_Cos_expected(0,0) = 1; M_Cos_expected(0,1) = 0; M_Cos_expected(0,2) = -1;
M_Cos_expected(1,0) = 0; M_Cos_expected(1,1) = 1; M_Cos_expected(1,2) = 0;
M_Cos.InplaceCosine();
Assert::IsTrue(M_Cos.IsEqualTo(M_Cos_expected, 0.0001));
M_Cos.SetValue(M_Trig);
M_Cos.AssignCosineOf(M_Trig);
Assert::IsTrue(M_Cos.IsEqualTo(M_Cos_expected, 0.0001));
Matrix M_NegSine(2,3);
M_NegSine.SetValue(M_Trig);
Matrix M_NegSine_expected(2,3);
M_NegSine_expected(0,0) = 0; M_NegSine_expected(0,1) = -1; M_NegSine_expected(0,2) = 0;
M_NegSine_expected(1,0) = 1; M_NegSine_expected(1,1) = 0; M_NegSine_expected(1,2) = -1;
M_NegSine.InplaceNegativeSine();
Assert::IsTrue(M_NegSine.IsEqualTo(M_NegSine_expected, 0.0001));
M_NegSine.SetValue(M_Trig);
M_NegSine.AssignNegativeSineOf(M_Trig);
Assert::IsTrue(M_NegSine.IsEqualTo(M_NegSine_expected, 0.0001));
}
TEST_METHOD(CPUMatrixNorms)
{
Matrix M0(2,3);
M0(0,0) = 1; M0(0,1) = 2; M0(0,2) = 3;
M0(1,0) = 4; M0(1,1) = 5; M0(1,2) = 6;
Matrix M3;
M0.VectorNorm1(M3, true);
Matrix M2(1, 3);
M2(0,0) = 5; M2(0,1) = 7; M2(0,2) = 9;
Assert::IsTrue(M3.IsEqualTo(M2));
M0.VectorNorm1(M3, false);
M2.Resize(2,1);
M2(0,0) = 6;
M2(1,0) = 15;
Assert::IsTrue(M3.IsEqualTo(M2));
M0.VectorNorm2(M3, true);
M2.Resize(1, 3);
M2(0,0) = 4.1231; M2(0,1) = 5.3852; M2(0,2) = 6.7082;
Assert::IsTrue(M3.IsEqualTo(M2, 0.0001));
M0.VectorNorm2(M3, false);
M2.Resize(2,1);
M2(0,0) = 3.7417;
M2(1,0) = 8.7750;
Assert::IsTrue(M3.IsEqualTo(M2, 0.0001));
M0.VectorNormInf(M3, true);
M2.Resize(1, 3);
M2(0,0) = 4; M2(0,1) = 5; M2(0,2) = 6;
Assert::IsTrue(M3.IsEqualTo(M2, 0.0001));
M0.VectorNormInf(M3, false);
M2.Resize(2,1);
M2(0,0) = 3;
M2(1,0) = 6;
Assert::IsTrue(M3.IsEqualTo(M2));
Assert::IsTrue(abs(M0.FrobeniusNorm() - 9.5394) < 0.0001);
Assert::IsTrue(abs(M0.MatrixNormInf() - 6) < 0.0001);
Matrix M1;
M0.VectorMax(M1, M3, true);
M2.Resize(1, 3);
M2(0,0) = 4; M2(0,1) = 5; M2(0,2) = 6;
Assert::IsTrue(M3.IsEqualTo(M2, 0.0001));
M0.VectorMax(M1, M3, false);
M2.Resize(2,1);
M2(0,0) = 3;
M2(1,0) = 6;
Assert::IsTrue(M3.IsEqualTo(M2, 0.0001));
M0.VectorMin(M1, M3, true);
M2.Resize(1, 3);
M2(0,0) = 1; M2(0,1) = 2; M2(0,2) = 3;
Assert::IsTrue(M3.IsEqualTo(M2, 0.0001));
M0.VectorMin(M1, M3, false);
M2.Resize(2,1);
M2(0,0) = 1;
M2(1,0) = 4;
Assert::IsTrue(M3.IsEqualTo(M2, 0.0001));
}
TEST_METHOD(CPUMatrixSetValues)
{
Matrix M0(3,3);
M0(0,0) = 10; M0(1,1) = 10; M0(2,2) = 10;
Matrix M1(3,3);
M1.SetDiagonalValue(10);
Assert::IsTrue(M1.IsEqualTo(M0, 0.0001));
Matrix M2(3,1);
M2(0,0) = 10; M2(1,0) = 10; M2(2,0) = 10;
M1.SetDiagonalValue(M2);
Assert::IsTrue(M1.IsEqualTo(M0, 0.0001));
M1.SetUniformRandomValue(-0.01, 0.01);
for (int i=0; i<M1.GetNumRows(); i++)
for (int j=0; j<M1.GetNumCols(); j++)
Assert::IsTrue(M1(i,j) >= -0.01 && M1(i,j) < 0.01);
M1.SetGaussianRandomValue(0, 0.01);
}
TEST_METHOD(CPUMatrixTranspose)
{
Matrix M0(2,3);
M0(0,0) = 1; M0(0,1) = 2; M0(0,2) = 3;
M0(1,0) = 4; M0(1,1) = 5; M0(1,2) = 6;
Matrix M1(3,2);
M1(0,0) = 1; M1(0,1) = 4;
M1(1,0) = 2; M1(1,1) = 5;
M1(2,0) = 3; M1(2,1) = 6;
Matrix M2 = M0.Transpose();
Assert::IsTrue(M2.IsEqualTo(M1, 0.0001));
M2.AssignTransposeOf(M1);
Assert::IsTrue(M2.IsEqualTo(M0, 0.0001));
}
TEST_METHOD(CPUMatrixColumnSlice)
{
Matrix M0(2,3);
M0(0,0) = 1; M0(0,1) = 2; M0(0,2) = 3;
M0(1,0) = 4; M0(1,1) = 5; M0(1,2) = 6;
Matrix M1(2,2);
M1(0,0) = 1; M1(0,1) = 2;
M1(1,0) = 4; M1(1,1) = 5;
Matrix M2 = M0.ColumnSlice(0,2);
Assert::IsTrue(M2.IsEqualTo(M1, 0.0001));
M1(0,0) = 2; M1(0,1) = 3;
M1(1,0) = 5; M1(1,1) = 6;
M2 = M0.ColumnSlice(1,2);
Assert::IsTrue(M2.IsEqualTo(M1, 0.0001));
size_t k=100, n=20, m=50;
Matrix AG((size_t)k,(size_t)n);
AG.SetUniformRandomValue(-1,1);
Matrix BG((size_t)n,(size_t)m);
BG.SetUniformRandomValue(-1,1);
Matrix CG((size_t)k,(size_t)m);
CG.SetUniformRandomValue(-1,1);
Matrix DG((size_t)k,(size_t)m);
DG.SetValue(CG);
Matrix::MultiplyAndAdd(AG, false, BG, false, DG);
for (int i=0; i<m; i++)
{
Matrix col_BG = BG.ColumnSlice(i,1);
Matrix col_CG = CG.ColumnSlice(i,1);
Matrix::MultiplyAndAdd(AG, false, col_BG, false, col_CG);
}
Assert::IsTrue(CG.IsEqualTo(DG, 0.0001));
}
TEST_METHOD(CPUKhatriRaoProduct)
{
Matrix A(3,4);
A(0,0) = 0.8147; A(0,1) = 0.9134; A(0,2) = 0.2785; A(0,3) = 0.9649;
A(1,0) = 0.9058; A(1,1) = 0.6324; A(1,2) = 0.5469; A(1,3) = 0.1576;
A(2,0) = 0.1270; A(2,1) = 0.0975; A(2,2) = 0.9575; A(2,3) = 0.9706;
Matrix B(2,4);
B(0,0) = 0.9572; B(0,1) = 0.8003; B(0,2) = 0.4218; B(0,3) = 0.7922;
B(1,0) = 0.4854; B(1,1) = 0.1419; B(1,2) = 0.9157; B(1,3) = 0.9595;
Matrix D(6,4);
D(0,0) = 0.7798; D(0,1) = 0.7310; D(0,2) = 0.1175; D(0,3) = 0.7644;
D(1,0) = 0.8670; D(1,1) = 0.5061; D(1,2) = 0.2307; D(1,3) = 0.1249;
D(2,0) = 0.1215; D(2,1) = 0.0781; D(2,2) = 0.4038; D(2,3) = 0.7689;
D(3,0) = 0.3954; D(3,1) = 0.1296; D(3,2) = 0.2550; D(3,3) = 0.9258;
D(4,0) = 0.4396; D(4,1) = 0.0897; D(4,2) = 0.5008; D(4,3) = 0.1512;
D(5,0) = 0.0616; D(5,1) = 0.0138; D(5,2) = 0.8768; D(5,3) = 0.9313;
Matrix C;
C.AssignKhatriRaoProductOf(A, B);
Assert::IsTrue(C.IsEqualTo(D, 0.0001));
}
TEST_METHOD(CPUAddColumnReshapeProductOf)
{
Matrix A(6,2);
A(0,0) = 0.6557; A(0,1) = 0.7431;
A(1,0) = 0.0357; A(1,1) = 0.3922;
A(2,0) = 0.8491; A(2,1) = 0.6555;
A(3,0) = 0.9340; A(3,1) = 0.1712;
A(4,0) = 0.6787; A(4,1) = 0.7060;
A(5,0) = 0.7577; A(5,1) = 0.0318;
Matrix B(3,2);
B(0,0) = 0.2769; B(0,1) = 0.8235;
B(1,0) = 0.0462; B(1,1) = 0.6948;
B(2,0) = 0.0971; B(2,1) = 0.3171;
Matrix D0(2,2);
D0(0,0) = 0.2867; D0(0,1) = 1.2913;
D0(1,0) = 0.1266; D0(1,1) = 0.4520;
Matrix D1(2,2);
D1(0,0) = 0.2657; D1(0,1) = 1.0923;
D1(1,0) = 0.3636; D1(1,1) = 0.6416;
Matrix C(2,2);
C.SetValue(0);
C.AddColumnReshapeProductOf(A, B, false);
Assert::IsTrue(C.IsEqualTo(D0, 0.0001));
C.SetValue(0);
C.AddColumnReshapeProductOf(A, B, true);
Assert::IsTrue(C.IsEqualTo(D1, 0.0001));
}
TEST_METHOD(CPUMatrixRowSliceAndStack)
{
Matrix M0(5,3);
M0(0,0) = 1; M0(0,1) = 6; M0(0,2) = 11;
M0(1,0) = 2; M0(1,1) = 7; M0(1,2) = 12;
M0(2,0) = 3; M0(2,1) = 8; M0(2,2) = 13;
M0(3,0) = 4; M0(3,1) = 9; M0(3,2) = 14;
M0(4,0) = 5; M0(4,1) = 10; M0(4,2) = 15;
Matrix M1(2,3);
M1(0,0) = 3; M1(0,1) = 8; M1(0,2) = 13;
M1(1,0) = 4; M1(1,1) = 9; M1(1,2) = 14;
Matrix M2;
M2.AssignRowSliceValuesOf(M0, 2, 2);
Assert::IsTrue(M2.IsEqualTo(M1, 0.0001));
Matrix M3(5,3);
M3(0,0) = 0; M3(0,1) = 0; M3(0,2) = 0;
M3(1,0) = 0; M3(1,1) = 0; M3(1,2) = 0;
M3(2,0) = 3; M3(2,1) = 8; M3(2,2) = 13;
M3(3,0) = 4; M3(3,1) = 9; M3(3,2) = 14;
M3(4,0) = 0; M3(4,1) = 0; M3(4,2) = 0;
M3 += M0;
M0.AddToRowSliceValuesOf(M1, 2,2);
Assert::IsTrue(M3.IsEqualTo(M0, 0.0001));
M2.AddWithRowSliceValuesOf(M1, 0, 2);
Matrix M4(2, 3);
M4(0, 0) = 6; M4(0, 1) = 16; M4(0, 2) = 26;
M4(1, 0) = 8; M4(1, 1) = 18; M4(1, 2) = 28;
Assert::IsTrue(M2.IsEqualTo(M4, 0.0001));
#if 0
Matrix M5, M6, M7, M8;
M5.AssignRowSliceValuesOf(M0, 0, 2);
M6.AssignRowSliceValuesOf(M0, 2, 1);
M7.AssignRowSliceValuesOf(M0, 3, 2);
std::vector<const Matrix*> inputMatrices;
inputMatrices.resize(3);
inputMatrices[0] = &M5;
inputMatrices[1] = &M6;
inputMatrices[2] = &M7;
M8.AssignRowStackValuesOf(inputMatrices, 0, 3);
Assert::IsTrue(M8.IsEqualTo(M0, 0.0001));
#endif
}
TEST_METHOD(CPUAssignRepeatOf)
{
Matrix M0(2, 3);
M0(0, 0) = 1; M0(0, 1) = 6; M0(0, 2) = 11;
M0(1, 0) = 2; M0(1, 1) = 7; M0(1, 2) = 12;
Matrix M1;
M1.AssignRepeatOf(M0, 1, 1);
Assert::IsTrue(M1.IsEqualTo(M0, 0.0001));
Matrix M3(6, 6);
M3(0, 0) = 1; M3(0, 1) = 6; M3(0, 2) = 11; M3(0, 3) = 1; M3(0, 4) = 6; M3(0, 5) = 11;
M3(1, 0) = 2; M3(1, 1) = 7; M3(1, 2) = 12; M3(1, 3) = 2; M3(1, 4) = 7; M3(1, 5) = 12;
M3(2, 0) = 1; M3(2, 1) = 6; M3(2, 2) = 11; M3(2, 3) = 1; M3(2, 4) = 6; M3(2, 5) = 11;
M3(3, 0) = 2; M3(3, 1) = 7; M3(3, 2) = 12; M3(3, 3) = 2; M3(3, 4) = 7; M3(3, 5) = 12;
M3(4, 0) = 1; M3(4, 1) = 6; M3(4, 2) = 11; M3(4, 3) = 1; M3(4, 4) = 6; M3(4, 5) = 11;
M3(5, 0) = 2; M3(5, 1) = 7; M3(5, 2) = 12; M3(5, 3) = 2; M3(5, 4) = 7; M3(5, 5) = 12;
M1.AssignRepeatOf(M0, 3, 2);
Assert::IsTrue(M1.IsEqualTo(M3, 0.0001));
}
TEST_METHOD(CPURowElementOperations)
{
Matrix M0 = Matrix::RandomUniform(20, 28, -1, 1);
Matrix M1 = Matrix::RandomUniform(1, 28, 1, 2);
Matrix M3;
M3.SetValue(M0);
M3.RowElementMultiplyWith(M1);
M3.RowElementDivideBy(M1);
Assert::IsTrue(M0.IsEqualTo(M3, 0.0001));
}
TEST_METHOD(CPUColumnElementOperations)
{
Matrix M0 = Matrix::RandomUniform(20, 28, -1, 1);
Matrix M1 = Matrix::RandomUniform(20, 1, 1, 2);
Matrix M3;
M3.SetValue(M0);
M3.ColumnElementMultiplyWith(M1);
M3.ColumnElementDivideBy(M1);
Assert::IsTrue(M0.IsEqualTo(M3, 0.0001));
}
TEST_METHOD(CPUAssignMatrixByColumnSlice)
{
printf("starts here\n");
Matrix M0 = Matrix::RandomUniform(400, 50, -100, 100);
vector<size_t> columnrange = { 0, 3, 5, 4 };
Matrix M1;
try
{
M1.AssignMatrixByColumnSlice(M0, columnrange);
}
catch (exception& e)
{
printf("%s\n", e.what());
Assert::Fail();
}
for (size_t des = 0; des < columnrange.size(); des ++)
{
size_t src = columnrange[des];
double err = 0;
for (size_t r = 0; r < 400; r++)
{
double diff = (M0(r, src) - M1(r, des));
diff *= diff;
err += diff;
}
Assert::AreEqual(err, 0, 1e-7);
}
}
};
}

Просмотреть файл

@ -259,6 +259,19 @@ namespace Microsoft { namespace MSR { namespace CNTK {
return slice;
}
// BUGBUG: Unlike ColumnSlice(), this does not return a view. Must be renamed.
template<class ElemType>
CPUMatrix<ElemType> CPUMatrix<ElemType>::RowSlice(size_t startRow, size_t numRows) const
{
    // Reject out-of-range requests before touching any storage.
    if (startRow + numRows > m_numRows)
        InvalidArgument("The row slice (%d+%d) is out of range of the source matrix (%d).", (int)startRow, (int)numRows, (int)m_numRows);

    // Copy (not view) the requested rows into a fresh matrix and return it.
    CPUMatrix<ElemType> result;
    result.AssignRowSliceValuesOf(*this, startRow, numRows);
    return result;
}
// set this(:, 0:numCols-1) = fromMatrix(:, startColumn : startColumn+numCols-1)
// TODO: why not say *this = ColumnSlice()?
template<class ElemType>
@ -333,7 +346,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
}
}
//for each column of a, we add all rows of a to this starting from startIndex
template<class ElemType>
CPUMatrix<ElemType>& CPUMatrix<ElemType>::AssignToRowSliceValuesOf(const CPUMatrix<ElemType>& a, const size_t startIndex, const size_t numRows)

Просмотреть файл

@ -52,6 +52,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
CPUMatrix<ElemType> ColumnSlice(size_t startColumn, size_t numCols) const;
CPUMatrix<ElemType>& AssignColumnSlice(const CPUMatrix<ElemType>& fromMatrix, size_t startColumn, size_t numCols);
CPUMatrix<ElemType>& SetColumnSlice(const CPUMatrix<ElemType>& fromMatrix, size_t startColumn, size_t numCols);
// BUGBUG: Unlike ColumnSlice(), this does not return a view. Must be renamed.
CPUMatrix<ElemType> RowSlice(size_t startRow, size_t numRows) const;
void CopyColumnsStrided(const CPUMatrix<ElemType>& fromMatrix, size_t numCols, size_t srcNumColsStride, size_t destNumColsStride);

Просмотреть файл

@ -537,6 +537,17 @@ namespace Microsoft { namespace MSR { namespace CNTK {
return slice;
}
template<class ElemType>
GPUMatrix<ElemType> GPUMatrix<ElemType>::RowSlice(size_t startRow, size_t numRows) const
{
    // Reject out-of-range requests before allocating anything.
    if (startRow + numRows > m_numRows)
        InvalidArgument("The row slice (%d+%d) is out of range of the source matrix (%d).", (int)startRow, (int)numRows, (int)m_numRows);

    // Copy (not view) the requested rows into a fresh matrix on the same device.
    GPUMatrix<ElemType> result(GetComputeDeviceId());
    result.AssignRowSliceValuesOf(*this, startRow, numRows);
    return result;
}
template<class ElemType>
GPUMatrix<ElemType>& GPUMatrix<ElemType>::AssignColumnSlice(const GPUMatrix<ElemType>& fromMatrix, size_t startColumn, size_t numCols)
{

Просмотреть файл

@ -142,6 +142,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
GPUMatrix<ElemType> ColumnSlice(size_t startColumn, size_t numCols) const;
GPUMatrix<ElemType>& AssignColumnSlice(const GPUMatrix<ElemType>& fromMatrix, size_t startColumn, size_t numCols);
GPUMatrix<ElemType>& SetColumnSlice(const GPUMatrix<ElemType>& fromMatrix, size_t startColumn, size_t numCols);
// BUGBUG: Unlike ColumnSlice(), this does not return a view. Must be renamed.
GPUMatrix<ElemType> RowSlice(size_t startRow, size_t numRows) const;
void CopyColumnsStrided(const GPUMatrix<ElemType>& fromMatrix, size_t numCols, size_t srcNumColsStride, size_t destNumColsStride);

Просмотреть файл

@ -766,6 +766,46 @@ namespace Microsoft { namespace MSR { namespace CNTK {
return slice;
}
// BUGBUG: Unlike ColumnSlice(), this does not return a view. Must be renamed.
template<class ElemType>
Matrix<ElemType> Matrix<ElemType>::RowSlice(size_t startRow, size_t numRow) const
{
    const int deviceId = GetDeviceId();
    Matrix<ElemType> result(deviceId);
    result.m_preferredDeviceId = m_preferredDeviceId;

    if (GetMatrixType() == MatrixType::DENSE)
    {
        if (deviceId == CPUDEVICE)
        {
            // CPU dense: move the copied slice into the result's CPU-side matrix,
            // constructing it first if the result has none yet.
            if (result.m_CPUMatrix == nullptr)
                result.m_CPUMatrix = new CPUMatrix<ElemType>(static_cast<CPUMatrix<ElemType>&&>(m_CPUMatrix->RowSlice(startRow, numRow)));
            else
                result.m_CPUMatrix->operator=(static_cast<CPUMatrix<ElemType>&&>(m_CPUMatrix->RowSlice(startRow, numRow)));
            result.SetDataLocation(CPU, DENSE);
        }
        else
        {
            // GPU dense: same pattern on the GPU-side matrix.
            if (result.m_GPUMatrix == nullptr)
                result.m_GPUMatrix = new GPUMatrix<ElemType>(static_cast<GPUMatrix<ElemType>&&>(m_GPUMatrix->RowSlice(startRow, numRow)));
            else
                result.m_GPUMatrix->operator=(static_cast<GPUMatrix<ElemType>&&>(m_GPUMatrix->RowSlice(startRow, numRow)));
            result.SetDataLocation(GPU, DENSE);
        }
    }
    else if (GetMatrixType() == MatrixType::SPARSE)
    {
        NOT_IMPLEMENTED;
    }
    else
    {
        RuntimeError("Unknown matrix type");
    }

    return result;
}
template<class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignColumnSlice(const Matrix<ElemType>& fromMatrix, size_t startColumn, size_t numCols)
{

Просмотреть файл

@ -153,7 +153,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
void CopySection(size_t numRows, size_t numCols, ElemType* dst, size_t colStride) const;
Matrix<ElemType> ColumnSlice(size_t startColumn, size_t numCols) const;
// BUGBUG: Unlike ColumnSlice(), this does not return a view. Must be renamed.
Matrix<ElemType> RowSlice(size_t startRow, size_t numRows) const;
// difference between AssignColumnSlice and SetColumnSlice
// AssignColumnSlice : this(:, startColumn:startColumn+numCols-1) = fromMatrix(:, startColumn: startColumn+numCols-1)

Просмотреть файл

@ -490,7 +490,12 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// NOTE(review): appears to be a no-GPU-build stub -- returns an empty matrix on
// device 0 and ignores its arguments; the real implementation lives elsewhere.
template<class ElemType> GPUMatrix<ElemType> GPUMatrix<ElemType>::ColumnSlice(size_t startColumn, size_t numCols) const
{
GPUMatrix<ElemType> slice(0);
return slice;
}
// NOTE(review): appears to be a no-GPU-build stub mirroring the ColumnSlice stub
// above -- returns an empty matrix on device 0 and ignores its arguments.
template<class ElemType> GPUMatrix<ElemType> GPUMatrix<ElemType>::RowSlice(size_t startRow, size_t numRows) const
{
GPUMatrix<ElemType> slice(0);
return slice;
}