Merge with new changes including RowStackNode

This commit is contained in:
kaisheny 2015-06-16 16:01:19 -07:00
Родитель 01468f3fb6 99af4139a5
Коммит f332421b7b
32 изменённых файлов: 810 добавлений и 134 удалений

1
.gitignore поставляемый
Просмотреть файл

@ -15,6 +15,7 @@ x64/
build/ build/
[Bb]in/ [Bb]in/
[Oo]bj/ [Oo]bj/
.run-*
# Enable "build/" folder in the NuGet Packages folder since NuGet packages use it for MSBuild targets # Enable "build/" folder in the NuGet Packages folder since NuGet packages use it for MSBuild targets
!packages/*/build/ !packages/*/build/

Просмотреть файл

@ -47,8 +47,8 @@ BinaryWriter<ElemType>::~BinaryWriter()
// miniBatchMode=Partial // miniBatchMode=Partial
// randomize=None // randomize=None
// wfile=c:\speech\mnist\mnist_test.bin // wfile=c:\speech\mnist\mnist_test.bin
// #wsize - inital size of the file in MB // #wsize - inital size of the file in MB default to 256
// # if calculated size would be bigger, that is used instead // # has to be large enough for your dataset. the file will shrink to the actual size when closed.
// #wsize=256 // #wsize=256
// #wrecords - number of records we should allocate space for in the file // #wrecords - number of records we should allocate space for in the file
// # files cannot be expanded, so this should be large enough. If known modify this element in config before creating file // # files cannot be expanded, so this should be large enough. If known modify this element in config before creating file

Просмотреть файл

@ -980,8 +980,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{ {
// dereference matrix that corresponds to key (input/output name) and // dereference matrix that corresponds to key (input/output name) and
// populate based on whether its a feature or a label // populate based on whether its a feature or a label
//Matrix<ElemType>& data = //Matrix<ElemType>& data = *matrices[iter->first]; // can be features or labels
*matrices[iter->first]; // can be features or labels
if (m_nameToTypeMap[iter->first] == InputOutputTypes::real) if (m_nameToTypeMap[iter->first] == InputOutputTypes::real)
{ {
@ -1058,8 +1057,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{ {
// dereference matrix that corresponds to key (input/output name) and // dereference matrix that corresponds to key (input/output name) and
// populate based on whether its a feature or a label // populate based on whether its a feature or a label
//Matrix<ElemType>& data = //Matrix<ElemType>& data =*matrices[iter->first]; // can be features or labels
*matrices[iter->first]; // can be features or labels
if (m_nameToTypeMap[iter->first] == InputOutputTypes::real) if (m_nameToTypeMap[iter->first] == InputOutputTypes::real)
{ {
@ -1134,8 +1132,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{ {
// dereference matrix that corresponds to key (input/output name) and // dereference matrix that corresponds to key (input/output name) and
// populate based on whether its a feature or a label // populate based on whether its a feature or a label
//Matrix<ElemType>& data = //Matrix<ElemType>& data = *matrices[iter->first]; // can be features or labels
*matrices[iter->first]; // can be features or labels
if (m_nameToTypeMap[iter->first] == InputOutputTypes::real) if (m_nameToTypeMap[iter->first] == InputOutputTypes::real)
{ {

Просмотреть файл

@ -142,6 +142,15 @@ extern void _CHECKED_ASSERT_error(const char * file, int line, const char * exp)
#endif #endif
#endif #endif
/**
These macros are used for sentence segmentation information.
*/
#define SENTENCE_BEGIN 0
#define SENTENCE_MIDDLE 1
#define NO_LABELS -1
#define EXISTS_SENTENCE_BEGIN_OR_NO_LABELS 0
#define NO_EXISTS_SENTENCE_BEGIN_OR_NO_LABELS 1
// ---------------------------------------------------------------------------- // ----------------------------------------------------------------------------
// basic data types // basic data types
// ---------------------------------------------------------------------------- // ----------------------------------------------------------------------------

Просмотреть файл

@ -382,47 +382,58 @@ public:
// TODO: we can store labels more efficiently now since we don't do frame-wise random access anymore. // TODO: we can store labels more efficiently now since we don't do frame-wise random access anymore.
// OK, utterance has all we need --remember it // OK, utterance has all we need --remember it
utteranceset.push_back (std::move (utterance));
if (m==0) if (m==0)
{ {
_totalframes += uttframes;
framesaccum.push_back(uttframes); //track number of frames in each utterance - first feature is the reference
if (!labels.empty() && !lacksmlf) if (!labels.empty() && !lacksmlf)
//if (!labels.empty() && labelsiter != labels[0].end()) //if (!labels.empty() && labelsiter != labels[0].end())
{ {
foreach_index (j, labels) // first verify that all the label files have the proper duration
bool durationmatch = true;
foreach_index(j, labels)
{ {
const auto & labseq = labels[j].find(key)->second; const auto & labseq = labels[j].find(key)->second;
// check if durations match; skip if not // check if durations match; skip if not
size_t labframes = labseq.empty() ? 0 : (labseq[labseq.size()-1].firstframe + labseq[labseq.size()-1].numframes); size_t labframes = labseq.empty() ? 0 : (labseq[labseq.size() - 1].firstframe + labseq[labseq.size() - 1].numframes);
if (labframes != uttframes) if (labframes != uttframes)
{ {
fprintf (stderr, " [duration mismatch (%d in label vs. %d in feat file), skipping %S]", labframes, uttframes, key.c_str()); fprintf(stderr, " [duration mismatch (%d in label vs. %d in feat file), skipping %S]", labframes, uttframes, key.c_str());
nomlf++; nomlf++;
continue; // skip this utterance at all durationmatch = false;
break; // continue; // skip this utterance at all
} }
// expand classid sequence into flat array }
foreach_index (i, labseq) if (durationmatch){
utteranceset.push_back(std::move(utterance));
_totalframes += uttframes;
framesaccum.push_back(uttframes); //track number of frames in each utterance - first feature is the reference
// then parse each mlf if the durations are consistent
foreach_index(j, labels)
{ {
const auto & e = labseq[i]; const auto & labseq = labels[j].find(key)->second;
if ((i > 0 && labseq[i-1].firstframe + labseq[i-1].numframes != e.firstframe) || (i == 0 && e.firstframe != 0)) // expand classid sequence into flat array
throw std::runtime_error (msra::strfun::strprintf ("minibatchutterancesource: labels not in consecutive order MLF in label set: %S", key.c_str())); foreach_index(i, labseq)
if (e.classid >= udim[j]) {
throw std::runtime_error (msra::strfun::strprintf ("minibatchutterancesource: class id %d exceeds model output dimension %d in file %S", e.classid, udim, key.c_str())); const auto & e = labseq[i];
if (e.classid != (CLASSIDTYPE) e.classid) if ((i > 0 && labseq[i - 1].firstframe + labseq[i - 1].numframes != e.firstframe) || (i == 0 && e.firstframe != 0))
throw std::runtime_error ("CLASSIDTYPE has too few bits"); throw std::runtime_error(msra::strfun::strprintf("minibatchutterancesource: labels not in consecutive order MLF in label set: %S", key.c_str()));
for (size_t t = e.firstframe; t < e.firstframe + e.numframes; t++) if (e.classid >= udim[j])
classids[j]->push_back ((CLASSIDTYPE) e.classid); throw std::runtime_error(msra::strfun::strprintf("minibatchutterancesource: class id %d exceeds model output dimension %d in file %S", e.classid, udim, key.c_str()));
numclasses[j] = max (numclasses[j], 1u + e.classid); if (e.classid != (CLASSIDTYPE)e.classid)
counts[j].resize (numclasses[j], 0); throw std::runtime_error("CLASSIDTYPE has too few bits");
counts[j][e.classid] += e.numframes; for (size_t t = e.firstframe; t < e.firstframe + e.numframes; t++)
} classids[j]->push_back((CLASSIDTYPE)e.classid);
classids[j]->push_back ((CLASSIDTYPE) -1); // append a boundary marker marker for checking numclasses[j] = max(numclasses[j], 1u + e.classid);
counts[j].resize(numclasses[j], 0);
counts[j][e.classid] += e.numframes;
}
if (!labels[j].empty() && classids[j]->size() != _totalframes + utteranceset.size()) classids[j]->push_back((CLASSIDTYPE)-1); // append a boundary marker marker for checking
throw std::logic_error (msra::strfun::strprintf ("minibatchutterancesource: label duration inconsistent with feature file in MLF label set: %S", key.c_str()));
assert (labels[j].empty() || classids[j]->size() == _totalframes + utteranceset.size()); if (!labels[j].empty() && classids[j]->size() != _totalframes + utteranceset.size())
throw std::logic_error(msra::strfun::strprintf("minibatchutterancesource: label duration inconsistent with feature file in MLF label set: %S", key.c_str()));
assert(labels[j].empty() || classids[j]->size() == _totalframes + utteranceset.size());
}
} }
} }
else{ else{
@ -451,7 +462,7 @@ public:
} }
if (nomlf + nolat > 0) if (nomlf + nolat > 0)
{ {
fprintf (stderr, "minibatchutterancesource: out of %d files, %d files not found in label set and %d have no lattice\n", infiles.size(), nomlf, nolat); fprintf (stderr, "minibatchutterancesource: out of %d files, %d files not found in label set and %d have no lattice\n", infiles[0].size(), nomlf, nolat);
if (nomlf + nolat > infiles[m].size() / 2) if (nomlf + nolat > infiles[m].size() / 2)
throw std::runtime_error ("minibatchutterancesource: too many files not found in label set--assuming broken configuration\n"); throw std::runtime_error ("minibatchutterancesource: too many files not found in label set--assuming broken configuration\n");
} }

Просмотреть файл

@ -24,6 +24,7 @@
#define DATAREADER_EXPORTS // creating the exports here #define DATAREADER_EXPORTS // creating the exports here
#include "DataReader.h" #include "DataReader.h"
#include "HTKMLFReader.h" #include "HTKMLFReader.h"
#include "commandArgUtil.h"
#ifdef LEAKDETECT #ifdef LEAKDETECT
#include <vld.h> // for memory leak detection #include <vld.h> // for memory leak detection
#endif #endif

Просмотреть файл

@ -28,6 +28,7 @@
#include "DataWriter.h" #include "DataWriter.h"
#include "commandArgUtil.h" #include "commandArgUtil.h"
#include "HTKMLFWriter.h" #include "HTKMLFWriter.h"
#include "commandArgUtil.h"
#ifdef LEAKDETECT #ifdef LEAKDETECT
#include <vld.h> // for memory leak detection #include <vld.h> // for memory leak detection
#endif #endif

Просмотреть файл

@ -2048,6 +2048,10 @@ void BatchSequenceReader<ElemType>::GetLabelOutput(std::map<std::wstring,
{ {
RuntimeError("GetLabelOutput::should use CPU for labels "); RuntimeError("GetLabelOutput::should use CPU for labels ");
} }
if (curDevId != CPUDEVICE)
{
labels->TransferFromDeviceToDevice(CPUDEVICE, curDevId, true, false, false);
}
} }
template<class ElemType> template<class ElemType>

Просмотреть файл

@ -11,6 +11,11 @@
#include <stdexcept> #include <stdexcept>
#include <stdint.h> #include <stdint.h>
#if WIN32
#define ftell64 _ftelli64
#else
#define ftell64 ftell
#endif
// SetState for a particular value // SetState for a particular value
template <typename NumType, typename LabelType> template <typename NumType, typename LabelType>
@ -362,10 +367,10 @@ void UCIParser<NumType, LabelType>::ParseInit(LPCWSTR fileName, size_t startFeat
errno_t err = _wfopen_s( &m_pFile, fileName, L"rb" ); errno_t err = _wfopen_s( &m_pFile, fileName, L"rb" );
if (err) if (err)
std::runtime_error("UCIParser::ParseInit - error opening file"); throw std::runtime_error("UCIParser::ParseInit - error opening file");
int rc = _fseeki64(m_pFile, 0, SEEK_END); int rc = _fseeki64(m_pFile, 0, SEEK_END);
if (rc) if (rc)
std::runtime_error("UCIParser::ParseInit - error seeking in file"); throw std::runtime_error("UCIParser::ParseInit - error seeking in file");
m_fileSize = GetFilePosition(); m_fileSize = GetFilePosition();
m_fileBuffer = new BYTE[m_bufferSize]; m_fileBuffer = new BYTE[m_bufferSize];
@ -377,9 +382,9 @@ void UCIParser<NumType, LabelType>::ParseInit(LPCWSTR fileName, size_t startFeat
template <typename NumType, typename LabelType> template <typename NumType, typename LabelType>
int64_t UCIParser<NumType, LabelType>::GetFilePosition() int64_t UCIParser<NumType, LabelType>::GetFilePosition()
{ {
int64_t position = _ftelli64(m_pFile); int64_t position = ftell64(m_pFile);
if (position == -1L) if (position == -1L)
std::runtime_error("UCIParser::GetFilePosition - error retrieving file position in file"); throw std::runtime_error("UCIParser::GetFilePosition - error retrieving file position in file");
return position; return position;
} }
@ -392,7 +397,7 @@ void UCIParser<NumType, LabelType>::SetFilePosition(int64_t position)
{ {
int rc = _fseeki64(m_pFile, position, SEEK_SET); int rc = _fseeki64(m_pFile, position, SEEK_SET);
if (rc) if (rc)
std::runtime_error("UCIParser::SetFilePosition - error seeking in file"); throw std::runtime_error("UCIParser::SetFilePosition - error seeking in file");
// setup state machine to start at this position // setup state machine to start at this position
PrepareStartPosition(position); PrepareStartPosition(position);
@ -445,7 +450,7 @@ size_t UCIParser<NumType, LabelType>::UpdateBuffer()
size_t bytesToRead = min(m_bufferSize, m_fileSize-m_bufferStart)-saveBytes; size_t bytesToRead = min(m_bufferSize, m_fileSize-m_bufferStart)-saveBytes;
size_t bytesRead = fread(m_fileBuffer+saveBytes, 1, bytesToRead, m_pFile); size_t bytesRead = fread(m_fileBuffer+saveBytes, 1, bytesToRead, m_pFile);
if (bytesRead == 0 && ferror(m_pFile)) if (bytesRead == 0 && ferror(m_pFile))
std::runtime_error("UCIParser::UpdateBuffer - error reading file"); throw std::runtime_error("UCIParser::UpdateBuffer - error reading file");
return bytesRead; return bytesRead;
} }

Просмотреть файл

@ -90,8 +90,8 @@ private:
int m_elementsConvertedThisLine; int m_elementsConvertedThisLine;
// global stats // global stats
int m_totalNumbersConverted; int64_t m_totalNumbersConverted;
int m_totalLabelsConverted; int64_t m_totalLabelsConverted;
// file positions/buffer // file positions/buffer
FILE * m_pFile; FILE * m_pFile;

Просмотреть файл

@ -1,8 +1,9 @@
# command=Simple_Demo_Output RootDir=..
command=Simple_Demo:Simple_Demo_Output command=Simple_Demo:Simple_Demo_Output
# deviceId=-1 for CPU, >=0 for GPU devices # deviceId=-1 for CPU, >=0 for GPU devices
DeviceNumber=-1 DeviceNumber=-1
#stderr=Demo #stderr=Demo
precision=float precision=float
@ -13,7 +14,6 @@ deviceId=$DeviceNumber$
outputNodeNames=ScaledLogLikelihood outputNodeNames=ScaledLogLikelihood
traceLevel=1 traceLevel=1
####################################### #######################################
# TRAINING CONFIG (Simple, Fixed LR) # # TRAINING CONFIG (Simple, Fixed LR) #
####################################### #######################################
@ -52,22 +52,22 @@ Simple_Demo=[
reader=[ reader=[
# reader to use # reader to use
readerType=UCIFastReader readerType=UCIFastReader
file=../Demos/Simple/SimpleDataTrain.txt file=$RootDir$/Demos/Simple/SimpleDataTrain.txt
miniBatchMode=Partial miniBatchMode=Partial
randomize=Auto randomize=Auto
verbosity=1 verbosity=1
features=[ features=[
dim=2 # two-dimensional input data dim=2 # two-dimensional input data
start=0 # Start with first element on line start=0 # Start with first element on line
] ]
labels=[ labels=[
start=2 # Skip two elements start=2 # Skip two elements
dim=1 # One label dimension dim=1 # One label dimension
labelDim=2 # Two labels possible labelDim=2 # Two labels possible
labelMappingFile=../Demos/Simple/SimpleMapping.txt labelMappingFile=$RootDir$/Demos/Simple/SimpleMapping.txt
] ]
] ]
] ]
@ -84,16 +84,16 @@ Simple_Demo_Output=[
reader=[ reader=[
# reader to use # reader to use
readerType=UCIFastReader readerType=UCIFastReader
file=../Demos/Simple/SimpleDataTest.txt file=$RootDir$/Demos/Simple/SimpleDataTest.txt
features=[ features=[
dim=2 dim=2
start=0 start=0
] ]
labels=[ labels=[
start=2 start=2
dim=1 dim=1
labelDim=2 labelDim=2
labelMappingFile=../Demos/Simple/SimpleMapping.txt labelMappingFile=$RootDir$/Demos/Simple/SimpleMapping.txt
] ]
] ]
outputPath=SimpleOutput # Dump output as text outputPath=SimpleOutput # Dump output as text

Просмотреть файл

@ -550,41 +550,38 @@ public:
} }
ComputationNodePtr nodePtr = GetNodeFromName(nodeName); ComputationNodePtr nodePtr = GetNodeFromName(nodeName);
ComputationNodePtr childNodePtr0, childNodePtr1, childNodePtr2, childNodePtr3, childNodePtr4; std::vector<ComputationNodePtr> childrenNodes;
switch (numChildren) childrenNodes.resize(numChildren);
for (int j = 0; j < numChildren; j++)
childrenNodes[j] = GetNodeFromName(childrenNames[j]);
if (nodePtr->OperationName() == RowStackNode<ElemType>::TypeName()) //allow for variable input nodes
nodePtr->AttachInputs(childrenNodes);
else //fixed input nodes
{ {
case 1: switch (numChildren)
childNodePtr0 = GetNodeFromName(childrenNames[0]); {
nodePtr->AttachInputs(childNodePtr0); case 1:
break; nodePtr->AttachInputs(childrenNodes[0]);
case 2: break;
childNodePtr0 = GetNodeFromName(childrenNames[0]); case 2:
childNodePtr1 = GetNodeFromName(childrenNames[1]); nodePtr->AttachInputs(childrenNodes[0], childrenNodes[1]);
nodePtr->AttachInputs(childNodePtr0, childNodePtr1); break;
break; case 3:
case 3: nodePtr->AttachInputs(childrenNodes[0], childrenNodes[1], childrenNodes[2]);
childNodePtr0 = GetNodeFromName(childrenNames[0]); break;
childNodePtr1 = GetNodeFromName(childrenNames[1]); case 4:
childNodePtr2 = GetNodeFromName(childrenNames[2]); nodePtr->AttachInputs(childrenNodes[0], childrenNodes[1], childrenNodes[2], childrenNodes[3]);
nodePtr->AttachInputs(childNodePtr0, childNodePtr1, childNodePtr2); break;
break; case 5:
case 4: nodePtr->AttachInputs(childrenNodes[0], childrenNodes[1], childrenNodes[2], childrenNodes[3], childrenNodes[4]);
childNodePtr0 = GetNodeFromName(childrenNames[0]); break;
childNodePtr1 = GetNodeFromName(childrenNames[1]); case 6:
childNodePtr2 = GetNodeFromName(childrenNames[2]); nodePtr->AttachInputs(childrenNodes[0], childrenNodes[1], childrenNodes[2], childrenNodes[3], childrenNodes[4], childrenNodes[5]);
childNodePtr3 = GetNodeFromName(childrenNames[3]); break;
nodePtr->AttachInputs(childNodePtr0, childNodePtr1, childNodePtr2, childNodePtr3); default:
break; throw std::logic_error("Invalid number of children.");
case 5: }
childNodePtr0 = GetNodeFromName(childrenNames[0]);
childNodePtr1 = GetNodeFromName(childrenNames[1]);
childNodePtr2 = GetNodeFromName(childrenNames[2]);
childNodePtr3 = GetNodeFromName(childrenNames[3]);
childNodePtr4 = GetNodeFromName(childrenNames[4]);
nodePtr->AttachInputs(childNodePtr0, childNodePtr1, childNodePtr2, childNodePtr3, childNodePtr4);
break;
default:
throw std::logic_error("Invalid number of children.");
} }
} }
} }
@ -1028,6 +1025,8 @@ public:
newNode = new LookupTableNode<ElemType>(fstream, modelVersion, m_deviceId, nodeName); newNode = new LookupTableNode<ElemType>(fstream, modelVersion, m_deviceId, nodeName);
else if (nodeType == RowSliceNode<ElemType>::TypeName()) else if (nodeType == RowSliceNode<ElemType>::TypeName())
newNode = new RowSliceNode<ElemType>(fstream, modelVersion, m_deviceId, nodeName); newNode = new RowSliceNode<ElemType>(fstream, modelVersion, m_deviceId, nodeName);
else if (nodeType == RowStackNode<ElemType>::TypeName())
newNode = new RowStackNode<ElemType>(fstream, modelVersion, m_deviceId, nodeName);
else if (nodeType == GMMLogLikelihoodNode<ElemType>::TypeName()) else if (nodeType == GMMLogLikelihoodNode<ElemType>::TypeName())
newNode = new GMMLogLikelihoodNode<ElemType>(fstream, modelVersion, m_deviceId, nodeName); newNode = new GMMLogLikelihoodNode<ElemType>(fstream, modelVersion, m_deviceId, nodeName);
else if (nodeType == SequenceDecoderNode<ElemType>::TypeName()) else if (nodeType == SequenceDecoderNode<ElemType>::TypeName())
@ -1209,6 +1208,8 @@ public:
newNode = new CosDistanceWithNegativeSamplesNode<ElemType>(m_deviceId, nodeName); newNode = new CosDistanceWithNegativeSamplesNode<ElemType>(m_deviceId, nodeName);
else if (nodeType == ParallelNode<ElemType>::TypeName()) else if (nodeType == ParallelNode<ElemType>::TypeName())
newNode = new ParallelNode<ElemType>(m_deviceId, nodeName); newNode = new ParallelNode<ElemType>(m_deviceId, nodeName);
else if (nodeType == RowStackNode<ElemType>::TypeName())
newNode = new RowStackNode<ElemType>(m_deviceId, nodeName);
else else
{ {
fprintf(stderr, "Error creating new ComputationNode of type %ls, with name %ls\n", nodeType.c_str(), nodeName.c_str()); fprintf(stderr, "Error creating new ComputationNode of type %ls, with name %ls\n", nodeType.c_str(), nodeName.c_str());
@ -1582,6 +1583,15 @@ public:
return newNode; return newNode;
} }
ComputationNodePtr RowStack(const std::vector<ComputationNodePtr> inputs, const std::wstring nodeName = L"")
{
ComputationNodePtr newNode(new RowStackNode<ElemType>(m_deviceId, nodeName));
newNode->AttachInputs(inputs);
AddNodeToNet(newNode);
return newNode;
}
ComputationNodePtr GMMLogLikelihood(const ComputationNodePtr unnormedPrior, const ComputationNodePtr mean, const ComputationNodePtr logStddev, const ComputationNodePtr feature, const std::wstring nodeName = L"") ComputationNodePtr GMMLogLikelihood(const ComputationNodePtr unnormedPrior, const ComputationNodePtr mean, const ComputationNodePtr logStddev, const ComputationNodePtr feature, const std::wstring nodeName = L"")
{ {
ComputationNodePtr newNode(new GMMLogLikelihoodNode<ElemType>(m_deviceId, nodeName)); ComputationNodePtr newNode(new GMMLogLikelihoodNode<ElemType>(m_deviceId, nodeName));

Просмотреть файл

@ -158,6 +158,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
throw std::logic_error("This operation does not support six inputs."); throw std::logic_error("This operation does not support six inputs.");
} }
virtual void AttachInputs(const std::vector<ComputationNodePtr>& /*inputs*/)
{
throw std::logic_error("This operation does not support variable-length inputs.");
}
virtual void DetachInputs() virtual void DetachInputs()
{ {
m_children.resize(0); m_children.resize(0);

Просмотреть файл

@ -399,6 +399,167 @@ namespace Microsoft { namespace MSR { namespace CNTK {
template class RowSliceNode<float>; template class RowSliceNode<float>;
template class RowSliceNode<double>; template class RowSliceNode<double>;
//this node is used to extract part of the input by rows as the output
//it has to be continuous segments of rows since each column is treated as one sample
template<class ElemType>
class RowStackNode : public ComputationNode<ElemType>
{
UsingComputationNodeMembers;
public:
RowStackNode(const DEVICEID_TYPE deviceId = AUTOPLACEMATRIX, const std::wstring name = L"") : ComputationNode<ElemType>(deviceId)
{
m_nodeName = (name == L"" ? CreateUniqNodeName() : name);
m_deviceId = deviceId;
MoveMatricesToDevice(deviceId);
InitRecurrentNode();
}
RowStackNode(File& fstream, const size_t modelVersion, const DEVICEID_TYPE deviceId = AUTOPLACEMATRIX, const std::wstring name = L"") : ComputationNode<ElemType>(deviceId)
{
m_nodeName = (name == L"" ? CreateUniqNodeName() : name);
LoadFromFile(fstream, modelVersion, deviceId);
}
// copy constructor
RowStackNode(const RowStackNode<ElemType>* node, const std::wstring& newName, const CopyNodeFlags flags) : ComputationNode<ElemType>(node->m_deviceId)
{
node->CopyTo(this, newName, flags);
}
virtual ComputationNodePtr Duplicate(const std::wstring& newName, const CopyNodeFlags flags) const
{
const std::wstring& name = (newName == L"") ? NodeName() : newName;
ComputationNodePtr node = new RowStackNode<ElemType>(this, name, flags);
return node;
}
virtual void CopyTo(const ComputationNodePtr nodeP, const std::wstring& newName, const CopyNodeFlags flags) const
{
ComputationNode<ElemType>::CopyTo(nodeP, newName, flags);
RowStackNode<ElemType>* node = (RowStackNode<ElemType>*) nodeP;
if (flags & CopyNodeFlags::copyNodeChildren)
{
node->m_children = m_children;
node->m_startRowIndeces = m_startRowIndeces;
node->m_inputMatrices = m_inputMatrices;
}
}
virtual const std::wstring OperationName() const { return TypeName(); }
static const std::wstring TypeName() { return L"RowStack"; }
virtual void ComputeInputPartial(const size_t inputIndex)
{
if (inputIndex >= ChildrenSize())
throw std::invalid_argument("RowStack-ComputeInputPartial: inputIndex out of range.");
ComputeInputPartialS(Inputs(inputIndex)->GradientValues(), GradientValues(), m_startRowIndeces[inputIndex], m_startRowIndeces[inputIndex + 1] - m_startRowIndeces[inputIndex]);
}
virtual void ComputeInputPartial(const size_t inputIndex, const size_t timeIdxInSeq)
{
if (inputIndex >= ChildrenSize())
throw std::invalid_argument("RowStack-ComputeInputPartial: inputIndex out of range.");
Matrix<ElemType> sliceInputGrad = Inputs(inputIndex)->GradientValues().ColumnSlice(timeIdxInSeq * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
Matrix<ElemType> sliceOutputGrad = GradientValues().ColumnSlice(timeIdxInSeq * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
ComputeInputPartialS(sliceInputGrad, sliceOutputGrad, m_startRowIndeces[inputIndex], m_startRowIndeces[inputIndex+1] - m_startRowIndeces[inputIndex]);
}
static void WINAPI ComputeInputPartialS(Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues, const size_t startIndex, const size_t numRows)
{
inputGradientValues.AddWithRowSliceValuesOf(gradientValues, startIndex, numRows);
}
virtual void EvaluateThisNode()
{
EvaluateThisNodeS(m_functionValues, m_inputMatrices, 0, Inputs(0)->FunctionValues().GetNumCols());
}
virtual void EvaluateThisNode(const size_t timeIdxInSeq)
{
Matrix<ElemType> sliceFunctionValues = FunctionValues().ColumnSlice(timeIdxInSeq * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
EvaluateThisNodeS(sliceFunctionValues, m_inputMatrices, timeIdxInSeq * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
}
static void WINAPI EvaluateThisNodeS(Matrix<ElemType>& functionValues, const std::vector<const Matrix<ElemType>*>& inputMatrices, const size_t sliceStartCol, const size_t sliceNumCols)
{
functionValues.AssignRowStackValuesOf(inputMatrices, sliceStartCol, sliceNumCols);
#if NANCHECK
functionValues.HasNan("RowStack");
#endif
}
virtual void Validate()
{
PrintSelfBeforeValidation();
unsigned int numInputs = ChildrenSize();
if (numInputs < 2)
LogicError("RowStack operation: must have two or more inputs.");
if (Inputs(0) == nullptr)
LogicError("RowStack operation: the input node is NULL.");
size_t numCols = Inputs(0)->FunctionValues().GetNumCols();
m_startRowIndeces.resize(ChildrenSize()+1);
m_inputMatrices.resize(ChildrenSize());
size_t totalRows = 0;
m_startRowIndeces[0] = 0;
for (int i = 0; i < ChildrenSize(); i++)
{
if (Inputs(i) == nullptr)
LogicError("RowStack operation: the input node is NULL.");
Matrix<ElemType>& childMatrix = Inputs(i)->FunctionValues();
size_t numRows = childMatrix.GetNumRows();
if (numRows == 0)
LogicError("RowStack operation: the input node %ls has 0 rows.", Inputs(i)->NodeName().c_str());
if (childMatrix.GetNumCols() != numCols)
LogicError("RowStack operation: the input node %ls has different number of columns.", Inputs(i)->NodeName().c_str());
totalRows += numRows;
m_inputMatrices[i] = &childMatrix;
m_startRowIndeces[i + 1] = m_startRowIndeces[i] + numRows;
}
FunctionValues().Resize(totalRows, numCols);
CopyImageSizeFromInputs();
}
virtual void CopyImageSizeFromInputs()
{
CopyImageSizeFromInput(0, true);
m_outputHeight = FunctionValues().GetNumRows();
//WARNING: this node will destroy the image size information from the child
if (m_inputWidth * m_inputChannels != 1)
fprintf(stderr, "WARNING: RowStack operation cannot inherit image size information from its child. Image size info is lost.\n");
}
virtual void AttachInputs(const std::vector<ComputationNodePtr>& inputs)
{
unsigned int numInputs = inputs.size();
m_children.resize(numInputs);
for (unsigned int i = 0; i < numInputs; i++)
m_children[i] = inputs[i];
}
private:
std::vector<size_t> m_startRowIndeces; //start row number in the stacked matrix of each input (child)
std::vector<const Matrix<ElemType>*> m_inputMatrices;
};
template class RowStackNode<float>;
template class RowStackNode<double>;
template<class ElemType> template<class ElemType>
class ScaleNode : public ComputationNode<ElemType> class ScaleNode : public ComputationNode<ElemType>
{ {

Просмотреть файл

@ -222,6 +222,8 @@ bool CheckFunction(std::string& p_nodeType, bool* allowUndeterminedVariable)
ret = true; ret = true;
else if (EqualInsensitive(nodeType, RowSliceNode<ElemType>::TypeName())) else if (EqualInsensitive(nodeType, RowSliceNode<ElemType>::TypeName()))
ret = true; ret = true;
else if (EqualInsensitive(nodeType, RowStackNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, LookupTableNode<ElemType>::TypeName())) else if (EqualInsensitive(nodeType, LookupTableNode<ElemType>::TypeName()))
ret = true; ret = true;
else if (EqualInsensitive(nodeType, GMMLogLikelihoodNode<ElemType>::TypeName(), L"GMMLL")) else if (EqualInsensitive(nodeType, GMMLogLikelihoodNode<ElemType>::TypeName(), L"GMMLL"))

Просмотреть файл

@ -218,10 +218,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{ {
if (colBegin(i,0) == SENTENCE_MIDDLE) if (colBegin(i,0) == SENTENCE_MIDDLE)
{ {
Matrix<ElemType> to1 = inputGradientValues.ColumnSlice((timeIdxInSeq - delay)*mNbr + i, 1); Matrix<ElemType> frm = gradientValues.ColumnSlice(timeIdxInSeq * mNbr + i, 1);
Matrix<ElemType> frm1= gradientValues.ColumnSlice(timeIdxInSeq * mNbr + i, 1); Matrix<ElemType> to = inputGradientValues.ColumnSlice((timeIdxInSeq - delay)*mNbr + i, 1);
to1 += frm1; to += frm;
} }
} }

Просмотреть файл

@ -1810,8 +1810,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
w = m_net->CreateLearnableParameter(msra::strfun::wstrprintf(L"W%d", numHiddenLayers), m_layerSizes[numHiddenLayers], m_layerSizes[numHiddenLayers + 1]); w = m_net->CreateLearnableParameter(msra::strfun::wstrprintf(L"W%d", numHiddenLayers), m_layerSizes[numHiddenLayers], m_layerSizes[numHiddenLayers + 1]);
m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale); m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale);
double val = w->FunctionValues()(0, 0);
/// the label is a dense matrix. each element is the word index /// the label is a dense matrix. each element is the word index
label = m_net->CreateInputNode(L"labels", 2 * (this->nce_noises + 1), mbSize); label = m_net->CreateInputNode(L"labels", 2 * (this->nce_noises + 1), mbSize);

Просмотреть файл

@ -391,29 +391,43 @@ public:
{ {
std::vector<void*> inputs = EvaluateParameters(node, baseName, nodeParamStart, nodeParamCount, pass); std::vector<void*> inputs = EvaluateParameters(node, baseName, nodeParamStart, nodeParamCount, pass);
switch (inputs.size()) if (cnNodeType == RowStackNode<ElemType>::TypeName()) //support variable length inputs
{ {
case 1: std::vector<ComputationNodePtr> inputNodes;
nodePtr->AttachInputs(ComputationNodePtr(inputs[0])); inputNodes.resize(inputs.size());
break; for (int i = 0; i < inputs.size(); i++)
case 2: inputNodes[i] = ComputationNodePtr(inputs[i]);
nodePtr->AttachInputs(ComputationNodePtr(inputs[0]), ComputationNodePtr(inputs[1]));
break;
case 3:
nodePtr->AttachInputs(ComputationNodePtr(inputs[0]), ComputationNodePtr(inputs[1]), ComputationNodePtr(inputs[2]));
break;
case 4:
nodePtr->AttachInputs(ComputationNodePtr(inputs[0]), ComputationNodePtr(inputs[1]), ComputationNodePtr(inputs[2]), ComputationNodePtr(inputs[3]));
break;
case 5:
nodePtr->AttachInputs(ComputationNodePtr(inputs[0]), ComputationNodePtr(inputs[1]), ComputationNodePtr(inputs[2]), ComputationNodePtr(inputs[3]), ComputationNodePtr(inputs[4]));
break;
default:
if (nodeParamCount > 0)
RuntimeError("Invalid number of parameters name = '%s' call = '%s'\n", node->GetName().c_str(), node->GetValue().c_str());
break;
}
nodePtr->AttachInputs(inputNodes);
}
else
{
switch (inputs.size())
{
case 1:
nodePtr->AttachInputs(ComputationNodePtr(inputs[0]));
break;
case 2:
nodePtr->AttachInputs(ComputationNodePtr(inputs[0]), ComputationNodePtr(inputs[1]));
break;
case 3:
nodePtr->AttachInputs(ComputationNodePtr(inputs[0]), ComputationNodePtr(inputs[1]), ComputationNodePtr(inputs[2]));
break;
case 4:
nodePtr->AttachInputs(ComputationNodePtr(inputs[0]), ComputationNodePtr(inputs[1]), ComputationNodePtr(inputs[2]), ComputationNodePtr(inputs[3]));
break;
case 5:
nodePtr->AttachInputs(ComputationNodePtr(inputs[0]), ComputationNodePtr(inputs[1]), ComputationNodePtr(inputs[2]), ComputationNodePtr(inputs[3]), ComputationNodePtr(inputs[4]));
break;
case 6:
nodePtr->AttachInputs(ComputationNodePtr(inputs[0]), ComputationNodePtr(inputs[1]), ComputationNodePtr(inputs[2]), ComputationNodePtr(inputs[3]), ComputationNodePtr(inputs[4]), ComputationNodePtr(inputs[5]));
break;
default:
if (nodeParamCount > 0)
RuntimeError("Invalid number of parameters name = '%s' call = '%s'\n", node->GetName().c_str(), node->GetValue().c_str());
break;
}
}
// process common optional parameters (like "tag"); // process common optional parameters (like "tag");
ProcessOptionalParameters(node); ProcessOptionalParameters(node);
break; break;

Просмотреть файл

@ -32,11 +32,11 @@ DEVICE = gpu
BUILDTYPE = debug BUILDTYPE = debug
#BUILDTYPE = release #BUILDTYPE = release
# comment following and uncomment the next one to enable MKL library # comment following and uncomment the next one to enable MKL library
#MATHLIB = acml MATHLIB = acml
MATHLIB = mkl #MATHLIB = mkl
# modify relevant path below for your system # modify relevant path below for your system
MKL_PATH = /usr/users/chiaying/intel/composer_xe_2013.2.146 MKL_PATH = /usr/users/chiaying/intel/composer_xe_2013.2.146
ACML_PATH = /usr/local/acml5.3.0/gfortran64 ACML_PATH = /usr/local/acml5.3.1/ifort64
####### #######
BUILDFOR = $(ARCH).$(DEVICE).$(BUILDTYPE).$(MATHLIB) BUILDFOR = $(ARCH).$(DEVICE).$(BUILDTYPE).$(MATHLIB)
@ -48,8 +48,8 @@ ifeq ($(BUILDTYPE),debug)
BUILDTYPE_OPT = -g BUILDTYPE_OPT = -g
GPU_BUILDTYPE_OPT = -G GPU_BUILDTYPE_OPT = -G
else else
BUILDTYPE_OPT = -O4 BUILDTYPE_OPT = -O3 -flto
GPU_BUILDTYPE_OPT = GPU_BUILDTYPE_OPT = -O3
endif endif
ifeq ($(MATHLIB),mkl) ifeq ($(MATHLIB),mkl)
@ -142,7 +142,7 @@ $(OBJDIR)/%.o : %.cu Makefile
@echo $(SEPARATOR) @echo $(SEPARATOR)
@echo creating $@ for $(ARCH) with build type $(BUILDTYPE) @echo creating $@ for $(ARCH) with build type $(BUILDTYPE)
@mkdir -p $(dir $@) @mkdir -p $(dir $@)
$(NVCC) -c $< -o $@ $(BUILDTYPE_OPT) $(GPU_BUILDTYPE_OPT) $(NVCCFLAGS) $(INCFLAGS) -Xcompiler -fPIC $(NVCC) -c $< -o $@ $(GPU_BUILDTYPE_OPT) $(NVCCFLAGS) $(INCFLAGS) -Xcompiler -fPIC
$(OBJDIR)/%.o : %.cpp Makefile $(OBJDIR)/%.o : %.cpp Makefile
@echo $(SEPARATOR) @echo $(SEPARATOR)

Просмотреть файл

@ -31,8 +31,8 @@ DEVICE = cpu
#BUILDTYPE = debug #BUILDTYPE = debug
BUILDTYPE = release BUILDTYPE = release
# comment following and uncomment the next one to enable MKL library # comment following and uncomment the next one to enable MKL library
#MATHLIB = acml MATHLIB = acml
MATHLIB = mkl #MATHLIB = mkl
# modify relevant path below for your system # modify relevant path below for your system
MKL_PATH = /usr/users/chiaying/intel/composer_xe_2013.2.146 MKL_PATH = /usr/users/chiaying/intel/composer_xe_2013.2.146
ACML_PATH = /usr/users/yzhang87/code/acml/gfortran64 ACML_PATH = /usr/users/yzhang87/code/acml/gfortran64

Просмотреть файл

@ -563,7 +563,7 @@ namespace CNTKMathTest
Assert::IsTrue(C.IsEqualTo(D1, 0.0001)); Assert::IsTrue(C.IsEqualTo(D1, 0.0001));
} }
TEST_METHOD(CPUMatrixRowSlice) TEST_METHOD(CPUMatrixRowSliceAndStack)
{ {
Matrix M0(5,3); Matrix M0(5,3);
M0(0,0) = 1; M0(0,1) = 6; M0(0,2) = 11; M0(0,0) = 1; M0(0,1) = 6; M0(0,2) = 11;
@ -590,6 +590,26 @@ namespace CNTKMathTest
M3 += M0; M3 += M0;
M0.AddToRowSliceValuesOf(M1, 2,2); M0.AddToRowSliceValuesOf(M1, 2,2);
Assert::IsTrue(M3.IsEqualTo(M0, 0.0001)); Assert::IsTrue(M3.IsEqualTo(M0, 0.0001));
M2.AddWithRowSliceValuesOf(M1, 0, 2);
Matrix M4(2, 3);
M4(0, 0) = 6; M4(0, 1) = 16; M4(0, 2) = 26;
M4(1, 0) = 8; M4(1, 1) = 18; M4(1, 2) = 28;
Assert::IsTrue(M2.IsEqualTo(M4, 0.0001));
Matrix M5, M6, M7, M8;
M5.AssignRowSliceValuesOf(M0, 0, 2);
M6.AssignRowSliceValuesOf(M0, 2, 1);
M7.AssignRowSliceValuesOf(M0, 3, 2);
std::vector<const Matrix*> inputMatrices;
inputMatrices.resize(3);
inputMatrices[0] = &M5;
inputMatrices[1] = &M6;
inputMatrices[2] = &M7;
M8.AssignRowStackValuesOf(inputMatrices, 0, 3);
Assert::IsTrue(M8.IsEqualTo(M0, 0.0001));
} }
TEST_METHOD(CPUAssignRepeatOf) TEST_METHOD(CPUAssignRepeatOf)

Просмотреть файл

@ -278,7 +278,7 @@ namespace CNTKMathTest
Assert::IsTrue(M2.IsEqualTo(M3, 0.0001f)); Assert::IsTrue(M2.IsEqualTo(M3, 0.0001f));
} }
TEST_METHOD(GPUMatrixRowSlice) TEST_METHOD(GPUMatrixRowSliceAndStack)
{ {
float *fArray = new float[15]; float *fArray = new float[15];
fArray[0] = 1; fArray[5] = 6; fArray[10] = 11; fArray[0] = 1; fArray[5] = 6; fArray[10] = 11;
@ -308,6 +308,27 @@ namespace CNTKMathTest
M3 += M0; M3 += M0;
M0.AddToRowSliceValuesOf(M1, 2,2); M0.AddToRowSliceValuesOf(M1, 2,2);
Assert::IsTrue(M3.IsEqualTo(M0, 0.0001)); Assert::IsTrue(M3.IsEqualTo(M0, 0.0001));
M2.AddWithRowSliceValuesOf(M1, 0, 2);
float *fArray4 = new float[6];
fArray4[0] = 6; fArray4[2] = 16; fArray4[4] = 26;
fArray4[1] = 8; fArray4[3] = 18; fArray4[5] = 28;
GPUMatrix<float> M4(2, 3, fArray4, matrixFlagNormal);
Assert::IsTrue(M2.IsEqualTo(M4, 0.0001));
GPUMatrix<float> M5, M6, M7, M8;
M5.AssignRowSliceValuesOf(M0, 0, 2);
M6.AssignRowSliceValuesOf(M0, 2, 1);
M7.AssignRowSliceValuesOf(M0, 3, 2);
std::vector<const GPUMatrix<float> *> inputMatrices;
inputMatrices.resize(3);
inputMatrices[0] = &M5;
inputMatrices[1] = &M6;
inputMatrices[2] = &M7;
M8.AssignRowStackValuesOf(inputMatrices, 0, 3);
Assert::IsTrue(M8.IsEqualTo(M0, 0.0001));
} }
TEST_METHOD(GPUKhatriRaoProduct) TEST_METHOD(GPUKhatriRaoProduct)

Просмотреть файл

@ -429,6 +429,48 @@ namespace Microsoft { namespace MSR { namespace CNTK {
return *this; return *this;
} }
//stack the columns in inputMatrices (starting from sliceStartCol for sliceNumCols columns) and assign it to [this] object.
// Inputs are stacked vertically in the order given; each contributes all of its rows.
// Throws LogicError if sliceNumCols is 0, inputMatrices is empty, any input is
// empty, or any input has fewer than sliceStartCol + sliceNumCols columns.
template<class ElemType>
CPUMatrix<ElemType>& CPUMatrix<ElemType>::AssignRowStackValuesOf(const std::vector<const CPUMatrix<ElemType>*>& inputMatrices, const size_t sliceStartCol, const size_t sliceNumCols)
{
    if (sliceNumCols == 0)
        LogicError("AssignRowStackValuesOf: sliceNumCols should > 0.");
    if (inputMatrices.empty())
        LogicError("AssignRowStackValuesOf: inputMatrices is empty.");

    // startRowIndices[i] = first destination row of input i. A std::vector replaces
    // the original raw new[]/delete[] pair, which (a) leaked whenever LogicError
    // threw inside the validation loop and (b) wrote startRowIndeces[0] into a
    // zero-length allocation when inputMatrices was empty.
    std::vector<size_t> startRowIndices(inputMatrices.size());
    startRowIndices[0] = 0;
    size_t totalRows = 0;
    for (int i = 0; i < (int)inputMatrices.size(); i++)
    {
        const CPUMatrix<ElemType>& a = *inputMatrices[i];
        if (a.IsEmpty())
            LogicError("AssignRowStackValuesOf: input matrix (%d) is empty.", i);
        if (a.GetNumCols() < sliceStartCol + sliceNumCols)
            LogicError("AssignRowStackValuesOf: input matrix (%d) GetNumCols() < sliceStartCol + sliceNumCols.", i);
        totalRows += a.GetNumRows();
        if (i < (int)inputMatrices.size() - 1)
            startRowIndices[i + 1] = startRowIndices[i] + a.GetNumRows();
    }

    Resize(totalRows, sliceNumCols);
    auto& us = *this;

#pragma omp parallel for
    for (long j = 0; j < (long)sliceNumCols; j++)
    {
        // Columns are independent, so they parallelize cleanly. The single memcpy
        // per input relies on a column being contiguous in memory (as the original
        // code did): all GetNumRows() elements of column sliceStartCol+j are copied
        // into rows [startRowIndices[i], startRowIndices[i] + rows_i) of column j.
        for (int i = 0; i < (int)inputMatrices.size(); i++)
        {
            memcpy(&us(startRowIndices[i], j), &(*inputMatrices[i])(0, sliceStartCol + j), inputMatrices[i]->GetNumRows() * sizeof(ElemType));
        }
    }

    return *this;
}
template<class ElemType> template<class ElemType>
void CPUMatrix<ElemType>::MinusOneAt(CPUMatrix<ElemType>& c, const size_t position) void CPUMatrix<ElemType>::MinusOneAt(CPUMatrix<ElemType>& c, const size_t position)
{ {
@ -672,16 +714,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// if it's externally managed, then populate the structure // if it's externally managed, then populate the structure
if (matrixFlags&matrixFlagDontOwnBuffer) if (matrixFlags&matrixFlagDontOwnBuffer)
{ {
// free previous array allocation if any before overwriting
if (m_pArray != nullptr) if (m_pArray != nullptr)
delete [] m_pArray; delete [] m_pArray;
m_pArray = pArray; m_pArray = pArray;
m_numRows = numRows; m_numRows = numRows;
m_numCols = numCols; m_numCols = numCols;
// free previous array allocation if any before overwriting
if (m_pArray != nullptr)
delete[] m_pArray;
m_pArray = pArray;
m_elemSizeAllocated = GetNumElements(); m_elemSizeAllocated = GetNumElements();
m_externalBuffer = true; m_externalBuffer = true;
} }
@ -3877,7 +3916,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
template<class ElemType> template<class ElemType>
void CPUMatrix<ElemType>::AssignNoiseContrastiveEstimation(const CPUMatrix<ElemType>& a, void CPUMatrix<ElemType>::AssignNoiseContrastiveEstimation(const CPUMatrix<ElemType>& a,
const CPUMatrix<ElemType>& b, const CPUMatrix<ElemType>& bias, size_t sampleCount, CPUMatrix<ElemType>& tmp, CPUMatrix<ElemType>& c) const CPUMatrix<ElemType>& b, const CPUMatrix<ElemType>& bias, CPUMatrix<ElemType>& tmp, CPUMatrix<ElemType>& c)
//this: samples+probs //this: samples+probs
// a: hidden // a: hidden
// b: embedding // b: embedding
@ -3892,7 +3931,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
std::cerr << endl; std::cerr << endl;
} }
*/ */
sampleCount *= 1;
double log_likelihood = 0.0; double log_likelihood = 0.0;
size_t sample_size = this->GetNumRows() / 2; size_t sample_size = this->GetNumRows() / 2;
size_t batch_size = this->GetNumCols(); size_t batch_size = this->GetNumCols();

Просмотреть файл

@ -216,7 +216,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
CPUMatrix<ElemType>& AssignVectorNorm2Of(CPUMatrix<ElemType>& a, const bool isColWise); CPUMatrix<ElemType>& AssignVectorNorm2Of(CPUMatrix<ElemType>& a, const bool isColWise);
void AssignNoiseContrastiveEstimation(const CPUMatrix<ElemType>& a, const CPUMatrix<ElemType>& b, const CPUMatrix<ElemType>& bias, void AssignNoiseContrastiveEstimation(const CPUMatrix<ElemType>& a, const CPUMatrix<ElemType>& b, const CPUMatrix<ElemType>& bias,
size_t sampleCount, CPUMatrix<ElemType>& tmp, CPUMatrix<ElemType>& c); CPUMatrix<ElemType>& tmp, CPUMatrix<ElemType>& c);
void AssignNCEUnnormalizedEval(const CPUMatrix<ElemType>& a, void AssignNCEUnnormalizedEval(const CPUMatrix<ElemType>& a,
const CPUMatrix<ElemType>& b, const CPUMatrix<ElemType>& bias, CPUMatrix<ElemType>& c); const CPUMatrix<ElemType>& b, const CPUMatrix<ElemType>& bias, CPUMatrix<ElemType>& c);
@ -244,6 +244,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
CPUMatrix<ElemType>& AssignRowSliceValuesOf(const CPUMatrix<ElemType>& a, const size_t startIndex, const size_t numRows); CPUMatrix<ElemType>& AssignRowSliceValuesOf(const CPUMatrix<ElemType>& a, const size_t startIndex, const size_t numRows);
CPUMatrix<ElemType>& AddToRowSliceValuesOf(const CPUMatrix<ElemType>& a, const size_t startIndex, const size_t numRows); CPUMatrix<ElemType>& AddToRowSliceValuesOf(const CPUMatrix<ElemType>& a, const size_t startIndex, const size_t numRows);
CPUMatrix<ElemType>& AddWithRowSliceValuesOf(const CPUMatrix<ElemType>& a, const size_t startIndex, const size_t numRows); CPUMatrix<ElemType>& AddWithRowSliceValuesOf(const CPUMatrix<ElemType>& a, const size_t startIndex, const size_t numRows);
CPUMatrix<ElemType>& AssignRowStackValuesOf(const std::vector<const CPUMatrix<ElemType>*>& inputMatrices, const size_t sliceStartCol, const size_t sliceNumCols);
CPUMatrix<ElemType>& AssignToRowSliceValuesOf(const CPUMatrix<ElemType>& a, const size_t startIndex, const size_t numRows); CPUMatrix<ElemType>& AssignToRowSliceValuesOf(const CPUMatrix<ElemType>& a, const size_t startIndex, const size_t numRows);

Просмотреть файл

@ -678,6 +678,63 @@ namespace Microsoft { namespace MSR { namespace CNTK {
return *this; return *this;
} }
//stack the columns in inputMatrices (starting from sliceStartCol for sliceNumCols columns) and assign it to [this] object.
// Host-side driver: gathers per-input device pointers and row offsets, uploads
// them to the GPU, and launches _assignRowStackValuesOf to fill the stacked result.
// Throws LogicError if sliceNumCols is 0, inputMatrices is empty, any input is
// empty, or any input has fewer than sliceStartCol + sliceNumCols columns.
template<class ElemType>
GPUMatrix<ElemType>& GPUMatrix<ElemType>::AssignRowStackValuesOf(const std::vector<const GPUMatrix<ElemType>*>& inputMatrices, const size_t sliceStartCol, const size_t sliceNumCols)
{
    if (sliceNumCols == 0)
        LogicError("AssignRowStackValuesOf: sliceNumCols should > 0.");
    if (inputMatrices.empty())
        LogicError("AssignRowStackValuesOf: inputMatrices is empty.");

    // startRowIndices has one extra slot so that input i owns destination rows
    // [startRowIndices[i], startRowIndices[i+1]). std::vector replaces the raw
    // new[]/delete[] pairs, which leaked whenever LogicError threw below.
    std::vector<size_t> startRowIndices(inputMatrices.size() + 1);
    std::vector<ElemType*> srcColumnPointers(inputMatrices.size());
    startRowIndices[0] = 0;
    size_t totalRows = 0;
    for (int i = 0; i < (int)inputMatrices.size(); i++)
    {
        const GPUMatrix<ElemType>& a = *inputMatrices[i];
        if (a.IsEmpty())
            LogicError("AssignRowStackValuesOf: input matrix (%d) is empty.", i);
        if (a.GetNumCols() < sliceStartCol + sliceNumCols)
            LogicError("AssignRowStackValuesOf: input matrix (%d) GetNumCols() < sliceStartCol + sliceNumCols.", i);
        totalRows += a.GetNumRows();
        startRowIndices[i + 1] = startRowIndices[i] + a.GetNumRows();
        // Device pointer to the first column that will be copied from this input.
        srcColumnPointers[i] = a.m_pArray + a.LocateColumn(sliceStartCol);
    }

    Resize(totalRows, sliceNumCols);
    PrepareDevice();

    // Upload the pointer table and row offsets so the kernel can map each
    // destination element back to its source matrix.
    // NOTE(review): if CUDA_CALL throws, the device buffers below leak — same as
    // the original; a device-side RAII wrapper would fix this. TODO confirm policy.
    ElemType** bufferPointersInGPU = NULL;
    CUDA_CALL(cudaMalloc((void***)&bufferPointersInGPU, inputMatrices.size() * sizeof(ElemType*)));
    CUDA_CALL(cudaMemcpy(bufferPointersInGPU, srcColumnPointers.data(), inputMatrices.size() * sizeof(ElemType*), cudaMemcpyHostToDevice));

    size_t* startRowIndecesInGPU = NULL;
    CUDA_CALL(cudaMalloc((void**)&startRowIndecesInGPU, (1 + inputMatrices.size()) * sizeof(size_t)));
    CUDA_CALL(cudaMemcpy(startRowIndecesInGPU, startRowIndices.data(), (1 + inputMatrices.size()) * sizeof(size_t), cudaMemcpyHostToDevice));

    // One thread per destination element.
    LONG64 N = (LONG64)GetNumElements();
    int blocksPerGrid = (int)ceil(1.0 * N / threadsPerBlock);

    cudaEvent_t done = nullptr;
    if (do_sync) CUDA_CALL(cudaEventCreate(&done));
    _assignRowStackValuesOf<ElemType> << <blocksPerGrid, threadsPerBlock, 0, t_stream >> >(m_pArray, bufferPointersInGPU, startRowIndecesInGPU, (long)inputMatrices.size(), N, (long)GetNumRows(), (long)GetNumCols());
    if (do_sync) CUDA_CALL(cudaEventRecord(done));
    if (do_sync) CUDA_CALL(cudaEventSynchronize(done));
    if (do_sync) CUDA_CALL(cudaEventDestroy(done));

    CUDA_CALL(cudaFree(bufferPointersInGPU));
    CUDA_CALL(cudaFree(startRowIndecesInGPU));

    return *this;
}
/// c = c - 1.0 for a specific position /// c = c - 1.0 for a specific position
template<class ElemType> template<class ElemType>
void GPUMatrix<ElemType>::MinusOneAt(GPUMatrix<ElemType>& c, const size_t position) void GPUMatrix<ElemType>::MinusOneAt(GPUMatrix<ElemType>& c, const size_t position)

Просмотреть файл

@ -274,6 +274,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
GPUMatrix<ElemType>& AssignRowSliceValuesOf(const GPUMatrix<ElemType>& a, const size_t startIndex, const size_t numRows); GPUMatrix<ElemType>& AssignRowSliceValuesOf(const GPUMatrix<ElemType>& a, const size_t startIndex, const size_t numRows);
GPUMatrix<ElemType>& AddToRowSliceValuesOf(const GPUMatrix<ElemType>& a, const size_t startIndex, const size_t numRows); GPUMatrix<ElemType>& AddToRowSliceValuesOf(const GPUMatrix<ElemType>& a, const size_t startIndex, const size_t numRows);
GPUMatrix<ElemType>& AddWithRowSliceValuesOf(const GPUMatrix<ElemType>& a, const size_t startIndex, const size_t numRows); GPUMatrix<ElemType>& AddWithRowSliceValuesOf(const GPUMatrix<ElemType>& a, const size_t startIndex, const size_t numRows);
GPUMatrix<ElemType>& AssignRowStackValuesOf(const std::vector<const GPUMatrix<ElemType>*>& inputMatrices, const size_t sliceStartCol, const size_t sliceNumCols);
GPUMatrix<ElemType>& AssignRepeatOf(const GPUMatrix<ElemType>& a, const size_t numRowRepeats, const size_t numColRepeats); GPUMatrix<ElemType>& AssignRepeatOf(const GPUMatrix<ElemType>& a, const size_t numRowRepeats, const size_t numColRepeats);
GPUMatrix<ElemType>& AssignPositiveAndShiftedNegSample(const GPUMatrix<ElemType>& a, const size_t posNumber, const size_t negNumber, const size_t shiftNumber); GPUMatrix<ElemType>& AssignPositiveAndShiftedNegSample(const GPUMatrix<ElemType>& a, const size_t posNumber, const size_t negNumber, const size_t shiftNumber);

Просмотреть файл

@ -377,6 +377,27 @@ __global__ void _addWithRowSliceValuesOf(ElemType * dest, ElemType * src, const
dest[id] += src[IDX2C(row + startIndex, col, srcRows)]; dest[id] += src[IDX2C(row + startIndex, col, srcRows)];
} }
// Kernel for AssignRowStackValuesOf: each thread writes one element of the
// vertically stacked destination matrix by locating which source matrix owns
// its destination row.
//   srces            - device array of per-source pointers, each already offset
//                      to the first copied column (see the host-side driver).
//   startRowIndeces  - numSrces+1 offsets; source s owns destination rows
//                      [startRowIndeces[s], startRowIndeces[s+1]).
//   N                - total number of destination elements (destRows * destCols).
template<class ElemType>
__global__ void _assignRowStackValuesOf(ElemType * dest, ElemType ** srces, size_t* startRowIndeces, const LONG64 numSrces, const LONG64 N, const long destRows, const long destCols)
{
    LONG64 id = blockDim.x * blockIdx.x + threadIdx.x;
    if (id >= N)
        return;
    // Recover (row, col) from the linear index; dest is column-major.
    long col = id / destRows; //dest is the full matrix, rowslice is taken from the src
    long row = id - (col * destRows);
    // Linear search for the source whose row range contains 'row'.
    //can we replace the for loop with something better? (e.g. binary search when numSrces is large)
    int srcId = 0;
    for (; srcId < numSrces; srcId++)
    {
        if (startRowIndeces[srcId + 1]>row)
            break;
    }
    // The source's leading dimension is its row count, i.e. the difference of
    // adjacent offsets in startRowIndeces.
    dest[id] = srces[srcId][IDX2C(row - startRowIndeces[srcId], col, startRowIndeces[srcId+1] - startRowIndeces[srcId])];
}
template<class ElemType> template<class ElemType>
__global__ void _assignRepeatOf(ElemType * dest, ElemType * src, const LONG64 N, const long srcRows, const long srcCols, const long destRows) __global__ void _assignRepeatOf(ElemType * dest, ElemType * src, const LONG64 N, const long srcRows, const long srcCols, const long destRows)
{ {

Просмотреть файл

@ -79,16 +79,16 @@ namespace Microsoft { namespace MSR { namespace CNTK {
size_t MajorIndexCount() const size_t MajorIndexCount() const
{ {
return MajorIndexCount(m_numRows, m_numCols, m_elemSizeAllocated, m_format); return MajorIndexCount(m_numRows, m_numCols, m_nz, m_format);
} }
size_t MajorIndexCount(const size_t numRows, const size_t numCols, const size_t numNZReserved, const MatrixFormat format) const size_t MajorIndexCount(const size_t numRows, const size_t numCols, const size_t numNZ, const MatrixFormat format) const
{ {
if (format == matrixFormatSparseBlockCol) if (format == matrixFormatSparseBlockCol)
return numCols; return numCols;
else if (format == matrixFormatSparseBlockRow) else if (format == matrixFormatSparseBlockRow)
return numRows; return numRows;
else else
return numNZReserved; return numNZ;
} }
size_t MajorIndexSize() const // actual number of major index bytes in use size_t MajorIndexSize() const // actual number of major index bytes in use
{ {

Просмотреть файл

@ -1520,6 +1520,68 @@ namespace Microsoft { namespace MSR { namespace CNTK {
return *this; return *this;
} }
//stack the columns in inputMatrices (starting from sliceStartCol for sliceNumCols columns) and assign it to [this] object.
// Facade-level dispatcher: moves all inputs onto this matrix's device, then
// forwards to the CPUMatrix or GPUMatrix dense implementation. Sparse matrices
// are not implemented for this operation.
template<class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignRowStackValuesOf(const std::vector<const Matrix<ElemType>*>& inputMatrices, const size_t sliceStartCol, const size_t sliceNumCols)
{
    // Ensure every input lives on the same device as [this] and has the same
    // matrix type (dense/sparse) before dispatching.
    for (int i = 0; i < inputMatrices.size(); i++)
    {
        const Matrix<ElemType>& a = *inputMatrices[i];
        DecideAndMoveToRightDevice(*this, a);
        //WARNING: a and this must have same type
        if (!(GetMatrixType() == a.GetMatrixType()))
            NOT_IMPLEMENTED;
    }

    CurrentDataLocation curLocation = GetCurrentMatrixLocation();
    if (curLocation == CurrentDataLocation::GPU || curLocation == CurrentDataLocation::BOTH)
    {
        if (GetMatrixType() != MatrixType::SPARSE)
        {
            //GPUDense;
            // Unwrap the facade objects into raw GPUMatrix pointers for the
            // device-specific implementation.
            std::vector<const GPUMatrix<ElemType>*> gpuInputMatrices;
            gpuInputMatrices.resize(inputMatrices.size());
            for (int i = 0; i < inputMatrices.size(); i++)
                gpuInputMatrices[i] = inputMatrices[i]->m_GPUMatrix;
            m_GPUMatrix->AssignRowStackValuesOf(gpuInputMatrices, sliceStartCol, sliceNumCols);
            SetDataLocation(CurrentDataLocation::GPU, MatrixType::DENSE);
        }
        else
        {
            NOT_IMPLEMENTED;
        }
    }
    else if (curLocation == CurrentDataLocation::CPU)
    {
        if (GetMatrixType() != MatrixType::SPARSE)
        {
            //CPUDense;
            // Same unwrapping for the CPU-side implementation.
            std::vector<const CPUMatrix<ElemType>*> cpuInputMatrices;
            cpuInputMatrices.resize(inputMatrices.size());
            for (int i = 0; i < inputMatrices.size(); i++)
                cpuInputMatrices[i] = inputMatrices[i]->m_CPUMatrix;
            m_CPUMatrix->AssignRowStackValuesOf(cpuInputMatrices, sliceStartCol, sliceNumCols);
            SetDataLocation(CurrentDataLocation::CPU, MatrixType::DENSE);
        }
        else
        {
            NOT_IMPLEMENTED;
        }
    }
    else
    {
        throw std::runtime_error("Matrices do not exist in either CPU or GPU.");
    }

    return *this;
}
template<class ElemType> template<class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignRepeatOf(const Matrix<ElemType>& a, const size_t numRowRepeats, const size_t numColRepeats) Matrix<ElemType>& Matrix<ElemType>::AssignRepeatOf(const Matrix<ElemType>& a, const size_t numRowRepeats, const size_t numColRepeats)
{ {
@ -3600,7 +3662,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{ {
size_t sampleCount = a.m_CPUMatrix->GetNumElements() / a.m_CPUMatrix->GetNumRows(); size_t sampleCount = a.m_CPUMatrix->GetNumElements() / a.m_CPUMatrix->GetNumRows();
tmp.Resize(a.GetNumRows() / 2, sampleCount); tmp.Resize(a.GetNumRows() / 2, sampleCount);
a.m_CPUMatrix->AssignNoiseContrastiveEstimation(*b.m_CPUMatrix, *c.m_CPUMatrix, *bias.m_CPUMatrix, sampleCount, *tmp.m_CPUMatrix, *this->m_CPUMatrix); a.m_CPUMatrix->AssignNoiseContrastiveEstimation(*b.m_CPUMatrix, *c.m_CPUMatrix, *bias.m_CPUMatrix, *tmp.m_CPUMatrix, *this->m_CPUMatrix);
} }
else else
{ {

Просмотреть файл

@ -259,6 +259,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Matrix<ElemType>& AssignRowSliceValuesOf(const Matrix<ElemType>& a, const size_t startIndex, const size_t numRows); Matrix<ElemType>& AssignRowSliceValuesOf(const Matrix<ElemType>& a, const size_t startIndex, const size_t numRows);
Matrix<ElemType>& AddToRowSliceValuesOf(const Matrix<ElemType>& a, const size_t startIndex, const size_t numRows); Matrix<ElemType>& AddToRowSliceValuesOf(const Matrix<ElemType>& a, const size_t startIndex, const size_t numRows);
Matrix<ElemType>& AddWithRowSliceValuesOf(const Matrix<ElemType>& a, const size_t startIndex, const size_t numRows); Matrix<ElemType>& AddWithRowSliceValuesOf(const Matrix<ElemType>& a, const size_t startIndex, const size_t numRows);
Matrix<ElemType>& AssignRowStackValuesOf(const std::vector<const Matrix<ElemType>*>& inputMatrices, const size_t sliceStartCol, const size_t sliceNumCols);
Matrix<ElemType>& AssignRepeatOf(const Matrix<ElemType>& a, const size_t numRowRepeats, const size_t numColRepeats); Matrix<ElemType>& AssignRepeatOf(const Matrix<ElemType>& a, const size_t numRowRepeats, const size_t numColRepeats);
Matrix<ElemType>& AssignPositiveAndShiftedNegSample(const Matrix<ElemType>& a, const size_t posNumber, const size_t negNumber, const size_t shiftNumber); Matrix<ElemType>& AssignPositiveAndShiftedNegSample(const Matrix<ElemType>& a, const size_t posNumber, const size_t negNumber, const size_t shiftNumber);

Просмотреть файл

@ -479,6 +479,7 @@ namespace Microsoft {
//for each column of a, we add all rows of a to this starting from startIndex //for each column of a, we add all rows of a to this starting from startIndex
template<class ElemType> GPUMatrix<ElemType>& GPUMatrix<ElemType>::AddToRowSliceValuesOf(const GPUMatrix<ElemType>& /*a*/, const size_t startIndex, const size_t numRows) { return *this; } template<class ElemType> GPUMatrix<ElemType>& GPUMatrix<ElemType>::AddToRowSliceValuesOf(const GPUMatrix<ElemType>& /*a*/, const size_t startIndex, const size_t numRows) { return *this; }
template<class ElemType> GPUMatrix<ElemType>& GPUMatrix<ElemType>::AddWithRowSliceValuesOf(const GPUMatrix<ElemType>& /*a*/, const size_t startIndex, const size_t numRows) { return *this; } template<class ElemType> GPUMatrix<ElemType>& GPUMatrix<ElemType>::AddWithRowSliceValuesOf(const GPUMatrix<ElemType>& /*a*/, const size_t startIndex, const size_t numRows) { return *this; }
GPUMatrix<ElemType>& AssignRowStackValuesOf(const std::vector<const GPUMatrix<ElemType>*>& inputMatrices, const size_t sliceStartCol, const size_t sliceNumCols) { return *this; }
template<class ElemType> GPUMatrix<ElemType>& GPUMatrix<ElemType>::AssignRepeatOf(const GPUMatrix<ElemType>& /*a*/, const size_t numRowRepeats, const size_t numColRepeats) { return *this; } template<class ElemType> GPUMatrix<ElemType>& GPUMatrix<ElemType>::AssignRepeatOf(const GPUMatrix<ElemType>& /*a*/, const size_t numRowRepeats, const size_t numColRepeats) { return *this; }
template<class ElemType> GPUMatrix<ElemType>& GPUMatrix<ElemType>::AssignPositiveAndShiftedNegSample(const GPUMatrix<ElemType>& a, const size_t posNumber, const size_t negNumber, const size_t shiftNumber) { return *this; } template<class ElemType> GPUMatrix<ElemType>& GPUMatrix<ElemType>::AssignPositiveAndShiftedNegSample(const GPUMatrix<ElemType>& a, const size_t posNumber, const size_t negNumber, const size_t shiftNumber) { return *this; }

234
Scripts/build-and-test Normal file
Просмотреть файл

@ -0,0 +1,234 @@
#!/bin/bash
# Build-and-test driver for CNTK: builds the debug and release flavors and runs
# the Simple demo on CPU and GPU, checking the run output for errors.

# Setting some default values
BUILD=1
RUN=1
CLEAN_AFTER=0
CLEAN_BEFORE=0

# parsing command line arguments:
# NOTE: fixed `[[ $# > 0 ]]` -- inside [[ ]], `>` is lexical string comparison;
# use the arithmetic operator -gt for a numeric count.
while [[ $# -gt 0 ]]
do
  key="$1"

  case $key in
    -h|--help)
      echo "Usage: build-and-test [options]"
      echo "Options:"
      echo " -q|--quiet-build - redirect build output to file (by default those will be in <cntk_root>.run-<operating_system>-*)"
      echo " -r|--run-only - elides build step, runs the binaries that have already been built"
      echo " -b|--build-only - just build, do not run"
      echo " -cb|--clean-build - clean up the enlistment binaries before build"
      echo " -o|--output-directory <output_dir> - specify output directory to use"
      echo "The root directory used to build and run CNTK is hosts the Scripts directory that contains this script"
      exit 1
      ;;
    -q|--quiet)
      QUIET_BUILD=1
      ;;
    -r|--run-only)
      BUILD=0
      RUN=1
      ;;
    -b|--build-only)
      BUILD=1
      RUN=0
      ;;
    -cb|--clean-build)
      CLEAN_BEFORE=1
      BUILD=1
      ;;
    -o|--output-directory)
      OUTPUT_DIR="$2"
      shift # past argument
      ;;
    *)
      # Fixed typo: "Unkown" -> "Unknown"
      echo Unknown option $key
      exit 1
      ;;
  esac
  shift # past argument or value
done

# Step 0 -- Validate all necessary prerequisites and check for incompatible options
# It is possible to use this script on Windows to build CNTK
# from Cygwin window with Visual C++ environment loaded.
# In that case OS environment variable will be set and we
# can use it to differentiate from Linux.
if [[ $CLEAN_BEFORE == 1 && $RUN == 1 && $BUILD == 0 ]]; then
  echo "============ ERROR: Incompatible options RUN and CLEAN_BEFORE set without BUILD ============"
  exit 1
fi

if [[ $OS == "Windows_NT" && $OSTYPE == "cygwin" ]]; then
  DEBUG_DIR=Debug
  RELEASE_DIR=Release
  PREFIX_DIR=x64
  BIN_NAME=CNTK.exe
  BUILD_OS="windows"

  if [[ $VS120COMNTOOLS == "" ]]; then
    echo "============ Visual Studio 12.0 environment not properly setup or VS not installed ============"
    echo "============ Please find and run the appropriate vcvarsall.bat script ============"
    exit 1
  fi

  if [[ $ACML_PATH == "" ]]; then
    echo "============ ACML path not set ============"
    echo "============ ACML libraries are needed to successfully build CNTK ============"
    exit 1
  fi
elif [[ $OSTYPE == "linux-gnu" ]]; then
  DEBUG_DIR=x86_64.gpu.debug.acml
  RELEASE_DIR=x86_64.gpu.release.acml
  PREFIX_DIR=bin
  BIN_NAME=cntk
  MAKEFILE=Makefile.gpu
  BUILD_OS="linux"
else
  echo "============ ERROR: Unsupported OS ============"
  echo "============ Scripts supports only building from Linux and Windows through Cygwin ============"
  exit 1
fi

# Step 1 -- Prepare temporary folders and files, tweak settings if necessary
# Get to the root path from which we know how to build and run
SCRIPT=`readlink -f $0`
SCRIPT_DIR=`dirname $SCRIPT`
CNTK_ROOT=`dirname $SCRIPT_DIR`

# Setup the output directory
if [[ $OUTPUT_DIR == "" ]]; then
  OUTPUT_DIR="$CNTK_ROOT/.run-$BUILD_OS-$RANDOM"
fi

# Fixed: the message referenced $TMP_ROOT, which is never set; the directory
# actually created is $OUTPUT_DIR.
echo "============ Creating CNTK temp directory in $OUTPUT_DIR ============"
mkdir -p $OUTPUT_DIR || exit $?

CONF_FILE="$OUTPUT_DIR/Simple.conf"
BUILD_FILE="$OUTPUT_DIR/Build"
RUN_FILE="$OUTPUT_DIR/Result"

if ! [[ -d "$CNTK_ROOT/MachineLearning" ]]; then
  echo "============ ERROR: Build script located in the wrong directory ($SCRIPT_DIR) ============"
  exit 1
fi
cd $CNTK_ROOT

if ! [[ -f $CONF_FILE ]]; then
  cp Demos/Simple/Simple.config $CONF_FILE || exit $?

  # This chmod is necessary due to restrictive Cygwin interpretation of Windows permissions.
  # Cygwin interprets Windows permissions as ----rwx---, which lacks read permissions for user.
  chmod a+r $CONF_FILE || exit $?
fi

if [[ $QUIET_BUILD == 1 ]]; then
  echo "============ WARNING: You have selected quiet build. All build output will be placed in ($OUTPUT_DIR) ============"
fi

# Step 2 -- Build the project debug and release, if requested
if [[ $BUILD == 1 ]]; then
  # Step 2 -- Perform necessary builds
  for FLAVOR in debug release
  do
    # Our make is too noisy right now and it is difficult to spot
    # issues from stdout and stderr. In the quiet mode these are
    # redirected to a file where they could be examined after the fact
    if [[ $QUIET_BUILD == 1 ]]; then
      exec 6>$BUILD_FILE.$FLAVOR.out || exit $?
      exec 7>$BUILD_FILE.$FLAVOR.err || exit $?
    else
      exec 6>&1 || exit $?
      exec 7>&2 || exit $?
    fi

    echo "============ Building CNTK $FLAVOR (clean=$CLEAN_BEFORE) ============"
    if [[ $OS == "Windows_NT" ]]; then
      if [[ $CLEAN_BEFORE == 1 ]]; then
        msbuild.exe /property:Configuration=$FLAVOR /t:Clean 1>&6 2>&7 || exit $?
      fi
      msbuild.exe /property:Configuration=$FLAVOR /m 1>&6 2>&7 || exit $?
    else
      if [[ $CLEAN_BEFORE == 1 ]]; then
        make BUILDTYPE=$FLAVOR -f $MAKEFILE clean 1>&6 2>&7 || exit $?
      fi
      make BUILDTYPE=$FLAVOR -j -f $MAKEFILE 1>&6 2>&7 || exit $?
    fi

    # Build logs only exist in quiet mode; guard so non-quiet builds do not
    # report a spurious chmod error for the missing files.
    if [[ $QUIET_BUILD == 1 ]]; then
      chmod a+r $BUILD_FILE.*
    fi
  done
fi

# Step 3 -- Run the project tests, both debug and release, if requested
if [[ $RUN == 1 ]]; then
  if ! [[ -f "$CNTK_ROOT/$PREFIX_DIR/$DEBUG_DIR/$BIN_NAME" && -f "$CNTK_ROOT/$PREFIX_DIR/$RELEASE_DIR/$BIN_NAME" ]]; then
    echo "============ ERROR: CNTK did not build properly ============"
    exit 1
  fi
  cd $PREFIX_DIR

  for TARGET in CPU GPU
  do
    # These sed scripts are simply toggling DeviceNumber argument in the config file
    # If it is set to Auto, it will pick GPU over CPU. At -1 CPU is selected.
    if [[ $TARGET == CPU ]]; then
      sed -i -e 's/^DeviceNumber.*/DeviceNumber=-1/g' $CONF_FILE || exit $?
    else
      sed -i -e 's/^DeviceNumber.*/DeviceNumber=Auto/g' $CONF_FILE || exit $?
    fi

    for FLAVOR in debug release
    do
      # Fixed: the original tested `[[ FLAVOR == "debug" ]]` (missing `$`),
      # which compares the literal string "FLAVOR" and is always false, so the
      # debug pass silently ran the release binary.
      if [[ $FLAVOR == "debug" ]]; then
        FLAVOR_DIR="$DEBUG_DIR"
      else
        FLAVOR_DIR="$RELEASE_DIR"
      fi
      OUT_FILE="$RUN_FILE.$FLAVOR.out"

      echo "============ Running CNTK for ($FLAVOR) ($TARGET), output in ($RUN_FILE.*) ============"
      rm -rf models
      if [[ $OS == "Windows_NT" ]]; then
        # We have to use cygpath on Windows to modify the file paths into the format readable by cntk.
        time ./$FLAVOR_DIR/$BIN_NAME configFile="`cygpath -w $CONF_FILE`" &>$OUT_FILE || exit $?
      else
        time ./$FLAVOR_DIR/$BIN_NAME configFile=$CONF_FILE &>$OUT_FILE || exit $?
      fi
      chmod a+r $RUN_FILE.*

      # Check if execution was successful
      grep -q "Using $TARGET" "$OUT_FILE" || {
        echo "============ ERROR: Run output (in $OUT_FILE) did not contain information about target device ($TARGET) ============"
        exit 1
      }
      grep -q "EXCEPTION" "$OUT_FILE" && {
        echo "============ ERROR: Run output in ($OUT_FILE) contains exceptions ============"
        grep "EXCEPTION" "$OUT_FILE"
        exit 1
      }
    done
  done
fi

# Step 4 -- Optionally clean after builds and tests
# (comment renumbered: the original jumped from Step 3 to Step 5)
if [[ $CLEAN_AFTER == 1 ]]; then
  rm -rf models
  cd $CNTK_ROOT
  for FLAVOR in debug release
  do
    echo "============ Cleaning up CNTK $FLAVOR ============"
    if [[ $OS == "Windows_NT" ]]; then
      msbuild.exe /property:Configuration=$FLAVOR /t:clean 1>&6 2>&7 || exit $?
    else
      make BUILDTYPE=$FLAVOR -f $MAKEFILE clean 1>&6 2>&7 || exit $?
    fi
  done
  rm -rf $OUTPUT_DIR
fi

echo "============ Build and test of CNTK was successful! ============"