Merge with new changes including RowStackNode
This commit is contained in:
Коммит
f332421b7b
|
@ -15,6 +15,7 @@ x64/
|
|||
build/
|
||||
[Bb]in/
|
||||
[Oo]bj/
|
||||
.run-*
|
||||
|
||||
# Enable "build/" folder in the NuGet Packages folder since NuGet packages use it for MSBuild targets
|
||||
!packages/*/build/
|
||||
|
|
|
@ -47,8 +47,8 @@ BinaryWriter<ElemType>::~BinaryWriter()
|
|||
// miniBatchMode=Partial
|
||||
// randomize=None
|
||||
// wfile=c:\speech\mnist\mnist_test.bin
|
||||
// #wsize - initial size of the file in MB
|
||||
// # if calculated size would be bigger, that is used instead
|
||||
// #wsize - initial size of the file in MB, defaults to 256
|
||||
// # has to be large enough for your dataset. the file will shrink to the actual size when closed.
|
||||
// #wsize=256
|
||||
// #wrecords - number of records we should allocate space for in the file
|
||||
// # files cannot be expanded, so this should be large enough. If known modify this element in config before creating file
|
||||
|
|
|
@ -980,8 +980,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
{
|
||||
// dereference matrix that corresponds to key (input/output name) and
|
||||
// populate based on whether its a feature or a label
|
||||
//Matrix<ElemType>& data =
|
||||
*matrices[iter->first]; // can be features or labels
|
||||
//Matrix<ElemType>& data = *matrices[iter->first]; // can be features or labels
|
||||
|
||||
if (m_nameToTypeMap[iter->first] == InputOutputTypes::real)
|
||||
{
|
||||
|
@ -1058,8 +1057,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
{
|
||||
// dereference matrix that corresponds to key (input/output name) and
|
||||
// populate based on whether its a feature or a label
|
||||
//Matrix<ElemType>& data =
|
||||
*matrices[iter->first]; // can be features or labels
|
||||
//Matrix<ElemType>& data =*matrices[iter->first]; // can be features or labels
|
||||
|
||||
if (m_nameToTypeMap[iter->first] == InputOutputTypes::real)
|
||||
{
|
||||
|
@ -1134,8 +1132,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
{
|
||||
// dereference matrix that corresponds to key (input/output name) and
|
||||
// populate based on whether its a feature or a label
|
||||
//Matrix<ElemType>& data =
|
||||
*matrices[iter->first]; // can be features or labels
|
||||
//Matrix<ElemType>& data = *matrices[iter->first]; // can be features or labels
|
||||
|
||||
if (m_nameToTypeMap[iter->first] == InputOutputTypes::real)
|
||||
{
|
||||
|
|
|
@ -142,6 +142,15 @@ extern void _CHECKED_ASSERT_error(const char * file, int line, const char * exp)
|
|||
#endif
|
||||
#endif
|
||||
|
||||
/**
|
||||
These macros are used for sentence segmentation information.
|
||||
*/
|
||||
#define SENTENCE_BEGIN 0
|
||||
#define SENTENCE_MIDDLE 1
|
||||
#define NO_LABELS -1
|
||||
#define EXISTS_SENTENCE_BEGIN_OR_NO_LABELS 0
|
||||
#define NO_EXISTS_SENTENCE_BEGIN_OR_NO_LABELS 1
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// basic data types
|
||||
// ----------------------------------------------------------------------------
|
||||
|
|
|
@ -382,47 +382,58 @@ public:
|
|||
// TODO: we can store labels more efficiently now since we don't do frame-wise random access anymore.
|
||||
|
||||
// OK, utterance has all we need --remember it
|
||||
utteranceset.push_back (std::move (utterance));
|
||||
|
||||
if (m==0)
|
||||
{
|
||||
_totalframes += uttframes;
|
||||
framesaccum.push_back(uttframes); //track number of frames in each utterance - first feature is the reference
|
||||
if (!labels.empty() && !lacksmlf)
|
||||
//if (!labels.empty() && labelsiter != labels[0].end())
|
||||
{
|
||||
foreach_index (j, labels)
|
||||
// first verify that all the label files have the proper duration
|
||||
bool durationmatch = true;
|
||||
foreach_index(j, labels)
|
||||
{
|
||||
const auto & labseq = labels[j].find(key)->second;
|
||||
// check if durations match; skip if not
|
||||
size_t labframes = labseq.empty() ? 0 : (labseq[labseq.size()-1].firstframe + labseq[labseq.size()-1].numframes);
|
||||
size_t labframes = labseq.empty() ? 0 : (labseq[labseq.size() - 1].firstframe + labseq[labseq.size() - 1].numframes);
|
||||
if (labframes != uttframes)
|
||||
{
|
||||
fprintf (stderr, " [duration mismatch (%d in label vs. %d in feat file), skipping %S]", labframes, uttframes, key.c_str());
|
||||
fprintf(stderr, " [duration mismatch (%d in label vs. %d in feat file), skipping %S]", labframes, uttframes, key.c_str());
|
||||
nomlf++;
|
||||
continue; // skip this utterance at all
|
||||
durationmatch = false;
|
||||
break; // continue; // skip this utterance at all
|
||||
}
|
||||
// expand classid sequence into flat array
|
||||
foreach_index (i, labseq)
|
||||
}
|
||||
if (durationmatch){
|
||||
utteranceset.push_back(std::move(utterance));
|
||||
_totalframes += uttframes;
|
||||
framesaccum.push_back(uttframes); //track number of frames in each utterance - first feature is the reference
|
||||
// then parse each mlf if the durations are consistent
|
||||
foreach_index(j, labels)
|
||||
{
|
||||
const auto & e = labseq[i];
|
||||
if ((i > 0 && labseq[i-1].firstframe + labseq[i-1].numframes != e.firstframe) || (i == 0 && e.firstframe != 0))
|
||||
throw std::runtime_error (msra::strfun::strprintf ("minibatchutterancesource: labels not in consecutive order MLF in label set: %S", key.c_str()));
|
||||
if (e.classid >= udim[j])
|
||||
throw std::runtime_error (msra::strfun::strprintf ("minibatchutterancesource: class id %d exceeds model output dimension %d in file %S", e.classid, udim, key.c_str()));
|
||||
if (e.classid != (CLASSIDTYPE) e.classid)
|
||||
throw std::runtime_error ("CLASSIDTYPE has too few bits");
|
||||
for (size_t t = e.firstframe; t < e.firstframe + e.numframes; t++)
|
||||
classids[j]->push_back ((CLASSIDTYPE) e.classid);
|
||||
numclasses[j] = max (numclasses[j], 1u + e.classid);
|
||||
counts[j].resize (numclasses[j], 0);
|
||||
counts[j][e.classid] += e.numframes;
|
||||
}
|
||||
classids[j]->push_back ((CLASSIDTYPE) -1); // append a boundary marker marker for checking
|
||||
const auto & labseq = labels[j].find(key)->second;
|
||||
// expand classid sequence into flat array
|
||||
foreach_index(i, labseq)
|
||||
{
|
||||
const auto & e = labseq[i];
|
||||
if ((i > 0 && labseq[i - 1].firstframe + labseq[i - 1].numframes != e.firstframe) || (i == 0 && e.firstframe != 0))
|
||||
throw std::runtime_error(msra::strfun::strprintf("minibatchutterancesource: labels not in consecutive order MLF in label set: %S", key.c_str()));
|
||||
if (e.classid >= udim[j])
|
||||
throw std::runtime_error(msra::strfun::strprintf("minibatchutterancesource: class id %d exceeds model output dimension %d in file %S", e.classid, udim, key.c_str()));
|
||||
if (e.classid != (CLASSIDTYPE)e.classid)
|
||||
throw std::runtime_error("CLASSIDTYPE has too few bits");
|
||||
for (size_t t = e.firstframe; t < e.firstframe + e.numframes; t++)
|
||||
classids[j]->push_back((CLASSIDTYPE)e.classid);
|
||||
numclasses[j] = max(numclasses[j], 1u + e.classid);
|
||||
counts[j].resize(numclasses[j], 0);
|
||||
counts[j][e.classid] += e.numframes;
|
||||
}
|
||||
|
||||
if (!labels[j].empty() && classids[j]->size() != _totalframes + utteranceset.size())
|
||||
throw std::logic_error (msra::strfun::strprintf ("minibatchutterancesource: label duration inconsistent with feature file in MLF label set: %S", key.c_str()));
|
||||
assert (labels[j].empty() || classids[j]->size() == _totalframes + utteranceset.size());
|
||||
classids[j]->push_back((CLASSIDTYPE)-1); // append a boundary marker marker for checking
|
||||
|
||||
if (!labels[j].empty() && classids[j]->size() != _totalframes + utteranceset.size())
|
||||
throw std::logic_error(msra::strfun::strprintf("minibatchutterancesource: label duration inconsistent with feature file in MLF label set: %S", key.c_str()));
|
||||
assert(labels[j].empty() || classids[j]->size() == _totalframes + utteranceset.size());
|
||||
}
|
||||
}
|
||||
}
|
||||
else{
|
||||
|
@ -451,7 +462,7 @@ public:
|
|||
}
|
||||
if (nomlf + nolat > 0)
|
||||
{
|
||||
fprintf (stderr, "minibatchutterancesource: out of %d files, %d files not found in label set and %d have no lattice\n", infiles.size(), nomlf, nolat);
|
||||
fprintf (stderr, "minibatchutterancesource: out of %d files, %d files not found in label set and %d have no lattice\n", infiles[0].size(), nomlf, nolat);
|
||||
if (nomlf + nolat > infiles[m].size() / 2)
|
||||
throw std::runtime_error ("minibatchutterancesource: too many files not found in label set--assuming broken configuration\n");
|
||||
}
|
||||
|
|
|
@ -24,6 +24,7 @@
|
|||
#define DATAREADER_EXPORTS // creating the exports here
|
||||
#include "DataReader.h"
|
||||
#include "HTKMLFReader.h"
|
||||
#include "commandArgUtil.h"
|
||||
#ifdef LEAKDETECT
|
||||
#include <vld.h> // for memory leak detection
|
||||
#endif
|
||||
|
|
|
@ -28,6 +28,7 @@
|
|||
#include "DataWriter.h"
|
||||
#include "commandArgUtil.h"
|
||||
#include "HTKMLFWriter.h"
|
||||
#include "commandArgUtil.h"
|
||||
#ifdef LEAKDETECT
|
||||
#include <vld.h> // for memory leak detection
|
||||
#endif
|
||||
|
|
|
@ -2048,6 +2048,10 @@ void BatchSequenceReader<ElemType>::GetLabelOutput(std::map<std::wstring,
|
|||
{
|
||||
RuntimeError("GetLabelOutput::should use CPU for labels ");
|
||||
}
|
||||
if (curDevId != CPUDEVICE)
|
||||
{
|
||||
labels->TransferFromDeviceToDevice(CPUDEVICE, curDevId, true, false, false);
|
||||
}
|
||||
}
|
||||
|
||||
template<class ElemType>
|
||||
|
|
|
@ -11,6 +11,11 @@
|
|||
#include <stdexcept>
|
||||
#include <stdint.h>
|
||||
|
||||
#if WIN32
|
||||
#define ftell64 _ftelli64
|
||||
#else
|
||||
#define ftell64 ftell
|
||||
#endif
|
||||
|
||||
// SetState for a particular value
|
||||
template <typename NumType, typename LabelType>
|
||||
|
@ -362,10 +367,10 @@ void UCIParser<NumType, LabelType>::ParseInit(LPCWSTR fileName, size_t startFeat
|
|||
|
||||
errno_t err = _wfopen_s( &m_pFile, fileName, L"rb" );
|
||||
if (err)
|
||||
std::runtime_error("UCIParser::ParseInit - error opening file");
|
||||
throw std::runtime_error("UCIParser::ParseInit - error opening file");
|
||||
int rc = _fseeki64(m_pFile, 0, SEEK_END);
|
||||
if (rc)
|
||||
std::runtime_error("UCIParser::ParseInit - error seeking in file");
|
||||
throw std::runtime_error("UCIParser::ParseInit - error seeking in file");
|
||||
|
||||
m_fileSize = GetFilePosition();
|
||||
m_fileBuffer = new BYTE[m_bufferSize];
|
||||
|
@ -377,9 +382,9 @@ void UCIParser<NumType, LabelType>::ParseInit(LPCWSTR fileName, size_t startFeat
|
|||
template <typename NumType, typename LabelType>
|
||||
int64_t UCIParser<NumType, LabelType>::GetFilePosition()
|
||||
{
|
||||
int64_t position = _ftelli64(m_pFile);
|
||||
int64_t position = ftell64(m_pFile);
|
||||
if (position == -1L)
|
||||
std::runtime_error("UCIParser::GetFilePosition - error retrieving file position in file");
|
||||
throw std::runtime_error("UCIParser::GetFilePosition - error retrieving file position in file");
|
||||
return position;
|
||||
}
|
||||
|
||||
|
@ -392,7 +397,7 @@ void UCIParser<NumType, LabelType>::SetFilePosition(int64_t position)
|
|||
{
|
||||
int rc = _fseeki64(m_pFile, position, SEEK_SET);
|
||||
if (rc)
|
||||
std::runtime_error("UCIParser::SetFilePosition - error seeking in file");
|
||||
throw std::runtime_error("UCIParser::SetFilePosition - error seeking in file");
|
||||
|
||||
// setup state machine to start at this position
|
||||
PrepareStartPosition(position);
|
||||
|
@ -445,7 +450,7 @@ size_t UCIParser<NumType, LabelType>::UpdateBuffer()
|
|||
size_t bytesToRead = min(m_bufferSize, m_fileSize-m_bufferStart)-saveBytes;
|
||||
size_t bytesRead = fread(m_fileBuffer+saveBytes, 1, bytesToRead, m_pFile);
|
||||
if (bytesRead == 0 && ferror(m_pFile))
|
||||
std::runtime_error("UCIParser::UpdateBuffer - error reading file");
|
||||
throw std::runtime_error("UCIParser::UpdateBuffer - error reading file");
|
||||
return bytesRead;
|
||||
}
|
||||
|
||||
|
|
|
@ -90,8 +90,8 @@ private:
|
|||
int m_elementsConvertedThisLine;
|
||||
|
||||
// global stats
|
||||
int m_totalNumbersConverted;
|
||||
int m_totalLabelsConverted;
|
||||
int64_t m_totalNumbersConverted;
|
||||
int64_t m_totalLabelsConverted;
|
||||
|
||||
// file positions/buffer
|
||||
FILE * m_pFile;
|
||||
|
|
|
@ -1,8 +1,9 @@
|
|||
# command=Simple_Demo_Output
|
||||
RootDir=..
|
||||
command=Simple_Demo:Simple_Demo_Output
|
||||
|
||||
# deviceId=-1 for CPU, >=0 for GPU devices
|
||||
DeviceNumber=-1
|
||||
|
||||
#stderr=Demo
|
||||
|
||||
precision=float
|
||||
|
@ -13,7 +14,6 @@ deviceId=$DeviceNumber$
|
|||
outputNodeNames=ScaledLogLikelihood
|
||||
traceLevel=1
|
||||
|
||||
|
||||
#######################################
|
||||
# TRAINING CONFIG (Simple, Fixed LR) #
|
||||
#######################################
|
||||
|
@ -52,22 +52,22 @@ Simple_Demo=[
|
|||
reader=[
|
||||
# reader to use
|
||||
readerType=UCIFastReader
|
||||
file=../Demos/Simple/SimpleDataTrain.txt
|
||||
file=$RootDir$/Demos/Simple/SimpleDataTrain.txt
|
||||
|
||||
miniBatchMode=Partial
|
||||
randomize=Auto
|
||||
verbosity=1
|
||||
|
||||
features=[
|
||||
dim=2 # two-dimensional input data
|
||||
dim=2 # two-dimensional input data
|
||||
start=0 # Start with first element on line
|
||||
]
|
||||
|
||||
labels=[
|
||||
start=2 # Skip two elements
|
||||
start=2 # Skip two elements
|
||||
dim=1 # One label dimension
|
||||
labelDim=2 # Two labels possible
|
||||
labelMappingFile=../Demos/Simple/SimpleMapping.txt
|
||||
labelMappingFile=$RootDir$/Demos/Simple/SimpleMapping.txt
|
||||
]
|
||||
]
|
||||
]
|
||||
|
@ -84,16 +84,16 @@ Simple_Demo_Output=[
|
|||
reader=[
|
||||
# reader to use
|
||||
readerType=UCIFastReader
|
||||
file=../Demos/Simple/SimpleDataTest.txt
|
||||
file=$RootDir$/Demos/Simple/SimpleDataTest.txt
|
||||
features=[
|
||||
dim=2
|
||||
start=0
|
||||
start=0
|
||||
]
|
||||
labels=[
|
||||
start=2
|
||||
start=2
|
||||
dim=1
|
||||
labelDim=2
|
||||
labelMappingFile=../Demos/Simple/SimpleMapping.txt
|
||||
labelMappingFile=$RootDir$/Demos/Simple/SimpleMapping.txt
|
||||
]
|
||||
]
|
||||
outputPath=SimpleOutput # Dump output as text
|
||||
|
|
|
@ -550,41 +550,38 @@ public:
|
|||
}
|
||||
|
||||
ComputationNodePtr nodePtr = GetNodeFromName(nodeName);
|
||||
ComputationNodePtr childNodePtr0, childNodePtr1, childNodePtr2, childNodePtr3, childNodePtr4;
|
||||
switch (numChildren)
|
||||
std::vector<ComputationNodePtr> childrenNodes;
|
||||
childrenNodes.resize(numChildren);
|
||||
for (int j = 0; j < numChildren; j++)
|
||||
childrenNodes[j] = GetNodeFromName(childrenNames[j]);
|
||||
|
||||
if (nodePtr->OperationName() == RowStackNode<ElemType>::TypeName()) //allow for variable input nodes
|
||||
nodePtr->AttachInputs(childrenNodes);
|
||||
else //fixed input nodes
|
||||
{
|
||||
case 1:
|
||||
childNodePtr0 = GetNodeFromName(childrenNames[0]);
|
||||
nodePtr->AttachInputs(childNodePtr0);
|
||||
break;
|
||||
case 2:
|
||||
childNodePtr0 = GetNodeFromName(childrenNames[0]);
|
||||
childNodePtr1 = GetNodeFromName(childrenNames[1]);
|
||||
nodePtr->AttachInputs(childNodePtr0, childNodePtr1);
|
||||
break;
|
||||
case 3:
|
||||
childNodePtr0 = GetNodeFromName(childrenNames[0]);
|
||||
childNodePtr1 = GetNodeFromName(childrenNames[1]);
|
||||
childNodePtr2 = GetNodeFromName(childrenNames[2]);
|
||||
nodePtr->AttachInputs(childNodePtr0, childNodePtr1, childNodePtr2);
|
||||
break;
|
||||
case 4:
|
||||
childNodePtr0 = GetNodeFromName(childrenNames[0]);
|
||||
childNodePtr1 = GetNodeFromName(childrenNames[1]);
|
||||
childNodePtr2 = GetNodeFromName(childrenNames[2]);
|
||||
childNodePtr3 = GetNodeFromName(childrenNames[3]);
|
||||
nodePtr->AttachInputs(childNodePtr0, childNodePtr1, childNodePtr2, childNodePtr3);
|
||||
break;
|
||||
case 5:
|
||||
childNodePtr0 = GetNodeFromName(childrenNames[0]);
|
||||
childNodePtr1 = GetNodeFromName(childrenNames[1]);
|
||||
childNodePtr2 = GetNodeFromName(childrenNames[2]);
|
||||
childNodePtr3 = GetNodeFromName(childrenNames[3]);
|
||||
childNodePtr4 = GetNodeFromName(childrenNames[4]);
|
||||
nodePtr->AttachInputs(childNodePtr0, childNodePtr1, childNodePtr2, childNodePtr3, childNodePtr4);
|
||||
break;
|
||||
default:
|
||||
throw std::logic_error("Invalid number of children.");
|
||||
switch (numChildren)
|
||||
{
|
||||
case 1:
|
||||
nodePtr->AttachInputs(childrenNodes[0]);
|
||||
break;
|
||||
case 2:
|
||||
nodePtr->AttachInputs(childrenNodes[0], childrenNodes[1]);
|
||||
break;
|
||||
case 3:
|
||||
nodePtr->AttachInputs(childrenNodes[0], childrenNodes[1], childrenNodes[2]);
|
||||
break;
|
||||
case 4:
|
||||
nodePtr->AttachInputs(childrenNodes[0], childrenNodes[1], childrenNodes[2], childrenNodes[3]);
|
||||
break;
|
||||
case 5:
|
||||
nodePtr->AttachInputs(childrenNodes[0], childrenNodes[1], childrenNodes[2], childrenNodes[3], childrenNodes[4]);
|
||||
break;
|
||||
case 6:
|
||||
nodePtr->AttachInputs(childrenNodes[0], childrenNodes[1], childrenNodes[2], childrenNodes[3], childrenNodes[4], childrenNodes[5]);
|
||||
break;
|
||||
default:
|
||||
throw std::logic_error("Invalid number of children.");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1028,6 +1025,8 @@ public:
|
|||
newNode = new LookupTableNode<ElemType>(fstream, modelVersion, m_deviceId, nodeName);
|
||||
else if (nodeType == RowSliceNode<ElemType>::TypeName())
|
||||
newNode = new RowSliceNode<ElemType>(fstream, modelVersion, m_deviceId, nodeName);
|
||||
else if (nodeType == RowStackNode<ElemType>::TypeName())
|
||||
newNode = new RowStackNode<ElemType>(fstream, modelVersion, m_deviceId, nodeName);
|
||||
else if (nodeType == GMMLogLikelihoodNode<ElemType>::TypeName())
|
||||
newNode = new GMMLogLikelihoodNode<ElemType>(fstream, modelVersion, m_deviceId, nodeName);
|
||||
else if (nodeType == SequenceDecoderNode<ElemType>::TypeName())
|
||||
|
@ -1209,6 +1208,8 @@ public:
|
|||
newNode = new CosDistanceWithNegativeSamplesNode<ElemType>(m_deviceId, nodeName);
|
||||
else if (nodeType == ParallelNode<ElemType>::TypeName())
|
||||
newNode = new ParallelNode<ElemType>(m_deviceId, nodeName);
|
||||
else if (nodeType == RowStackNode<ElemType>::TypeName())
|
||||
newNode = new RowStackNode<ElemType>(m_deviceId, nodeName);
|
||||
else
|
||||
{
|
||||
fprintf(stderr, "Error creating new ComputationNode of type %ls, with name %ls\n", nodeType.c_str(), nodeName.c_str());
|
||||
|
@ -1582,6 +1583,15 @@ public:
|
|||
return newNode;
|
||||
}
|
||||
|
||||
// Builds a RowStackNode that takes a variable number of inputs.
//   inputs   - the child nodes to attach, in stacking order; forwarded
//              unchanged to the node's AttachInputs(). Taken by const
//              reference so the vector is not copied on every call.
//   nodeName - name for the new node (default: empty string).
// The node is created on m_deviceId, added to the network through
// AddNodeToNet(), and returned to the caller.
ComputationNodePtr RowStack(const std::vector<ComputationNodePtr>& inputs, const std::wstring nodeName = L"")
{
    ComputationNodePtr newNode(new RowStackNode<ElemType>(m_deviceId, nodeName));
    newNode->AttachInputs(inputs);
    AddNodeToNet(newNode);

    return newNode;
}
|
||||
|
||||
ComputationNodePtr GMMLogLikelihood(const ComputationNodePtr unnormedPrior, const ComputationNodePtr mean, const ComputationNodePtr logStddev, const ComputationNodePtr feature, const std::wstring nodeName = L"")
|
||||
{
|
||||
ComputationNodePtr newNode(new GMMLogLikelihoodNode<ElemType>(m_deviceId, nodeName));
|
||||
|
|
|
@ -158,6 +158,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
throw std::logic_error("This operation does not support six inputs.");
|
||||
}
|
||||
|
||||
// Variable-length overload of AttachInputs. This implementation accepts no
// inputs at all: it unconditionally throws std::logic_error. Node types that
// take an arbitrary number of inputs are expected to override it.
virtual void AttachInputs(const std::vector<ComputationNodePtr>& /*inputs*/)
|
||||
{
|
||||
throw std::logic_error("This operation does not support variable-length inputs.");
|
||||
}
|
||||
|
||||
virtual void DetachInputs()
|
||||
{
|
||||
m_children.resize(0);
|
||||
|
|
|
@ -399,6 +399,167 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
template class RowSliceNode<float>;
|
||||
template class RowSliceNode<double>;
|
||||
|
||||
//this node is used to extract part of the input by rows as the output
|
||||
//it has to be continuous segments of rows since each column is treated as one sample
|
||||
template<class ElemType>
|
||||
class RowStackNode : public ComputationNode<ElemType>
|
||||
{
|
||||
UsingComputationNodeMembers;
|
||||
public:
|
||||
RowStackNode(const DEVICEID_TYPE deviceId = AUTOPLACEMATRIX, const std::wstring name = L"") : ComputationNode<ElemType>(deviceId)
|
||||
{
|
||||
m_nodeName = (name == L"" ? CreateUniqNodeName() : name);
|
||||
m_deviceId = deviceId;
|
||||
MoveMatricesToDevice(deviceId);
|
||||
InitRecurrentNode();
|
||||
}
|
||||
|
||||
RowStackNode(File& fstream, const size_t modelVersion, const DEVICEID_TYPE deviceId = AUTOPLACEMATRIX, const std::wstring name = L"") : ComputationNode<ElemType>(deviceId)
|
||||
{
|
||||
m_nodeName = (name == L"" ? CreateUniqNodeName() : name);
|
||||
LoadFromFile(fstream, modelVersion, deviceId);
|
||||
}
|
||||
|
||||
// copy constructor
|
||||
RowStackNode(const RowStackNode<ElemType>* node, const std::wstring& newName, const CopyNodeFlags flags) : ComputationNode<ElemType>(node->m_deviceId)
|
||||
{
|
||||
node->CopyTo(this, newName, flags);
|
||||
}
|
||||
|
||||
virtual ComputationNodePtr Duplicate(const std::wstring& newName, const CopyNodeFlags flags) const
|
||||
{
|
||||
const std::wstring& name = (newName == L"") ? NodeName() : newName;
|
||||
|
||||
ComputationNodePtr node = new RowStackNode<ElemType>(this, name, flags);
|
||||
return node;
|
||||
}
|
||||
|
||||
virtual void CopyTo(const ComputationNodePtr nodeP, const std::wstring& newName, const CopyNodeFlags flags) const
|
||||
{
|
||||
ComputationNode<ElemType>::CopyTo(nodeP, newName, flags);
|
||||
RowStackNode<ElemType>* node = (RowStackNode<ElemType>*) nodeP;
|
||||
|
||||
if (flags & CopyNodeFlags::copyNodeChildren)
|
||||
{
|
||||
node->m_children = m_children;
|
||||
node->m_startRowIndeces = m_startRowIndeces;
|
||||
node->m_inputMatrices = m_inputMatrices;
|
||||
}
|
||||
}
|
||||
|
||||
virtual const std::wstring OperationName() const { return TypeName(); }
|
||||
static const std::wstring TypeName() { return L"RowStack"; }
|
||||
|
||||
virtual void ComputeInputPartial(const size_t inputIndex)
|
||||
{
|
||||
if (inputIndex >= ChildrenSize())
|
||||
throw std::invalid_argument("RowStack-ComputeInputPartial: inputIndex out of range.");
|
||||
|
||||
ComputeInputPartialS(Inputs(inputIndex)->GradientValues(), GradientValues(), m_startRowIndeces[inputIndex], m_startRowIndeces[inputIndex + 1] - m_startRowIndeces[inputIndex]);
|
||||
}
|
||||
|
||||
virtual void ComputeInputPartial(const size_t inputIndex, const size_t timeIdxInSeq)
|
||||
{
|
||||
if (inputIndex >= ChildrenSize())
|
||||
throw std::invalid_argument("RowStack-ComputeInputPartial: inputIndex out of range.");
|
||||
|
||||
Matrix<ElemType> sliceInputGrad = Inputs(inputIndex)->GradientValues().ColumnSlice(timeIdxInSeq * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
|
||||
Matrix<ElemType> sliceOutputGrad = GradientValues().ColumnSlice(timeIdxInSeq * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
|
||||
|
||||
ComputeInputPartialS(sliceInputGrad, sliceOutputGrad, m_startRowIndeces[inputIndex], m_startRowIndeces[inputIndex+1] - m_startRowIndeces[inputIndex]);
|
||||
}
|
||||
|
||||
static void WINAPI ComputeInputPartialS(Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues, const size_t startIndex, const size_t numRows)
|
||||
{
|
||||
inputGradientValues.AddWithRowSliceValuesOf(gradientValues, startIndex, numRows);
|
||||
}
|
||||
|
||||
virtual void EvaluateThisNode()
|
||||
{
|
||||
EvaluateThisNodeS(m_functionValues, m_inputMatrices, 0, Inputs(0)->FunctionValues().GetNumCols());
|
||||
}
|
||||
|
||||
virtual void EvaluateThisNode(const size_t timeIdxInSeq)
|
||||
{
|
||||
Matrix<ElemType> sliceFunctionValues = FunctionValues().ColumnSlice(timeIdxInSeq * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
|
||||
|
||||
EvaluateThisNodeS(sliceFunctionValues, m_inputMatrices, timeIdxInSeq * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
|
||||
}
|
||||
|
||||
static void WINAPI EvaluateThisNodeS(Matrix<ElemType>& functionValues, const std::vector<const Matrix<ElemType>*>& inputMatrices, const size_t sliceStartCol, const size_t sliceNumCols)
|
||||
{
|
||||
functionValues.AssignRowStackValuesOf(inputMatrices, sliceStartCol, sliceNumCols);
|
||||
#if NANCHECK
|
||||
functionValues.HasNan("RowStack");
|
||||
#endif
|
||||
}
|
||||
|
||||
virtual void Validate()
|
||||
{
|
||||
PrintSelfBeforeValidation();
|
||||
|
||||
unsigned int numInputs = ChildrenSize();
|
||||
if (numInputs < 2)
|
||||
LogicError("RowStack operation: must have two or more inputs.");
|
||||
|
||||
if (Inputs(0) == nullptr)
|
||||
LogicError("RowStack operation: the input node is NULL.");
|
||||
|
||||
size_t numCols = Inputs(0)->FunctionValues().GetNumCols();
|
||||
m_startRowIndeces.resize(ChildrenSize()+1);
|
||||
m_inputMatrices.resize(ChildrenSize());
|
||||
|
||||
size_t totalRows = 0;
|
||||
m_startRowIndeces[0] = 0;
|
||||
|
||||
for (int i = 0; i < ChildrenSize(); i++)
|
||||
{
|
||||
if (Inputs(i) == nullptr)
|
||||
LogicError("RowStack operation: the input node is NULL.");
|
||||
|
||||
Matrix<ElemType>& childMatrix = Inputs(i)->FunctionValues();
|
||||
size_t numRows = childMatrix.GetNumRows();
|
||||
if (numRows == 0)
|
||||
LogicError("RowStack operation: the input node %ls has 0 rows.", Inputs(i)->NodeName().c_str());
|
||||
|
||||
if (childMatrix.GetNumCols() != numCols)
|
||||
LogicError("RowStack operation: the input node %ls has different number of columns.", Inputs(i)->NodeName().c_str());
|
||||
|
||||
totalRows += numRows;
|
||||
m_inputMatrices[i] = &childMatrix;
|
||||
m_startRowIndeces[i + 1] = m_startRowIndeces[i] + numRows;
|
||||
}
|
||||
|
||||
FunctionValues().Resize(totalRows, numCols);
|
||||
CopyImageSizeFromInputs();
|
||||
}
|
||||
|
||||
virtual void CopyImageSizeFromInputs()
|
||||
{
|
||||
CopyImageSizeFromInput(0, true);
|
||||
m_outputHeight = FunctionValues().GetNumRows();
|
||||
|
||||
//WARNING: this node will destroy the image size information from the child
|
||||
if (m_inputWidth * m_inputChannels != 1)
|
||||
fprintf(stderr, "WARNING: RowStack operation cannot inherit image size information from its child. Image size info is lost.\n");
|
||||
}
|
||||
|
||||
virtual void AttachInputs(const std::vector<ComputationNodePtr>& inputs)
|
||||
{
|
||||
unsigned int numInputs = inputs.size();
|
||||
m_children.resize(numInputs);
|
||||
for (unsigned int i = 0; i < numInputs; i++)
|
||||
m_children[i] = inputs[i];
|
||||
}
|
||||
|
||||
private:
|
||||
std::vector<size_t> m_startRowIndeces; //start row number in the stacked matrix of each input (child)
|
||||
std::vector<const Matrix<ElemType>*> m_inputMatrices;
|
||||
};
|
||||
|
||||
template class RowStackNode<float>;
|
||||
template class RowStackNode<double>;
|
||||
|
||||
template<class ElemType>
|
||||
class ScaleNode : public ComputationNode<ElemType>
|
||||
{
|
||||
|
|
|
@ -222,6 +222,8 @@ bool CheckFunction(std::string& p_nodeType, bool* allowUndeterminedVariable)
|
|||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, RowSliceNode<ElemType>::TypeName()))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, RowStackNode<ElemType>::TypeName()))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, LookupTableNode<ElemType>::TypeName()))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, GMMLogLikelihoodNode<ElemType>::TypeName(), L"GMMLL"))
|
||||
|
|
|
@ -218,10 +218,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
{
|
||||
if (colBegin(i,0) == SENTENCE_MIDDLE)
|
||||
{
|
||||
Matrix<ElemType> to1 = inputGradientValues.ColumnSlice((timeIdxInSeq - delay)*mNbr + i, 1);
|
||||
Matrix<ElemType> frm1= gradientValues.ColumnSlice(timeIdxInSeq * mNbr + i, 1);
|
||||
Matrix<ElemType> frm = gradientValues.ColumnSlice(timeIdxInSeq * mNbr + i, 1);
|
||||
Matrix<ElemType> to = inputGradientValues.ColumnSlice((timeIdxInSeq - delay)*mNbr + i, 1);
|
||||
|
||||
to1 += frm1;
|
||||
to += frm;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1810,8 +1810,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
w = m_net->CreateLearnableParameter(msra::strfun::wstrprintf(L"W%d", numHiddenLayers), m_layerSizes[numHiddenLayers], m_layerSizes[numHiddenLayers + 1]);
|
||||
m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale);
|
||||
|
||||
double val = w->FunctionValues()(0, 0);
|
||||
|
||||
/// the label is a dense matrix. each element is the word index
|
||||
label = m_net->CreateInputNode(L"labels", 2 * (this->nce_noises + 1), mbSize);
|
||||
|
||||
|
|
|
@ -391,29 +391,43 @@ public:
|
|||
{
|
||||
std::vector<void*> inputs = EvaluateParameters(node, baseName, nodeParamStart, nodeParamCount, pass);
|
||||
|
||||
switch (inputs.size())
|
||||
if (cnNodeType == RowStackNode<ElemType>::TypeName()) //support variable length inputs
|
||||
{
|
||||
case 1:
|
||||
nodePtr->AttachInputs(ComputationNodePtr(inputs[0]));
|
||||
break;
|
||||
case 2:
|
||||
nodePtr->AttachInputs(ComputationNodePtr(inputs[0]), ComputationNodePtr(inputs[1]));
|
||||
break;
|
||||
case 3:
|
||||
nodePtr->AttachInputs(ComputationNodePtr(inputs[0]), ComputationNodePtr(inputs[1]), ComputationNodePtr(inputs[2]));
|
||||
break;
|
||||
case 4:
|
||||
nodePtr->AttachInputs(ComputationNodePtr(inputs[0]), ComputationNodePtr(inputs[1]), ComputationNodePtr(inputs[2]), ComputationNodePtr(inputs[3]));
|
||||
break;
|
||||
case 5:
|
||||
nodePtr->AttachInputs(ComputationNodePtr(inputs[0]), ComputationNodePtr(inputs[1]), ComputationNodePtr(inputs[2]), ComputationNodePtr(inputs[3]), ComputationNodePtr(inputs[4]));
|
||||
break;
|
||||
default:
|
||||
if (nodeParamCount > 0)
|
||||
RuntimeError("Invalid number of parameters name = '%s' call = '%s'\n", node->GetName().c_str(), node->GetValue().c_str());
|
||||
break;
|
||||
}
|
||||
std::vector<ComputationNodePtr> inputNodes;
|
||||
inputNodes.resize(inputs.size());
|
||||
for (int i = 0; i < inputs.size(); i++)
|
||||
inputNodes[i] = ComputationNodePtr(inputs[i]);
|
||||
|
||||
nodePtr->AttachInputs(inputNodes);
|
||||
}
|
||||
else
|
||||
{
|
||||
switch (inputs.size())
|
||||
{
|
||||
case 1:
|
||||
nodePtr->AttachInputs(ComputationNodePtr(inputs[0]));
|
||||
break;
|
||||
case 2:
|
||||
nodePtr->AttachInputs(ComputationNodePtr(inputs[0]), ComputationNodePtr(inputs[1]));
|
||||
break;
|
||||
case 3:
|
||||
nodePtr->AttachInputs(ComputationNodePtr(inputs[0]), ComputationNodePtr(inputs[1]), ComputationNodePtr(inputs[2]));
|
||||
break;
|
||||
case 4:
|
||||
nodePtr->AttachInputs(ComputationNodePtr(inputs[0]), ComputationNodePtr(inputs[1]), ComputationNodePtr(inputs[2]), ComputationNodePtr(inputs[3]));
|
||||
break;
|
||||
case 5:
|
||||
nodePtr->AttachInputs(ComputationNodePtr(inputs[0]), ComputationNodePtr(inputs[1]), ComputationNodePtr(inputs[2]), ComputationNodePtr(inputs[3]), ComputationNodePtr(inputs[4]));
|
||||
break;
|
||||
case 6:
|
||||
nodePtr->AttachInputs(ComputationNodePtr(inputs[0]), ComputationNodePtr(inputs[1]), ComputationNodePtr(inputs[2]), ComputationNodePtr(inputs[3]), ComputationNodePtr(inputs[4]), ComputationNodePtr(inputs[5]));
|
||||
break;
|
||||
default:
|
||||
if (nodeParamCount > 0)
|
||||
RuntimeError("Invalid number of parameters name = '%s' call = '%s'\n", node->GetName().c_str(), node->GetValue().c_str());
|
||||
break;
|
||||
}
|
||||
}
|
||||
// process common optional parameters (like "tag");
|
||||
ProcessOptionalParameters(node);
|
||||
break;
|
||||
|
|
12
Makefile.gpu
12
Makefile.gpu
|
@ -32,11 +32,11 @@ DEVICE = gpu
|
|||
BUILDTYPE = debug
|
||||
#BUILDTYPE = release
|
||||
# comment following and uncomment the next one to enable MKL library
|
||||
#MATHLIB = acml
|
||||
MATHLIB = mkl
|
||||
MATHLIB = acml
|
||||
#MATHLIB = mkl
|
||||
# modify relevant path below for your system
|
||||
MKL_PATH = /usr/users/chiaying/intel/composer_xe_2013.2.146
|
||||
ACML_PATH = /usr/local/acml5.3.0/gfortran64
|
||||
ACML_PATH = /usr/local/acml5.3.1/ifort64
|
||||
#######
|
||||
|
||||
BUILDFOR = $(ARCH).$(DEVICE).$(BUILDTYPE).$(MATHLIB)
|
||||
|
@ -48,8 +48,8 @@ ifeq ($(BUILDTYPE),debug)
|
|||
BUILDTYPE_OPT = -g
|
||||
GPU_BUILDTYPE_OPT = -G
|
||||
else
|
||||
BUILDTYPE_OPT = -O4
|
||||
GPU_BUILDTYPE_OPT =
|
||||
BUILDTYPE_OPT = -O3 -flto
|
||||
GPU_BUILDTYPE_OPT = -O3
|
||||
endif
|
||||
|
||||
ifeq ($(MATHLIB),mkl)
|
||||
|
@ -142,7 +142,7 @@ $(OBJDIR)/%.o : %.cu Makefile
|
|||
@echo $(SEPARATOR)
|
||||
@echo creating $@ for $(ARCH) with build type $(BUILDTYPE)
|
||||
@mkdir -p $(dir $@)
|
||||
$(NVCC) -c $< -o $@ $(BUILDTYPE_OPT) $(GPU_BUILDTYPE_OPT) $(NVCCFLAGS) $(INCFLAGS) -Xcompiler -fPIC
|
||||
$(NVCC) -c $< -o $@ $(GPU_BUILDTYPE_OPT) $(NVCCFLAGS) $(INCFLAGS) -Xcompiler -fPIC
|
||||
|
||||
$(OBJDIR)/%.o : %.cpp Makefile
|
||||
@echo $(SEPARATOR)
|
||||
|
|
|
@ -31,8 +31,8 @@ DEVICE = cpu
|
|||
#BUILDTYPE = debug
|
||||
BUILDTYPE = release
|
||||
# comment following and uncomment the next one to enable MKL library
|
||||
#MATHLIB = acml
|
||||
MATHLIB = mkl
|
||||
MATHLIB = acml
|
||||
#MATHLIB = mkl
|
||||
# modify relevant path below for your system
|
||||
MKL_PATH = /usr/users/chiaying/intel/composer_xe_2013.2.146
|
||||
ACML_PATH = /usr/users/yzhang87/code/acml/gfortran64
|
||||
|
|
|
@ -563,7 +563,7 @@ namespace CNTKMathTest
|
|||
Assert::IsTrue(C.IsEqualTo(D1, 0.0001));
|
||||
}
|
||||
|
||||
TEST_METHOD(CPUMatrixRowSlice)
|
||||
TEST_METHOD(CPUMatrixRowSliceAndStack)
|
||||
{
|
||||
Matrix M0(5,3);
|
||||
M0(0,0) = 1; M0(0,1) = 6; M0(0,2) = 11;
|
||||
|
@ -590,6 +590,26 @@ namespace CNTKMathTest
|
|||
M3 += M0;
|
||||
M0.AddToRowSliceValuesOf(M1, 2,2);
|
||||
Assert::IsTrue(M3.IsEqualTo(M0, 0.0001));
|
||||
|
||||
M2.AddWithRowSliceValuesOf(M1, 0, 2);
|
||||
Matrix M4(2, 3);
|
||||
M4(0, 0) = 6; M4(0, 1) = 16; M4(0, 2) = 26;
|
||||
M4(1, 0) = 8; M4(1, 1) = 18; M4(1, 2) = 28;
|
||||
Assert::IsTrue(M2.IsEqualTo(M4, 0.0001));
|
||||
|
||||
Matrix M5, M6, M7, M8;
|
||||
M5.AssignRowSliceValuesOf(M0, 0, 2);
|
||||
M6.AssignRowSliceValuesOf(M0, 2, 1);
|
||||
M7.AssignRowSliceValuesOf(M0, 3, 2);
|
||||
|
||||
std::vector<const Matrix*> inputMatrices;
|
||||
inputMatrices.resize(3);
|
||||
inputMatrices[0] = &M5;
|
||||
inputMatrices[1] = &M6;
|
||||
inputMatrices[2] = &M7;
|
||||
M8.AssignRowStackValuesOf(inputMatrices, 0, 3);
|
||||
|
||||
Assert::IsTrue(M8.IsEqualTo(M0, 0.0001));
|
||||
}
|
||||
|
||||
TEST_METHOD(CPUAssignRepeatOf)
|
||||
|
|
|
@ -278,7 +278,7 @@ namespace CNTKMathTest
|
|||
Assert::IsTrue(M2.IsEqualTo(M3, 0.0001f));
|
||||
}
|
||||
|
||||
TEST_METHOD(GPUMatrixRowSlice)
|
||||
TEST_METHOD(GPUMatrixRowSliceAndStack)
|
||||
{
|
||||
float *fArray = new float[15];
|
||||
fArray[0] = 1; fArray[5] = 6; fArray[10] = 11;
|
||||
|
@ -308,6 +308,27 @@ namespace CNTKMathTest
|
|||
M3 += M0;
|
||||
M0.AddToRowSliceValuesOf(M1, 2,2);
|
||||
Assert::IsTrue(M3.IsEqualTo(M0, 0.0001));
|
||||
|
||||
M2.AddWithRowSliceValuesOf(M1, 0, 2);
|
||||
float *fArray4 = new float[6];
|
||||
fArray4[0] = 6; fArray4[2] = 16; fArray4[4] = 26;
|
||||
fArray4[1] = 8; fArray4[3] = 18; fArray4[5] = 28;
|
||||
GPUMatrix<float> M4(2, 3, fArray4, matrixFlagNormal);
|
||||
Assert::IsTrue(M2.IsEqualTo(M4, 0.0001));
|
||||
|
||||
GPUMatrix<float> M5, M6, M7, M8;
|
||||
M5.AssignRowSliceValuesOf(M0, 0, 2);
|
||||
M6.AssignRowSliceValuesOf(M0, 2, 1);
|
||||
M7.AssignRowSliceValuesOf(M0, 3, 2);
|
||||
|
||||
std::vector<const GPUMatrix<float> *> inputMatrices;
|
||||
inputMatrices.resize(3);
|
||||
inputMatrices[0] = &M5;
|
||||
inputMatrices[1] = &M6;
|
||||
inputMatrices[2] = &M7;
|
||||
M8.AssignRowStackValuesOf(inputMatrices, 0, 3);
|
||||
|
||||
Assert::IsTrue(M8.IsEqualTo(M0, 0.0001));
|
||||
}
|
||||
|
||||
TEST_METHOD(GPUKhatriRaoProduct)
|
||||
|
|
|
@ -429,6 +429,48 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
return *this;
|
||||
}
|
||||
|
||||
//stack the columns in inputMatrices (starting from sliceStartCol for sliceNumCols columns) and assign it to [this] object.
|
||||
template<class ElemType>
|
||||
CPUMatrix<ElemType>& CPUMatrix<ElemType>::AssignRowStackValuesOf(const std::vector<const CPUMatrix<ElemType>*>& inputMatrices, const size_t sliceStartCol, const size_t sliceNumCols)
|
||||
{
|
||||
if (sliceNumCols == 0)
|
||||
LogicError("AssignRowStackValuesOf: sliceNumCols should > 0.");
|
||||
|
||||
size_t totalRows = 0;
|
||||
size_t* startRowIndeces = new size_t[inputMatrices.size()];
|
||||
startRowIndeces[0] = 0;
|
||||
for (int i = 0; i < inputMatrices.size(); i++)
|
||||
{
|
||||
const CPUMatrix<ElemType>& a = *inputMatrices[i];
|
||||
if (a.IsEmpty())
|
||||
LogicError("AssignRowStackValuesOf: input matrix (%d) is empty.", i);
|
||||
|
||||
if (a.GetNumCols() < sliceStartCol + sliceNumCols)
|
||||
LogicError("AssignRowStackValuesOf: input matrix (%d) GetNumCols() < sliceStartCol + sliceNumCols.", i);
|
||||
|
||||
totalRows += a.GetNumRows();
|
||||
if (i<inputMatrices.size()-1)
|
||||
startRowIndeces[i + 1] = startRowIndeces[i] + a.GetNumRows();
|
||||
}
|
||||
|
||||
Resize(totalRows, sliceNumCols);
|
||||
|
||||
auto& us = *this;
|
||||
|
||||
#pragma omp parallel for
|
||||
for (long j = 0; j<sliceNumCols; j++)
|
||||
{
|
||||
for (int i = 0; i < inputMatrices.size(); i++)
|
||||
{
|
||||
memcpy(&us(startRowIndeces[i], j), &(*inputMatrices[i])(0, sliceStartCol+j), inputMatrices[i]->GetNumRows() * sizeof(ElemType));
|
||||
}
|
||||
}
|
||||
|
||||
delete [] startRowIndeces;
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
template<class ElemType>
|
||||
void CPUMatrix<ElemType>::MinusOneAt(CPUMatrix<ElemType>& c, const size_t position)
|
||||
{
|
||||
|
@ -672,16 +714,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
// if it's externally managed, then populate the structure
|
||||
if (matrixFlags&matrixFlagDontOwnBuffer)
|
||||
{
|
||||
// free previous array allocation if any before overwriting
|
||||
if (m_pArray != nullptr)
|
||||
delete [] m_pArray;
|
||||
|
||||
m_pArray = pArray;
|
||||
m_numRows = numRows;
|
||||
m_numCols = numCols;
|
||||
// free previous array allocation if any before overwriting
|
||||
if (m_pArray != nullptr)
|
||||
delete[] m_pArray;
|
||||
m_pArray = pArray;
|
||||
m_elemSizeAllocated = GetNumElements();
|
||||
m_externalBuffer = true;
|
||||
}
|
||||
|
@ -3877,7 +3916,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
|
||||
template<class ElemType>
|
||||
void CPUMatrix<ElemType>::AssignNoiseContrastiveEstimation(const CPUMatrix<ElemType>& a,
|
||||
const CPUMatrix<ElemType>& b, const CPUMatrix<ElemType>& bias, size_t sampleCount, CPUMatrix<ElemType>& tmp, CPUMatrix<ElemType>& c)
|
||||
const CPUMatrix<ElemType>& b, const CPUMatrix<ElemType>& bias, CPUMatrix<ElemType>& tmp, CPUMatrix<ElemType>& c)
|
||||
//this: samples+probs
|
||||
// a: hidden
|
||||
// b: embedding
|
||||
|
@ -3892,7 +3931,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
std::cerr << endl;
|
||||
}
|
||||
*/
|
||||
sampleCount *= 1;
|
||||
double log_likelihood = 0.0;
|
||||
size_t sample_size = this->GetNumRows() / 2;
|
||||
size_t batch_size = this->GetNumCols();
|
||||
|
|
|
@ -216,7 +216,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
CPUMatrix<ElemType>& AssignVectorNorm2Of(CPUMatrix<ElemType>& a, const bool isColWise);
|
||||
|
||||
void AssignNoiseContrastiveEstimation(const CPUMatrix<ElemType>& a, const CPUMatrix<ElemType>& b, const CPUMatrix<ElemType>& bias,
|
||||
size_t sampleCount, CPUMatrix<ElemType>& tmp, CPUMatrix<ElemType>& c);
|
||||
CPUMatrix<ElemType>& tmp, CPUMatrix<ElemType>& c);
|
||||
|
||||
void AssignNCEUnnormalizedEval(const CPUMatrix<ElemType>& a,
|
||||
const CPUMatrix<ElemType>& b, const CPUMatrix<ElemType>& bias, CPUMatrix<ElemType>& c);
|
||||
|
@ -244,6 +244,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
CPUMatrix<ElemType>& AssignRowSliceValuesOf(const CPUMatrix<ElemType>& a, const size_t startIndex, const size_t numRows);
|
||||
CPUMatrix<ElemType>& AddToRowSliceValuesOf(const CPUMatrix<ElemType>& a, const size_t startIndex, const size_t numRows);
|
||||
CPUMatrix<ElemType>& AddWithRowSliceValuesOf(const CPUMatrix<ElemType>& a, const size_t startIndex, const size_t numRows);
|
||||
CPUMatrix<ElemType>& AssignRowStackValuesOf(const std::vector<const CPUMatrix<ElemType>*>& inputMatrices, const size_t sliceStartCol, const size_t sliceNumCols);
|
||||
|
||||
CPUMatrix<ElemType>& AssignToRowSliceValuesOf(const CPUMatrix<ElemType>& a, const size_t startIndex, const size_t numRows);
|
||||
|
||||
|
|
|
@ -678,6 +678,63 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
return *this;
|
||||
}
|
||||
|
||||
//stack the columns in inputMatrices (starting from sliceStartCol for sliceNumCols columns) and assign it to [this] object.
|
||||
template<class ElemType>
|
||||
GPUMatrix<ElemType>& GPUMatrix<ElemType>::AssignRowStackValuesOf(const std::vector<const GPUMatrix<ElemType>*>& inputMatrices, const size_t sliceStartCol, const size_t sliceNumCols)
|
||||
{
|
||||
if (sliceNumCols == 0)
|
||||
LogicError("AssignRowStackValuesOf: sliceNumCols should > 0.");
|
||||
|
||||
size_t totalRows = 0;
|
||||
size_t* startRowIndeces = new size_t[inputMatrices.size()+1];
|
||||
ElemType ** bufferPointersInInputMatrices = new ElemType*[inputMatrices.size()];
|
||||
|
||||
startRowIndeces[0] = 0;
|
||||
|
||||
for (int i = 0; i < inputMatrices.size(); i++)
|
||||
{
|
||||
const GPUMatrix<ElemType>& a = *inputMatrices[i];
|
||||
if (a.IsEmpty())
|
||||
LogicError("AssignRowStackValuesOf: input matrix (%d) is empty.", i);
|
||||
|
||||
if (a.GetNumCols() < sliceStartCol + sliceNumCols)
|
||||
LogicError("AssignRowStackValuesOf: input matrix (%d) GetNumCols() < sliceStartCol + sliceNumCols.", i);
|
||||
|
||||
totalRows += a.GetNumRows();
|
||||
startRowIndeces[i + 1] = startRowIndeces[i] + a.GetNumRows();
|
||||
|
||||
bufferPointersInInputMatrices[i] = a.m_pArray + a.LocateColumn(sliceStartCol);
|
||||
}
|
||||
|
||||
Resize(totalRows, sliceNumCols);
|
||||
|
||||
PrepareDevice();
|
||||
|
||||
ElemType** bufferPointersInGPU = NULL;
|
||||
CUDA_CALL(cudaMalloc((void***)&bufferPointersInGPU, inputMatrices.size()*sizeof(ElemType*)));
|
||||
CUDA_CALL(cudaMemcpy(bufferPointersInGPU, bufferPointersInInputMatrices, inputMatrices.size()*sizeof(ElemType*), cudaMemcpyHostToDevice));
|
||||
delete[] bufferPointersInInputMatrices;
|
||||
|
||||
size_t* startRowIndecesInGPU = NULL;
|
||||
CUDA_CALL(cudaMalloc((void**)&startRowIndecesInGPU, (1+inputMatrices.size())*sizeof(size_t)));
|
||||
CUDA_CALL(cudaMemcpy(startRowIndecesInGPU, startRowIndeces, (1+inputMatrices.size())*sizeof(size_t), cudaMemcpyHostToDevice));
|
||||
delete[] startRowIndeces;
|
||||
|
||||
LONG64 N = (LONG64)GetNumElements();
|
||||
int blocksPerGrid = (int)ceil(1.0*N / threadsPerBlock);
|
||||
cudaEvent_t done = nullptr;
|
||||
if (do_sync) CUDA_CALL(cudaEventCreate(&done));
|
||||
_assignRowStackValuesOf<ElemType> << <blocksPerGrid, threadsPerBlock, 0, t_stream >> >(m_pArray, bufferPointersInGPU, startRowIndecesInGPU, (long) inputMatrices.size(), N, (long)GetNumRows(), (long)GetNumCols());
|
||||
if (do_sync) CUDA_CALL(cudaEventRecord(done));
|
||||
if (do_sync) CUDA_CALL(cudaEventSynchronize(done));
|
||||
if (do_sync) CUDA_CALL(cudaEventDestroy(done));
|
||||
|
||||
CUDA_CALL(cudaFree(bufferPointersInGPU));
|
||||
CUDA_CALL(cudaFree(startRowIndecesInGPU));
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
/// c = c - 1.0 for a specific position
|
||||
template<class ElemType>
|
||||
void GPUMatrix<ElemType>::MinusOneAt(GPUMatrix<ElemType>& c, const size_t position)
|
||||
|
|
|
@ -274,6 +274,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
GPUMatrix<ElemType>& AssignRowSliceValuesOf(const GPUMatrix<ElemType>& a, const size_t startIndex, const size_t numRows);
|
||||
GPUMatrix<ElemType>& AddToRowSliceValuesOf(const GPUMatrix<ElemType>& a, const size_t startIndex, const size_t numRows);
|
||||
GPUMatrix<ElemType>& AddWithRowSliceValuesOf(const GPUMatrix<ElemType>& a, const size_t startIndex, const size_t numRows);
|
||||
GPUMatrix<ElemType>& AssignRowStackValuesOf(const std::vector<const GPUMatrix<ElemType>*>& inputMatrices, const size_t sliceStartCol, const size_t sliceNumCols);
|
||||
|
||||
GPUMatrix<ElemType>& AssignRepeatOf(const GPUMatrix<ElemType>& a, const size_t numRowRepeats, const size_t numColRepeats);
|
||||
GPUMatrix<ElemType>& AssignPositiveAndShiftedNegSample(const GPUMatrix<ElemType>& a, const size_t posNumber, const size_t negNumber, const size_t shiftNumber);
|
||||
|
|
|
@ -377,6 +377,27 @@ __global__ void _addWithRowSliceValuesOf(ElemType * dest, ElemType * src, const
|
|||
dest[id] += src[IDX2C(row + startIndex, col, srcRows)];
|
||||
}
|
||||
|
||||
template<class ElemType>
|
||||
__global__ void _assignRowStackValuesOf(ElemType * dest, ElemType ** srces, size_t* startRowIndeces, const LONG64 numSrces, const LONG64 N, const long destRows, const long destCols)
|
||||
{
|
||||
LONG64 id = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
if (id >= N)
|
||||
return;
|
||||
|
||||
long col = id / destRows; //dest is the full matrix, rowslice is taken from the src
|
||||
long row = id - (col * destRows);
|
||||
|
||||
//can we replace the for loop with something better?
|
||||
int srcId = 0;
|
||||
for (; srcId < numSrces; srcId++)
|
||||
{
|
||||
if (startRowIndeces[srcId + 1]>row)
|
||||
break;
|
||||
}
|
||||
|
||||
dest[id] = srces[srcId][IDX2C(row - startRowIndeces[srcId], col, startRowIndeces[srcId+1] - startRowIndeces[srcId])];
|
||||
}
|
||||
|
||||
template<class ElemType>
|
||||
__global__ void _assignRepeatOf(ElemType * dest, ElemType * src, const LONG64 N, const long srcRows, const long srcCols, const long destRows)
|
||||
{
|
||||
|
|
|
@ -79,16 +79,16 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
|
||||
size_t MajorIndexCount() const
|
||||
{
|
||||
return MajorIndexCount(m_numRows, m_numCols, m_elemSizeAllocated, m_format);
|
||||
return MajorIndexCount(m_numRows, m_numCols, m_nz, m_format);
|
||||
}
|
||||
size_t MajorIndexCount(const size_t numRows, const size_t numCols, const size_t numNZReserved, const MatrixFormat format) const
|
||||
size_t MajorIndexCount(const size_t numRows, const size_t numCols, const size_t numNZ, const MatrixFormat format) const
|
||||
{
|
||||
if (format == matrixFormatSparseBlockCol)
|
||||
return numCols;
|
||||
else if (format == matrixFormatSparseBlockRow)
|
||||
return numRows;
|
||||
else
|
||||
return numNZReserved;
|
||||
return numNZ;
|
||||
}
|
||||
size_t MajorIndexSize() const // actual number of major index bytes in use
|
||||
{
|
||||
|
|
|
@ -1520,6 +1520,68 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
return *this;
|
||||
}
|
||||
|
||||
//stack the columns in inputMatrices (starting from sliceStartCol for sliceNumCols columns) and assign it to [this] object.
|
||||
template<class ElemType>
|
||||
Matrix<ElemType>& Matrix<ElemType>::AssignRowStackValuesOf(const std::vector<const Matrix<ElemType>*>& inputMatrices, const size_t sliceStartCol, const size_t sliceNumCols)
|
||||
{
|
||||
for (int i = 0; i < inputMatrices.size(); i++)
|
||||
{
|
||||
const Matrix<ElemType>& a = *inputMatrices[i];
|
||||
DecideAndMoveToRightDevice(*this, a);
|
||||
|
||||
//WARNING: a and this must have same type
|
||||
if (!(GetMatrixType() == a.GetMatrixType()))
|
||||
NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
CurrentDataLocation curLocation = GetCurrentMatrixLocation();
|
||||
if (curLocation == CurrentDataLocation::GPU || curLocation == CurrentDataLocation::BOTH)
|
||||
{
|
||||
if (GetMatrixType() != MatrixType::SPARSE)
|
||||
{
|
||||
//GPUDense;
|
||||
std::vector<const GPUMatrix<ElemType>*> gpuInputMatrices;
|
||||
gpuInputMatrices.resize(inputMatrices.size());
|
||||
for (int i = 0; i < inputMatrices.size(); i++)
|
||||
gpuInputMatrices[i] = inputMatrices[i]->m_GPUMatrix;
|
||||
|
||||
m_GPUMatrix->AssignRowStackValuesOf(gpuInputMatrices, sliceStartCol, sliceNumCols);
|
||||
|
||||
SetDataLocation(CurrentDataLocation::GPU, MatrixType::DENSE);
|
||||
}
|
||||
else
|
||||
{
|
||||
NOT_IMPLEMENTED;
|
||||
}
|
||||
}
|
||||
else if (curLocation == CurrentDataLocation::CPU)
|
||||
{
|
||||
if (GetMatrixType() != MatrixType::SPARSE)
|
||||
{
|
||||
//CPUDense;
|
||||
std::vector<const CPUMatrix<ElemType>*> cpuInputMatrices;
|
||||
cpuInputMatrices.resize(inputMatrices.size());
|
||||
for (int i = 0; i < inputMatrices.size(); i++)
|
||||
cpuInputMatrices[i] = inputMatrices[i]->m_CPUMatrix;
|
||||
|
||||
m_CPUMatrix->AssignRowStackValuesOf(cpuInputMatrices, sliceStartCol, sliceNumCols);
|
||||
|
||||
SetDataLocation(CurrentDataLocation::CPU, MatrixType::DENSE);
|
||||
}
|
||||
else
|
||||
{
|
||||
NOT_IMPLEMENTED;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
throw std::runtime_error("Matrices do not exist in either CPU or GPU.");
|
||||
}
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
||||
template<class ElemType>
|
||||
Matrix<ElemType>& Matrix<ElemType>::AssignRepeatOf(const Matrix<ElemType>& a, const size_t numRowRepeats, const size_t numColRepeats)
|
||||
{
|
||||
|
@ -3600,7 +3662,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
{
|
||||
size_t sampleCount = a.m_CPUMatrix->GetNumElements() / a.m_CPUMatrix->GetNumRows();
|
||||
tmp.Resize(a.GetNumRows() / 2, sampleCount);
|
||||
a.m_CPUMatrix->AssignNoiseContrastiveEstimation(*b.m_CPUMatrix, *c.m_CPUMatrix, *bias.m_CPUMatrix, sampleCount, *tmp.m_CPUMatrix, *this->m_CPUMatrix);
|
||||
a.m_CPUMatrix->AssignNoiseContrastiveEstimation(*b.m_CPUMatrix, *c.m_CPUMatrix, *bias.m_CPUMatrix, *tmp.m_CPUMatrix, *this->m_CPUMatrix);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
|
@ -259,6 +259,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
Matrix<ElemType>& AssignRowSliceValuesOf(const Matrix<ElemType>& a, const size_t startIndex, const size_t numRows);
|
||||
Matrix<ElemType>& AddToRowSliceValuesOf(const Matrix<ElemType>& a, const size_t startIndex, const size_t numRows);
|
||||
Matrix<ElemType>& AddWithRowSliceValuesOf(const Matrix<ElemType>& a, const size_t startIndex, const size_t numRows);
|
||||
Matrix<ElemType>& AssignRowStackValuesOf(const std::vector<const Matrix<ElemType>*>& inputMatrices, const size_t sliceStartCol, const size_t sliceNumCols);
|
||||
|
||||
Matrix<ElemType>& AssignRepeatOf(const Matrix<ElemType>& a, const size_t numRowRepeats, const size_t numColRepeats);
|
||||
Matrix<ElemType>& AssignPositiveAndShiftedNegSample(const Matrix<ElemType>& a, const size_t posNumber, const size_t negNumber, const size_t shiftNumber);
|
||||
|
|
|
@ -479,6 +479,7 @@ namespace Microsoft {
|
|||
//for each column of a, we add all rows of a to this starting from startIndex
|
||||
template<class ElemType> GPUMatrix<ElemType>& GPUMatrix<ElemType>::AddToRowSliceValuesOf(const GPUMatrix<ElemType>& /*a*/, const size_t startIndex, const size_t numRows) { return *this; }
|
||||
template<class ElemType> GPUMatrix<ElemType>& GPUMatrix<ElemType>::AddWithRowSliceValuesOf(const GPUMatrix<ElemType>& /*a*/, const size_t startIndex, const size_t numRows) { return *this; }
|
||||
// stub: ignores its arguments and returns [this] unchanged, like the neighbouring stubs
template<class ElemType> GPUMatrix<ElemType>& GPUMatrix<ElemType>::AssignRowStackValuesOf(const std::vector<const GPUMatrix<ElemType>*>& /*inputMatrices*/, const size_t /*sliceStartCol*/, const size_t /*sliceNumCols*/) { return *this; }
|
||||
|
||||
template<class ElemType> GPUMatrix<ElemType>& GPUMatrix<ElemType>::AssignRepeatOf(const GPUMatrix<ElemType>& /*a*/, const size_t numRowRepeats, const size_t numColRepeats) { return *this; }
|
||||
template<class ElemType> GPUMatrix<ElemType>& GPUMatrix<ElemType>::AssignPositiveAndShiftedNegSample(const GPUMatrix<ElemType>& a, const size_t posNumber, const size_t negNumber, const size_t shiftNumber) { return *this; }
|
||||
|
|
|
@ -0,0 +1,234 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Default settings; the command-line options parsed below override them
BUILD=1
RUN=1
CLEAN_AFTER=0
CLEAN_BEFORE=0

# Parse command-line arguments.
# Every iteration consumes one option; an option that takes a value shifts once more itself.
while [[ $# -gt 0 ]]
do
    key="$1"

    case $key in
        -h|--help)
            echo "Usage: build-and-test [options]"
            echo "Options:"
            echo " -q|--quiet-build - redirect build output to file (by default those will be in <cntk_root>/.run-<operating_system>-*)"
            echo " -r|--run-only - elides build step, runs the binaries that have already been built"
            echo " -b|--build-only - just build, do not run"
            echo " -cb|--clean-build - clean up the enlistment binaries before build"
            echo " -o|--output-directory <output_dir> - specify output directory to use"
            echo "The root directory used to build and run CNTK is the one that hosts the Scripts directory containing this script"
            exit 1
            ;;
        # --quiet is kept for backward compatibility; --quiet-build is the spelling the help text documents
        -q|--quiet|--quiet-build)
            QUIET_BUILD=1
            ;;
        -r|--run-only)
            BUILD=0
            RUN=1
            ;;
        -b|--build-only)
            BUILD=1
            RUN=0
            ;;
        -cb|--clean-build)
            CLEAN_BEFORE=1
            BUILD=1
            ;;
        -o|--output-directory)
            # the directory must follow the option
            if [[ $# -lt 2 ]]; then
                echo "Option $key requires an argument"
                exit 1
            fi
            OUTPUT_DIR="$2"
            shift # past argument
            ;;
        *)
            echo "Unknown option $key"
            exit 1
            ;;
    esac
    shift # past argument or value
done
|
||||
|
||||
# Step 0 -- Check for incompatible options and validate the build environment

# CLEAN_BEFORE together with RUN is rejected when BUILD is switched off
if [[ $BUILD == 0 && $RUN == 1 && $CLEAN_BEFORE == 1 ]]; then
    echo "============ ERROR: Incompatible options RUN and CLEAN_BEFORE set without BUILD ============"
    exit 1
fi

# This script can also build CNTK on Windows, from a Cygwin window with the
# Visual C++ environment loaded. In that case the OS environment variable is
# set, which is what tells Windows apart from Linux here.
if [[ $OSTYPE == "cygwin" && $OS == "Windows_NT" ]]; then
    if [[ -z $VS120COMNTOOLS ]]; then
        echo "============ Visual Studio 12.0 environment not properly setup or VS not installed ============"
        echo "============ Please find and run the appropriate vcvarsall.bat script ============"
        exit 1
    fi

    if [[ -z $ACML_PATH ]]; then
        echo "============ ACML path not set ============"
        echo "============ ACML libraries are needed to successfully build CNTK ============"
        exit 1
    fi

    # Windows layout: <root>/x64/{Debug,Release}/CNTK.exe
    PREFIX_DIR=x64
    DEBUG_DIR=Debug
    RELEASE_DIR=Release
    BIN_NAME=CNTK.exe
    BUILD_OS="windows"
elif [[ $OSTYPE == "linux-gnu" ]]; then
    # Linux layout: <root>/bin/x86_64.gpu.{debug,release}.acml/cntk, built with Makefile.gpu
    PREFIX_DIR=bin
    DEBUG_DIR=x86_64.gpu.debug.acml
    RELEASE_DIR=x86_64.gpu.release.acml
    BIN_NAME=cntk
    MAKEFILE=Makefile.gpu
    BUILD_OS="linux"
else
    echo "============ ERROR: Unsupported OS ============"
    echo "============ Scripts supports only building from Linux and Windows through Cygwin ============"
    exit 1
fi
|
||||
|
||||
# Step 1 -- Prepare temporary folders and files, tweak settings if necessary

# Get to the root path from which we know how to build and run:
# the parent of the directory that contains this script
SCRIPT=$(readlink -f "$0")
SCRIPT_DIR=$(dirname "$SCRIPT")
CNTK_ROOT=$(dirname "$SCRIPT_DIR")

# Setup the output directory
if [[ $OUTPUT_DIR == "" ]]; then
    OUTPUT_DIR="$CNTK_ROOT/.run-$BUILD_OS-$RANDOM"
fi

echo "============ Creating CNTK temp directory in $OUTPUT_DIR ============"
mkdir -p "$OUTPUT_DIR" || exit $?

# The working directory changes below, so a relative -o path must be made
# absolute now or later steps would look for it in the wrong place.
OUTPUT_DIR=$(cd "$OUTPUT_DIR" && pwd) || exit $?

CONF_FILE="$OUTPUT_DIR/Simple.conf"
BUILD_FILE="$OUTPUT_DIR/Build"
RUN_FILE="$OUTPUT_DIR/Result"

if ! [[ -d "$CNTK_ROOT/MachineLearning" ]]; then
    echo "============ ERROR: Build script located in the wrong directory ($SCRIPT_DIR) ============"
    exit 1
fi

cd "$CNTK_ROOT" || exit $?

# Seed the output directory with the demo configuration unless one is already there
if ! [[ -f "$CONF_FILE" ]]; then
    cp Demos/Simple/Simple.config "$CONF_FILE" || exit $?

    # This chmod is necessary due to restrictive Cygwin interpretation of Windows permissions.
    # Cygwin interprets Windows permissions as ----rwx---, which lacks read permissions for user.
    chmod a+r "$CONF_FILE" || exit $?
fi

if [[ $QUIET_BUILD == 1 ]]; then
    echo "============ WARNING: You have selected quiet build. All build output will be placed in ($OUTPUT_DIR) ============"
fi
|
||||
|
||||
# Step 2 -- Build the project debug and release, if requested
if [[ $BUILD == 1 ]]; then
    for FLAVOR in debug release
    do
        # Our make is too noisy right now and it is difficult to spot
        # issues from stdout and stderr. In the quiet mode these are
        # redirected to a file where they could be examined after the fact.
        # File descriptors 6 and 7 carry the build's stdout and stderr.
        if [[ $QUIET_BUILD == 1 ]]; then
            exec 6>"$BUILD_FILE.$FLAVOR.out" || exit $?
            exec 7>"$BUILD_FILE.$FLAVOR.err" || exit $?
        else
            exec 6>&1 || exit $?
            exec 7>&2 || exit $?
        fi

        echo "============ Building CNTK $FLAVOR (clean=$CLEAN_BEFORE) ============"

        if [[ $OS == "Windows_NT" ]]; then
            if [[ $CLEAN_BEFORE == 1 ]]; then
                msbuild.exe /property:Configuration=$FLAVOR /t:Clean 1>&6 2>&7 || exit $?
            fi
            msbuild.exe /property:Configuration=$FLAVOR /m 1>&6 2>&7 || exit $?
        else
            if [[ $CLEAN_BEFORE == 1 ]]; then
                make BUILDTYPE=$FLAVOR -f "$MAKEFILE" clean 1>&6 2>&7 || exit $?
            fi
            make BUILDTYPE=$FLAVOR -j -f "$MAKEFILE" 1>&6 2>&7 || exit $?
        fi

        # Build logs are only written in quiet mode; without this guard the glob
        # matches nothing and chmod prints an error on every non-quiet build.
        if [[ $QUIET_BUILD == 1 ]]; then
            chmod a+r "$BUILD_FILE".*
        fi
    done
fi
|
||||
|
||||
# Step 3 -- Run the project tests, both debug and release, if requested
if [[ $RUN == 1 ]]; then
    # Both flavors must already have been built
    if ! [[ -f "$CNTK_ROOT/$PREFIX_DIR/$DEBUG_DIR/$BIN_NAME" && -f "$CNTK_ROOT/$PREFIX_DIR/$RELEASE_DIR/$BIN_NAME" ]]; then
        echo "============ ERROR: CNTK did not build properly ============"
        exit 1
    fi

    cd "$PREFIX_DIR" || exit $?

    for TARGET in CPU GPU
    do
        # These sed scripts are simply toggling DeviceNumber argument in the config file
        # If it is set to Auto, it will pick GPU over CPU. At -1 CPU is selected.
        if [[ $TARGET == CPU ]]; then
            sed -i -e 's/^DeviceNumber.*/DeviceNumber=-1/g' "$CONF_FILE" || exit $?
        else
            sed -i -e 's/^DeviceNumber.*/DeviceNumber=Auto/g' "$CONF_FILE" || exit $?
        fi

        for FLAVOR in debug release
        do
            # Pick the binary directory for this flavor. The variable must be expanded
            # ($FLAVOR): comparing the literal word FLAVOR never matches "debug", which
            # made every run use the release binary.
            if [[ $FLAVOR == "debug" ]]; then
                FLAVOR_DIR="$DEBUG_DIR"
            else
                FLAVOR_DIR="$RELEASE_DIR"
            fi
            # NOTE(review): the file name does not include $TARGET, so the GPU pass
            # overwrites the CPU pass's output for the same flavor.
            OUT_FILE="$RUN_FILE.$FLAVOR.out"

            echo "============ Running CNTK for ($FLAVOR) ($TARGET), output in ($RUN_FILE.*) ============"
            rm -rf models
            if [[ $OS == "Windows_NT" ]]; then
                # We have to use cygpath on Windows to modify the file paths into the format readable by cntk.
                time ./$FLAVOR_DIR/$BIN_NAME configFile="`cygpath -w $CONF_FILE`" &>$OUT_FILE || exit $?
            else
                time ./$FLAVOR_DIR/$BIN_NAME configFile=$CONF_FILE &>$OUT_FILE || exit $?
            fi
            chmod a+r $RUN_FILE.*

            # Check if execution was successful:
            # the output must name the requested device ...
            grep -q "Using $TARGET" "$OUT_FILE" || {
                echo "============ ERROR: Run output (in $OUT_FILE) did not contain information about target device ($TARGET) ============"
                exit 1
            }

            # ... and must not report any exception
            grep -q "EXCEPTION" "$OUT_FILE" && {
                echo "============ ERROR: Run output in ($OUT_FILE) contains exceptions ============"
                grep "EXCEPTION" "$OUT_FILE"
                exit 1
            }
        done
    done
fi
|
||||
|
||||
# Step 5 -- Optionally clean after builds and tests
if [[ $CLEAN_AFTER == 1 ]]; then
    rm -rf models
    cd "$CNTK_ROOT" || exit $?

    # File descriptors 6 and 7 are only opened by the build step. When the build
    # was skipped, point them at stdout/stderr so the redirections below do not
    # fail with "Bad file descriptor".
    if ! { true >&6; } 2>/dev/null; then
        exec 6>&1 || exit $?
    fi
    if ! { true >&7; } 2>/dev/null; then
        exec 7>&2 || exit $?
    fi

    for FLAVOR in debug release
    do
        echo "============ Cleaning up CNTK $FLAVOR ============"
        if [[ $OS == "Windows_NT" ]]; then
            msbuild.exe /property:Configuration=$FLAVOR /t:clean 1>&6 2>&7 || exit $?
        else
            make BUILDTYPE=$FLAVOR -f "$MAKEFILE" clean 1>&6 2>&7 || exit $?
        fi
    done
    rm -rf "$OUTPUT_DIR"
fi

echo "============ Build and test of CNTK was successful! ============"
|
Загрузка…
Ссылка в новой задаче