Merge with new changes including RowStackNode

This commit is contained in:
kaisheny 2015-06-16 16:01:19 -07:00
Родитель 01468f3fb6 99af4139a5
Коммит f332421b7b
32 изменённых файлов: 810 добавлений и 134 удалений

1
.gitignore поставляемый
Просмотреть файл

@ -15,6 +15,7 @@ x64/
build/
[Bb]in/
[Oo]bj/
.run-*
# Enable "build/" folder in the NuGet Packages folder since NuGet packages use it for MSBuild targets
!packages/*/build/

Просмотреть файл

@ -47,8 +47,8 @@ BinaryWriter<ElemType>::~BinaryWriter()
// miniBatchMode=Partial
// randomize=None
// wfile=c:\speech\mnist\mnist_test.bin
// #wsize - inital size of the file in MB
// # if calculated size would be bigger, that is used instead
// #wsize - initial size of the file in MB, defaults to 256
// # has to be large enough for your dataset. the file will shrink to the actual size when closed.
// #wsize=256
// #wrecords - number of records we should allocate space for in the file
// # files cannot be expanded, so this should be large enough. If known modify this element in config before creating file

Просмотреть файл

@ -980,8 +980,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
// dereference matrix that corresponds to key (input/output name) and
// populate based on whether its a feature or a label
//Matrix<ElemType>& data =
*matrices[iter->first]; // can be features or labels
//Matrix<ElemType>& data = *matrices[iter->first]; // can be features or labels
if (m_nameToTypeMap[iter->first] == InputOutputTypes::real)
{
@ -1058,8 +1057,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
// dereference matrix that corresponds to key (input/output name) and
// populate based on whether its a feature or a label
//Matrix<ElemType>& data =
*matrices[iter->first]; // can be features or labels
//Matrix<ElemType>& data =*matrices[iter->first]; // can be features or labels
if (m_nameToTypeMap[iter->first] == InputOutputTypes::real)
{
@ -1134,8 +1132,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
// dereference matrix that corresponds to key (input/output name) and
// populate based on whether its a feature or a label
//Matrix<ElemType>& data =
*matrices[iter->first]; // can be features or labels
//Matrix<ElemType>& data = *matrices[iter->first]; // can be features or labels
if (m_nameToTypeMap[iter->first] == InputOutputTypes::real)
{

Просмотреть файл

@ -142,6 +142,15 @@ extern void _CHECKED_ASSERT_error(const char * file, int line, const char * exp)
#endif
#endif
/**
These macros are used for sentence segmentation information.
*/
#define SENTENCE_BEGIN 0
#define SENTENCE_MIDDLE 1
#define NO_LABELS -1
#define EXISTS_SENTENCE_BEGIN_OR_NO_LABELS 0
#define NO_EXISTS_SENTENCE_BEGIN_OR_NO_LABELS 1
// ----------------------------------------------------------------------------
// basic data types
// ----------------------------------------------------------------------------

Просмотреть файл

@ -382,47 +382,58 @@ public:
// TODO: we can store labels more efficiently now since we don't do frame-wise random access anymore.
// OK, utterance has all we need --remember it
utteranceset.push_back (std::move (utterance));
if (m==0)
{
_totalframes += uttframes;
framesaccum.push_back(uttframes); //track number of frames in each utterance - first feature is the reference
if (!labels.empty() && !lacksmlf)
//if (!labels.empty() && labelsiter != labels[0].end())
{
foreach_index (j, labels)
// first verify that all the label files have the proper duration
bool durationmatch = true;
foreach_index(j, labels)
{
const auto & labseq = labels[j].find(key)->second;
// check if durations match; skip if not
size_t labframes = labseq.empty() ? 0 : (labseq[labseq.size()-1].firstframe + labseq[labseq.size()-1].numframes);
size_t labframes = labseq.empty() ? 0 : (labseq[labseq.size() - 1].firstframe + labseq[labseq.size() - 1].numframes);
if (labframes != uttframes)
{
fprintf (stderr, " [duration mismatch (%d in label vs. %d in feat file), skipping %S]", labframes, uttframes, key.c_str());
fprintf(stderr, " [duration mismatch (%d in label vs. %d in feat file), skipping %S]", labframes, uttframes, key.c_str());
nomlf++;
continue; // skip this utterance at all
durationmatch = false;
break; // continue; // skip this utterance at all
}
// expand classid sequence into flat array
foreach_index (i, labseq)
}
if (durationmatch){
utteranceset.push_back(std::move(utterance));
_totalframes += uttframes;
framesaccum.push_back(uttframes); //track number of frames in each utterance - first feature is the reference
// then parse each mlf if the durations are consistent
foreach_index(j, labels)
{
const auto & e = labseq[i];
if ((i > 0 && labseq[i-1].firstframe + labseq[i-1].numframes != e.firstframe) || (i == 0 && e.firstframe != 0))
throw std::runtime_error (msra::strfun::strprintf ("minibatchutterancesource: labels not in consecutive order MLF in label set: %S", key.c_str()));
if (e.classid >= udim[j])
throw std::runtime_error (msra::strfun::strprintf ("minibatchutterancesource: class id %d exceeds model output dimension %d in file %S", e.classid, udim, key.c_str()));
if (e.classid != (CLASSIDTYPE) e.classid)
throw std::runtime_error ("CLASSIDTYPE has too few bits");
for (size_t t = e.firstframe; t < e.firstframe + e.numframes; t++)
classids[j]->push_back ((CLASSIDTYPE) e.classid);
numclasses[j] = max (numclasses[j], 1u + e.classid);
counts[j].resize (numclasses[j], 0);
counts[j][e.classid] += e.numframes;
}
classids[j]->push_back ((CLASSIDTYPE) -1); // append a boundary marker marker for checking
const auto & labseq = labels[j].find(key)->second;
// expand classid sequence into flat array
foreach_index(i, labseq)
{
const auto & e = labseq[i];
if ((i > 0 && labseq[i - 1].firstframe + labseq[i - 1].numframes != e.firstframe) || (i == 0 && e.firstframe != 0))
throw std::runtime_error(msra::strfun::strprintf("minibatchutterancesource: labels not in consecutive order MLF in label set: %S", key.c_str()));
if (e.classid >= udim[j])
throw std::runtime_error(msra::strfun::strprintf("minibatchutterancesource: class id %d exceeds model output dimension %d in file %S", e.classid, udim, key.c_str()));
if (e.classid != (CLASSIDTYPE)e.classid)
throw std::runtime_error("CLASSIDTYPE has too few bits");
for (size_t t = e.firstframe; t < e.firstframe + e.numframes; t++)
classids[j]->push_back((CLASSIDTYPE)e.classid);
numclasses[j] = max(numclasses[j], 1u + e.classid);
counts[j].resize(numclasses[j], 0);
counts[j][e.classid] += e.numframes;
}
if (!labels[j].empty() && classids[j]->size() != _totalframes + utteranceset.size())
throw std::logic_error (msra::strfun::strprintf ("minibatchutterancesource: label duration inconsistent with feature file in MLF label set: %S", key.c_str()));
assert (labels[j].empty() || classids[j]->size() == _totalframes + utteranceset.size());
classids[j]->push_back((CLASSIDTYPE)-1); // append a boundary marker for checking
if (!labels[j].empty() && classids[j]->size() != _totalframes + utteranceset.size())
throw std::logic_error(msra::strfun::strprintf("minibatchutterancesource: label duration inconsistent with feature file in MLF label set: %S", key.c_str()));
assert(labels[j].empty() || classids[j]->size() == _totalframes + utteranceset.size());
}
}
}
else{
@ -451,7 +462,7 @@ public:
}
if (nomlf + nolat > 0)
{
fprintf (stderr, "minibatchutterancesource: out of %d files, %d files not found in label set and %d have no lattice\n", infiles.size(), nomlf, nolat);
fprintf (stderr, "minibatchutterancesource: out of %d files, %d files not found in label set and %d have no lattice\n", infiles[0].size(), nomlf, nolat);
if (nomlf + nolat > infiles[m].size() / 2)
throw std::runtime_error ("minibatchutterancesource: too many files not found in label set--assuming broken configuration\n");
}

Просмотреть файл

@ -24,6 +24,7 @@
#define DATAREADER_EXPORTS // creating the exports here
#include "DataReader.h"
#include "HTKMLFReader.h"
#include "commandArgUtil.h"
#ifdef LEAKDETECT
#include <vld.h> // for memory leak detection
#endif

Просмотреть файл

@ -28,6 +28,7 @@
#include "DataWriter.h"
#include "commandArgUtil.h"
#include "HTKMLFWriter.h"
#include "commandArgUtil.h"
#ifdef LEAKDETECT
#include <vld.h> // for memory leak detection
#endif

Просмотреть файл

@ -2048,6 +2048,10 @@ void BatchSequenceReader<ElemType>::GetLabelOutput(std::map<std::wstring,
{
RuntimeError("GetLabelOutput::should use CPU for labels ");
}
if (curDevId != CPUDEVICE)
{
labels->TransferFromDeviceToDevice(CPUDEVICE, curDevId, true, false, false);
}
}
template<class ElemType>

Просмотреть файл

@ -11,6 +11,11 @@
#include <stdexcept>
#include <stdint.h>
#if WIN32
#define ftell64 _ftelli64
#else
#define ftell64 ftell
#endif
// SetState for a particular value
template <typename NumType, typename LabelType>
@ -362,10 +367,10 @@ void UCIParser<NumType, LabelType>::ParseInit(LPCWSTR fileName, size_t startFeat
errno_t err = _wfopen_s( &m_pFile, fileName, L"rb" );
if (err)
std::runtime_error("UCIParser::ParseInit - error opening file");
throw std::runtime_error("UCIParser::ParseInit - error opening file");
int rc = _fseeki64(m_pFile, 0, SEEK_END);
if (rc)
std::runtime_error("UCIParser::ParseInit - error seeking in file");
throw std::runtime_error("UCIParser::ParseInit - error seeking in file");
m_fileSize = GetFilePosition();
m_fileBuffer = new BYTE[m_bufferSize];
@ -377,9 +382,9 @@ void UCIParser<NumType, LabelType>::ParseInit(LPCWSTR fileName, size_t startFeat
template <typename NumType, typename LabelType>
int64_t UCIParser<NumType, LabelType>::GetFilePosition()
{
int64_t position = _ftelli64(m_pFile);
int64_t position = ftell64(m_pFile);
if (position == -1L)
std::runtime_error("UCIParser::GetFilePosition - error retrieving file position in file");
throw std::runtime_error("UCIParser::GetFilePosition - error retrieving file position in file");
return position;
}
@ -392,7 +397,7 @@ void UCIParser<NumType, LabelType>::SetFilePosition(int64_t position)
{
int rc = _fseeki64(m_pFile, position, SEEK_SET);
if (rc)
std::runtime_error("UCIParser::SetFilePosition - error seeking in file");
throw std::runtime_error("UCIParser::SetFilePosition - error seeking in file");
// setup state machine to start at this position
PrepareStartPosition(position);
@ -445,7 +450,7 @@ size_t UCIParser<NumType, LabelType>::UpdateBuffer()
size_t bytesToRead = min(m_bufferSize, m_fileSize-m_bufferStart)-saveBytes;
size_t bytesRead = fread(m_fileBuffer+saveBytes, 1, bytesToRead, m_pFile);
if (bytesRead == 0 && ferror(m_pFile))
std::runtime_error("UCIParser::UpdateBuffer - error reading file");
throw std::runtime_error("UCIParser::UpdateBuffer - error reading file");
return bytesRead;
}

Просмотреть файл

@ -90,8 +90,8 @@ private:
int m_elementsConvertedThisLine;
// global stats
int m_totalNumbersConverted;
int m_totalLabelsConverted;
int64_t m_totalNumbersConverted;
int64_t m_totalLabelsConverted;
// file positions/buffer
FILE * m_pFile;

Просмотреть файл

@ -1,8 +1,9 @@
# command=Simple_Demo_Output
RootDir=..
command=Simple_Demo:Simple_Demo_Output
# deviceId=-1 for CPU, >=0 for GPU devices
DeviceNumber=-1
#stderr=Demo
precision=float
@ -13,7 +14,6 @@ deviceId=$DeviceNumber$
outputNodeNames=ScaledLogLikelihood
traceLevel=1
#######################################
# TRAINING CONFIG (Simple, Fixed LR) #
#######################################
@ -52,22 +52,22 @@ Simple_Demo=[
reader=[
# reader to use
readerType=UCIFastReader
file=../Demos/Simple/SimpleDataTrain.txt
file=$RootDir$/Demos/Simple/SimpleDataTrain.txt
miniBatchMode=Partial
randomize=Auto
verbosity=1
features=[
dim=2 # two-dimensional input data
dim=2 # two-dimensional input data
start=0 # Start with first element on line
]
labels=[
start=2 # Skip two elements
start=2 # Skip two elements
dim=1 # One label dimension
labelDim=2 # Two labels possible
labelMappingFile=../Demos/Simple/SimpleMapping.txt
labelMappingFile=$RootDir$/Demos/Simple/SimpleMapping.txt
]
]
]
@ -84,16 +84,16 @@ Simple_Demo_Output=[
reader=[
# reader to use
readerType=UCIFastReader
file=../Demos/Simple/SimpleDataTest.txt
file=$RootDir$/Demos/Simple/SimpleDataTest.txt
features=[
dim=2
start=0
start=0
]
labels=[
start=2
start=2
dim=1
labelDim=2
labelMappingFile=../Demos/Simple/SimpleMapping.txt
labelMappingFile=$RootDir$/Demos/Simple/SimpleMapping.txt
]
]
outputPath=SimpleOutput # Dump output as text

Просмотреть файл

@ -550,41 +550,38 @@ public:
}
ComputationNodePtr nodePtr = GetNodeFromName(nodeName);
ComputationNodePtr childNodePtr0, childNodePtr1, childNodePtr2, childNodePtr3, childNodePtr4;
switch (numChildren)
std::vector<ComputationNodePtr> childrenNodes;
childrenNodes.resize(numChildren);
for (int j = 0; j < numChildren; j++)
childrenNodes[j] = GetNodeFromName(childrenNames[j]);
if (nodePtr->OperationName() == RowStackNode<ElemType>::TypeName()) //allow for variable input nodes
nodePtr->AttachInputs(childrenNodes);
else //fixed input nodes
{
case 1:
childNodePtr0 = GetNodeFromName(childrenNames[0]);
nodePtr->AttachInputs(childNodePtr0);
break;
case 2:
childNodePtr0 = GetNodeFromName(childrenNames[0]);
childNodePtr1 = GetNodeFromName(childrenNames[1]);
nodePtr->AttachInputs(childNodePtr0, childNodePtr1);
break;
case 3:
childNodePtr0 = GetNodeFromName(childrenNames[0]);
childNodePtr1 = GetNodeFromName(childrenNames[1]);
childNodePtr2 = GetNodeFromName(childrenNames[2]);
nodePtr->AttachInputs(childNodePtr0, childNodePtr1, childNodePtr2);
break;
case 4:
childNodePtr0 = GetNodeFromName(childrenNames[0]);
childNodePtr1 = GetNodeFromName(childrenNames[1]);
childNodePtr2 = GetNodeFromName(childrenNames[2]);
childNodePtr3 = GetNodeFromName(childrenNames[3]);
nodePtr->AttachInputs(childNodePtr0, childNodePtr1, childNodePtr2, childNodePtr3);
break;
case 5:
childNodePtr0 = GetNodeFromName(childrenNames[0]);
childNodePtr1 = GetNodeFromName(childrenNames[1]);
childNodePtr2 = GetNodeFromName(childrenNames[2]);
childNodePtr3 = GetNodeFromName(childrenNames[3]);
childNodePtr4 = GetNodeFromName(childrenNames[4]);
nodePtr->AttachInputs(childNodePtr0, childNodePtr1, childNodePtr2, childNodePtr3, childNodePtr4);
break;
default:
throw std::logic_error("Invalid number of children.");
switch (numChildren)
{
case 1:
nodePtr->AttachInputs(childrenNodes[0]);
break;
case 2:
nodePtr->AttachInputs(childrenNodes[0], childrenNodes[1]);
break;
case 3:
nodePtr->AttachInputs(childrenNodes[0], childrenNodes[1], childrenNodes[2]);
break;
case 4:
nodePtr->AttachInputs(childrenNodes[0], childrenNodes[1], childrenNodes[2], childrenNodes[3]);
break;
case 5:
nodePtr->AttachInputs(childrenNodes[0], childrenNodes[1], childrenNodes[2], childrenNodes[3], childrenNodes[4]);
break;
case 6:
nodePtr->AttachInputs(childrenNodes[0], childrenNodes[1], childrenNodes[2], childrenNodes[3], childrenNodes[4], childrenNodes[5]);
break;
default:
throw std::logic_error("Invalid number of children.");
}
}
}
}
@ -1028,6 +1025,8 @@ public:
newNode = new LookupTableNode<ElemType>(fstream, modelVersion, m_deviceId, nodeName);
else if (nodeType == RowSliceNode<ElemType>::TypeName())
newNode = new RowSliceNode<ElemType>(fstream, modelVersion, m_deviceId, nodeName);
else if (nodeType == RowStackNode<ElemType>::TypeName())
newNode = new RowStackNode<ElemType>(fstream, modelVersion, m_deviceId, nodeName);
else if (nodeType == GMMLogLikelihoodNode<ElemType>::TypeName())
newNode = new GMMLogLikelihoodNode<ElemType>(fstream, modelVersion, m_deviceId, nodeName);
else if (nodeType == SequenceDecoderNode<ElemType>::TypeName())
@ -1209,6 +1208,8 @@ public:
newNode = new CosDistanceWithNegativeSamplesNode<ElemType>(m_deviceId, nodeName);
else if (nodeType == ParallelNode<ElemType>::TypeName())
newNode = new ParallelNode<ElemType>(m_deviceId, nodeName);
else if (nodeType == RowStackNode<ElemType>::TypeName())
newNode = new RowStackNode<ElemType>(m_deviceId, nodeName);
else
{
fprintf(stderr, "Error creating new ComputationNode of type %ls, with name %ls\n", nodeType.c_str(), nodeName.c_str());
@ -1582,6 +1583,15 @@ public:
return newNode;
}
// Create a RowStack node that vertically concatenates (stacks by rows) the given inputs,
// attach the inputs, register the node with the network, and return it.
// inputs:   nodes to stack; RowStackNode::Validate requires two or more, all with the
//           same number of columns.
// nodeName: optional name; RowStackNode generates a unique name when empty.
// Note: take the vector by const reference — the original by-value parameter copied
// the whole vector of node pointers on every call for no benefit.
ComputationNodePtr RowStack(const std::vector<ComputationNodePtr>& inputs, const std::wstring nodeName = L"")
{
    ComputationNodePtr newNode(new RowStackNode<ElemType>(m_deviceId, nodeName));
    newNode->AttachInputs(inputs);
    AddNodeToNet(newNode);
    return newNode;
}
ComputationNodePtr GMMLogLikelihood(const ComputationNodePtr unnormedPrior, const ComputationNodePtr mean, const ComputationNodePtr logStddev, const ComputationNodePtr feature, const std::wstring nodeName = L"")
{
ComputationNodePtr newNode(new GMMLogLikelihoodNode<ElemType>(m_deviceId, nodeName));

Просмотреть файл

@ -158,6 +158,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
throw std::logic_error("This operation does not support six inputs.");
}
// Default implementation for nodes that take a fixed number of inputs: reject a
// variable-length input list. Nodes that accept arbitrarily many children
// (e.g. RowStackNode) override this to attach the whole vector.
virtual void AttachInputs(const std::vector<ComputationNodePtr>& /*inputs*/)
{
throw std::logic_error("This operation does not support variable-length inputs.");
}
virtual void DetachInputs()
{
m_children.resize(0);

Просмотреть файл

@ -399,6 +399,167 @@ namespace Microsoft { namespace MSR { namespace CNTK {
template class RowSliceNode<float>;
template class RowSliceNode<double>;
//this node stacks the rows of its inputs (vertical concatenation) to form the output
//all inputs must have the same number of columns since each column is treated as one sample
// RowStackNode vertically concatenates (stacks by rows) the function values of two
// or more input nodes. All inputs must have the same number of columns, since each
// column is treated as one sample; the output has sum-of-input-rows rows.
// Row offsets of each input within the stacked output are computed in Validate()
// and kept in m_startRowIndeces.
template<class ElemType>
class RowStackNode : public ComputationNode<ElemType>
{
UsingComputationNodeMembers;
public:
// construct a fresh node on the given device with an optional name
RowStackNode(const DEVICEID_TYPE deviceId = AUTOPLACEMATRIX, const std::wstring name = L"") : ComputationNode<ElemType>(deviceId)
{
m_nodeName = (name == L"" ? CreateUniqNodeName() : name);
m_deviceId = deviceId;
MoveMatricesToDevice(deviceId);
InitRecurrentNode();
}
// construct by deserializing node state from a model file stream
RowStackNode(File& fstream, const size_t modelVersion, const DEVICEID_TYPE deviceId = AUTOPLACEMATRIX, const std::wstring name = L"") : ComputationNode<ElemType>(deviceId)
{
m_nodeName = (name == L"" ? CreateUniqNodeName() : name);
LoadFromFile(fstream, modelVersion, deviceId);
}
// copy constructor
RowStackNode(const RowStackNode<ElemType>* node, const std::wstring& newName, const CopyNodeFlags flags) : ComputationNode<ElemType>(node->m_deviceId)
{
node->CopyTo(this, newName, flags);
}
// clone this node; keeps the current name when newName is empty
virtual ComputationNodePtr Duplicate(const std::wstring& newName, const CopyNodeFlags flags) const
{
const std::wstring& name = (newName == L"") ? NodeName() : newName;
ComputationNodePtr node = new RowStackNode<ElemType>(this, name, flags);
return node;
}
// copy base-class state; when children are copied, also copy the per-child
// bookkeeping (row offsets and cached input-matrix pointers)
virtual void CopyTo(const ComputationNodePtr nodeP, const std::wstring& newName, const CopyNodeFlags flags) const
{
ComputationNode<ElemType>::CopyTo(nodeP, newName, flags);
RowStackNode<ElemType>* node = (RowStackNode<ElemType>*) nodeP;
if (flags & CopyNodeFlags::copyNodeChildren)
{
node->m_children = m_children;
node->m_startRowIndeces = m_startRowIndeces;
node->m_inputMatrices = m_inputMatrices;
}
}
virtual const std::wstring OperationName() const { return TypeName(); }
static const std::wstring TypeName() { return L"RowStack"; }
// gradient w.r.t. child inputIndex over the whole minibatch: add the child's row
// band [m_startRowIndeces[i], m_startRowIndeces[i+1]) of this node's gradient
// into the child's gradient
virtual void ComputeInputPartial(const size_t inputIndex)
{
if (inputIndex >= ChildrenSize())
throw std::invalid_argument("RowStack-ComputeInputPartial: inputIndex out of range.");
ComputeInputPartialS(Inputs(inputIndex)->GradientValues(), GradientValues(), m_startRowIndeces[inputIndex], m_startRowIndeces[inputIndex + 1] - m_startRowIndeces[inputIndex]);
}
// same as above, restricted to the columns of one time step
// (m_samplesInRecurrentStep parallel sequences per step)
virtual void ComputeInputPartial(const size_t inputIndex, const size_t timeIdxInSeq)
{
if (inputIndex >= ChildrenSize())
throw std::invalid_argument("RowStack-ComputeInputPartial: inputIndex out of range.");
Matrix<ElemType> sliceInputGrad = Inputs(inputIndex)->GradientValues().ColumnSlice(timeIdxInSeq * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
Matrix<ElemType> sliceOutputGrad = GradientValues().ColumnSlice(timeIdxInSeq * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
ComputeInputPartialS(sliceInputGrad, sliceOutputGrad, m_startRowIndeces[inputIndex], m_startRowIndeces[inputIndex+1] - m_startRowIndeces[inputIndex]);
}
// accumulate numRows rows of gradientValues, starting at startIndex, into inputGradientValues
static void WINAPI ComputeInputPartialS(Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues, const size_t startIndex, const size_t numRows)
{
inputGradientValues.AddWithRowSliceValuesOf(gradientValues, startIndex, numRows);
}
// forward pass over the whole minibatch (all columns of the first input)
virtual void EvaluateThisNode()
{
EvaluateThisNodeS(m_functionValues, m_inputMatrices, 0, Inputs(0)->FunctionValues().GetNumCols());
}
// forward pass for one time step's block of columns
virtual void EvaluateThisNode(const size_t timeIdxInSeq)
{
Matrix<ElemType> sliceFunctionValues = FunctionValues().ColumnSlice(timeIdxInSeq * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
EvaluateThisNodeS(sliceFunctionValues, m_inputMatrices, timeIdxInSeq * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
}
// stack the selected column range of every input matrix into functionValues
static void WINAPI EvaluateThisNodeS(Matrix<ElemType>& functionValues, const std::vector<const Matrix<ElemType>*>& inputMatrices, const size_t sliceStartCol, const size_t sliceNumCols)
{
functionValues.AssignRowStackValuesOf(inputMatrices, sliceStartCol, sliceNumCols);
#if NANCHECK
functionValues.HasNan("RowStack");
#endif
}
// check input consistency (>= 2 children, non-null, non-empty, equal column
// counts), compute per-child row offsets, cache input-matrix pointers, and
// size the output to (sum of input rows) x (shared column count).
// NOTE(review): m_inputMatrices caches raw pointers to the children's
// FunctionValues here; this assumes that storage is not reallocated between
// Validate and evaluation — confirm against the evaluation engine.
virtual void Validate()
{
PrintSelfBeforeValidation();
unsigned int numInputs = ChildrenSize();
if (numInputs < 2)
LogicError("RowStack operation: must have two or more inputs.");
if (Inputs(0) == nullptr)
LogicError("RowStack operation: the input node is NULL.");
size_t numCols = Inputs(0)->FunctionValues().GetNumCols();
m_startRowIndeces.resize(ChildrenSize()+1);
m_inputMatrices.resize(ChildrenSize());
size_t totalRows = 0;
m_startRowIndeces[0] = 0;
for (int i = 0; i < ChildrenSize(); i++)
{
if (Inputs(i) == nullptr)
LogicError("RowStack operation: the input node is NULL.");
Matrix<ElemType>& childMatrix = Inputs(i)->FunctionValues();
size_t numRows = childMatrix.GetNumRows();
if (numRows == 0)
LogicError("RowStack operation: the input node %ls has 0 rows.", Inputs(i)->NodeName().c_str());
if (childMatrix.GetNumCols() != numCols)
LogicError("RowStack operation: the input node %ls has different number of columns.", Inputs(i)->NodeName().c_str());
totalRows += numRows;
m_inputMatrices[i] = &childMatrix;
m_startRowIndeces[i + 1] = m_startRowIndeces[i] + numRows;
}
FunctionValues().Resize(totalRows, numCols);
CopyImageSizeFromInputs();
}
// inherit image layout from the first input but override the height with the
// stacked row count; width/channel information of the children cannot be preserved
virtual void CopyImageSizeFromInputs()
{
CopyImageSizeFromInput(0, true);
m_outputHeight = FunctionValues().GetNumRows();
//WARNING: this node will destroy the image size information from the child
if (m_inputWidth * m_inputChannels != 1)
fprintf(stderr, "WARNING: RowStack operation cannot inherit image size information from its child. Image size info is lost.\n");
}
// variable-length override: accept any number of children (base class throws)
virtual void AttachInputs(const std::vector<ComputationNodePtr>& inputs)
{
unsigned int numInputs = inputs.size();
m_children.resize(numInputs);
for (unsigned int i = 0; i < numInputs; i++)
m_children[i] = inputs[i];
}
private:
std::vector<size_t> m_startRowIndeces; //start row number in the stacked matrix of each input (child)
std::vector<const Matrix<ElemType>*> m_inputMatrices; // cached pointers to each child's FunctionValues, set in Validate()
};
template class RowStackNode<float>;
template class RowStackNode<double>;
template<class ElemType>
class ScaleNode : public ComputationNode<ElemType>
{

Просмотреть файл

@ -222,6 +222,8 @@ bool CheckFunction(std::string& p_nodeType, bool* allowUndeterminedVariable)
ret = true;
else if (EqualInsensitive(nodeType, RowSliceNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, RowStackNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, LookupTableNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, GMMLogLikelihoodNode<ElemType>::TypeName(), L"GMMLL"))

Просмотреть файл

@ -218,10 +218,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
if (colBegin(i,0) == SENTENCE_MIDDLE)
{
Matrix<ElemType> to1 = inputGradientValues.ColumnSlice((timeIdxInSeq - delay)*mNbr + i, 1);
Matrix<ElemType> frm1= gradientValues.ColumnSlice(timeIdxInSeq * mNbr + i, 1);
Matrix<ElemType> frm = gradientValues.ColumnSlice(timeIdxInSeq * mNbr + i, 1);
Matrix<ElemType> to = inputGradientValues.ColumnSlice((timeIdxInSeq - delay)*mNbr + i, 1);
to1 += frm1;
to += frm;
}
}

Просмотреть файл

@ -1810,8 +1810,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
w = m_net->CreateLearnableParameter(msra::strfun::wstrprintf(L"W%d", numHiddenLayers), m_layerSizes[numHiddenLayers], m_layerSizes[numHiddenLayers + 1]);
m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale);
double val = w->FunctionValues()(0, 0);
/// the label is a dense matrix. each element is the word index
label = m_net->CreateInputNode(L"labels", 2 * (this->nce_noises + 1), mbSize);

Просмотреть файл

@ -391,29 +391,43 @@ public:
{
std::vector<void*> inputs = EvaluateParameters(node, baseName, nodeParamStart, nodeParamCount, pass);
switch (inputs.size())
if (cnNodeType == RowStackNode<ElemType>::TypeName()) //support variable length inputs
{
case 1:
nodePtr->AttachInputs(ComputationNodePtr(inputs[0]));
break;
case 2:
nodePtr->AttachInputs(ComputationNodePtr(inputs[0]), ComputationNodePtr(inputs[1]));
break;
case 3:
nodePtr->AttachInputs(ComputationNodePtr(inputs[0]), ComputationNodePtr(inputs[1]), ComputationNodePtr(inputs[2]));
break;
case 4:
nodePtr->AttachInputs(ComputationNodePtr(inputs[0]), ComputationNodePtr(inputs[1]), ComputationNodePtr(inputs[2]), ComputationNodePtr(inputs[3]));
break;
case 5:
nodePtr->AttachInputs(ComputationNodePtr(inputs[0]), ComputationNodePtr(inputs[1]), ComputationNodePtr(inputs[2]), ComputationNodePtr(inputs[3]), ComputationNodePtr(inputs[4]));
break;
default:
if (nodeParamCount > 0)
RuntimeError("Invalid number of parameters name = '%s' call = '%s'\n", node->GetName().c_str(), node->GetValue().c_str());
break;
}
std::vector<ComputationNodePtr> inputNodes;
inputNodes.resize(inputs.size());
for (int i = 0; i < inputs.size(); i++)
inputNodes[i] = ComputationNodePtr(inputs[i]);
nodePtr->AttachInputs(inputNodes);
}
else
{
switch (inputs.size())
{
case 1:
nodePtr->AttachInputs(ComputationNodePtr(inputs[0]));
break;
case 2:
nodePtr->AttachInputs(ComputationNodePtr(inputs[0]), ComputationNodePtr(inputs[1]));
break;
case 3:
nodePtr->AttachInputs(ComputationNodePtr(inputs[0]), ComputationNodePtr(inputs[1]), ComputationNodePtr(inputs[2]));
break;
case 4:
nodePtr->AttachInputs(ComputationNodePtr(inputs[0]), ComputationNodePtr(inputs[1]), ComputationNodePtr(inputs[2]), ComputationNodePtr(inputs[3]));
break;
case 5:
nodePtr->AttachInputs(ComputationNodePtr(inputs[0]), ComputationNodePtr(inputs[1]), ComputationNodePtr(inputs[2]), ComputationNodePtr(inputs[3]), ComputationNodePtr(inputs[4]));
break;
case 6:
nodePtr->AttachInputs(ComputationNodePtr(inputs[0]), ComputationNodePtr(inputs[1]), ComputationNodePtr(inputs[2]), ComputationNodePtr(inputs[3]), ComputationNodePtr(inputs[4]), ComputationNodePtr(inputs[5]));
break;
default:
if (nodeParamCount > 0)
RuntimeError("Invalid number of parameters name = '%s' call = '%s'\n", node->GetName().c_str(), node->GetValue().c_str());
break;
}
}
// process common optional parameters (like "tag");
ProcessOptionalParameters(node);
break;

Просмотреть файл

@ -32,11 +32,11 @@ DEVICE = gpu
BUILDTYPE = debug
#BUILDTYPE = release
# comment following and uncomment the next one to enable MKL library
#MATHLIB = acml
MATHLIB = mkl
MATHLIB = acml
#MATHLIB = mkl
# modify relevant path below for your system
MKL_PATH = /usr/users/chiaying/intel/composer_xe_2013.2.146
ACML_PATH = /usr/local/acml5.3.0/gfortran64
ACML_PATH = /usr/local/acml5.3.1/ifort64
#######
BUILDFOR = $(ARCH).$(DEVICE).$(BUILDTYPE).$(MATHLIB)
@ -48,8 +48,8 @@ ifeq ($(BUILDTYPE),debug)
BUILDTYPE_OPT = -g
GPU_BUILDTYPE_OPT = -G
else
BUILDTYPE_OPT = -O4
GPU_BUILDTYPE_OPT =
BUILDTYPE_OPT = -O3 -flto
GPU_BUILDTYPE_OPT = -O3
endif
ifeq ($(MATHLIB),mkl)
@ -142,7 +142,7 @@ $(OBJDIR)/%.o : %.cu Makefile
@echo $(SEPARATOR)
@echo creating $@ for $(ARCH) with build type $(BUILDTYPE)
@mkdir -p $(dir $@)
$(NVCC) -c $< -o $@ $(BUILDTYPE_OPT) $(GPU_BUILDTYPE_OPT) $(NVCCFLAGS) $(INCFLAGS) -Xcompiler -fPIC
$(NVCC) -c $< -o $@ $(GPU_BUILDTYPE_OPT) $(NVCCFLAGS) $(INCFLAGS) -Xcompiler -fPIC
$(OBJDIR)/%.o : %.cpp Makefile
@echo $(SEPARATOR)

Просмотреть файл

@ -31,8 +31,8 @@ DEVICE = cpu
#BUILDTYPE = debug
BUILDTYPE = release
# comment following and uncomment the next one to enable MKL library
#MATHLIB = acml
MATHLIB = mkl
MATHLIB = acml
#MATHLIB = mkl
# modify relevant path below for your system
MKL_PATH = /usr/users/chiaying/intel/composer_xe_2013.2.146
ACML_PATH = /usr/users/yzhang87/code/acml/gfortran64

Просмотреть файл

@ -563,7 +563,7 @@ namespace CNTKMathTest
Assert::IsTrue(C.IsEqualTo(D1, 0.0001));
}
TEST_METHOD(CPUMatrixRowSlice)
TEST_METHOD(CPUMatrixRowSliceAndStack)
{
Matrix M0(5,3);
M0(0,0) = 1; M0(0,1) = 6; M0(0,2) = 11;
@ -590,6 +590,26 @@ namespace CNTKMathTest
M3 += M0;
M0.AddToRowSliceValuesOf(M1, 2,2);
Assert::IsTrue(M3.IsEqualTo(M0, 0.0001));
M2.AddWithRowSliceValuesOf(M1, 0, 2);
Matrix M4(2, 3);
M4(0, 0) = 6; M4(0, 1) = 16; M4(0, 2) = 26;
M4(1, 0) = 8; M4(1, 1) = 18; M4(1, 2) = 28;
Assert::IsTrue(M2.IsEqualTo(M4, 0.0001));
Matrix M5, M6, M7, M8;
M5.AssignRowSliceValuesOf(M0, 0, 2);
M6.AssignRowSliceValuesOf(M0, 2, 1);
M7.AssignRowSliceValuesOf(M0, 3, 2);
std::vector<const Matrix*> inputMatrices;
inputMatrices.resize(3);
inputMatrices[0] = &M5;
inputMatrices[1] = &M6;
inputMatrices[2] = &M7;
M8.AssignRowStackValuesOf(inputMatrices, 0, 3);
Assert::IsTrue(M8.IsEqualTo(M0, 0.0001));
}
TEST_METHOD(CPUAssignRepeatOf)

Просмотреть файл

@ -278,7 +278,7 @@ namespace CNTKMathTest
Assert::IsTrue(M2.IsEqualTo(M3, 0.0001f));
}
TEST_METHOD(GPUMatrixRowSlice)
TEST_METHOD(GPUMatrixRowSliceAndStack)
{
float *fArray = new float[15];
fArray[0] = 1; fArray[5] = 6; fArray[10] = 11;
@ -308,6 +308,27 @@ namespace CNTKMathTest
M3 += M0;
M0.AddToRowSliceValuesOf(M1, 2,2);
Assert::IsTrue(M3.IsEqualTo(M0, 0.0001));
M2.AddWithRowSliceValuesOf(M1, 0, 2);
float *fArray4 = new float[6];
fArray4[0] = 6; fArray4[2] = 16; fArray4[4] = 26;
fArray4[1] = 8; fArray4[3] = 18; fArray4[5] = 28;
GPUMatrix<float> M4(2, 3, fArray4, matrixFlagNormal);
Assert::IsTrue(M2.IsEqualTo(M4, 0.0001));
GPUMatrix<float> M5, M6, M7, M8;
M5.AssignRowSliceValuesOf(M0, 0, 2);
M6.AssignRowSliceValuesOf(M0, 2, 1);
M7.AssignRowSliceValuesOf(M0, 3, 2);
std::vector<const GPUMatrix<float> *> inputMatrices;
inputMatrices.resize(3);
inputMatrices[0] = &M5;
inputMatrices[1] = &M6;
inputMatrices[2] = &M7;
M8.AssignRowStackValuesOf(inputMatrices, 0, 3);
Assert::IsTrue(M8.IsEqualTo(M0, 0.0001));
}
TEST_METHOD(GPUKhatriRaoProduct)

Просмотреть файл

@ -429,6 +429,48 @@ namespace Microsoft { namespace MSR { namespace CNTK {
return *this;
}
//stack the rows of the matrices in inputMatrices (taking columns sliceStartCol .. sliceStartCol+sliceNumCols-1 of each) and assign the result to [this] object.
// inputMatrices: non-empty list of non-empty matrices; each must have at least sliceStartCol + sliceNumCols columns.
// Throws via LogicError on invalid input.
template<class ElemType>
CPUMatrix<ElemType>& CPUMatrix<ElemType>::AssignRowStackValuesOf(const std::vector<const CPUMatrix<ElemType>*>& inputMatrices, const size_t sliceStartCol, const size_t sliceNumCols)
{
    if (sliceNumCols == 0)
        LogicError("AssignRowStackValuesOf: sliceNumCols should > 0.");
    if (inputMatrices.empty())
        LogicError("AssignRowStackValuesOf: inputMatrices is empty.");

    // Per-input starting row offsets within the stacked result.
    // std::vector (RAII) instead of the original raw new[]: the raw buffer
    // leaked whenever a LogicError was thrown inside the loop below, and
    // writing startRowIndeces[0] on an empty input list was undefined behavior.
    std::vector<size_t> startRowIndeces(inputMatrices.size(), 0);
    size_t totalRows = 0;
    for (size_t i = 0; i < inputMatrices.size(); i++)
    {
        const CPUMatrix<ElemType>& a = *inputMatrices[i];
        if (a.IsEmpty())
            LogicError("AssignRowStackValuesOf: input matrix (%d) is empty.", (int)i);
        if (a.GetNumCols() < sliceStartCol + sliceNumCols)
            LogicError("AssignRowStackValuesOf: input matrix (%d) GetNumCols() < sliceStartCol + sliceNumCols.", (int)i);
        totalRows += a.GetNumRows();
        if (i < inputMatrices.size() - 1)
            startRowIndeces[i + 1] = startRowIndeces[i] + a.GetNumRows();
    }

    Resize(totalRows, sliceNumCols);

    auto& us = *this;
#pragma omp parallel for
    for (long j = 0; j < (long)sliceNumCols; j++)
    {
        for (size_t i = 0; i < inputMatrices.size(); i++)
        {
            // each input's rows occupy a contiguous band of the destination column,
            // so one memcpy per (input, column) pair suffices (column-major storage)
            memcpy(&us(startRowIndeces[i], j), &(*inputMatrices[i])(0, sliceStartCol + j), inputMatrices[i]->GetNumRows() * sizeof(ElemType));
        }
    }

    return *this;
}
template<class ElemType>
void CPUMatrix<ElemType>::MinusOneAt(CPUMatrix<ElemType>& c, const size_t position)
{
@ -672,16 +714,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// if it's externally managed, then populate the structure
if (matrixFlags&matrixFlagDontOwnBuffer)
{
// free previous array allocation if any before overwriting
if (m_pArray != nullptr)
delete [] m_pArray;
m_pArray = pArray;
m_numRows = numRows;
m_numCols = numCols;
// free previous array allocation if any before overwriting
if (m_pArray != nullptr)
delete[] m_pArray;
m_pArray = pArray;
m_elemSizeAllocated = GetNumElements();
m_externalBuffer = true;
}
@ -3877,7 +3916,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
template<class ElemType>
void CPUMatrix<ElemType>::AssignNoiseContrastiveEstimation(const CPUMatrix<ElemType>& a,
const CPUMatrix<ElemType>& b, const CPUMatrix<ElemType>& bias, size_t sampleCount, CPUMatrix<ElemType>& tmp, CPUMatrix<ElemType>& c)
const CPUMatrix<ElemType>& b, const CPUMatrix<ElemType>& bias, CPUMatrix<ElemType>& tmp, CPUMatrix<ElemType>& c)
//this: samples+probs
// a: hidden
// b: embedding
@ -3892,7 +3931,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
std::cerr << endl;
}
*/
sampleCount *= 1;
double log_likelihood = 0.0;
size_t sample_size = this->GetNumRows() / 2;
size_t batch_size = this->GetNumCols();

Просмотреть файл

@ -216,7 +216,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
CPUMatrix<ElemType>& AssignVectorNorm2Of(CPUMatrix<ElemType>& a, const bool isColWise);
void AssignNoiseContrastiveEstimation(const CPUMatrix<ElemType>& a, const CPUMatrix<ElemType>& b, const CPUMatrix<ElemType>& bias,
size_t sampleCount, CPUMatrix<ElemType>& tmp, CPUMatrix<ElemType>& c);
CPUMatrix<ElemType>& tmp, CPUMatrix<ElemType>& c);
void AssignNCEUnnormalizedEval(const CPUMatrix<ElemType>& a,
const CPUMatrix<ElemType>& b, const CPUMatrix<ElemType>& bias, CPUMatrix<ElemType>& c);
@ -244,6 +244,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
CPUMatrix<ElemType>& AssignRowSliceValuesOf(const CPUMatrix<ElemType>& a, const size_t startIndex, const size_t numRows);
CPUMatrix<ElemType>& AddToRowSliceValuesOf(const CPUMatrix<ElemType>& a, const size_t startIndex, const size_t numRows);
CPUMatrix<ElemType>& AddWithRowSliceValuesOf(const CPUMatrix<ElemType>& a, const size_t startIndex, const size_t numRows);
CPUMatrix<ElemType>& AssignRowStackValuesOf(const std::vector<const CPUMatrix<ElemType>*>& inputMatrices, const size_t sliceStartCol, const size_t sliceNumCols);
CPUMatrix<ElemType>& AssignToRowSliceValuesOf(const CPUMatrix<ElemType>& a, const size_t startIndex, const size_t numRows);

Просмотреть файл

@ -678,6 +678,63 @@ namespace Microsoft { namespace MSR { namespace CNTK {
return *this;
}
//stack the rows of the matrices in inputMatrices (using columns [sliceStartCol, sliceStartCol+sliceNumCols) of each) and assign the result to [this] object.
template<class ElemType>
GPUMatrix<ElemType>& GPUMatrix<ElemType>::AssignRowStackValuesOf(const std::vector<const GPUMatrix<ElemType>*>& inputMatrices, const size_t sliceStartCol, const size_t sliceNumCols)
{
    if (sliceNumCols == 0)
        LogicError("AssignRowStackValuesOf: sliceNumCols should > 0.");
    if (inputMatrices.empty())
        LogicError("AssignRowStackValuesOf: inputMatrices is empty.");

    // Host-side staging buffers. std::vector (instead of raw new[]) guarantees
    // they are released even when LogicError or a failing CUDA_CALL throws below.
    size_t totalRows = 0;
    std::vector<size_t> startRowIndeces(inputMatrices.size() + 1);
    std::vector<ElemType*> bufferPointersInInputMatrices(inputMatrices.size());
    startRowIndeces[0] = 0;
    for (int i = 0; i < (int)inputMatrices.size(); i++)
    {
        const GPUMatrix<ElemType>& a = *inputMatrices[i];
        if (a.IsEmpty())
            LogicError("AssignRowStackValuesOf: input matrix (%d) is empty.", i);
        if (a.GetNumCols() < sliceStartCol + sliceNumCols)
            LogicError("AssignRowStackValuesOf: input matrix (%d) GetNumCols() < sliceStartCol + sliceNumCols.", i);
        totalRows += a.GetNumRows();
        startRowIndeces[i + 1] = startRowIndeces[i] + a.GetNumRows();
        // Device pointer to the first element of column sliceStartCol of input i.
        bufferPointersInInputMatrices[i] = a.m_pArray + a.LocateColumn(sliceStartCol);
    }

    Resize(totalRows, sliceNumCols);
    PrepareDevice();

    // Ship the per-input device pointers and cumulative row offsets to the GPU
    // so the kernel can locate the source of every destination element.
    ElemType** bufferPointersInGPU = NULL;
    CUDA_CALL(cudaMalloc((void***)&bufferPointersInGPU, inputMatrices.size()*sizeof(ElemType*)));
    CUDA_CALL(cudaMemcpy(bufferPointersInGPU, bufferPointersInInputMatrices.data(), inputMatrices.size()*sizeof(ElemType*), cudaMemcpyHostToDevice));

    size_t* startRowIndecesInGPU = NULL;
    CUDA_CALL(cudaMalloc((void**)&startRowIndecesInGPU, (1+inputMatrices.size())*sizeof(size_t)));
    CUDA_CALL(cudaMemcpy(startRowIndecesInGPU, startRowIndeces.data(), (1+inputMatrices.size())*sizeof(size_t), cudaMemcpyHostToDevice));

    // One thread per destination element.
    LONG64 N = (LONG64)GetNumElements();
    int blocksPerGrid = (int)ceil(1.0*N / threadsPerBlock);
    cudaEvent_t done = nullptr;
    if (do_sync) CUDA_CALL(cudaEventCreate(&done));
    _assignRowStackValuesOf<ElemType> << <blocksPerGrid, threadsPerBlock, 0, t_stream >> >(m_pArray, bufferPointersInGPU, startRowIndecesInGPU, (long) inputMatrices.size(), N, (long)GetNumRows(), (long)GetNumCols());
    if (do_sync) CUDA_CALL(cudaEventRecord(done));
    if (do_sync) CUDA_CALL(cudaEventSynchronize(done));
    if (do_sync) CUDA_CALL(cudaEventDestroy(done));

    CUDA_CALL(cudaFree(bufferPointersInGPU));
    CUDA_CALL(cudaFree(startRowIndecesInGPU));

    return *this;
}
/// c = c - 1.0 for a specific position
template<class ElemType>
void GPUMatrix<ElemType>::MinusOneAt(GPUMatrix<ElemType>& c, const size_t position)

Просмотреть файл

@ -274,6 +274,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
GPUMatrix<ElemType>& AssignRowSliceValuesOf(const GPUMatrix<ElemType>& a, const size_t startIndex, const size_t numRows);
GPUMatrix<ElemType>& AddToRowSliceValuesOf(const GPUMatrix<ElemType>& a, const size_t startIndex, const size_t numRows);
GPUMatrix<ElemType>& AddWithRowSliceValuesOf(const GPUMatrix<ElemType>& a, const size_t startIndex, const size_t numRows);
GPUMatrix<ElemType>& AssignRowStackValuesOf(const std::vector<const GPUMatrix<ElemType>*>& inputMatrices, const size_t sliceStartCol, const size_t sliceNumCols);
GPUMatrix<ElemType>& AssignRepeatOf(const GPUMatrix<ElemType>& a, const size_t numRowRepeats, const size_t numColRepeats);
GPUMatrix<ElemType>& AssignPositiveAndShiftedNegSample(const GPUMatrix<ElemType>& a, const size_t posNumber, const size_t negNumber, const size_t shiftNumber);

Просмотреть файл

@ -377,6 +377,27 @@ __global__ void _addWithRowSliceValuesOf(ElemType * dest, ElemType * src, const
dest[id] += src[IDX2C(row + startIndex, col, srcRows)];
}
// Kernel backing GPUMatrix::AssignRowStackValuesOf: each thread writes one
// element of the row-stacked destination matrix (column-major layout), reading
// it from the source matrix that owns that destination row.
//   dest            - destination buffer, destRows x destCols
//   srces           - device array of numSrces pointers, each to the first
//                     element of the selected column slice of one input
//   startRowIndeces - device array of numSrces+1 cumulative row offsets;
//                     startRowIndeces[i] is the first destination row covered
//                     by input i, startRowIndeces[0] == 0
//   N               - total number of destination elements (destRows * destCols)
template<class ElemType>
__global__ void _assignRowStackValuesOf(ElemType * dest, ElemType ** srces, size_t* startRowIndeces, const LONG64 numSrces, const LONG64 N, const LONG64 destRows, const long destCols)
{
    LONG64 id = blockDim.x * blockIdx.x + threadIdx.x;
    if (id >= N)
        return;

    // Recover this thread's (row, col) from the linear element index.
    long col = id / destRows; //dest is the full matrix, rowslice is taken from the src
    long row = id - (col * destRows);

    // Linear scan over the cumulative offsets: the owning source is the first i
    // with startRowIndeces[i + 1] > row.
    //can we replace the for loop with something better?
    int srcId = 0;
    for (; srcId < numSrces; srcId++)
    {
        if (startRowIndeces[srcId + 1]>row)
            break;
    }

    // Translate the destination row into the source's local row; the source's
    // row count is the difference of consecutive cumulative offsets.
    dest[id] = srces[srcId][IDX2C(row - startRowIndeces[srcId], col, startRowIndeces[srcId+1] - startRowIndeces[srcId])];
}
template<class ElemType>
__global__ void _assignRepeatOf(ElemType * dest, ElemType * src, const LONG64 N, const long srcRows, const long srcCols, const long destRows)
{

Просмотреть файл

@ -79,16 +79,16 @@ namespace Microsoft { namespace MSR { namespace CNTK {
size_t MajorIndexCount() const
{
return MajorIndexCount(m_numRows, m_numCols, m_elemSizeAllocated, m_format);
return MajorIndexCount(m_numRows, m_numCols, m_nz, m_format);
}
size_t MajorIndexCount(const size_t numRows, const size_t numCols, const size_t numNZReserved, const MatrixFormat format) const
size_t MajorIndexCount(const size_t numRows, const size_t numCols, const size_t numNZ, const MatrixFormat format) const
{
if (format == matrixFormatSparseBlockCol)
return numCols;
else if (format == matrixFormatSparseBlockRow)
return numRows;
else
return numNZReserved;
return numNZ;
}
size_t MajorIndexSize() const // actual number of major index bytes in use
{

Просмотреть файл

@ -1520,6 +1520,68 @@ namespace Microsoft { namespace MSR { namespace CNTK {
return *this;
}
//stack the columns in inputMatrices (starting from sliceStartCol for sliceNumCols columns) and assign it to [this] object.
// Device-dispatch wrapper: moves every input onto the same device as [this],
// then forwards to the CPU or GPU dense implementation. Sparse matrices are
// not supported (NOT_IMPLEMENTED).
template<class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignRowStackValuesOf(const std::vector<const Matrix<ElemType>*>& inputMatrices, const size_t sliceStartCol, const size_t sliceNumCols)
{
    // Reconcile device placement input-by-input; each call may migrate data so
    // [this] and input i end up co-located before the type check below.
    for (int i = 0; i < inputMatrices.size(); i++)
    {
        const Matrix<ElemType>& a = *inputMatrices[i];
        DecideAndMoveToRightDevice(*this, a);

        //WARNING: a and this must have same type
        if (!(GetMatrixType() == a.GetMatrixType()))
            NOT_IMPLEMENTED;
    }

    CurrentDataLocation curLocation = GetCurrentMatrixLocation();
    if (curLocation == CurrentDataLocation::GPU || curLocation == CurrentDataLocation::BOTH)
    {
        if (GetMatrixType() != MatrixType::SPARSE)
        {
            //GPUDense;
            // Unwrap the GPU implementation objects and delegate.
            std::vector<const GPUMatrix<ElemType>*> gpuInputMatrices;
            gpuInputMatrices.resize(inputMatrices.size());
            for (int i = 0; i < inputMatrices.size(); i++)
                gpuInputMatrices[i] = inputMatrices[i]->m_GPUMatrix;
            m_GPUMatrix->AssignRowStackValuesOf(gpuInputMatrices, sliceStartCol, sliceNumCols);

            // Result now lives only on the GPU.
            SetDataLocation(CurrentDataLocation::GPU, MatrixType::DENSE);
        }
        else
        {
            NOT_IMPLEMENTED;
        }
    }
    else if (curLocation == CurrentDataLocation::CPU)
    {
        if (GetMatrixType() != MatrixType::SPARSE)
        {
            //CPUDense;
            // Unwrap the CPU implementation objects and delegate.
            std::vector<const CPUMatrix<ElemType>*> cpuInputMatrices;
            cpuInputMatrices.resize(inputMatrices.size());
            for (int i = 0; i < inputMatrices.size(); i++)
                cpuInputMatrices[i] = inputMatrices[i]->m_CPUMatrix;
            m_CPUMatrix->AssignRowStackValuesOf(cpuInputMatrices, sliceStartCol, sliceNumCols);

            // Result now lives only on the CPU.
            SetDataLocation(CurrentDataLocation::CPU, MatrixType::DENSE);
        }
        else
        {
            NOT_IMPLEMENTED;
        }
    }
    else
    {
        throw std::runtime_error("Matrices do not exist in either CPU or GPU.");
    }

    return *this;
}
template<class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignRepeatOf(const Matrix<ElemType>& a, const size_t numRowRepeats, const size_t numColRepeats)
{
@ -3600,7 +3662,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
size_t sampleCount = a.m_CPUMatrix->GetNumElements() / a.m_CPUMatrix->GetNumRows();
tmp.Resize(a.GetNumRows() / 2, sampleCount);
a.m_CPUMatrix->AssignNoiseContrastiveEstimation(*b.m_CPUMatrix, *c.m_CPUMatrix, *bias.m_CPUMatrix, sampleCount, *tmp.m_CPUMatrix, *this->m_CPUMatrix);
a.m_CPUMatrix->AssignNoiseContrastiveEstimation(*b.m_CPUMatrix, *c.m_CPUMatrix, *bias.m_CPUMatrix, *tmp.m_CPUMatrix, *this->m_CPUMatrix);
}
else
{

Просмотреть файл

@ -259,6 +259,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Matrix<ElemType>& AssignRowSliceValuesOf(const Matrix<ElemType>& a, const size_t startIndex, const size_t numRows);
Matrix<ElemType>& AddToRowSliceValuesOf(const Matrix<ElemType>& a, const size_t startIndex, const size_t numRows);
Matrix<ElemType>& AddWithRowSliceValuesOf(const Matrix<ElemType>& a, const size_t startIndex, const size_t numRows);
Matrix<ElemType>& AssignRowStackValuesOf(const std::vector<const Matrix<ElemType>*>& inputMatrices, const size_t sliceStartCol, const size_t sliceNumCols);
Matrix<ElemType>& AssignRepeatOf(const Matrix<ElemType>& a, const size_t numRowRepeats, const size_t numColRepeats);
Matrix<ElemType>& AssignPositiveAndShiftedNegSample(const Matrix<ElemType>& a, const size_t posNumber, const size_t negNumber, const size_t shiftNumber);

Просмотреть файл

@ -479,6 +479,7 @@ namespace Microsoft {
//for each column of a, we add all rows of a to this starting from startIndex
template<class ElemType> GPUMatrix<ElemType>& GPUMatrix<ElemType>::AddToRowSliceValuesOf(const GPUMatrix<ElemType>& /*a*/, const size_t startIndex, const size_t numRows) { return *this; }
template<class ElemType> GPUMatrix<ElemType>& GPUMatrix<ElemType>::AddWithRowSliceValuesOf(const GPUMatrix<ElemType>& /*a*/, const size_t startIndex, const size_t numRows) { return *this; }
// CPU-only build stub: no GPU available, so stacking is a no-op that returns *this.
// (Fix: the line was missing the template declaration and GPUMatrix<ElemType>:: qualification
// that every sibling stub in this file carries, so the NoGPU build could not compile it.)
template<class ElemType> GPUMatrix<ElemType>& GPUMatrix<ElemType>::AssignRowStackValuesOf(const std::vector<const GPUMatrix<ElemType>*>& /*inputMatrices*/, const size_t /*sliceStartCol*/, const size_t /*sliceNumCols*/) { return *this; }
template<class ElemType> GPUMatrix<ElemType>& GPUMatrix<ElemType>::AssignRepeatOf(const GPUMatrix<ElemType>& /*a*/, const size_t numRowRepeats, const size_t numColRepeats) { return *this; }
template<class ElemType> GPUMatrix<ElemType>& GPUMatrix<ElemType>::AssignPositiveAndShiftedNegSample(const GPUMatrix<ElemType>& a, const size_t posNumber, const size_t negNumber, const size_t shiftNumber) { return *this; }

234
Scripts/build-and-test Normal file
Просмотреть файл

@ -0,0 +1,234 @@
#!/bin/bash
# build-and-test: builds CNTK (debug and release flavors) and runs the Simple
# demo on both CPU and GPU targets, checking the run output for the expected
# device banner and for exceptions. Supported hosts: Linux, and Windows via a
# Cygwin shell with the Visual C++ environment loaded.

# Setting some default values
BUILD=1
RUN=1
CLEAN_AFTER=0
CLEAN_BEFORE=0

# parsing command line arguments:
while [[ $# > 0 ]]
do
    key="$1"

    case $key in
        -h|--help)
            echo "Usage: build-and-test [options]"
            echo "Options:"
            echo "  -q|--quiet-build - redirect build output to file (by default those will be in <cntk_root>.run-<operating_system>-*)"
            echo "  -r|--run-only - elides build step, runs the binaries that have already been built"
            echo "  -b|--build-only - just build, do not run"
            echo "  -cb|--clean-build - clean up the enlistment binaries before build"
            echo "  -o|--output-directory <output_dir> - specify output directory to use"
            echo "The root directory used to build and run CNTK is the one hosting the Scripts directory that contains this script"
            exit 1
            ;;
        # accept both spellings: the help text advertises --quiet-build
        -q|--quiet|--quiet-build)
            QUIET_BUILD=1
            ;;
        -r|--run-only)
            BUILD=0
            RUN=1
            ;;
        -b|--build-only)
            BUILD=1
            RUN=0
            ;;
        -cb|--clean-build)
            CLEAN_BEFORE=1
            BUILD=1
            ;;
        -o|--output-directory)
            OUTPUT_DIR="$2"
            shift # past argument
            ;;
        *)
            echo "Unknown option $key"
            exit 1
            ;;
    esac
    shift # past argument or value
done

# Step 0 -- Validate all necessary prerequisites and check for incompatible options

# It is possible to use this script on Windows to build CNTK
# from Cygwin window with Visual C++ environment loaded.
# In that case OS environment variable will be set and we
# can use it to differentiate from Linux.
if [[ $CLEAN_BEFORE == 1 && $RUN == 1 && $BUILD == 0 ]]; then
    echo "============ ERROR: Incompatible options RUN and CLEAN_BEFORE set without BUILD ============"
    exit 1
fi

if [[ $OS == "Windows_NT" && $OSTYPE == "cygwin" ]]; then
    DEBUG_DIR=Debug
    RELEASE_DIR=Release
    PREFIX_DIR=x64
    BIN_NAME=CNTK.exe
    BUILD_OS="windows"

    if [[ $VS120COMNTOOLS == "" ]]; then
        echo "============ Visual Studio 12.0 environment not properly setup or VS not installed ============"
        echo "============ Please find and run the appropriate vcvarsall.bat script ============"
        exit 1
    fi

    if [[ $ACML_PATH == "" ]]; then
        echo "============ ACML path not set ============"
        echo "============ ACML libraries are needed to successfully build CNTK ============"
        exit 1
    fi
elif [[ $OSTYPE == "linux-gnu" ]]; then
    DEBUG_DIR=x86_64.gpu.debug.acml
    RELEASE_DIR=x86_64.gpu.release.acml
    PREFIX_DIR=bin
    BIN_NAME=cntk
    MAKEFILE=Makefile.gpu
    BUILD_OS="linux"
else
    echo "============ ERROR: Unsupported OS ============"
    echo "============ Scripts supports only building from Linux and Windows through Cygwin ============"
    exit 1
fi

# Step 1 -- Prepare temporary folders and files, tweak settings if necessary

# Get to the root path from which we know how to build and run
SCRIPT=`readlink -f $0`
SCRIPT_DIR=`dirname $SCRIPT`
CNTK_ROOT=`dirname $SCRIPT_DIR`

# Setup the output directory
if [[ $OUTPUT_DIR == "" ]]; then
    OUTPUT_DIR="$CNTK_ROOT/.run-$BUILD_OS-$RANDOM"
fi

# (fix: this message previously referenced the never-defined $TMP_ROOT)
echo "============ Creating CNTK temp directory in $OUTPUT_DIR ============"
mkdir -p $OUTPUT_DIR || exit $?

CONF_FILE="$OUTPUT_DIR/Simple.conf"
BUILD_FILE="$OUTPUT_DIR/Build"
RUN_FILE="$OUTPUT_DIR/Result"

if ! [[ -d "$CNTK_ROOT/MachineLearning" ]]; then
    echo "============ ERROR: Build script located in the wrong directory ($SCRIPT_DIR) ============"
    exit 1
fi
cd $CNTK_ROOT

if ! [[ -f $CONF_FILE ]]; then
    cp Demos/Simple/Simple.config $CONF_FILE || exit $?

    # This chmod is necessary due to restrictive Cygwin interpretation of Windows permissions.
    # Cygwin interprets Windows permissions as ----rwx---, which lacks read permissions for user.
    chmod a+r $CONF_FILE || exit $?
fi

if [[ $QUIET_BUILD == 1 ]]; then
    echo "============ WARNING: You have selected quiet build. All build output will be placed in ($OUTPUT_DIR) ============"
fi

# Default the build-output file descriptors to stdout/stderr so later steps
# (e.g. cleanup without a preceding build) never write to an unopened fd.
exec 6>&1
exec 7>&2

# Step 2 -- Build the project debug and release, if requested
if [[ $BUILD == 1 ]]; then
    for FLAVOR in debug release
    do
        # Our make is too noisy right now and it is difficult to spot
        # issues from stdout and stderr. In the quiet mode these are
        # redirected to a file where they could be examined after the fact
        if [[ $QUIET_BUILD == 1 ]]; then
            exec 6>$BUILD_FILE.$FLAVOR.out || exit $?
            exec 7>$BUILD_FILE.$FLAVOR.err || exit $?
        else
            exec 6>&1 || exit $?
            exec 7>&2 || exit $?
        fi

        echo "============ Building CNTK $FLAVOR (clean=$CLEAN_BEFORE) ============"
        if [[ $OS == "Windows_NT" ]]; then
            if [[ $CLEAN_BEFORE == 1 ]]; then
                msbuild.exe /property:Configuration=$FLAVOR /t:Clean 1>&6 2>&7 || exit $?
            fi
            msbuild.exe /property:Configuration=$FLAVOR /m 1>&6 2>&7 || exit $?
        else
            if [[ $CLEAN_BEFORE == 1 ]]; then
                make BUILDTYPE=$FLAVOR -f $MAKEFILE clean 1>&6 2>&7 || exit $?
            fi
            make BUILDTYPE=$FLAVOR -j -f $MAKEFILE 1>&6 2>&7 || exit $?
        fi
        chmod a+r $BUILD_FILE.*
    done
fi

# Step 3 -- Run the project tests, both debug and release, if requested
if [[ $RUN == 1 ]]; then
    if ! [[ -f "$CNTK_ROOT/$PREFIX_DIR/$DEBUG_DIR/$BIN_NAME" && -f "$CNTK_ROOT/$PREFIX_DIR/$RELEASE_DIR/$BIN_NAME" ]]; then
        echo "============ ERROR: CNTK did not build properly ============"
        exit 1
    fi
    cd $PREFIX_DIR

    for TARGET in CPU GPU
    do
        # These sed scripts are simply toggling DeviceNumber argument in the config file
        # If it is set to Auto, it will pick GPU over CPU. At -1 CPU is selected.
        if [[ $TARGET == CPU ]]; then
            sed -i -e 's/^DeviceNumber.*/DeviceNumber=-1/g' $CONF_FILE || exit $?
        else
            sed -i -e 's/^DeviceNumber.*/DeviceNumber=Auto/g' $CONF_FILE || exit $?
        fi

        for FLAVOR in debug release
        do
            # (fix: the comparison previously read "FLAVOR" without a $, which
            # never matched, so the debug binary was never actually exercised)
            if [[ $FLAVOR == "debug" ]]; then
                FLAVOR_DIR="$DEBUG_DIR"
            else
                FLAVOR_DIR="$RELEASE_DIR"
            fi
            OUT_FILE="$RUN_FILE.$FLAVOR.out"

            echo "============ Running CNTK for ($FLAVOR) ($TARGET), output in ($RUN_FILE.*) ============"
            rm -rf models
            if [[ $OS == "Windows_NT" ]]; then
                # We have to use cygpath on Windows to modify the file paths into the format readable by cntk.
                time ./$FLAVOR_DIR/$BIN_NAME configFile="`cygpath -w $CONF_FILE`" &>$OUT_FILE || exit $?
            else
                time ./$FLAVOR_DIR/$BIN_NAME configFile=$CONF_FILE &>$OUT_FILE || exit $?
            fi
            chmod a+r $RUN_FILE.*

            # Check if execution was successful
            grep -q "Using $TARGET" "$OUT_FILE" || {
                echo "============ ERROR: Run output (in $OUT_FILE) did not contain information about target device ($TARGET) ============"
                exit 1
            }
            grep -q "EXCEPTION" "$OUT_FILE" && {
                echo "============ ERROR: Run output in ($OUT_FILE) contains exceptions ============"
                grep "EXCEPTION" "$OUT_FILE"
                exit 1
            }
        done
    done
fi

# Step 4 -- Optionally clean after builds and tests
if [[ $CLEAN_AFTER == 1 ]]; then
    rm -rf models
    cd $CNTK_ROOT
    for FLAVOR in debug release
    do
        echo "============ Cleaning up CNTK $FLAVOR ============"
        if [[ $OS == "Windows_NT" ]]; then
            msbuild.exe /property:Configuration=$FLAVOR /t:clean 1>&6 2>&7 || exit $?
        else
            make BUILDTYPE=$FLAVOR -f $MAKEFILE clean 1>&6 2>&7 || exit $?
        fi
    done
    rm -rf $OUTPUT_DIR
fi

echo "============ Build and test of CNTK was successful! ============"