This commit is contained in:
Yu Zhang 2015-07-05 22:19:18 -07:00
Parent b3157ff534
Commit 2582c8f5a8
17 changed files with 1058 additions and 1283 deletions

View file

@@ -23,21 +23,21 @@
#include "HTKMLFReader.h"
namespace Microsoft { namespace MSR { namespace CNTK {
template<class ElemType>
void DATAREADER_API GetReader(IDataReader<ElemType>** preader)
{
*preader = new HTKMLFReader<ElemType>();
}
extern "C" DATAREADER_API void GetReaderF(IDataReader<float>** preader)
{
GetReader(preader);
}
extern "C" DATAREADER_API void GetReaderD(IDataReader<double>** preader)
{
GetReader(preader);
}
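These extern "C" exports exist so a host process can load the reader as a DLL and construct a typed reader without C++ name mangling. A consumer-side sketch (an illustration, not code from this commit; the module name is assumed and error handling is omitted):

#include <Windows.h>
// IDataReader<ElemType> comes from the CNTK DataReader headers.

typedef void (*GetReaderFProc)(IDataReader<float>** preader);

void LoadReaderExample()
{
    HMODULE hModule = LoadLibraryA("HTKMLFReader.dll");            // assumed DLL name
    GetReaderFProc getReader =
        (GetReaderFProc)GetProcAddress(hModule, "GetReaderF");     // the extern "C" export above
    IDataReader<float>* reader = nullptr;
    getReader(&reader);                                            // allocates an HTKMLFReader<float>
    // ... Init / StartMinibatchLoop / GetMinibatch ...
    reader->Destroy();                                             // Destroy() performs `delete this`
    FreeLibrary(hModule);
}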
// Utility function, in ConfigFile.cpp, but HTKMLFReader doesn't need that code...
// Trim - trim white space off the start and end of the string
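A minimal sketch of what such a Trim helper looks like (an assumption for illustration; the real implementation in ConfigFile.cpp may differ):

#include <string>

// remove leading and trailing whitespace from str, in place
static void Trim(std::string& str)
{
    const char* const whitespace = " \t\r\n";
    size_t first = str.find_first_not_of(whitespace);
    if (first == std::string::npos)   // the string is all whitespace
    {
        str.clear();
        return;
    }
    size_t last = str.find_last_not_of(whitespace);
    str = str.substr(first, last - first + 1);
}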

View file

@@ -102,12 +102,12 @@ private:
public:
Matrix<ElemType> m_sentenceBegin;
vector<MinibatchPackingFlag> m_minibatchPackingFlag;
bool mIgnoreSentenceBeginTag;
HTKMLFReader() : m_sentenceBegin(CPUDEVICE) {
}
virtual void Init(const ConfigParameters& config);
virtual void Destroy() {delete this;}
virtual ~HTKMLFReader();
@@ -118,7 +118,7 @@ public:
virtual bool GetData(const std::wstring& sectionName, size_t numRecords, void* data, size_t& dataBufferSize, size_t recordStart=0);
virtual bool DataEnd(EndDataType endDataType);
void SetSentenceSegBatch(Matrix<ElemType> &sentenceBegin, vector<MinibatchPackingFlag>& sentenceExistsBeginOrNoLabels);
void SetSentenceEnd(int /*actualMbSize*/){};
void SetRandomSeed(int);
};

View file

@@ -113,23 +113,23 @@ public:
/// the second data stream has two sentences, with 0 indicating the beginning of a sentence
/// you may use 1 even if a sentence begins at that position; in this case, the trainer will carry hidden states over to the following
/// frame.
Matrix<ElemType> m_sentenceBegin;
/// a matrix of 1 x n_length
/// 1 denotes the case that there exists a sentence-begin or no_labels case in this frame
/// 0 denotes such case is not in this frame
vector<MinibatchPackingFlag> m_minibatchPackingFlag;
/// by default it is false
/// if true, the reader will set SENTENCE_MIDDLE for time positions that originally correspond to SENTENCE_BEGIN
/// set to true so that a current minibatch can use state activities from the previous minibatch.
/// default will have truncated BPTT, which only does BPTT inside a minibatch
bool mIgnoreSentenceBeginTag;
HTKMLFReader() : m_sentenceBegin(CPUDEVICE) {
}
virtual void Init(const ConfigParameters& config);
virtual void Destroy() {delete this;}

View file

@@ -1418,10 +1418,7 @@ the first row is 0/1 bit for whether corresponding frame has sentence beginning/
m_toProcess[i] = actualmbsizeOri;
first = false;
}
else
{
if (m_toProcess[i] != actualmbsizeOri)
{
@@ -1475,10 +1472,7 @@ the first row is 0/1 bit for whether corresponding frame has sentence beginning/
{
// loop through the columns and set one value to 1
// in the future we want to use a sparse matrix here
for (int k=0; k < actualmbsizeOri; k++)
{
assert(uids[k] < dim);
//labels(uids[i], i) = (ElemType)1;
@@ -1608,10 +1602,7 @@ the first row is 0/1 bit for whether corresponding frame has sentence beginning/
break;
}
return ret;
}
template<class ElemType>
void HTKMLFReader<ElemType>::SetSentenceEndInBatch(vector<size_t> &sentenceEnd)
@@ -1654,9 +1645,6 @@ the first row is 0/1 bit for whether corresponding frame has sentence beginning/
}
}
template class HTKMLFReader<float>;
template class HTKMLFReader<double>;
}}}

View file

@@ -143,21 +143,21 @@ public:
vector<stSentenceInfo> mSentenceIndex2SentenceInfo;
public:
using LUSequenceParser<NumType, LabelType>::m_dimFeatures;
using LUSequenceParser<NumType, LabelType>::m_dimLabelsIn;
using LUSequenceParser<NumType, LabelType>::m_beginSequenceIn;
using LUSequenceParser<NumType, LabelType>::m_endSequenceIn;
using LUSequenceParser<NumType, LabelType>::m_dimLabelsOut;
using LUSequenceParser<NumType, LabelType>::m_beginSequenceOut;
using LUSequenceParser<NumType, LabelType>::m_endSequenceOut;
using LUSequenceParser<NumType, LabelType>::m_traceLevel;
using LUSequenceParser<NumType, LabelType>::m_beginTag;
using LUSequenceParser<NumType, LabelType>::m_endTag;
using LUSequenceParser<NumType, LabelType>::m_fileBuffer;
using LUSequenceParser<NumType, LabelType>::m_inputs;
using LUSequenceParser<NumType, LabelType>::m_labels;
using LUSequenceParser<NumType, LabelType>::m_beginSequence;
using LUSequenceParser<NumType, LabelType>::m_endSequence;
LUBatchLUSequenceParser() {
};
~LUBatchLUSequenceParser() {

View file

@@ -200,53 +200,53 @@ public:
using LabelType = wstring;
using LabelIdType = long;
using LUSequenceReader<ElemType>::mWordMappingFn;
using LUSequenceReader<ElemType>::m_cachingReader;
using LUSequenceReader<ElemType>::mWordMapping;
using LUSequenceReader<ElemType>::mUnkStr;
using LUSequenceReader<ElemType>::m_cachingWriter;
using LUSequenceReader<ElemType>::m_featuresName;
using LUSequenceReader<ElemType>::m_labelsName;
using LUSequenceReader<ElemType>::labelInfoMin;
using LUSequenceReader<ElemType>::labelInfoMax;
using LUSequenceReader<ElemType>::m_featureDim;
using LUSequenceReader<ElemType>::m_labelInfo;
// using LUSequenceReader<ElemType>::m_labelInfoIn;
using LUSequenceReader<ElemType>::m_mbStartSample;
using LUSequenceReader<ElemType>::m_epoch;
using LUSequenceReader<ElemType>::m_totalSamples;
using LUSequenceReader<ElemType>::m_epochStartSample;
using LUSequenceReader<ElemType>::m_seqIndex;
using LUSequenceReader<ElemType>::m_endReached;
using LUSequenceReader<ElemType>::m_readNextSampleLine;
using LUSequenceReader<ElemType>::m_readNextSample;
using LUSequenceReader<ElemType>::m_traceLevel;
using LUSequenceReader<ElemType>::m_wordContext;
using LUSequenceReader<ElemType>::m_featureCount;
using typename LUSequenceReader<ElemType>::LabelInfo;
using LUSequenceReader<ElemType>::labelInfoIn;
using LUSequenceReader<ElemType>::labelInfoOut;
// using LUSequenceReader<ElemType>::arrayLabels;
using LUSequenceReader<ElemType>::m_readerConfig;
using LUSequenceReader<ElemType>::m_featuresBuffer;
using LUSequenceReader<ElemType>::m_labelsBuffer;
using LUSequenceReader<ElemType>::m_labelsIdBuffer;
using LUSequenceReader<ElemType>::m_mbSize;
using LUSequenceReader<ElemType>::m_epochSize;
using LUSequenceReader<ElemType>::m_featureData;
using LUSequenceReader<ElemType>::m_sequence;
using LUSequenceReader<ElemType>::m_labelData;
using LUSequenceReader<ElemType>::m_labelIdData;
using LUSequenceReader<ElemType>::m_idx2clsRead;
using LUSequenceReader<ElemType>::m_clsinfoRead;
using LUSequenceReader<ElemType>::m_featureWordContext;
using LUSequenceReader<ElemType>::LoadLabelFile;
using LUSequenceReader<ElemType>::ReleaseMemory;
using LUSequenceReader<ElemType>::LMSetupEpoch;
using LUSequenceReader<ElemType>::ChangeMaping;
using LUSequenceReader<ElemType>::GetIdFromLabel;
using LUSequenceReader<ElemType>::InitCache;
using LUSequenceReader<ElemType>::mRandomize;
using LUSequenceReader<ElemType>::m_seed;
using LUSequenceReader<ElemType>::mTotalSentenceSofar;
using LUSequenceReader<ElemType>::GetSentenceEndIdFromOutputLabel;
private:

The diff for this file is not shown because of its large size.

View file

@@ -27,45 +27,45 @@ enum LabelKind
template<class ElemType>
class LibSVM_BinaryInput {
private:
HANDLE m_hndl;
HANDLE m_filemap;
HANDLE m_header;
HANDLE m_offsets;
HANDLE m_data;

//void* header_orig; // Don't need this since the header is at the start of the file
void* offsets_orig;
void* data_orig;

void* header_buffer;
void* offsets_buffer;
void* data_buffer;

size_t m_dim;
size_t mbSize;
size_t MAX_BUFFER = 400;
size_t m_labelDim;

ElemType* values; // = (ElemType*)malloc(sizeof(ElemType)* 230 * 1024);
int64_t* offsets; // = (int*)malloc(sizeof(int)* 230 * 1024);
int32_t* colIndices; // = (int*)malloc(sizeof(int) * (batchsize + 1));
int32_t* rowIndices; // = (int*)malloc(sizeof(int) * MAX_BUFFER * batchsize);
int32_t* classIndex; // = (int*)malloc(sizeof(int) * batchsize);
ElemType* classWeight; // = (ElemType*)malloc(sizeof(ElemType) * batchsize);

ElemType* m_labelsBuffer;
public:
int64_t numRows;
int64_t numBatches;
int32_t numCols;
int64_t totalNNz;

LibSVM_BinaryInput();
~LibSVM_BinaryInput();
void Init(std::wstring fileName, size_t dim);
bool SetupEpoch(size_t minibatchSize);
bool Next_Batch(Matrix<ElemType>& features, Matrix<ElemType>& labels, size_t actualmbsize, int batchIndex);
void Dispose();
};
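A hypothetical epoch loop against the interface declared above (illustration only; the real call sites live in LibSVMBinaryReader, and the file name and dimensions here are assumptions):

void ReadAllBatchesExample(size_t featureDim, size_t mbSize)
{
    LibSVM_BinaryInput<float> input;
    input.Init(L"train.dat", featureDim);              // assumed file name
    input.SetupEpoch(mbSize);                          // map the epoch into memory

    Matrix<float> features(CPUDEVICE), labels(CPUDEVICE);
    for (int64_t batch = 0; batch < input.numBatches; batch++)
        if (!input.Next_Batch(features, labels, mbSize, (int)batch))
            break;                                     // no more data

    input.Dispose();                                   // release the mapped views
}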
template<class ElemType>
@@ -75,11 +75,11 @@ class LibSVMBinaryReader : public IDataReader<ElemType>
// typedef std::string LabelType;
// typedef unsigned LabelIdType;
private:
int* read_order; // array to shuffle to reorder the dataset
std::wstring m_featuresName;
size_t m_featuresDim;
LibSVM_BinaryInput<ElemType> featuresInput;
int64_t m_processedMinibatches;
size_t m_mbSize; // size of minibatch requested
LabelIdType m_labelIdMax; // maximum label ID we have encountered so far
@@ -126,7 +126,7 @@ private:
size_t RandomizeSweep(size_t epochSample);
//bool Randomize() {return m_randomizeRange != randomizeNone;}
bool Randomize() { return false; }
void SetupEpoch();
void StoreLabel(ElemType& labelStore, const LabelType& labelValue);
size_t RecordsToRead(size_t mbStartSample, bool tail=false);
@@ -138,7 +138,7 @@ private:
public:
virtual void Init(const ConfigParameters& config);
virtual void Destroy();
LibSVMBinaryReader() { m_qfeaturesBuffer = NULL; m_dfeaturesBuffer = NULL; m_labelsBuffer = NULL; }
virtual ~LibSVMBinaryReader();
virtual void StartMinibatchLoop(size_t mbSize, size_t epoch, size_t requestedEpochSamples=requestDataSize);
virtual bool GetMinibatch(std::map<std::wstring, Matrix<ElemType>*>& matrices);

View file

@@ -17,8 +17,8 @@
static inline size_t RoundUp(size_t m, size_t n)
{
if (m % n == 0) return m / n;
else return m / n + 1;
}
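RoundUp(m, n) is integer ceil(m/n); a few hypothetical sanity checks (not part of the commit):

#include <cassert>

void RoundUpExamples()
{
    assert(RoundUp(8, 4) == 2);    // divides evenly
    assert(RoundUp(10, 4) == 3);   // a remainder rounds up
    assert(RoundUp(1, 4) == 1);
}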
namespace Microsoft { namespace MSR { namespace CNTK {
@@ -35,8 +35,8 @@ template<class ElemType>
class UCIFastReader : public IDataReader<ElemType>
{
public:
using LabelType = typename IDataReader<ElemType>::LabelType;
using LabelIdType = typename IDataReader<ElemType>::LabelIdType;
using IDataReader<ElemType>::mBlgSize;
//typedef std::string LabelType;
//typedef unsigned LabelIdType;

View file

@@ -35,6 +35,7 @@
#include "SynchronousExecutionEngine.h"
#include "ModelEditLanguage.h"
#include "SGD.h"
#include "commandArgUtil.h"
#include "MultiNetworksSGD.h"
#include "SimpleEvaluator.h"
#include "SimpleOutputWriter.h"
@@ -431,20 +432,20 @@ void DoCreateLabelMap(const ConfigParameters& config)
//////////////////////////////////////////////////////////////////////////
// for action SVD
// An action "SVD" performs the following process to transform an existing model:
// 1. For a Learnable Parameter A whose name matches with the user specified regex,
// A is approximated by two matrice multiplication B*C ;
// 2. In order to keep the low-rank structure in training,
// the original A node will be replaced by A' whose opertions is Times
// with its left children being B and right chilren being
// An action "SVD" performs the following process to transform an existing model:
// 1. For a Learnable Parameter A whose name matches with the user specified regex,
// A is approximated by two matrice multiplication B*C ;
// 2. In order to keep the low-rank structure in training,
// the original A node will be replaced by A' whose opertions is Times
// with its left children being B and right chilren being
//
// To use this command,
// user need to specify:
// 1) modelPath -- path to the existing model
// 2) outputmodelPath -- where to write the transformed model
// 3) KeepRatio -- how many percentage of energy we want to keep
// 4) ParameterName -- name (regex) of the parameter node we want to perform a SVD decomposition
//
// To use this command,
// user need to specify:
// 1) modelPath -- path to the existing model
// 2) outputmodelPath -- where to write the transformed model
// 3) KeepRatio -- how many percentage of energy we want to keep
// 4) ParameterName -- name (regex) of the parameter node we want to perform a SVD decomposition
//
//////////////////////////////////////////////////////////////////////////
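In symbols (a sketch of the intent, assuming "energy" means squared singular values; the code defines the exact criterion): if the parameter has SVD $A = U \Sigma V^{\top}$ with $A \in \mathbb{R}^{m \times n}$, the action keeps the smallest rank $k$ whose retained energy reaches KeepRatio and factors accordingly:

$$A \approx B C, \qquad B = U_k \Sigma_k \in \mathbb{R}^{m \times k}, \qquad C = V_k^{\top} \in \mathbb{R}^{k \times n},$$

$$k = \min\left\{ r : \sum_{i=1}^{r} \sigma_i^2 \Big/ \sum_{i} \sigma_i^2 \ \ge\ \mathrm{KeepRatio} \right\}.$$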
//////////////////////////////////////////////////////////////////////////
// helper function for DoParameterSVD
@@ -524,9 +525,9 @@ void DoParameterSVD(const ConfigParameters& config)
///
/// the outputs are the vocabulary, word2class and class2idx file with the information below
/// vocabulary format is as follows
/// 0 42068 </s> 0
/// 1 50770 the 0
/// 2 45020 <unk> 1
/// the first column is word index
/// the last column is class index of the word
/// the second column and the third column are for information purpose and
@@ -559,7 +560,7 @@ void DoWriteWordAndClassInfo(const ConfigParameters& config)
if (!fp)
RuntimeError("inputFile cannot be read");
if (nbrCls > 0)
cls2idx.Resize(nbrCls, 1);
std::unordered_map<string, double> v_count;
/// get line
@@ -596,7 +597,7 @@ void DoWriteWordAndClassInfo(const ConfigParameters& config)
std::priority_queue<stringdouble, std::vector<stringdouble>, compare_second<stringdouble> >
q(compare_second<stringdouble>(), std::vector<stringdouble>(v_count.begin(), v_count.end()));
size_t wordCountLessCutoff = v_count.size();
if (cutoff > 0)
for (std::unordered_map<std::string, double>::iterator iter = v_count.begin(); iter != v_count.end(); iter++)
if (iter->second <= cutoff)
@@ -646,10 +647,10 @@ void DoWriteWordAndClassInfo(const ConfigParameters& config)
double dd = 0;
if (nbrCls > 0)
{
for (std::unordered_map<std::string, double>::iterator iter = removed.begin(); iter != removed.end(); iter++)
total += iter->second;
for (std::unordered_map<std::string, double>::iterator iter = removed.begin(); iter != removed.end(); iter++)
dd += sqrt(iter->second / total);
}
double df = 0;
@@ -662,11 +663,11 @@ void DoWriteWordAndClassInfo(const ConfigParameters& config)
double freq = p.top().second;
if (nbrCls > 0)
{
df += sqrt(freq / total) / dd;
if (df > 1)
df = 1;
if (df > 1.0 * (class_id + 1) / nbrCls && class_id < nbrCls)
class_id++;
}
size_t wid = m_words.size();
@@ -676,7 +677,7 @@ void DoWriteWordAndClassInfo(const ConfigParameters& config)
m_count[wid] = freq;
if (nbrCls > 0)
m_class[wid] = class_id;
p.pop();
}
@@ -685,7 +686,7 @@ void DoWriteWordAndClassInfo(const ConfigParameters& config)
for (size_t i = 0; i < m_index.size(); i++)
{
if (nbrCls > 0)
wrd2cls(i, 0) = (ElemType)m_class[i];
long long clsIdx = nbrCls > 0 ? m_class[i] : 0;
if (nbrCls > 0 && clsIdx != prevClsIdx)
{
@@ -1059,49 +1060,49 @@ void DoConvertFromDbn(const ConfigParameters& config)
template <typename ElemType>
void DoTopologyPlot(const ConfigParameters& config)
{
wstring modelPath = config("modelPath");
wstring outdot = config("outputDotFile"); // filename for the dot language output, if not specified, %modelpath%.dot will be used
wstring outRending = config("outputFile"); // filename for the rendered topology plot
// this can be empty, in that case no rendering will be done
// or if this is set, renderCmd must be set, so CNTK will call re
wstring RenderCmd = config("RenderCmd"); // if this option is set, then CNTK will call the render to convert the outdotFile to a graph
// e.g. "d:\Tools\graphviz\bin\dot.exe -Tpng -x <IN> -o<OUT>"
// where <IN> and <OUT> are two special placeholders
wstring modelPath = config("modelPath");
wstring outdot = config("outputDotFile"); // filename for the dot language output, if not specified, %modelpath%.dot will be used
wstring outRending = config("outputFile"); // filename for the rendered topology plot
// this can be empty, in that case no rendering will be done
// or if this is set, renderCmd must be set, so CNTK will call re
wstring RenderCmd = config("RenderCmd"); // if this option is set, then CNTK will call the render to convert the outdotFile to a graph
// e.g. "d:\Tools\graphviz\bin\dot.exe -Tpng -x <IN> -o<OUT>"
// where <IN> and <OUT> are two special placeholders
//========================================
// Sec. 1 option check
//========================================
if (outdot.empty())
{
outdot = modelPath +L".dot";
}
wstring rescmd;
if (!outRending.empty()) // we need to render the plot
{
std::wregex inputPlaceHolder(L"(.+)(<IN>)(.*)");
std::wregex outputPlaceHolder(L"(.+)(<OUT>)(.*)");
rescmd = regex_replace(RenderCmd, inputPlaceHolder, L"$1"+outdot+L"$3");
rescmd = regex_replace(rescmd, outputPlaceHolder, L"$1"+outRending+L"$3");
}
ComputationNetwork<ElemType> net(-1);
net.LoadFromFile(modelPath);
net.PlotNetworkTopology(outdot);
fprintf(stderr, "Output network description in dot language to %S\n", outdot.c_str());
if (!outRending.empty())
{
fprintf(stderr, "Executing a third-part tool for rendering dot:\n%S\n", rescmd.c_str());
#ifdef __unix__
system(msra::strfun::utf8(rescmd).c_str());
#else
_wsystem(rescmd.c_str());
#endif
fprintf(stderr, "Done\n");
}
}
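For example (hypothetical values): with RenderCmd = "dot.exe -Tpng <IN> -o<OUT>", outdot = model.dot, and outRending = model.png, the two regex_replace calls above produce rescmd = "dot.exe -Tpng model.dot -omodel.png", which is then executed via _wsystem (or system on Unix).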
@@ -1152,7 +1153,7 @@ void DoCommand(const ConfigParameters& config)
else if (action[j] == "createLabelMap")
DoCreateLabelMap<ElemType>(commandParams);
else if (action[j] == "writeWordAndClass")
DoWriteWordAndClassInfo<ElemType>(commandParams);
else if (action[j] == "plot")
DoTopologyPlot<ElemType>(commandParams);
else if (action[j] == "SVD")

View file

@@ -2943,70 +2943,8 @@ public:
}
}
/**
call unit test of each node
this adds a verification of the correctness of node operations.
*/
bool UnitTest(bool allowFragment = false)
{
// currently only validates nodes, we should validate everything we can
if (FeatureNodes().size() == 0 && !allowFragment)
{
throw std::runtime_error("No Feature nodes specified");
}
// first give criteria nodes as root node
if (FinalCriterionNodes().size() > 0)
{
for (auto node : FinalCriterionNodes())
{
if (!allowFragment) FormRecurentLoops(node);
size_t actualMBSize = this->GetActualMBSize();
this->SetActualMiniBatchSize(actualMBSize);
if (UnitTest(node) == false)
return false;
}
}
else if (!allowFragment)
{
throw std::runtime_error("No Criterion nodes specified");
}
// now output nodes
if (OutputNodes().size() > 0)
{
for (auto node : OutputNodes())
if (UnitTest(node) == false)
return false;
}
else if (!allowFragment)
{
throw std::runtime_error("No Output nodes specified");
}
// now evaluation nodes
if (EvaluationNodes().size() > 0)
{
for (auto node : EvaluationNodes())
if (UnitTest(node) == false)
return false;
}
return true;
}
bool UnitTest(const ComputationNodePtr rootNode)
{
fprintf(stderr, "\n\n Unit test node %ws \n", rootNode->NodeName().c_str());
std::list<ComputationNodePtr>& nodes = GetEvalOrder(rootNode);
for (auto nodeIter = nodes.begin(); nodeIter != nodes.end(); nodeIter++)
{
if ((*nodeIter)->UnitTest() == false)
return false;
}
fprintf(stderr, "\n\n");
return true;
}
//========================================
// This function performs SVD decomposition for different groups of learnable parameters

View file

@@ -58,10 +58,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
protected:
//std containers such as list and map does not support class reference so we need to use pointer
typedef ComputationNode<ElemType>* ComputationNodePtr;
typedef std::pair<ComputationNodePtr, ComputationNodePtr> ComputationArc;
public:
ComputationNode(DEVICEID_TYPE deviceId) : m_functionValues(deviceId), m_gradientValues(deviceId)
{
m_deviceId = deviceId;
m_loopId = -1;
@@ -367,15 +367,15 @@ namespace Microsoft { namespace MSR { namespace CNTK {
return m_indexInLoop;
}
std::wstring GetName() const
{
    return m_nodeName;
}

std::vector<ComputationNodePtr> GetChildren() const
{
    return m_children;
}
bool isVisisted()
{
@@ -687,39 +687,38 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
}
// [1/13/2015 erw] add to enumerate all the edges
void EnumerateArcs(std::unordered_set<ComputationNodePtr>& vistied, std::list<ComputationArc>& arcs)
// enumerate arcs that can be reached starting from the current node's children
// [in/out] vistied records already visited nodes
{
    std::list<ComputationNodePtr> tovisit;

    if (vistied.find(this) == vistied.end()) // only do when this node has not been visited before
    {
        tovisit.push_back(this);

        while (!tovisit.empty())
        {
            ComputationNodePtr curNode = tovisit.front();
            tovisit.pop_front();

            if (vistied.find(curNode) == vistied.end())
            {
                for (size_t i = 0; i < curNode->m_children.size(); i++)
                {
                    arcs.push_back(ComputationArc(curNode, curNode->m_children[i]));

                    if (vistied.find(curNode->m_children[i]) == vistied.end()) // this child has not been visited before
                    {
                        tovisit.push_front(curNode->m_children[i]); // going to visit each of the children
                    }
                }
                vistied.insert(curNode);
            }
        }
    }
}
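A hypothetical caller of EnumerateArcs, for illustration only: collect every parent-to-child edge reachable from a root node and print it.

void PrintArcsExample(ComputationNode<float>* rootNode)
{
    std::unordered_set<ComputationNode<float>*> visited;
    std::list<std::pair<ComputationNode<float>*, ComputationNode<float>*>> arcs;

    rootNode->EnumerateArcs(visited, arcs);

    for (const auto& arc : arcs)
        fprintf(stderr, "%ls -> %ls\n",
                arc.first->GetName().c_str(), arc.second->GetName().c_str());
}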
// NOTE: we should reimplement this to be thread-safe and use a larger than requested initialized memory block
// we can then just wrap that memory block in a matrix of the correct dimensions since it will be const no one can change it
@@ -826,8 +825,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
return nodes;
}
std::wstring CreateUniqNodeName() const
{
#ifdef USE_GUID_AS_NAME

View file

@@ -515,72 +515,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
node->CopyTo(this, newName, flags);
}
bool UnitTest()
{
try{
size_t nInput = 2;
size_t nHidden = 3;
size_t nOutput = 3;
Inputs(0)->FunctionValues().Resize(nInput, nHidden);
Inputs(0)->FunctionValues().SetValue(1.0);
Inputs(1)->FunctionValues().TransferFromDeviceToDevice(m_deviceId, CPUDEVICE, true);
Inputs(1)->FunctionValues().SwitchToMatrixType(DENSE, matrixFormatDense, false);
Inputs(1)->FunctionValues().Resize(nHidden, nOutput);
Inputs(1)->FunctionValues().SetValue(0.0);
Inputs(1)->FunctionValues().SetValue(0, 0, 1.0);
Inputs(1)->FunctionValues().SetValue(1, 1, 2.0);
Inputs(1)->FunctionValues().TransferFromDeviceToDevice(CPUDEVICE, m_deviceId, true);
Inputs(1)->FunctionValues().SwitchToMatrixType(SPARSE, matrixFormatSparseCSC, true);
FunctionValues().Resize(nInput, nOutput);
EvaluateThisNode();
/// check with expected values
FunctionValues().TransferFromDeviceToDevice(m_deviceId, CPUDEVICE, true);
if (!ISCLOSE(FunctionValues()(0, 0), 1.0, EPSILON) ||
!ISCLOSE(FunctionValues()(0, 1), 2.0, EPSILON) ||
!ISCLOSE(FunctionValues()(1, 1), 2.0, EPSILON) )
throw("LSTMNode forward computation error");
if (FunctionValues().GetDeviceId() != m_deviceId)
FunctionValues().TransferFromDeviceToDevice(FunctionValues().GetDeviceId(), m_deviceId, true);
GradientValues().Resize(nInput, nOutput);
GradientValues().SetValue(1.0);
for (size_t i = 0; i < 2; i++)
{
Inputs(i)->GradientValues().Resize(Inputs(i)->FunctionValues().GetNumRows(), Inputs(i)->FunctionValues().GetNumCols());
Inputs(i)->GradientValues().SetValue(0);
}
for (size_t i = 0; i < 2; i++)
ComputeInputPartial(i);
/// check with expected values
if (!ISCLOSE(Inputs(1)->GradientValues()(0, 0), 2, EPSILON) /// bi
|| !ISCLOSE(Inputs(1)->GradientValues()(0, 1), 2, EPSILON) // Wxi
|| !ISCLOSE(Inputs(1)->GradientValues()(1, 0), 2, EPSILON) // Whi
|| !ISCLOSE(Inputs(1)->GradientValues()(2, 1), 2, EPSILON) // Wci
)
throw("LSTMNode gradient error on input gates");
for (size_t i = 0; i < 2; i++)
{
if (Inputs(i)->GradientValues().GetDeviceId() != m_deviceId)
Inputs(i)->GradientValues().TransferFromDeviceToDevice(Inputs(i)->GradientValues().GetDeviceId(), m_deviceId, true);
}
}
catch (...)
{
fprintf(stderr, "LookupTableNode unit test is not passed!");
return false;
}
fprintf(stderr, "LookupTableNode unit test passed!\n");
return true;
}
};
template class LookupTableNode<float>;

View file

@@ -2521,8 +2521,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Matrix<ElemType> m_temp;
};
template class CosDistanceWithNegativeSamplesNode<float>;
template class CosDistanceWithNegativeSamplesNode<double>;
template<class ElemType>
class TransposeNode : public ComputationNode<ElemType>

View file

@@ -417,113 +417,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
CopyImageSizeFromInputs();
}
bool UnitTest()
{
try{
size_t nT = 3;
size_t nInput = 2;
/// backup
Matrix<ElemType> f0(m_deviceId), func(m_deviceId);
Matrix<ElemType> target(m_deviceId);
Matrix<ElemType> boundary(m_deviceId);
boundary.Resize(1, nT);
boundary.SetValue(SENTENCE_MIDDLE);
boundary.ColumnSlice(0, 1).SetValue(SENTENCE_BEGIN);
vector<MinibatchPackingFlag> minibatchPackingFlag;
minibatchPackingFlag.resize(nT);
std::fill(minibatchPackingFlag.begin(), minibatchPackingFlag.end(), MinibatchPackingFlag::None);
minibatchPackingFlag[1] = MinibatchPackingFlag::UtteranceStart;
ResetBound(&boundary, &minibatchPackingFlag);
f0 = Inputs(0)->FunctionValues();
func = FunctionValues();
target.Resize(nInput, nT);
for (size_t i = 0; i < nT; i++)
target(0, i) = 1;
Inputs(0)->FunctionValues().Resize(nInput, nT);
Inputs(0)->FunctionValues().SetValue(ConstOnes(nInput, nT, m_deviceId));
Inputs(0)->FunctionValues().SetValue((ElemType)0.1);
FunctionValues().Resize(nInput, nT);
FunctionValues().SetValue(0.0);
for (size_t t = 0; t < nT; t++)
{
EvaluateThisNode(t);
if (t == 0)
{
/// check with expected values
if (!ISCLOSE(FunctionValues()(0, 0), 0.0, EPSILON) ||
!ISCLOSE(FunctionValues()(0, 1), 0, EPSILON) ||
!ISCLOSE(FunctionValues()(0, 2), 0, EPSILON) )
throw("Delaynode forward computation error");
}
if (t == 1)
{
/// check with expected values
if (!ISCLOSE(FunctionValues()(0, 0), 0.0, EPSILON) ||
!ISCLOSE(FunctionValues()(0, 1), 0.1, EPSILON) ||
!ISCLOSE(FunctionValues()(0, 2), 0, EPSILON) )
throw("Delaynode forward computation error");
}
if (t == 2)
{
/// check with expected values
if (!ISCLOSE(FunctionValues()(0, 0), 0.0, EPSILON) ||
!ISCLOSE(FunctionValues()(0, 1), 0.1, EPSILON) ||
!ISCLOSE(FunctionValues()(0, 2), 0.1, EPSILON) )
throw("Delaynode forward computation error");
}
if (FunctionValues().GetDeviceId() != m_deviceId)
FunctionValues().TransferFromDeviceToDevice(FunctionValues().GetDeviceId(), m_deviceId, true);
}
GradientValues().Resize(nInput, nT);
GradientValues().SetValue(1.0);
Inputs(0)->GradientValues().Resize(nInput, nT);
Inputs(0)->GradientValues().SetValue(0);
for (int t = nT - 1; t >= 0; t--)
{
ComputeInputPartial(0, t);
if (t==nT-1)
/// check with expected values
if (!ISCLOSE(Inputs(0)->GradientValues()(0, 0), 0, EPSILON)
|| !ISCLOSE(Inputs(0)->GradientValues()(0, 1), 1, EPSILON)
|| !ISCLOSE(Inputs(0)->GradientValues()(0, 2), 0, EPSILON))
throw("DelayNode gradient error on input gates");
if (t==nT-2)
if (!ISCLOSE(Inputs(0)->GradientValues()(0, 0), 1, EPSILON)
|| !ISCLOSE(Inputs(0)->GradientValues()(0, 1), 1, EPSILON)
|| !ISCLOSE(Inputs(0)->GradientValues()(0, 2), 0, EPSILON))
throw("DelayNode gradient error on input gates");
if (t == 0)
if (!ISCLOSE(Inputs(0)->GradientValues()(0, 1), 1, EPSILON)
|| !ISCLOSE(Inputs(0)->GradientValues()(0, 1), 1, EPSILON)
|| !ISCLOSE(Inputs(0)->GradientValues()(0, 2), 0, EPSILON))
throw("DelayNode gradient error on input gates");
}
if (Inputs(0)->GradientValues().GetDeviceId() != m_deviceId)
Inputs(0)->GradientValues().TransferFromDeviceToDevice(Inputs(0)->GradientValues().GetDeviceId(), m_deviceId, true);
}
catch (...)
{
fprintf(stderr, "Delaynode unit test is not passed!");
return false;
}
fprintf(stderr, "Delaynode unit test passed!\n");
return true;
}
virtual void AttachInputs(const ComputationNodePtr inputNode)
{

View file

@@ -822,105 +822,105 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
template<class ElemType>
ComputationNetwork<ElemType>& SimpleNetworkBuilder<ElemType>::BuildSeqTrnLSTMNetworkFromDescription(size_t mbSize)
{
    if (m_net->GetTotalNumberOfNodes() < 1) //not built yet
    {
        ULONG randomSeed = 1;

        size_t numHiddenLayers = m_layerSizes.size() - 2;
        size_t numRecurrentLayers = m_recurrentLayers.size();

        ComputationNodePtr input = nullptr, w = nullptr, b = nullptr, u = nullptr, e = nullptr, delay = nullptr, output = nullptr, label = nullptr, prior = nullptr;
        ComputationNodePtr Wxo = nullptr, Who = nullptr, Wco = nullptr, bo = nullptr, Wxi = nullptr, Whi = nullptr, Wci = nullptr, bi = nullptr;
        ComputationNodePtr Wxf = nullptr, Whf = nullptr, Wcf = nullptr, bf = nullptr, Wxc = nullptr, Whc = nullptr, bc = nullptr;
        ComputationNodePtr ot = nullptr, it = nullptr, ft = nullptr, ct = nullptr, ht = nullptr;
        ComputationNodePtr delayHI = nullptr, delayCI = nullptr, delayHO = nullptr, delayHF = nullptr, delayHC = nullptr, delayCF = nullptr, delayCC = nullptr;
        ComputationNodePtr directWIO = nullptr, directInput = nullptr, directOutput = nullptr;
        ComputationNodePtr outputFromEachLayer[MAX_DEPTH] = { nullptr };
        ComputationNodePtr trans = nullptr;

        input = m_net->CreateInputNode(L"features", m_layerSizes[0], mbSize);
        m_net->FeatureNodes().push_back(input);

        if (m_applyMeanVarNorm)
        {
            w = m_net->Mean(input);
            b = m_net->InvStdDev(input);
            output = m_net->PerDimMeanVarNormalization(input, w, b);

            input = output;
        }

        if (m_lookupTableOrder > 0)
        {
            e = m_net->CreateLearnableParameter(msra::strfun::wstrprintf(L"E%d", 0), m_layerSizes[1], m_layerSizes[0] / m_lookupTableOrder);
            m_net->InitLearnableParameters(e, m_uniformInit, randomSeed++, m_initValueScale);
            output = m_net->LookupTable(e, input, L"LookupTable");

            if (m_addDropoutNodes)
                input = m_net->Dropout(output);
            else
                input = output;

            outputFromEachLayer[1] = input;
        }

        /// direct connect from input node to output node

        int recur_idx = 0;
        int offset = m_lookupTableOrder > 0 ? 1 : 0;
        if (numHiddenLayers > 0)
        {
            for (int i = offset; i < numHiddenLayers; i++)
            {
                if (m_recurrentLayers.size() > 0 && m_recurrentLayers[recur_idx] == i + 1)
                {
                    output = (ComputationNodePtr)BuildLSTMComponent(randomSeed, mbSize, i, m_layerSizes[i] * (offset ? m_lookupTableOrder : 1), m_layerSizes[i + 1], input);
                    input = output;

                    recur_idx++;
                }
                else
                {
                    u = m_net->CreateLearnableParameter(msra::strfun::wstrprintf(L"U%d", i), m_layerSizes[i + 1], m_layerSizes[i] * (offset ? m_lookupTableOrder : 1));
                    m_net->InitLearnableParameters(u, m_uniformInit, randomSeed++, m_initValueScale);
                    b = m_net->CreateLearnableParameter(msra::strfun::wstrprintf(L"B%d", i), m_layerSizes[i + 1], 1);
                    output = ApplyNonlinearFunction(m_net->Plus(m_net->Times(u, input), b), i);
                }

                if (m_addDropoutNodes)
                    input = m_net->Dropout(output);
                else
                    input = output;
            }
        }

        w = m_net->CreateLearnableParameter(msra::strfun::wstrprintf(L"TimesBeforeSoftMax%d", numHiddenLayers), m_layerSizes[numHiddenLayers + 1], m_layerSizes[numHiddenLayers]);
        m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale);

        output = m_net->Times(w, input, L"outputsBeforeSoftmax");

        trans = m_net->CreateLearnableParameter(msra::strfun::wstrprintf(L"TransProb%d", numHiddenLayers), m_layerSizes[numHiddenLayers + 1], m_layerSizes[numHiddenLayers + 1]);
        trans->FunctionValues().SetValue((ElemType)1.0 / m_layerSizes[numHiddenLayers + 1]);
        // m_net->InitLearnableParameters(trans, m_uniformInit, randomSeed++, m_initValueScale);
        trans->NeedGradient() = true;

        label = m_net->CreateInputNode(L"labels", m_layerSizes[numHiddenLayers + 1], mbSize);
        AddTrainAndEvalCriterionNodes(output, label, nullptr, L"CRFTrainCriterion", L"CRFEvalCriterion", nullptr, trans);

        input = output;
        output = m_net->SequenceDecoder(label, input, trans, L"outputs");
        m_net->OutputNodes().push_back(output);

        output = m_net->Softmax(input, L"PosteriorProb");
    }

    m_net->ResetEvalTimeStamp();

    return *m_net;
}
template<class ElemType>
ComputationNetwork<ElemType>& SimpleNetworkBuilder<ElemType>::BuildCLASSLSTMNetworkFromDescription(size_t mbSize)
@@ -1756,13 +1756,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
ComputationNodePtr Wxo = nullptr, Who = nullptr, Wco = nullptr, bo = nullptr, Wxi = nullptr, Whi = nullptr, Wci = nullptr, bi = nullptr;
ComputationNodePtr clslogpostprob = nullptr;
ComputationNodePtr bias = nullptr;
ComputationNodePtr outputFromEachLayer[MAX_DEPTH] = { nullptr };
input = m_net->CreateSparseInputNode(L"features", m_layerSizes[0], mbSize);
m_net->FeatureNodes().push_back(input);
if (input->FunctionValues().GetDeviceId() != CPUDEVICE)
RuntimeError("BuildNCELSTMNetworkFromDescription : only support CPU sparse matrix input at this moment. Contact Yinggong Zhao (v-yinggz@microsoft.com) or Kaisheng Yao (kaisheny@microsoft.com) for updates.");
if (m_applyMeanVarNorm)
{
w = m_net->Mean(input);
@@ -1782,28 +1780,53 @@ namespace Microsoft { namespace MSR { namespace CNTK {
input = m_net->Dropout(output);
else
input = output;
outputFromEachLayer[1] = input;
}
/// direct connect from input node to output node
int recur_idx = 0;
int offset = m_lookupTableOrder > 0 ? 1 : 0;
if (numHiddenLayers > 0)
{
output = (ComputationNodePtr)BuildLSTMComponent(randomSeed, mbSize, 0, m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1), m_layerSizes[offset + 1], input);
input = output;
outputFromEachLayer[offset + 1] = input;
for (int i = 1 + offset; i<numHiddenLayers; i++)
{
if (m_recurrentLayers.size() > 0 && m_recurrentLayers[recur_idx] == i)
{
output = (ComputationNodePtr)BuildLSTMComponent(randomSeed, mbSize, i, m_layerSizes[i], m_layerSizes[i + 1], input);
recur_idx++;
}
else
{
u = m_net->CreateLearnableParameter(msra::strfun::wstrprintf(L"U%d", i), m_layerSizes[i + 1], m_layerSizes[i]);
m_net->InitLearnableParameters(u, m_uniformInit, randomSeed++, m_initValueScale);
b = m_net->CreateLearnableParameter(msra::strfun::wstrprintf(L"B%d", i), m_layerSizes[i + 1], 1);
output = ApplyNonlinearFunction(m_net->Plus(m_net->Times(u, input), b), i);
}
if (m_addDropoutNodes)
input = m_net->Dropout(output);
else
input = output;
outputFromEachLayer[i + 1] = input;
}
}
for (size_t i = offset; i < m_layerSizes.size(); i++)
{
/// add direct connect from each layers' output to the layer before the output layer
output = BuildDirectConnect(randomSeed, mbSize, i, (i > 1) ? m_layerSizes[i] : ((offset == 0) ? m_layerSizes[i] : m_layerSizes[i] * m_lookupTableOrder), m_layerSizes[numHiddenLayers], outputFromEachLayer[i], input);
if (output != nullptr)
input = output;
}
/// need to have an [input_dim x output_dim] matrix
/// e.g., [200 x 10000], where 10000 is the vocabulary size
/// this is for speed-up: the per-word weight vector can be obtained simply with a column slice
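A sketch of the column-slice idea this comment describes (shapes and the word id are hypothetical):

void ColumnSliceExample()
{
    Matrix<float> W(CPUDEVICE);
    W.Resize(200, 10000);                             // [input_dim x output_dim], as in the comment
    size_t wordId = 42;                               // hypothetical word id
    Matrix<float> col = W.ColumnSlice(wordId, 1);     // the [200 x 1] weight column for that word
}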

View file

@@ -3727,7 +3727,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
CUDA_CALL(cudaMemcpy(res,d_res,sizeof(long)*1,cudaMemcpyDeviceToHost));
CUDA_CALL(cudaFree(d_res));
if (res[0]!=0)
bResult = true;
delete [] res;
return bResult;
}