Merge with master branch.
This commit is contained in:
Parent: b3157ff534
Commit: 2582c8f5a8
@@ -23,21 +23,21 @@
#include "HTKMLFReader.h"

namespace Microsoft { namespace MSR { namespace CNTK {

template<class ElemType>
void DATAREADER_API GetReader(IDataReader<ElemType>** preader)
{
-   // *preader = new SequenceReader<ElemType>();
    *preader = new HTKMLFReader<ElemType>();
}

extern "C" DATAREADER_API void GetReaderF(IDataReader<float>** preader)
{
    GetReader(preader);
}
extern "C" DATAREADER_API void GetReaderD(IDataReader<double>** preader)
{
    GetReader(preader);
}

// Utility function, in ConfigFile.cpp, but HTKMLFReader doesn't need that code...

// Trim - trim white space off the start and end of the string

@@ -102,12 +102,12 @@ private:

public:
    Matrix<ElemType> m_sentenceBegin;
    vector<MinibatchPackingFlag> m_minibatchPackingFlag;

    bool mIgnoreSentenceBeginTag;
    HTKMLFReader() : m_sentenceBegin(CPUDEVICE) {
    }
    virtual void Init(const ConfigParameters& config);
    virtual void Destroy() {delete this;}
    virtual ~HTKMLFReader();

@@ -118,7 +118,7 @@ public:
    virtual bool GetData(const std::wstring& sectionName, size_t numRecords, void* data, size_t& dataBufferSize, size_t recordStart=0);

    virtual bool DataEnd(EndDataType endDataType);
    void SetSentenceSegBatch(Matrix<ElemType> &sentenceBegin, vector<MinibatchPackingFlag>& sentenceExistsBeginOrNoLabels);
    void SetSentenceEnd(int /*actualMbSize*/){};
    void SetRandomSeed(int);
};

@@ -113,23 +113,23 @@ public:
    /// the second data stream has two sentences, with 0 indicating the beginning of a sentence
    /// you may use 1 even if a sentence begins at that position; in this case, the trainer will carry over hidden states to the following
    /// frame.
    Matrix<ElemType> m_sentenceBegin;

    /// a matrix of 1 x n_length
    /// 1 denotes the case that there exists a sentence-begin or no_labels case in this frame
    /// 0 denotes such a case is not in this frame
    vector<MinibatchPackingFlag> m_minibatchPackingFlag;

    /// by default it is false
    /// if true, the reader will set to SENTENCE_MIDDLE those time positions that originally correspond to SENTENCE_BEGIN
    /// set to true so that a current minibatch can use state activities from the previous minibatch.
    /// the default will have truncated BPTT, which only does BPTT inside a minibatch
    bool mIgnoreSentenceBeginTag;
    HTKMLFReader() : m_sentenceBegin(CPUDEVICE) {
    }

    virtual void Init(const ConfigParameters& config);
    virtual void Destroy() {delete this;}

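To make the sentence-begin layout concrete, here is a small hypothetical illustration consistent with the comments above (the two-stream example and the values are assumptions for illustration, not content of this commit):

    /// Hypothetical example: two parallel streams, 5 frames each.
    /// stream 1 holds one sentence starting at frame 0  -> row: 0 1 1 1 1
    /// stream 2 holds two sentences starting at 0 and 3 -> row: 0 1 1 0 1
    /// m_sentenceBegin would then be the 2 x 5 matrix formed by these rows,
    /// and m_minibatchPackingFlag would mark frames 0 and 3 as containing a sentence begin.
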
@@ -1418,10 +1418,7 @@ the first row is 0/1 bit for whether corresponding frame has sentence beginning/
            m_toProcess[i] = actualmbsizeOri;
            first = false;
        }
-       else[merge]
-           tool = kdiff3
-       [mergetool "kdiff3"]
-           cmd = \"C:\\\\Program Files (x86)\\\\KDiff3\\\\kdiff3\" $BASE $LOCAL $REMOTE -o $MERGED
+       else
        {
            if (m_toProcess[i] != actualmbsizeOri)
            {

@@ -1475,10 +1472,7 @@ the first row is 0/1 bit for whether corresponding frame has sentence beginning/
    {
        // loop through the columns and set one value to 1
        // in the future we want to use a sparse matrix here
-       for (int k=0; k < actualmbsizeOri; k++)[merge]
-           tool = kdiff3
-       [mergetool "kdiff3"]
-           cmd = \"C:\\\\Program Files (x86)\\\\KDiff3\\\\kdiff3\" $BASE $LOCAL $REMOTE -o $MERGED
+       for (int k=0; k < actualmbsizeOri; k++)
        {
            assert(uids[k] < dim);
            //labels(uids[i], i) = (ElemType)1;

@@ -1608,10 +1602,7 @@ the first row is 0/1 bit for whether corresponding frame has sentence beginning/
        break;
    }
    return ret;
-}[merge]
-   tool = kdiff3
-[mergetool "kdiff3"]
-   cmd = \"C:\\\\Program Files (x86)\\\\KDiff3\\\\kdiff3\" $BASE $LOCAL $REMOTE -o $MERGED
+}

template<class ElemType>
void HTKMLFReader<ElemType>::SetSentenceEndInBatch(vector<size_t> &sentenceEnd)

@@ -1654,9 +1645,6 @@ the first row is 0/1 bit for whether corresponding frame has sentence beginning/
    }
}

-template class HTKMLFReader<float>;[merge]
-   tool = kdiff3
-[mergetool "kdiff3"]
-   cmd = \"C:\\\\Program Files (x86)\\\\KDiff3\\\\kdiff3\" $BASE $LOCAL $REMOTE -o $MERGED
+template class HTKMLFReader<float>;
template class HTKMLFReader<double>;
}}}

@@ -143,21 +143,21 @@ public:
    vector<stSentenceInfo> mSentenceIndex2SentenceInfo;

public:
    using LUSequenceParser<NumType, LabelType>::m_dimFeatures;
    using LUSequenceParser<NumType, LabelType>::m_dimLabelsIn;
    using LUSequenceParser<NumType, LabelType>::m_beginSequenceIn;
    using LUSequenceParser<NumType, LabelType>::m_endSequenceIn;
    using LUSequenceParser<NumType, LabelType>::m_dimLabelsOut;
    using LUSequenceParser<NumType, LabelType>::m_beginSequenceOut;
    using LUSequenceParser<NumType, LabelType>::m_endSequenceOut;
    using LUSequenceParser<NumType, LabelType>::m_traceLevel;
    using LUSequenceParser<NumType, LabelType>::m_beginTag;
    using LUSequenceParser<NumType, LabelType>::m_endTag;
    using LUSequenceParser<NumType, LabelType>::m_fileBuffer;
    using LUSequenceParser<NumType, LabelType>::m_inputs;
    using LUSequenceParser<NumType, LabelType>::m_labels;
    using LUSequenceParser<NumType, LabelType>::m_beginSequence;
    using LUSequenceParser<NumType, LabelType>::m_endSequence;
    LUBatchLUSequenceParser() {
    };
    ~LUBatchLUSequenceParser() {

@@ -200,53 +200,53 @@ public:
    using LabelType = wstring;
    using LabelIdType = long;
-   using LUSequenceReader<ElemType>::mWordMappingFn;
    using LUSequenceReader<ElemType>::m_cachingReader;
    using LUSequenceReader<ElemType>::mWordMapping;
    using LUSequenceReader<ElemType>::mUnkStr;
    using LUSequenceReader<ElemType>::m_cachingWriter;
    using LUSequenceReader<ElemType>::m_featuresName;
    using LUSequenceReader<ElemType>::m_labelsName;
    using LUSequenceReader<ElemType>::labelInfoMin;
    using LUSequenceReader<ElemType>::labelInfoMax;
    using LUSequenceReader<ElemType>::m_featureDim;
    using LUSequenceReader<ElemType>::m_labelInfo;
    // using LUSequenceReader<ElemType>::m_labelInfoIn;
    using LUSequenceReader<ElemType>::m_mbStartSample;
    using LUSequenceReader<ElemType>::m_epoch;
    using LUSequenceReader<ElemType>::m_totalSamples;
    using LUSequenceReader<ElemType>::m_epochStartSample;
    using LUSequenceReader<ElemType>::m_seqIndex;
    using LUSequenceReader<ElemType>::m_endReached;
    using LUSequenceReader<ElemType>::m_readNextSampleLine;
    using LUSequenceReader<ElemType>::m_readNextSample;
    using LUSequenceReader<ElemType>::m_traceLevel;
    using LUSequenceReader<ElemType>::m_wordContext;
    using LUSequenceReader<ElemType>::m_featureCount;
    using typename LUSequenceReader<ElemType>::LabelInfo;
    using LUSequenceReader<ElemType>::labelInfoIn;
    using LUSequenceReader<ElemType>::labelInfoOut;
    // using LUSequenceReader<ElemType>::arrayLabels;
    using LUSequenceReader<ElemType>::m_readerConfig;
    using LUSequenceReader<ElemType>::m_featuresBuffer;
    using LUSequenceReader<ElemType>::m_labelsBuffer;
    using LUSequenceReader<ElemType>::m_labelsIdBuffer;
    using LUSequenceReader<ElemType>::m_mbSize;
    using LUSequenceReader<ElemType>::m_epochSize;
    using LUSequenceReader<ElemType>::m_featureData;
    using LUSequenceReader<ElemType>::m_sequence;
    using LUSequenceReader<ElemType>::m_labelData;
    using LUSequenceReader<ElemType>::m_labelIdData;
    using LUSequenceReader<ElemType>::m_idx2clsRead;
    using LUSequenceReader<ElemType>::m_clsinfoRead;
    using LUSequenceReader<ElemType>::m_featureWordContext;
    using LUSequenceReader<ElemType>::LoadLabelFile;
    using LUSequenceReader<ElemType>::ReleaseMemory;
    using LUSequenceReader<ElemType>::LMSetupEpoch;
    using LUSequenceReader<ElemType>::ChangeMaping;
    using LUSequenceReader<ElemType>::GetIdFromLabel;
    using LUSequenceReader<ElemType>::InitCache;
    using LUSequenceReader<ElemType>::mRandomize;
    using LUSequenceReader<ElemType>::m_seed;
+   using LUSequenceReader<ElemType>::mTotalSentenceSofar;
+   using LUSequenceReader<ElemType>::GetSentenceEndIdFromOutputLabel;
private:


File diff suppressed because it is too large

@@ -27,45 +27,45 @@ enum LabelKind
template<class ElemType>
class LibSVM_BinaryInput {
private:
    HANDLE m_hndl;
    HANDLE m_filemap;
    HANDLE m_header;
    HANDLE m_offsets;
    HANDLE m_data;

    //void* header_orig; // Don't need this since the header is at the start of the file
    void* offsets_orig;
    void* data_orig;

    void* header_buffer;
    void* offsets_buffer;
    void* data_buffer;

    size_t m_dim;
    size_t mbSize;
    size_t MAX_BUFFER = 400;
    size_t m_labelDim;

    ElemType* values;      // = (ElemType*)malloc(sizeof(ElemType)* 230 * 1024);
    int64_t* offsets;      // = (int*)malloc(sizeof(int)* 230 * 1024);
    int32_t* colIndices;   // = (int*)malloc(sizeof(int) * (batchsize + 1));
    int32_t* rowIndices;   // = (int*)malloc(sizeof(int) * MAX_BUFFER * batchsize);
    int32_t* classIndex;   // = (int*)malloc(sizeof(int) * batchsize);
    ElemType* classWeight; // = (ElemType*)malloc(sizeof(ElemType) * batchsize);

    ElemType* m_labelsBuffer;
public:
    int64_t numRows;
    int64_t numBatches;
    int32_t numCols;
    int64_t totalNNz;

    LibSVM_BinaryInput();
    ~LibSVM_BinaryInput();
    void Init(std::wstring fileName, size_t dim);
    bool SetupEpoch( size_t minibatchSize);
    bool Next_Batch(Matrix<ElemType>& features, Matrix<ElemType>& labels, size_t actualmbsize, int batchIndex);
    void Dispose();
};

template<class ElemType>

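For orientation, a minimal usage sketch of this interface (hypothetical, not part of the commit; the file name, dimension, device, and minibatch size below are made-up values):

    // Assumed: "train.bin" exists in the binary layout this class reads.
    LibSVM_BinaryInput<float> input;
    input.Init(L"train.bin", /*dim=*/ 123);     // memory-map the file
    input.SetupEpoch(/*minibatchSize=*/ 256);   // position at the start of an epoch
    Matrix<float> features(CPUDEVICE), labels(CPUDEVICE);
    for (int64_t b = 0; b < input.numBatches; b++)
    {
        if (!input.Next_Batch(features, labels, /*actualmbsize=*/ 256, (int)b))
            break;                              // stop when no batch is left
        // ... hand features/labels to the consumer ...
    }
    input.Dispose();                            // unmap and release handles
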
@@ -75,11 +75,11 @@ class LibSVMBinaryReader : public IDataReader<ElemType>
    // typedef std::string LabelType;
    // typedef unsigned LabelIdType;
private:
    int* read_order; // array to shuffle to reorder the dataset
    std::wstring m_featuresName;
    size_t m_featuresDim;
    LibSVM_BinaryInput<ElemType> featuresInput;
    int64_t m_processedMinibatches;

    size_t m_mbSize; // size of minibatch requested
    LabelIdType m_labelIdMax; // maximum label ID we have encountered so far

@@ -126,7 +126,7 @@ private:

    size_t RandomizeSweep(size_t epochSample);
    //bool Randomize() {return m_randomizeRange != randomizeNone;}
    bool Randomize() { return false; }
    void SetupEpoch();
    void StoreLabel(ElemType& labelStore, const LabelType& labelValue);
    size_t RecordsToRead(size_t mbStartSample, bool tail=false);

@@ -138,7 +138,7 @@ private:
public:
    virtual void Init(const ConfigParameters& config);
    virtual void Destroy();
    LibSVMBinaryReader() { m_qfeaturesBuffer = NULL; m_dfeaturesBuffer = NULL; m_labelsBuffer = NULL; }
    virtual ~LibSVMBinaryReader();
    virtual void StartMinibatchLoop(size_t mbSize, size_t epoch, size_t requestedEpochSamples=requestDataSize);
    virtual bool GetMinibatch(std::map<std::wstring, Matrix<ElemType>*>& matrices);

@@ -17,8 +17,8 @@

static inline size_t RoundUp(size_t m, size_t n)
{
    if (m % n == 0) return m / n;
    else return m / n + 1;
}

namespace Microsoft { namespace MSR { namespace CNTK {

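As a side note, RoundUp is integer ceiling division: the number of n-sized chunks needed to cover m. A tiny hypothetical check, assuming the header above is included (not part of the commit):

    #include <cstdio>
    int main()
    {
        printf("%zu\n", RoundUp(10, 4)); // 3: two full chunks of 4 plus a partial one
        printf("%zu\n", RoundUp(8, 4));  // 2: exact division, no extra chunk
        return 0;
    }
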
@@ -35,8 +35,8 @@ template<class ElemType>
class UCIFastReader : public IDataReader<ElemType>
{
public:
    using LabelType = typename IDataReader<ElemType>::LabelType;
    using LabelIdType = typename IDataReader<ElemType>::LabelIdType;
+   using IDataReader<ElemType>::mBlgSize;
    //typedef std::string LabelType;
    //typedef unsigned LabelIdType;

@@ -35,6 +35,7 @@
#include "SynchronousExecutionEngine.h"
#include "ModelEditLanguage.h"
#include "SGD.h"
#include "commandArgUtil.h"
#include "MultiNetworksSGD.h"
#include "SimpleEvaluator.h"
#include "SimpleOutputWriter.h"

@@ -431,20 +432,20 @@ void DoCreateLabelMap(const ConfigParameters& config)

//////////////////////////////////////////////////////////////////////////
// for action SVD
// An action "SVD" performs the following process to transform an existing model:
//     1. For a Learnable Parameter A whose name matches the user-specified regex,
//        A is approximated by the product of two matrices B*C;
//     2. In order to keep the low-rank structure in training,
//        the original A node will be replaced by A' whose operation is Times
//        with its left child being B and right child being
//
// To use this command, the user needs to specify:
//     1) modelPath       -- path to the existing model
//     2) outputmodelPath -- where to write the transformed model
//     3) KeepRatio       -- what percentage of the energy we want to keep
//     4) ParameterName   -- name (regex) of the parameter node we want to perform an SVD decomposition on
//
//////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////
// helper function for DoParameterSVD

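As a concrete illustration of those four options, a hypothetical config block for the SVD action might look as follows (sketch only; the block name, paths, and values are invented, and the syntax assumes the usual CNTK-style ConfigParameters format):

    command=svd

    svd=[
        action="SVD"
        modelPath="models/trained.dnn"           # existing model to transform
        outputmodelPath="models/trained.svd.dnn" # where the transformed model is written
        KeepRatio=0.8                            # keep 80% of the energy
        ParameterName="W[0-9]+"                  # regex over learnable parameter names
    ]
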
@@ -524,9 +525,9 @@ void DoParameterSVD(const ConfigParameters& config)
///
/// the outputs are the vocabulary, word2class and class2idx files with the information below
/// the vocabulary format is as follows
///  0      42068  </s>    0
///  1      50770  the     0
///  2      45020  <unk>   1
/// the first column is the word index
/// the last column is the class index of the word
/// the second column and the third column are for information purposes and

@@ -559,7 +560,7 @@ void DoWriteWordAndClassInfo(const ConfigParameters& config)
    if (!fp)
        RuntimeError("inputFile cannot be read");
    if (nbrCls > 0)
        cls2idx.Resize(nbrCls, 1);
    std::unordered_map<string, double> v_count;

    /// get line

@@ -596,7 +597,7 @@ void DoWriteWordAndClassInfo(const ConfigParameters& config)
    std::priority_queue<stringdouble, std::vector<stringdouble>, compare_second<stringdouble> >
        q(compare_second<stringdouble>(), std::vector<stringdouble>(v_count.begin(), v_count.end()));

-   int wordCountLessCutoff = v_count.size();
+   size_t wordCountLessCutoff = v_count.size();
    if (cutoff > 0)
        for (std::unordered_map<std::string, double>::iterator iter = v_count.begin(); iter != v_count.end(); iter++)
            if (iter->second <= cutoff)

@@ -646,10 +647,10 @@ void DoWriteWordAndClassInfo(const ConfigParameters& config)
    double dd = 0;
    if (nbrCls > 0)
    {
        for (std::unordered_map<std::string, double>::iterator iter = removed.begin(); iter != removed.end(); iter++)
            total += iter->second;
        for (std::unordered_map<std::string, double>::iterator iter = removed.begin(); iter != removed.end(); iter++)
            dd += sqrt(iter->second / total);
    }

    double df = 0;

@@ -662,11 +663,11 @@ void DoWriteWordAndClassInfo(const ConfigParameters& config)
    double freq = p.top().second;
    if (nbrCls > 0)
    {
        df += sqrt(freq / total) / dd;
        if (df > 1)
            df = 1;
        if (df > 1.0 * (class_id + 1) / nbrCls && class_id < nbrCls)
            class_id++;
    }

    size_t wid = m_words.size();

@@ -676,7 +677,7 @@ void DoWriteWordAndClassInfo(const ConfigParameters& config)

    m_count[wid] = freq;
    if (nbrCls > 0)
        m_class[wid] = class_id;
    p.pop();
}

@@ -685,7 +686,7 @@ void DoWriteWordAndClassInfo(const ConfigParameters& config)
for (size_t i = 0; i < m_index.size(); i++)
{
    if (nbrCls > 0)
        wrd2cls(i, 0) = (ElemType)m_class[i];
    long long clsIdx = nbrCls > 0 ? m_class[i] : 0;
    if (nbrCls > 0 && clsIdx != prevClsIdx)
    {

@@ -1059,49 +1060,49 @@ void DoConvertFromDbn(const ConfigParameters& config)
template <typename ElemType>
void DoTopologyPlot(const ConfigParameters& config)
{
    wstring modelPath = config("modelPath");
    wstring outdot = config("outputDotFile");  // filename for the dot language output; if not specified, %modelpath%.dot will be used
    wstring outRending = config("outputFile"); // filename for the rendered topology plot
                                               // this can be empty, in that case no rendering will be done
                                               // or if this is set, renderCmd must be set, so CNTK will call re
    wstring RenderCmd = config("RenderCmd");   // if this option is set, then CNTK will call the renderer to convert the outdotFile to a graph
                                               // e.g. "d:\Tools\graphviz\bin\dot.exe -Tpng -x <IN> -o<OUT>"
                                               // where <IN> and <OUT> are two special placeholders

    //========================================
    // Sec. 1 option check
    //========================================
    if (outdot.empty())
    {
        outdot = modelPath + L".dot";
    }

    wstring rescmd;
    if (!outRending.empty()) // we need to render the plot
    {
        std::wregex inputPlaceHolder(L"(.+)(<IN>)(.*)");
        std::wregex outputPlaceHolder(L"(.+)(<OUT>)(.*)");

        rescmd = regex_replace(RenderCmd, inputPlaceHolder, L"$1" + outdot + L"$3");
        rescmd = regex_replace(rescmd, outputPlaceHolder, L"$1" + outRending + L"$3");
    }

    ComputationNetwork<ElemType> net(-1);
    net.LoadFromFile(modelPath);
    net.PlotNetworkTopology(outdot);
    fprintf(stderr, "Output network description in dot language to %S\n", outdot.c_str());

    if (!outRending.empty())
    {
        fprintf(stderr, "Executing a third-part tool for rendering dot:\n%S\n", rescmd.c_str());
#ifdef __unix__
        system(msra::strfun::utf8(rescmd).c_str());
#else
        _wsystem(rescmd.c_str());
#endif
        fprintf(stderr, "Done\n");
    }
}

@@ -1152,7 +1153,7 @@ void DoCommand(const ConfigParameters& config)
    else if (action[j] == "createLabelMap")
        DoCreateLabelMap<ElemType>(commandParams);
    else if (action[j] == "writeWordAndClass")
        DoWriteWordAndClassInfo<ElemType>(commandParams);
    else if (action[j] == "plot")
        DoTopologyPlot<ElemType>(commandParams);
    else if (action[j] == "SVD")

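Tying the dispatch above to DoTopologyPlot, a hypothetical "plot" command block could look like this (sketch only; the block name, paths, and values are invented; the option names come from the code above):

    command=plot

    plot=[
        action="plot"
        modelPath="models/trained.dnn"
        outputDotFile="models/trained.dot"        # optional; defaults to %modelpath%.dot
        outputFile="models/trained.png"           # optional; triggers rendering when set
        RenderCmd="dot.exe -Tpng -x <IN> -o<OUT>" # <IN>/<OUT> are replaced with the dot and image paths
    ]
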
@@ -2943,70 +2943,8 @@ public:
        }
    }

-   /**
-   call unit test of each node
-   this adds a verification of the correctness of node operations.
-   */
-   bool UnitTest(bool allowFragment = false)
-   {
-       // currently only validates nodes, we should validate everything we can
-       if (FeatureNodes().size() == 0 && !allowFragment)
-       {
-           throw std::runtime_error("No Feature nodes specified");
-       }
-       // first give criteria nodes as root node
-       if (FinalCriterionNodes().size() > 0)
-       {
-           for (auto node : FinalCriterionNodes())
-           {
-               if (!allowFragment) FormRecurentLoops(node);
-               size_t actualMBSize = this->GetActualMBSize();
-               this->SetActualMiniBatchSize(actualMBSize);
-               if (UnitTest(node) == false)
-                   return false;
-           }
-       }
-       else if (!allowFragment)
-       {
-           throw std::runtime_error("No Criterion nodes specified");
-       }
-       // now output nodes
-       if (OutputNodes().size() > 0)
-       {
-           for (auto node : OutputNodes())
-               if (UnitTest(node) == false)
-                   return false;
-       }
-       else if (!allowFragment)
-       {
-           throw std::runtime_error("No Output nodes specified");
-       }
-       // now evaluation nodes
-       if (EvaluationNodes().size() > 0)
-       {
-           for (auto node : EvaluationNodes())
-               if (UnitTest(node) == false)
-                   return false;
-       }
-       return true;
-   }
-
-   bool UnitTest(const ComputationNodePtr rootNode)
-   {
-       fprintf(stderr, "\n\n Unit test node %ws \n", rootNode->NodeName().c_str());
-
-       std::list<ComputationNodePtr>& nodes = GetEvalOrder(rootNode);
-
-       for (auto nodeIter = nodes.begin(); nodeIter != nodes.end(); nodeIter++)
-       {
-           if ((*nodeIter)->UnitTest() == false)
-               return false;
-       }
-
-       fprintf(stderr, "\n\n");
-
-       return true;
-   }

    //========================================
    // This function performs SVD decomposition for different groups of learnable parameters

@@ -58,10 +58,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
protected:
    // std containers such as list and map do not support class references, so we need to use pointers
    typedef ComputationNode<ElemType>* ComputationNodePtr;
    typedef std::pair<ComputationNodePtr, ComputationNodePtr> ComputationArc;

public:
-   ComputationNode(DEVICEID_TYPE deviceId) : m_functionValues(deviceId), m_gradientValues(deviceId)
+   ComputationNode(DEVICEID_TYPE deviceId): m_functionValues(deviceId), m_gradientValues(deviceId)
    {
        m_deviceId = deviceId;
        m_loopId = -1;

@@ -367,15 +367,15 @@ namespace Microsoft { namespace MSR { namespace CNTK {
    return m_indexInLoop;
}

std::wstring GetName() const
{
    return m_nodeName;
}

std::vector<ComputationNodePtr> GetChildren() const
{
    return m_children;
}

bool isVisisted()
{

@@ -687,39 +687,38 @@ namespace Microsoft { namespace MSR { namespace CNTK {
    }
}

// [1/13/2015 erw] add to enumerate all the edges
void EnumerateArcs(std::unordered_set<ComputationNodePtr>& vistied, std::list<ComputationArc>& arcs)
// enumerate arcs that can be reached starting from the current node's children
// [in/out] visited records already visited nodes
{
    std::list<ComputationNodePtr> tovisit;

    if (vistied.find(this) == vistied.end()) // only do when this node has not been visited before
    {
        tovisit.push_back(this);

        while (!tovisit.empty())
        {
            ComputationNodePtr curNode = tovisit.front();
            tovisit.pop_front();

            if (vistied.find(curNode) == vistied.end())
            {
                for (size_t i = 0; i < curNode->m_children.size(); i++)
                {
                    arcs.push_back(ComputationArc(curNode, curNode->m_children[i]));

                    if (vistied.find(curNode->m_children[i]) == vistied.end()) // this child has not been visited before
                    {
                        tovisit.push_front(curNode->m_children[i]); // going to visit each of the children
                    }
                }
                vistied.insert(curNode);
            }
        }
    }
}

// NOTE: we should reimplement this to be thread-safe and use a larger than requested initialized memory block
// we can then just wrap that memory block in a matrix of the correct dimensions since it will be const no one can change it

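A hypothetical call sketch for EnumerateArcs (not part of the commit; rootNode stands in for any node obtained from the network):

    // Collect every parent->child arc reachable from rootNode.
    std::unordered_set<ComputationNodePtr> visited;
    std::list<ComputationArc> arcs;
    rootNode->EnumerateArcs(visited, arcs);
    for (const auto& arc : arcs)
        fprintf(stderr, "%ws -> %ws\n", arc.first->NodeName().c_str(), arc.second->NodeName().c_str());
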
@@ -826,8 +825,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
    return nodes;
}

std::wstring CreateUniqNodeName() const
{
#ifdef USE_GUID_AS_NAME

@@ -515,72 +515,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
    node->CopyTo(this, newName, flags);
}

-bool UnitTest()
-{
-    try{
-        size_t nInput = 2;
-        size_t nHidden = 3;
-        size_t nOutput = 3;
-
-        Inputs(0)->FunctionValues().Resize(nInput, nHidden);
-        Inputs(0)->FunctionValues().SetValue(1.0);
-        Inputs(1)->FunctionValues().TransferFromDeviceToDevice(m_deviceId, CPUDEVICE, true);
-        Inputs(1)->FunctionValues().SwitchToMatrixType(DENSE, matrixFormatDense, false);
-        Inputs(1)->FunctionValues().Resize(nHidden, nOutput);
-        Inputs(1)->FunctionValues().SetValue(0.0);
-        Inputs(1)->FunctionValues().SetValue(0, 0, 1.0);
-        Inputs(1)->FunctionValues().SetValue(1, 1, 2.0);
-        Inputs(1)->FunctionValues().TransferFromDeviceToDevice(CPUDEVICE, m_deviceId, true);
-        Inputs(1)->FunctionValues().SwitchToMatrixType(SPARSE, matrixFormatSparseCSC, true);
-        FunctionValues().Resize(nInput, nOutput);
-
-        EvaluateThisNode();
-
-        /// check with expected values
-        FunctionValues().TransferFromDeviceToDevice(m_deviceId, CPUDEVICE, true);
-        if (!ISCLOSE(FunctionValues()(0, 0), 1.0, EPSILON) ||
-            !ISCLOSE(FunctionValues()(0, 1), 2.0, EPSILON) ||
-            !ISCLOSE(FunctionValues()(1, 1), 2.0, EPSILON) )
-            throw("LSTMNode forward computation error");
-
-        if (FunctionValues().GetDeviceId() != m_deviceId)
-            FunctionValues().TransferFromDeviceToDevice(FunctionValues().GetDeviceId(), m_deviceId, true);
-
-        GradientValues().Resize(nInput, nOutput);
-        GradientValues().SetValue(1.0);
-        for (size_t i = 0; i < 2; i++)
-        {
-            Inputs(i)->GradientValues().Resize(Inputs(i)->FunctionValues().GetNumRows(), Inputs(i)->FunctionValues().GetNumCols());
-            Inputs(i)->GradientValues().SetValue(0);
-        }
-        for (size_t i = 0; i < 2; i++)
-            ComputeInputPartial(i);
-
-        /// check with expected values
-        if (!ISCLOSE(Inputs(1)->GradientValues()(0, 0), 2, EPSILON) /// bi
-            || !ISCLOSE(Inputs(1)->GradientValues()(0, 1), 2, EPSILON) // Wxi
-            || !ISCLOSE(Inputs(1)->GradientValues()(1, 0), 2, EPSILON) // Whi
-            || !ISCLOSE(Inputs(1)->GradientValues()(2, 1), 2, EPSILON) // Wci
-            )
-            throw("LSTMNode gradient error on input gates");
-
-        for (size_t i = 0; i < 2; i++)
-        {
-            if (Inputs(i)->GradientValues().GetDeviceId() != m_deviceId)
-                Inputs(i)->GradientValues().TransferFromDeviceToDevice(Inputs(i)->GradientValues().GetDeviceId(), m_deviceId, true);
-        }
-
-    }
-    catch (...)
-    {
-        fprintf(stderr, "LookupTableNode unit test is not passed!");
-        return false;
-    }
-
-    fprintf(stderr, "LookupTableNode unit test passed!\n");
-    return true;
-}
};

template class LookupTableNode<float>;

@@ -2521,8 +2521,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
    Matrix<ElemType> m_temp;
};

template class CosDistanceWithNegativeSamplesNode<float>;
template class CosDistanceWithNegativeSamplesNode<double>;

template<class ElemType>
class TransposeNode : public ComputationNode<ElemType>

@@ -417,113 +417,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
    CopyImageSizeFromInputs();
}

-bool UnitTest()
-{
-    try{
-        size_t nT = 3;
-        size_t nInput = 2;
-
-        /// backup
-        Matrix<ElemType> f0(m_deviceId), func(m_deviceId);
-        Matrix<ElemType> target(m_deviceId);
-
-        Matrix<ElemType> boundary(m_deviceId);
-        boundary.Resize(1, nT);
-        boundary.SetValue(SENTENCE_MIDDLE);
-        boundary.ColumnSlice(0, 1).SetValue(SENTENCE_BEGIN);
-        vector<MinibatchPackingFlag> minibatchPackingFlag;
-        minibatchPackingFlag.resize(nT);
-        std::fill(minibatchPackingFlag.begin(), minibatchPackingFlag.end(), MinibatchPackingFlag::None);
-        minibatchPackingFlag[1] = MinibatchPackingFlag::UtteranceStart;
-        ResetBound(&boundary, &minibatchPackingFlag);
-
-        f0 = Inputs(0)->FunctionValues();
-        func = FunctionValues();
-
-        target.Resize(nInput, nT);
-        for (size_t i = 0; i < nT; i++)
-            target(0, i) = 1;
-
-        Inputs(0)->FunctionValues().Resize(nInput, nT);
-        Inputs(0)->FunctionValues().SetValue(ConstOnes(nInput, nT, m_deviceId));
-        Inputs(0)->FunctionValues().SetValue((ElemType)0.1);
-        FunctionValues().Resize(nInput, nT);
-        FunctionValues().SetValue(0.0);
-
-        for (size_t t = 0; t < nT; t++)
-        {
-            EvaluateThisNode(t);
-
-            if (t == 0)
-            {
-                /// check with expected values
-                if (!ISCLOSE(FunctionValues()(0, 0), 0.0, EPSILON) ||
-                    !ISCLOSE(FunctionValues()(0, 1), 0, EPSILON) ||
-                    !ISCLOSE(FunctionValues()(0, 2), 0, EPSILON) )
-                    throw("Delaynode forward computation error");
-            }
-
-            if (t == 1)
-            {
-                /// check with expected values
-                if (!ISCLOSE(FunctionValues()(0, 0), 0.0, EPSILON) ||
-                    !ISCLOSE(FunctionValues()(0, 1), 0.1, EPSILON) ||
-                    !ISCLOSE(FunctionValues()(0, 2), 0, EPSILON) )
-                    throw("Delaynode forward computation error");
-            }
-
-            if (t == 2)
-            {
-                /// check with expected values
-                if (!ISCLOSE(FunctionValues()(0, 0), 0.0, EPSILON) ||
-                    !ISCLOSE(FunctionValues()(0, 1), 0.1, EPSILON) ||
-                    !ISCLOSE(FunctionValues()(0, 2), 0.1, EPSILON) )
-                    throw("Delaynode forward computation error");
-            }
-
-            if (FunctionValues().GetDeviceId() != m_deviceId)
-                FunctionValues().TransferFromDeviceToDevice(FunctionValues().GetDeviceId(), m_deviceId, true);
-        }
-
-        GradientValues().Resize(nInput, nT);
-        GradientValues().SetValue(1.0);
-        Inputs(0)->GradientValues().Resize(nInput, nT);
-        Inputs(0)->GradientValues().SetValue(0);
-
-        for (int t = nT - 1; t >= 0; t--)
-        {
-            ComputeInputPartial(0, t);
-
-            if (t==nT-1)
-                /// check with expected values
-                if (!ISCLOSE(Inputs(0)->GradientValues()(0, 0), 0, EPSILON)
-                    || !ISCLOSE(Inputs(0)->GradientValues()(0, 1), 1, EPSILON)
-                    || !ISCLOSE(Inputs(0)->GradientValues()(0, 2), 0, EPSILON))
-                    throw("DelayNode gradient error on input gates");
-            if (t==nT-2)
-                if (!ISCLOSE(Inputs(0)->GradientValues()(0, 0), 1, EPSILON)
-                    || !ISCLOSE(Inputs(0)->GradientValues()(0, 1), 1, EPSILON)
-                    || !ISCLOSE(Inputs(0)->GradientValues()(0, 2), 0, EPSILON))
-                    throw("DelayNode gradient error on input gates");
-            if (t == 0)
-                if (!ISCLOSE(Inputs(0)->GradientValues()(0, 1), 1, EPSILON)
-                    || !ISCLOSE(Inputs(0)->GradientValues()(0, 1), 1, EPSILON)
-                    || !ISCLOSE(Inputs(0)->GradientValues()(0, 2), 0, EPSILON))
-                    throw("DelayNode gradient error on input gates");
-        }
-
-        if (Inputs(0)->GradientValues().GetDeviceId() != m_deviceId)
-            Inputs(0)->GradientValues().TransferFromDeviceToDevice(Inputs(0)->GradientValues().GetDeviceId(), m_deviceId, true);
-    }
-    catch (...)
-    {
-        fprintf(stderr, "Delaynode unit test is not passed!");
-        return false;
-    }
-
-    fprintf(stderr, "Delaynode unit test passed!\n");
-    return true;
-}

virtual void AttachInputs(const ComputationNodePtr inputNode)
{

@@ -822,105 +822,105 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}

template<class ElemType>
ComputationNetwork<ElemType>& SimpleNetworkBuilder<ElemType>::BuildSeqTrnLSTMNetworkFromDescription(size_t mbSize)
{
    if (m_net->GetTotalNumberOfNodes() < 1) //not built yet
    {
        ULONG randomSeed = 1;

        size_t numHiddenLayers = m_layerSizes.size() - 2;

        size_t numRecurrentLayers = m_recurrentLayers.size();

        ComputationNodePtr input = nullptr, w = nullptr, b = nullptr, u = nullptr, e = nullptr, delay = nullptr, output = nullptr, label = nullptr, prior = nullptr;
        ComputationNodePtr Wxo = nullptr, Who = nullptr, Wco = nullptr, bo = nullptr, Wxi = nullptr, Whi = nullptr, Wci = nullptr, bi = nullptr;
        ComputationNodePtr Wxf = nullptr, Whf = nullptr, Wcf = nullptr, bf = nullptr, Wxc = nullptr, Whc = nullptr, bc = nullptr;
        ComputationNodePtr ot = nullptr, it = nullptr, ft = nullptr, ct = nullptr, ht = nullptr;
        ComputationNodePtr delayHI = nullptr, delayCI = nullptr, delayHO = nullptr, delayHF = nullptr, delayHC = nullptr, delayCF = nullptr, delayCC = nullptr;
        ComputationNodePtr directWIO = nullptr, directInput = nullptr, directOutput = nullptr;
        ComputationNodePtr outputFromEachLayer[MAX_DEPTH] = { nullptr };
        ComputationNodePtr trans = nullptr;

        input = m_net->CreateInputNode(L"features", m_layerSizes[0], mbSize);
        m_net->FeatureNodes().push_back(input);

        if (m_applyMeanVarNorm)
        {
            w = m_net->Mean(input);
            b = m_net->InvStdDev(input);
            output = m_net->PerDimMeanVarNormalization(input, w, b);

            input = output;
        }

        if (m_lookupTableOrder > 0)
        {
            e = m_net->CreateLearnableParameter(msra::strfun::wstrprintf(L"E%d", 0), m_layerSizes[1], m_layerSizes[0] / m_lookupTableOrder);
            m_net->InitLearnableParameters(e, m_uniformInit, randomSeed++, m_initValueScale);
            output = m_net->LookupTable(e, input, L"LookupTable");

            if (m_addDropoutNodes)
                input = m_net->Dropout(output);
            else
                input = output;

            outputFromEachLayer[1] = input;
        }

        /// direct connect from input node to output node

        int recur_idx = 0;
        int offset = m_lookupTableOrder > 0 ? 1 : 0;
        if (numHiddenLayers > 0)
        {
            for (int i = offset; i<numHiddenLayers; i++)
            {
                if (m_recurrentLayers.size() > 0 && m_recurrentLayers[recur_idx] == i+1)
                {
                    output = (ComputationNodePtr)BuildLSTMComponent(randomSeed, mbSize, i, m_layerSizes[i] * (offset ? m_lookupTableOrder : 1), m_layerSizes[i + 1], input);
                    input = output;

                    recur_idx++;
                }
                else
                {
                    u = m_net->CreateLearnableParameter(msra::strfun::wstrprintf(L"U%d", i), m_layerSizes[i + 1], m_layerSizes[i] * (offset ? m_lookupTableOrder : 1));
                    m_net->InitLearnableParameters(u, m_uniformInit, randomSeed++, m_initValueScale);
                    b = m_net->CreateLearnableParameter(msra::strfun::wstrprintf(L"B%d", i), m_layerSizes[i + 1], 1);
                    output = ApplyNonlinearFunction(m_net->Plus(m_net->Times(u, input), b), i);
                }

                if (m_addDropoutNodes)
                    input = m_net->Dropout(output);
                else
                    input = output;
            }
        }

        w = m_net->CreateLearnableParameter(msra::strfun::wstrprintf(L"TimesBeforeSoftMax%d", numHiddenLayers), m_layerSizes[numHiddenLayers + 1], m_layerSizes[numHiddenLayers]);
        m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale);

        output = m_net->Times(w, input, L"outputsBeforeSoftmax");

        trans = m_net->CreateLearnableParameter(msra::strfun::wstrprintf(L"TransProb%d", numHiddenLayers), m_layerSizes[numHiddenLayers + 1], m_layerSizes[numHiddenLayers + 1]);
        trans->FunctionValues().SetValue((ElemType)1.0 / m_layerSizes[numHiddenLayers + 1]);
        // m_net->InitLearnableParameters(trans, m_uniformInit, randomSeed++, m_initValueScale);
        trans->NeedGradient() = true;
        label = m_net->CreateInputNode(L"labels", m_layerSizes[numHiddenLayers + 1], mbSize);
        AddTrainAndEvalCriterionNodes(output, label, nullptr, L"CRFTrainCriterion", L"CRFEvalCriterion", nullptr, trans);

        input = output;
        output = m_net->SequenceDecoder(label, input, trans, L"outputs");
        m_net->OutputNodes().push_back(output);

        output = m_net->Softmax(input, L"PosteriorProb");
    }

    m_net->ResetEvalTimeStamp();

    return *m_net;
}

template<class ElemType>
ComputationNetwork<ElemType>& SimpleNetworkBuilder<ElemType>::BuildCLASSLSTMNetworkFromDescription(size_t mbSize)

@@ -1756,13 +1756,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
    ComputationNodePtr Wxo = nullptr, Who = nullptr, Wco = nullptr, bo = nullptr, Wxi = nullptr, Whi = nullptr, Wci = nullptr, bi = nullptr;
    ComputationNodePtr clslogpostprob = nullptr;
    ComputationNodePtr bias = nullptr;
    ComputationNodePtr outputFromEachLayer[MAX_DEPTH] = { nullptr };

    input = m_net->CreateSparseInputNode(L"features", m_layerSizes[0], mbSize);
    m_net->FeatureNodes().push_back(input);

    if (input->FunctionValues().GetDeviceId() != CPUDEVICE)
        RuntimeError("BuildNCELSTMNetworkFromDescription : only support CPU sparse matrix input at this moment. Contact Yinggong Zhao (v-yinggz@microsoft.com) or Kaisheng Yao (kaisheny@microsoft.com) for updates.");

    if (m_applyMeanVarNorm)
    {
        w = m_net->Mean(input);

@@ -1782,28 +1780,53 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            input = m_net->Dropout(output);
        else
            input = output;

        outputFromEachLayer[1] = input;
    }

    /// direct connect from input node to output node

    int recur_idx = 0;
    int offset = m_lookupTableOrder > 0 ? 1 : 0;
    if (numHiddenLayers > 0)
    {
        output = (ComputationNodePtr)BuildLSTMComponent(randomSeed, mbSize, 0, m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1), m_layerSizes[offset + 1], input);
        input = output;
        outputFromEachLayer[offset + 1] = input;

        for (int i = 1 + offset; i<numHiddenLayers; i++)
        {
            if (m_recurrentLayers.size() > 0 && m_recurrentLayers[recur_idx] == i)
            {
                output = (ComputationNodePtr)BuildLSTMComponent(randomSeed, mbSize, i, m_layerSizes[i], m_layerSizes[i + 1], input);

                recur_idx++;
            }
            else
            {
                u = m_net->CreateLearnableParameter(msra::strfun::wstrprintf(L"U%d", i), m_layerSizes[i + 1], m_layerSizes[i]);
                m_net->InitLearnableParameters(u, m_uniformInit, randomSeed++, m_initValueScale);
                b = m_net->CreateLearnableParameter(msra::strfun::wstrprintf(L"B%d", i), m_layerSizes[i + 1], 1);
                output = ApplyNonlinearFunction(m_net->Plus(m_net->Times(u, input), b), i);
            }

            if (m_addDropoutNodes)
                input = m_net->Dropout(output);
            else
                input = output;

            outputFromEachLayer[i + 1] = input;
        }
    }

    for (size_t i = offset; i < m_layerSizes.size(); i++)
    {
        /// add direct connect from each layer's output to the layer before the output layer
        output = BuildDirectConnect(randomSeed, mbSize, i, (i > 1) ? m_layerSizes[i] : ((offset == 0) ? m_layerSizes[i] : m_layerSizes[i] * m_lookupTableOrder), m_layerSizes[numHiddenLayers], outputFromEachLayer[i], input);
        if (output != nullptr)
            input = output;
    }

    /// need to have an [input_dim x output_dim] matrix
    /// e.g., [200 x 10000], where 10000 is the vocabulary size
    /// this is for a speed-up issue, as the per-word matrix can be simply obtained using a column slice

@@ -3727,7 +3727,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
    CUDA_CALL(cudaMemcpy(res,d_res,sizeof(long)*1,cudaMemcpyDeviceToHost));
    CUDA_CALL(cudaFree(d_res));
    if (res[0]!=0)
-       return bResult = true;
+       bResult = true;
    delete [] res;
    return bResult;
}