Merge branch 'master' of https://git01.codeplex.com/cntk into bmitra/MatrixMorphSupplements

This commit is contained in:
UnderdogGeek 2015-07-02 07:35:17 +10:00
Родитель 284bfd3643 e918efa8b1
Коммит aae4d9c240
22 изменённых файлов: 1726 добавлений и 1587 удалений

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -934,16 +934,17 @@ public:
if (!isendworkaround.empty() && isendworkaround[k]) // secondary criterion to detect ends in broken lattices
{
k--; // don't advance, since nothing to advance over
goto skipscores;
}
// this is a regular token: update it in-place
auto & ai = uniquededgedatatokens[k];
if (ai.unit >= idmap.size())
throw std::runtime_error ("fread: broken-file heuristics failed");
ai.updateunit (idmap); // updates itself
if (!ai.last)
continue;
skipscores:
else
{
// this is a regular token: update it in-place
auto & ai = uniquededgedatatokens[k];
if (ai.unit >= idmap.size())
throw std::runtime_error ("fread: broken-file heuristics failed");
ai.updateunit (idmap); // updates itself
if (!ai.last)
continue;
}
// if last then skip over the lm and ac scores
k += skipscoretokens;
uniquealignments++;

Просмотреть файл

@ -57,17 +57,24 @@ namespace Microsoft { namespace MSR { namespace CNTK {
m_truncated = readerConfig("Truncated", "false");
m_convertLabelsToTargets = false;
m_numberOfuttsPerMinibatch = readerConfig("nbruttsineachrecurrentiter", "1");
ConfigArray numberOfuttsPerMinibatchForAllEpochs = readerConfig("nbruttsineachrecurrentiter", "1");
m_numberOfuttsPerMinibatchForAllEpochs = numberOfuttsPerMinibatchForAllEpochs;
if (m_numberOfuttsPerMinibatch < 1)
for (int i = 0; i < m_numberOfuttsPerMinibatchForAllEpochs.size(); i++)
{
LogicError("nbrUttsInEachRecurrentIter cannot be less than 1.");
m_numberOfuttsPerMinibatch = m_numberOfuttsPerMinibatchForAllEpochs[i];
if (m_numberOfuttsPerMinibatch < 1)
{
LogicError("nbrUttsInEachRecurrentIter cannot be less than 1.");
}
if (!m_truncated && m_numberOfuttsPerMinibatch != 1)
{
LogicError("nbrUttsInEachRecurrentIter has to be 1 if Truncated is set to false.");
}
}
if (!m_truncated && m_numberOfuttsPerMinibatch != 1)
{
LogicError("nbrUttsInEachRecurrentIter has to be 1 if Truncated is set to false.");
}
m_numberOfuttsPerMinibatch = m_numberOfuttsPerMinibatchForAllEpochs[0];
m_actualnumberOfuttsPerMinibatch = m_numberOfuttsPerMinibatch;
m_sentenceEnd.assign(m_numberOfuttsPerMinibatch, true);
@ -129,7 +136,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
RuntimeError("network needs at least 1 input and 1 output specified!");
}
//load data for all real-valued inputs (features)
foreach_index(i, featureNames)
{
@ -158,7 +165,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// update m_featDims to reflect the total input dimension (featDim x contextWindow), not the native feature dimension
// that is what the lower level feature readers expect
m_featDims[i] = m_featDims[i] * (1 + numContextLeft[i] + numContextRight[i]);
string type = thisFeature("type","Real");
if (type=="Real"){
m_nameToTypeMap[featureNames[i]] = InputOutputTypes::real;
@ -272,6 +279,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// get the read method, defaults to "blockRandomize" other option is "rollingWindow"
std::string readMethod(readerConfig("readMethod","blockRandomize"));
if (readMethod == "blockRandomize" && randomize == randomizeNone)
{
fprintf(stderr, "WARNING: Randomize cannot be set to None when readMethod is set to blockRandomize. Change it Auto");
randomize = randomizeAuto;
}
// see if they want to use readAhead
//m_readAhead = readerConfig("readAhead", "false");
@ -298,6 +312,25 @@ namespace Microsoft { namespace MSR { namespace CNTK {
if (n!=numFiles)
throw std::runtime_error (msra::strfun::strprintf ("number of files in each scriptfile inconsistent (%d vs. %d)", numFiles,n));
/*
do "..." expansion if the SCP uses relative path names
"..." in an SCP entry stands for the directory that contains the SCP file
for example, if the scp file is "//aaa/bbb/ccc/ddd.scp"
and contains entries like
.../file1.feat
.../file2.feat
etc.
the features will be read from
//aaa/bbb/ccc/file1.feat
//aaa/bbb/ccc/file2.feat
etc.
This works well if you store the scp file with the features but
do not want to create a different scp file every time you move or create new features
*/
wstring scpdircached;
for (auto & entry : filelist)
ExpandDotDotDot(entry, scriptpath, scpdircached);
infilesmulti.push_back(filelist);
}
#ifdef _WIN32
@ -346,8 +379,12 @@ namespace Microsoft { namespace MSR { namespace CNTK {
//std::vector<std::wstring> pagepath;
foreach_index(i, mlfpathsmulti)
{
const map<string,size_t>* wordmap = NULL;
#ifdef WIN32
wordmap = unigram ? &unigramsymbols : (map<string,size_t>*) NULL;
#endif
msra::asr::htkmlfreader<msra::asr::htkmlfentry,msra::lattices::lattice::htkmlfwordsequence>
labels(mlfpathsmulti[i], restrictmlftokeys, statelistpaths[i], /*unigram ? &unigramsymbols :*/(map<string,size_t>*) NULL, (map<string,size_t>*) NULL, htktimetoframe); // label MLF
labels(mlfpathsmulti[i], restrictmlftokeys, statelistpaths[i], wordmap, (map<string,size_t>*) NULL, htktimetoframe); // label MLF
// get the temp file name for the page file
labelsmulti.push_back(labels);
}
@ -362,6 +399,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// now get the frame source. This has better randomization and doesn't create temp files
m_frameSource = new msra::dbn::minibatchutterancesourcemulti(infilesmulti, labelsmulti, m_featDims, m_labelDims, numContextLeft, numContextRight, randomize, *m_lattices, m_latticeMap, framemode);
m_frameSource->setverbosity(verbosity);
//m_frameSource = new msra::dbn::minibatchutterancesource(infilesmulti[0], labelsmulti[0], m_featDims[0], m_labelDims[0], numContextLeft[0], numContextRight[0], randomize, *m_lattices, m_latticeMap, framemode);
}
@ -540,7 +578,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
m_fileEvalSource = new msra::dbn::FileEvalSource(realDims, numContextLeft, numContextRight, evalchunksize);
}
// destructor - virtual so it gets called properly
template<class ElemType>
@ -599,6 +637,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
void HTKMLFReader<ElemType>::StartMinibatchLoop(size_t mbSize, size_t epoch, size_t requestedEpochSamples)
{
m_mbSize = mbSize;
m_numberOfuttsPerMinibatch = m_numberOfuttsPerMinibatchForAllEpochs[epoch];
m_actualnumberOfuttsPerMinibatch = m_numberOfuttsPerMinibatch;
m_sentenceEnd.assign(m_numberOfuttsPerMinibatch, true);
m_processedFrame.assign(m_numberOfuttsPerMinibatch, 0);
m_toProcess.assign(m_numberOfuttsPerMinibatch, 0);
m_switchFrame.assign(m_numberOfuttsPerMinibatch, 0);
if (m_trainOrTest)
{
@ -649,18 +694,18 @@ namespace Microsoft { namespace MSR { namespace CNTK {
delete m_mbiter;
msra::dbn::minibatchsource* source = m_frameSource;
/*if (m_readAhead)
{
if (m_readAheadSource == NULL)
{
m_readAheadSource = new msra::dbn::minibatchreadaheadsource (*source, requestedEpochSamples);
}
else if (m_readAheadSource->epochsize() != requestedEpochSamples)
{
delete m_readAheadSource;
m_readAheadSource = new msra::dbn::minibatchreadaheadsource (*source, requestedEpochSamples);
}
source = m_readAheadSource;
}*/
{
if (m_readAheadSource == NULL)
{
m_readAheadSource = new msra::dbn::minibatchreadaheadsource (*source, requestedEpochSamples);
}
else if (m_readAheadSource->epochsize() != requestedEpochSamples)
{
delete m_readAheadSource;
m_readAheadSource = new msra::dbn::minibatchreadaheadsource (*source, requestedEpochSamples);
}
source = m_readAheadSource;
}*/
m_mbiter = new msra::dbn::minibatchiterator(*source, epoch, requestedEpochSamples, mbSize, datapasses);
if (!m_featuresBufferMultiIO.empty())
{
@ -698,7 +743,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
delete[] m_featuresBufferMultiUtt[u];
m_featuresBufferMultiUtt[u] = NULL;
m_featuresBufferAllocatedMultiUtt[u] = 0;
}
}
if (m_labelsBufferMultiUtt[u] != NULL)
{
delete[] m_labelsBufferMultiUtt[u];
@ -761,7 +806,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
for (auto iter=matrices.begin();iter!=matrices.end();iter++)
{
if (m_nameToTypeMap.find(iter->first)==m_nameToTypeMap.end())
throw std::runtime_error(msra::strfun::strprintf("minibatch requested for input node %S not found in reader - cannot generate input\n",iter->first.c_str()));
throw std::runtime_error(msra::strfun::strprintf("minibatch requested for input node %ls not found in reader - cannot generate input\n",iter->first.c_str()));
}
m_checkDictionaryKeys=false;
@ -771,144 +816,144 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
if (m_truncated == false)
{
if (!(*m_mbiter))
return false;
if (!(*m_mbiter))
return false;
// now, access all features and labels by iterating over map of "matrices"
typename std::map<std::wstring, Matrix<ElemType>*>::iterator iter;
for (iter = matrices.begin();iter!=matrices.end(); iter++)
{
// dereference matrix that corresponds to key (input/output name) and
// populate based on whether its a feature or a label
Matrix<ElemType>& data = *matrices[iter->first]; // can be features or labels
if (m_nameToTypeMap[iter->first] == InputOutputTypes::real)
// now, access all features and labels by iterating over map of "matrices"
typename std::map<std::wstring, Matrix<ElemType>*>::iterator iter;
for (iter = matrices.begin();iter!=matrices.end(); iter++)
{
// dereference matrix that corresponds to key (input/output name) and
// populate based on whether its a feature or a label
Matrix<ElemType>& data = *matrices[iter->first]; // can be features or labels
id = m_featureNameToIdMap[iter->first];
dim = m_featureNameToDimMap[iter->first];
const msra::dbn::matrixstripe feat = m_mbiter->frames(id);
const size_t actualmbsize = feat.cols(); // it may still return less if at end of sweep TODO: this check probably only needs to happen once
assert (actualmbsize == m_mbiter->currentmbframes());
skip = (!m_partialMinibatch && m_mbiter->requestedframes() != actualmbsize && m_frameSource->totalframes() > actualmbsize);
// check to see if we got the number of frames we requested
if (!skip)
if (m_nameToTypeMap[iter->first] == InputOutputTypes::real)
{
// copy the features over to our array type
assert(feat.rows()==dim); // check feature dimension matches what's expected
if (m_featuresBufferMultiIO[id]==NULL)
{
m_featuresBufferMultiIO[id] = new ElemType[feat.rows()*feat.cols()];
m_featuresBufferAllocatedMultiIO[id] = feat.rows()*feat.cols();
}
else if (m_featuresBufferAllocatedMultiIO[id]<feat.rows()*feat.cols()) //buffer size changed. can be partial minibatch
{
delete[] m_featuresBufferMultiIO[id];
m_featuresBufferMultiIO[id] = new ElemType[feat.rows()*feat.cols()];
m_featuresBufferAllocatedMultiIO[id] = feat.rows()*feat.cols();
}
// shouldn't need this since we fill up the entire buffer below
//memset(m_featuresBufferMultiIO[id],0,sizeof(ElemType)*feat.rows()*feat.cols());
id = m_featureNameToIdMap[iter->first];
dim = m_featureNameToDimMap[iter->first];
const msra::dbn::matrixstripe feat = m_mbiter->frames(id);
const size_t actualmbsize = feat.cols(); // it may still return less if at end of sweep TODO: this check probably only needs to happen once
assert (actualmbsize == m_mbiter->currentmbframes());
skip = (!m_partialMinibatch && m_mbiter->requestedframes() != actualmbsize && m_frameSource->totalframes() > actualmbsize);
if (sizeof(ElemType) == sizeof(float))
// check to see if we got the number of frames we requested
if (!skip)
{
for (int j=0; j < feat.cols(); j++) // column major, so iterate columns
// copy the features over to our array type
assert(feat.rows()==dim); // check feature dimension matches what's expected
if (m_featuresBufferMultiIO[id]==NULL)
{
// copy over the entire column at once, need to do this because SSEMatrix may have gaps at the end of the columns
memcpy_s(&m_featuresBufferMultiIO[id][j*feat.rows()],sizeof(ElemType)*feat.rows(),&feat(0,j),sizeof(ElemType)*feat.rows());
m_featuresBufferMultiIO[id] = new ElemType[feat.rows()*feat.cols()];
m_featuresBufferAllocatedMultiIO[id] = feat.rows()*feat.cols();
}
}
else
{
for (int j=0; j < feat.cols(); j++) // column major, so iterate columns in outside loop
else if (m_featuresBufferAllocatedMultiIO[id]<feat.rows()*feat.cols()) //buffer size changed. can be partial minibatch
{
for (int i = 0; i < feat.rows(); i++)
delete[] m_featuresBufferMultiIO[id];
m_featuresBufferMultiIO[id] = new ElemType[feat.rows()*feat.cols()];
m_featuresBufferAllocatedMultiIO[id] = feat.rows()*feat.cols();
}
// shouldn't need this since we fill up the entire buffer below
//memset(m_featuresBufferMultiIO[id],0,sizeof(ElemType)*feat.rows()*feat.cols());
if (sizeof(ElemType) == sizeof(float))
{
for (int j=0; j < feat.cols(); j++) // column major, so iterate columns
{
m_featuresBufferMultiIO[id][j*feat.rows()+i] = feat(i,j);
// copy over the entire column at once, need to do this because SSEMatrix may have gaps at the end of the columns
memcpy_s(&m_featuresBufferMultiIO[id][j*feat.rows()],sizeof(ElemType)*feat.rows(),&feat(0,j),sizeof(ElemType)*feat.rows());
}
}
else
{
for (int j=0; j < feat.cols(); j++) // column major, so iterate columns in outside loop
{
for (int i = 0; i < feat.rows(); i++)
{
m_featuresBufferMultiIO[id][j*feat.rows()+i] = feat(i,j);
}
}
}
data.SetValue(feat.rows(), feat.cols(), m_featuresBufferMultiIO[id],matrixFlagNormal);
}
data.SetValue(feat.rows(), feat.cols(), m_featuresBufferMultiIO[id],matrixFlagNormal);
}
}
else if (m_nameToTypeMap[iter->first] == InputOutputTypes::category)
{
id = m_labelNameToIdMap[iter->first];
dim = m_labelNameToDimMap[iter->first];
const vector<size_t> & uids = m_mbiter->labels(id);
// need skip logic here too in case labels are first in map not features
const size_t actualmbsize = uids.size(); // it may still return less if at end of sweep TODO: this check probably only needs to happen once
assert (actualmbsize == m_mbiter->currentmbframes());
skip = (!m_partialMinibatch && m_mbiter->requestedframes() != actualmbsize && m_frameSource->totalframes() > actualmbsize);
if (!skip)
else if (m_nameToTypeMap[iter->first] == InputOutputTypes::category)
{
// copy the labels over to array type
//data.Resize(udims[id], uids.size());
//data.SetValue((ElemType)0);
id = m_labelNameToIdMap[iter->first];
dim = m_labelNameToDimMap[iter->first];
const vector<size_t> & uids = m_mbiter->labels(id);
// loop through the columns and set one value to 1
// in the future we want to use a sparse matrix here
//for (int i = 0; i < uids.size(); i++)
//{
// assert(uids[i] <udims[id]);
// data(uids[i], i) = (ElemType)1;
//}
// need skip logic here too in case labels are first in map not features
const size_t actualmbsize = uids.size(); // it may still return less if at end of sweep TODO: this check probably only needs to happen once
assert (actualmbsize == m_mbiter->currentmbframes());
skip = (!m_partialMinibatch && m_mbiter->requestedframes() != actualmbsize && m_frameSource->totalframes() > actualmbsize);
if (m_labelsBufferMultiIO[id]==NULL)
if (!skip)
{
m_labelsBufferMultiIO[id] = new ElemType[dim*uids.size()];
m_labelsBufferAllocatedMultiIO[id] = dim*uids.size();
}
else if (m_labelsBufferAllocatedMultiIO[id]<dim*uids.size())
{
delete[] m_labelsBufferMultiIO[id];
m_labelsBufferMultiIO[id] = new ElemType[dim*uids.size()];
m_labelsBufferAllocatedMultiIO[id] = dim*uids.size();
}
memset(m_labelsBufferMultiIO[id],0,sizeof(ElemType)*dim*uids.size());
// copy the labels over to array type
//data.Resize(udims[id], uids.size());
//data.SetValue((ElemType)0);
if (m_convertLabelsToTargetsMultiIO[id])
{
size_t labelDim = m_labelToTargetMapMultiIO[id].size();
for (int i = 0; i < uids.size(); i++)
{
assert(uids[i] < labelDim); labelDim;
size_t labelId = uids[i];
for (int j = 0; j < dim; j++)
{
m_labelsBufferMultiIO[id][i*dim + j] = m_labelToTargetMapMultiIO[id][labelId][j];
}
}
}
else
{
// loop through the columns and set one value to 1
// in the future we want to use a sparse matrix here
for (int i = 0; i < uids.size(); i++)
//for (int i = 0; i < uids.size(); i++)
//{
// assert(uids[i] <udims[id]);
// data(uids[i], i) = (ElemType)1;
//}
if (m_labelsBufferMultiIO[id]==NULL)
{
assert(uids[i] < dim);
//labels(uids[i], i) = (ElemType)1;
m_labelsBufferMultiIO[id][i*dim+uids[i]]=(ElemType)1;
m_labelsBufferMultiIO[id] = new ElemType[dim*uids.size()];
m_labelsBufferAllocatedMultiIO[id] = dim*uids.size();
}
else if (m_labelsBufferAllocatedMultiIO[id]<dim*uids.size())
{
delete[] m_labelsBufferMultiIO[id];
m_labelsBufferMultiIO[id] = new ElemType[dim*uids.size()];
m_labelsBufferAllocatedMultiIO[id] = dim*uids.size();
}
memset(m_labelsBufferMultiIO[id],0,sizeof(ElemType)*dim*uids.size());
if (m_convertLabelsToTargetsMultiIO[id])
{
size_t labelDim = m_labelToTargetMapMultiIO[id].size();
for (int i = 0; i < uids.size(); i++)
{
assert(uids[i] < labelDim); labelDim;
size_t labelId = uids[i];
for (int j = 0; j < dim; j++)
{
m_labelsBufferMultiIO[id][i*dim + j] = m_labelToTargetMapMultiIO[id][labelId][j];
}
}
}
else
{
// loop through the columns and set one value to 1
// in the future we want to use a sparse matrix here
for (int i = 0; i < uids.size(); i++)
{
assert(uids[i] < dim);
//labels(uids[i], i) = (ElemType)1;
m_labelsBufferMultiIO[id][i*dim+uids[i]]=(ElemType)1;
}
}
data.SetValue(dim,uids.size(),m_labelsBufferMultiIO[id],matrixFlagNormal);
}
data.SetValue(dim,uids.size(),m_labelsBufferMultiIO[id],matrixFlagNormal);
}
}
else{
//default:
throw runtime_error(msra::strfun::strprintf("GetMinibatchMultiIO:: unknown InputOutputType for %S\n",(iter->first).c_str()));
}
else{
//default:
throw runtime_error(msra::strfun::strprintf("GetMinibatchMultiIO:: unknown InputOutputType for %S\n",(iter->first).c_str()));
}
}
// advance to the next minibatch
(*m_mbiter)++;
}
// advance to the next minibatch
(*m_mbiter)++;
}
else
{
@ -1184,17 +1229,17 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
if (matrices.find(iter->first)==matrices.end())
{
fprintf(stderr,"GetMinibatchToWrite: feature node %S specified in reader not found in the network\n",iter->first.c_str());
fprintf(stderr,"GetMinibatchToWrite: feature node %ls specified in reader not found in the network\n",iter->first.c_str());
throw std::runtime_error("GetMinibatchToWrite: feature node specified in reader not found in the network.");
}
}
/*
for (auto iter=matrices.begin();iter!=matrices.end();iter++)
{
if (m_featureNameToIdMap.find(iter->first)==m_featureNameToIdMap.end())
throw std::runtime_error(msra::strfun::strprintf("minibatch requested for input node %ws not found in reader - cannot generate input\n",iter->first.c_str()));
}
*/
for (auto iter=matrices.begin();iter!=matrices.end();iter++)
{
if (m_featureNameToIdMap.find(iter->first)==m_featureNameToIdMap.end())
throw std::runtime_error(msra::strfun::strprintf("minibatch requested for input node %ls not found in reader - cannot generate input\n",iter->first.c_str()));
}
*/
m_checkDictionaryKeys=false;
}
@ -1329,7 +1374,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
m_labelsStartIndexMultiUtt[id+i*numOfLabel] = totalLabelsNum;
totalLabelsNum = m_labelsStartIndexMultiUtt[id+i*numOfLabel] + dim * actualmbsizeOri;
}
if (m_labelsBufferMultiUtt[i]==NULL)
{
m_labelsBufferMultiUtt[i] = new ElemType[totalLabelsNum];
@ -1383,7 +1428,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
}
}
for (auto it = m_labelNameToIdMap.begin(); it != m_labelNameToIdMap.end(); ++it)
{
size_t id = m_labelNameToIdMap[it->first];
@ -1425,8 +1470,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
return true;
}
// GetLabelMapping - Gets the label mapping from integer to type in file
@ -1451,7 +1496,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
if (labelListFile==L"")
throw std::runtime_error("HTKMLFReader::ReadLabelToTargetMappingFile(): cannot read labelToTargetMappingFile without a labelMappingFile!");
vector<std::wstring> labelList;
size_t count, numLabels;
count=0;
@ -1573,7 +1618,30 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
}
// Expands the "..." placeholder in an SCP entry to the directory that contains the SCP file.
//
// featPath     - [in/out] SCP entry; the first occurrence of "..." (if any) is replaced in place.
//                Entries without "..." are left untouched.
// scpPath      - path of the SCP script file the entry was read from.
// scpDirCached - [in/out] caller-owned cache of the SCP file's directory. Pass an empty string on
//                the first call; it is filled in here and reused while it stays non-empty.
//
// If scpPath has no directory component, "." is substituted, so that ".../a.feat" becomes
// "./a.feat" (next to the SCP file) instead of the root-anchored "/a.feat".
template<class ElemType>
void HTKMLFReader<ElemType>::ExpandDotDotDot(wstring & featPath, const wstring & scpPath, wstring & scpDirCached)
{
    if (scpDirCached.empty())
    {
        // The directory is everything before the last path separator ('/' or '\').
        // Note: for an SCP file directly under the root (separator at index 0) the directory is
        // the empty string, so the cache stays empty and is simply recomputed on the next call.
        const auto sepPos = scpPath.find_last_of(L"/\\");
        if (sepPos != wstring::npos)
            scpDirCached = scpPath.substr(0, sepPos);
        else
            scpDirCached = L"."; // no directory given: the SCP file is in the current directory
    }

    // "..." is three characters long; splice the directory in its place
    const size_t pos = featPath.find(L"...");
    if (pos != wstring::npos)
        featPath = featPath.substr(0, pos) + scpDirCached + featPath.substr(pos + 3);
}
template class HTKMLFReader<float>;
template class HTKMLFReader<double>;
}}}
}}}

Просмотреть файл

@ -6,6 +6,7 @@
// HTKMLFReader.h - Include file for the HTK and MLF format of features and samples
#pragma once
#include "DataReader.h"
#include "commandArgUtil.h" // for intargvector
namespace Microsoft { namespace MSR { namespace CNTK {
@ -13,6 +14,9 @@ template<class ElemType>
class HTKMLFReader : public IDataReader<ElemType>
{
private:
const static size_t m_htkRandomizeAuto = 0;
const static size_t m_htkRandomizeDisable = (size_t)-1;
msra::dbn::minibatchiterator* m_mbiter;
msra::dbn::minibatchsource* m_frameSource;
//msra::dbn::minibatchreadaheadsource* m_readAheadSource;
@ -24,6 +28,7 @@ private:
bool m_readAhead;
bool m_truncated;
vector<size_t> m_processedFrame;
intargvector m_numberOfuttsPerMinibatchForAllEpochs;
size_t m_numberOfuttsPerMinibatch;
size_t m_actualnumberOfuttsPerMinibatch;
size_t m_mbSize;
@ -86,6 +91,7 @@ private:
size_t ReadLabelToTargetMappingFile (const std::wstring& labelToTargetMappingFile, const std::wstring& labelListFile, std::vector<std::vector<ElemType>>& labelToTargetMap);
void ExpandDotDotDot(wstring & featPath, const wstring & scpPath, wstring & scpDirCached);
enum InputOutputTypes
{
real,

Просмотреть файл

@ -13,15 +13,6 @@
//#ifndef __unix__
#include "ssematrix.h"
//#endif
//#include "latticearchive.h" // for reading HTK phoneme lattices (MMI training)
//#include "simplesenonehmm.h" // for MMI scoring
//#include "msra_mgram.h" // for unigram scores of ground-truth path in sequence training
//#include "rollingwindowsource.h" // minibatch sources
//#include "utterancesource.h"
//#include "readaheadsource.h"
//#include "chunkevalsource.h"
//#include "minibatchiterator.h"
#define DATAWRITER_EXPORTS // creating the exports here
#include "DataWriter.h"

Просмотреть файл

@ -341,8 +341,20 @@ namespace msra { namespace dbn {
const size_t framesInBlock = framesMulti[i].size();
feat[i].resize(vdims[i], framesInBlock); // input features for whole utt (col vectors)
// augment the features
size_t leftextent, rightextent;
// page in the needed range of frames
if (leftcontext[i] == 0 && rightcontext[i] == 0)
{
leftextent = rightextent = augmentationextent(framesMulti[i][0].size(), vdims[i]);
}
else
{
leftextent = leftcontext[i];
rightextent = rightcontext[i];
}
//msra::dbn::augmentneighbors(framesMulti[i], boundaryFlags, 0, leftcontext[i], rightcontext[i],)
msra::dbn::augmentneighbors (framesMulti[i], boundaryFlags, leftcontext[i], rightcontext[i], 0, framesInBlock, feat[i]);
msra::dbn::augmentneighbors (framesMulti[i], boundaryFlags, leftextent, rightextent, 0, framesInBlock, feat[i]);
}
minibatchReady=true;
}

Просмотреть файл

@ -242,6 +242,30 @@ void fflushOrDie (FILE * f)
// ----------------------------------------------------------------------------
size_t filesize (FILE * f)
{
#ifdef WIN32
size_t curPos = _ftelli64 (f);
if (curPos == -1L)
{
RuntimeError ("error determining file position: %s", strerror (errno));
}
int rc = _fseeki64 (f, 0, SEEK_END);
if (rc != 0)
{
RuntimeError ("error seeking to end of file: %s", strerror (errno));
}
size_t len = _ftelli64 (f);
if (len == -1L)
{
RuntimeError ("error determining file position: %s", strerror (errno));
}
rc = _fseeki64 (f, curPos, SEEK_SET);
if (rc != 0)
{
RuntimeError ("error resetting file position: %s", strerror (errno));
}
return len;
#else
// linux version
long curPos = ftell (f);
if (curPos == -1L)
{

Просмотреть файл

@ -230,7 +230,7 @@ public:
// We write to a tmp file first to ensure we don't leave broken files that would confuse make mode.
template<class MATRIX> static void write (const wstring & path, const string & kindstr, unsigned int period, const MATRIX & feat)
{
wstring tmppath = path + L""; // tmp path for make-mode compliant
wstring tmppath = path + L"$$"; // tmp path for make-mode compliant
unlinkOrDie (path); // delete if old file is already there
// write it out
size_t featdim = feat.rows();
@ -613,7 +613,7 @@ public:
struct htkmlfentry
{
unsigned int firstframe; // range [firstframe,firstframe+numframes)
unsigned short numframes;
unsigned int numframes;
//unsigned short classid; // numeric state id
unsigned int classid; // numeric state id - mseltzer changed from ushort to uint for untied cd phones > 2^16
@ -624,7 +624,7 @@ private:
if (te < ts) throw std::runtime_error ("htkmlfentry: end time below start time??");
// save
firstframe = (unsigned int) ts;
numframes = (unsigned short) (te - ts);
numframes = (unsigned int) (te - ts);
classid = (unsigned int) uid;
// check for numeric overflow
if (firstframe != ts || firstframe + numframes != te || classid != uid)

Просмотреть файл

@ -933,20 +933,20 @@ public:
const size_t skipscoretokens = info.hasacscores ? 2 : 1;
for (size_t k = skipscoretokens; k < uniquededgedatatokens.size(); k++)
{
auto & ai = uniquededgedatatokens[k];
if (!isendworkaround.empty() && isendworkaround[k]) // secondary criterion to detect ends in broken lattices
{
k--; // don't advance, since nothing to advance over
goto skipscores;
}
// this is a regular token: update it in-place
if (ai.unit >= idmap.size())
throw std::runtime_error ("fread: broken-file heuristics failed");
ai.updateunit (idmap); // updates itself
if (!ai.last)
continue;
skipscores:
else
{
// this is a regular token: update it in-place
auto & ai = uniquededgedatatokens[k];
if (ai.unit >= idmap.size())
throw std::runtime_error ("fread: broken-file heuristics failed");
ai.updateunit (idmap); // updates itself
if (!ai.last)
continue;
}
// if last then skip over the lm and ac scores
k += skipscoretokens;
uniquealignments++;

Просмотреть файл

@ -10,6 +10,7 @@
#pragma once
#include "Platform.h"
#define _CRT_SECURE_NO_WARNINGS // "secure" CRT not available on all platforms
#ifndef __unix__

Просмотреть файл

@ -749,7 +749,8 @@ private:
if (!chunkdata.isinram())
return; // already out
fprintf (stderr, "releaserandomizedchunk: paging out randomized chunk %zu (frame range [%zu..%zu]), %zu resident in RAM\n",
if (verbosity)
fprintf (stderr, "releaserandomizedchunk: paging out randomized chunk %zu (frame range [%zu..%zu]), %zu resident in RAM\n",
k, randomizedchunks[k].globalts, randomizedchunks[k].globalte()-1, chunksinram-1);
chunkdata.releasedata();
chunksinram--;
@ -768,7 +769,8 @@ private:
if (chunkdata.isinram())
return false;
fprintf (stderr, "requirerandomizedchunk: paging in randomized chunk %zu (frame range [%zu..%zu]), %zu resident in RAM\n", chunkindex, chunk.globalts, chunk.globalte()-1, chunksinram+1);
if (verbosity)
fprintf (stderr, "requirerandomizedchunk: paging in randomized chunk %zu (frame range [%zu..%zu]), %zu resident in RAM\n", chunkindex, chunk.globalts, chunk.globalte()-1, chunksinram+1);
msra::util::attempt (5, [&]() // (reading from network)
{
chunkdata.requiredata (featkind, featdim, sampperiod, this->lattices);
@ -858,7 +860,8 @@ public:
transcripts.clear();
// return these utterances
fprintf (stderr, "getbatch: getting utterances %zu..%zu (%zu frames out of %zu requested) in sweep %zu\n", spos, epos -1, mbframes, framesrequested, sweep);
if (verbosity > 0)
fprintf (stderr, "getbatch: getting utterances %zu..%zu (%zu frames out of %zu requested) in sweep %zu\n", spos, epos -1, mbframes, framesrequested, sweep);
size_t tspos = 0; // relative start of utterance 'pos' within the returned minibatch
for (size_t pos = spos; pos < epos; pos++)
{
@ -922,7 +925,8 @@ public:
const size_t lastchunk = chunkforframepos (globalte-1);
const size_t windowbegin = randomizedchunks[firstchunk].windowbegin;
const size_t windowend = randomizedchunks[lastchunk].windowend;
fprintf (stderr, "getbatch: getting randomized frames [%zu..%zu] (%zu frames out of %zu requested) in sweep %zu; chunks [%zu..%zu] -> chunk window [%zu..%zu)\n",
if (verbosity > 0)
fprintf (stderr, "getbatch: getting randomized frames [%zu..%zu] (%zu frames out of %zu requested) in sweep %zu; chunks [%zu..%zu] -> chunk window [%zu..%zu)\n",
globalts, globalte, mbframes, framesrequested, sweep, firstchunk, lastchunk, windowbegin, windowend);
// release all data outside, and page in all data inside
for (size_t k = 0; k < windowbegin; k++)

Просмотреть файл

@ -117,7 +117,7 @@ class minibatchutterancesourcemulti : public minibatchsource
}
// page in data for this chunk
// We pass in the feature info variables by ref which will be filled lazily upon first read
void requiredata (string & featkind, size_t & featdim, unsigned int & sampperiod, const latticesource & latticesource) const
void requiredata (string & featkind, size_t & featdim, unsigned int & sampperiod, const latticesource & latticesource, int verbosity=0) const
{
if (numutterances() == 0)
@ -148,7 +148,8 @@ class minibatchutterancesourcemulti : public minibatchsource
latticesource.getlattices (utteranceset[i].key(), lattices[i], uttframes.cols());
}
//fprintf (stderr, "\n");
fprintf (stderr, "requiredata: %zu utterances read\n", utteranceset.size());
if (verbosity)
fprintf (stderr, "requiredata: %zu utterances read\n", utteranceset.size());
}
catch (...)
{
@ -403,15 +404,14 @@ public:
// TODO: we can store labels more efficiently now since we don't do frame-wise random access anymore.
// OK, utterance has all we need --remember it
utteranceset.push_back (std::move (utterance));
if (m==0)
{
_totalframes += uttframes;
framesaccum.push_back(uttframes); //track number of frames in each utterance - first feature is the reference
if (!labels.empty() && !lacksmlf)
//if (!labels.empty() && labelsiter != labels[0].end())
{
// first verify that all the label files have the proper duration
bool durationmatch = true;
foreach_index (j, labels)
{
const auto & labseq = labels[j].find(key)->second;
@ -421,31 +421,43 @@ public:
{
fprintf (stderr, " [duration mismatch (%zu in label vs. %zu in feat file), skipping %S]", labframes, uttframes, key.c_str());
nomlf++;
continue; // skip this utterance at all
durationmatch = false;
break; // continue; // skip this utterance at all
}
// expand classid sequence into flat array
foreach_index (i, labseq)
}
if (durationmatch){
utteranceset.push_back(std::move(utterance));
_totalframes += uttframes;
framesaccum.push_back(uttframes); //track number of frames in each utterance - first feature is the reference
// then parse each mlf if the durations are consistent
foreach_index(j, labels)
{
const auto & e = labseq[i];
if ((i > 0 && labseq[i-1].firstframe + labseq[i-1].numframes != e.firstframe) || (i == 0 && e.firstframe != 0))
throw std::runtime_error (msra::strfun::strprintf ("minibatchutterancesource: labels not in consecutive order MLF in label set: %S", key.c_str()));
if (e.classid >= udim[j])
const auto & labseq = labels[j].find(key)->second;
// expand classid sequence into flat array
foreach_index (i, labseq)
{
throw std::runtime_error (msra::strfun::strprintf ("minibatchutterancesource: class id exceeds model output dimension"));
const auto & e = labseq[i];
if ((i > 0 && labseq[i-1].firstframe + labseq[i-1].numframes != e.firstframe) || (i == 0 && e.firstframe != 0))
throw std::runtime_error (msra::strfun::strprintf ("minibatchutterancesource: labels not in consecutive order MLF in label set: %S", key.c_str()));
if (e.classid >= udim[j])
{
throw std::runtime_error (msra::strfun::strprintf ("minibatchutterancesource: class id exceeds model output dimension"));
}
if (e.classid != (CLASSIDTYPE) e.classid)
throw std::runtime_error ("CLASSIDTYPE has too few bits");
for (size_t t = e.firstframe; t < e.firstframe + e.numframes; t++)
classids[j]->push_back ((CLASSIDTYPE) e.classid);
numclasses[j] = max (numclasses[j], (size_t)(1u + e.classid));
counts[j].resize (numclasses[j], 0);
counts[j][e.classid] += e.numframes;
}
if (e.classid != (CLASSIDTYPE) e.classid)
throw std::runtime_error ("CLASSIDTYPE has too few bits");
for (size_t t = e.firstframe; t < e.firstframe + e.numframes; t++)
classids[j]->push_back ((CLASSIDTYPE) e.classid);
numclasses[j] = max (numclasses[j], (size_t)(1u + e.classid));
counts[j].resize (numclasses[j], 0);
counts[j][e.classid] += e.numframes;
classids[j]->push_back ((CLASSIDTYPE) -1); // append a boundary marker for checking
if (!labels[j].empty() && classids[j]->size() != _totalframes + utteranceset.size())
throw std::logic_error (msra::strfun::strprintf ("minibatchutterancesource: label duration inconsistent with feature file in MLF label set: %S", key.c_str()));
assert (labels[j].empty() || classids[j]->size() == _totalframes + utteranceset.size());
}
classids[j]->push_back ((CLASSIDTYPE) -1); // append a boundary marker for checking
if (!labels[j].empty() && classids[j]->size() != _totalframes + utteranceset.size())
throw std::logic_error (msra::strfun::strprintf ("minibatchutterancesource: label duration inconsistent with feature file in MLF label set: %S", key.c_str()));
assert (labels[j].empty() || classids[j]->size() == _totalframes + utteranceset.size());
}
}
else{
@ -474,7 +486,7 @@ public:
}
if (nomlf + nolat > 0)
{
fprintf (stderr, "minibatchutterancesource: out of %zu files, %zu files not found in label set and %zu have no lattice\n", infiles.size(), nomlf, nolat);
fprintf (stderr, "minibatchutterancesource: out of %zu files, %zu files not found in label set and %zu have no lattice\n", infiles[0].size(), nomlf, nolat);
if (nomlf + nolat > infiles[m].size() / 2)
throw std::runtime_error ("minibatchutterancesource: too many files not found in label set--assuming broken configuration\n");
}
@ -600,7 +612,8 @@ private:
return sweep;
currentsweep = sweep;
fprintf (stderr, "lazyrandomization: re-randomizing for sweep %zu in %s mode\n", currentsweep, framemode ? "frame" : "utterance");
if (verbosity>0)
fprintf (stderr, "lazyrandomization: re-randomizing for sweep %zu in %s mode\n", currentsweep, framemode ? "frame" : "utterance");
const size_t sweepts = sweep * _totalframes; // first global frame index for this sweep
@ -912,8 +925,9 @@ private:
auto & chunkdata = randomizedchunks[m][k].getchunkdata();
if (chunkdata.isinram())
{
fprintf (stderr, "releaserandomizedchunk: paging out randomized chunk %zu (frame range [%zu..%zu]), %zu resident in RAM\n",
k, randomizedchunks[m][k].globalts, randomizedchunks[m][k].globalte()-1, chunksinram-1);
if (verbosity)
fprintf (stderr, "releaserandomizedchunk: paging out randomized chunk %zu (frame range [%zu..%zu]), %zu resident in RAM\n",
k, randomizedchunks[m][k].globalts, randomizedchunks[m][k].globalte()-1, chunksinram-1);
chunkdata.releasedata();
numreleased++;
}
@ -957,10 +971,11 @@ private:
{
auto & chunk = randomizedchunks[m][chunkindex];
auto & chunkdata = chunk.getchunkdata();
fprintf (stderr, "feature set %d: requirerandomizedchunk: paging in randomized chunk %zu (frame range [%zu..%zu]), %zu resident in RAM\n", m, chunkindex, chunk.globalts, chunk.globalte()-1, chunksinram+1);
if (verbosity)
fprintf (stderr, "feature set %d: requirerandomizedchunk: paging in randomized chunk %zu (frame range [%zu..%zu]), %zu resident in RAM\n", m, chunkindex, chunk.globalts, chunk.globalte()-1, chunksinram+1);
msra::util::attempt (5, [&]() // (reading from network)
{
chunkdata.requiredata (featkind[m], featdim[m], sampperiod[m], this->lattices);
chunkdata.requiredata (featkind[m], featdim[m], sampperiod[m], this->lattices, verbosity);
});
}
chunksinram++;
@ -1069,7 +1084,8 @@ public:
}
}
// return these utterances
fprintf (stderr, "getbatch: getting utterances %zu..%zu (%zu frames out of %zu requested) in sweep %zu\n", spos, epos -1, mbframes, framesrequested, sweep);
if (verbosity > 0)
fprintf (stderr, "getbatch: getting utterances %zu..%zu (%zu frames out of %zu requested) in sweep %zu\n", spos, epos -1, mbframes, framesrequested, sweep);
size_t tspos = 0; // relative start of utterance 'pos' within the returned minibatch
for (size_t pos = spos; pos < epos; pos++)
{
@ -1147,7 +1163,8 @@ public:
const size_t lastchunk = chunkforframepos (globalte-1);
const size_t windowbegin = randomizedchunks[0][firstchunk].windowbegin;
const size_t windowend = randomizedchunks[0][lastchunk].windowend;
fprintf (stderr, "getbatch: getting randomized frames [%zu..%zu] (%zu frames out of %zu requested) in sweep %zu; chunks [%zu..%zu] -> chunk window [%zu..%zu)\n",
if (verbosity > 0)
fprintf (stderr, "getbatch: getting randomized frames [%zu..%zu] (%zu frames out of %zu requested) in sweep %zu; chunks [%zu..%zu] -> chunk window [%zu..%zu)\n",
globalts, globalte, mbframes, framesrequested, sweep, firstchunk, lastchunk, windowbegin, windowend);
// release all data outside, and page in all data inside
for (size_t k = 0; k < windowbegin; k++)

Просмотреть файл

@ -25,6 +25,8 @@
#include "minibatchiterator.h"
#define DATAREADER_EXPORTS // creating the exports here
#include "DataReader.h"
#include "commandArgUtil.h"
#include "HTKMLFReader.h"
#ifdef LEAKDETECT
#include <vld.h> // for memory leak detection

Просмотреть файл

@ -27,6 +27,8 @@
#define DATAWRITER_EXPORTS // creating the exports here
#include "DataWriter.h"
#include "HTKMLFWriter.h"
#include "commandArgUtil.h"
#ifdef LEAKDETECT
#include <vld.h> // for memory leak detection
#endif

Просмотреть файл

@ -20,8 +20,6 @@ Profiler::Profiler(int numSamples)
:m_numSamples(numSamples),
m_isProfilingActive(false)
{
if (m_numSamples > 0)
Start();
}
Profiler::~Profiler()
@ -45,6 +43,11 @@ void Profiler::NextSample()
if (--m_numSamples == 0)
Stop();
}
else
{
if (m_numSamples > 0)
Start();
}
}
void Profiler::Stop()

Просмотреть файл

@ -1276,7 +1276,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
/// the label is a dense matrix. each element is the word index
label = m_net->CreateInputNode(L"labels", 2 * (this->nce_noises + 1), mbSize);
bias = m_net->CreateLearnableParameter(L"BiasVector", m_layerSizes[m_layerSizes.size() - 1], 1);
bias = m_net->CreateLearnableParameter(L"BiasVector", 1, m_layerSizes[m_layerSizes.size() - 1]);
bias->FunctionValues().SetValue((ElemType)-std::log(m_layerSizes[m_layerSizes.size() - 1]));
//m_net->InitLearnableParameters(bias, m_uniformInit, randomSeed++, std::log(m_layerSizes[m_layerSizes.size() - 1])* m_initValueScale);
//clslogpostprob = m_net->Times(clsweight, input, L"ClassPostProb");

Просмотреть файл

@ -958,10 +958,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
// evaluation uses softmax
m_logSoftmax.AssignProductOf(Inputs(1)->FunctionValues(), true, Inputs(2)->FunctionValues(), false);
/*
#pragma omp parallel for
for (int i = 0; i < Inputs(0)->FunctionValues().GetNumCols(); i++)
for (int j = 0; j < Inputs(3)->FunctionValues().GetNumRows(); j++)
m_logSoftmax(i, j) += Inputs(3)->FunctionValues()(j, 0);
*/
m_logSoftmax += Inputs(3)->FunctionValues().Transpose();
m_logSoftmax.InplaceLogSoftmax(false);
FunctionValues().Resize(1, 1);
FunctionValues().SetValue(0);

Просмотреть файл

@ -85,7 +85,7 @@ MATH_SRC = Math/Math/Matrix.cpp Math/Math/GPUMatrix.cu Math/Math/GPUMatrixCUDAKe
Math/Math/CPUMatrix.cpp Math/Math/CPUSparseMatrix.cpp #Math/Math/InstantiateTemplates.cu
CN_SRC = MachineLearning/CNTK/NetworkDescriptionLanguage.cpp MachineLearning/CNTK/CNTK.cpp MachineLearning/CNTK/ComputationNode.cpp \
MachineLearning/CNTK/ModelEditLanguage.cpp \
MachineLearning/CNTK/SimpleNetworkBuilder.cpp MachineLearning/CNTK/tests.cpp MachineLearning/CNTKEval/CNTKEval.cpp
MachineLearning/CNTK/SimpleNetworkBuilder.cpp MachineLearning/CNTK/tests.cpp MachineLearning/CNTK/Profiler.cpp MachineLearning/CNTKEval/CNTKEval.cpp
BINARYREADER_SRC = #DataReader/BinaryReader/BinaryWriter.cpp DataReader/BinaryReader/BinaryReader.cpp DataReader/BinaryReader/BinaryFile.cpp
HTKMLFREADER_SRC = DataReader/HTKMLFReader_linux/HTKMLFWriter.cpp DataReader/HTKMLFReader_linux/DataWriter.cpp DataReader/HTKMLFReader_linux/DataReader.cpp DataReader/HTKMLFReader_linux/HTKMLFReader.cpp
KALDIREADER_SRC = DataReader/KaldiReader/HTKMLFWriter.cpp DataReader/KaldiReader/DataWriter.cpp DataReader/KaldiReader/DataReader.cpp DataReader/KaldiReader/HTKMLFReader.cpp

Просмотреть файл

@ -3908,7 +3908,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
for (int sample_id = 0; sample_id < sample_size; sample_id++)
{
int sample =(int) (*this)(2 * sample_id, instance_id);
c(sample, 0) -= tmp(sample_id, instance_id);
c(0, sample) -= tmp(sample_id, instance_id);
}
}
return *this;
@ -3941,7 +3941,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
for (int sample_id = 0; sample_id < sample_size; sample_id++)
{
int sample =(int) (*this)(2 * sample_id, instance_id);
double score = bias(sample, 0);
double score = bias(0, sample);
for (int dim = 0; dim < b.GetNumRows(); dim++)
score += a(dim, instance_id)* b(dim, sample);
double sample_prob = -(*this)(2 * sample_id + 1, instance_id);

Просмотреть файл

@ -1870,17 +1870,20 @@ namespace Microsoft { namespace MSR { namespace CNTK {
cudaEvent_t done = nullptr;
if (do_sync) CUDA_CALL(cudaEventCreate(&done));
//a: dim * minibatch
//b: dim * |vocab|
int p = 512;
int width = a.GetNumCols();
int width = a.GetNumRows(); //dimension of hidden vector
//int width = a.GetNumCols(); original setup, considering column-major
//
while (p / 2 > width) p = p / 2;
_computeNceOutput<ElemType> << <this->GetNumElements() / 2, p >> >(
this->GetArray(),
m_numRows / 2,
this->GetArray(),
sampleCount,
m_numRows / 2,
my_a.GetArray(),//a
a.GetNumCols(),
a.GetNumRows(),
my_b.GetArray(),//b
my_bias.GetArray(),
tmp.GetArray());//tmp
@ -1891,8 +1894,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// summing up objective must be done in one block
_assignNoiseContrastiveEstimation<ElemType> << <1, p >> >(
this->GetArray(),
m_numRows,
sampleCount, my_a.GetArray(),
sampleCount,
m_numRows / 2,
my_a.GetArray(),
a.GetNumCols(),
my_b.GetArray(),
tmp.GetArray(),
@ -1900,7 +1904,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
_computeNceError<ElemType> << <1, p >> >(
this->GetArray(),
m_numRows,
m_numRows / 2,
tmp.GetNumCols(),
tmp.GetArray());
@ -1919,20 +1923,20 @@ namespace Microsoft { namespace MSR { namespace CNTK {
cudaEvent_t done = nullptr;
if (do_sync) CUDA_CALL(cudaEventCreate(&done));
int p = 512;
int width = a.GetNumCols();
int width = a.GetNumRows();
while (p / 2 > width) p = p / 2;
_assignNceDerivative<ElemType> << <m_nz, p >> >(
GetArray(),
m_numRows,
tmp.GetNumCols(),
m_numRows / 2,
my_a.GetArray(),
a.GetNumCols(),
a.GetNumRows(),
my_b.GetArray(),
tmp.GetArray(),
c.GetArray(),
inputIndex);
if (do_sync) CUDA_CALL(cudaEventRecord(done));
if (do_sync) CUDA_CALL(cudaEventSynchronize(done));
if (do_sync) CUDA_CALL(cudaEventDestroy(done));

Просмотреть файл

@ -2836,15 +2836,15 @@ __global__ void _computeNceOutput(
for (int i = start; i < end; i++)
{
int colIndex = (int)col[2 * i];
int rowIndex = i / sampleCount;
int wid = (int)col[2 * i];
int batchid = i / sampleCount;
int loadPerThread = (numCols_a + blockDim.x - 1) / blockDim.x;
int tstart = loadPerThread * threadIdx.x;
int tend = min(numCols_a, loadPerThread * (threadIdx.x + 1));
for (int j = tstart; j < tend; j++)
partials[threadIdx.x] = a[IDX2C(rowIndex, j, numRows)] * b[IDX2C(j, colIndex, numCols_a)];
partials[threadIdx.x] = a[IDX2C(j, batchid, numCols_a)] * b[IDX2C(j, wid, numCols_a)];
__syncthreads();
@ -3262,19 +3262,23 @@ __global__ void _assignNceDerivative(
for (int j = tstart; j < tend; j++)
{
ElemType val = er * b[IDX2C(j, colIndex, width)];
atomicAdd(c + IDX2C(rowIndex, j, numRows), val);
atomicAdd(c + IDX2C(j, rowIndex, width), val);
//c[IDX2C(rowIndex, j, numRows)] += val;
}
}
else // weight
else if (inputIndex == 2) // weight
{
for (int j = tstart; j < tend; j++)
{
ElemType val = er * a[IDX2C(rowIndex, j, numRows)];
ElemType val = er * a[IDX2C(j, rowIndex, width)];
atomicAdd(c + IDX2C(j, colIndex, width), val);
//c[IDX2C(j, colIndex, width)] += val;
}
}
else //bias vector
{
c[colIndex] += er;
}
}
}

Просмотреть файл

@ -3645,9 +3645,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
if (a.GetDeviceId() != b.GetDeviceId() || b.GetDeviceId() != c.GetDeviceId() || c.GetDeviceId() != this->GetDeviceId())
NOT_IMPLEMENTED;
//if (a.GetMatrixType() == MatrixType::DENSE)
// NOT_IMPLEMENTED;
this->Resize(1, 1);
if (this->GetDeviceId() < 0)