Merge branch 'master' of https://git01.codeplex.com/cntk into linux-gcc
Commit 9a90db4f91
@ -301,39 +301,95 @@ public:
|
|||
std::vector<size_t> numclasses; // number of output classes as found in the label file (diagnostics)
|
||||
_totalframes = 0;
|
||||
wstring key;
|
||||
std::vector<size_t>framesaccum;
|
||||
size_t numutts=0;
|
||||
|
||||
std::vector<bool>uttisvalid; // boolean flag to check that utterance is valid. valid means number of
|
||||
//frames is consistent across all feature and label streams
|
||||
std::vector<size_t>uttduration; // track utterance durations to determine utterance validity
|
||||
|
||||
std::vector<size_t> classidsbegin;
|
||||
if (!lattices.empty())
|
||||
{
|
||||
LogicError("lattices not supported in utterancereadermulti");
|
||||
}
|
||||
|
||||
allchunks = std::vector<std::vector<utterancechunkdata>>(infiles.size(), std::vector<utterancechunkdata>());
|
||||
featdim = std::vector<size_t>(infiles.size(), 0);
|
||||
sampperiod = std::vector<unsigned int>(infiles.size(), 0);
|
||||
featkind = std::vector<string>(infiles.size(), "");
|
||||
|
||||
numclasses = std::vector<size_t>(labels.size(), 0);
|
||||
counts = std::vector<std::vector<size_t>>(labels.size(), std::vector<size_t>());
|
||||
|
||||
foreach_index (i, labels)
|
||||
{
|
||||
//classids.push_back(biggrowablevector<CLASSIDTYPE>());
|
||||
classids.push_back(unique_ptr<biggrowablevector<CLASSIDTYPE>>(new biggrowablevector<CLASSIDTYPE>()));
|
||||
numclasses.push_back(0);
|
||||
counts.push_back(std::vector<size_t>());
|
||||
//std::pair<std::vector<wstring>,std::vector<wstring>> latticetocs;
|
||||
//std::unordered_map<std::string,size_t> modelsymmap;
|
||||
//lattices.push_back(shared_ptr<latticesource>(new latticesource(latticetocs, modelsymmap)));
|
||||
|
||||
}
|
||||
foreach_index(i, infiles){
|
||||
allchunks.push_back(std::vector<utterancechunkdata>());
|
||||
featdim.push_back(0); // initialize
|
||||
sampperiod.push_back(0);
|
||||
featkind.push_back("");
|
||||
|
||||
|
||||
// first check consistency across feature streams
|
||||
// We'll go through the SCP files for each stream to make sure the duration is consistent
|
||||
// If not, we'll plan to ignore the utterance, and inform the user
|
||||
// m indexes the feature stream
|
||||
// i indexes the files within a stream, i.e. in the SCP file)
|
||||
foreach_index(m, infiles){
|
||||
if (m == 0){
|
||||
numutts = infiles[m].size();
|
||||
uttisvalid = std::vector<bool>(numutts, true);
|
||||
uttduration = std::vector<size_t>(numutts, 0);
|
||||
}
|
||||
else if (infiles[m].size()!=numutts)
|
||||
throw std::runtime_error("minibatchutterancesourcemulti: all feature files must have same number of utterances");
|
||||
|
||||
foreach_index(i, infiles[m]){
|
||||
utterancedesc utterance(msra::asr::htkfeatreader::parsedpath(infiles[m][i]), 0); //mseltzer - is this foolproof for multiio? is classids always non-empty?
|
||||
const size_t uttframes = utterance.numframes(); // will throw if frame bounds not given --required to be given in this mode
|
||||
// we need at least 2 frames for boundary markers to work
|
||||
if (uttframes < 2)
|
||||
throw std::runtime_error("minibatchutterancesource: utterances < 2 frames not supported");
|
||||
if (uttframes > frameref::maxframesperutterance)
|
||||
{
|
||||
fprintf(stderr, "minibatchutterancesource: skipping %d-th file (%d frames) because it exceeds max. frames (%d) for frameref bit field: %S\n", i, uttframes, frameref::maxframesperutterance, key.c_str());
|
||||
uttduration[i] = 0;
|
||||
uttisvalid[i] = false;
|
||||
}
|
||||
else{
|
||||
if (m == 0){
|
||||
uttduration[i] = uttframes;
|
||||
uttisvalid[i] = true;
|
||||
}
|
||||
else if (uttduration[i] != uttframes){
|
||||
fprintf(stderr, "minibatchutterancesource: skipping %d-th file due to inconsistency in duration in different feature streams (%d vs %d frames)\n", i, uttduration[i], uttframes);
|
||||
uttduration[i] = 0;
|
||||
uttisvalid[i] = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
size_t invalidutts=0;
|
||||
foreach_index(i, uttisvalid){
|
||||
if (!uttisvalid[i])
|
||||
invalidutts++;
|
||||
}
|
||||
if (invalidutts > uttisvalid.size() / 2)
|
||||
throw std::runtime_error("minibatchutterancesource: too many files with inconsistent durations, assuming broken configuration\n");
|
||||
else if (invalidutts>0)
|
||||
fprintf(stderr, "Found inconsistent durations across feature streams in %d out of %d files\n", invalidutts, uttisvalid.size());
|
||||
|
||||
|
||||
// now process the features and labels
|
||||
size_t utterancesetsize = 0;
|
||||
foreach_index (m, infiles)
|
||||
{
|
||||
utteranceset.clear();
|
||||
if (m==0)
|
||||
numutts = infiles[m].size();
|
||||
else
|
||||
if (infiles[m].size()!=numutts)
|
||||
throw std::runtime_error("minibatchutterancesourcemulti: all feature files must have same number of utterances");
|
||||
//if (m==0)
|
||||
// numutts = infiles[m].size();
|
||||
//else
|
||||
// if (infiles[m].size()!=numutts)
|
||||
// throw std::runtime_error("minibatchutterancesourcemulti: all feature files must have same number of utterances\n");
|
||||
if (m==0)
|
||||
classidsbegin.clear();
|
||||
|
||||
|
@ -344,16 +400,19 @@ public:
|
|||
if (m == 0 && !labels.empty())
|
||||
classidsbegin.push_back(classids[0]->size());
|
||||
|
||||
if (uttisvalid[i]){
|
||||
utterancedesc utterance (msra::asr::htkfeatreader::parsedpath (infiles[m][i]), labels.empty() ? 0 : classidsbegin[i] ); //mseltzer - is this foolproof for multiio? is classids always non-empty?
|
||||
const size_t uttframes = utterance.numframes(); // will throw if frame bounds not given --required to be given in this mode
|
||||
// we need at least 2 frames for boundary markers to work
|
||||
if (uttframes < 2)
|
||||
throw std::runtime_error ("minibatchutterancesource: utterances < 2 frames not supported");
|
||||
if (uttframes > frameref::maxframesperutterance)
|
||||
{
|
||||
fprintf (stderr, "minibatchutterancesource: skipping %d-th file (%d frames) because it exceeds max. frames (%d) for frameref bit field: %S", i, uttframes, frameref::maxframesperutterance, key.c_str());
|
||||
continue;
|
||||
}
|
||||
assert(uttframes == uttduration[i]); // ensure nothing funky happened
|
||||
// already performed these checks above
|
||||
// we need at least 2 frames for boundary markers to work
|
||||
//if (uttframes < 2)
|
||||
// throw std::runtime_error ("minibatchutterancesource: utterances < 2 frames not supported");
|
||||
//if (uttframes > frameref::maxframesperutterance)
|
||||
//{
|
||||
// fprintf (stderr, "minibatchutterancesource: skipping %d-th file (%d frames) because it exceeds max. frames (%d) for frameref bit field: %S", i, uttframes, frameref::maxframesperutterance, key.c_str());
|
||||
// continue;
|
||||
//}
|
||||
|
||||
// check whether we have the ref transcript
|
||||
//auto labelsiter = labels[0].end();
|
||||
|
@ -375,9 +434,11 @@ public:
|
|||
if (nolat++ < 5)
|
||||
fprintf (stderr, " [no lattice for %S]", key.c_str());
|
||||
// skip if either one is missing
|
||||
if (lacksmlf || lackslat)
|
||||
if (lacksmlf || lackslat){
|
||||
uttisvalid[i] = false;
|
||||
continue; // skip this utterance at all
|
||||
}
|
||||
}
|
||||
// push the label sequence into classids[], since we already looked it up
|
||||
// TODO: we can store labels more efficiently now since we don't do frame-wise random access anymore.
|
||||
|
||||
|
@ -389,7 +450,6 @@ public:
|
|||
//if (!labels.empty() && labelsiter != labels[0].end())
|
||||
{
|
||||
// first verify that all the label files have the proper duration
|
||||
bool durationmatch = true;
|
||||
foreach_index(j, labels)
|
||||
{
|
||||
const auto & labseq = labels[j].find(key)->second;
|
||||
|
@ -399,14 +459,15 @@ public:
|
|||
{
|
||||
fprintf(stderr, " [duration mismatch (%d in label vs. %d in feat file), skipping %S]", labframes, uttframes, key.c_str());
|
||||
nomlf++;
|
||||
durationmatch = false;
|
||||
break; // continue; // skip this utterance at all
|
||||
uttisvalid[i] = false;
|
||||
//continue; // skip this utterance at all
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (durationmatch){
|
||||
if (uttisvalid[i])
|
||||
{
|
||||
utteranceset.push_back(std::move(utterance));
|
||||
_totalframes += uttframes;
|
||||
framesaccum.push_back(uttframes); //track number of frames in each utterance - first feature is the reference
|
||||
// then parse each mlf if the durations are consistent
|
||||
foreach_index(j, labels)
|
||||
{
|
||||
|
@ -437,14 +498,22 @@ public:
|
|||
}
|
||||
}
|
||||
else{
|
||||
assert(classids.empty());
|
||||
assert(classids.empty() && labels.empty());
|
||||
utteranceset.push_back(std::move(utterance));
|
||||
_totalframes += uttframes;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(uttframes==framesaccum[i]); //ensure that number of frames is consistent in each input feature "stream"
|
||||
else
|
||||
{
|
||||
utteranceset.push_back(std::move(utterance));
|
||||
}
|
||||
}
|
||||
}
|
||||
if (m == 0)
|
||||
utterancesetsize = utteranceset.size();
|
||||
else
|
||||
assert(utteranceset.size() == utterancesetsize);
|
||||
|
||||
fprintf (stderr, "feature set %d: %d frames in %d out of %d utterances\n", m, _totalframes, utteranceset.size(),infiles[m].size());
|
||||
|
||||
if (!labels.empty()){
|
||||
|
|
|
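Aside for readers skimming the hunks above: the new reader code makes a first pass over every feature stream, records each utterance's frame count from the first stream as the reference, flags any utterance whose count disagrees in a later stream, and gives up entirely if more than half of the utterances end up flagged. A minimal standalone sketch of that pass is shown below; it is not CNTK code, the per-stream frame-count layout and the function name are assumptions for illustration, and the frameref::maxframesperutterance clamp from the real code is omitted.

// Sketch only: "streams" holds one frame count per utterance, per feature stream.
#include <cstdio>
#include <stdexcept>
#include <vector>

static std::vector<bool> ValidateDurations(const std::vector<std::vector<size_t>> &streams)
{
    const size_t numutts = streams.at(0).size();
    std::vector<bool>   uttisvalid(numutts, true);
    std::vector<size_t> uttduration(numutts, 0);

    for (size_t m = 0; m < streams.size(); m++)
    {
        if (streams[m].size() != numutts)
            throw std::runtime_error("all feature streams must have the same number of utterances");
        for (size_t i = 0; i < numutts; i++)
        {
            if (m == 0)
                uttduration[i] = streams[m][i];              // first stream is the reference
            else if (uttisvalid[i] && uttduration[i] != streams[m][i])
            {
                fprintf(stderr, "skipping utterance %zu: %zu vs %zu frames\n",
                        i, uttduration[i], streams[m][i]);
                uttisvalid[i] = false;                       // drop it from every stream
            }
        }
    }

    size_t invalid = 0;
    for (bool v : uttisvalid)
        if (!v)
            invalid++;
    if (invalid > numutts / 2)
        throw std::runtime_error("too many inconsistent utterances, assuming broken configuration");
    return uttisvalid;
}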
@ -323,39 +323,92 @@ public:
|
|||
std::vector<size_t> numclasses; // number of output classes as found in the label file (diagnostics)
|
||||
_totalframes = 0;
|
||||
wstring key;
|
||||
std::vector<size_t>framesaccum;
|
||||
size_t numutts=0;
|
||||
|
||||
std::vector<bool>uttisvalid; // boolean flag to check that utterance is valid. valid means number of
|
||||
//frames is consistent across all feature and label streams
|
||||
std::vector<size_t>uttduration; // track utterance durations to determine utterance validity
|
||||
|
||||
std::vector<size_t> classidsbegin;
|
||||
if (!lattices.empty())
|
||||
{
|
||||
LogicError("lattices not supported in utterancereadermulti");
|
||||
}
|
||||
|
||||
allchunks = std::vector<std::vector<utterancechunkdata>>(infiles.size(), std::vector<utterancechunkdata>());
|
||||
featdim = std::vector<size_t>(infiles.size(), 0);
|
||||
sampperiod = std::vector<unsigned int>(infiles.size(), 0);
|
||||
featkind = std::vector<string>(infiles.size(), "");
|
||||
numclasses = std::vector<size_t>(labels.size(), 0);
|
||||
counts = std::vector<std::vector<size_t>>(labels.size(), std::vector<size_t>());
|
||||
foreach_index (i, labels)
|
||||
{
|
||||
//classids.push_back(biggrowablevector<CLASSIDTYPE>());
|
||||
classids.push_back(unique_ptr<biggrowablevector<CLASSIDTYPE>>(new biggrowablevector<CLASSIDTYPE>()));
|
||||
numclasses.push_back(0);
|
||||
counts.push_back(std::vector<size_t>());
|
||||
//std::pair<std::vector<wstring>,std::vector<wstring>> latticetocs;
|
||||
//std::unordered_map<std::string,size_t> modelsymmap;
|
||||
//lattices.push_back(shared_ptr<latticesource>(new latticesource(latticetocs, modelsymmap)));
|
||||
|
||||
}
|
||||
foreach_index(i, infiles){
|
||||
allchunks.push_back(std::vector<utterancechunkdata>());
|
||||
featdim.push_back(0); // initialize
|
||||
sampperiod.push_back(0);
|
||||
featkind.push_back("");
|
||||
|
||||
|
||||
// first check consistency across feature streams
|
||||
// We'll go through the SCP files for each stream to make sure the duration is consistent
|
||||
// If not, we'll plan to ignore the utterance, and inform the user
|
||||
foreach_index(m, infiles){
|
||||
if (m == 0){
|
||||
numutts = infiles[m].size();
|
||||
uttisvalid = std::vector<bool>(numutts, true);
|
||||
uttduration = std::vector<size_t>(numutts, 0);
|
||||
}
|
||||
else if (infiles[m].size()!=numutts)
|
||||
throw std::runtime_error("minibatchutterancesourcemulti: all feature files must have same number of utterances");
|
||||
|
||||
foreach_index(i, infiles[m]){
|
||||
utterancedesc utterance(msra::asr::htkfeatreader::parsedpath(infiles[m][i]), 0); //mseltzer - is this foolproof for multiio? is classids always non-empty?
|
||||
const size_t uttframes = utterance.numframes(); // will throw if frame bounds not given --required to be given in this mode
|
||||
// we need at least 2 frames for boundary markers to work
|
||||
if (uttframes < 2)
|
||||
throw std::runtime_error("minibatchutterancesource: utterances < 2 frames not supported");
|
||||
if (uttframes > frameref::maxframesperutterance)
|
||||
{
|
||||
fprintf(stderr, "minibatchutterancesource: skipping %d-th file (%d frames) because it exceeds max. frames (%d) for frameref bit field: %S\n", i, uttframes, frameref::maxframesperutterance, key.c_str());
|
||||
uttduration[i] = 0;
|
||||
uttisvalid[i] = false;
|
||||
}
|
||||
else{
|
||||
if (m == 0){
|
||||
uttduration[i] = uttframes;
|
||||
uttisvalid[i] = true;
|
||||
}
|
||||
else if (uttduration[i] != uttframes){
|
||||
fprintf(stderr, "minibatchutterancesource: skipping %d-th file due to inconsistency in duration in different feature streams (%d vs %d frames)\n", i, uttduration[i], uttframes);
|
||||
uttduration[i] = 0;
|
||||
uttisvalid[i] = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
size_t invalidutts=0;
|
||||
foreach_index(i, uttisvalid){
|
||||
if (!uttisvalid[i])
|
||||
invalidutts++;
|
||||
}
|
||||
if (invalidutts > uttisvalid.size() / 2)
|
||||
throw std::runtime_error("minibatchutterancesource: too many files with inconsistent durations, assuming broken configuration\n");
|
||||
else if (invalidutts>0)
|
||||
fprintf(stderr, "Found inconsistent durations across feature streams in %d out of %d files\n", invalidutts, uttisvalid.size());
|
||||
|
||||
|
||||
// now process the features and labels
|
||||
size_t utterancesetsize = 0;
|
||||
foreach_index (m, infiles)
|
||||
{
|
||||
utteranceset.clear();
|
||||
if (m==0)
|
||||
numutts = infiles[m].size();
|
||||
else
|
||||
if (infiles[m].size()!=numutts)
|
||||
throw std::runtime_error("minibatchutterancesourcemulti: all feature files must have same number of utterances");
|
||||
//if (m==0)
|
||||
// numutts = infiles[m].size();
|
||||
//else
|
||||
// if (infiles[m].size()!=numutts)
|
||||
// throw std::runtime_error("minibatchutterancesourcemulti: all feature files must have same number of utterances\n");
|
||||
if (m==0)
|
||||
classidsbegin.clear();
|
||||
|
||||
|
@ -366,16 +419,19 @@ public:
|
|||
if (m == 0 && !labels.empty())
|
||||
classidsbegin.push_back(classids[0]->size());
|
||||
|
||||
if (uttisvalid[i]){
|
||||
utterancedesc utterance (msra::asr::htkfeatreader::parsedpath (infiles[m][i]), labels.empty() ? 0 : classidsbegin[i] ); //mseltzer - is this foolproof for multiio? is classids always non-empty?
|
||||
const size_t uttframes = utterance.numframes(); // will throw if frame bounds not given --required to be given in this mode
|
||||
// we need at least 2 frames for boundary markers to work
|
||||
if (uttframes < 2)
|
||||
throw std::runtime_error ("minibatchutterancesource: utterances < 2 frames not supported");
|
||||
if (uttframes > frameref::maxframesperutterance)
|
||||
{
|
||||
fprintf (stderr, "minibatchutterancesource: skipping %d-th file (%zu frames) because it exceeds max. frames (%zu) for frameref bit field: %S", i, uttframes, frameref::maxframesperutterance, key.c_str());
|
||||
continue;
|
||||
}
|
||||
assert(uttframes == uttduration[i]); // ensure nothing funky happened
|
||||
// already performed these checks above
|
||||
// we need at least 2 frames for boundary markers to work
|
||||
//if (uttframes < 2)
|
||||
// throw std::runtime_error ("minibatchutterancesource: utterances < 2 frames not supported");
|
||||
//if (uttframes > frameref::maxframesperutterance)
|
||||
//{
|
||||
// fprintf (stderr, "minibatchutterancesource: skipping %d-th file (%d frames) because it exceeds max. frames (%d) for frameref bit field: %S", i, uttframes, frameref::maxframesperutterance, key.c_str());
|
||||
// continue;
|
||||
//}
|
||||
|
||||
// check whether we have the ref transcript
|
||||
//auto labelsiter = labels[0].end();
|
||||
|
@ -397,9 +453,11 @@ public:
|
|||
if (nolat++ < 5)
|
||||
fprintf (stderr, " [no lattice for %S]", key.c_str());
|
||||
// skip if either one is missing
|
||||
if (lacksmlf || lackslat)
|
||||
continue; // skip this utterance at all
|
||||
}
|
||||
if (lacksmlf || lackslat){
|
||||
uttisvalid[i] = false;
|
||||
continue; // skip this utterance at all
|
||||
}
|
||||
}
|
||||
// push the label sequence into classids[], since we already looked it up
|
||||
// TODO: we can store labels more efficiently now since we don't do frame-wise random access anymore.
|
||||
|
||||
|
@ -411,7 +469,6 @@ public:
|
|||
//if (!labels.empty() && labelsiter != labels[0].end())
|
||||
{
|
||||
// first verify that all the label files have the proper duration
|
||||
bool durationmatch = true;
|
||||
foreach_index (j, labels)
|
||||
{
|
||||
const auto & labseq = labels[j].find(key)->second;
|
||||
|
@ -421,14 +478,14 @@ public:
|
|||
{
|
||||
fprintf (stderr, " [duration mismatch (%zu in label vs. %zu in feat file), skipping %S]", labframes, uttframes, key.c_str());
|
||||
nomlf++;
|
||||
durationmatch = false;
|
||||
uttisvalid[i] = false;
|
||||
break; // continue; // skip this utterance at all
|
||||
}
|
||||
}
|
||||
if (durationmatch){
|
||||
if (uttisvalid[i])
|
||||
{
|
||||
utteranceset.push_back(std::move(utterance));
|
||||
_totalframes += uttframes;
|
||||
framesaccum.push_back(uttframes); //track number of frames in each utterance - first feature is the reference
|
||||
// then parse each mlf if the durations are consistent
|
||||
foreach_index(j, labels)
|
||||
{
|
||||
|
@ -461,14 +518,22 @@ public:
|
|||
}
|
||||
}
|
||||
else{
|
||||
assert(classids.empty());
|
||||
assert(classids.empty() && labels.empty());
|
||||
utteranceset.push_back(std::move(utterance));
|
||||
_totalframes += uttframes;
|
||||
}
|
||||
}
|
||||
else
|
||||
else
|
||||
{
|
||||
assert(uttframes==framesaccum[i]); //ensure that number of frames is consistent in each input feature "stream"
|
||||
utteranceset.push_back(std::move(utterance));
|
||||
}
|
||||
}
|
||||
}
|
||||
if (m == 0)
|
||||
utterancesetsize = utteranceset.size();
|
||||
else
|
||||
assert(utteranceset.size() == utterancesetsize);
|
||||
|
||||
fprintf (stderr, "feature set %d: %zu frames in %zu out of %zu utterances\n", m, _totalframes, utteranceset.size(),infiles[m].size());
|
||||
|
||||
if (!labels.empty()){
|
||||
|
|
|
@ -316,43 +316,98 @@ public:
|
|||
std::vector<size_t> numclasses; // number of output classes as found in the label file (diagnostics)
|
||||
_totalframes = 0;
|
||||
wstring key;
|
||||
std::vector<size_t>framesaccum;
|
||||
size_t numutts=0;
|
||||
|
||||
std::vector<bool>uttisvalid; // boolean flag to check that utterance is valid. valid means number of
|
||||
//frames is consistent across all feature and label streams
|
||||
std::vector<size_t>uttduration; // track utterance durations to determine utterance validity
|
||||
|
||||
std::vector<size_t> classidsbegin;
|
||||
if (!lattices.empty())
|
||||
{
|
||||
LogicError("lattices not supported in utterancereadermulti");
|
||||
}
|
||||
|
||||
allchunks = std::vector<std::vector<utterancechunkdata>>(infiles.size(), std::vector<utterancechunkdata>());
|
||||
featdim = std::vector<size_t>(infiles.size(), 0);
|
||||
sampperiod = std::vector<unsigned int>(infiles.size(), 0);
|
||||
featkind = std::vector<string>(infiles.size(), "");
|
||||
|
||||
numclasses = std::vector<size_t>(labels.size(), 0);
|
||||
counts = std::vector<std::vector<size_t>>(labels.size(), std::vector<size_t>());
|
||||
foreach_index (i, labels)
|
||||
{
|
||||
//classids.push_back(biggrowablevector<CLASSIDTYPE>());
|
||||
classids.push_back(unique_ptr<biggrowablevector<CLASSIDTYPE>>(new biggrowablevector<CLASSIDTYPE>()));
|
||||
numclasses.push_back(0);
|
||||
counts.push_back(std::vector<size_t>());
|
||||
//std::pair<std::vector<wstring>,std::vector<wstring>> latticetocs;
|
||||
//std::unordered_map<std::string,size_t> modelsymmap;
|
||||
//lattices.push_back(shared_ptr<latticesource>(new latticesource(latticetocs, modelsymmap)));
|
||||
|
||||
}
|
||||
foreach_index(i, infiles){
|
||||
allchunks.push_back(std::vector<utterancechunkdata>());
|
||||
featdim.push_back(0); // initialize
|
||||
sampperiod.push_back(0);
|
||||
featkind.push_back("");
|
||||
|
||||
|
||||
// first check consistency across feature streams
|
||||
// We'll go through the SCP files for each stream to make sure the duration is consistent
|
||||
// If not, we'll plan to ignore the utterance, and inform the user
|
||||
// m indexes the feature stream
|
||||
// i indexes the files within a stream, i.e. in the SCP file)
|
||||
foreach_index(m, infiles){
|
||||
if (m == 0){
|
||||
numutts = infiles[m].size();
|
||||
uttisvalid = std::vector<bool>(numutts, true);
|
||||
uttduration = std::vector<size_t>(numutts, 0);
|
||||
}
|
||||
else if (infiles[m].size()!=numutts)
|
||||
throw std::runtime_error("minibatchutterancesourcemulti: all feature files must have same number of utterances");
|
||||
|
||||
foreach_index(i, infiles[m]){
|
||||
utterancedesc utterance(msra::asr::htkfeatreader::parsedpath(infiles[m][i],featuresections[m]), 0); //mseltzer - is this foolproof for multiio? is classids always non-empty?
|
||||
const size_t uttframes = utterance.numframes(); // will throw if frame bounds not given --required to be given in this mode
|
||||
// we need at least 2 frames for boundary markers to work
|
||||
if (uttframes < 2)
|
||||
throw std::runtime_error("minibatchutterancesource: utterances < 2 frames not supported");
|
||||
if (uttframes > frameref::maxframesperutterance)
|
||||
{
|
||||
fprintf(stderr, "minibatchutterancesource: skipping %d-th file (%d frames) because it exceeds max. frames (%d) for frameref bit field: %S", i, uttframes, frameref::maxframesperutterance, key.c_str());
|
||||
uttduration[i] = 0;
|
||||
uttisvalid[i] = false;
|
||||
}
|
||||
else{
|
||||
if (m == 0){
|
||||
uttduration[i] = uttframes;
|
||||
uttisvalid[i] = true;
|
||||
}
|
||||
else if (uttduration[i] != uttframes){
|
||||
fprintf(stderr, "minibatchutterancesource: skipping %d-th file due to inconsistency in duration in different feature streams (%d vs %d frames)", i, uttduration[i], uttframes);
|
||||
uttduration[i] = 0;
|
||||
uttisvalid[i] = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
size_t invalidutts=0;
|
||||
foreach_index(i, uttisvalid){
|
||||
if (!uttisvalid[i])
|
||||
invalidutts++;
|
||||
}
|
||||
if (invalidutts > uttisvalid.size() / 2)
|
||||
throw std::runtime_error("minibatchutterancesource: too many files not found in with inconsistent durations, assuming broken configuration\n");
|
||||
else if (invalidutts>0)
|
||||
fprintf(stderr, "Found inconsistent durations across feature streams in %d out of %d files.", invalidutts, uttisvalid.size());
|
||||
|
||||
|
||||
// now process the features and labels
|
||||
size_t utterancesetsize = 0;
|
||||
foreach_index (m, infiles)
|
||||
{
|
||||
utteranceset.clear();
|
||||
if (m==0)
|
||||
numutts = infiles[m].size();
|
||||
else
|
||||
if (infiles[m].size()!=numutts)
|
||||
throw std::runtime_error("minibatchutterancesourcemulti: all feature files must have same number of utterances");
|
||||
//if (m==0)
|
||||
// numutts = infiles[m].size();
|
||||
//else
|
||||
// if (infiles[m].size()!=numutts)
|
||||
// throw std::runtime_error("minibatchutterancesourcemulti: all feature files must have same number of utterances\n");
|
||||
if (m==0)
|
||||
classidsbegin.clear();
|
||||
|
||||
size_t uttRealNum = 0;
|
||||
foreach_index (i, infiles[m])
|
||||
{
|
||||
if (i % (infiles[m].size() / 100 + 1) == 0) { fprintf (stderr, "."); fflush (stderr); }
|
||||
|
@ -360,16 +415,20 @@ public:
|
|||
if (m == 0 && !labels.empty())
|
||||
classidsbegin.push_back(classids[0]->size());
|
||||
|
||||
if (uttisvalid[i]){
|
||||
utterancedesc utterance (msra::asr::htkfeatreader::parsedpath (infiles[m][i], featuresections[m]), labels.empty() ? 0 : classidsbegin[i] ); //mseltzer - is this foolproof for multiio? is classids always non-empty?
|
||||
const size_t uttframes = utterance.numframes(); // will throw if frame bounds not given --required to be given in this mode
|
||||
// we need at least 2 frames for boundary markers to work
|
||||
if (uttframes < 2)
|
||||
throw std::runtime_error ("minibatchutterancesource: utterances < 2 frames not supported");
|
||||
if (uttframes > frameref::maxframesperutterance)
|
||||
{
|
||||
fprintf (stderr, "minibatchutterancesource: skipping %d-th file (%zu frames) because it exceeds max. frames (%zu) for frameref bit field: %S", i, uttframes, frameref::maxframesperutterance, key.c_str());
|
||||
continue;
|
||||
}
|
||||
assert(uttframes == uttduration[i]); // ensure nothing funky happened
|
||||
|
||||
// already performed these checks above
|
||||
// we need at least 2 frames for boundary markers to work
|
||||
//if (uttframes < 2)
|
||||
// throw std::runtime_error ("minibatchutterancesource: utterances < 2 frames not supported");
|
||||
//if (uttframes > frameref::maxframesperutterance)
|
||||
//{
|
||||
// fprintf (stderr, "minibatchutterancesource: skipping %d-th file (%d frames) because it exceeds max. frames (%d) for frameref bit field: %S", i, uttframes, frameref::maxframesperutterance, key.c_str());
|
||||
// continue;
|
||||
//}
|
||||
|
||||
// check whether we have the ref transcript
|
||||
//auto labelsiter = labels[0].end();
|
||||
|
@ -391,8 +450,10 @@ public:
|
|||
if (nolat++ < 5)
|
||||
fprintf (stderr, " [no lattice for %S]", key.c_str());
|
||||
// skip if either one is missing
|
||||
if (lacksmlf || lackslat)
|
||||
if (lacksmlf || lackslat){
|
||||
uttisvalid[i] = false;
|
||||
continue; // skip this utterance at all
|
||||
}
|
||||
}
|
||||
// push the label sequence into classids[], since we already looked it up
|
||||
// TODO: we can store labels more efficiently now since we don't do frame-wise random access anymore.
|
||||
|
@ -405,7 +466,6 @@ public:
|
|||
//if (!labels.empty() && labelsiter != labels[0].end())
|
||||
{
|
||||
// first verify that all the label files have the proper duration
|
||||
bool durationmatch = true;
|
||||
foreach_index (j, labels)
|
||||
{
|
||||
const auto & labseq = labels[j].find(key)->second;
|
||||
|
@ -415,14 +475,14 @@ public:
|
|||
{
|
||||
fprintf (stderr, " [duration mismatch (%zu in label vs. %zu in feat file), skipping %S]", labframes, uttframes, key.c_str());
|
||||
nomlf++;
|
||||
durationmatch = false;
|
||||
uttisvalid[i] = false;
|
||||
break; // continue; // skip this utterance at all
|
||||
}
|
||||
}
|
||||
if (durationmatch){
|
||||
if (uttisvalid[i])
|
||||
{
|
||||
utteranceset.push_back(std::move(utterance));
|
||||
_totalframes += uttframes;
|
||||
framesaccum.push_back(uttframes); //track number of frames in each utterance - first feature is the reference
|
||||
// then parse each mlf if the durations are consistent
|
||||
foreach_index(j, labels)
|
||||
{
|
||||
|
@ -455,16 +515,21 @@ public:
|
|||
}
|
||||
}
|
||||
else{
|
||||
assert(classids.empty());
|
||||
assert(classids.empty() && labels.empty());
|
||||
utteranceset.push_back(std::move(utterance));
|
||||
_totalframes += uttframes;
|
||||
}
|
||||
}
|
||||
else
|
||||
else
|
||||
{
|
||||
assert(uttframes==framesaccum[uttRealNum]); //ensure that number of frames is consistent in each input feature "stream"
|
||||
uttRealNum++;
|
||||
|
||||
utteranceset.push_back(std::move(utterance));
|
||||
}
|
||||
}
|
||||
}
|
||||
if (m == 0)
|
||||
utterancesetsize = utteranceset.size();
|
||||
else
|
||||
assert(utteranceset.size() == utterancesetsize);
|
||||
fprintf (stderr, "feature set %d: %zu frames in %zu out of %zu utterances\n", m, _totalframes, utteranceset.size(),infiles[m].size());
|
||||
|
||||
if (!labels.empty()){
|
||||
|
|
|
@ -171,9 +171,7 @@
|
|||
<ClInclude Include="ComputationNetworkHelper.h" />
|
||||
<ClInclude Include="ComputationNode.h" />
|
||||
<ClInclude Include="ConvolutionalNodes.h" />
|
||||
<ClInclude Include="DecoderNode.h" />
|
||||
<ClInclude Include="MinibatchFetcher.h" />
|
||||
<ClInclude Include="MinibatchPrefetcher.h" />
|
||||
<ClInclude Include="DecoderNode.h" />
|
||||
<ClInclude Include="EvaluationCriterionNodes.h" />
|
||||
<ClInclude Include="IComputationNetBuilder.h" />
|
||||
<ClInclude Include="IExecutionEngine.h" />
|
||||
|
@ -221,4 +219,4 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets" />
|
||||
</Project>
|
||||
</Project>
|
|
@ -74,12 +74,6 @@
|
|||
<ClInclude Include="IExecutionEngine.h">
|
||||
<Filter>Execution Engine</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="MinibatchFetcher.h">
|
||||
<Filter>Network</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="MinibatchPrefetcher.h">
|
||||
<Filter>Network</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="ModelEditLanguage.h">
|
||||
<Filter>Model Editing</Filter>
|
||||
</ClInclude>
|
||||
|
|
|
@ -1,51 +0,0 @@
|
|||
//
|
||||
// <copyright file="MinibatchFetcher.h" company="Microsoft">
|
||||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// </copyright>
|
||||
//
|
||||
#pragma once
|
||||
|
||||
#include "ComputationNetwork.h"
|
||||
#include "DataReader.h"
|
||||
#include "TimerUtility.h"
|
||||
|
||||
namespace Microsoft { namespace MSR { namespace CNTK {
|
||||
|
||||
// This base class represent the old, sequential way of fetching a single minibatch of input data.
|
||||
// Essentially, it simply calls GetMinibatch on the reader.
|
||||
template<class ElemType>
|
||||
class MinibatchFetcher
|
||||
{
|
||||
public:
|
||||
MinibatchFetcher(IDataReader<ElemType>* trainSetDataReader,
|
||||
std::map<std::wstring, Matrix<ElemType>*>* inputMatrices,
|
||||
Matrix<ElemType>* sentenceBegin,
|
||||
vector<MinibatchPackingFlag>* sentenceExistsBeginOrNoLabels)
|
||||
:
|
||||
m_reader(trainSetDataReader),
|
||||
m_inputMatrices(inputMatrices),
|
||||
m_sentenceBegin(sentenceBegin),
|
||||
m_sentenceExistsBeginOrNoLabels(sentenceExistsBeginOrNoLabels)
|
||||
{
|
||||
assert((m_sentenceBegin != nullptr) && (m_sentenceExistsBeginOrNoLabels != nullptr));
|
||||
}
|
||||
|
||||
// This virtual dtor is necessary to allow invocation of derived dtors, which have some required synchronization points
|
||||
virtual ~MinibatchFetcher() {}
|
||||
|
||||
virtual bool GetMinibatch()
|
||||
{
|
||||
bool retVal = m_reader->GetMinibatch(*m_inputMatrices);
|
||||
m_reader->SetSentenceSegBatch(*m_sentenceBegin, *m_sentenceExistsBeginOrNoLabels);
|
||||
|
||||
return retVal;
|
||||
}
|
||||
|
||||
protected:
|
||||
IDataReader<ElemType>* m_reader;
|
||||
std::map<std::wstring, Matrix<ElemType>*>* m_inputMatrices;
|
||||
Matrix<ElemType>* m_sentenceBegin;
|
||||
vector<MinibatchPackingFlag>* m_sentenceExistsBeginOrNoLabels;
|
||||
};
|
||||
|
||||
}}}
|
|
@ -1,220 +0,0 @@
|
|||
//
|
||||
// <copyright file="MinibatchFetcher.h" company="Microsoft">
|
||||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// </copyright>
|
||||
//
|
||||
#pragma once
|
||||
|
||||
#include "ComputationNetwork.h"
|
||||
#include "DataReader.h"
|
||||
#include "MinibatchFetcher.h"
|
||||
|
||||
#include <thread>
|
||||
#include <mutex>
|
||||
#include <condition_variable>
|
||||
#include <atomic>
|
||||
|
||||
namespace Microsoft { namespace MSR { namespace CNTK {
|
||||
|
||||
// This derived class is an implementation of a prefetcher for minibatches. It contains a simple producer-consumer synchronization
|
||||
// between reader and compute. It creates a separate thread for the reader and it allows a single compute to execute concurrently
|
||||
// with a single read-ahead of a minibatch. This ensures that compute always has input data to work on, and is not blocked on
|
||||
// reads off the disk, nor transfers of memory from host to device in the GPU case.
|
||||
template<class ElemType>
|
||||
class MinibatchPrefetcher : public MinibatchFetcher<ElemType>
|
||||
{
|
||||
public:
|
||||
using MinibatchFetcher<ElemType>::m_sentenceBegin;
|
||||
using MinibatchFetcher<ElemType>::m_sentenceExistsBeginOrNoLabels;
|
||||
|
||||
MinibatchPrefetcher(IDataReader<ElemType>* trainSetDataReader,
|
||||
std::map<std::wstring, Matrix<ElemType>*>* inputMatrices,
|
||||
Matrix<ElemType>* sentenceBegin,
|
||||
vector<MinibatchPackingFlag>* sentenceExistsBeginOrNoLabels) :
|
||||
MinibatchFetcher<ElemType>(trainSetDataReader, inputMatrices, sentenceBegin, sentenceExistsBeginOrNoLabels),
|
||||
m_prefetchSentenceBegin(nullptr),
|
||||
m_prefetchSentenceExistsBeginOrNoLabels(nullptr),
|
||||
m_isEpochReadingDone(false),
|
||||
m_minibatchReady(false),
|
||||
m_isTerminating(false)
|
||||
{
|
||||
m_deviceId = this->m_inputMatrices->begin()->second->GetDeviceId();
|
||||
|
||||
for (auto iter = this->m_inputMatrices->begin(); iter != this->m_inputMatrices->end(); iter++)
|
||||
{
|
||||
assert(m_deviceId == iter->second->GetDeviceId());
|
||||
m_prefetchInput[iter->first] = new Matrix<ElemType>(iter->second->GetNumRows(),
|
||||
iter->second->GetNumCols(),
|
||||
iter->second->GetDeviceId(),
|
||||
iter->second->GetMatrixType(),
|
||||
iter->second->GetFormat());
|
||||
}
|
||||
|
||||
if (sentenceBegin != nullptr)
|
||||
{
|
||||
m_prefetchSentenceBegin = new Matrix<ElemType>(sentenceBegin->GetNumRows(),
|
||||
sentenceBegin->GetNumCols(),
|
||||
sentenceBegin->GetDeviceId(),
|
||||
sentenceBegin->GetMatrixType(),
|
||||
sentenceBegin->GetFormat());
|
||||
}
|
||||
|
||||
if (sentenceExistsBeginOrNoLabels != nullptr)
|
||||
{
|
||||
m_prefetchSentenceExistsBeginOrNoLabels = new vector<MinibatchPackingFlag>();
|
||||
}
|
||||
|
||||
// Launch a worker thread
|
||||
m_prefetchThread = std::thread([this]() { this->PrefetchWorker(); });
|
||||
}
|
||||
|
||||
virtual ~MinibatchPrefetcher()
|
||||
{
|
||||
// Send a signal to the worker thread that we are in shutdown mode
|
||||
m_isTerminating = true;
|
||||
|
||||
// Make sure that worker thread is unblocked because we are about to wait to join with it. If
|
||||
// worker thread is in the middle of reading, let it finish so that we can safely grab the lock.
|
||||
if (!m_isEpochReadingDone)
|
||||
{
|
||||
fprintf(stderr, "Exiting minibatch loop before reading all the data, waiting to sync with the prefetch thread...\n");
|
||||
m_cv.notify_one();
|
||||
}
|
||||
|
||||
m_prefetchThread.join();
|
||||
|
||||
// Clean up prefetch matrix inputs
|
||||
for (auto iter = m_prefetchInput.begin(); iter != m_prefetchInput.end(); iter++)
|
||||
{
|
||||
delete iter->second;
|
||||
}
|
||||
|
||||
delete m_prefetchSentenceBegin;
|
||||
delete m_prefetchSentenceExistsBeginOrNoLabels;
|
||||
}
|
||||
|
||||
virtual bool GetMinibatch()
|
||||
{
|
||||
bool hasMoreEpochReading = false;
|
||||
|
||||
// Wait until minibatch is ready to be consumed
|
||||
{
|
||||
std::unique_lock<std::mutex> mutexLock(m_mutex);
|
||||
m_cv.wait(mutexLock, [this] { return this->m_minibatchReady == true; });
|
||||
|
||||
// This function now owns the lock
|
||||
|
||||
// m_isTerminating is set on this same thread, but only in destructor
|
||||
assert(!m_isTerminating);
|
||||
|
||||
if (!m_isEpochReadingDone)
|
||||
{
|
||||
// Record an event after all computation for the previous minibatch has been scheduled
|
||||
// ensuring that this event can safely be observed after all compute has finished.
|
||||
Matrix<ElemType>::RecordComputeSyncPoint(m_deviceId);
|
||||
|
||||
// Swap the input matrices to make use of data that has already been read.
|
||||
// This should be as simple as "m_prefetchInput.swap(m_inputMatrices)", but unfortunately
|
||||
// underlying Matrix<ElemType> pointers are cached, so we need to dig deeper to do a swap.
|
||||
for (auto iter = this->m_inputMatrices->begin(); iter != this->m_inputMatrices->end(); iter++)
|
||||
{
|
||||
assert(m_deviceId == iter->second->GetDeviceId());
|
||||
std::swap(*(iter->second), *m_prefetchInput[iter->first]);
|
||||
}
|
||||
|
||||
if (m_sentenceBegin != nullptr)
|
||||
{
|
||||
assert(m_sentenceBegin->GetDeviceId() == m_prefetchSentenceBegin->GetDeviceId());
|
||||
std::swap(*m_sentenceBegin, *m_prefetchSentenceBegin);
|
||||
}
|
||||
|
||||
if (m_sentenceExistsBeginOrNoLabels != nullptr)
|
||||
{
|
||||
std::swap(*m_sentenceExistsBeginOrNoLabels, *m_prefetchSentenceExistsBeginOrNoLabels);
|
||||
}
|
||||
|
||||
hasMoreEpochReading = true;
|
||||
}
|
||||
|
||||
// Announce to worker thread to fetch another batch.
|
||||
m_minibatchReady = false;
|
||||
}
|
||||
m_cv.notify_one();
|
||||
|
||||
return hasMoreEpochReading;
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
void PrefetchWorker()
|
||||
{
|
||||
Matrix<ElemType>::EnableConcurrentRead(m_deviceId);
|
||||
|
||||
while (!m_isEpochReadingDone)
|
||||
{
|
||||
// Wait until prefetch is requested
|
||||
std::unique_lock<std::mutex> mutexLock(m_mutex);
|
||||
m_cv.wait(mutexLock, [this] { return (!this->m_minibatchReady || this->m_isTerminating); });
|
||||
|
||||
// We now own the lock
|
||||
|
||||
// If the main thread has an early exit due to break or exception, it
|
||||
// will initiate a shutdown and it will wait for this thread to complete.
|
||||
// Thus, we need to check for that condition before proceeding.
|
||||
m_isEpochReadingDone = m_isTerminating ? true : PrefetchOneMiniBatch();
|
||||
|
||||
// Signal to main thread that minibatch is ready to be consumed
|
||||
m_minibatchReady = true;
|
||||
|
||||
// Manual unlocking is done before notifying, to avoid waking up
|
||||
// the waiting thread only to block again (see notify_one for details)
|
||||
mutexLock.unlock();
|
||||
m_cv.notify_one();
|
||||
}
|
||||
}
|
||||
|
||||
bool PrefetchOneMiniBatch()
|
||||
{
|
||||
// This function must be called while holding a lock
|
||||
|
||||
// Schedule a wait event on the read stream that ensures that nothing can be further
|
||||
// scheduled on that stream until dependent compute event has been observed.
|
||||
// Please note that first two calls will be special cases:
|
||||
//
|
||||
// 1) First mini-batch is fetched before RecordComputeSyncPoint() is ever called
|
||||
// 2) Second mini-batch is fetched depending on RecordComputeSyncPoint() reported before
|
||||
// scheduling any actual work on the compute thread
|
||||
//
|
||||
// Dependency chain looks like this (F = fetch, C = compute):
|
||||
//
|
||||
// F1 -> C1 -> F3 (fetch #3 depends on compute #1 completing, which depended on fetch #1 completing)
|
||||
// F2 -> C2 -> F4
|
||||
// F3 -> C3 -> F5
|
||||
//
|
||||
// It is fetch #3 that *must* observe the event that happened between computes #1 and #2
|
||||
// before proceeding to read into the buffer that was used by compute #1.
|
||||
Matrix<ElemType>::SyncComputeBeforeRead(m_deviceId);
|
||||
|
||||
// Get the next minibatch and wait for it to be available on the device
|
||||
bool isDone = !this->m_reader->GetMinibatch(m_prefetchInput);
|
||||
this->m_reader->SetSentenceSegBatch(*m_prefetchSentenceBegin, *m_prefetchSentenceExistsBeginOrNoLabels);
|
||||
|
||||
Matrix<ElemType>::SyncPendingRead(m_deviceId);
|
||||
|
||||
return isDone;
|
||||
}
|
||||
|
||||
// @TODO: We need to add support for a larger number of prefetch buffers, larger than 1
|
||||
std::map<std::wstring, Matrix<ElemType>*> m_prefetchInput;
|
||||
Matrix<ElemType>* m_prefetchSentenceBegin;
|
||||
vector<MinibatchPackingFlag>* m_prefetchSentenceExistsBeginOrNoLabels;
|
||||
std::thread m_prefetchThread;
|
||||
std::mutex m_mutex;
|
||||
std::condition_variable m_cv;
|
||||
DEVICEID_TYPE m_deviceId;
|
||||
std::atomic<bool> m_isEpochReadingDone;
|
||||
std::atomic<bool> m_minibatchReady;
|
||||
std::atomic<bool> m_isTerminating;
|
||||
};
|
||||
|
||||
}}}
|
|
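The MinibatchPrefetcher deleted above is a single-slot producer/consumer hand-off between a reader thread and the compute thread: the worker fills a spare set of buffers while compute runs, and GetMinibatch swaps those buffers in and wakes the worker to read ahead again. A stripped-down sketch of that hand-off follows, independent of CNTK's Matrix and reader types; Batch and the loadNext callback are placeholders, not CNTK names.

#include <condition_variable>
#include <functional>
#include <mutex>
#include <thread>
#include <utility>
#include <vector>

struct Batch { std::vector<float> data; };   // stands in for the real minibatch buffers

class Prefetcher
{
public:
    explicit Prefetcher(std::function<bool(Batch&)> loadNext) : m_loadNext(std::move(loadNext))
    {
        m_worker = std::thread([this] { Worker(); });
    }

    ~Prefetcher()
    {
        {
            std::lock_guard<std::mutex> guard(m_mutex);
            m_terminating = true;            // tell the worker we are shutting down
        }
        m_cv.notify_one();
        m_worker.join();
    }

    // Compute thread: wait for the read-ahead batch, swap it in, then request the next one.
    bool GetMinibatch(Batch &out)
    {
        std::unique_lock<std::mutex> lock(m_mutex);
        m_cv.wait(lock, [this] { return m_ready; });
        const bool hasMore = !m_done;
        if (hasMore)
            std::swap(out, m_prefetched);    // double-buffer swap instead of a copy
        m_ready = false;                     // ask the worker for the next read-ahead
        lock.unlock();
        m_cv.notify_one();
        return hasMore;
    }

private:
    // Reader thread: refill the spare buffer whenever the compute thread has consumed the last one.
    void Worker()
    {
        while (true)
        {
            std::unique_lock<std::mutex> lock(m_mutex);
            m_cv.wait(lock, [this] { return !m_ready || m_terminating; });
            if (m_terminating || m_done)
            {
                m_ready = true;              // unblock a consumer that may still be waiting
                lock.unlock();
                m_cv.notify_one();
                return;
            }
            m_done = !m_loadNext(m_prefetched);  // read the next batch while compute runs
            m_ready = true;
            lock.unlock();
            m_cv.notify_one();
        }
    }

    std::function<bool(Batch&)> m_loadNext;
    Batch m_prefetched;
    std::thread m_worker;
    std::mutex m_mutex;
    std::condition_variable m_cv;
    bool m_ready = false;
    bool m_done = false;
    bool m_terminating = false;
};

Swapping buffers rather than copying is the point of the design: the compute thread never waits on disk or host-to-device transfer, it only waits on the condition variable when the reader has fallen behind.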
@ -19,8 +19,6 @@
|
|||
#include <random>
|
||||
#include "TimerUtility.h"
|
||||
#include "Profiler.h"
|
||||
#include "MinibatchFetcher.h"
|
||||
#include "MinibatchPrefetcher.h"
|
||||
|
||||
#ifdef MPI_SUPPORT
|
||||
#include "mpi.h"
|
||||
|
@ -201,9 +199,6 @@ public:
|
|||
size_t numMBsToShowResult = configSGD("numMBsToShowResult", "10");
|
||||
size_t numMBsToCUDAProfile = configSGD("numMBsToCUDAProfile", "0");
|
||||
|
||||
// Whether it is OK for read to happen on a separate thread while compute is happening
|
||||
bool doPrefetchTrainingData = configSGD("prefetchTrainingData", "true");
|
||||
|
||||
bool keepCheckPointFiles = configSGD("keepCheckPointFiles", "false");
|
||||
|
||||
bool gradientClippingWithTruncation = configSGD("gradientClippingWithTruncation", "true");
|
||||
|
@ -269,8 +264,7 @@ public:
|
|||
gradientCheckSigDigit, validateAfterModelReloading, rpi,
|
||||
learnRateAdjustInterval, UsingAllDataForPreComputedNode,
|
||||
needAveMultiplier, L2RegWeight, L1RegWeight,
|
||||
autoAdjustMinibatch, minibatchSizeTuningFrequency, minibatchSizeTuningMax,
|
||||
doPrefetchTrainingData);
|
||||
autoAdjustMinibatch, minibatchSizeTuningFrequency, minibatchSizeTuningMax);
|
||||
}
|
||||
|
||||
//autoLearnRateSearchType is applied only if the learning rate for the epoch is not specified in learningRatesPerMB and learningRatesPerSample
|
||||
|
@ -316,8 +310,7 @@ public:
|
|||
const ElemType L1RegWeight = 0,
|
||||
const bool autoAdjustMinibatch = false,
|
||||
const size_t minibatchSizeTuningFrequency = 1,
|
||||
const size_t minibatchSizeTuningMax = 1048576,
|
||||
bool doPrefetchTrainingData = true)
|
||||
const size_t minibatchSizeTuningMax = 1048576)
|
||||
{
|
||||
m_numPrevLearnRates = numPrevLearnRates;
|
||||
m_prevChosenMinibatchSize = 0;
|
||||
|
@ -477,7 +470,6 @@ public:
|
|||
m_doGradientCheck = doGradientCheck;
|
||||
m_gradientCheckSigDigit = gradientCheckSigDigit;
|
||||
m_validateAfterModelReloading = validateAfterModelReloading;
|
||||
m_doPrefetchTrainingData = doPrefetchTrainingData;
|
||||
|
||||
msra::files::make_intermediate_dirs(m_modelPath);
|
||||
}
|
||||
|
@ -865,9 +857,6 @@ protected:
|
|||
break;
|
||||
}
|
||||
|
||||
#ifdef MPI_SUPPORT
|
||||
INT32 mySamples = (INT32)
|
||||
#endif
|
||||
size_t chosenMinibatchSize;
|
||||
size_t actualMinibatchSize;
|
||||
|
||||
|
@ -910,20 +899,23 @@ protected:
|
|||
fprintf(stderr, "Starting Epoch %d: learning rate per sample = %f momentum = %f \n",
|
||||
i + 1, learnRatePerSample, MomentumPerMB(m_momentumPerSample[i], actualMinibatchSize));
|
||||
|
||||
#ifdef MPI_SUPPORT
|
||||
INT32 mySamples = (INT32)
|
||||
#endif
|
||||
TrainOneEpoch(net,
|
||||
refNet,
|
||||
refNode,
|
||||
i,
|
||||
m_epochSize,
|
||||
trainSetDataReader,
|
||||
learnRatePerSample,
|
||||
chosenMinibatchSize,
|
||||
FeatureNodes,
|
||||
labelNodes,
|
||||
criterionNodes,
|
||||
evaluationNodes,
|
||||
inputMatrices,
|
||||
learnableNodes, smoothedGradients,
|
||||
refNet,
|
||||
refNode,
|
||||
i,
|
||||
m_epochSize,
|
||||
trainSetDataReader,
|
||||
learnRatePerSample,
|
||||
chosenMinibatchSize,
|
||||
FeatureNodes,
|
||||
labelNodes,
|
||||
criterionNodes,
|
||||
evaluationNodes,
|
||||
inputMatrices,
|
||||
learnableNodes, smoothedGradients,
|
||||
epochCriterion, epochEvalErrors, totalSamplesSeen);
|
||||
|
||||
timer.Stop();
|
||||
|
@ -1708,22 +1700,15 @@ protected:
|
|||
trainSetDataReader->StartMinibatchLoop(tunedMBSize, epochNumber, m_epochSize);
|
||||
|
||||
AttemptUtteranceDerivativeFeatures(net, trainSetDataReader, FeatureNodes, inputMatrices);
|
||||
std::unique_ptr<MinibatchFetcher<ElemType>> mbFetcher(
|
||||
m_doPrefetchTrainingData ?
|
||||
new MinibatchPrefetcher<ElemType>(trainSetDataReader, inputMatrices, &(net.SentenceBoundary()), &(net.MinibatchPackingFlags())) :
|
||||
new MinibatchFetcher<ElemType>(trainSetDataReader, inputMatrices, &(net.SentenceBoundary()), &(net.MinibatchPackingFlags())));
|
||||
|
||||
fprintf(stderr, "\nStarting minibatch loop, prefetching is: %s\n", m_doPrefetchTrainingData ? "ENABLED" : "DISABLED");
|
||||
|
||||
Timer timer;
|
||||
timer.Start();
|
||||
|
||||
while (mbFetcher->GetMinibatch())
|
||||
while (trainSetDataReader->GetMinibatch(*inputMatrices))
|
||||
{
|
||||
#ifdef MPI_SUPPORT
|
||||
DecimateMinibatch(inputMatrices);
|
||||
#endif
|
||||
|
||||
UpdateEvalTimeStamps(FeatureNodes);
|
||||
UpdateEvalTimeStamps(labelNodes);
|
||||
|
||||
|
@ -1735,6 +1720,7 @@ protected:
|
|||
|
||||
net.SetActualMiniBatchSize(actualMBSize);
|
||||
net.SetActualNbrSlicesInEachRecIter(trainSetDataReader->NumberSlicesInEachRecurrentIter());
|
||||
trainSetDataReader->SetSentenceSegBatch(net.SentenceBoundary(), net.MinibatchPackingFlags());
|
||||
|
||||
#ifndef EVALDLL
|
||||
if (m_doGradientCheck && GradientCheck(net, criterionNodes, learnableNodes, 0) == false)
|
||||
|
@ -1843,6 +1829,7 @@ protected:
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
timer.Restart();
|
||||
totalEpochSamples += actualMBSize;
|
||||
totalSamplesSeen += actualMBSize;
|
||||
|
@ -2401,7 +2388,7 @@ protected:
|
|||
bool m_needAveMultiplier;
|
||||
ElemType m_L2RegWeight;
|
||||
ElemType m_L1RegWeight;
|
||||
bool m_doPrefetchTrainingData;
|
||||
|
||||
};
|
||||
template class SGD<float>;
|
||||
template class SGD<double>;
|
||||
|
|
|
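For context on the SGD changes above: the removed wiring picks between a plain fetcher and the prefetching one at runtime, behind a common base class and the prefetchTrainingData config flag. A minimal sketch of that selection pattern is below; the class and function names are placeholders, not CNTK's.

#include <cstdio>
#include <memory>

// Common interface so the training loop does not care which strategy is active.
struct Fetcher
{
    virtual ~Fetcher() {}
    virtual bool GetMinibatch() = 0;   // returns false when the epoch is exhausted
};

struct SequentialFetcher : Fetcher
{
    bool GetMinibatch() override { /* call the reader directly */ return false; }
};

struct PrefetchingFetcher : Fetcher
{
    bool GetMinibatch() override { /* swap in the batch a worker thread read ahead */ return false; }
};

std::unique_ptr<Fetcher> MakeFetcher(bool doPrefetch)
{
    if (doPrefetch)
        return std::unique_ptr<Fetcher>(new PrefetchingFetcher());
    return std::unique_ptr<Fetcher>(new SequentialFetcher());
}

int main()
{
    auto fetcher = MakeFetcher(/*doPrefetch=*/true);
    while (fetcher->GetMinibatch())
        ;   // train on the minibatch
    std::printf("epoch done\n");
}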
@ -69,6 +69,7 @@
|
|||
<SDLCheck>true</SDLCheck>
|
||||
<AdditionalIncludeDirectories>..\Common\include;..\Math\Math;"c:\Program Files\NVIDIA Corporation\GDK\gdk_win7_amd64_release\nvml\include"</AdditionalIncludeDirectories>
|
||||
<TreatWarningAsError>true</TreatWarningAsError>
|
||||
<AdditionalOptions>/bigobj %(AdditionalOptions)</AdditionalOptions>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Windows</SubSystem>
|
||||
|
@ -93,7 +94,7 @@
|
|||
<SDLCheck>true</SDLCheck>
|
||||
<AdditionalIncludeDirectories>..\Common\include; ..\Math\Math; "c:\Program Files\NVIDIA Corporation\GDK\gdk_win7_amd64_release\nvml\include"</AdditionalIncludeDirectories>
|
||||
<OpenMPSupport>false</OpenMPSupport>
|
||||
<AdditionalOptions>/d2Zi+ %(AdditionalOptions)</AdditionalOptions>
|
||||
<AdditionalOptions>/d2Zi+ /bigobj %(AdditionalOptions)</AdditionalOptions>
|
||||
<RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
|
||||
<TreatWarningAsError>true</TreatWarningAsError>
|
||||
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
|
||||
|
|
|
@ -47,8 +47,6 @@ __declspec (thread)
|
|||
static
|
||||
#endif
|
||||
cudaStream_t t_stream = cudaStreamDefault;
|
||||
cudaStream_t t_readAheadStream = nullptr;
|
||||
cudaEvent_t t_computeEvent = nullptr;
|
||||
|
||||
#define DEFAULT_THREAD_PER_DIM 16
|
||||
|
||||
|
@ -66,6 +64,7 @@ cudaStream_t MATH_API GetStream()
|
|||
return t_stream;
|
||||
}
|
||||
|
||||
|
||||
void CURAND_CALL(curandStatus x)
|
||||
{
|
||||
if (x != CURAND_STATUS_SUCCESS)
|
||||
|
@ -161,51 +160,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
return cuHandle;
|
||||
}
|
||||
|
||||
template<class ElemType>
|
||||
void GPUMatrix<ElemType>::RecordComputeSyncPoint()
|
||||
{
|
||||
if (t_computeEvent == nullptr)
|
||||
{
|
||||
CUDA_CALL(cudaEventCreate(&t_computeEvent));
|
||||
}
|
||||
CUDA_CALL(cudaEventRecord(t_computeEvent, t_stream));
|
||||
}
|
||||
|
||||
template<class ElemType>
|
||||
void GPUMatrix<ElemType>::SyncComputeBeforeRead()
|
||||
{
|
||||
assert(t_readAheadStream != nullptr);
|
||||
if (t_computeEvent != nullptr)
|
||||
{
|
||||
CUDA_CALL(cudaStreamWaitEvent(t_readAheadStream, t_computeEvent, 0 /*flags must be 0*/));
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: We are leaking t_readAheadStream, call cudaStreamDestroy
|
||||
// Not a big issue since it will be cleaned up on process shutdown
|
||||
template<class ElemType>
|
||||
void GPUMatrix<ElemType>::EnableConcurrentRead(DEVICEID_TYPE devId)
|
||||
{
|
||||
CUDA_CALL(cudaSetDevice(devId));
|
||||
if (t_readAheadStream == nullptr)
|
||||
{
|
||||
CUDA_CALL(cudaStreamCreateWithFlags(&t_readAheadStream, cudaStreamNonBlocking));
|
||||
}
|
||||
}
|
||||
|
||||
template<class ElemType>
|
||||
void GPUMatrix<ElemType>::SyncPendingRead()
|
||||
{
|
||||
assert(t_readAheadStream != nullptr);
|
||||
CUDA_CALL(cudaStreamSynchronize(t_readAheadStream));
|
||||
}
|
||||
|
||||
template<class ElemType>
|
||||
void GPUMatrix<ElemType>::SyncPendingCompute()
|
||||
{
|
||||
CUDA_CALL(cudaStreamSynchronize(t_stream));
|
||||
}
|
||||
|
||||
// GetBestGPUDeviceId - Get the best GPU DeviceId, based on cuda information
|
||||
// TODO: should be replaced by BestGpu class instead, it's much better
|
||||
template<class ElemType>
|
||||
|
@ -1101,17 +1055,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
{
|
||||
if (!(matrixFlags&matrixFormatRowMajor))
|
||||
{
|
||||
if (t_readAheadStream == nullptr)
|
||||
{
|
||||
CUDA_CALL(cudaMemcpy(m_pArray, pArray, sizeof(ElemType)*GetNumElements(),
|
||||
(matrixFlags&matrixFlagSetValueOnDevice) ? cudaMemcpyDeviceToDevice : cudaMemcpyHostToDevice));
|
||||
}
|
||||
else
|
||||
{
|
||||
// We are calling async version of the memcpy API to do the copy on a separate stream so that it can overlap with compute.
|
||||
CUDA_CALL(cudaMemcpyAsync(m_pArray, pArray, sizeof(ElemType)*GetNumElements(),
|
||||
(matrixFlags&matrixFlagSetValueOnDevice) ? cudaMemcpyDeviceToDevice : cudaMemcpyHostToDevice, t_readAheadStream));
|
||||
}
|
||||
CUDA_CALL(cudaMemcpy(m_pArray, pArray, sizeof(ElemType)*GetNumElements(),
|
||||
(matrixFlags&matrixFlagSetValueOnDevice)?cudaMemcpyDeviceToDevice:cudaMemcpyHostToDevice));
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
|
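The GPUMatrix helpers removed in the hunks above follow a standard CUDA overlap recipe: record an event on the compute stream, make a second non-blocking stream wait on that event before it reuses a device buffer, issue the host-to-device copy with cudaMemcpyAsync on that stream, and synchronize the stream before compute consumes the data. The sketch below shows that recipe with plain CUDA runtime calls; error handling is omitted and these free functions only mirror the intent of the deleted members, they are not CNTK's API.

#include <cuda_runtime.h>

// Compute thread: mark the point on the compute stream that the next copy must not overtake.
void RecordComputeSyncPoint(cudaEvent_t computeDone, cudaStream_t computeStream)
{
    cudaEventRecord(computeDone, computeStream);
}

// Read-ahead thread: the copy may not start until the recorded compute has finished,
// then it runs asynchronously and overlaps with whatever compute is scheduled next.
void BeginAsyncRead(float *devDst, const float *hostSrc, size_t bytes,
                    cudaStream_t readStream, cudaEvent_t computeDone)
{
    cudaStreamWaitEvent(readStream, computeDone, 0);
    cudaMemcpyAsync(devDst, hostSrc, bytes, cudaMemcpyHostToDevice, readStream);
}

// Called right before compute consumes the freshly copied buffer.
void FinishAsyncRead(cudaStream_t readStream)
{
    cudaStreamSynchronize(readStream);
}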
@ -105,11 +105,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
~GPUMatrix(void);
|
||||
|
||||
static int GetBestGPUDeviceId();
|
||||
static void RecordComputeSyncPoint();
|
||||
static void SyncComputeBeforeRead();
|
||||
static void SyncPendingRead();
|
||||
static void SyncPendingCompute();
|
||||
static void EnableConcurrentRead(DEVICEID_TYPE devId);
|
||||
int GetComputeDeviceId() const;
|
||||
DEVICEID_TYPE PrepareDevice(DEVICEID_TYPE deviceId = -1) const;
|
||||
|
||||
|
|
|
@ -4444,56 +4444,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
return (DEVICEID_TYPE)GPUMatrix<ElemType>::GetBestGPUDeviceId();
|
||||
}
|
||||
|
||||
template<class ElemType>
|
||||
void Matrix<ElemType>::RecordComputeSyncPoint(DEVICEID_TYPE devId)
|
||||
{
|
||||
// This function is necessary and has any effect only on GPU
|
||||
if (devId >= 0)
|
||||
{
|
||||
GPUMatrix<ElemType>::RecordComputeSyncPoint();
|
||||
}
|
||||
}
|
||||
|
||||
template<class ElemType>
|
||||
void Matrix<ElemType>::SyncComputeBeforeRead(DEVICEID_TYPE devId)
|
||||
{
|
||||
// This function is necessary and has any effect only on GPU
|
||||
if (devId >= 0)
|
||||
{
|
||||
GPUMatrix<ElemType>::SyncComputeBeforeRead();
|
||||
}
|
||||
}
|
||||
|
||||
template<class ElemType>
|
||||
void Matrix<ElemType>::SyncPendingRead(DEVICEID_TYPE devId)
|
||||
{
|
||||
// This function is necessary and has any effect only on GPU
|
||||
if (devId >= 0)
|
||||
{
|
||||
GPUMatrix<ElemType>::SyncPendingRead();
|
||||
}
|
||||
}
|
||||
|
||||
template<class ElemType>
|
||||
void Matrix<ElemType>::SyncPendingCompute(DEVICEID_TYPE devId)
|
||||
{
|
||||
// This function is necessary and has any effect only on GPU
|
||||
if (devId >= 0)
|
||||
{
|
||||
GPUMatrix<ElemType>::SyncPendingCompute();
|
||||
}
|
||||
}
|
||||
|
||||
template<class ElemType>
|
||||
void Matrix<ElemType>::EnableConcurrentRead(DEVICEID_TYPE devId)
|
||||
{
|
||||
// This function is necessary and has any effect only on GPU
|
||||
if (devId >= 0)
|
||||
{
|
||||
GPUMatrix<ElemType>::EnableConcurrentRead(devId);
|
||||
}
|
||||
}
|
||||
|
||||
template<class ElemType>
|
||||
ElemType Matrix<ElemType>::Exp10(ElemType num)
|
||||
{
|
||||
|
|
|
@ -344,26 +344,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
public:
|
||||
static DEVICEID_TYPE GetBestGPUDeviceId(); //{ return GPUMatrix<ElemType>::GetBestGPUDeviceId();}
|
||||
|
||||
// This API records an event, in the case of GPU computation, that happens between two compute iterations
|
||||
// (it's a compute delimiter between two minibatch iterations)
|
||||
static void RecordComputeSyncPoint(DEVICEID_TYPE devId);
|
||||
|
||||
// This API ensures, in the case of GPU computation, that all compute is flushed before read decides to modify
|
||||
// buffers, and potentially invalidate computation.
|
||||
static void SyncComputeBeforeRead(DEVICEID_TYPE devId);
|
||||
|
||||
// This API ensures, in the case of GPU computation, that all async reads are finished before notifying compute
|
||||
// that the read buffers are ready on the device.
|
||||
static void SyncPendingRead(DEVICEID_TYPE devId);
|
||||
|
||||
// This API ensures, in the case of GPU computation, that all compute is flushed before transferring the criterion
|
||||
// back to the host. This is a workaround for contention between two memcpy calls, one host-to-device and one
|
||||
// device-to-host, which are for some reason getting serialized and cause big delays in compute.
|
||||
static void SyncPendingCompute(DEVICEID_TYPE devId);
|
||||
|
||||
// This API ensures, in the case of GPU computation, creates a separate stream for reading data into GPU buffer.
|
||||
static void EnableConcurrentRead(DEVICEID_TYPE devId);
|
||||
|
||||
//static BLAS functions
|
||||
|
||||
// singular value decomposition of A as A = U*SIGMA*VT
|
||||
|
|
|
@ -403,16 +403,6 @@ namespace Microsoft {
|
|||
return -1; // CPU
|
||||
}
|
||||
|
||||
template<class ElemType> void GPUMatrix<ElemType>::RecordComputeSyncPoint() { }
|
||||
|
||||
template<class ElemType> void GPUMatrix<ElemType>::SyncComputeBeforeRead() { }
|
||||
|
||||
template<class ElemType> void GPUMatrix<ElemType>::SyncPendingRead() { }
|
||||
|
||||
template<class ElemType> void GPUMatrix<ElemType>::SyncPendingCompute() { }
|
||||
|
||||
template<class ElemType> void GPUMatrix<ElemType>::EnableConcurrentRead(DEVICEID_TYPE devId) { }
|
||||
|
||||
// PrepareDevice - Setup the correct cuda context for an operation
|
||||
// deviceId - the device on which the operation will take place
|
||||
// defaults to -1, which means use matrices current device
|
||||
|
|