Windows->Linux: finalizing reconciliation of Linux vs. Windows version of HTKMLFReader

This commit is contained in:
Vladimir Ivanov 2015-07-01 11:41:24 -07:00
Родитель 5c590f7417
Коммит f5ce4c8315
6 изменённых файлов: 30 добавлений и 25 удалений

Просмотреть файл

@@ -934,16 +934,17 @@ public:
if (!isendworkaround.empty() && isendworkaround[k]) // secondary criterion to detect ends in broken lattices
{
k--; // don't advance, since nothing to advance over
goto skipscores;
}
// this is a regular token: update it in-place
auto & ai = uniquededgedatatokens[k];
if (ai.unit >= idmap.size())
throw std::runtime_error ("fread: broken-file heuristics failed");
ai.updateunit (idmap); // updates itself
if (!ai.last)
continue;
skipscores:
else
{
// this is a regular token: update it in-place
auto & ai = uniquededgedatatokens[k];
if (ai.unit >= idmap.size())
throw std::runtime_error ("fread: broken-file heuristics failed");
ai.updateunit (idmap); // updates itself
if (!ai.last)
continue;
}
// if last then skip over the lm and ac scores
k += skipscoretokens;
uniquealignments++;

Просмотреть файл

@@ -249,11 +249,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
const std::string& randomizeString = readerConfig("randomize");
if (randomizeString == "None")
{
randomize = m_htkRandomizeDisable; // randomizeNone;
randomize = randomizeNone;
}
else if (randomizeString == "Auto")
{
randomize = m_htkRandomizeAuto; // randomizeAuto
randomize = randomizeAuto;
}
else
{
@@ -379,8 +379,12 @@ namespace Microsoft { namespace MSR { namespace CNTK {
//std::vector<std::wstring> pagepath;
foreach_index(i, mlfpathsmulti)
{
const map<string,size_t>* wordmap = NULL;
#ifdef WIN32
wordmap = unigram ? &unigramsymbols : (map<string,size_t>*) NULL;
#endif
msra::asr::htkmlfreader<msra::asr::htkmlfentry,msra::lattices::lattice::htkmlfwordsequence>
labels(mlfpathsmulti[i], restrictmlftokeys, statelistpaths[i], /*unigram ? &unigramsymbols :*/(map<string,size_t>*) NULL, (map<string,size_t>*) NULL, htktimetoframe); // label MLF
labels(mlfpathsmulti[i], restrictmlftokeys, statelistpaths[i], wordmap, (map<string,size_t>*) NULL, htktimetoframe); // label MLF
// get the temp file name for the page file
labelsmulti.push_back(labels);
}

Просмотреть файл

@@ -248,7 +248,6 @@ size_t filesize (FILE * f)
{
RuntimeError ("error determining file position: %s", strerror (errno));
}
int rc = fseek (f, 0, SEEK_END);
int rc = _fseeki64 (f, 0, SEEK_END);
if (rc != 0)
{

Просмотреть файл

@@ -230,7 +230,7 @@ public:
// We write to a tmp file first to ensure we don't leave broken files that would confuse make mode.
template<class MATRIX> static void write (const wstring & path, const string & kindstr, unsigned int period, const MATRIX & feat)
{
wstring tmppath = path + L""; // tmp path for make-mode compliant
wstring tmppath = path + L"$$"; // tmp path for make-mode compliant
unlinkOrDie (path); // delete if old file is already there
// write it out
size_t featdim = feat.rows();

Просмотреть файл

@@ -933,20 +933,20 @@ public:
const size_t skipscoretokens = info.hasacscores ? 2 : 1;
for (size_t k = skipscoretokens; k < uniquededgedatatokens.size(); k++)
{
auto & ai = uniquededgedatatokens[k];
if (!isendworkaround.empty() && isendworkaround[k]) // secondary criterion to detect ends in broken lattices
{
k--; // don't advance, since nothing to advance over
goto skipscores;
}
// this is a regular token: update it in-place
if (ai.unit >= idmap.size())
throw std::runtime_error ("fread: broken-file heuristics failed");
ai.updateunit (idmap); // updates itself
if (!ai.last)
continue;
skipscores:
else
{
// this is a regular token: update it in-place
auto & ai = uniquededgedatatokens[k];
if (ai.unit >= idmap.size())
throw std::runtime_error ("fread: broken-file heuristics failed");
ai.updateunit (idmap); // updates itself
if (!ai.last)
continue;
}
// if last then skip over the lm and ac scores
k += skipscoretokens;
uniquealignments++;

Просмотреть файл

@@ -612,7 +612,8 @@ private:
return sweep;
currentsweep = sweep;
fprintf (stderr, "lazyrandomization: re-randomizing for sweep %zu in %s mode\n", currentsweep, framemode ? "frame" : "utterance");
if (verbosity>0)
fprintf (stderr, "lazyrandomization: re-randomizing for sweep %zu in %s mode\n", currentsweep, framemode ? "frame" : "utterance");
const size_t sweepts = sweep * _totalframes; // first global frame index for this sweep