Windows->Linux: finalizing reconciliation of Linux vs. Windows version of HTKMLFReader
This commit is contained in:
Parent
5c590f7417
Commit
f5ce4c8315
|
@ -934,16 +934,17 @@ public:
|
|||
if (!isendworkaround.empty() && isendworkaround[k]) // secondary criterion to detect ends in broken lattices
|
||||
{
|
||||
k--; // don't advance, since nothing to advance over
|
||||
goto skipscores;
|
||||
}
|
||||
// this is a regular token: update it in-place
|
||||
auto & ai = uniquededgedatatokens[k];
|
||||
if (ai.unit >= idmap.size())
|
||||
throw std::runtime_error ("fread: broken-file heuristics failed");
|
||||
ai.updateunit (idmap); // updates itself
|
||||
if (!ai.last)
|
||||
continue;
|
||||
skipscores:
|
||||
else
|
||||
{
|
||||
// this is a regular token: update it in-place
|
||||
auto & ai = uniquededgedatatokens[k];
|
||||
if (ai.unit >= idmap.size())
|
||||
throw std::runtime_error ("fread: broken-file heuristics failed");
|
||||
ai.updateunit (idmap); // updates itself
|
||||
if (!ai.last)
|
||||
continue;
|
||||
}
|
||||
// if last then skip over the lm and ac scores
|
||||
k += skipscoretokens;
|
||||
uniquealignments++;
|
||||
|
|
|
@ -249,11 +249,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
const std::string& randomizeString = readerConfig("randomize");
|
||||
if (randomizeString == "None")
|
||||
{
|
||||
randomize = m_htkRandomizeDisable; // randomizeNone;
|
||||
randomize = randomizeNone;
|
||||
}
|
||||
else if (randomizeString == "Auto")
|
||||
{
|
||||
randomize = m_htkRandomizeAuto; // randomizeAuto
|
||||
randomize = randomizeAuto;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -379,8 +379,12 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
//std::vector<std::wstring> pagepath;
|
||||
foreach_index(i, mlfpathsmulti)
|
||||
{
|
||||
const map<string,size_t>* wordmap = NULL;
|
||||
#ifdef WIN32
|
||||
wordmap = unigram ? &unigramsymbols : (map<string,size_t>*) NULL;
|
||||
#endif
|
||||
msra::asr::htkmlfreader<msra::asr::htkmlfentry,msra::lattices::lattice::htkmlfwordsequence>
|
||||
labels(mlfpathsmulti[i], restrictmlftokeys, statelistpaths[i], /*unigram ? &unigramsymbols :*/(map<string,size_t>*) NULL, (map<string,size_t>*) NULL, htktimetoframe); // label MLF
|
||||
labels(mlfpathsmulti[i], restrictmlftokeys, statelistpaths[i], wordmap, (map<string,size_t>*) NULL, htktimetoframe); // label MLF
|
||||
// get the temp file name for the page file
|
||||
labelsmulti.push_back(labels);
|
||||
}
|
||||
|
|
|
@ -248,7 +248,6 @@ size_t filesize (FILE * f)
|
|||
{
|
||||
RuntimeError ("error determining file position: %s", strerror (errno));
|
||||
}
|
||||
int rc = fseek (f, 0, SEEK_END);
|
||||
int rc = _fseeki64 (f, 0, SEEK_END);
|
||||
if (rc != 0)
|
||||
{
|
||||
|
|
|
@ -230,7 +230,7 @@ public:
|
|||
// We write to a tmp file first to ensure we don't leave broken files that would confuse make mode.
|
||||
template<class MATRIX> static void write (const wstring & path, const string & kindstr, unsigned int period, const MATRIX & feat)
|
||||
{
|
||||
wstring tmppath = path + L""; // tmp path for make-mode compliant
|
||||
wstring tmppath = path + L"$$"; // tmp path for make-mode compliant
|
||||
unlinkOrDie (path); // delete if old file is already there
|
||||
// write it out
|
||||
size_t featdim = feat.rows();
|
||||
|
|
|
@ -933,20 +933,20 @@ public:
|
|||
const size_t skipscoretokens = info.hasacscores ? 2 : 1;
|
||||
for (size_t k = skipscoretokens; k < uniquededgedatatokens.size(); k++)
|
||||
{
|
||||
auto & ai = uniquededgedatatokens[k];
|
||||
if (!isendworkaround.empty() && isendworkaround[k]) // secondary criterion to detect ends in broken lattices
|
||||
{
|
||||
k--; // don't advance, since nothing to advance over
|
||||
goto skipscores;
|
||||
}
|
||||
// this is a regular token: update it in-place
|
||||
|
||||
if (ai.unit >= idmap.size())
|
||||
throw std::runtime_error ("fread: broken-file heuristics failed");
|
||||
ai.updateunit (idmap); // updates itself
|
||||
if (!ai.last)
|
||||
continue;
|
||||
skipscores:
|
||||
else
|
||||
{
|
||||
// this is a regular token: update it in-place
|
||||
auto & ai = uniquededgedatatokens[k];
|
||||
if (ai.unit >= idmap.size())
|
||||
throw std::runtime_error ("fread: broken-file heuristics failed");
|
||||
ai.updateunit (idmap); // updates itself
|
||||
if (!ai.last)
|
||||
continue;
|
||||
}
|
||||
// if last then skip over the lm and ac scores
|
||||
k += skipscoretokens;
|
||||
uniquealignments++;
|
||||
|
|
|
@ -612,7 +612,8 @@ private:
|
|||
return sweep;
|
||||
|
||||
currentsweep = sweep;
|
||||
fprintf (stderr, "lazyrandomization: re-randomizing for sweep %zu in %s mode\n", currentsweep, framemode ? "frame" : "utterance");
|
||||
if (verbosity>0)
|
||||
fprintf (stderr, "lazyrandomization: re-randomizing for sweep %zu in %s mode\n", currentsweep, framemode ? "frame" : "utterance");
|
||||
|
||||
const size_t sweepts = sweep * _totalframes; // first global frame index for this sweep
|
||||
|
||||
|
|
Loading…
Reference in new issue