Added some performance counters to HTKMLFReader
This commit is contained in:
Родитель
e70bb0fe40
Коммит
e4f6a266ab
|
@ -25,6 +25,7 @@
|
|||
#include "DataReader.h"
|
||||
#include "commandArgUtil.h"
|
||||
#include "HTKMLFReader.h"
|
||||
#include "TimerUtility.h"
|
||||
#ifdef LEAKDETECT
|
||||
#include <vld.h> // for memory leak detection
|
||||
#endif
|
||||
|
@ -104,7 +105,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
void HTKMLFReader<ElemType>::PrepareForTrainingOrTesting(const ConfigParameters& readerConfig)
|
||||
{
|
||||
vector<wstring> scriptpaths;
|
||||
vector<wstring> RootPathInScripts;
|
||||
vector<wstring> RootPathInScripts;
|
||||
vector<wstring> mlfpaths;
|
||||
vector<vector<wstring>>mlfpathsmulti;
|
||||
size_t firstfilesonly = SIZE_MAX; // set to a lower value for testing
|
||||
|
@ -180,8 +181,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
|
||||
m_featureNameToIdMap[featureNames[i]]= iFeat;
|
||||
scriptpaths.push_back(thisFeature("scpFile"));
|
||||
RootPathInScripts.push_back(thisFeature("PrefixPathInSCP", ""));
|
||||
m_featureNameToDimMap[featureNames[i]] = m_featDims[i];
|
||||
RootPathInScripts.push_back(thisFeature("PrefixPathInSCP", ""));
|
||||
m_featureNameToDimMap[featureNames[i]] = m_featDims[i];
|
||||
|
||||
m_featuresBufferMultiIO.push_back(nullptr);
|
||||
m_featuresBufferAllocatedMultiIO.push_back(0);
|
||||
|
@ -323,7 +324,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
LogicError("nbrUttsInEachRecurrentIter cannot be more than 1 in frame mode reading.");
|
||||
}
|
||||
|
||||
int verbosity = readerConfig("verbosity","2");
|
||||
m_verbosity = readerConfig("verbosity","2");
|
||||
|
||||
// determine whether partial minibatches are desired
|
||||
std::string minibatchMode(readerConfig("minibatchMode","Partial"));
|
||||
|
@ -361,50 +362,50 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
if (n!=numFiles)
|
||||
RuntimeError(msra::strfun::strprintf ("number of files in each scriptfile inconsistent (%d vs. %d)", numFiles,n));
|
||||
|
||||
// post processing file list :
|
||||
// if the user specified a prefix path (PrefixPathInSCP), prepend it to each path in filelist
|
||||
// else do the dotdotdot expansion if necessary
|
||||
wstring rootpath = RootPathInScripts[i];
|
||||
if (!rootpath.empty()) // use has specified a path prefix for this feature
|
||||
{
|
||||
// first, make the slashes consistent (Linux users: this is not necessary for you)
|
||||
std::replace(rootpath.begin(), rootpath.end(), L'\\', L'/');
|
||||
// second, remove any trailing slashes
|
||||
std::wregex trailer(L"/+$");
|
||||
rootpath=std::regex_replace(rootpath, trailer, wstring(L""));
|
||||
// third, join the rootpath with each entry in filelist
|
||||
if (!rootpath.empty())
|
||||
{
|
||||
for (wstring & path : filelist)
|
||||
{
|
||||
#ifdef WIN32 // sorry for windows users, we have to pay some cost here
|
||||
std::replace(path.begin(), path.end(), L'\\', L'/');
|
||||
// post processing file list :
|
||||
// if the user specified a prefix path (PrefixPathInSCP), prepend it to each path in filelist
|
||||
// else do the dotdotdot expansion if necessary
|
||||
wstring rootpath = RootPathInScripts[i];
|
||||
if (!rootpath.empty()) // use has specified a path prefix for this feature
|
||||
{
|
||||
// first, make the slashes consistent (Linux users: this is not necessary for you)
|
||||
std::replace(rootpath.begin(), rootpath.end(), L'\\', L'/');
|
||||
// second, remove any trailing slashes
|
||||
std::wregex trailer(L"/+$");
|
||||
rootpath=std::regex_replace(rootpath, trailer, wstring(L""));
|
||||
// third, join the rootpath with each entry in filelist
|
||||
if (!rootpath.empty())
|
||||
{
|
||||
for (wstring & path : filelist)
|
||||
{
|
||||
#ifdef WIN32 // sorry for windows users, we have to pay some cost here
|
||||
std::replace(path.begin(), path.end(), L'\\', L'/');
|
||||
#endif
|
||||
path = rootpath + L"/" + path;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
do "..." expansion if SCP uses relative path names
|
||||
"..." in the SCP stands for the directory that contains the SCP file
|
||||
for example, if scp file is "//aaa/bbb/ccc/ddd.scp"
|
||||
and contains entry like
|
||||
.../file1.feat
|
||||
.../file2.feat
|
||||
etc.
|
||||
the features will be read from
|
||||
//aaa/bbb/ccc/file1.feat
|
||||
//aaa/bbb/ccc/file2.feat
|
||||
etc.
|
||||
This works well if you store the scp file with the features but
|
||||
do not want different scp files every time you move or create new features
|
||||
*/
|
||||
wstring scpdircached;
|
||||
for (auto & entry : filelist)
|
||||
ExpandDotDotDot(entry, scriptpath, scpdircached);
|
||||
}
|
||||
path = rootpath + L"/" + path;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
do "..." expansion if SCP uses relative path names
|
||||
"..." in the SCP stands for the directory that contains the SCP file
|
||||
for example, if scp file is "//aaa/bbb/ccc/ddd.scp"
|
||||
and contains entry like
|
||||
.../file1.feat
|
||||
.../file2.feat
|
||||
etc.
|
||||
the features will be read from
|
||||
//aaa/bbb/ccc/file1.feat
|
||||
//aaa/bbb/ccc/file2.feat
|
||||
etc.
|
||||
This works well if you store the scp file with the features but
|
||||
do not want different scp files every time you move or create new features
|
||||
*/
|
||||
wstring scpdircached;
|
||||
for (auto & entry : filelist)
|
||||
ExpandDotDotDot(entry, scriptpath, scpdircached);
|
||||
}
|
||||
|
||||
|
||||
infilesmulti.push_back(filelist);
|
||||
|
@ -467,7 +468,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
|
||||
// now get the frame source. This has better randomization and doesn't create temp files
|
||||
m_frameSource.reset(new msra::dbn::minibatchutterancesourcemulti(infilesmulti, labelsmulti, m_featDims, m_labelDims, numContextLeft, numContextRight, randomize, *m_lattices, m_latticeMap, m_frameMode));
|
||||
m_frameSource->setverbosity(verbosity);
|
||||
m_frameSource->setverbosity(m_verbosity);
|
||||
}
|
||||
else if (!_stricmp(readMethod.c_str(),"rollingWindow"))
|
||||
{
|
||||
|
@ -540,7 +541,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
int addEnergy = 0;
|
||||
|
||||
m_frameSource.reset(new msra::dbn::minibatchframesourcemulti(infilesmulti, labelsmulti, m_featDims, m_labelDims, numContextLeft, numContextRight, randomize, pagePaths, mayhavenoframe, addEnergy));
|
||||
m_frameSource->setverbosity(verbosity);
|
||||
m_frameSource->setverbosity(m_verbosity);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -909,17 +910,20 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
|
||||
// on first minibatch, make sure we can supply data for requested nodes
|
||||
std::map<std::wstring,size_t>::iterator iter;
|
||||
if (m_checkDictionaryKeys)
|
||||
if (m_checkDictionaryKeys)
|
||||
{
|
||||
for (auto iter=matrices.begin();iter!=matrices.end();iter++)
|
||||
{
|
||||
if (m_nameToTypeMap.find(iter->first)==m_nameToTypeMap.end())
|
||||
RuntimeError("minibatch requested for input node %ls not found in reader - cannot generate input\n", iter->first.c_str());
|
||||
|
||||
}
|
||||
m_checkDictionaryKeys=false;
|
||||
}
|
||||
|
||||
Timer aggregateTimer;
|
||||
if (m_verbosity > 2)
|
||||
aggregateTimer.Start();
|
||||
|
||||
do
|
||||
{
|
||||
if (!m_truncated) // frame mode or whole utterances
|
||||
|
@ -1291,6 +1295,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
}
|
||||
while(skip); // keep going if we didn't get the right size minibatch
|
||||
|
||||
if (m_verbosity > 2)
|
||||
{
|
||||
aggregateTimer.Stop();
|
||||
double totalMBReadTime = aggregateTimer.ElapsedSeconds();
|
||||
fprintf(stderr, "Total Minibatch read time = %.8g\n", totalMBReadTime);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -1487,6 +1498,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
m_numFramesToProcess[i] = 0;
|
||||
return false;
|
||||
}
|
||||
|
||||
size_t numOfFea = m_featuresBufferMultiIO.size();
|
||||
size_t numOfLabel = m_labelsBufferMultiIO.size();
|
||||
|
||||
|
@ -1614,7 +1626,19 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
|
||||
m_processedFrame[i] = 0;
|
||||
|
||||
Timer mbIterAdvancementTimer;
|
||||
if (m_verbosity > 2)
|
||||
mbIterAdvancementTimer.Start();
|
||||
|
||||
(*m_mbiter)++;
|
||||
|
||||
if (m_verbosity > 2)
|
||||
{
|
||||
mbIterAdvancementTimer.Stop();
|
||||
double advancementTime = mbIterAdvancementTimer.ElapsedSeconds();
|
||||
fprintf(stderr, "Time to advance mbiter = %.8g\n", advancementTime);
|
||||
}
|
||||
|
||||
if (!(*m_mbiter))
|
||||
m_noData = true;
|
||||
|
||||
|
|
|
@ -88,7 +88,9 @@ private:
|
|||
std::vector<size_t> m_labelDims;
|
||||
|
||||
std::vector<std::vector<std::vector<ElemType>>>m_labelToTargetMapMultiIO;
|
||||
|
||||
|
||||
int m_verbosity;
|
||||
|
||||
void PrepareForTrainingOrTesting(const ConfigParameters& config);
|
||||
void PrepareForWriting(const ConfigParameters& config);
|
||||
|
||||
|
|
|
@ -118,6 +118,9 @@
|
|||
<ClInclude Include="utterancesourcemulti.h" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="..\..\Common\TimerUtility.cpp">
|
||||
<PrecompiledHeader>NotUsing</PrecompiledHeader>
|
||||
</ClCompile>
|
||||
<ClCompile Include="DataReader.cpp" />
|
||||
<ClCompile Include="DataWriter.cpp" />
|
||||
<ClCompile Include="dllmain.cpp">
|
||||
|
@ -144,4 +147,4 @@
|
|||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
</Project>
|
||||
|
|
|
@ -14,6 +14,9 @@
|
|||
<Filter>Duplicates to remove</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="DataReader.cpp" />
|
||||
<ClCompile Include="..\..\Common\TimerUtility.cpp">
|
||||
<Filter>Common</Filter>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="biggrowablevectors.h" />
|
||||
|
|
|
@ -991,8 +991,8 @@ private:
|
|||
if (chunkdata.isinram())
|
||||
{
|
||||
if (verbosity)
|
||||
fprintf (stderr, "releaserandomizedchunk: paging out randomized chunk %d (frame range [%d..%d]), %d resident in RAM\n",
|
||||
(int)k, (int)randomizedchunks[m][k].globalts, (int)(randomizedchunks[m][k].globalte()-1), (int)(chunksinram-1));
|
||||
fprintf (stderr, "releaserandomizedchunk: paging out randomized chunk %d (frame range [%d..%d]), %d resident in RAM\n",
|
||||
(int)k, (int)randomizedchunks[m][k].globalts, (int)(randomizedchunks[m][k].globalte()-1), (int)(chunksinram-1));
|
||||
chunkdata.releasedata();
|
||||
numreleased++;
|
||||
}
|
||||
|
@ -1034,7 +1034,7 @@ private:
|
|||
auto & chunk = randomizedchunks[m][chunkindex];
|
||||
auto & chunkdata = chunk.getchunkdata();
|
||||
if (verbosity)
|
||||
fprintf (stderr, "feature set %d: requirerandomizedchunk: paging in randomized chunk %d (frame range [%d..%d]), %d resident in RAM\n", m, (int)chunkindex, (int)chunk.globalts, (int)(chunk.globalte()-1), (int)(chunksinram+1));
|
||||
fprintf (stderr, "feature set %d: requirerandomizedchunk: paging in randomized chunk %d (frame range [%d..%d]), %d resident in RAM\n", m, (int)chunkindex, (int)chunk.globalts, (int)(chunk.globalte()-1), (int)(chunksinram+1));
|
||||
msra::util::attempt (5, [&]() // (reading from network)
|
||||
{
|
||||
chunkdata.requiredata (featkind[m], featdim[m], sampperiod[m], this->lattices, verbosity);
|
||||
|
@ -1282,8 +1282,8 @@ public:
|
|||
const size_t windowbegin = randomizedchunks[0][firstchunk].windowbegin;
|
||||
const size_t windowend = randomizedchunks[0][lastchunk].windowend;
|
||||
if (verbosity > 0)
|
||||
fprintf (stderr, "getbatch: getting randomized frames [%d..%d] (%d frames out of %d requested) in sweep %d; chunks [%d..%d] -> chunk window [%d..%d)\n",
|
||||
(int)globalts, (int)globalte, (int)mbframes, (int)framesrequested, (int)sweep, (int)firstchunk, (int)lastchunk, (int)windowbegin, (int)windowend);
|
||||
fprintf (stderr, "getbatch: getting randomized frames [%d..%d] (%d frames out of %d requested) in sweep %d; chunks [%d..%d] -> chunk window [%d..%d)\n",
|
||||
(int)globalts, (int)globalte, (int)mbframes, (int)framesrequested, (int)sweep, (int)firstchunk, (int)lastchunk, (int)windowbegin, (int)windowend);
|
||||
// release all data outside, and page in all data inside
|
||||
for (size_t k = 0; k < windowbegin; k++)
|
||||
releaserandomizedchunk (k);
|
||||
|
|
Загрузка…
Ссылка в новой задаче