Integrate mahilleb/MlfDataDeserializerWeakPtr into master
This commit is contained in:
Коммит
0cdec2c2c9
|
@ -125,11 +125,6 @@ void HTKDataDeserializer::InitializeChunkDescriptions(ConfigHelper& config)
|
|||
currentChunk.Add(move(utterances[i]));
|
||||
}
|
||||
|
||||
// Creating a table of weak pointers to chunks,
|
||||
// so that if randomizer asks the same chunk twice
|
||||
// we do not need to recreate the chunk if it is already loaded in memory.
|
||||
m_weakChunks.resize(m_chunks.size());
|
||||
|
||||
fprintf(stderr,
|
||||
"HTKDataDeserializer::HTKDataDeserializer: %d utterances grouped into %d chunks, av. chunk size: %.1f utterances, %.1f frames\n",
|
||||
(int)utterances.size(),
|
||||
|
@ -289,14 +284,7 @@ private:
|
|||
// Gets a data chunk with the specified chunk id.
|
||||
ChunkPtr HTKDataDeserializer::GetChunk(size_t chunkId)
|
||||
{
|
||||
if (!m_weakChunks[chunkId].expired())
|
||||
{
|
||||
return m_weakChunks[chunkId].lock();
|
||||
}
|
||||
|
||||
auto chunk = make_shared<HTKChunk>(this, chunkId);
|
||||
m_weakChunks[chunkId] = chunk;
|
||||
return chunk;
|
||||
return make_shared<HTKChunk>(this, chunkId);
|
||||
};
|
||||
|
||||
// A matrix that stores all samples of a sequence without padding (differently from ssematrix).
|
||||
|
|
|
@ -51,11 +51,6 @@ private:
|
|||
// Chunk descriptions.
|
||||
std::vector<HTKChunkDescription> m_chunks;
|
||||
|
||||
// Weak pointers to existing chunks.
|
||||
// If randomizer asks the same chunk twice we do not need to recreate
|
||||
// the chunk if we already uploaded it in memory.
|
||||
std::vector<std::weak_ptr<Chunk>> m_weakChunks;
|
||||
|
||||
// Augmentation window.
|
||||
std::pair<size_t, size_t> m_augmentationWindow;
|
||||
|
||||
|
|
|
@ -222,8 +222,8 @@ ChunkPtr MLFDataDeserializer::GetChunk(size_t chunkId)
|
|||
{
|
||||
UNUSED(chunkId);
|
||||
assert(chunkId == 0);
|
||||
return std::make_shared<MLFChunk>(this);
|
||||
}
|
||||
return make_shared<MLFChunk>(this);
|
||||
};
|
||||
|
||||
// Sparse labels for an utterance.
|
||||
template <class ElemType>
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
#define _CRT_SECURE_NO_WARNINGS
|
||||
|
||||
#include "Bundler.h"
|
||||
#include <set>
|
||||
|
||||
namespace Microsoft { namespace MSR { namespace CNTK {
|
||||
|
||||
|
@ -150,16 +151,19 @@ void Bundler::GetSequencesForChunk(size_t chunkId, std::vector<SequenceDescripti
|
|||
std::swap(sequences, result);
|
||||
}
|
||||
|
||||
// Represents a chunk that has pointers to the underlying deserialzer chunks.
|
||||
// Represents a chunk that has pointers to the underlying deserializer chunks.
|
||||
class Bundler::BundlingChunk : public Chunk
|
||||
{
|
||||
size_t m_numberOfInputs;
|
||||
Bundler* m_parent;
|
||||
size_t m_chunkId;
|
||||
|
||||
// A mapping between exposed sequence id and inner chunk for each deserialzier.
|
||||
// A mapping between exposed sequence id and inner chunk for each deserializer.
|
||||
// Index i of the vector maps to the chunk of inner sequence (i / m_numberOfInputs) of
|
||||
// deserializer (i % m_numberOfInputs).
|
||||
std::vector<ChunkPtr> m_innerChunks;
|
||||
// A mapping between exposed sequence id and inner sequence id for each deserializer.
|
||||
// Indices as above.
|
||||
std::vector<size_t> m_sequenceToSequence;
|
||||
|
||||
DISABLE_COPY_AND_MOVE(BundlingChunk);
|
||||
|
@ -197,6 +201,8 @@ public:
|
|||
SequenceDescription s;
|
||||
for (size_t deserializerIndex = 1; deserializerIndex < m_parent->m_deserializers.size(); ++deserializerIndex)
|
||||
{
|
||||
std::map<size_t, ChunkPtr> secondaryChunks;
|
||||
|
||||
for (size_t sequenceIndex = 0; sequenceIndex < sequences.size(); ++sequenceIndex)
|
||||
{
|
||||
if (chunk->m_invalid.find(sequenceIndex) != chunk->m_invalid.end())
|
||||
|
@ -207,7 +213,20 @@ public:
|
|||
size_t currentIndex = sequenceIndex * m_numberOfInputs + deserializerIndex;
|
||||
deserializers[deserializerIndex]->GetSequenceDescriptionByKey(sequences[sequenceIndex].m_key, s);
|
||||
m_sequenceToSequence[currentIndex] = s.m_id;
|
||||
m_innerChunks[currentIndex] = deserializers[deserializerIndex]->GetChunk(s.m_chunkId);
|
||||
|
||||
ChunkPtr secondaryChunk;
|
||||
auto it = secondaryChunks.find(s.m_chunkId);
|
||||
if (it == secondaryChunks.end())
|
||||
{
|
||||
secondaryChunk = deserializers[deserializerIndex]->GetChunk(s.m_chunkId);
|
||||
secondaryChunks.insert(make_pair(s.m_chunkId, secondaryChunk));
|
||||
}
|
||||
else
|
||||
{
|
||||
secondaryChunk = it->second;
|
||||
}
|
||||
|
||||
m_innerChunks[currentIndex] = secondaryChunk;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -8,7 +8,6 @@
|
|||
#include "DataDeserializer.h"
|
||||
#include "DataDeserializerBase.h"
|
||||
#include "Config.h"
|
||||
#include <set>
|
||||
|
||||
namespace Microsoft { namespace MSR { namespace CNTK {
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче