diff --git a/Source/Readers/ExperimentalHTKMLFReader/HTKDataDeserializer.cpp b/Source/Readers/ExperimentalHTKMLFReader/HTKDataDeserializer.cpp index ee28cb6ed..48a0255aa 100644 --- a/Source/Readers/ExperimentalHTKMLFReader/HTKDataDeserializer.cpp +++ b/Source/Readers/ExperimentalHTKMLFReader/HTKDataDeserializer.cpp @@ -125,11 +125,6 @@ void HTKDataDeserializer::InitializeChunkDescriptions(ConfigHelper& config) currentChunk.Add(move(utterances[i])); } - // Creating a table of weak pointers to chunks, - // so that if randomizer asks the same chunk twice - // we do not need to recreated the chunk if we already uploaded in memory. - m_weakChunks.resize(m_chunks.size()); - fprintf(stderr, "HTKDataDeserializer::HTKDataDeserializer: %d utterances grouped into %d chunks, av. chunk size: %.1f utterances, %.1f frames\n", (int)utterances.size(), @@ -289,14 +284,7 @@ private: // Gets a data chunk with the specified chunk id. ChunkPtr HTKDataDeserializer::GetChunk(size_t chunkId) { - if (!m_weakChunks[chunkId].expired()) - { - return m_weakChunks[chunkId].lock(); - } - - auto chunk = make_shared(this, chunkId); - m_weakChunks[chunkId] = chunk; - return chunk; + return make_shared(this, chunkId); }; // A matrix that stores all samples of a sequence without padding (differently from ssematrix). diff --git a/Source/Readers/ExperimentalHTKMLFReader/HTKDataDeserializer.h b/Source/Readers/ExperimentalHTKMLFReader/HTKDataDeserializer.h index d79f05f27..529f62721 100644 --- a/Source/Readers/ExperimentalHTKMLFReader/HTKDataDeserializer.h +++ b/Source/Readers/ExperimentalHTKMLFReader/HTKDataDeserializer.h @@ -51,11 +51,6 @@ private: // Chunk descriptions. std::vector m_chunks; - // Weak pointers on existing chunks. - // If randomizer asks the same chunk twice we do not need to recreate - // the chunk if we already uploaded it in memory. - std::vector> m_weakChunks; - // Augmentation window. std::pair m_augmentationWindow; diff --git a/Source/Readers/ExperimentalHTKMLFReader/MLFDataDeserializer.cpp b/Source/Readers/ExperimentalHTKMLFReader/MLFDataDeserializer.cpp index 52150abea..1b22cda1e 100644 --- a/Source/Readers/ExperimentalHTKMLFReader/MLFDataDeserializer.cpp +++ b/Source/Readers/ExperimentalHTKMLFReader/MLFDataDeserializer.cpp @@ -222,8 +222,8 @@ ChunkPtr MLFDataDeserializer::GetChunk(size_t chunkId) { UNUSED(chunkId); assert(chunkId == 0); - return std::make_shared(this); -} + return make_shared(this); +}; // Sparse labels for an utterance. template diff --git a/Source/Readers/ReaderLib/Bundler.cpp b/Source/Readers/ReaderLib/Bundler.cpp index 9bd7eaaf8..81fc0a951 100644 --- a/Source/Readers/ReaderLib/Bundler.cpp +++ b/Source/Readers/ReaderLib/Bundler.cpp @@ -5,6 +5,7 @@ #define _CRT_SECURE_NO_WARNINGS #include "Bundler.h" +#include namespace Microsoft { namespace MSR { namespace CNTK { @@ -150,16 +151,19 @@ void Bundler::GetSequencesForChunk(size_t chunkId, std::vector m_innerChunks; // A mapping between exposed sequence id and inner sequence id for each deserializer. + // Indices as above. std::vector m_sequenceToSequence; DISABLE_COPY_AND_MOVE(BundlingChunk); @@ -197,6 +201,8 @@ public: SequenceDescription s; for (size_t deserializerIndex = 1; deserializerIndex < m_parent->m_deserializers.size(); ++deserializerIndex) { + std::map secondaryChunks; + for (size_t sequenceIndex = 0; sequenceIndex < sequences.size(); ++sequenceIndex) { if (chunk->m_invalid.find(sequenceIndex) != chunk->m_invalid.end()) @@ -207,7 +213,20 @@ public: size_t currentIndex = sequenceIndex * m_numberOfInputs + deserializerIndex; deserializers[deserializerIndex]->GetSequenceDescriptionByKey(sequences[sequenceIndex].m_key, s); m_sequenceToSequence[currentIndex] = s.m_id; - m_innerChunks[currentIndex] = deserializers[deserializerIndex]->GetChunk(s.m_chunkId); + + ChunkPtr secondaryChunk; + auto it = secondaryChunks.find(s.m_chunkId); + if (it == secondaryChunks.end()) + { + secondaryChunk = deserializers[deserializerIndex]->GetChunk(s.m_chunkId); + secondaryChunks.insert(make_pair(s.m_chunkId, secondaryChunk)); + } + else + { + secondaryChunk = it->second; + } + + m_innerChunks[currentIndex] = secondaryChunk; } } } diff --git a/Source/Readers/ReaderLib/Bundler.h b/Source/Readers/ReaderLib/Bundler.h index 13663c315..e238b5d84 100644 --- a/Source/Readers/ReaderLib/Bundler.h +++ b/Source/Readers/ReaderLib/Bundler.h @@ -8,7 +8,6 @@ #include "DataDeserializer.h" #include "DataDeserializerBase.h" #include "Config.h" -#include namespace Microsoft { namespace MSR { namespace CNTK {