diff --git a/Source/Readers/CNTKTextFormatReader/TextParser.cpp b/Source/Readers/CNTKTextFormatReader/TextParser.cpp index 009e94be023f..964c172c3b60 100644 --- a/Source/Readers/CNTKTextFormatReader/TextParser.cpp +++ b/Source/Readers/CNTKTextFormatReader/TextParser.cpp @@ -307,6 +307,12 @@ typename TextParser::SequenceBuffer TextParser::LoadSequence { size_t fileOffset = sequenceDsc.OffsetInChunk() + chunkOffsetInFile; + auto cachedSequencePos = m_fileOffsetToSequenceBuffer.find(fileOffset); + if (cachedSequencePos != m_fileOffsetToSequenceBuffer.end()) + { + return cachedSequencePos->second; + } + m_fileReader->SetFileOffset(fileOffset); size_t bytesToRead = sequenceDsc.SizeInBytes(); @@ -430,6 +436,8 @@ typename TextParser::SequenceBuffer TextParser::LoadSequence } FillSequenceMetadata(sequence, { sequenceDsc.m_key, 0 }); + + m_fileOffsetToSequenceBuffer[fileOffset] = sequence; return sequence; } diff --git a/Source/Readers/CNTKTextFormatReader/TextParser.h b/Source/Readers/CNTKTextFormatReader/TextParser.h index abf476a92922..3c4459f4ae60 100644 --- a/Source/Readers/CNTKTextFormatReader/TextParser.h +++ b/Source/Readers/CNTKTextFormatReader/TextParser.h @@ -10,6 +10,7 @@ #include "TextConfigHelper.h" #include "Index.h" #include "CorpusDescriptor.h" +#include <unordered_map> namespace CNTK { @@ -135,6 +136,7 @@ class TextParser : public DataDeserializerBase { bool m_cacheIndex; unsigned int m_numRetries; // specifies the number of times an unsuccessful // file operation should be repeated (default value is 5). + std::unordered_map<size_t, SequenceBuffer> m_fileOffsetToSequenceBuffer; // Corpus descriptor. CorpusDescriptorPtr m_corpus;