CNTK/DataReader/UCIFastReader/UCIFastReader.cpp

//
// <copyright file="UCIFastReader.cpp" company="Microsoft">
//     Copyright (c) Microsoft Corporation.  All rights reserved.
// </copyright>
//
// UCIFastReader.cpp : Defines the exported functions for the DLL application.
//

#include "stdafx.h"
#define DATAREADER_EXPORTS  // creating the exports here
#include "DataReader.h"
#include "UCIFastReader.h"
#ifdef LEAKDETECT
#include <vld.h> // leak detection
#endif
#include "fileutil.h"   // for fexists()

namespace Microsoft { namespace MSR { namespace CNTK {

// RandomizeSweep - Map a sample position onto the index of the randomization range that contains it
// mbStartSample - sample position, in the same sample space as the minibatch start positions
// returns - mbStartSample divided (integer division) by the randomization range
template<class ElemType>
size_t UCIFastReader<ElemType>::RandomizeSweep(size_t mbStartSample)
{
    // an earlier per-epoch formulation, kept for reference only:
    //   size_t randomRangePerEpoch = (m_epochSize+m_randomizeRange-1)/m_randomizeRange;
    //   return m_epoch*randomRangePerEpoch + epochSample/m_randomizeRange;
    const size_t sweepIndex = mbStartSample / m_randomizeRange;
    return sweepIndex;
}

// ReadRecord - Read a single record (placeholder; this reader does not read record by record)
// readSample - sample to read in global sample space (ignored)
// returns - always false
template<class ElemType>
bool UCIFastReader<ElemType>::ReadRecord(size_t /*readSample*/)
{
    return false; // not used
}

// RecordsToRead - Determine the number of records to read to populate the record buffers
// mbStartSample - the starting sample from which to read
// tail - true when only checking for possible remainder records; a zero count is then returned as-is
//        instead of being bumped up to a full minibatch / randomization range
// returns - the number of records that should be read next
template<class ElemType>
size_t UCIFastReader<ElemType>::RecordsToRead(size_t mbStartSample, bool tail)
{
    assert(mbStartSample >= m_epochStartSample);
    const bool shouldRandomize = Randomize();

    // offset of the requested sample inside the current epoch, and what is left of that epoch
    const size_t offsetInEpoch = mbStartSample % m_epochSize;
    const size_t remainingInEpoch = m_epochSize - offsetInEpoch;

    // default: one minibatch, capped by the remainder of the epoch
    size_t count = min(remainingInEpoch, m_mbSize);
    if (count == 0 && !tail)
        count = m_mbSize;

    if (!shouldRandomize)
        return count;

    // still inside the randomization range that was used last time: the default stands
    if (RandomizeSweep(mbStartSample) == m_randomordering.CurrentSeed())
        return count;

    // first read, or the read crosses into another randomization range:
    // read up to the next randomization-range boundary
    count = RoundUp(offsetInEpoch, m_randomizeRange) - offsetInEpoch;
    if (count == 0 && !tail)
        count = m_randomizeRange;
    return count;
}

// EnsureDataAvailable - Read enough records so that a minibatch starting at mbStartSample can be served
// mbStartSample - the starting sample we are ensuring is available
// endOfDataCheck - when true, stop instead of wrapping around if the end of the dataset is hit
//                  before any record was read
// returns - true if the requested records are (now) buffered; false if reading stopped at the end of
//           the dataset, i.e. an end-of-data check read nothing, or partial minibatches are enabled
//           and fewer records than requested were read
template<class ElemType>
bool UCIFastReader<ElemType>::EnsureDataAvailable(size_t mbStartSample, bool endOfDataCheck)
{
    assert(mbStartSample >= m_epochStartSample);
    // NOTE(review): the result of Randomize() is discarded here; looks like a leftover - confirm the call is not needed
    Randomize();
    // offset of the requested sample relative to the start of the epoch
    size_t epochSample = mbStartSample;
    epochSample %= m_epochSize;
    bool moreToRead = true;

    size_t numberToRead = RecordsToRead(mbStartSample);

    // check to see if we have the proper records read already
    if (m_readNextSample >= mbStartSample+numberToRead && mbStartSample >= m_epochStartSample)
        return true;

    // truncate the present arrays to the location we are reading from, parser appends on these arrays
    if (m_featureData.size() > epochSample*m_featureCount) // should be this size, if not, truncate
        m_featureData.resize(epochSample*m_featureCount);
    if (m_labelType != labelNone && m_labelData.size() > epochSample)
    {
        // make sure the labelId array is also the correct size
        if (m_labelType == labelCategory)
            m_labelIdData.resize(epochSample);
        m_labelData.resize(epochSample);
    }

    // NOTE(review): recordsRead/numRead are int while numberToRead is size_t, so the comparisons
    // and the subtraction below mix signed and unsigned operands
    int recordsRead = 0;
    do
    {
        // ask the parser for whatever is still missing; it appends to the feature/label arrays
        int numRead = m_parser.Parse(numberToRead-recordsRead, &m_featureData, &m_labelData);

        recordsRead += numRead;
        // the dataset size is only accumulated until the end of the file has been seen once
        if (!m_endReached)
            m_totalSamples += numRead;   // total number of records in the dataset

        // we should only get fewer records than requested when we hit the end of the dataset
        if (recordsRead < numberToRead)
        {
            // update dataset variables; this may change the epoch size and therefore how much is still needed
            size_t additionalToRead = UpdateDataVariables(mbStartSample+recordsRead);

            m_parser.SetFilePosition(0);  // make another pass of the dataset

            // if doing an end of data check and nothing was read,
            // or a partial minibatch was found (and partial minibatches are allowed), exit now
            if ((endOfDataCheck && recordsRead == 0) ||
                (m_partialMinibatch && recordsRead > 0))
            {
                moreToRead = false;
                break;
            }

            // new target: what we already have plus what is still needed after the update
            numberToRead = recordsRead + additionalToRead;
        }
    }
    while (recordsRead < numberToRead);
    m_readNextSample += recordsRead;

    // for category labels, we need to build up a list of IDs and a mapping table
    if (m_labelType == labelCategory)
    {
        // loop through all the newly read records
        for (int numberRead=0; numberRead < recordsRead; numberRead++)
        {
            LabelType& label = m_labelData[epochSample+numberRead];
            // check to see if we have seen this label before
            auto value = m_mapLabelToId.find(label);
            LabelIdType labelId;
            if (value == m_mapLabelToId.end())
            {
                // unknown labels are only accepted while a label mapping file is pending to be written
                if (m_labelFileToWrite.empty())
                    RuntimeError("label found in data not specified in label mapping file: %s", label.c_str());
                // new label so add it to the mapping tables
                m_mapLabelToId[label] = m_labelIdMax;
                m_mapIdToLabel[m_labelIdMax] = label;
                labelId = m_labelIdMax++;

                // if our label dimension is lower than the current labelId then increase it
                if (m_labelDim < m_labelIdMax)
                    m_labelDim = m_labelIdMax;
            }
            else
            {
                labelId = value->second;
            }

            // now add the label id to the label data array
            m_labelIdData.push_back(labelId);
        }
    }
    // false only if the loop above stopped at the end of the dataset (see the break)
    return moreToRead;
}

// UpdateDataVariables - Refresh the variables that depend on the dataset having been read completely
// mbStartSample - sample position at which reading would continue
// returns - the number of records still to read for that position (as computed by RecordsToRead()),
//           or zero if the position is at or past the end of the current epoch
template<class ElemType>
size_t UCIFastReader<ElemType>::UpdateDataVariables(size_t mbStartSample)
{
    // the first time the end of the file is seen, settle everything that depends on the dataset size
    if (!m_endReached)
    {
        // the sample count must cover everything that is currently buffered
        assert(m_totalSamples*m_featureCount >= m_featureData.size());

        // the epoch size was requested to follow the dataset size
        if (m_epochSize == requestDataSize)
        {
            if (!m_partialMinibatch)
            {
                // round up to a multiple of the minibatch size, or of an explicitly given randomization range
                const bool explicitRange = m_randomizeRange != randomizeAuto && m_randomizeRange != randomizeNone;
                const size_t granularity = explicitRange ? m_randomizeRange : m_mbSize;
                m_epochSize = RoundUp(m_totalSamples, granularity);
            }
            else
            {
                m_epochSize = m_totalSamples;
            }
        }

        // keep the randomization range within the epoch
        if (m_randomizeRange > m_epochSize)
        {
            m_randomizeRange = m_epochSize;
            m_randomordering.resize(m_randomizeRange,m_randomizeRange);
        }

        // the whole dataset has been seen, so the label file (if one is pending) can be written
        WriteLabelFile();

        m_endReached = true;
    }

    // m_labelIdMax is advanced in the processing loop after a read, so the label dimension may lag behind it
    if (m_labelType == labelCategory && m_labelIdMax > m_labelDim)
        m_labelDim = m_labelIdMax;

    // nothing left in this epoch after a potential epoch-size change?
    if (mbStartSample >= m_epochStartSample + m_epochSize)
        return 0;
    return RecordsToRead(mbStartSample);
}

// WriteLabelFile - Write the label mapping file, one label per line in id order, if one is pending
// Does nothing when no label file name is pending. When no labels have been collected yet, only a
// warning is printed (and only if no caching writer is in use). After a successful write the pending
// file name is cleared.
template<class ElemType>
void UCIFastReader<ElemType>::WriteLabelFile()
{
    // nothing requested
    if (m_labelFileToWrite.empty())
        return;

    // no labels collected so far: the file cannot be written yet
    if (m_mapIdToLabel.size() == 0)
    {
        if (!m_cachingWriter)
            fprintf(stderr, "WARNING: file %ls NOT written to disk yet, will be written the first time the end of the entire dataset is found.\n", m_labelFileToWrite.c_str());
        return;
    }

    File labelFile(m_labelFileToWrite, fileOptionsWrite | fileOptionsText);
    for (int i=0; i < m_mapIdToLabel.size(); ++i)
        labelFile << m_mapIdToLabel[i] << '\n';

    fprintf(stderr, "label file %ls written to disk\n", m_labelFileToWrite.c_str());
    m_labelFileToWrite.clear();
}

// Destroy - Clean up and delete this reader object
// NOTE: this deletes the object itself; it must not be used in any way after this call returns
template<class ElemType>
void UCIFastReader<ElemType>::Destroy()
{
    delete this;
}

// Init - Initialize the reader from its configuration section
// readerConfig - [in] configuration parameters for the datareader
// If InitCache() sets up a caching reader, the text file is not parsed and Init returns early.
// Raises a RuntimeError when the features section is missing, when features and labels name
// different files, or when the label mapping file is missing and 'allowMapCreation' is not set.
// Sample format below:
//# Parameter values for the reader
//reader=[
//  # reader to use
//  readerType=UCIFastReader
//  miniBatchMode=Partial
//  randomize=None
//  features=[
//    dim=784
//    start=1
//    file=c:\speech\mnist\mnist_test.txt
//  ]
//  labels=[
//    dim=1
//    start=0
//    file=c:\speech\mnist\mnist_test.txt
//    labelMappingFile=c:\speech\mnist\labels.txt
//    labelDim=10
//    labelType=Category
//  ]
//]
template<class ElemType>
void UCIFastReader<ElemType>::Init(const ConfigParameters& readerConfig)
{
    // no cache objects until InitCache() decides otherwise
    m_cachingReader = NULL;
    m_cachingWriter = NULL;

    // initialize the cache, and keep a copy of the configuration for a later retry in StartMinibatchLoop()
    InitCache(readerConfig);
    readerConfig.CopyTo(m_readerConfig);

    // if we have a cache, no need to parse the text files...
    if (m_cachingReader)
        return;

    // the first features/labels section names found in the configuration are the ones used
    std::vector<std::wstring> features;
    std::vector<std::wstring> labels;
    GetFileConfigNames(readerConfig, features, labels);
    if (features.size() > 0)
    {
        m_featuresName = features[0];
    }
    if (labels.size() > 0)
    {
        m_labelsName = labels[0];
    }
    ConfigParameters configFeatures = readerConfig(m_featuresName,"");
    ConfigParameters configLabels = readerConfig(m_labelsName,"");
    if (configFeatures.size() == 0)
        RuntimeError("features file not found, required in configuration: i.e. 'features=[file=c:\\myfile.txt;start=1;dim=123]'");
    if (configLabels.size() == 0)
        fprintf(stderr, "Warning: labels are not specified.\n");
    else if (configFeatures("file","") != configLabels("file",""))
        RuntimeError("features and label files must be the same file, use separate readers to define single use files");

    size_t vdim = configFeatures("dim");
    size_t udim = configLabels("labelDim","0");

    // initialize all the variables
    m_mbStartSample = m_epoch = m_totalSamples = m_epochStartSample = 0;
    m_labelIdMax = m_labelDim = 0;
    m_partialMinibatch = m_endReached = false;
    m_labelType = labelCategory;
    m_featureCount = vdim;
    m_readNextSample = 0;
    m_traceLevel = readerConfig("traceLevel","0");
    m_parser.SetTraceLevel(m_traceLevel);

    // we better have at least one feature...
    assert (m_featureCount != 0);

    // randomize=None|Auto|<range>; defaults to Auto when not specified
    if (readerConfig.Exists("randomize"))
    {
        string randomizeString = readerConfig("randomize");
        if (randomizeString == "None")
        {
            m_randomizeRange = randomizeNone;
        }
        else if (randomizeString == "Auto")
        {
            m_randomizeRange = randomizeAuto;
        }
        else
        {
            m_randomizeRange = readerConfig("randomize");
        }
    }
    else
    {
        m_randomizeRange = randomizeAuto;
    }

    // determine if partial minibatches are desired (case-insensitive compare, default is Partial)
    std::string minibatchMode(readerConfig("minibatchMode","Partial"));
    m_partialMinibatch = !_stricmp(minibatchMode.c_str(),"Partial");

    // get start and dimensions for labels and features
    size_t startLabels = configLabels("start", "0");
    size_t dimLabels = configLabels("dim", "0");

    size_t startFeatures = configFeatures("start", "0");
    size_t dimFeatures = configFeatures("dim", "0");

    // determine label type desired; no labels section means no labels
    std::string labelType;
    if (configLabels.size() == 0)
        labelType = "None";
    else
        labelType = configLabels("labelType","Category");

    // convert to lower case for case insensitive comparison
    // (an unrecognized value leaves m_labelType at the labelCategory default set above)
    msra::strfun::tolower_ascii(labelType);
    if (labelType == "category")
    {
        m_labelType = labelCategory;
    }
    else if (labelType == "regression")
    {
        m_labelType = labelRegression;
    }
    else if (labelType == "none")
    {
        m_labelType = labelNone;
        dimLabels = 0;   // override for no labels
    }

    std::wstring file = configFeatures("file");
    if (m_traceLevel > 0)
        fprintf(stderr, "reading uci file %ls\n", file.c_str());

    m_parser.ParseInit(file.c_str(), startFeatures, dimFeatures, startLabels, dimLabels);

    // if we have labels, we need a label mapping file, it will be a file with one label per line
    if (m_labelType != labelNone)
    {
        ConfigArray arrayLabels;
        std::wstring labelPath = configLabels("labelMappingFile");
        if (fexists(labelPath))
        {
            // the line index in the mapping file is the label id
            arrayLabels.LoadConfigFile(labelPath);
            const size_t labelCount = arrayLabels.size();
            for (size_t i = 0; i < labelCount; ++i)
            {
                LabelType label = arrayLabels[i];
                const LabelIdType labelId = (LabelIdType)i;
                m_mapIdToLabel[labelId] = label;
                m_mapLabelToId[label] = labelId;
            }
            m_labelIdMax = (LabelIdType)labelCount;
        }
        else
        {
            // only do label creation if we have the allow flag, should only be done as a separate command
            // to ensure that the label file will exist for verification step in training
            bool allowLabelCreation = readerConfig("allowMapCreation","false");
            if (allowLabelCreation)
                m_labelFileToWrite = labelPath;
            else
                RuntimeError("label mapping file %ls not found, can be created with a 'createLabelMap' command/action\n", labelPath.c_str());
        }
    }

    // if we know the size of the randomization now, resize, otherwise wait until we know the epochSize in StartMinibatchLoop()
    if (Randomize() && m_randomizeRange != randomizeAuto)
        m_randomordering.resize(m_randomizeRange, m_randomizeRange);

    // if the value they passed in as labelDim is not big enough for the loaded mapping, grow it
    if (udim < m_labelIdMax)
        udim = m_labelIdMax;
    m_labelDim = (LabelIdType)udim;

    mOneLinePerFile = readerConfig("onelineperfile", "false");
}

// InitCache - Initialize the caching reader if cache files exist, otherwise the caching writer
// readerConfig - reader configuration; caching is only attempted when it contains a 'writerType' entry
// On success exactly one of m_cachingReader / m_cachingWriter is created. Any exception raised while
// creating it is reported on stderr and swallowed, and both pointers are released and reset to NULL.
template<class ElemType>
void UCIFastReader<ElemType>::InitCache(const ConfigParameters& readerConfig)
{
    // check for a writer tag first (lets us know we are caching)
    if (!readerConfig.Exists("writerType"))
        return;

    // first try to open the binary cache
    bool found = false;
    try
    {
        // collect the 'wfile' entries at the top level and in the named sub-sections;
        // the cache counts as present as soon as any one of those files exists
        // TODO: need to go down to all levels, maybe search for sectionType
        ConfigArray filesList(',');
        vector<std::wstring> names;
        if (readerConfig.Exists("wfile"))
        {
            filesList.push_back(readerConfig("wfile"));
            if (fexists(readerConfig("wfile")))
                found = true;
        }
        FindConfigNames(readerConfig, "wfile", names);
        for (const auto & name : names)
        {
            ConfigParameters config = readerConfig(name);
            filesList.push_back(config("wfile"));
            if (fexists(config("wfile")))
                found = true;
        }

        // if we have a file already, we are going to read the cached files
        if (found)
        {
            ConfigParameters config;
            readerConfig.CopyTo(config);
            // modify the config so the reader types look correct
            config["readerType"] = config("writerType");
            config["file"] = filesList;
            m_cachingReader = new DataReader<ElemType>(config);
        }
        else
        {
            m_cachingWriter = new DataWriter<ElemType>(readerConfig);

            // now get the section names for map and category types
            std::map<std::wstring, SectionType, nocase_compare> sections;
            m_cachingWriter->GetSections(sections);
            for (const auto & pair : sections)
            {
                if (pair.second == sectionTypeCategoryLabel)
                {
                    m_labelsCategoryName = pair.first;
                }
                else if (pair.second == sectionTypeLabelMapping)
                {
                    m_labelsMapName = pair.first;
                }
            }
        }
    }
    catch (const runtime_error& err)   // by const reference: no copy, no slicing of derived exception types
    {
        fprintf(stderr,"Error attemping to create Binary%s\n%s\n",found?"Reader":"Writer",err.what());
        delete m_cachingReader;
        m_cachingReader = NULL;
        delete m_cachingWriter;
        m_cachingWriter = NULL;
    }
    catch (...)
    {
        // if there is any other error, just get rid of the objects
        fprintf(stderr,"Error attemping to create Binary%s\n",found?"Reader":"Writer");
        delete m_cachingReader;
        m_cachingReader = NULL;
        delete m_cachingWriter;
        m_cachingWriter = NULL;
    }
}

// destructor - virtual so it gets called properly
// releases the reader's own buffers (ReleaseMemory) and then the caching reader/writer, if any
template<class ElemType>
UCIFastReader<ElemType>::~UCIFastReader()
{
    ReleaseMemory();
    delete m_cachingReader;
    delete m_cachingWriter;
}

// ReleaseMemory - Release the memory footprint of UCIFastReader
// Frees the three minibatch buffers (resetting the pointers to NULL) and clears the in-memory
// feature/label arrays. Used when the caching reader is taking over, and by the destructor.
template<class ElemType>
void UCIFastReader<ElemType>::ReleaseMemory()
{
    // delete[] on a null pointer is a no-op, so no explicit checks are needed
    delete[] m_featuresBuffer;
    m_featuresBuffer = NULL;

    delete[] m_labelsBuffer;
    m_labelsBuffer = NULL;

    delete[] m_labelsIdBuffer;
    m_labelsIdBuffer = NULL;

    m_featureData.clear();
    m_labelIdData.clear();
    m_labelData.clear();
}

//SetupEpoch - Setup the proper position in the file, and other variable settings to start a particular epoch
// Reads m_epoch, m_epochSize and m_mbStartSample (StartMinibatchLoop() sets them before calling this).
// On return m_epochStartSample, m_mbStartSample and m_readNextSample describe the start of the epoch,
// and the parser is positioned at the matching record of the file.
template<class ElemType>
void UCIFastReader<ElemType>::SetupEpoch()
{
    // if we are starting fresh (epoch zero and no data read) and no cache is being written, just rewind.
    // if we are using a cachingWriter we need to know the record count first, so that case takes the
    // counting path below instead
    if (m_epoch == 0 && m_totalSamples == 0 && m_cachingWriter == NULL)
    {
        m_readNextSample = m_epochStartSample = m_mbStartSample = 0;
        m_parser.SetFilePosition(0);
    }
    else  // otherwise, position the read to start at the right location
    {
        // don't know the total number of samples yet, so count them
        if (m_totalSamples == 0)
        {
            if (m_traceLevel > 0)
                fprintf(stderr, "starting at epoch %lu counting lines to determine record count\n", (unsigned long)m_epoch);
            m_parser.SetParseMode(ParseLineCount);
            m_totalSamples = m_parser.Parse(size_t(-1), NULL, NULL);
            m_parser.SetParseMode(ParseNormal);
            m_parser.SetFilePosition(0);
            m_mbStartSample = 0;
            UpdateDataVariables(0); // update all the variables since we read to the end...
            if (m_traceLevel > 0)
                fprintf(stderr, "\n %lu records found\n", (unsigned long)m_totalSamples);
        }

        // make sure we are in the correct location for mid-dataset epochs
        size_t mbStartSample = m_epoch * m_epochSize;

        // record positions within the file; with an empty dataset (m_totalSamples == 0) every
        // position maps to record zero instead of dividing by zero
        size_t fileRecord = m_totalSamples?mbStartSample % m_totalSamples:0;
        fprintf(stderr, "starting epoch %lu at record count %lu, and file position %lu\n", (unsigned long)m_epoch, (unsigned long)mbStartSample, (unsigned long)fileRecord);
        size_t currentFileRecord = m_totalSamples?m_mbStartSample % m_totalSamples:0;

        // reset the next read sample
        m_readNextSample = mbStartSample;
        if (currentFileRecord == fileRecord)
        {
            fprintf(stderr, "already there from last epoch\n");

            // we have a slight dilemma here, if we haven't determined the end of the file yet
            // and the user told us to find how many records are in the file, we can't distinguish "almost done"
            // with a file (a character away) and the middle of the file. So read ahead a record to see if it's there.
            bool endReached = m_endReached;
            if (!endReached)
            {
                if (!m_parser.HasMoreData())
                {
                    endReached = true;
                    UpdateDataVariables(mbStartSample);
                    assert(m_endReached);
                }
            }
            // move the read pointer to the end since we have everything already in memory.
            size_t epochStartRecord = m_totalSamples?m_epochStartSample % m_totalSamples:0;
            if (endReached && epochStartRecord == fileRecord
                && m_featureData.size() >= m_epochSize*m_featureCount)
            {
                m_readNextSample = mbStartSample + m_epochSize;
                // write the label file here to make sure we do it somewhere. We know the entire dataset has been read at this point
                WriteLabelFile();
            }

        }
        // not the right position, need to get there
        else
        {
            // if we are already past the desired record, start at the beginning again
            if (currentFileRecord > fileRecord)
            {
                m_parser.SetFilePosition(0);
                currentFileRecord = 0;
            }
            // skip forward by counting lines only (nothing is stored while in ParseLineCount mode)
            fprintf(stderr, "reading from record %lu to %lu to be positioned properly for epoch\n", (unsigned long)currentFileRecord, (unsigned long)fileRecord);
            m_parser.SetParseMode(ParseLineCount);
            m_parser.Parse(fileRecord-currentFileRecord, NULL, NULL);
            m_parser.SetParseMode(ParseNormal);
            // a label mapping file can only be created when reading starts at the beginning of the dataset
            if (!m_labelFileToWrite.empty())
            {
                fprintf(stderr, "WARNING: file %ls NOT written to disk, label file will only be written when starting epochs at the beginning of the dataset\n", m_labelFileToWrite.c_str());
                m_labelFileToWrite.clear();
                RuntimeError("LabelMappingFile not provided in config, must be provided if not starting from epoch Zero (0)");
            }
        }
        m_epochStartSample = m_mbStartSample = mbStartSample;
    }
}

// RoundUp - utility function to round an integer up to a multiple of size
// value - the value to round up
// size - the granularity to round to; zero means "no granularity" and returns value unchanged
//        (previously this divided by zero)
// returns - the smallest multiple of size that is not less than value
// The remainder form avoids the wrap-around of (value + size - 1) for values near the top of the
// size_t range; the result only overflows when the rounded value itself is not representable.
size_t RoundUp(size_t value, size_t size) 
{
    if (size == 0)
        return value;
    const size_t remainder = value % size;
    return remainder == 0 ? value : value + (size - remainder);
}

// SetNumParallelSequences - Record the number of parallel sequences
// sz - number of parallel sequences; in one-line-per-file mode it also becomes the minibatch size
template<class ElemType>
void UCIFastReader<ElemType>::SetNumParallelSequences(const size_t sz) 
{
    mBlgSize = sz;
    if (!mOneLinePerFile)
        return;

    // one observation per file: the minibatch holds exactly one record per sequence
    m_mbSize = mBlgSize;
}

//StartMinibatchLoop - Startup a minibatch loop 
// mbSize - [in] size of the minibatch (number of Samples, etc.); replaced by the number of parallel sequences in one-line-per-file mode
// epoch - [in] epoch number for this loop, if > 0 the requestedEpochSamples must be specified (unless epoch zero was completed this run)
// requestedEpochSamples - [in] number of samples to randomize, defaults to requestDataSize which uses the number of samples there are in the dataset
//   this value must be a multiple of mbSize, if it is not, it will be rounded up to one (unless partial minibatches are enabled).
// Raises a RuntimeError if the randomization range is not a multiple of mbSize or not a factor of the epoch size.
template<class ElemType>
void UCIFastReader<ElemType>::StartMinibatchLoop(size_t mbSize, size_t epoch, size_t requestedEpochSamples)
{
    if (mOneLinePerFile)
        mbSize = mBlgSize; /// each file has only one observation, therefore the number of data to read is the number of files

    // if we aren't currently caching, see if we can use a cache
    if (!m_cachingReader && !m_cachingWriter)
    {
        InitCache(m_readerConfig);
        if (m_cachingReader)
            ReleaseMemory();    // free the memory used by the UCIFastReader
    }

    // if we are reading from the cache, do so now and return
    if (m_cachingReader)
    {
        m_cachingReader->StartMinibatchLoop(mbSize, epoch, requestedEpochSamples);
        return;
    }

    // (re)allocate the feature buffer on first use or when the minibatch grows
    if (m_featuresBuffer==NULL || mbSize > m_mbSize)
    {
        // release the original first; the pointer is reset so a failing allocation cannot leave it dangling
        delete[] m_featuresBuffer;
        m_featuresBuffer = NULL;
        m_featuresBuffer = new ElemType[mbSize*m_featureCount];
        memset(m_featuresBuffer,0,sizeof(ElemType)*mbSize*m_featureCount);
    }

    // (re)allocate the label buffers on first use or when the minibatch grows
    if (m_labelsBuffer == NULL || mbSize > m_mbSize)
    {
        // release BOTH label buffers before reallocating; the id buffer used to be
        // overwritten without being freed, leaking it on every growth
        delete[] m_labelsBuffer;
        m_labelsBuffer = NULL;
        delete[] m_labelsIdBuffer;
        m_labelsIdBuffer = NULL;
        if (m_labelType == labelCategory)
        {
            // category labels: m_labelDim values per sample plus one label id per sample
            m_labelsBuffer = new ElemType[m_labelDim*mbSize];
            memset(m_labelsBuffer,0,sizeof(ElemType)*m_labelDim*mbSize);
            m_labelsIdBuffer = new LabelIdType[mbSize];
            memset(m_labelsIdBuffer,0,sizeof(LabelIdType)*mbSize);
        }
        else if (m_labelType != labelNone)
        {
            // other label types: a single value per sample, no id buffer
            m_labelsBuffer = new ElemType[mbSize];
            memset(m_labelsBuffer,0,sizeof(ElemType)*mbSize);
        }
    }

    m_mbSize = mbSize;
    if (requestedEpochSamples == requestDataSize)
    {
        // keep an epoch size that was already derived from the dataset, if the end has been reached before
        if (!m_endReached)
        {
            m_epochSize = requestDataSize;
        }
    }
    else
    {
        m_epochSize = requestedEpochSamples;
        if (!m_partialMinibatch)
            m_epochSize = RoundUp(requestedEpochSamples, mbSize);
        if (m_epochSize != requestedEpochSamples)
            fprintf(stderr, "epochSize rounded up to %lu to fit an integral number of minibatches\n", (unsigned long)m_epochSize);
    }
    
    // set the randomization range for randomizationAuto
    // or if it's invalid less than the minibatch size, we need to make it at least minibatch size
    if (m_randomizeRange != randomizeNone)
    {
        if (m_epochSize != requestDataSize && m_randomizeRange == randomizeAuto)
        {
            m_randomizeRange = m_epochSize;
        }
        m_randomizeRange = max(m_randomizeRange, m_mbSize);
        if (m_randomizeRange != randomizeAuto)
        {
            if ((m_epochSize != requestDataSize && m_epochSize % m_randomizeRange != 0) || (m_randomizeRange % m_mbSize != 0))
                RuntimeError("randomizeRange must be an even multiple of mbSize and an integral factor of epochSize");
            m_randomordering.resize(m_randomizeRange, m_randomizeRange);
        }
    }

    // we use epochSize, which might not be set yet, so use a default value for allocations if not yet set
    size_t epochSize = m_epochSize == requestDataSize?1000:m_epochSize;
    m_epoch = epoch;
    m_mbStartSample = epoch*m_epochSize;

    // allocate room for the data
    m_featureData.reserve(m_featureCount*epochSize);
    if (m_labelType == labelCategory)
        m_labelIdData.reserve(epochSize);
    else if (m_labelType != labelNone)
        m_labelData.reserve(epochSize);

    SetupEpoch();
}

// StoreLabel - Store a label in an ElemType slot by way of its id in the label-to-id map
// labelStore - [out] receives the label's id converted to ElemType
// labelValue - [in] the label to store
// required for string labels, which can't be stored in ElemType arrays
template<class ElemType>
void UCIFastReader<ElemType>::StoreLabel(ElemType& labelStore, const LabelType& labelValue)
{
    // NOTE(review): the lookup uses operator[]; if m_mapLabelToId is a std::map this adds a
    // default entry for a label that is not in the map yet - confirm that is intended
    const auto& mappedId = m_mapLabelToId[labelValue];
    labelStore = (ElemType)mappedId;
}

// GetMinibatch - Get the next minibatch (features and labels)
// matrices - [in] a map with named matrix types (i.e. 'features', 'labels') mapped to the corresponding matrix, 
//             [out] each matrix resized if necessary containing data. 
// returns - true if there are more minibatches, false if no more minibatches remain
template<class ElemType>
bool UCIFastReader<ElemType>::GetMinibatch(std::map<std::wstring, Matrix<ElemType>*>& matrices)
{
    if (m_cachingReader)
    {
        return m_cachingReader->GetMinibatch(matrices);
    }
    // get the features array
    if (matrices.find(m_featuresName) == matrices.end())
        RuntimeError("Features matrix not found in config file, there should be a section '%ls=[...]' in the configuration file.", m_featuresName.c_str());
        
    Matrix<ElemType>& features = *matrices[m_featuresName];

    // get out if they didn't call StartMinibatchLoop() first
    if (m_mbSize == 0)
        return false;

    // check to see if we have changed epochs, if so we are done with this one.
    if (m_mbStartSample / m_epochSize != m_epoch)
        return false;

    bool randomize = Randomize();
    bool moreData = EnsureDataAvailable(m_mbStartSample);

    // figure which sweep of the randomization we are on
    size_t epochSample = m_mbStartSample % m_epochSize; // where the minibatch starts in this epoch
    //size_t samplesExtra = m_totalSamples % m_epochSize; // extra samples at the end of an epoch
    //size_t epochsDS = (m_totalSamples+m_epochSize-1)/m_epochSize; // how many epochs per dataset
    size_t randomizeSet = randomize?RandomizeSweep(m_mbStartSample):0;
    const auto & tmap = m_randomordering(randomizeSet);
    size_t epochEnd = m_epochSize;
    size_t recordStart = m_totalSamples?m_mbStartSample%m_totalSamples:m_mbStartSample;

    // actual size is either what requested, or total number of samples read so far
    size_t actualmbsize = min(m_totalSamples, m_mbSize);   // it may still return less if at end of sweep

    // check for an odd sized last minibatch
    if (epochSample + actualmbsize > epochEnd)
    {   
        actualmbsize = epochEnd - epochSample;
    }

    // hit the end of the dataset, we should only get here in "one=pass mode"
    if (!moreData)
    {
        // make sure we take into account hitting the end of the dataset (not wrapping around)
        actualmbsize = min(m_totalSamples-recordStart,actualmbsize);
    }

    if (m_labelType == labelCategory)
    {
        memset(m_labelsBuffer,0,sizeof(ElemType)*m_labelDim*actualmbsize);
        memset(m_labelsIdBuffer,0,sizeof(LabelIdType)*actualmbsize);
    }
    else if (m_labelType != labelNone)
    {
        memset(m_labelsBuffer,0,sizeof(ElemType)*1*actualmbsize);        
    }

    if (actualmbsize > 0)
    {
        // loop through and copy data to matrix
        int j = 0; // vector of vectors of feature data
        // determine randomization base index
        size_t randBase = 0;    // (keep compiler happy)
        if (randomize)
            randBase = epochSample - epochSample%m_randomizeRange;

        //loop through all the samples
        for (size_t jSample = m_mbStartSample; j < actualmbsize; ++j, ++jSample)
        {
            // pick the right sample with randomization if desired
            size_t jRand = randomize?(randBase + tmap[jSample%m_randomizeRange]):jSample;
            jRand %= m_epochSize;
         
            // vector of feature data goes into matrix column
            memcpy(&m_featuresBuffer[j*m_featureCount],&m_featureData[jRand*m_featureCount],sizeof(ElemType)*m_featureCount);

            if (m_labelType == labelCategory)
            {            
                m_labelsBuffer[j*m_labelDim + m_labelIdData[jRand]] = (ElemType)1;    
                m_labelsIdBuffer[j] = m_labelIdData[jRand];
            }
            else if (m_labelType != labelNone)
            {
                if (m_labelType == labelRegression)
                {
                    m_labelsBuffer[j] = (ElemType)atof(m_labelData[jRand].c_str());
                }
                else
                {
                    StoreLabel(m_labelsBuffer[j],m_labelData[jRand]);            
                }
            }
        }
    }

    // create the respective MBLayout
    // Every sample is returned as a sequence of 1 frame.
    m_pMBLayout->Init(actualmbsize, 1, false/*means it is not sequential*/);

    // if we are writing out to the caching writer, do it now
    if (m_cachingWriter)
    {
        map<std::wstring, void*, nocase_compare> writeBuffer;
        writeBuffer[m_featuresName] = m_featuresBuffer;
        if (m_labelType == labelCategory)
        {
            writeBuffer[m_labelsName] = m_labelsIdBuffer;
            if (!m_labelsCategoryName.empty())
                writeBuffer[m_labelsCategoryName] = m_labelsBuffer;
        }
        else if (m_labelType != labelNone)
        {
            writeBuffer[m_labelsName] = m_labelsBuffer;
        }

        // write out the data, on a second pass compute statistics as needed
        bool moreToWrite = m_cachingWriter->SaveData(m_mbStartSample, writeBuffer, actualmbsize, m_totalSamples, 0);

        // done writing
        if (!moreToWrite)
        {
            // write out the mapping table as necessary
            if (m_labelType == labelCategory && !m_labelsMapName.empty())
            {
                m_cachingWriter->SaveMapping(m_labelsMapName, m_mapIdToLabel);
            }

            WriteLabelFile();

            // now close the cache writer
            delete m_cachingWriter;
            m_cachingWriter = NULL;
        }
    }

    // advance to the next minibatch
    m_mbStartSample += actualmbsize;

    // if they don't want partial minibatches, skip data transfer and return
    if (actualmbsize < m_mbSize && !m_partialMinibatch
        || actualmbsize == 0) // no records found (end of minibatch)
    {
        return false;
    }

    // now transfer to the GPU as needed
    features.SetValue(m_featureCount, actualmbsize, features.GetDeviceId(), m_featuresBuffer, matrixFlagNormal);
    if (m_labelType == labelCategory)
    {
        auto labelEntry = matrices.find(m_labelsName);
        if (labelEntry != matrices.end())
        {
            Matrix<ElemType>* labels = labelEntry->second;
            if (labels != nullptr)
                labels->SetValue(m_labelDim, actualmbsize, labels->GetDeviceId(), m_labelsBuffer,matrixFlagNormal);
        }
    }
    else if (m_labelType != labelNone)
    {
        auto labelEntry = matrices.find(m_labelsName);
        if (labelEntry != matrices.end())
        {
            Matrix<ElemType>* labels = labelEntry->second;
            if (labels != nullptr)
                labels->SetValue(1, actualmbsize, labels->GetDeviceId(), m_labelsBuffer,matrixFlagNormal);
        }
    }
    // we read some records, so process them
    return true;
}

// GetLabelMapping - Retrieve the table that maps numeric label IDs to native labels
// sectionName - section to query; it is only passed along to the caching reader when one is active
// returns - the caching reader's mapping if a caching reader is in use, otherwise this reader's own m_mapIdToLabel
template<class ElemType>
const std::map<typename IDataReader<ElemType>::LabelIdType, typename IDataReader<ElemType>::LabelType>& UCIFastReader<ElemType>::GetLabelMapping(const std::wstring& sectionName)
{
    // no cache involved: our own table is the answer
    if (!m_cachingReader)
        return m_mapIdToLabel;

    // otherwise the caching reader supplies the mapping
    return m_cachingReader->GetLabelMapping(sectionName);
}

// SetLabelMapping - Sets the label mapping from integer index to label
// sectionName - unused by this reader
// labelMapping - mapping table from IDs to label values (IDs must be 0-n)
// note: for tasks with labels, the mapping table must be the same between a training run and a testing run
// note: raises RuntimeError when a caching reader is in use; otherwise replaces m_mapIdToLabel
//       and rebuilds the reverse table m_mapLabelToId from scratch
template<class ElemType>
void UCIFastReader<ElemType>::SetLabelMapping(const std::wstring& /*sectionName*/, const std::map<typename IDataReader<ElemType>::LabelIdType, LabelType>& labelMapping)
{
    if (m_cachingReader)
    {
        RuntimeError("Cannot set mapping table when the caching reader is being used");
    }
    m_mapIdToLabel = labelMapping;
    m_mapLabelToId.clear();
    // bind each entry by const reference: a by-value std::pair<unsigned, LabelType> loop variable
    // would build a converted copy of every entry (label included) on each iteration
    for (const auto& idAndLabel : labelMapping)
    {
        m_mapLabelToId[idAndLabel.second] = idAndLabel.first;
    }
}

// GetData - Gets metadata from the specified section (into CPU memory)
// This reader has no implementation of its own: the request is forwarded to the caching reader
// when one is active, and is rejected with a RuntimeError otherwise.
// sectionName - section name to retrieve data from
// numRecords - number of records to read
// data - pointer to data buffer, if NULL, dataBufferSize will be set to size of required buffer to accommodate request
// dataBufferSize - [in] size of the databuffer in bytes
//                  [out] size of buffer filled with data
// recordStart - record to start reading from, defaults to zero (start of data)
// returns: true if data remains to be read, false if the end of data was reached
template<class ElemType>
bool UCIFastReader<ElemType>::GetData(const std::wstring& sectionName, size_t numRecords, void* data, size_t& dataBufferSize, size_t recordStart)
{
    // without a caching reader there is nothing to serve the request
    // (RuntimeError is relied upon not to return, exactly as in the rest of this file)
    if (!m_cachingReader)
        RuntimeError("GetData not supported in UCIFastReader");

    return m_cachingReader->GetData(sectionName, numRecords, data, dataBufferSize, recordStart);
}

// DataEnd - Query whether the reader has reached the end of a given unit of data
// endDataType - which boundary to test: epoch, dataset or sentence (endDataNull is not a valid request)
// returns - true if the requested boundary has been reached, otherwise false
// note: when a caching reader is active the query is forwarded to it
template<class ElemType>
bool UCIFastReader<ElemType>::DataEnd(EndDataType endDataType)
{
    if (m_cachingReader)
    {
        return m_cachingReader->DataEnd(endDataType);
    }

    bool ret = false;
    switch (endDataType)
    {
    case endDataNull:
        assert(false);
        break;
    case endDataEpoch:
        // the epoch is over once the next minibatch start falls outside the current epoch
        ret = (m_mbStartSample / m_epochSize != m_epoch);
        break;
    case endDataSet:
        // EnsureDataAvailable() answers "is there more data?", so the end of the dataset is its negation
        ret = !EnsureDataAvailable(m_mbStartSample, true);
        break;
    case endDataSentence:  // for fast reader each minibatch is considered a "sentence", so always true
        ret = true;
        break;
    }
    return ret;
}

// instantiate all the combinations we expect to be used
// (explicit instantiations: the member definitions in this file are generated for double and float)
template class UCIFastReader<double>; 
template class UCIFastReader<float>;
}}}