Add new image reader under new reader architecture

2016-01-25 16:49:09 +01:00 · 2016-01-25 16:49:09 +01:00 · 52d6444920
--- a/CNTK.sln
+++ b/CNTK.sln
@ -360,6 +360,7 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ReaderTests", "Tests\UnitTe
 	ProjectSection(ProjectDependencies) = postProject
 		{33D2FD22-DEF2-4507-A58A-368F641AEBE5} = {33D2FD22-DEF2-4507-A58A-368F641AEBE5}
 		{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5} = {60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}
+		{F0A9637C-20DA-42F0-83D4-23B4704DE602} = {F0A9637C-20DA-42F0-83D4-23B4704DE602}
 		{E6646FFE-3588-4276-8A15-8D65C22711C1} = {E6646FFE-3588-4276-8A15-8D65C22711C1}
 	EndProjectSection
 EndProject
@ -757,6 +758,17 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "03_ConvBatchNorm", "03_Conv
 		Tests\EndToEndTests\Examples\Image\MNIST\03_ConvBatchNorm\testcases.yml = Tests\EndToEndTests\Examples\Image\MNIST\03_ConvBatchNorm\testcases.yml
 	EndProjectSection
 EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "NewImageReader", "Source\Readers\NewImageReader\NewImageReader.vcxproj", "{9BD0A711-0BBD-45B6-B81C-053F03C26CFB}"
+	ProjectSection(ProjectDependencies) = postProject
+		{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5} = {60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}
+		{F0A9637C-20DA-42F0-83D4-23B4704DE602} = {F0A9637C-20DA-42F0-83D4-23B4704DE602}
+	EndProjectSection
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Reader", "Source\Readers\Reader\Reader.vcxproj", "{F0A9637C-20DA-42F0-83D4-23B4704DE602}"
+	ProjectSection(ProjectDependencies) = postProject
+		{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5} = {60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}
+	EndProjectSection
+EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
 		Debug_CpuOnly|x64 = Debug_CpuOnly|x64
@ -947,6 +959,18 @@ Global
 		{41E11A59-62B2-4927-A4F8-F40B1B612C6C}.Release_CpuOnly|x64.Build.0 = Release_CpuOnly|x64
 		{41E11A59-62B2-4927-A4F8-F40B1B612C6C}.Release|x64.ActiveCfg = Release|x64
 		{41E11A59-62B2-4927-A4F8-F40B1B612C6C}.Release|x64.Build.0 = Release|x64
+		{9BD0A711-0BBD-45B6-B81C-053F03C26CFB}.Debug_CpuOnly|x64.ActiveCfg = Debug|x64
+		{9BD0A711-0BBD-45B6-B81C-053F03C26CFB}.Debug|x64.ActiveCfg = Debug|x64
+		{9BD0A711-0BBD-45B6-B81C-053F03C26CFB}.Debug|x64.Build.0 = Debug|x64
+		{9BD0A711-0BBD-45B6-B81C-053F03C26CFB}.Release_CpuOnly|x64.ActiveCfg = Release|x64
+		{9BD0A711-0BBD-45B6-B81C-053F03C26CFB}.Release|x64.ActiveCfg = Release|x64
+		{9BD0A711-0BBD-45B6-B81C-053F03C26CFB}.Release|x64.Build.0 = Release|x64
+		{F0A9637C-20DA-42F0-83D4-23B4704DE602}.Debug_CpuOnly|x64.ActiveCfg = Debug|x64
+		{F0A9637C-20DA-42F0-83D4-23B4704DE602}.Debug|x64.ActiveCfg = Debug|x64
+		{F0A9637C-20DA-42F0-83D4-23B4704DE602}.Debug|x64.Build.0 = Debug|x64
+		{F0A9637C-20DA-42F0-83D4-23B4704DE602}.Release_CpuOnly|x64.ActiveCfg = Release|x64
+		{F0A9637C-20DA-42F0-83D4-23B4704DE602}.Release|x64.ActiveCfg = Release|x64
+		{F0A9637C-20DA-42F0-83D4-23B4704DE602}.Release|x64.Build.0 = Release|x64
 	EndGlobalSection
 	GlobalSection(SolutionProperties) = preSolution
 		HideSolutionNode = FALSE
@ -1050,5 +1074,7 @@ Global
 		{6F1D0CE1-0F18-4B4C-9581-1F2146C8D300} = {63C6816D-66BF-487E-B541-094142C8272B}
 		{A0B366FE-2EEA-4E32-9AED-12C46409C30C} = {63C6816D-66BF-487E-B541-094142C8272B}
 		{BD783D50-47E2-485F-BDAF-29BD40D84645} = {63C6816D-66BF-487E-B541-094142C8272B}
+		{9BD0A711-0BBD-45B6-B81C-053F03C26CFB} = {33EBFE78-A1A8-4961-8938-92A271941F94}
+		{F0A9637C-20DA-42F0-83D4-23B4704DE602} = {33EBFE78-A1A8-4961-8938-92A271941F94}
 	EndGlobalSection
 EndGlobal
--- a/35
+++ b/35
@ -57,7 +57,7 @@ endif
 CXX = mpic++

 SOURCEDIR:= Source
-INCLUDEPATH:= $(addprefix $(SOURCEDIR)/, Common/Include Math CNTK ActionsLib ComputationNetworkLib SGDLib SequenceTrainingLib CNTK/BrainScript)
+INCLUDEPATH:= $(addprefix $(SOURCEDIR)/, Common/Include Math CNTK ActionsLib ComputationNetworkLib SGDLib SequenceTrainingLib CNTK/BrainScript Readers/Reader)
 CPPFLAGS:= -D_POSIX_SOURCE -D_XOPEN_SOURCE=600 -D__USE_XOPEN2K
 CXXFLAGS:= -msse3 -std=c++0x -std=c++11 -fopenmp -fpermissive -fPIC -Werror -fcheck-new
 LIBPATH:=
@ -211,6 +211,12 @@ $(BUILDINFO): $(GENBUILD)
 ########################################

 # Define all sources that need to be built
+READER_SRC =\
+	$(SOURCEDIR)/Readers/Reader/SampleModePacker.cpp \
+	$(SOURCEDIR)/Readers/Reader/BlockRandomizer.cpp \
+	$(SOURCEDIR)/Readers/Reader/NoRandomizer.cpp \
+	$(SOURCEDIR)/Readers/Reader/ReaderShim.cpp \
+
 COMMON_SRC =\
 	$(SOURCEDIR)/Common/Config.cpp \
 	$(SOURCEDIR)/Common/DataReader.cpp \
@ -249,6 +255,7 @@ MATH_SRC +=\
 endif

 MATH_SRC+=$(COMMON_SRC)
+MATH_SRC+=$(READER_SRC)

 MATH_OBJ := $(patsubst %.cu, $(OBJDIR)/%.o, $(patsubst %.cpp, $(OBJDIR)/%.o, $(MATH_SRC)))

@ -447,6 +454,32 @@ $(IMAGEREADER): $(IMAGEREADER_OBJ) | $(CNTKMATH_LIB)
 	$(CXX) $(LDFLAGS) -shared $(patsubst %,-L%, $(LIBDIR) $(LIBPATH)) $(patsubst %,$(RPATH)%, $(ORIGINDIR) $(LIBPATH)) -o $@ $^ -l$(CNTKMATH) -lopencv_core -lopencv_imgproc -lopencv_imgcodecs
 endif

+########################################
+# NewImageReader plugin
+########################################
+
+ifdef OPENCV_PATH
+NEWIMAGEREADER_SRC =\
+	$(SOURCEDIR)/Readers/NewImageReader/Exports.cpp \
+	$(SOURCEDIR)/Readers/NewImageReader/ImageConfigHelper.cpp \
+	$(SOURCEDIR)/Readers/NewImageReader/ImageDataDeserializer.cpp \
+	$(SOURCEDIR)/Readers/NewImageReader/ImageTransformers.cpp \
+	$(SOURCEDIR)/Readers/NewImageReader/ImageReader.cpp \
+
+NEWIMAGEREADER_OBJ := $(patsubst %.cpp, $(OBJDIR)/%.o, $(NEWIMAGEREADER_SRC))
+
+NEWIMAGEREADER:=$(LIBDIR)/NewImageReader.so
+ALL += $(NEWIMAGEREADER)
+SRC+=$(NEWIMAGEREADER_SRC)
+
+INCLUDEPATH += $(OPENCV_PATH)/include
+LIBPATH += $(OPENCV_PATH)/release/lib
+
+$(NEWIMAGEREADER): $(NEWIMAGEREADER_OBJ) | $(CNTKMATH_LIB)
+	@echo $(SEPARATOR)
+	$(CXX) $(LDFLAGS) -shared $(patsubst %,-L%, $(LIBDIR) $(LIBPATH)) $(patsubst %,$(RPATH)%, $(ORIGINDIR) $(LIBPATH)) -o $@ $^ -l$(CNTKMATH) -lopencv_core -lopencv_imgproc -lopencv_imgcodecs
+endif
+
 ########################################
 # 1bit SGD setup
 ########################################
--- a/Source/Common/Include/TimerUtility.h
+++ b/Source/Common/Include/TimerUtility.h
@ -1,5 +1,7 @@
 #pragma once

+#include <string>
+
 #define MILLI_PER_SEC 1000
 #define MICRO_PER_SEC 1000000
 #define NANO_PER_SEC 1000000000
@ -33,4 +35,32 @@ private:
    long long m_start;
    long long m_end;
 };
+
+// Scope-bound timer: starts timing in the constructor and, in the destructor,
+// prints the elapsed seconds to stderr. Both steps happen only when the
+// verbosity passed at construction is greater than 2.
+class ScopeTimer
+{
+    Timer m_aggregateTimer; // underlying timer, started/stopped only when verbosity > 2
+    size_t m_verbosity;     // reporting threshold supplied by the caller
+    std::string m_message;  // handed to fprintf as the format string in the destructor
+
+public:
+    // verbosity: timing and reporting are enabled only if this is > 2.
+    // message:   used verbatim as the fprintf format string on destruction; the
+    //            elapsed time in seconds (a double) is passed as its only argument.
+    ScopeTimer(size_t verbosity, const std::string& message)
+        : m_verbosity(verbosity), m_message(message)
+    {
+        if (m_verbosity > 2)
+        {
+            m_aggregateTimer.Start();
+        }
+    }
+
+    // Stops the timer and writes the report to stderr (only when verbosity > 2).
+    ~ScopeTimer()
+    {
+        if (m_verbosity > 2)
+        {
+            m_aggregateTimer.Stop();
+            double time = m_aggregateTimer.ElapsedSeconds();
+            // NOTE(review): m_message is the format string here, so callers presumably
+            // supply exactly one floating-point conversion (e.g. %f) - confirm at call sites.
+            fprintf(stderr, m_message.c_str(), time);
+        }
+    }
+};
+
 } } }
--- a/Source/Readers/NewImageReader/Exports.cpp
+++ b/Source/Readers/NewImageReader/Exports.cpp
@ -0,0 +1,34 @@
+//
+// Copyright (c) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
+//
+// Exports.cpp : Defines the exported functions for the DLL application.
+//
+
+#include "stdafx.h"
+#define DATAREADER_EXPORTS
+#include "DataReader.h"
+#include "ReaderShim.h"
+#include "ImageReader.h"
+#include "HeapMemoryProvider.h"
+#include "CudaMemoryProvider.h"
+
+namespace Microsoft { namespace MSR { namespace CNTK {
+
+// TODO: Memory provider should be injected by SGD.
+
+// Factory passed to ReaderShim below: builds an ImageReader from the given
+// configuration, using a freshly created HeapMemoryProvider.
+auto factory = [](const ConfigParameters& parameters) -> ReaderPtr
+{
+    return std::make_shared<ImageReader>(std::make_shared<HeapMemoryProvider>(), parameters);
+};
+
+// Exported entry point for single precision: stores a newly allocated
+// ReaderShim<float> (wrapping the image reader factory) into *preader.
+extern "C" DATAREADER_API void GetReaderF(IDataReader<float>** preader)
+{
+    *preader = new ReaderShim<float>(factory);
+}
+
+// Exported entry point for double precision: stores a newly allocated
+// ReaderShim<double> (wrapping the image reader factory) into *preader.
+extern "C" DATAREADER_API void GetReaderD(IDataReader<double>** preader)
+{
+    *preader = new ReaderShim<double>(factory);
+}
+} } }
--- a/Source/Readers/NewImageReader/ImageConfigHelper.cpp
+++ b/Source/Readers/NewImageReader/ImageConfigHelper.cpp
@ -0,0 +1,134 @@
+//
+// Copyright (c) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
+//
+
+#include "stdafx.h"
+#include "ImageConfigHelper.h"
+#include "StringUtils.h"
+
+namespace Microsoft { namespace MSR { namespace CNTK {
+
+    // Returns the names of all sections of 'config' for which
+    // ExistsCurrent(parameterName) holds. Raises a RuntimeError if there is none.
+    std::vector<std::string> GetSectionsWithParameter(const ConfigParameters& config, const std::string& parameterName)
+    {
+        std::vector<std::string> matches;
+        for (const std::pair<std::string, ConfigParameters>& entry : config)
+        {
+            // Sections that do not carry the parameter are of no interest.
+            if (!entry.second.ExistsCurrent(parameterName))
+            {
+                continue;
+            }
+
+            matches.push_back(entry.first);
+        }
+
+        // The reader cannot work without at least one matching section.
+        if (matches.empty())
+        {
+            RuntimeError("ImageReader requires %s parameter.", parameterName.c_str());
+        }
+
+        return matches;
+    }
+
+    // Parses the reader configuration: locates the single feature section (the one
+    // defining "width") and the single label section (the one defining "labelDim"),
+    // builds a stream description for each, and reads the map file path, the
+    // randomization mode, the precision and the CPU thread count.
+    // Raises a RuntimeError on unsupported or missing settings.
+    ImageConfigHelper::ImageConfigHelper(const ConfigParameters& config)
+        : m_dataFormat(CHW)
+    {
+        std::vector<std::string> featureNames = GetSectionsWithParameter(config, "width");
+        std::vector<std::string> labelNames = GetSectionsWithParameter(config, "labelDim");
+
+        // REVIEW alexeyk: currently support only one feature and label section.
+        if (featureNames.size() != 1 || labelNames.size() != 1)
+        {
+            RuntimeError(
+                "ImageReader currently supports a single feature and label stream. '%d' features , '%d' labels found.",
+                static_cast<int>(featureNames.size()),
+                static_cast<int>(labelNames.size()));
+        }
+
+        // Image dimensions of the feature stream.
+        ConfigParameters featureSection = config(featureNames[0]);
+        size_t w = featureSection("width");
+        size_t h = featureSection("height");
+        size_t c = featureSection("channels");
+
+        // Sample layout: "nchw" (the default) keeps CHW, "nhwc" switches to HWC.
+        std::string mbFmt = featureSection("mbFormat", "nchw");
+        if (AreEqualIgnoreCase(mbFmt, "nhwc"))
+        {
+            m_dataFormat = HWC;
+        }
+        else if (!AreEqualIgnoreCase(mbFmt, "nchw"))
+        {
+            RuntimeError("ImageReader does not support the sample format '%s', only 'nchw' and 'nhwc' are supported.", mbFmt.c_str());
+        }
+
+        // Feature stream always gets id 0 (see GetFeatureStreamId).
+        auto features = std::make_shared<StreamDescription>();
+        features->m_id = 0;
+        features->m_name = msra::strfun::utf16(featureSection.ConfigName());
+        features->m_sampleLayout = std::make_shared<TensorShape>(ImageDimensions(w, h, c).AsTensorShape(m_dataFormat));
+        m_streams.push_back(features);
+
+        ConfigParameters label = config(labelNames[0]);
+        size_t labelDimension = label("labelDim");
+
+        // Label stream always gets id 1 (see GetLabelStreamId).
+        auto labelSection = std::make_shared<StreamDescription>();
+        labelSection->m_id = 1;
+        labelSection->m_name = msra::strfun::utf16(label.ConfigName());
+        labelSection->m_sampleLayout = std::make_shared<TensorShape>(labelDimension);
+        m_streams.push_back(labelSection);
+
+        m_mapPath = config(L"file");
+
+        // Randomization: "auto" (the default) enables it, "none" disables it.
+        std::string rand = config(L"randomize", "auto");
+
+        if (AreEqualIgnoreCase(rand, "auto"))
+        {
+            m_randomize = true;
+        }
+        else if (AreEqualIgnoreCase(rand, "none"))
+        {
+            m_randomize = false;
+        }
+        else
+        {
+            RuntimeError("'randomize' parameter must be set to 'auto' or 'none'");
+        }
+
+        // Identify precision
+        // The same element type is applied to both the feature and the label stream.
+        string precision = config.Find("precision", "float");
+        if (AreEqualIgnoreCase(precision, "float"))
+        {
+            features->m_elementType = ElementType::tfloat;
+            labelSection->m_elementType = ElementType::tfloat;
+        }
+        else if (AreEqualIgnoreCase(precision, "double"))
+        {
+            features->m_elementType = ElementType::tdouble;
+            labelSection->m_elementType = ElementType::tdouble;
+        }
+        else
+        {
+            RuntimeError("Not supported precision '%s'. Expected 'double' or 'float'.", precision.c_str());
+        }
+
+        // Defaults to 0 when "numCPUThreads" is not specified.
+        m_cpuThreadCount = config(L"numCPUThreads", 0);
+    }
+
+    // Returns a copy of the stream descriptions built by the constructor
+    // (feature stream first, then label stream).
+    std::vector<StreamDescriptionPtr> ImageConfigHelper::GetStreams() const
+    {
+        return m_streams;
+    }
+
+    // Returns the id of the feature stream; matches the m_id assigned in the constructor.
+    size_t ImageConfigHelper::GetFeatureStreamId() const
+    {
+        // Currently we only support a single feature/label stream, so the index is hard-wired.
+        return 0;
+    }
+
+    // Returns the id of the label stream; matches the m_id assigned in the constructor.
+    size_t ImageConfigHelper::GetLabelStreamId() const
+    {
+        // Currently we only support a single feature/label stream, so the index is hard-wired.
+        return 1;
+    }
+
+    // Returns the value of the "file" configuration parameter.
+    std::string ImageConfigHelper::GetMapPath() const
+    {
+        return m_mapPath;
+    }
+}}}
--- a/Source/Readers/NewImageReader/ImageConfigHelper.h
+++ b/Source/Readers/NewImageReader/ImageConfigHelper.h
@ -0,0 +1,61 @@
+//
+// Copyright (c) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
+//
+
+#pragma once
+
+#include <string>
+#include <vector>
+#include "Config.h"
+#include "Reader.h"
+
+namespace Microsoft { namespace MSR { namespace CNTK {
+
+// A helper class for image specific parameters.
+// A simple wrapper around CNTK ConfigParameters.
+class ImageConfigHelper
+{
+public:
+    // Parses the given reader configuration; all getters below return the parsed values.
+    explicit ImageConfigHelper(const ConfigParameters& config);
+
+    // Get all streams that are specified in the configuration.
+    std::vector<StreamDescriptionPtr> GetStreams() const;
+
+    // Get index of the feature stream.
+    size_t GetFeatureStreamId() const;
+
+    // Get index of the label stream.
+    size_t GetLabelStreamId() const;
+
+    // Get the map file path that describes mapping of images into their labels.
+    std::string GetMapPath() const;
+
+    // Get the layout (CHW or HWC) selected by the "mbFormat" parameter of the feature section.
+    ImageLayoutKind GetDataFormat() const
+    {
+        return m_dataFormat;
+    }
+
+    // Get the value of the "numCPUThreads" parameter (0 when not specified).
+    int GetCpuThreadCount() const
+    {
+        return m_cpuThreadCount;
+    }
+
+    // True when the "randomize" parameter is "auto" (the default), false when it is "none".
+    bool ShouldRandomize() const
+    {
+        return m_randomize;
+    }
+
+private:
+    // Not copyable.
+    ImageConfigHelper(const ImageConfigHelper&) = delete;
+    ImageConfigHelper& operator=(const ImageConfigHelper&) = delete;
+
+    std::string m_mapPath;                        // value of the "file" parameter
+    std::vector<StreamDescriptionPtr> m_streams;  // feature stream, then label stream
+    ImageLayoutKind m_dataFormat;                 // CHW unless "mbFormat" is "nhwc"
+    int m_cpuThreadCount;                         // value of "numCPUThreads"
+    bool m_randomize;                             // derived from "randomize"
+};
+
+typedef std::shared_ptr<ImageConfigHelper> ImageConfigHelperPtr;
+} } }
--- a/Source/Readers/NewImageReader/ImageDataDeserializer.cpp
+++ b/Source/Readers/NewImageReader/ImageDataDeserializer.cpp
@ -0,0 +1,209 @@
+//
+// Copyright (c) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
+//
+
+#include "stdafx.h"
+#include <opencv2/opencv.hpp>
+#include "ImageDataDeserializer.h"
+#include "ImageConfigHelper.h"
+
+#ifndef UNREFERENCED_PARAMETER
+#define UNREFERENCED_PARAMETER(P) (P)
+#endif
+
+namespace Microsoft { namespace MSR { namespace CNTK {
+
+// Interface for filling a sparse label sequence for a given class id.
+// Implemented per element type by TypedLabelGenerator.
+class ImageDataDeserializer::LabelGenerator
+{
+public:
+    // Fills 'data' so that it describes the label for class 'classId'.
+    virtual void CreateLabelFor(size_t classId, SparseSequenceData& data) = 0;
+    virtual ~LabelGenerator() { }
+};
+
+// A helper class to generate a typed label in a sparse format.
+// A label is just a category/class the image belongs to.
+// It is represented as an array indexed by the category with zero values for all categories the image does not belong to,
+// and a single one for a category it belongs to: [ 0, .. 0.. 1 .. 0 ]
+// The class is parameterized because the representation of 1 is type specific.
+template <class TElement>
+class TypedLabelGenerator : public ImageDataDeserializer::LabelGenerator
+{
+public:
+    // m_value holds the typed representation of 1 that every generated label points to.
+    TypedLabelGenerator() : m_value(1)
+    {
+    }
+
+    // Fills 'data' with a single index list containing only 'classId' and points
+    // its data at m_value.
+    virtual void CreateLabelFor(size_t classId, SparseSequenceData& data) override
+    {
+        data.m_indices.resize(1);
+        data.m_indices[0] = std::vector<size_t>{ classId };
+        // All labels share this generator's single value, so 'data' must not outlive the generator.
+        data.m_data = &m_value;
+    }
+
+private:
+    TElement m_value; // the value 1 in the element type of the label stream
+};
+
+// Builds the deserializer from the reader configuration:
+//  - obtains the feature and label stream descriptions via ImageConfigHelper,
+//  - rewrites the feature sample layout to HWC and sets the storage types
+//    (dense features, sparse CSC labels),
+//  - selects a label generator matching the label element type,
+//  - loads the sequence descriptions from the map file.
+// Raises a RuntimeError if the label element type is neither float nor double.
+ImageDataDeserializer::ImageDataDeserializer(const ConfigParameters& config)
+{
+    ImageConfigHelper configHelper(config);
+    m_streams = configHelper.GetStreams();
+    assert(m_streams.size() == 2);
+    const auto& label = m_streams[configHelper.GetLabelStreamId()];
+    const auto& feature = m_streams[configHelper.GetFeatureStreamId()];
+
+    // Expect data in HWC.
+    ImageDimensions dimensions(*feature->m_sampleLayout, configHelper.GetDataFormat());
+    feature->m_sampleLayout = std::make_shared<TensorShape>(dimensions.AsTensorShape(HWC));
+
+    label->m_storageType = StorageType::sparse_csc;
+    feature->m_storageType = StorageType::dense;
+
+    m_featureElementType = feature->m_elementType;
+    size_t labelDimension = label->m_sampleLayout->GetDim(0);
+
+    if (label->m_elementType == ElementType::tfloat)
+    {
+        m_labelGenerator = std::make_shared<TypedLabelGenerator<float>>();
+    }
+    else if (label->m_elementType == ElementType::tdouble)
+    {
+        m_labelGenerator = std::make_shared<TypedLabelGenerator<double>>();
+    }
+    else
+    {
+        // Cast explicitly so the argument matches the '%d' conversion, as the
+        // other RuntimeError calls in this file do.
+        RuntimeError("Unsupported label element type '%d'.", static_cast<int>(label->m_elementType));
+    }
+
+    CreateSequenceDescriptions(configHelper.GetMapPath(), labelDimension);
+}
+
+// Reads the map file and appends one sequence description per line to m_imageSequences.
+// Each line must contain two tab-delimited columns: the image path and the class id.
+//   mapPath:        path of the map file.
+//   labelDimension: number of classes; every class id must be smaller than this.
+// Raises a RuntimeError if the file cannot be opened, a line is malformed,
+// or a class id is out of range.
+void ImageDataDeserializer::CreateSequenceDescriptions(std::string mapPath, size_t labelDimension)
+{
+    std::ifstream mapFile(mapPath);
+    if (!mapFile)
+    {
+        RuntimeError("Could not open %s for reading.", mapPath.c_str());
+    }
+
+    std::string line;
+    ImageSequenceDescription description;
+    // Every sequence consists of exactly one sample (one image).
+    description.m_numberOfSamples = 1;
+    description.m_isValid = true;
+    for (size_t lineIndex = 0; std::getline(mapFile, line); ++lineIndex)
+    {
+        std::stringstream ss(line);
+        std::string imagePath;
+        std::string classId;
+        if (!std::getline(ss, imagePath, '\t') || !std::getline(ss, classId, '\t'))
+        {
+            RuntimeError("Invalid map file format, must contain 2 tab-delimited columns: %s, line: %d.",
+                         mapPath.c_str(),
+                         static_cast<int>(lineIndex));
+        }
+
+        // The line index serves both as the sequence id and as the chunk id.
+        description.m_id = lineIndex;
+        description.m_chunkId = lineIndex;
+        description.m_path = imagePath;
+        description.m_classId = std::stoi(classId);
+
+        // A negative id wraps around to a huge unsigned value and is rejected here as well.
+        if (description.m_classId >= labelDimension)
+        {
+            // Report the offending image, not the map file, to match the message text.
+            RuntimeError(
+                "Image '%s' has invalid class id '%d'. Expected label dimension is '%d'.",
+                imagePath.c_str(),
+                static_cast<int>(description.m_classId),
+                static_cast<int>(labelDimension));
+        }
+        m_imageSequences.push_back(description);
+    }
+}
+
+// Returns a copy of the stream descriptions set up by the constructor.
+std::vector<StreamDescriptionPtr> ImageDataDeserializer::GetStreamDescriptions() const
+{
+    return m_streams;
+}
+
+// Loads the images for the requested sequence ids and returns, for each id (in the
+// same order), a vector holding the dense image data followed by the sparse label.
+// The returned image data points into m_currentImages and the labels are the
+// objects kept in m_labels, so both are overwritten by the next call.
+// Raises a RuntimeError if 'ids' is empty, an id is out of range, or an image
+// cannot be read.
+std::vector<std::vector<SequenceDataPtr>> ImageDataDeserializer::GetSequencesById(const std::vector<size_t>& ids)
+{
+    if (ids.empty())
+    {
+        RuntimeError("Number of requested sequences cannot be zero.");
+    }
+
+    // Validate all ids before entering the parallel region: an exception must not
+    // propagate out of an OpenMP parallel region.
+    for (size_t id : ids)
+    {
+        if (id >= m_imageSequences.size())
+        {
+            RuntimeError("Invalid sequence id is provided '%d', expected range [0..%d].",
+                         static_cast<int>(id),
+                         static_cast<int>(m_imageSequences.size()) - 1);
+        }
+    }
+
+    m_currentImages.resize(ids.size());
+    m_labels.resize(ids.size());
+
+    std::vector<std::vector<SequenceDataPtr>> result;
+    result.resize(ids.size());
+
+    // Lowest request index whose image failed to load, or -1 if all loaded.
+    // The failure is reported after the parallel region for the same reason as above.
+    int failedIndex = -1;
+    const int count = static_cast<int>(ids.size());
+
+#pragma omp parallel for ordered schedule(dynamic)
+    for (int i = 0; i < count; ++i)
+    {
+        const auto& imageSequence = m_imageSequences[ids[i]];
+
+        // Construct image
+        m_currentImages[i] = cv::imread(imageSequence.m_path, cv::IMREAD_COLOR);
+        cv::Mat& cvImage = m_currentImages[i];
+
+        if (!cvImage.data)
+        {
+#pragma omp critical
+            {
+                if (failedIndex < 0 || i < failedIndex)
+                {
+                    failedIndex = i;
+                }
+            }
+            continue;
+        }
+
+        // Convert element type.
+        // TODO We should allow all native CV element types to be able to match the behavior of the old reader.
+        int dataType = m_featureElementType == ElementType::tfloat ? CV_32F : CV_64F;
+        if (cvImage.type() != CV_MAKETYPE(dataType, cvImage.channels()))
+        {
+            cvImage.convertTo(cvImage, dataType);
+        }
+
+        // The raw data pointer handed out below requires a contiguous buffer.
+        if (!cvImage.isContinuous())
+        {
+            cvImage = cvImage.clone();
+        }
+        assert(cvImage.isContinuous());
+
+        ImageDimensions dimensions(cvImage.cols, cvImage.rows, cvImage.channels());
+        auto image = std::make_shared<DenseSequenceData>();
+        image->m_data = cvImage.data;
+        image->m_sampleLayout = std::make_shared<TensorShape>(dimensions.AsTensorShape(HWC));
+        image->m_numberOfSamples = 1;
+
+        // Label objects are created lazily and reused across calls.
+        if (m_labels[i] == nullptr)
+        {
+            m_labels[i] = std::make_shared<SparseSequenceData>();
+        }
+
+        m_labelGenerator->CreateLabelFor(imageSequence.m_classId, *m_labels[i]);
+        result[i] = std::vector<SequenceDataPtr>{image, m_labels[i]};
+    }
+
+    if (failedIndex >= 0)
+    {
+        RuntimeError("Cannot open file '%s'", m_imageSequences[ids[failedIndex]].m_path.c_str());
+    }
+
+    return result;
+}
+
+// Fills 'timeline' with one entry per image sequence; each entry is the address
+// of the corresponding element of m_imageSequences.
+void ImageDataDeserializer::FillSequenceDescriptions(SequenceDescriptions& timeline) const
+{
+    const size_t count = m_imageSequences.size();
+    timeline.resize(count);
+
+    for (size_t index = 0; index < count; ++index)
+    {
+        timeline[index] = &m_imageSequences[index];
+    }
+}
+
+}}}
--- a/Source/Readers/NewImageReader/ImageDataDeserializer.h
+++ b/Source/Readers/NewImageReader/ImageDataDeserializer.h
@ -0,0 +1,61 @@
+//
+// Copyright (c) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
+//
+
+#pragma once
+#include <opencv2/core/mat.hpp>
+#include "DataDeserializerBase.h"
+#include "Config.h"
+
+namespace Microsoft { namespace MSR { namespace CNTK {
+
+// Image data deserializer based on the OpenCV library.
+// The deserializer currently supports two output streams only: a feature and a label stream.
+// All sequences consist only of a single sample (image/label).
+// For features it uses dense storage format with different layout (dimensions) per sequence.
+// For labels it uses the csc sparse storage format.
+class ImageDataDeserializer : public DataDeserializerBase
+{
+public:
+    // Creates the deserializer from the reader configuration and loads the map file.
+    explicit ImageDataDeserializer(const ConfigParameters& config);
+
+    // Description of streams that this data deserializer provides.
+    std::vector<StreamDescriptionPtr> GetStreamDescriptions() const override;
+
+    // Get sequences by specified ids. Order of returned sequences corresponds to the order of provided ids.
+    std::vector<std::vector<SequenceDataPtr>> GetSequencesById(const std::vector<size_t>& ids) override;
+
+protected:
+    // Fills the timeline with pointers to the descriptions stored in m_imageSequences.
+    void FillSequenceDescriptions(SequenceDescriptions& timeline) const override;
+
+private:
+    // Creates a set of sequence descriptions.
+    void CreateSequenceDescriptions(std::string mapPath, size_t labelDimension);
+
+    // Image sequence descriptions. Currently, a sequence contains a single sample only.
+    struct ImageSequenceDescription : public SequenceDescription
+    {
+        std::string m_path; // image path as read from the map file
+        size_t m_classId;   // class id as read from the map file
+    };
+
+    // A helper class for generation of type specific labels (currently float/double only).
+    class LabelGenerator;
+    typedef std::shared_ptr<LabelGenerator> LabelGeneratorPtr;
+    LabelGeneratorPtr m_labelGenerator;
+
+    // Sequence descriptions for all input data.
+    std::vector<ImageSequenceDescription> m_imageSequences;
+
+    // Buffer to store label data.
+    std::vector<SparseSequenceDataPtr> m_labels;
+
+    // Buffer to store feature data.
+    std::vector<cv::Mat> m_currentImages;
+
+    // Element type of the feature/label stream (currently float/double only).
+    ElementType m_featureElementType;
+};
+
+}}}
--- a/Source/Readers/NewImageReader/ImageReader.cpp
+++ b/Source/Readers/NewImageReader/ImageReader.cpp
@ -0,0 +1,97 @@
+//
+// Copyright (c) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
+//
+
+#include "stdafx.h"
+#include "ImageReader.h"
+#include "Config.h"
+#include "ImageConfigHelper.h"
+#include "ImageTransformers.h"
+#include "BlockRandomizer.h"
+#include "NoRandomizer.h"
+#include "ImageDataDeserializer.h"
+#include <omp.h>
+
+namespace Microsoft { namespace MSR { namespace CNTK {
+
+// Wires up the reader pipeline from the configuration:
+// deserializer -> randomizer (BlockRandomizer or NoRandomizer) -> crop -> scale -> mean
+// -> (transpose, only when the configured data format is CHW).
+// The last element of the chain is stored in m_transformer.
+ImageReader::ImageReader(MemoryProviderPtr provider,
+                         const ConfigParameters& config)
+    : m_seed(0), m_provider(provider)
+{
+    // In the future, deserializers and transformers will be dynamically loaded
+    // from external libraries based on the configuration/brain script.
+    // We will provide ability to implement the transformer and
+    // deserializer interface not only in C++ but in scripting languages as well.
+
+    ImageConfigHelper configHelper(config);
+    m_streams = configHelper.GetStreams();
+    assert(m_streams.size() == 2);
+
+    // A non-positive thread count leaves the OpenMP default untouched.
+    int threadCount = configHelper.GetCpuThreadCount();
+    if (threadCount > 0)
+    {
+        omp_set_num_threads(threadCount);
+    }
+
+    auto deserializer = std::make_shared<ImageDataDeserializer>(config);
+
+    TransformerPtr randomizer;
+    if (configHelper.ShouldRandomize())
+    {
+        randomizer = std::make_shared<BlockRandomizer>(0, SIZE_MAX, deserializer);
+    }
+    else
+    {
+        randomizer = std::make_shared<NoRandomizer>(deserializer);
+    }
+
+    // The randomizer is the head of the chain, so it has no predecessor.
+    randomizer->Initialize(nullptr, config);
+
+    // Each transformer is initialized with the previous one as its input.
+    auto cropper = std::make_shared<CropTransformer>();
+    cropper->Initialize(randomizer, config);
+
+    auto scaler = std::make_shared<ScaleTransformer>();
+    scaler->Initialize(cropper, config);
+
+    auto mean = std::make_shared<MeanTransformer>();
+    mean->Initialize(scaler, config);
+
+    // A transpose step is appended only when the configured output layout is CHW.
+    TransformerPtr last = mean;
+    if (configHelper.GetDataFormat() == CHW)
+    {
+        last = std::make_shared<TransposeTransformer>();
+        last->Initialize(mean, config);
+    }
+
+    m_transformer = last;
+}
+
+// Returns a copy of the stream descriptions obtained from the configuration in the constructor.
+std::vector<StreamDescriptionPtr> ImageReader::GetStreamDescriptions()
+{
+    assert(!m_streams.empty());
+    return m_streams;
+}
+
+// Starts a new epoch: forwards the epoch configuration to the transformer chain
+// and creates a fresh SampleModePacker for the configured minibatch size.
+// Raises a RuntimeError if the total epoch size is not positive.
+void ImageReader::StartEpoch(const EpochConfiguration& config)
+{
+    if (config.m_totalEpochSizeInSamples <= 0)
+    {
+        // The value being validated is the total epoch size, so name it as such.
+        RuntimeError("Unsupported epoch size '%d'.",
+                     static_cast<int>(config.m_totalEpochSizeInSamples));
+    }
+
+    m_transformer->StartEpoch(config);
+    m_packer = std::make_shared<SampleModePacker>(
+        m_provider,
+        m_transformer,
+        config.m_minibatchSizeInSamples,
+        m_streams);
+}
+
+// Returns the next minibatch from the packer. The packer is created in StartEpoch,
+// so StartEpoch must have been called first.
+Minibatch ImageReader::ReadMinibatch()
+{
+    assert(m_packer != nullptr);
+    return m_packer->ReadMinibatch();
+}
+} } }
--- a/Source/Readers/NewImageReader/ImageReader.h
+++ b/Source/Readers/NewImageReader/ImageReader.h
@ -0,0 +1,49 @@
+//
+// Copyright (c) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
+//
+
+#pragma once
+
+#include "Reader.h"
+#include "ImageTransformers.h"
+#include "SampleModePacker.h"
+
+namespace Microsoft { namespace MSR { namespace CNTK {
+
+// Implementation of the image reader.
+// Effectively the class represents a factory for connecting the packer,
+// transformers and deserializer together.
+class ImageReader : public Reader
+{
+public:
+    // Builds the deserializer/transformer chain from the given configuration.
+    // 'provider' is passed on to the packer created in StartEpoch.
+    ImageReader(MemoryProviderPtr provider,
+                const ConfigParameters& parameters);
+
+    // Description of streams that this reader provides.
+    std::vector<StreamDescriptionPtr> GetStreamDescriptions() override;
+
+    // Starts a new epoch with the provided configuration.
+    void StartEpoch(const EpochConfiguration& config) override;
+
+    // Reads a single minibatch.
+    Minibatch ReadMinibatch() override;
+
+private:
+    // All streams this reader provides.
+    std::vector<StreamDescriptionPtr> m_streams;
+
+    // A head transformer in a list of transformers.
+    TransformerPtr m_transformer;
+
+    // Packer. Re-created on every StartEpoch.
+    SampleModePackerPtr m_packer;
+
+    // Seed for the random generator.
+    unsigned int m_seed;
+
+    // Memory provider (TODO: this will possibly change in the near future.)
+    MemoryProviderPtr m_provider;
+};
+
+}}}
--- a/Source/Readers/NewImageReader/ImageTransformers.cpp
+++ b/Source/Readers/NewImageReader/ImageTransformers.cpp
@ -0,0 +1,451 @@
+//
+// <copyright company="Microsoft">
+//     Copyright (c) Microsoft Corporation.  All rights reserved.
+// </copyright>
+//
+
+#include "stdafx.h"
+#include <algorithm>
+#include <unordered_map>
+#include <random>
+#include "ImageTransformers.h"
+#include "Config.h"
+#include "ConcStack.h"
+#include "ImageConfigHelper.h"
+#include "StringUtils.h"
+#include "ElementTypeUtils.h"
+
+namespace Microsoft { namespace MSR { namespace CNTK
+{
+
+// Shared initialization of the OpenCV based transformers: forwards to the base
+// initializer, reads the "seed" setting (default "0") from the reader
+// configuration, registers the feature stream as the stream the transformer is
+// applied to and publishes the input stream descriptions as output streams.
+void ImageTransformerBase::Initialize(TransformerPtr next,
+                                      const ConfigParameters &readerConfig)
+{
+    Base::Initialize(next, readerConfig);
+    m_seed = std::stoi(readerConfig(L"seed", "0"));
+
+    // The feature stream is the only stream these transformers touch.
+    ImageConfigHelper configHelper(readerConfig);
+    size_t featureId = configHelper.GetFeatureStreamId();
+    m_appliedStreamIds.push_back(featureId);
+
+    // Output descriptions start out as an element-wise copy of the inputs.
+    const auto &inputs = GetInputStreams();
+    m_outputStreams.assign(inputs.begin(), inputs.end());
+}
+
+// Presents a dense input sequence to OpenCV, runs the transformer specific
+// Apply(cv::Mat&) on it and returns the outcome as a new dense sequence.
+//   inputSequence - its sample layout is read as HWC and its data pointer is
+//                   handed to the cv::Mat constructor as external data.
+//   inputStream   - only the element type is consulted (float or double).
+//   buffer        - receives the transformed image; the returned sequence
+//                   points at this matrix' data.
+// Raises a runtime error for any element type other than float or double.
+SequenceDataPtr
+ImageTransformerBase::Apply(const DenseSequenceData &inputSequence,
+                            const StreamDescription &inputStream, cv::Mat &buffer,
+                            const StreamDescription & /*outputStream*/)
+{
+    ImageDimensions inputDimensions(*inputSequence.m_sampleLayout, HWC);
+    const int width = static_cast<int>(inputDimensions.m_width);
+    const int height = static_cast<int>(inputDimensions.m_height);
+    const int channelCount = static_cast<int>(inputDimensions.m_numChannels);
+
+    // Map the stream element type onto the matching OpenCV depth constant.
+    int depth = 0;
+    if (inputStream.m_elementType == ElementType::tfloat)
+    {
+        depth = CV_32F;
+    }
+    else if (inputStream.m_elementType == ElementType::tdouble)
+    {
+        depth = CV_64F;
+    }
+    else
+    {
+        RuntimeError("Unsupported type");
+    }
+
+    buffer = cv::Mat(height, width, CV_MAKETYPE(depth, channelCount), inputSequence.m_data);
+    this->Apply(buffer);
+
+    // The result pointer below requires one contiguous block of memory.
+    if (!buffer.isContinuous())
+    {
+        buffer = buffer.clone();
+    }
+    assert(buffer.isContinuous());
+
+    // Describe the (possibly resized) image in the returned sequence.
+    ImageDimensions outputDimensions(buffer.cols, buffer.rows, buffer.channels());
+    auto transformed = std::make_shared<DenseSequenceData>();
+    transformed->m_sampleLayout = std::make_shared<TensorShape>(outputDimensions.AsTensorShape(HWC));
+    transformed->m_numberOfSamples = inputSequence.m_numberOfSamples;
+    transformed->m_data = buffer.ptr();
+    return transformed;
+}
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+// Initializes the crop transformer: runs the shared image transformer setup
+// and then reads the crop settings from the configuration section named after
+// the feature stream. Raises a runtime error unless exactly one stream is
+// registered as applied stream.
+void CropTransformer::Initialize(TransformerPtr next,
+                                 const ConfigParameters &readerConfig)
+{
+    ImageTransformerBase::Initialize(next, readerConfig);
+
+    const auto &appliedIds = GetAppliedStreamIds();
+    if (appliedIds.size() != 1)
+    {
+        RuntimeError("Only a single feature stream is supported.");
+    }
+
+    // The per-stream settings live in the section carrying the stream's name.
+    const auto &inputStreams = GetInputStreams();
+    const auto &featureStream = inputStreams[appliedIds[0]];
+    InitFromConfig(readerConfig(featureStream->m_name));
+}
+
+// Reads the crop settings from the feature stream's configuration section:
+//   cropType   - parsed by ParseCropType (empty means center).
+//   cropRatio  - minimum and maximum ratio, each required to be in (0, 1] with
+//                minimum <= maximum; defaults to "1.0".
+//   jitterType - parsed by ParseJitterType (empty means none).
+//   hflip      - non-zero enables random horizontal flipping; when absent,
+//                flipping is enabled exactly for random crops.
+// NOTE(review): with a single configured ratio, index 1 presumably falls back
+// to that value inside floatargvector - confirm.
+void CropTransformer::InitFromConfig(const ConfigParameters &config)
+{
+    m_cropType = ParseCropType(config(L"cropType", ""));
+
+    floatargvector cropRatio = config(L"cropRatio", "1.0");
+    m_cropRatioMin = cropRatio[0];
+    m_cropRatioMax = cropRatio[1];
+
+    const bool minInRange = 0 < m_cropRatioMin && m_cropRatioMin <= 1.0;
+    const bool maxInRange = 0 < m_cropRatioMax && m_cropRatioMax <= 1.0;
+    if (!minInRange || !maxInRange || m_cropRatioMin > m_cropRatioMax)
+    {
+        RuntimeError("Invalid cropRatio value, must be > 0 and <= 1. cropMin must "
+                     "<= cropMax");
+    }
+
+    m_jitterType = ParseJitterType(config(L"jitterType", ""));
+
+    // An explicit hflip setting wins; otherwise flipping follows the crop type.
+    if (config.ExistsCurrent(L"hflip"))
+    {
+        m_hFlip = std::stoi(config(L"hflip")) != 0;
+    }
+    else
+    {
+        m_hFlip = m_cropType == CropType::Random;
+    }
+}
+
+// Crops the image in place to a square region and, when flipping is enabled,
+// mirrors it horizontally with the default probability of
+// std::bernoulli_distribution. The crop ratio is the configured minimum, or a
+// uniform draw from [min, max) for the UniRatio jitter type; the remaining
+// jitter types raise a runtime error.
+void CropTransformer::Apply(cv::Mat &mat)
+{
+    // Take a generator from the pool; the factory builds one from the
+    // configured seed when the pool cannot supply one.
+    const auto seed = GetSeed();
+    auto rng = m_rngs.pop_or_create(
+        [seed]() { return std::make_unique<std::mt19937>(seed); });
+
+    double ratio = 1;
+    if (m_jitterType == RatioJitterType::None)
+    {
+        ratio = m_cropRatioMin;
+    }
+    else if (m_jitterType == RatioJitterType::UniRatio)
+    {
+        if (m_cropRatioMin != m_cropRatioMax)
+        {
+            ratio = UniRealT(m_cropRatioMin, m_cropRatioMax)(*rng);
+            assert(m_cropRatioMin <= ratio && ratio < m_cropRatioMax);
+        }
+        else
+        {
+            // Degenerate interval: nothing to draw.
+            ratio = m_cropRatioMin;
+        }
+    }
+    else
+    {
+        RuntimeError("Jitter type currently not implemented.");
+    }
+
+    const cv::Rect cropRegion = GetCropRect(m_cropType, mat.rows, mat.cols, ratio, *rng);
+    mat = mat(cropRegion);
+
+    // The coin is only tossed when flipping is enabled at all.
+    if (m_hFlip && std::bernoulli_distribution()(*rng))
+    {
+        cv::flip(mat, mat, 1);
+    }
+
+    // Hand the generator back so that its state carries over to later calls.
+    m_rngs.push(std::move(rng));
+}
+
+// Translates the textual crop type (compared ignoring case) into CropType.
+// An empty string and "center" select Center, "random" selects Random; any
+// other value raises a runtime error.
+CropTransformer::CropType
+CropTransformer::ParseCropType(const std::string &src)
+{
+    CropType parsed = CropType::Center;
+    if (src.empty() || AreEqualIgnoreCase(src, "center"))
+    {
+        parsed = CropType::Center;
+    }
+    else if (AreEqualIgnoreCase(src, "random"))
+    {
+        parsed = CropType::Random;
+    }
+    else
+    {
+        RuntimeError("Invalid crop type: %s.", src.c_str());
+    }
+
+    return parsed;
+}
+
+// Translates the textual jitter type (compared ignoring case) into
+// RatioJitterType. An empty string and "none" select None; "uniratio",
+// "unilength" and "uniarea" select the corresponding values; any other value
+// raises a runtime error.
+CropTransformer::RatioJitterType
+CropTransformer::ParseJitterType(const std::string &src)
+{
+    RatioJitterType parsed = RatioJitterType::None;
+    if (src.empty() || AreEqualIgnoreCase(src, "none"))
+    {
+        parsed = RatioJitterType::None;
+    }
+    else if (AreEqualIgnoreCase(src, "uniratio"))
+    {
+        parsed = RatioJitterType::UniRatio;
+    }
+    else if (AreEqualIgnoreCase(src, "unilength"))
+    {
+        parsed = RatioJitterType::UniLength;
+    }
+    else if (AreEqualIgnoreCase(src, "uniarea"))
+    {
+        parsed = RatioJitterType::UniArea;
+    }
+    else
+    {
+        RuntimeError("Invalid jitter type: %s.", src.c_str());
+    }
+
+    return parsed;
+}
+
+// Computes the square crop region for an image of crow rows and ccol columns.
+//   type      - Center puts the square in the middle of the image, Random
+//               draws both offsets uniformly from the valid range.
+//   cropRatio - side length relative to the shorter image side, in (0, 1].
+//   rng       - generator used for the Random crop type only.
+// The side length is truncated to whole pixels but never drops below one
+// pixel, so the returned rectangle is never empty. An unknown crop type raises
+// a runtime error.
+cv::Rect CropTransformer::GetCropRect(CropType type, int crow, int ccol,
+                                      double cropRatio, std::mt19937 &rng)
+{
+    assert(crow > 0);
+    assert(ccol > 0);
+    assert(0 < cropRatio && cropRatio <= 1.0);
+
+    // Truncation yields 0 for tiny images combined with small ratios; an empty
+    // crop cannot be resized afterwards, so keep at least a single pixel.
+    int cropSize = std::max(1, static_cast<int>(std::min(crow, ccol) * cropRatio));
+    int xOff = -1;
+    int yOff = -1;
+    switch (type)
+    {
+    case CropType::Center:
+        xOff = (ccol - cropSize) / 2;
+        yOff = (crow - cropSize) / 2;
+        break;
+    case CropType::Random:
+        xOff = UniIntT(0, ccol - cropSize)(rng);
+        yOff = UniIntT(0, crow - cropSize)(rng);
+        break;
+    default:
+        // Fail loudly in release builds too instead of returning a rectangle
+        // with negative offsets.
+        RuntimeError("Invalid crop type.");
+    }
+
+    assert(0 <= xOff && xOff <= ccol - cropSize);
+    assert(0 <= yOff && yOff <= crow - cropSize);
+    return cv::Rect(xOff, yOff, cropSize, cropSize);
+}
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+// Initializes the scale transformer: runs the shared image transformer setup,
+// registers the interpolation names understood by the "interpolations"
+// setting, derives the OpenCV element type from the feature stream (float maps
+// to CV_32F, everything else to CV_64F) and reads the remaining settings from
+// the configuration section named after the feature stream. Raises a runtime
+// error unless exactly one stream is registered as applied stream.
+void ScaleTransformer::Initialize(TransformerPtr next,
+                                  const ConfigParameters &readerConfig)
+{
+    ImageTransformerBase::Initialize(next, readerConfig);
+
+    // Interpolation names and the OpenCV flags they stand for.
+    m_interpMap.emplace("nearest", cv::INTER_NEAREST);
+    m_interpMap.emplace("linear", cv::INTER_LINEAR);
+    m_interpMap.emplace("cubic", cv::INTER_CUBIC);
+    m_interpMap.emplace("lanczos", cv::INTER_LANCZOS4);
+
+    const auto &appliedIds = GetAppliedStreamIds();
+    if (appliedIds.size() != 1)
+    {
+        RuntimeError("Only a single feature stream is supported.");
+    }
+
+    const auto &inputStreams = GetInputStreams();
+    const auto &featureStream = inputStreams[appliedIds[0]];
+    if (featureStream->m_elementType == ElementType::tfloat)
+    {
+        m_dataType = CV_32F;
+    }
+    else
+    {
+        m_dataType = CV_64F;
+    }
+
+    InitFromConfig(readerConfig(featureStream->m_name));
+}
+
+// Reads the target image size and the interpolation modes from the feature
+// stream's configuration section.
+//   width, height, channels - target dimensions; their product must be
+//                             non-zero and at most half the size_t range.
+//   interpolations          - ':' separated, case-insensitive list of names
+//                             registered in m_interpMap; unknown names are
+//                             skipped. Linear interpolation is used when no
+//                             name is recognized.
+void ScaleTransformer::InitFromConfig(const ConfigParameters &config)
+{
+    m_imgWidth = config(L"width");
+    m_imgHeight = config(L"height");
+    m_imgChannels = config(L"channels");
+
+    const size_t elementCount = m_imgWidth * m_imgHeight * m_imgChannels;
+    const size_t elementLimit = std::numeric_limits<size_t>::max() / 2;
+    if (elementCount == 0 || elementCount > elementLimit)
+    {
+        RuntimeError("Invalid image dimensions.");
+    }
+
+    m_interp.clear();
+    std::stringstream ss{config(L"interpolations", "")};
+    std::string name;
+    while (std::getline(ss, name, ':'))
+    {
+        // Names are matched in lower case.
+        std::transform(name.begin(), name.end(), name.begin(),
+                       [](int ch) { return std::tolower(ch); });
+
+        const auto known = m_interpMap.find(name);
+        if (known != m_interpMap.end())
+        {
+            m_interp.push_back(known->second);
+        }
+    }
+
+    // Always leave at least one mode to choose from.
+    if (m_interp.empty())
+    {
+        m_interp.push_back(cv::INTER_LINEAR);
+    }
+}
+
+// Converts the image to the configured element type when necessary and resizes
+// it in place to m_imgWidth x m_imgHeight, using an interpolation mode drawn
+// uniformly at random from m_interp.
+void ScaleTransformer::Apply(cv::Mat &mat)
+{
+    // InitFromConfig always leaves at least one interpolation mode. Check this
+    // before size() - 1 becomes the upper bound of the distribution below; an
+    // empty list would produce the invalid range [0, -1].
+    assert(m_interp.size() > 0);
+
+    // If matrix has not been converted to the right type, do it now as rescaling
+    // requires floating point type.
+    //
+    if (mat.type() != CV_MAKETYPE(m_dataType, m_imgChannels))
+    {
+        mat.convertTo(mat, m_dataType);
+    }
+
+    // Take a generator from the pool; the factory builds one from the
+    // configured seed when the pool cannot supply one.
+    auto seed = GetSeed();
+    auto rng = m_rngs.pop_or_create(
+        [seed]()
+        {
+            return std::make_unique<std::mt19937>(seed);
+        });
+
+    auto index = UniIntT(0, static_cast<int>(m_interp.size()) - 1)(*rng);
+    cv::resize(
+        mat, mat,
+        cv::Size(static_cast<int>(m_imgWidth), static_cast<int>(m_imgHeight)), 0,
+        0, m_interp[index]);
+
+    // Hand the generator back so that its state carries over to later calls.
+    m_rngs.push(std::move(rng));
+}
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+// Initializes the mean transformer: runs the shared image transformer setup
+// and then loads the mean image settings from the configuration section named
+// after the feature stream. Raises a runtime error unless exactly one stream
+// is registered as applied stream.
+void MeanTransformer::Initialize(TransformerPtr next,
+                                 const ConfigParameters &readerConfig)
+{
+    ImageTransformerBase::Initialize(next, readerConfig);
+
+    const auto &appliedIds = GetAppliedStreamIds();
+    if (appliedIds.size() != 1)
+    {
+        RuntimeError("Only a single feature stream is supported.");
+    }
+
+    // The per-stream settings live in the section carrying the stream's name.
+    const auto &inputStreams = GetInputStreams();
+    const auto &featureStream = inputStreams[appliedIds[0]];
+    InitFromConfig(readerConfig(featureStream->m_name));
+}
+
+// Loads the mean image named by the optional "meanFile" setting.
+// Without a mean file the stored mean image is released. Otherwise the file is
+// opened through cv::FileStorage and must provide the entries "MeanImg",
+// "Channel", "Row" and "Col"; the image is reshaped to the declared channel and
+// row count. Raises a runtime error when the file cannot be opened or when the
+// declared dimensions disagree with the stored element count.
+void MeanTransformer::InitFromConfig(const ConfigParameters &config)
+{
+    const std::wstring meanFile = config(L"meanFile", L"");
+    if (meanFile.empty())
+    {
+        m_meanImg.release();
+        return;
+    }
+
+    // The file name is passed to OpenCV as a UTF-8 encoded narrow string.
+    cv::FileStorage storage;
+    storage.open(msra::strfun::utf8(meanFile).c_str(), cv::FileStorage::READ);
+    if (!storage.isOpened())
+    {
+        RuntimeError("Could not open file: %ls", meanFile.c_str());
+    }
+
+    storage["MeanImg"] >> m_meanImg;
+    int channelCount;
+    storage["Channel"] >> channelCount;
+    int rowCount;
+    storage["Row"] >> rowCount;
+    int columnCount;
+    storage["Col"] >> columnCount;
+
+    // The declared shape has to account for every stored element.
+    const int declaredElements = channelCount * rowCount * columnCount;
+    const int storedElements = m_meanImg.channels() * m_meanImg.rows * m_meanImg.cols;
+    if (declaredElements != storedElements)
+    {
+        RuntimeError("Invalid data in file: %ls", meanFile.c_str());
+    }
+
+    storage.release();
+    m_meanImg = m_meanImg.reshape(channelCount, rowCount);
+}
+
+// Subtracts the mean image from the given image when both have the same size;
+// otherwise the image is left untouched. A loaded mean image is expected (by
+// assert only) to match the image in size and channel count.
+void MeanTransformer::Apply(cv::Mat &mat)
+{
+    assert(m_meanImg.size() == cv::Size(0, 0) ||
+           (m_meanImg.size() == mat.size() &&
+            m_meanImg.channels() == mat.channels()));
+
+    // REVIEW alexeyk: check type conversion (float/double).
+    if (m_meanImg.size() != mat.size())
+    {
+        return;
+    }
+
+    mat = mat - m_meanImg;
+}
+
+// Initializes the transpose transformer: forwards to the base initializer,
+// registers the feature stream as the only applied stream and publishes the
+// output stream descriptions. These equal the input descriptions, except that
+// every applied stream gets a copy whose sample layout is re-expressed as CHW.
+void TransposeTransformer::Initialize(TransformerPtr next,
+                                      const ConfigParameters &readerConfig)
+{
+    Base::Initialize(next, readerConfig);
+
+    // Currently we only support a single stream.
+    ImageConfigHelper configHelper(readerConfig);
+    size_t featureId = configHelper.GetFeatureStreamId();
+    m_appliedStreamIds.push_back(featureId);
+
+    // Start from an element-wise copy of the input descriptions.
+    const auto &inputs = GetInputStreams();
+    m_outputStreams.assign(inputs.begin(), inputs.end());
+
+    for (auto streamId : m_appliedStreamIds)
+    {
+        auto &source = inputs[streamId];
+
+        // Read the input layout as HWC ...
+        ImageDimensions dimensions(*source->m_sampleLayout, HWC);
+
+        // ... and publish a copy of the description that carries it as CHW.
+        auto transposed = std::make_shared<StreamDescription>(*source);
+        transposed->m_sampleLayout = std::make_shared<TensorShape>(dimensions.AsTensorShape(CHW));
+        m_outputStreams[streamId] = transposed;
+    }
+}
+
+// Dispatches to the element type specific transposition. Float and double
+// streams are supported; any other element type raises a runtime error.
+SequenceDataPtr
+TransposeTransformer::Apply(const DenseSequenceData &inputSequence,
+                            const StreamDescription &inputStream,
+                            vector<char> &buffer,
+                            const StreamDescription &outputStream)
+{
+    const bool isDouble = inputStream.m_elementType == ElementType::tdouble;
+    const bool isFloat = inputStream.m_elementType == ElementType::tfloat;
+    if (!isDouble && !isFloat)
+    {
+        RuntimeError("Unsupported type");
+    }
+
+    return isDouble
+        ? TypedApply<double>(inputSequence, inputStream, buffer, outputStream)
+        : TypedApply<float>(inputSequence, inputStream, buffer, outputStream);
+}
+
+// Rewrites one HWC sample as CHW.
+//   inputSequence - source data, read as elements of type TElement; expected
+//                   (by assert only) to hold exactly one sample.
+//   inputStream   - supplies the element type and the HWC dimensions.
+//   buffer        - resized to hold the sample; receives the CHW data and
+//                   backs the returned sequence.
+//   outputStream  - supplies the sample layout of the returned sequence.
+template <class TElement>
+SequenceDataPtr
+TransposeTransformer::TypedApply(const DenseSequenceData &inputSequence,
+                                 const StreamDescription &inputStream,
+                                 vector<char> &buffer,
+                                 const StreamDescription &outputStream)
+{
+    assert(inputSequence.m_numberOfSamples == 1);
+    assert(inputStream.m_sampleLayout->GetNumElements() ==
+        outputStream.m_sampleLayout->GetNumElements());
+
+    // Make room for every element of the sample, measured in bytes.
+    const size_t byteCount =
+        inputStream.m_sampleLayout->GetNumElements() * GetSizeByType(inputStream.m_elementType);
+    buffer.resize(byteCount);
+    TElement* destination = reinterpret_cast<TElement*>(&buffer[0]);
+
+    ImageDimensions dimensions(*inputStream.m_sampleLayout, ImageLayoutKind::HWC);
+    const size_t pixelCount = dimensions.m_height * dimensions.m_width;
+    const size_t channelCount = dimensions.m_numChannels;
+    TElement* source = reinterpret_cast<TElement*>(inputSequence.m_data);
+
+    // In the source the channels of one pixel are adjacent; in the destination
+    // every channel forms its own plane of pixelCount elements.
+    for (size_t pixel = 0; pixel < pixelCount; ++pixel)
+    {
+        const TElement* interleaved = source + pixel * channelCount;
+        for (size_t channel = 0; channel < channelCount; ++channel)
+        {
+            destination[channel * pixelCount + pixel] = interleaved[channel];
+        }
+    }
+
+    auto transposed = std::make_shared<DenseSequenceData>();
+    transposed->m_sampleLayout = outputStream.m_sampleLayout;
+    transposed->m_data = &buffer[0];
+    transposed->m_numberOfSamples = inputSequence.m_numberOfSamples;
+    return transposed;
+}
+
+}}}
--- a/Source/Readers/NewImageReader/ImageTransformers.h
+++ b/Source/Readers/NewImageReader/ImageTransformers.h
@ -0,0 +1,177 @@
+//
+// <copyright company="Microsoft">
+//     Copyright (c) Microsoft Corporation.  All rights reserved.
+// </copyright>
+//
+
+#pragma once
+
+#include <unordered_map>
+#include <random>
+#include <opencv2/opencv.hpp>
+
+#include "Transformer.h"
+#include "ConcStack.h"
+#include "TransformerBase.h"
+
+namespace Microsoft { namespace MSR { namespace CNTK {
+
+class ConfigParameters;
+
+// Base class for image transformations based on OpenCV
+// that helps to wrap the sequences into the cv::Mat class.
+// Derived classes only implement Apply(cv::Mat&); the sequence level Apply
+// declared here takes care of wrapping the input and packaging the result.
+class ImageTransformerBase : public TransformerBase<cv::Mat>
+{
+public:
+    // Initializes the transformer: reads the seed, registers the feature
+    // stream as applied stream and copies the input stream descriptions.
+    virtual void Initialize(TransformerPtr next,
+                            const ConfigParameters &readerConfig) override;
+
+protected:
+    // Ids of the streams the transformation is applied to.
+    virtual const std::vector<StreamId> &GetAppliedStreamIds() const override
+    {
+        return m_appliedStreamIds;
+    }
+
+    // Descriptions of the streams this transformer produces.
+    virtual const std::vector<StreamDescriptionPtr>& GetOutputStreams() const override
+    {
+        return m_outputStreams;
+    }
+
+    // Seed getter; the value is read from the "seed" setting in Initialize.
+    unsigned int GetSeed() const
+    {
+        return m_seed;
+    }
+
+    // Shorthands for the base class and the random distributions in use.
+    using Base = TransformerBase<cv::Mat>;
+    using UniRealT = std::uniform_real_distribution<double>;
+    using UniIntT = std::uniform_int_distribution<int>;
+
+    // Applies transformation to the sequence: wraps the sequence data into
+    // the buffer matrix, calls Apply(cv::Mat&) and returns a new sequence
+    // that points at the buffer.
+    SequenceDataPtr Apply(const DenseSequenceData &inputSequence,
+                          const StreamDescription &inputStream, cv::Mat &buffer,
+                          const StreamDescription &outputStream) override;
+
+    // The only function that should be redefined by the inherited classes.
+    // Transforms the given matrix in place.
+    virtual void Apply(cv::Mat &from) = 0;
+
+private:
+    // Output stream descriptions (a copy of the input descriptions).
+    std::vector<StreamDescriptionPtr> m_outputStreams;
+    // Ids of the applied streams (the feature stream).
+    std::vector<StreamId> m_appliedStreamIds;
+    // Seed used by derived classes to create their random generators.
+    unsigned int m_seed;
+};
+
+// Crop transformation of the image.
+// Can work on images of any size.
+// Cuts a square region out of the image and optionally flips it horizontally.
+class CropTransformer : public ImageTransformerBase
+{
+public:
+    // Initializes the transformer from the feature stream's config section.
+    virtual void Initialize(TransformerPtr next,
+                            const ConfigParameters &readerConfig) override;
+
+protected:
+    // Crops (and possibly flips) the given image in place.
+    virtual void Apply(cv::Mat &mat) override;
+
+private:
+    // Placement of the crop region inside the image.
+    enum class CropType
+    {
+        Center = 0,
+        Random = 1
+    };
+    // How the crop ratio is chosen per image. Only None and UniRatio are
+    // handled by Apply; the other values raise a runtime error there.
+    enum class RatioJitterType
+    {
+        None = 0,
+        UniRatio = 1,
+        UniLength = 2,
+        UniArea = 3
+    };
+
+    // Reads cropType, cropRatio, jitterType and hflip from the configuration.
+    void InitFromConfig(const ConfigParameters &config);
+    // Parse the textual settings (case-insensitive); unknown values raise a
+    // runtime error.
+    CropType ParseCropType(const std::string &src);
+    RatioJitterType ParseJitterType(const std::string &src);
+    // Computes the square crop region for an image of crow x ccol pixels.
+    cv::Rect GetCropRect(CropType type, int crow, int ccol, double cropRatio,
+                         std::mt19937 &rng);
+
+    // Pool of random generators; Apply takes one and returns it afterwards.
+    conc_stack<std::unique_ptr<std::mt19937>> m_rngs;
+    CropType m_cropType;
+    // Bounds of the crop ratio, each in (0, 1] with min <= max.
+    double m_cropRatioMin;
+    double m_cropRatioMax;
+    RatioJitterType m_jitterType;
+    // Whether cropped images are randomly flipped horizontally.
+    bool m_hFlip;
+};
+
+// Scale transformation of the image.
+// Scales the image to the dimensions requested by the network.
+class ScaleTransformer : public ImageTransformerBase
+{
+public:
+    // Initializes the transformer from the feature stream's config section.
+    virtual void Initialize(TransformerPtr next,
+                            const ConfigParameters &readerConfig) override;
+
+private:
+    // Reads width, height, channels and interpolations from the configuration.
+    void InitFromConfig(const ConfigParameters &config);
+    // Converts the element type if needed and resizes the image in place.
+    virtual void Apply(cv::Mat &mat) override;
+
+    // Maps interpolation names to OpenCV interpolation flags.
+    using StrToIntMapT = std::unordered_map<std::string, int>;
+    StrToIntMapT m_interpMap;
+    // Interpolation flags to choose from at random; never empty after
+    // InitFromConfig.
+    std::vector<int> m_interp;
+
+    // Pool of random generators; Apply takes one and returns it afterwards.
+    conc_stack<std::unique_ptr<std::mt19937>> m_rngs;
+    // OpenCV depth of the feature stream: CV_32F for float, else CV_64F.
+    int m_dataType;
+    // Target image dimensions.
+    size_t m_imgWidth;
+    size_t m_imgHeight;
+    size_t m_imgChannels;
+};
+
+// Mean transformation.
+// Subtracts a mean image, loaded from the optional "meanFile" setting, from
+// every image of matching size.
+class MeanTransformer : public ImageTransformerBase
+{
+public:
+    // Initializes the transformer from the feature stream's config section.
+    virtual void Initialize(TransformerPtr next,
+                            const ConfigParameters &readerConfig) override;
+
+private:
+    // Subtracts the mean image in place when the sizes match.
+    virtual void Apply(cv::Mat &mat) override;
+    // Loads (or releases) the mean image according to the configuration.
+    void InitFromConfig(const ConfigParameters &config);
+
+    // Mean image; empty when no mean file is configured.
+    cv::Mat m_meanImg;
+};
+
+// Transpose transformation from HWC to CHW.
+// Works on a plain byte buffer instead of a cv::Mat.
+class TransposeTransformer : public TransformerBase<vector<char>>
+{
+public:
+    // Registers the feature stream and publishes its description with a CHW
+    // sample layout.
+    virtual void Initialize(TransformerPtr next,
+                            const ConfigParameters &readerConfig) override;
+
+protected:
+    // Ids of the streams the transformation is applied to.
+    virtual const std::vector<StreamId>& GetAppliedStreamIds() const override
+    {
+        return m_appliedStreamIds;
+    }
+
+    // Descriptions of the streams this transformer produces.
+    virtual const std::vector<StreamDescriptionPtr>& GetOutputStreams() const override
+    {
+        return m_outputStreams;
+    }
+
+    // Transposes one sequence; dispatches on the stream's element type
+    // (float or double, anything else raises a runtime error).
+    SequenceDataPtr Apply(const DenseSequenceData &inputSequence,
+                          const StreamDescription &inputStream,
+                          vector<char> &buffer,
+                          const StreamDescription &outputStream) override;
+
+private:
+    using Base = TransformerBase<vector<char>>;
+
+    // Element type specific implementation of Apply.
+    template <class TElement>
+    SequenceDataPtr TypedApply(const DenseSequenceData &inputSequence,
+                               const StreamDescription &inputStream,
+                               vector<char> &buffer,
+                               const StreamDescription &outputStream);
+
+    // Output stream descriptions; applied streams carry the CHW layout.
+    std::vector<StreamDescriptionPtr> m_outputStreams;
+    // Ids of the applied streams (the feature stream).
+    std::vector<StreamId> m_appliedStreamIds;
+};
+
+}}}
--- a/Source/Readers/NewImageReader/NewImageReader.vcxproj
+++ b/Source/Readers/NewImageReader/NewImageReader.vcxproj
@ -0,0 +1,145 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" InitialTargets="CheckDependencies" ToolsVersion="12.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{9BD0A711-0BBD-45B6-B81C-053F03C26CFB}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>ImageReader</RootNamespace>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Label="Configuration">
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <PlatformToolset>v120</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <Choose>
+    <When Condition="Exists('$(OPENCV_PATH)')">
+      <PropertyGroup>
+        <HasOpenCV>true</HasOpenCV>
+      </PropertyGroup>
+    </When>
+    <Otherwise>
+      <PropertyGroup>
+        <HasOpenCV>false</HasOpenCV>
+      </PropertyGroup>
+    </Otherwise>
+  </Choose>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <UseDebugLibraries>true</UseDebugLibraries>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <IncludePath>..\..\common\include;..\..\math;$(OPENCV_PATH)\include;$(IncludePath);</IncludePath>
+    <LibraryPath>$(SolutionDir)$(Platform)\$(Configuration);$(OPENCV_PATH)\x64\vc12\lib;$(LibraryPath);</LibraryPath>
+    <IntDir>$(Platform)\$(Configuration)\$(ProjectName)\</IntDir>
+    <OpenCVLib Condition="$(HasOpenCV)">opencv_world300.lib</OpenCVLib>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <LinkIncremental>true</LinkIncremental>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <LinkIncremental>false</LinkIncremental>
+  </PropertyGroup>
+  <ItemDefinitionGroup>
+    <ClCompile>
+      <PrecompiledHeader>Use</PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <PreprocessorDefinitions>WIN32;_WINDOWS;_USRDLL;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <SDLCheck>true</SDLCheck>
+      <TreatWarningAsError>true</TreatWarningAsError>
+      <OpenMPSupport>true</OpenMPSupport>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <AdditionalDependencies>Reader.lib;Math.lib;$(OpenCVLib);%(AdditionalDependencies)</AdditionalDependencies>
+    </Link>
+    <PostBuildEvent>
+      <Command Condition="$(HasOpenCV)">xcopy /Y $(OPENCV_PATH)\x64\vc12\bin\opencv_world300.dll $(TargetDir)</Command>
+    </PostBuildEvent>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>../Reader</AdditionalIncludeDirectories>
+    </ClCompile>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalOptions>/d2Zi+ %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalIncludeDirectories>../Reader</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <Profile>true</Profile>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\Common\Include\basetypes.h" />
+    <ClInclude Include="..\..\Common\Include\DataReader.h" />
+    <ClInclude Include="..\..\Common\Include\File.h" />
+    <ClInclude Include="..\..\Common\Include\fileutil.h" />
+    <ClInclude Include="ImageConfigHelper.h" />
+    <ClInclude Include="ImageDataDeserializer.h" />
+    <ClInclude Include="ImageReader.h" />
+    <ClInclude Include="ImageTransformers.h" />
+    <ClInclude Include="stdafx.h" />
+    <ClInclude Include="targetver.h" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\Common\DataReader.cpp" />
+    <ClCompile Include="..\..\Common\File.cpp">
+      <PrecompiledHeader>NotUsing</PrecompiledHeader>
+    </ClCompile>
+    <ClCompile Include="..\..\Common\DebugUtil.cpp">
+      <PrecompiledHeader>NotUsing</PrecompiledHeader>
+    </ClCompile>
+    <ClCompile Include="..\..\Common\fileutil.cpp">
+      <PrecompiledHeader>NotUsing</PrecompiledHeader>
+    </ClCompile>
+    <ClCompile Include="..\..\Common\Config.cpp">
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">NotUsing</PrecompiledHeader>
+    </ClCompile>
+    <ClCompile Include="ImageConfigHelper.cpp" />
+    <ClCompile Include="ImageDataDeserializer.cpp" />
+    <ClCompile Include="dllmain.cpp" />
+    <ClCompile Include="Exports.cpp">
+      <ExcludedFromBuild Condition="!$(HasOpenCV)">true</ExcludedFromBuild>
+    </ClCompile>
+    <ClCompile Include="ImageReader.cpp" />
+    <ClCompile Include="ImageTransformers.cpp" />
+    <ClCompile Include="stdafx.cpp">
+      <PrecompiledHeader>Create</PrecompiledHeader>
+    </ClCompile>
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+  <Target Name="CheckDependencies">
+    <Warning Condition="!$(HasOpenCV)" Text="ImageReader requires OpenCV library v3.0 or higher to build. Please install the library from http://opencv.org/downloads.html and set OPENCV_PATH environment variable to OpenCV build folder (e.g. C:\src\opencv\build)." />
+  </Target>
+</Project>
--- a/Source/Readers/NewImageReader/NewImageReader.vcxproj.filters
+++ b/Source/Readers/NewImageReader/NewImageReader.vcxproj.filters
@ -0,0 +1,55 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <ClCompile Include="Exports.cpp" />
+    <ClCompile Include="stdafx.cpp" />
+    <ClCompile Include="..\..\Common\DataReader.cpp">
+      <Filter>Common</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\Common\fileutil.cpp">
+      <Filter>Common</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\Common\File.cpp">
+      <Filter>Common</Filter>
+    </ClCompile>
+    <ClCompile Include="dllmain.cpp" />
+    <ClCompile Include="ImageTransformers.cpp" />
+    <ClCompile Include="ImageDataDeserializer.cpp" />
+    <ClCompile Include="ImageReader.cpp" />
+    <ClCompile Include="ImageConfigHelper.cpp" />
+    <ClCompile Include="..\..\Common\DebugUtil.cpp">
+      <Filter>Common</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\Common\Config.cpp">
+      <Filter>Common</Filter>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="stdafx.h" />
+    <ClInclude Include="targetver.h" />
+    <ClInclude Include="..\..\Common\Include\basetypes.h">
+      <Filter>Common\Include</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\Common\Include\DataReader.h">
+      <Filter>Common\Include</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\Common\Include\File.h">
+      <Filter>Common\Include</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\Common\Include\fileutil.h">
+      <Filter>Common\Include</Filter>
+    </ClInclude>
+    <ClInclude Include="ImageTransformers.h" />
+    <ClInclude Include="ImageDataDeserializer.h" />
+    <ClInclude Include="ImageReader.h" />
+    <ClInclude Include="ImageConfigHelper.h" />
+  </ItemGroup>
+  <ItemGroup>
+    <Filter Include="Common">
+      <UniqueIdentifier>{0D0EFA10-72A8-4078-840A-B7F76AFEC0A4}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="Common\Include">
+      <UniqueIdentifier>{C6F55578-121A-4D7C-8F57-4172BC5C463B}</UniqueIdentifier>
+    </Filter>
+  </ItemGroup>
+</Project>
--- a/Source/Readers/NewImageReader/dllmain.cpp
+++ b/Source/Readers/NewImageReader/dllmain.cpp
@ -0,0 +1,12 @@
+//
+// Copyright (c) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
+//
+// dllmain.cpp : Defines the entry point for the DLL application.
+//
+#include "stdafx.h"
+
+// DLL entry point. All arguments are ignored and TRUE is returned
+// unconditionally; no setup or cleanup is performed here.
+BOOL APIENTRY DllMain(HMODULE /*hModule*/, DWORD /*ul_reason_for_call*/, LPVOID /*lpReserved*/)
+{
+    return TRUE;
+}
--- a/Source/Readers/NewImageReader/stdafx.cpp
+++ b/Source/Readers/NewImageReader/stdafx.cpp
@ -0,0 +1,13 @@
+//
+// Copyright (c) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
+//
+// stdafx.cpp : source file that includes just the standard includes
+// NewImageReader.pch will be the pre-compiled header
+// stdafx.obj will contain the pre-compiled type information
+//
+
+#include "stdafx.h"
+
+// TODO: reference any additional headers you need in STDAFX.H
+// and not in this file
--- a/Source/Readers/NewImageReader/stdafx.h
+++ b/Source/Readers/NewImageReader/stdafx.h
@ -0,0 +1,21 @@
+//
+// Copyright (c) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
+//
+// stdafx.h : include file for standard system include files,
+// or project specific include files that are used frequently, but
+// are changed infrequently
+//
+
+#pragma once
+
+#include "Platform.h"
+#define _CRT_SECURE_NO_WARNINGS // "secure" CRT not available on all platforms
+#include "targetver.h"
+#ifdef __WINDOWS__
+#include "windows.h"
+#endif
+#include <stdio.h>
+#include <math.h>
+
+// TODO: reference additional headers your program requires here
--- a/Source/Readers/NewImageReader/targetver.h
+++ b/Source/Readers/NewImageReader/targetver.h
@ -0,0 +1,14 @@
+//
+// Copyright (c) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
+//
+
+#pragma once
+
+// Including SDKDDKVer.h defines the highest available Windows platform.
+
+// If you wish to build your application for a previous Windows platform, include WinSDKVer.h and
+// set the _WIN32_WINNT macro to the platform you wish to support before including SDKDDKVer.h.
+#ifdef __WINDOWS__
+#include <SDKDDKVer.h>
+#endif
--- a/Source/Readers/Reader/BlockRandomizer.cpp
+++ b/Source/Readers/Reader/BlockRandomizer.cpp
@ -0,0 +1,398 @@
+//
+// Copyright (c) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
+//
+
+#define _CRT_SECURE_NO_WARNINGS
+
+#include "BlockRandomizer.h"
+#include <algorithm>
+#include <utility>
+#include <iostream>
+
+#include "DataReader.h"
+#include <random>
+
+#ifndef UNREFERENCED_PARAMETER
+#define UNREFERENCED_PARAMETER(P) (P)
+#endif
+
+namespace Microsoft { namespace MSR { namespace CNTK {
+
+static inline size_t rand(const size_t begin, const size_t end)
+{
+    // Returns a pseudo-random value in [begin, end), built from two ::rand() draws; requires begin < end.
+    // Widen before multiplying: int * int overflows (undefined behavior) where RAND_MAX equals INT_MAX.
+    const size_t randomNumber = static_cast<size_t>(::rand()) * RAND_MAX + static_cast<size_t>(::rand());
+    return begin + randomNumber % (end - begin);
+}
+
+bool BlockRandomizer::TimelineIsValidForRandomization(const SequenceDescriptions& timeline) const
+{
+    SequenceDescription previous = { SIZE_MAX, 0, 0, true }; // seed: m_id SIZE_MAX wraps to 0 on + 1, so the first id must be 0
+    // Returns true iff every entry passes all checks below; find_if_not stops at the first entry that fails.
+    auto it = std::find_if_not(timeline.begin(), timeline.end(),
+                               [&](const SequenceDescription* current)
+                               {
+                                   bool result = current->m_isValid
+                                       && previous.m_id + 1 == current->m_id // ids increase by exactly one
+                                       && previous.m_chunkId <= current->m_chunkId // chunk ids never decrease ...
+                                       && current->m_chunkId <= previous.m_chunkId + 1 // ... and grow by at most one
+                                       && 0 < current->m_numberOfSamples; // no empty sequences
+                                   previous = *current; // the predicate is stateful: remember the entry for the next comparison
+                                   return result;
+                               });
+    return it == timeline.end();
+}
+
+void BlockRandomizer::RandomizeChunks()
+{
+    // Create vector of chunk indices and shuffle them using current sweep as seed
+    std::vector<size_t> randomizedChunkIndices;
+    randomizedChunkIndices.reserve(m_numChunks);
+    for (size_t i = 0; i < m_numChunks; i++)
+    {
+        randomizedChunkIndices.push_back(i);
+    }
+    // NOTE: despite the m_ prefix, m_rng is a local engine that is re-created and re-seeded on every call.
+    std::mt19937 m_rng(static_cast<int>(m_sweep));
+
+    std::shuffle(randomizedChunkIndices.begin(), randomizedChunkIndices.end(), m_rng);
+
+    // Place randomized chunks on global time line
+    m_randomizedChunks.clear();
+    m_randomizedChunks.reserve(m_numChunks + 1); // + 1 for the sentinel
+    size_t chunkId, samplePosition, sequencePosition;
+    for (chunkId = 0, samplePosition = m_sweepStartInSamples, sequencePosition = 0; chunkId < m_numChunks; chunkId++)
+    {
+        const size_t originalChunkIndex = randomizedChunkIndices[chunkId];
+        const size_t numSequences =
+            m_chunkInformation[originalChunkIndex + 1].m_sequencePositionStart -
+            m_chunkInformation[originalChunkIndex].m_sequencePositionStart; // chunk length in sequences; [index + 1] may be the sentinel
+        const size_t numSamples =
+            m_chunkInformation[originalChunkIndex + 1].m_samplePositionStart -
+            m_chunkInformation[originalChunkIndex].m_samplePositionStart; // chunk length in samples
+        m_randomizedChunks.push_back(RandomizedChunk { sequencePosition, samplePosition, originalChunkIndex }); // window fields are assigned below
+        samplePosition += numSamples;
+        sequencePosition += numSequences;
+    }
+
+    // Add sentinel: it carries the end positions, so entry [k + 1] exists for the last chunk as well
+    m_randomizedChunks.push_back(RandomizedChunk { sequencePosition, samplePosition, SIZE_MAX });
+
+    // For each chunk, compute the randomization range (w.r.t. the randomized chunk sequence)
+    size_t halfWindowRange = m_randomizationRangeInSamples / 2;
+    for (size_t chunkId = 0; chunkId < m_numChunks; chunkId++) // NOTE: shadows the chunkId declared above
+    {
+        auto& chunk = m_randomizedChunks[chunkId];
+        // start with the range of left neighbor
+        if (chunkId == 0)
+        {
+            chunk.m_windowBegin = 0;
+            chunk.m_windowEnd = 1;
+        }
+        else
+        {
+            chunk.m_windowBegin = m_randomizedChunks[chunkId - 1].m_windowBegin; // might be too early
+            chunk.m_windowEnd = m_randomizedChunks[chunkId - 1].m_windowEnd; // might have more space
+        }
+        while (chunk.m_info.m_samplePositionStart - m_randomizedChunks[chunk.m_windowBegin].m_info.m_samplePositionStart > halfWindowRange)
+            chunk.m_windowBegin++; // too early
+        // TODO m_randomizedChunks[chunk.windowend + 1].info.samplePositionStart - m_randomizedChunks[chunk.windowbegin].info.samplePositionStart < m_randomizationRangeInSamples
+        while (chunk.m_windowEnd < m_numChunks &&
+               m_randomizedChunks[chunk.m_windowEnd + 1].m_info.m_samplePositionStart - chunk.m_info.m_samplePositionStart < halfWindowRange)
+            chunk.m_windowEnd++; // got more space
+    }
+
+    // Map every sequence position (in randomized chunk order) to the index of the randomized chunk that holds it.
+    m_sequencePositionToChunkIndex.clear();
+    m_sequencePositionToChunkIndex.reserve(m_numSequences);
+    for (size_t k = 0; k < m_numChunks; k++)
+    {
+        const size_t numSequences =
+            m_randomizedChunks[k + 1].m_info.m_sequencePositionStart -
+            m_randomizedChunks[k].m_info.m_sequencePositionStart;
+        for (size_t i = 0; i < numSequences; i++)
+        {
+            m_sequencePositionToChunkIndex.push_back(k);
+        }
+    }
+    assert(m_sequencePositionToChunkIndex.size() == m_numSequences);
+}
+
+bool BlockRandomizer::IsValidForPosition(size_t targetPosition, const SequenceDescription& seqDesc) const
+{   // True iff seqDesc's chunk id lies in the half-open window [m_windowBegin, m_windowEnd) of the chunk owning targetPosition.
+    const RandomizedChunk& owner = m_randomizedChunks[m_sequencePositionToChunkIndex[targetPosition]];
+    return !(seqDesc.m_chunkId < owner.m_windowBegin || owner.m_windowEnd <= seqDesc.m_chunkId);
+}
+
+void BlockRandomizer::Randomize()
+{ // Rebuilds m_randomizedChunks and m_randomTimeline for the current value of m_sweep.
+    const auto& timeline = m_deserializer->GetSequenceDescriptions();
+    RandomizeChunks();
+
+    // Set up m_randomTimeline, shuffled by chunks.
+    m_randomTimeline.clear();
+    m_randomTimeline.reserve(m_numSequences);
+    for (size_t chunkId = 0; chunkId < m_numChunks; chunkId++)
+    {
+        auto originalChunkIndex = m_randomizedChunks[chunkId].m_originalChunkIndex;
+
+        for (size_t sequencePosition = m_chunkInformation[originalChunkIndex].m_sequencePositionStart;
+             sequencePosition < m_chunkInformation[originalChunkIndex + 1].m_sequencePositionStart;
+             sequencePosition++)
+        {
+            SequenceDescription randomizedSeqDesc = *timeline[sequencePosition];
+            randomizedSeqDesc.m_chunkId = chunkId; // from here on m_chunkId is the randomized chunk index, not the original one
+            m_randomTimeline.push_back(randomizedSeqDesc);
+        }
+    }
+    assert(m_randomTimeline.size() == m_numSequences);
+
+    // Check we got those setup right
+    foreach_index (i, m_randomTimeline)
+    {
+        assert(IsValidForPosition(i, m_randomTimeline[i]));
+    }
+
+    // Now randomly shuffle m_randomTimeline, while considering the
+    // constraints of what chunk range needs to be in memory.
+    srand(static_cast<unsigned int>(m_sweep + 1)); // re-seeds the global C generator that the rand() helper draws from
+    foreach_index (i, m_randomTimeline)
+    {
+        // Get valid randomization range, expressed in chunks
+        const size_t chunkId = m_sequencePositionToChunkIndex[i];
+        const size_t windowBegin = m_randomizedChunks[chunkId].m_windowBegin;
+        const size_t windowEnd = m_randomizedChunks[chunkId].m_windowEnd;
+
+        // Get valid randomization range, expressed in sequence positions.
+        size_t posBegin = m_randomizedChunks[windowBegin].m_info.m_sequencePositionStart;
+        size_t posEnd = m_randomizedChunks[windowEnd].m_info.m_sequencePositionStart;
+        // Retry until a partner passes both checks; j == i passes whenever position i currently holds a valid sequence.
+        for (;;)
+        {
+            // Pick a sequence position from [posBegin, posEnd)
+            const size_t j = rand(posBegin, posEnd);
+
+            // Try again if the sequence currently at j cannot be placed at position i.
+            if (!IsValidForPosition(i, m_randomTimeline[j]))
+                continue;
+
+            // Try again if the sequence currently at i cannot be placed at position j.
+            if (!IsValidForPosition(j, m_randomTimeline[i]))
+                continue;
+
+            // Swap and break out.
+            std::swap(m_randomTimeline[i], m_randomTimeline[j]); // TODO old swap was perhaps more efficient
+            break;
+        }
+    }
+
+    // Verify that we got it right
+    foreach_index (i, m_randomTimeline)
+    {
+        // TODO assert only
+        if (!IsValidForPosition(i, m_randomTimeline[i]))
+            LogicError("BlockRandomizer::Randomize: randomization logic mangled!");
+    }
+}
+
+void BlockRandomizer::RandomizeIfNewSweepIsEntered()
+{
+    // StartEpoch() must have run: it replaces the SIZE_MAX placeholder set by the constructor.
+    assert(m_sequencePositionInSweep != SIZE_MAX);
+    if (m_sequencePositionInSweep < m_numSequences)
+        return; // still inside the current sweep, nothing to do
+    if (m_verbosity > 0)
+    {
+        const char* mode = m_frameMode ? "frame" : "utterance";
+        std::cerr << __FUNCTION__ << ": re-randomizing for sweep " << m_sweep << " in " << mode << " mode" << endl;
+    }
+    m_sweep += 1;
+    m_sweepStartInSamples += m_numSamples;
+    Randomize();
+    m_sequencePositionInSweep -= m_numSequences; // rebase the position into the new sweep
+    assert(m_sequencePositionInSweep < m_numSequences); // cannot jump ahead more than a sweep
+}
+
+void BlockRandomizer::RandomizeForGlobalSamplePosition(const size_t samplePosition)
+{   // Seeks to a global sample position; re-randomizes when it lies in a sweep other than m_sweep. Rejects an empty timeline (division below).
+    if (m_numSamples == 0) LogicError("BlockRandomizer::RandomizeForGlobalSamplePosition: the timeline contains no samples.");
+    const size_t sweep = samplePosition / m_numSamples;
+    if (m_sweep != sweep)
+    {
+        m_sweep = sweep;
+        m_sweepStartInSamples = sweep * m_numSamples;
+        Randomize();
+    }
+    m_sequencePositionInSweep = samplePosition % m_numSamples; // TODO only for m_frameMode
+}
+
+//
+// Public methods
+//
+
+BlockRandomizer::BlockRandomizer(int verbosity, size_t randomizationRangeInSamples, DataDeserializerPtr deserializer)
+    : m_verbosity(verbosity), // initializers are listed in member declaration order, which is the order they run in
+      m_randomizationRangeInSamples(randomizationRangeInSamples),
+      m_distributionMode(DistributionMode::sequences_strides),
+      m_deserializer(deserializer),
+      m_numSequences(0), m_numChunks(0), m_numSamples(0), m_frameMode(false), // computed in the body below
+      m_workerRank(0), m_numberOfWorkers(1),                                  // replaced by StartEpoch()
+      m_epochSize(SIZE_MAX), m_samplePositionInEpoch(SIZE_MAX),               // SIZE_MAX: StartEpoch() not called yet
+      m_sweep(SIZE_MAX), m_sweepStartInSamples(0), m_sequencePositionInSweep(SIZE_MAX)
+{
+    // Derives sequence/chunk/sample totals and per-chunk start positions from the deserializer's timeline.
+    // deserializer must not be null and its timeline must pass TimelineIsValidForRandomization().
+    assert(deserializer != nullptr);
+    const SequenceDescriptions& timeline = m_deserializer->GetSequenceDescriptions();
+    assert(TimelineIsValidForRandomization(timeline));
+
+    // Ids and chunk ids grow along a valid timeline, so the last entry yields both counts.
+    // An empty timeline keeps the zero counts from the initializer list.
+    if (!timeline.empty())
+    {
+        // TODO let timeline keep this info?
+        m_numSequences = timeline.back()->m_id + 1;
+        m_numChunks = timeline.back()->m_chunkId + 1;
+    }
+
+    // Generate additional information about physical chunks
+    assert(m_chunkInformation.size() == 0);
+    m_chunkInformation.reserve(m_numChunks + 1);
+    m_chunkInformation.insert(m_chunkInformation.begin(),
+                              m_numChunks + 1,
+                              ChunkInformation{SIZE_MAX, SIZE_MAX});
+
+    size_t maxNumberOfSamples = 0;
+
+    // The SIZE_MAX placeholders plus min() make each chunk keep the positions of its first sequence;
+    // m_numSamples is the running sample count, i.e. the sample position at which seqDesc starts.
+    for (const auto& seqDesc : timeline)
+    {
+        // TODO let timeline keep this info?
+        auto& chunkInformation = m_chunkInformation[seqDesc->m_chunkId];
+        chunkInformation.m_sequencePositionStart =
+            min(chunkInformation.m_sequencePositionStart, seqDesc->m_id);
+        chunkInformation.m_samplePositionStart =
+            min(chunkInformation.m_samplePositionStart, m_numSamples);
+        maxNumberOfSamples = max(maxNumberOfSamples, seqDesc->m_numberOfSamples);
+        m_numSamples += seqDesc->m_numberOfSamples;
+    }
+
+    // Add sentinel
+    m_chunkInformation[m_numChunks] = {m_numSequences, m_numSamples};
+
+    // Frame mode to the randomizer just means there are only single-sample sequences
+    m_frameMode = (maxNumberOfSamples == 1);
+}
+
+void BlockRandomizer::Initialize(TransformerPtr next, const ConfigParameters& readerConfig)
+{
+    // Intentionally a no-op for the block randomizer; the casts only mark both parameters as deliberately unused.
+    static_cast<void>(next);
+    static_cast<void>(readerConfig);
+}
+
+void BlockRandomizer::StartEpoch(const EpochConfiguration& config)
+{
+    // Pass the configuration on to the deserializer before touching any local state.
+    m_deserializer->StartEpoch(config);
+
+    // Remember this worker's rank and the total number of workers for GetNextSequenceIds().
+    m_workerRank = config.m_workerRank;
+    m_numberOfWorkers = config.m_numberOfWorkers;
+
+    // eldak: check partial minibatches.
+    // When the configured size equals requestDataSize, the epoch covers all m_numSamples samples.
+    const bool useAllSamples = (config.m_totalEpochSizeInSamples == requestDataSize);
+    m_epochSize = useAllSamples ? m_numSamples : config.m_totalEpochSizeInSamples;
+
+    // TODO add some asserts on EpochConfiguration
+    m_samplePositionInEpoch = 0;
+
+    // Global sample position at which this epoch starts.
+    const size_t epochStart = m_epochSize * config.m_epochIndex;
+    assert(m_frameMode); // TODO !m_frameMode needs fixes
+    assert(epochStart != SIZE_MAX); // used as special value for init
+    // Prepare the sweep containing that position and set the position inside it.
+    RandomizeForGlobalSamplePosition(epochStart);
+}
+
+bool BlockRandomizer::GetNextSequenceIds(size_t sampleCount, std::vector<size_t>& originalIds)
+{ // Appends to originalIds (expected empty) the ids picked for this worker; returns true once the epoch end is reached.
+    assert(m_frameMode); // TODO !m_frameMode not implemented yet
+    assert(originalIds.size() == 0);
+    assert(sampleCount < m_numSamples);
+
+    if (m_samplePositionInEpoch < m_epochSize)
+    {
+        if (m_distributionMode == DistributionMode::chunk_modulus)
+        {
+            assert(m_numberOfWorkers == 1); // TODO needs implementation
+            // Walk the randomized timeline, keeping only sequences whose chunk id maps to this worker's rank.
+            while ((m_samplePositionInEpoch < m_epochSize) &&
+                   (originalIds.size() < sampleCount))
+            {
+                RandomizeIfNewSweepIsEntered();
+
+                const auto& seqDesc = m_randomTimeline[m_sequencePositionInSweep];
+                if ((seqDesc.m_chunkId % m_numberOfWorkers) == m_workerRank)
+                {
+                    // Got one, collect it
+                    originalIds.push_back(seqDesc.m_id);
+                }
+                // Positions advance for skipped sequences too.
+                m_samplePositionInEpoch += seqDesc.m_numberOfSamples;
+                m_sequencePositionInSweep++;
+            }
+        }
+        else
+        {
+            assert(m_distributionMode == DistributionMode::sequences_strides);
+            // Clip the request at the epoch end, then give this worker the slice [strideBegin, strideEnd) of it.
+            size_t nextSamplePositionInEpoch = std::min(m_epochSize, m_samplePositionInEpoch + sampleCount);
+            size_t distributedSampleCount = nextSamplePositionInEpoch - m_samplePositionInEpoch;
+            size_t strideBegin = distributedSampleCount * m_workerRank / m_numberOfWorkers;
+            size_t strideEnd = distributedSampleCount * (m_workerRank + 1) / m_numberOfWorkers;
+            // Every position is visited (one sample per step) so the counters advance by the whole request.
+            for (size_t i = 0; i < distributedSampleCount; ++i, ++m_samplePositionInEpoch, ++m_sequencePositionInSweep)
+            {
+                RandomizeIfNewSweepIsEntered();
+                if (strideBegin <= i && i < strideEnd)
+                {
+                    const auto& seqDesc = m_randomTimeline[m_sequencePositionInSweep];
+                    originalIds.push_back(seqDesc.m_id);
+                }
+            }
+            assert(m_samplePositionInEpoch == nextSamplePositionInEpoch);
+        }
+    }
+
+    return m_epochSize <= m_samplePositionInEpoch;
+}
+
+Sequences BlockRandomizer::GetNextSequences(size_t sampleCount)
+{
+    // Returns the data for the next sequences selected for this worker, plus the end-of-epoch flag.
+    assert(m_samplePositionInEpoch != SIZE_MAX); // StartEpoch() must be called first
+    assert(m_frameMode); // TODO sequence mode not implemented yet
+
+    Sequences batch;
+    std::vector<size_t> idsToFetch;
+    batch.m_endOfEpoch = GetNextSequenceIds(sampleCount, idsToFetch);
+
+    // TODO implement require and release chunks from the data deserializer, but only for this worker
+    //      (probably in GetNextSequenceIds())
+
+    // When no ids were selected, only the flag is returned and the deserializer is not called.
+    if (!idsToFetch.empty())
+    {
+        batch.m_data = m_deserializer->GetSequencesById(idsToFetch);
+    }
+
+    return batch;
+
+}
+
+} } }
--- a/Source/Readers/Reader/BlockRandomizer.h
+++ b/Source/Readers/Reader/BlockRandomizer.h
@ -0,0 +1,104 @@
+//
+// Copyright (c) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
+//
+
+#pragma once
+
+#include <vector>
+
+#include "Transformer.h"
+#include "DataDeserializer.h"
+
+namespace Microsoft { namespace MSR { namespace CNTK {
+
+// Randomizer that shuffles whole chunks and then shuffles sequences within a window of neighboring chunks.
+// TODO: currently this code is moved from the old block randomizer.
+// The class will be further refactored and a common base will be extracted together with NoRandomizer.
+// Currently works only for frame mode (number of samples in each sequence == 1).
+class BlockRandomizer : public Transformer
+{
+public:
+    BlockRandomizer(int verbosity, size_t randomizationRangeInSamples, DataDeserializerPtr deserializer);
+    virtual ~BlockRandomizer()
+    {
+    }
+    // Transformer interface. Stream descriptions are forwarded from the deserializer.
+    virtual void Initialize(TransformerPtr next, const ConfigParameters& readerConfig) override;
+    virtual void StartEpoch(const EpochConfiguration& config) override;
+    virtual Sequences GetNextSequences(size_t sampleCount) override;
+    virtual std::vector<StreamDescriptionPtr> GetStreamDescriptions() const override
+    {
+        return m_deserializer->GetStreamDescriptions();
+    }
+
+private:
+    enum class DistributionMode {
+        // TODO better names, description
+        chunk_modulus, // a worker keeps sequences whose (randomized) chunk id modulo the worker count equals its rank
+        sequences_strides // each worker takes one contiguous slice of every request
+    };
+
+    // Structure for per-chunk information
+    struct ChunkInformation
+    {
+        size_t m_sequencePositionStart;
+        size_t m_samplePositionStart;
+    };
+
+    // Structure that will be maintained for each randomized chunk
+    struct RandomizedChunk
+    {
+        struct ChunkInformation m_info; // sample positions are global // TODO could drop 'global' requirement?
+
+        size_t m_originalChunkIndex;
+
+        // Randomization range (in randomized chunk positions; right-side open)
+        size_t m_windowBegin;
+        size_t m_windowEnd;
+    };
+
+    // General configuration
+    int m_verbosity;
+    size_t m_randomizationRangeInSamples; // full window
+    DistributionMode m_distributionMode;
+
+    // Deserializer and information on the original timeline
+    DataDeserializerPtr m_deserializer;
+    size_t m_numSequences;
+    size_t m_numChunks;
+    size_t m_numSamples;
+    bool m_frameMode;                                 // true iff only single-sample sequences
+    std::vector<ChunkInformation> m_chunkInformation; // (includes a sentinel)
+
+    // Per-epoch configuration (set by StartEpoch())
+    size_t m_workerRank;
+    size_t m_numberOfWorkers;
+    size_t m_epochSize;
+    size_t m_samplePositionInEpoch;
+
+    // Per-randomization-sweep information
+    size_t m_sweep;
+    size_t m_sweepStartInSamples; // TODO do we need it?
+    size_t m_sequencePositionInSweep;
+    std::vector<RandomizedChunk> m_randomizedChunks;    // (includes a sentinel)
+    std::vector<size_t> m_sequencePositionToChunkIndex; // TODO find on m_randomizedChunks instead?
+    std::vector<SequenceDescription> m_randomTimeline;
+
+    // Check that timeline has only valid sequences of non-zero length
+    // with incrementing IDs and non-decreasing chunk identifiers.
+    bool TimelineIsValidForRandomization(const SequenceDescriptions& timeline) const;
+    // Shuffles the chunk order for the current sweep and computes each chunk's randomization window.
+    void RandomizeChunks();
+    // True iff seqDesc's (randomized) chunk id lies within the window of the chunk that owns targetPosition.
+    bool IsValidForPosition(size_t targetPosition, const SequenceDescription& seqDesc) const;
+    // Rebuilds m_randomTimeline for the current sweep.
+    void Randomize();
+    // Sets sweep and in-sweep position for a global sample position, re-randomizing if the sweep changes.
+    void RandomizeForGlobalSamplePosition(const size_t samplePosition);
+    // Moves on to the next sweep (and re-randomizes) once the in-sweep position is past the last sequence.
+    void RandomizeIfNewSweepIsEntered();
+    // Collects the original ids of the next sequences for this worker; returns true when the epoch end is reached.
+    bool GetNextSequenceIds(size_t sampleCount, std::vector<size_t>& ids);
+};
+} } }
--- a/Source/Readers/Reader/CudaMemoryProvider.h
+++ b/Source/Readers/Reader/CudaMemoryProvider.h
@ -0,0 +1,43 @@
+//
+// Copyright (c) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
+//
+
+#pragma once
+
+#include <memory>
+#include <CUDAPageLockedMemAllocator.h>
+
+#include "MemoryProvider.h"
+
+namespace Microsoft { namespace MSR { namespace CNTK {
+
+/// TODO: The memory provider should reside on the matrix. It is the responsibility of the network
+/// to decide what memory to use per stream. This class will be moved in the near future.
+class CudaMemoryProvider : public MemoryProvider
+{
+    // Allocator created for the device id passed to the constructor; owned exclusively by this provider.
+    std::unique_ptr<CUDAPageLockedMemAllocator> m_allocator;
+
+public:
+    CudaMemoryProvider(int deviceId)
+        : m_allocator(std::make_unique<CUDAPageLockedMemAllocator>(deviceId))
+    {
+    }
+
+    // Requests elementSize * numberOfElements bytes from the allocator (the product is not checked for overflow).
+    virtual void* Alloc(size_t elementSize, size_t numberOfElements) override
+    {
+        return m_allocator->Malloc(elementSize * numberOfElements);
+    }
+
+    // Hands a pointer back to the allocator; a null pointer is ignored.
+    virtual void Free(void* p) override
+    {
+        if (p != nullptr)
+        {
+            m_allocator->Free(reinterpret_cast<char*>(p));
+        }
+    }
+};
+} } }
--- a/Source/Readers/Reader/DataDeserializer.h
+++ b/Source/Readers/Reader/DataDeserializer.h
@ -0,0 +1,105 @@
+//
+// Copyright (c) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
+//
+
+#pragma once
+
+#include <vector>
+#include "Reader.h"
+
+namespace Microsoft { namespace MSR { namespace CNTK {
+
+// Defines main properties of a sequence.
+// Sequence descriptions are used by the randomizer to establish a global timeline for complete input.
+// A sequence is defined as an ordered set of samples (size == 1 is used for sample training).
+struct SequenceDescription
+{
+    size_t m_id;              // Sequence id, uniquely identifies the sequence.
+    size_t m_numberOfSamples; // Number of samples in the sequence (1 for sample/frame training).
+    size_t m_chunkId;         // Id of the I/O chunk the sequence belongs to; how a chunk is defined is specific
+                              // to a particular data deserializer. The randomizer guarantees to request sequences
+                              // from only a limited subset of chunks at any moment in time.
+    bool m_isValid;           // Indicates whether the sequence is valid.
+};
+typedef std::vector<const SequenceDescription*> SequenceDescriptions;
+
+// Defines sequence data and its layout.
+// Currently CNTK supports dense and sparse sequences (csc).
+// The storageType in the corresponding stream description identifies what type of SequenceData
+// a data deserializer or transformer provides.
+struct SequenceDataBase
+{
+    SequenceDataBase() : m_data(nullptr) { }
+
+    // A non-owned pointer. The actual size is provided for particular sequences,
+    // e.g. see DenseSequenceData or SparseSequenceData.
+    void* m_data;
+};
+typedef std::shared_ptr<SequenceDataBase> SequenceDataPtr;
+
+// Dense sequence. Should be returned by the deserializer for streams with storage type StorageType::dense.
+// All samples are stored in the m_data member as a contiguous array.
+// The layout of the samples is described by m_sampleLayout.
+// All samples in the sequence should have the same layout.
+struct DenseSequenceData : SequenceDataBase
+{
+    DenseSequenceData() : m_numberOfSamples(0) { }
+
+    TensorShapePtr m_sampleLayout; // Sample layout, can be shared by several sequences.
+    size_t m_numberOfSamples;      // Number of samples in the sequence (0 until set)
+};
+typedef std::shared_ptr<DenseSequenceData> DenseSequenceDataPtr;
+
+// Sparse sequence. Should be returned by the deserializer for streams with storage type StorageType::csc_sparse.
+// All non-zero values are stored in the m_data member as a contiguous array.
+// The corresponding row indices are stored in m_indices, one vector per sample.
+// All samples in the sequence should have the same layout.
+struct SparseSequenceData : SequenceDataBase
+{
+    std::vector<std::vector<size_t>> m_indices;
+};
+typedef std::shared_ptr<SparseSequenceData> SparseSequenceDataPtr;
+
+//////////////////////////////////////////////////////////////////////////////////////////////////
+// Interface all data deserializers should implement.
+// Data deserializers are intimately familiar with a particular input format and are responsible for bringing
+// the serialized data into sequences in memory. Very often data for different streams (e.g. features/lattices)
+// resides in the same physical storage (file), so the data deserializer can expose not a single but several
+// streams. Examples of data deserializers include the image data deserializer and the htkmlf data deserializer.
+// TODO: This interface will become ABI and deserializers can be implemented in different languages, e.g. Python.
+//////////////////////////////////////////////////////////////////////////////////////////////////
+class DataDeserializer
+{
+public:
+    // Describes streams this data deserializer can produce. Streams correspond to network inputs.
+    virtual std::vector<StreamDescriptionPtr> GetStreamDescriptions() const = 0;
+
+    // Retrieves descriptions of all sequences this data deserializer can produce.
+    virtual const SequenceDescriptions& GetSequenceDescriptions() const = 0;
+
+    // Sets epoch configuration.
+    virtual void StartEpoch(const EpochConfiguration& config) = 0;
+
+    // Gets sequences by id.
+    // The return value can be used until the next call to GetSequencesById.
+    // All non-owned pointers returned are valid until the next call to this method.
+    virtual std::vector<std::vector<SequenceDataPtr>> GetSequencesById(const std::vector<size_t>& ids) = 0;
+
+    // Requires the chunk. Each sequence is assigned to an I/O chunk by the data deserializer.
+    // This information is communicated through the GetSequenceDescriptions method.
+    // The randomizer guarantees that it accesses sequences only from a limited number of chunks.
+    // When the randomizer requires a sequence from a particular chunk it notifies the data deserializer about it,
+    // so that the data deserializer can load/cache sequences more efficiently (loading complete chunks in memory).
+    virtual void RequireChunk(size_t chunkIndex) = 0;
+
+    // Releases the chunk.
+    // When the randomizer has read all sequences from a particular chunk it notifies the data deserializer
+    // that the chunk can be freed.
+    virtual void ReleaseChunk(size_t chunkIndex) = 0;
+    // Virtual so that implementations can be destroyed through a pointer to this interface.
+    virtual ~DataDeserializer() {};
+};
+
+typedef std::shared_ptr<DataDeserializer> DataDeserializerPtr;
+} } }
--- a/Source/Readers/Reader/DataDeserializerBase.h
+++ b/Source/Readers/Reader/DataDeserializerBase.h
@ -0,0 +1,58 @@
+//
+// Copyright (c) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
+//
+
+#pragma once
+
+#include "DataDeserializer.h"
+
+namespace Microsoft { namespace MSR { namespace CNTK {
+
+// Base class for data deserializers.
+// Provides default implementations for a subset of the DataDeserializer methods.
+class DataDeserializerBase : public DataDeserializer
+{
+public:
+    DataDeserializerBase() : m_sequencesInitialized(false)
+    {}
+
+    // Sets configuration for the current epoch. The default implementation ignores it.
+    void StartEpoch(const EpochConfiguration& /*config*/) override {};
+
+    // Provides description of all sequences the deserializer can produce.
+    const SequenceDescriptions& GetSequenceDescriptions() const override
+    {
+        if (!m_sequencesInitialized) // built lazily on first call; no synchronization is done here
+        {
+            FillSequenceDescriptions(m_sequences);
+            m_sequencesInitialized = true;
+        }
+        return m_sequences;
+    }
+
+    // To be called by the randomizer for prefetching the next chunk.
+    // By default IO read-ahead is not implemented, so this does nothing.
+    void RequireChunk(size_t /*chunkIndex*/) override{};
+
+    // To be called by the randomizer for releasing a prefetched chunk.
+    // By default IO read-ahead is not implemented, so this does nothing.
+    void ReleaseChunk(size_t /*chunkIndex*/) override{};
+
+protected:
+    // Fills the timeline with sequence descriptions.
+    // Derived classes should provide the complete sequence descriptions for all input data.
+    virtual void FillSequenceDescriptions(SequenceDescriptions& timeline) const = 0;
+
+    // Streams this data deserializer can produce.
+    std::vector<StreamDescriptionPtr> m_streams;
+
+private:
+    DataDeserializerBase(const DataDeserializerBase&) = delete;
+    DataDeserializerBase& operator=(const DataDeserializerBase&) = delete;
+    // Cache for GetSequenceDescriptions(); mutable because that method is const.
+    mutable SequenceDescriptions m_sequences;
+    mutable bool m_sequencesInitialized;
+};
+
+}}}
--- a/Source/Readers/Reader/ElementTypeUtils.h
+++ b/Source/Readers/Reader/ElementTypeUtils.h
@ -0,0 +1,29 @@
+//
+// Copyright (c) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
+//
+
+#pragma once
+
+#include <vector>
+#include <memory>
+#include "Reader.h"
+
+namespace Microsoft { namespace MSR { namespace CNTK {
+
+// Returns the size of the type.
+inline size_t GetSizeByType(ElementType type)
+{
+    switch (type)
+    {
+    case ElementType::tfloat:
+        return sizeof(float);
+    case ElementType::tdouble:
+        return sizeof(double);
+    case ElementType::tatom:
+        return sizeof(char);
+    default:
+        RuntimeError("Unsupported type '%d'", type);
+    }
+}
+} } }
--- a/Source/Readers/Reader/HeapMemoryProvider.h
+++ b/Source/Readers/Reader/HeapMemoryProvider.h
@ -0,0 +1,30 @@
+//
+// Copyright (c) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
+//
+
+#pragma once
+
+#include <algorithm>
+#include "MemoryProvider.h"
+
+namespace Microsoft { namespace MSR { namespace CNTK {
+
+class HeapMemoryProvider : public MemoryProvider
+{
+    // Memory provider backed by ::operator new / ::operator delete; currently no extra alignment is applied.
+public:
+    // Allocates elementSize * numberOfElements bytes (the product is not checked for overflow).
+    virtual void* Alloc(size_t elementSize, size_t numberOfElements) override
+    {
+        return ::operator new(elementSize * numberOfElements);
+    }
+
+    // Releases storage obtained from Alloc().
+    virtual void Free(void* p) override
+    {
+        ::operator delete(p);
+    }
+};
+
+}}}
--- a/Source/Readers/Reader/MemoryProvider.h
+++ b/Source/Readers/Reader/MemoryProvider.h
@ -0,0 +1,29 @@
+//
+// Copyright (c) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
+//
+
+#pragma once
+
+#include <memory>
+
+namespace Microsoft { namespace MSR { namespace CNTK {
+
+//////////////////////////////////////////////////////////////////////////////////////////////////
+// Interface used for allocating stream data returned by the reader.
+// TODO: Should be injected by CNTK into the reader (will be a member of Matrix class).
+//////////////////////////////////////////////////////////////////////////////////////////////////
+class MemoryProvider
+{
+public:
+    // Allocates contiguous storage for numberOfElements elements of elementSize each.
+    virtual void* Alloc(size_t elementSize, size_t numberOfElements) = 0;
+
+    // Frees contiguous storage previously returned by Alloc.
+    virtual void Free(void* ptr) = 0;
+    // Virtual so that implementations can be destroyed through a pointer to this interface.
+    virtual ~MemoryProvider() { }
+};
+
+typedef std::shared_ptr<MemoryProvider> MemoryProviderPtr;
+} } }
--- a/Source/Readers/Reader/NoRandomizer.cpp
+++ b/Source/Readers/Reader/NoRandomizer.cpp
@ -0,0 +1,86 @@
+//
+// Copyright (c) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
+//
+
+#define _CRT_SECURE_NO_WARNINGS
+#include <algorithm>
+
+#include "NoRandomizer.h"
+#include "DataReader.h"
+
+namespace Microsoft { namespace MSR { namespace CNTK {
+
+// Builds a randomizer on top of the given deserializer and caches the deserializer's timeline
+// (its sequence descriptions).
+// Raises a runtime error if any sequence does not consist of exactly one sample,
+// because only frame mode is supported.
+NoRandomizer::NoRandomizer(DataDeserializerPtr deserializer)
+    : m_deserializer(deserializer),
+      m_samplePositionInEpoch(0),
+      m_sequencePosition(0)
+{
+    assert(deserializer != nullptr);
+
+    m_timeline = m_deserializer->GetSequenceDescriptions();
+    for (const auto& sequence : m_timeline)
+    {
+        if (sequence->m_numberOfSamples != 1)
+        {
+            RuntimeError("Currently, no randomizer supports only frame mode. Received a sequence with %d number of samples.",
+                static_cast<int>(sequence->m_numberOfSamples));
+        }
+    }
+}
+
+// No-op: both arguments are ignored (the deserializer is supplied through the constructor).
+void NoRandomizer::Initialize(TransformerPtr, const ConfigParameters&)
+{
+}
+
+// Starts a new epoch: forwards the configuration to the deserializer and positions the
+// randomizer at the first sequence of the requested epoch.
+// config: epoch configuration; a total epoch size of requestDataSize means
+//         "one full sweep over the timeline".
+// Raises a runtime error if the timeline is empty.
+void NoRandomizer::StartEpoch(const EpochConfiguration& config)
+{
+    // The start position below is computed modulo the timeline length, so an empty
+    // timeline has to be rejected explicitly instead of dividing by zero.
+    if (m_timeline.empty())
+    {
+        RuntimeError("NoRandomizer: the deserializer provided no sequences, cannot start an epoch.");
+    }
+
+    m_deserializer->StartEpoch(config);
+    m_config = config;
+
+    if (m_config.m_totalEpochSizeInSamples == requestDataSize)
+    {
+        m_config.m_totalEpochSizeInSamples = m_timeline.size();
+    }
+
+    // In frame mode every sequence is a single sample, so the global sample position
+    // (wrapped around the timeline) is also the sequence position.
+    m_samplePositionInEpoch = 0;
+    size_t globalSamplePosition = m_config.m_totalEpochSizeInSamples * config.m_epochIndex;
+    m_sequencePosition = globalSamplePosition % m_timeline.size();
+}
+
+// Returns the next portion of sequences for this worker.
+// sampleCount: maximum number of samples for the whole minibatch (all workers together);
+//              it is capped by the number of samples left in the epoch.
+// The result has m_endOfEpoch set (and carries no data) once the epoch size has been consumed.
+// The result can also be empty without m_endOfEpoch when this worker's share of the minibatch is zero.
+Sequences NoRandomizer::GetNextSequences(size_t sampleCount)
+{
+    Sequences result;
+    if(m_config.m_totalEpochSizeInSamples <= m_samplePositionInEpoch)
+    {
+        result.m_endOfEpoch = true;
+        return result;
+    }
+
+    // Split the (capped) minibatch between the workers; this worker takes the range [start, end).
+    size_t maxSampleCount = std::min(sampleCount, m_config.m_totalEpochSizeInSamples - m_samplePositionInEpoch);
+    size_t start = maxSampleCount * m_config.m_workerRank / m_config.m_numberOfWorkers;
+    size_t end = maxSampleCount * (m_config.m_workerRank + 1) / m_config.m_numberOfWorkers;
+    size_t subsetSize = end - start;
+
+    // Collect the ids of the sequences in this worker's range, wrapping around the end of the timeline.
+    std::vector<size_t> originalIds;
+    originalIds.reserve(subsetSize);
+    for (size_t i = start; i < end; ++i)
+    {
+        const auto& sequence = m_timeline[(m_sequencePosition + i) % m_timeline.size()];
+        assert(sequence->m_numberOfSamples == 1);
+        originalIds.push_back(sequence->m_id);
+    }
+
+    // Both positions advance by the whole minibatch, not only by this worker's share.
+    m_samplePositionInEpoch += maxSampleCount;
+    m_sequencePosition = (m_sequencePosition + maxSampleCount) % m_timeline.size();
+
+    if (originalIds.size() == 0)
+    {
+        return result;
+    }
+
+    result.m_data = m_deserializer->GetSequencesById(originalIds);
+    return result;
+}
+
+}}}
--- a/Source/Readers/Reader/NoRandomizer.h
+++ b/Source/Readers/Reader/NoRandomizer.h
@ -0,0 +1,45 @@
+//
+// Copyright (c) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
+//
+
+#pragma once
+
+#include <vector>
+
+#include "Transformer.h"
+#include "DataDeserializer.h"
+
+namespace Microsoft { namespace MSR { namespace CNTK {
+
+// The class represents a randomizer that does not randomize input (identity function over the original timeline).
+// TODO: currently this code is moved from the old block randomizer.
+// The class will be further refactored and a common base will be extracted with BlockRandomizer.
+// Currently works only for frame mode (number of samples in a sequence == 1) and without chunking.
+class NoRandomizer : public Transformer
+{
+public:
+    NoRandomizer(DataDeserializerPtr deserializer);
+
+    virtual void Initialize(TransformerPtr next, const ConfigParameters& readerConfig) override;
+    virtual void StartEpoch(const EpochConfiguration& config) override;
+    virtual Sequences GetNextSequences(size_t sampleCount) override;
+
+    // The streams are passed through unchanged, so the descriptions are those of the deserializer.
+    virtual std::vector<StreamDescriptionPtr> GetStreamDescriptions() const override
+    {
+        return m_deserializer->GetStreamDescriptions();
+    }
+
+private:
+    // Deserializer and information on the original timeline
+    DataDeserializerPtr m_deserializer;
+
+    // Initial timeline.
+    SequenceDescriptions m_timeline;
+
+    // Epoch configuration
+    EpochConfiguration m_config;
+    // Number of samples of the current epoch that have been handed out so far (across all workers).
+    size_t m_samplePositionInEpoch;
+    // Index into m_timeline of the next sequence to hand out.
+    size_t m_sequencePosition;
+};
+
+}}}
--- a/Source/Readers/Reader/Reader.h
+++ b/Source/Readers/Reader/Reader.h
@ -0,0 +1,108 @@
+//
+// Copyright (c) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
+//
+
+#pragma once
+
+#include <vector>
+#include <memory>
+#include "Sequences.h"
+#include "TensorShape.h"
+
+namespace Microsoft { namespace MSR { namespace CNTK {
+
+typedef std::shared_ptr<TensorShape> TensorShapePtr;
+
+struct MBLayout;
+typedef std::shared_ptr<MBLayout> MBLayoutPtr;
+
+// Configuration for the current epoch.
+// Each time the epoch is started CNTK should provide the configuration to the reader using StartEpoch method
+// and the below structure.
+struct EpochConfiguration
+{
+    size_t m_numberOfWorkers;               // Number of the Open MPI workers for the current epoch
+    size_t m_workerRank;                    // Rank of the Open MPI worker, worker rank has to be less than the number of workers
+    size_t m_minibatchSizeInSamples;        // Maximum minibatch size for the epoch in samples
+    size_t m_totalEpochSizeInSamples;       // Total size of the epoch in samples
+    size_t m_epochIndex;                    // Current epoch index [0 .. max number of epochs)
+};
+
+// Supported primitive element types, will be extended in the future.
+enum class ElementType
+{
+    tfloat,  // single precision
+    tdouble, // double precision
+    tatom    // sizeof(atom) == 1; building block of blobs, i.e. sequences of atoms (e.g. used for lattices, HMMs, etc.)
+};
+
+// Supported storage types, will be extended in the future.
+enum class StorageType
+{
+    dense,
+    sparse_csc,
+};
+
+typedef size_t StreamId;
+
+// This class describes a particular stream: its name, element type, storage, etc.
+struct StreamDescription
+{
+    std::wstring m_name;           // Unique name of the stream
+    StreamId m_id;                 // Unique identifier of the stream
+    StorageType m_storageType;     // Storage type of the stream
+    ElementType m_elementType;     // Element type of the stream
+    TensorShapePtr m_sampleLayout; // Layout of the sample for the stream
+                                   // If not specified - can be specified per sequence
+};
+typedef std::shared_ptr<StreamDescription> StreamDescriptionPtr;
+
+// Represents the minibatch data for a single stream, formatted according to the minibatch layout.
+// This data is returned per stream as a part of Minibatch from the ReadMinibatch function.
+// All raw non owned pointers are valid till the next call to the ReadMinibatch function.
+struct StreamMinibatch
+{
+    void* m_data;         // Contiguous array of data. Can be encoded in dense or sparse formats depending on the stream description.
+    size_t m_dataSize;    // Data size in bytes.
+    MBLayoutPtr m_layout; // Layout of the data
+};
+typedef std::shared_ptr<StreamMinibatch> StreamMinibatchPtr;
+
+// Represents a single minibatch, that contains information about all streams.
+struct Minibatch
+{
+    // Indicates that the end of epoch has been reached.
+    // It is set to true for the last minibatch, there still
+    // can be data in m_data field even if this flag is set.
+    bool m_endOfEpoch;
+
+    // Minibatch data
+    std::vector<StreamMinibatchPtr> m_data;
+
+    Minibatch() : m_endOfEpoch(false)
+    {
+    }
+};
+
+//////////////////////////////////////////////////////////////////////////////////////////////////
+// Main Reader interface. The border interface between the CNTK and reader libraries.
+// TODO: Expect to change in a little bit: stream matrices provided by the network as input.
+//////////////////////////////////////////////////////////////////////////////////////////////////
+class Reader
+{
+public:
+    // Describes the streams this reader produces.
+    virtual std::vector<StreamDescriptionPtr> GetStreamDescriptions() = 0;
+
+    // Starts a new epoch with the provided configuration.
+    virtual void StartEpoch(const EpochConfiguration& config) = 0;
+
+    // Reads a minibatch that contains data across all streams.
+    // Raw pointers inside the returned minibatch stay valid only until the next call to ReadMinibatch.
+    // The m_endOfEpoch flag of the result marks the last minibatch of the epoch; that minibatch may still carry data.
+    virtual Minibatch ReadMinibatch() = 0;
+
+    // Virtual destructor: readers are held through ReaderPtr (a pointer to this interface).
+    virtual ~Reader() {};
+};
+
+typedef std::shared_ptr<Reader> ReaderPtr;
+}}}
--- a/Source/Readers/Reader/Reader.vcxproj
+++ b/Source/Readers/Reader/Reader.vcxproj
@ -0,0 +1,93 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="12.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{F0A9637C-20DA-42F0-83D4-23B4704DE602}</ProjectGuid>
+    <RootNamespace>Reader</RootNamespace>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <PlatformToolset>v120</PlatformToolset>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <UseDebugLibraries>true</UseDebugLibraries>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\$(ProjectName)\</IntDir>
+  </PropertyGroup>
+  <ItemDefinitionGroup>
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <SDLCheck>true</SDLCheck>
+      <TreatWarningAsError>true</TreatWarningAsError>
+      <AdditionalIncludeDirectories>..\..\common\include;..\..\math</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+    </ClCompile>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+    </ClCompile>
+    <Link>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClInclude Include="DataDeserializerBase.h" />
+    <ClInclude Include="TransformerBase.h" />
+    <ClInclude Include="BlockRandomizer.h" />
+    <ClInclude Include="NoRandomizer.h" />
+    <ClInclude Include="CudaMemoryProvider.h" />
+    <ClInclude Include="DataDeserializer.h" />
+    <ClInclude Include="ElementTypeUtils.h" />
+    <ClInclude Include="SampleModePacker.h" />
+    <ClInclude Include="HeapMemoryProvider.h" />
+    <ClInclude Include="MemoryProvider.h" />
+    <ClInclude Include="Reader.h" />
+    <ClInclude Include="ReaderShim.h" />
+    <ClInclude Include="StringUtils.h" />
+    <ClInclude Include="Transformer.h" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="BlockRandomizer.cpp" />
+    <ClCompile Include="NoRandomizer.cpp" />
+    <ClCompile Include="SampleModePacker.cpp" />
+    <ClCompile Include="ReaderShim.cpp" />
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
--- a/Source/Readers/Reader/Reader.vcxproj.filters
+++ b/Source/Readers/Reader/Reader.vcxproj.filters
@ -0,0 +1,84 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <ClInclude Include="BlockRandomizer.h">
+      <Filter>Randomizers</Filter>
+    </ClInclude>
+    <ClInclude Include="NoRandomizer.h">
+      <Filter>Randomizers</Filter>
+    </ClInclude>
+    <ClInclude Include="CudaMemoryProvider.h">
+      <Filter>MemoryProviders</Filter>
+    </ClInclude>
+    <ClInclude Include="HeapMemoryProvider.h">
+      <Filter>MemoryProviders</Filter>
+    </ClInclude>
+    <ClInclude Include="ReaderShim.h">
+      <Filter>Utils</Filter>
+    </ClInclude>
+    <ClInclude Include="Reader.h">
+      <Filter>Interfaces</Filter>
+    </ClInclude>
+    <ClInclude Include="DataDeserializer.h">
+      <Filter>Interfaces</Filter>
+    </ClInclude>
+    <ClInclude Include="MemoryProvider.h">
+      <Filter>Interfaces</Filter>
+    </ClInclude>
+    <ClInclude Include="Transformer.h">
+      <Filter>Interfaces</Filter>
+    </ClInclude>
+    <ClInclude Include="ElementTypeUtils.h">
+      <Filter>Utils</Filter>
+    </ClInclude>
+    <ClInclude Include="StringUtils.h">
+      <Filter>Utils</Filter>
+    </ClInclude>
+    <ClInclude Include="TransformerBase.h">
+      <Filter>Transformers</Filter>
+    </ClInclude>
+    <ClInclude Include="DataDeserializerBase.h">
+      <Filter>Deserializers</Filter>
+    </ClInclude>
+    <ClInclude Include="SampleModePacker.h">
+      <Filter>Packers</Filter>
+    </ClInclude>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="BlockRandomizer.cpp">
+      <Filter>Randomizers</Filter>
+    </ClCompile>
+    <ClCompile Include="NoRandomizer.cpp">
+      <Filter>Randomizers</Filter>
+    </ClCompile>
+    <ClCompile Include="ReaderShim.cpp">
+      <Filter>Utils</Filter>
+    </ClCompile>
+    <ClCompile Include="SampleModePacker.cpp">
+      <Filter>Packers</Filter>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <Filter Include="Interfaces">
+      <UniqueIdentifier>{0a7ac112-ad49-4d5e-83d8-a7640b199568}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="Randomizers">
+      <UniqueIdentifier>{0e2c2f22-20f2-42c1-a5d1-d9ac7a4bd33a}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="Packers">
+      <UniqueIdentifier>{b7203d1c-fd6e-40de-9680-ba4042ce7c7f}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="MemoryProviders">
+      <UniqueIdentifier>{6ada575c-9f2c-476f-8c97-8617e1ecd01a}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="Utils">
+      <UniqueIdentifier>{3cd09e43-18cb-48ad-a5f7-89553c063d8c}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="Deserializers">
+      <UniqueIdentifier>{2f73a66b-9551-41c4-91c5-cf51537b9feb}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="Transformers">
+      <UniqueIdentifier>{90d4b51b-73ae-47f5-9a9e-97ef287dcead}</UniqueIdentifier>
+    </Filter>
+  </ItemGroup>
+</Project>
--- a/Source/Readers/Reader/ReaderShim.cpp
+++ b/Source/Readers/Reader/ReaderShim.cpp
@ -0,0 +1,158 @@
+//
+// Copyright (c) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
+//
+// ReaderShim.cpp: implementation for shim wrapping the new reader interface
+//
+
+#define _CRT_SECURE_NO_WARNINGS
+
+#ifdef _WIN32
+#include <objbase.h>
+#endif
+#include "Basics.h"
+
+#define DATAREADER_EXPORTS // creating the exports here
+#include "DataReader.h"
+//#include "commandArgUtil.h"
+#include "ReaderShim.h"
+
+namespace Microsoft { namespace MSR { namespace CNTK {
+
+// Creates a shim around the reader produced by the given factory.
+// The reader itself is only instantiated in Init().
+// Initializers are listed in member declaration order. m_endOfEpoch and m_launchType used to be
+// left uninitialized until StartDistributedMinibatchLoop() / Init() ran; they now start out with
+// well-defined values (launch::async matches the default of the "prefetch" option in Init()).
+template <class ElemType>
+ReaderShim<ElemType>::ReaderShim(ReaderFactory factory)
+    : m_factory(factory),
+      m_endOfEpoch(false),
+      m_layout(make_shared<MBLayout>()),
+      m_launchType(launch::async)
+{
+}
+
+// Reads the shim settings from the configuration, creates the wrapped reader through the factory
+// and builds the lookup from stream name to stream id.
+template <class ElemType>
+void ReaderShim<ElemType>::Init(const ConfigParameters& config)
+{
+    // Defaults to a single entry with value 1 when "nbruttsineachrecurrentiter" is not configured.
+    // NOTE(review): presumably the number of parallel sequences per minibatch, one entry per epoch - confirm.
+    intargvector numberOfuttsPerMinibatchForAllEpochs =
+        config(L"nbruttsineachrecurrentiter", ConfigParameters::Array(intargvector(vector<int> { 1 })));
+
+    bool prefetch = config(L"prefetch", true);
+    // if prefetch - launching asynchronously,
+    // otherwise deferring - synchronous execution during .get() call
+    m_launchType = prefetch ? launch::async : launch::deferred;
+
+    // Only the first entry is used to initialize the layout.
+    auto numSeqsPerMBForAllEpochs = numberOfuttsPerMinibatchForAllEpochs;
+    m_layout->Init(numSeqsPerMBForAllEpochs[0], 0);
+
+    m_reader = m_factory(config);
+    m_streams = m_reader->GetStreamDescriptions();
+    for (auto i : m_streams)
+    {
+        m_nameToStreamId.insert(std::make_pair(i->m_name, i->m_id));
+    }
+}
+
+// Starts a non-distributed minibatch loop: a single worker (rank 0 of 1) reads all the data.
+// The default value of requestedEpochSamples is only documented here (as in
+// StartDistributedMinibatchLoop below): a default argument of a class template member function
+// may only be specified on its declaration inside the class, not on an out-of-class definition.
+template <class ElemType>
+void ReaderShim<ElemType>::StartMinibatchLoop(size_t mbSize, size_t epoch, size_t requestedEpochSamples /*= requestDataSize*/)
+{
+    StartDistributedMinibatchLoop(mbSize, epoch, 0, 1, requestedEpochSamples);
+}
+
+// Starts a new epoch on the wrapped reader and schedules the read of its first minibatch.
+// requestedMBSize:       maximum minibatch size in samples.
+// epoch:                 index of the epoch to start.
+// subsetNum/numSubsets:  rank of this worker and total number of workers.
+// requestedEpochSamples: total epoch size in samples.
+template <class ElemType>
+void ReaderShim<ElemType>::StartDistributedMinibatchLoop(
+    size_t requestedMBSize,
+    size_t epoch,
+    size_t subsetNum,
+    size_t numSubsets,
+    size_t requestedEpochSamples /*= requestDataSize*/)
+{
+    // GetMinibatch() schedules a read-ahead after every delivered minibatch. With asynchronous
+    // prefetch that task may still be running ReadMinibatch() on another thread, so let it finish
+    // before the reader is reconfigured. A deferred task has not started and is simply dropped
+    // when m_prefetchTask is reassigned below.
+    if (m_prefetchTask.valid() && m_launchType == launch::async)
+    {
+        m_prefetchTask.wait();
+    }
+
+    EpochConfiguration config;
+    config.m_workerRank = subsetNum;
+    config.m_numberOfWorkers = numSubsets;
+    config.m_minibatchSizeInSamples = requestedMBSize;
+    config.m_totalEpochSizeInSamples = requestedEpochSamples;
+    config.m_epochIndex = epoch;
+
+    m_reader->StartEpoch(config);
+    m_endOfEpoch = false;
+
+    m_prefetchTask = std::async(m_launchType, [this]()
+    {
+        return m_reader->ReadMinibatch();
+    });
+}
+
+// Copies the next (prefetched) minibatch into the provided matrices and schedules the following read.
+// matrices: maps stream names to the matrices to fill; every name has to match a stream of the reader.
+// Returns true if data has been copied, false once the epoch is exhausted.
+// Raises a runtime error if a requested name is not a stream of the reader.
+template <class ElemType>
+bool ReaderShim<ElemType>::GetMinibatch(std::map<std::wstring, Matrix<ElemType>*>& matrices)
+{
+    if (m_endOfEpoch)
+    {
+        return false;
+    }
+
+    // Check that all matrices have the same device id.
+    // If not we should inject the IMemoryProvider per stream.
+    // An empty map has no first element to take the reference device id from, so it is skipped.
+    if (!matrices.empty())
+    {
+        int deviceId = matrices.begin()->second->GetDeviceId();
+        for (const auto& mx : matrices)
+        {
+            if (mx.second->GetDeviceId() != deviceId)
+            {
+                assert(false);
+            }
+        }
+    }
+
+    assert(m_prefetchTask.valid());
+
+    Minibatch minibatch = m_prefetchTask.get();
+    if (minibatch.m_endOfEpoch)
+    {
+        m_endOfEpoch = true;
+        if (minibatch.m_data.empty())
+        {
+            return false;
+        }
+    }
+
+    if (!minibatch.m_data.empty())
+    {
+        // Copy returned minibatch to the matrices.
+        for (const auto& mx : matrices)
+        {
+            // find() instead of operator[]: the latter would insert an unknown name and quietly map it
+            // to stream 0 in builds where asserts are compiled out.
+            const auto nameAndId = m_nameToStreamId.find(mx.first);
+            if (nameAndId == m_nameToStreamId.end())
+            {
+                RuntimeError("The reader does not provide a stream named '%ls'.", mx.first.c_str());
+            }
+            const size_t streamId = nameAndId->second;
+
+            const auto& stream = minibatch.m_data[streamId];
+
+            // Copy the layout contents rather than sharing the object: the read-ahead scheduled below
+            // may re-initialize the reader's layout object while callers still query m_layout.
+            m_layout->CopyFrom(stream->m_layout);
+
+            size_t columnNumber = m_layout->GetNumCols();
+            size_t rowNumber = m_streams[streamId]->m_sampleLayout->GetNumElements();
+
+            auto data = reinterpret_cast<const ElemType*>(stream->m_data);
+            mx.second->SetValue(rowNumber, columnNumber, mx.second->GetDeviceId(), const_cast<ElemType*>(data), matrixFlagNormal);
+        }
+    }
+
+    // The data has been copied out, so the reader may now overwrite its buffers with the next minibatch.
+    m_prefetchTask = std::async(m_launchType, [this]()
+    {
+        return m_reader->ReadMinibatch();
+    });
+
+    return !minibatch.m_data.empty();
+}
+
+// Always reports false; the requested end-of-data type is ignored.
+template <class ElemType>
+bool ReaderShim<ElemType>::DataEnd(EndDataType /*endDataType*/)
+{
+    return false;
+}
+
+// Copies the layout currently held by the shim into the provided layout.
+template <class ElemType>
+void ReaderShim<ElemType>::CopyMBLayoutTo(MBLayoutPtr layout)
+{
+    layout->CopyFrom(m_layout);
+}
+
+// Returns the number of parallel sequences of the layout currently held by the shim.
+template <class ElemType>
+size_t ReaderShim<ElemType>::GetNumParallelSequences()
+{
+    return m_layout->GetNumParallelSequences();
+}
+
+template class ReaderShim<float>;
+template class ReaderShim<double>;
+} } }
--- a/Source/Readers/Reader/ReaderShim.h
+++ b/Source/Readers/Reader/ReaderShim.h
@ -0,0 +1,68 @@
+//
+// Copyright (c) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
+//
+// ReaderShim.h: Currently we are preserving the old interface in SGD. So this shim exposes the old interface and calls into the 
+// reader implemented with the new interfaces (reader/packer/transforms/serializers)
+//
+
+#pragma once
+
+#include <map>
+#include <string>
+#include "DataReader.h"
+#include <future>
+#include "Reader.h"
+
+namespace Microsoft { namespace MSR { namespace CNTK {
+
+typedef ReaderPtr (*ReaderFactory)(const ConfigParameters& parameters);
+
+// Adapter that exposes a reader implementing the new Reader interface through the old
+// IDataReader interface. The next minibatch is read ahead through a std::future.
+template <class ElemType>
+class ReaderShim : public IDataReader<ElemType>
+{
+public:
+    // factory: function that creates the wrapped reader from the configuration; it is invoked in Init().
+    explicit ReaderShim(ReaderFactory factory);
+    virtual ~ReaderShim() { }
+
+    // Initialization from an IConfigRecord is not supported by the shim (asserts).
+    virtual void Init(const ScriptableObjects::IConfigRecord& /*config*/) override
+    {
+        assert(false);
+    }
+    virtual void Init(const ConfigParameters& config) override;
+
+    // Destroys the shim itself.
+    virtual void Destroy() override
+    {
+        delete this;
+    }
+
+    virtual void StartMinibatchLoop(size_t mbSize, size_t epoch, size_t requestedEpochSamples) override;
+    virtual void StartDistributedMinibatchLoop(size_t requestedMBSize, size_t epoch, size_t subsetNum, size_t numSubsets, size_t requestedEpochSamples) override;
+
+    virtual bool SupportsDistributedMBRead() const override
+    {
+        return true;
+    }
+
+    virtual bool GetMinibatch(std::map<std::wstring, Matrix<ElemType>*>& matrices) override;
+
+    virtual bool DataEnd(EndDataType endDataType) override;
+
+    void CopyMBLayoutTo(MBLayoutPtr) override;
+
+    virtual size_t GetNumParallelSequences() override;
+
+private:
+    // Pending read of the next minibatch.
+    std::future<Minibatch> m_prefetchTask;
+    // Wrapped reader, created by m_factory in Init().
+    ReaderPtr m_reader;
+    ReaderFactory m_factory;
+    // Set once the reader has reported the end of the current epoch.
+    bool m_endOfEpoch;
+
+    // Layout reported through CopyMBLayoutTo() and GetNumParallelSequences().
+    MBLayoutPtr m_layout;
+
+    // Lookup from stream name to stream id, filled in Init().
+    std::map<std::wstring, size_t> m_nameToStreamId;
+    // Stream descriptions of the wrapped reader.
+    std::vector<StreamDescriptionPtr> m_streams;
+    // launch::async when prefetch is enabled, launch::deferred otherwise.
+    launch m_launchType;
+};
+
+}}}
--- a/Source/Readers/Reader/SampleModePacker.cpp
+++ b/Source/Readers/Reader/SampleModePacker.cpp
@ -0,0 +1,152 @@
+//
+// Copyright (c) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
+//
+
+#define _CRT_SECURE_NO_WARNINGS
+#define _SCL_SECURE_NO_WARNINGS
+
+#include "SampleModePacker.h"
+#include "ElementTypeUtils.h"
+
+namespace Microsoft { namespace MSR { namespace CNTK {
+
+// Creates a packer that pulls sequences from the given transformer and packs them into
+// per-stream buffers obtained from the memory provider.
+// memoryProvider: allocator used for the stream buffers.
+// transformer:    source of the sequences; its stream descriptions are the packer's input streams.
+// minibatchSize:  maximum number of samples per minibatch, has to be positive.
+// streams:        descriptions of the output streams; only dense output streams are supported.
+SampleModePacker::SampleModePacker(
+    MemoryProviderPtr memoryProvider,
+    TransformerPtr transformer,
+    size_t minibatchSize,
+    const std::vector<StreamDescriptionPtr>& streams)
+    // Initializers are listed in member declaration order, which is the order they run in.
+    : m_memoryProvider(memoryProvider),
+      m_transformer(transformer),
+      m_outputStreams(streams),
+      m_minibatchLayout(std::make_shared<MBLayout>()),
+      m_minibatchSize(minibatchSize)
+{
+    m_inputStreams = m_transformer->GetStreamDescriptions();
+    assert(m_inputStreams.size() == m_outputStreams.size());
+
+    // No output stream may be sparse.
+    assert(
+        std::find_if(
+            m_outputStreams.begin(),
+            m_outputStreams.end(),
+            [](const StreamDescriptionPtr& s)
+            {
+                return s->m_storageType == StorageType::sparse_csc;
+            }) == m_outputStreams.end());
+
+    assert(m_minibatchSize > 0);
+
+    // size_t index: avoids the signed/unsigned comparison against size().
+    for (size_t i = 0; i < m_outputStreams.size(); ++i)
+    {
+        const auto& stream = m_outputStreams[i];
+        // Input and output should match in everything except for sparse/dense.
+        assert(stream->m_elementType == ElementType::tfloat || stream->m_elementType == ElementType::tdouble);
+        assert(stream->m_name == m_inputStreams[i]->m_name);
+        assert(stream->m_id == m_inputStreams[i]->m_id);
+        assert(GetSampleSize(m_inputStreams[i]) == GetSampleSize(stream));
+
+        // One buffer per stream, large enough for a full minibatch of samples.
+        m_streamBuffers.push_back(
+            AllocateBuffer(m_minibatchSize * stream->m_sampleLayout->GetNumElements(), GetSizeByType(stream->m_elementType)));
+    }
+}
+
+// Reads the next set of sequences from the transformer and packs them into a minibatch.
+// The end-of-epoch flag is taken over from the transformer. When the transformer delivers no
+// sequences, the returned minibatch carries no stream data.
+// The stream data of the result points into buffers owned by the packer; they are overwritten
+// by the next call.
+// Raises a runtime error if the transformer delivers more sequences than the buffers can hold.
+Minibatch SampleModePacker::ReadMinibatch()
+{
+    auto sequences = m_transformer->GetNextSequences(m_minibatchSize);
+
+    Minibatch minibatch;
+    minibatch.m_endOfEpoch = sequences.m_endOfEpoch;
+
+    const size_t sequenceCount = sequences.m_data.size();
+    if (sequenceCount == 0)
+    {
+        return minibatch;
+    }
+
+    // The stream buffers were sized for m_minibatchSize samples in the constructor;
+    // refuse anything larger rather than writing past their end.
+    if (sequenceCount > m_minibatchSize)
+    {
+        RuntimeError("Received %d sequences, but the minibatch buffers hold at most %d samples.",
+            static_cast<int>(sequenceCount),
+            static_cast<int>(m_minibatchSize));
+    }
+
+    // Iterating for sequences inside the batch of sequences.
+    for (size_t sequenceIndex = 0; sequenceIndex < sequenceCount; sequenceIndex++)
+    {
+        // For each sequence iterating thru all the streams with this sequence id and copying to the buffer.
+        assert(m_streamBuffers.size() == sequences.m_data[sequenceIndex].size());
+        for (size_t streamIndex = 0; streamIndex < sequences.m_data[sequenceIndex].size(); ++streamIndex)
+        {
+            CopySequenceToBuffer(sequenceIndex, streamIndex, sequences.m_data);
+        }
+    }
+
+    // Creating output minibatch with shared layout between all streams.
+    m_minibatchLayout->InitAsFrameMode(sequenceCount);
+    for (size_t i = 0; i < m_outputStreams.size(); ++i)
+    {
+        auto stream = std::make_shared<StreamMinibatch>();
+        stream->m_data = m_streamBuffers[i].get();
+        stream->m_dataSize = sequenceCount * GetSampleSize(m_outputStreams[i]);
+        stream->m_layout = m_minibatchLayout;
+
+        minibatch.m_data.push_back(stream);
+    }
+
+    return minibatch;
+}
+
+// Size in bytes of a single sample of the given stream:
+// number of elements in the sample layout times the size of one element.
+size_t SampleModePacker::GetSampleSize(StreamDescriptionPtr stream)
+{
+    assert(stream != nullptr);
+
+    const size_t bytesPerElement = GetSizeByType(stream->m_elementType);
+    const size_t elementsPerSample = stream->m_sampleLayout->GetNumElements();
+    return elementsPerSample * bytesPerElement;
+}
+
+// Copies one sample of one stream into its slot of the corresponding stream buffer.
+// sampleIndex: index of the sequence (a single sample in frame mode) inside the minibatch.
+// streamIndex: index of the stream; selects both the input stream description and the buffer.
+// sequences:   data returned by the transformer, indexed as [sequence][stream].
+// Sparse input is expanded to dense form. Raises a runtime error for any other storage type.
+void SampleModePacker::CopySequenceToBuffer(size_t sampleIndex, size_t streamIndex, const std::vector<std::vector<SequenceDataPtr>>& sequences)
+{
+    // In framemode sequence just contains a single sample.
+    const auto& sample = sequences[sampleIndex][streamIndex];
+    const auto& stream = m_inputStreams[streamIndex];
+
+    size_t sampleSize = GetSampleSize(stream);
+    auto elementSize = GetSizeByType(stream->m_elementType);
+    auto sampleData = reinterpret_cast<const char*>(sample->m_data);
+
+    // Effectively a buffer contains the concatenation of the samples of a stream;
+    // this is the slot of the current sample.
+    char* destination = m_streamBuffers[streamIndex].get() + sampleIndex * sampleSize;
+
+    if (stream->m_storageType == StorageType::dense)
+    {
+        // Expect single sample. The cast lives inside the assert so that builds without asserts
+        // neither copy the sequence object nor keep an unused local around.
+        assert(reinterpret_cast<const DenseSequenceData&>(*sample).m_numberOfSamples == 1);
+
+        std::copy(sampleData, sampleData + sampleSize, destination);
+    }
+    else if (stream->m_storageType == StorageType::sparse_csc)
+    {
+        // Bound by reference: a plain 'auto' would copy the whole sequence object,
+        // including all of its index vectors, for every sample.
+        const auto& data = reinterpret_cast<const SparseSequenceData&>(*sample);
+        // Expect single sample.
+        assert(data.m_indices.size() == 1);
+
+        // Currently sparse data has to be unpacked to the dense one. Possibly can be done later
+        // in the network or as a transformation.
+
+        // Fill it in with zeros.
+        std::fill(destination, destination + sampleSize, 0);
+
+        // Copy the non zero data to the buffer.
+        const auto& nonZeroRows = data.m_indices[0];
+        for (size_t nonZeroIndex = 0; nonZeroIndex < nonZeroRows.size(); ++nonZeroIndex)
+        {
+            size_t rowIndex = nonZeroRows[nonZeroIndex];
+            // The target element has to lie inside the slot of this sample.
+            assert((rowIndex + 1) * elementSize <= sampleSize);
+            std::copy(sampleData + nonZeroIndex * elementSize, sampleData + (nonZeroIndex + 1) * elementSize, destination + rowIndex * elementSize);
+        }
+    }
+    else
+    {
+        // A scoped enum is not promoted to int when passed to a printf-style function, so convert explicitly.
+        RuntimeError("Storage type %d is not supported.", static_cast<int>(stream->m_storageType));
+    }
+}
+
+// Allocates a buffer for numElements elements of elementSize bytes each through the memory provider.
+// The returned pointer gives the storage back to the same provider when the last owner goes away.
+std::shared_ptr<char> SampleModePacker::AllocateBuffer(size_t numElements, size_t elementSize)
+{
+    // The deleter holds its own reference to the provider instead of capturing 'this':
+    // the buffer then stays releasable even if it ends up in a holder other than this packer
+    // (for instance after the packer has been copied) or outlives it.
+    MemoryProviderPtr provider = m_memoryProvider;
+    return std::shared_ptr<char>(
+        reinterpret_cast<char*>(provider->Alloc(elementSize, numElements)),
+        [provider](char* p)
+        {
+            provider->Free(p);
+        });
+}
+} } }
--- a/Source/Readers/Reader/SampleModePacker.h
+++ b/Source/Readers/Reader/SampleModePacker.h
@ -0,0 +1,42 @@
+//
+// Copyright (c) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
+//
+
+#pragma once
+
+#include "Reader.h"
+#include "MemoryProvider.h"
+#include "Transformer.h"
+
+namespace Microsoft { namespace MSR { namespace CNTK {
+
+// A sample packer that densely packs samples in parallel for GPU consumptions.
+// It pulls sequences from a transformer and copies them into one contiguous buffer per stream.
+class SampleModePacker
+{
+public:
+    // memoryProvider: allocator for the per-stream buffers.
+    // transformer:    source of the sequences to pack.
+    // minibatchSize:  maximum number of samples per minibatch.
+    // streams:        descriptions of the output streams.
+    SampleModePacker(
+        MemoryProviderPtr memoryProvider,
+        TransformerPtr transformer,
+        size_t minibatchSize,
+        const std::vector<StreamDescriptionPtr>& streams);
+
+    // Reads the next sequences from the transformer and returns them packed as a minibatch.
+    Minibatch ReadMinibatch();
+
+private:
+    // Allocates a buffer for numElements elements of elementSize bytes through the memory provider.
+    std::shared_ptr<char> AllocateBuffer(size_t numElements, size_t elementSize);
+    // Size in bytes of a single sample of the given stream.
+    size_t GetSampleSize(StreamDescriptionPtr stream);
+    // Copies the data of one sequence of one stream into the buffer of that stream.
+    void CopySequenceToBuffer(size_t sequenceIndex, size_t streamIndex, const std::vector<std::vector<SequenceDataPtr>>& sequences);
+
+    MemoryProviderPtr m_memoryProvider;
+    TransformerPtr m_transformer;
+    // Streams as exposed by the packer.
+    std::vector<StreamDescriptionPtr> m_outputStreams;
+    // Streams as delivered by the transformer.
+    std::vector<StreamDescriptionPtr> m_inputStreams;
+    // One buffer per stream, indexed like the streams.
+    std::vector<std::shared_ptr<char>> m_streamBuffers;
+
+    // Layout shared between all streams of a minibatch.
+    MBLayoutPtr m_minibatchLayout;
+    // Maximum number of samples per minibatch.
+    size_t m_minibatchSize;
+};
+
+typedef std::shared_ptr<SampleModePacker> SampleModePackerPtr;
+} } }
--- a/Source/Readers/Reader/StringUtils.h
+++ b/Source/Readers/Reader/StringUtils.h
@ -0,0 +1,22 @@
+//
+// Copyright (c) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
+//
+
+#pragma once
+
+#include <algorithm>
+#include <cctype>
+#include <locale>
+#include <string>
+
+namespace Microsoft { namespace MSR { namespace CNTK {
+
+// Compares two ASCII strings ignoring the case.
+// Strings of different length are never equal.
+// TODO: Should be moved to common CNTK library and after switching to boost, boost::iequal should be used instead.
+inline bool AreEqualIgnoreCase(const std::string& s1, const std::string& s2)
+{
+    // The three-iterator form of std::equal reads s1.size() characters from s2 without looking at
+    // the length of s2, so the lengths have to be compared first: otherwise a shorter s2 is read
+    // past its end and a longer s2 that merely starts with s1 compares equal.
+    if (s1.size() != s2.size())
+    {
+        return false;
+    }
+
+    return std::equal(s1.begin(), s1.end(), s2.begin(), [](const char& a, const char& b)
+                      {
+                          // std::tolower requires a value representable as unsigned char.
+                          return std::tolower(static_cast<unsigned char>(a)) == std::tolower(static_cast<unsigned char>(b));
+                      });
+}
+} } }
--- a/Source/Readers/Reader/Transformer.h
+++ b/Source/Readers/Reader/Transformer.h
@ -0,0 +1,61 @@
+//
+// Copyright (c) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
+//
+
+#pragma once
+
+#include <vector>
+#include "DataDeserializer.h"
+
+namespace Microsoft { namespace MSR { namespace CNTK {
+
+class ConfigParameters;
+
+// Defines a set of sequences, as returned by Transformer::GetNextSequences.
+struct Sequences
+{
+    Sequences()
+        : m_endOfEpoch(false)
+    {
+    }
+
+    // Data for up to a requested number of sequences.
+    // The outer vector is indexed by sequence, the inner one by stream.
+    // Indices in the inner vector have to correspond to the stream IDs
+    // given by GetStreamDescriptions().
+    std::vector<std::vector<SequenceDataPtr>> m_data;
+
+    // Indicates whether the epoch ends with the data returned.
+    bool m_endOfEpoch;
+};
+
+class Transformer;
+typedef std::shared_ptr<Transformer> TransformerPtr;
+
+// Defines a data transformation interface.
+// Transformers are responsible for doing custom transformation of sequences.
+// For example for images, there could be scale, crop, or median transformation.
+// TODO: Adapt to the C#/Java iterator pattern.
+class Transformer
+{
+public:
+    // Initialization.
+    // next:         the transformer this one takes its input from.
+    // readerConfig: configuration of the reader.
+    virtual void Initialize(
+        TransformerPtr next,
+        const ConfigParameters& readerConfig) = 0;
+
+    // Describes streams the transformer produces.
+    virtual std::vector<StreamDescriptionPtr> GetStreamDescriptions() const = 0;
+
+    // Sets current epoch configuration.
+    virtual void StartEpoch(const EpochConfiguration& config) = 0;
+
+    // Gets next sequences up to a maximum count of samples.
+    // The return value can be used until the next call to GetNextSequences.
+    virtual Sequences GetNextSequences(size_t sampleCount) = 0;
+
+    // Virtual destructor: transformers are held through TransformerPtr (a pointer to this interface).
+    virtual ~Transformer()
+    {
+    }
+};
+} } }
--- a/Source/Readers/Reader/TransformerBase.h
+++ b/Source/Readers/Reader/TransformerBase.h
@ -0,0 +1,104 @@
+//
+// <copyright company="Microsoft">
+//     Copyright (c) Microsoft Corporation.  All rights reserved.
+// </copyright>
+//
+
+#pragma once
+
+#include <set>
+
+#include "Transformer.h"
+
+namespace Microsoft { namespace MSR { namespace CNTK {
+
+// Common base for transformers that rewrite selected streams one sequence at a time.
+// Input is pulled from the next transformer, Apply() is invoked for every
+// (sequence, applied stream) pair, and its result replaces the original
+// sequence data. TBufferElement is the type of the scratch object handed to
+// Apply() for each such pair.
+// Currently supports only dense data format.
+template <class TBufferElement>
+class TransformerBase : public Transformer
+{
+public:
+    // Initializes the transformer: remembers the transformer to read from and
+    // caches its stream descriptions as this transformer's input streams.
+    // The configuration is not used by the base class.
+    virtual void Initialize(TransformerPtr next,
+                            const ConfigParameters &) override
+    {
+        // next is dereferenced right below; assert like the other entry points
+        // do instead of silently dereferencing a null pointer.
+        assert(next != nullptr);
+        m_next = next;
+        m_inputStreams = m_next->GetStreamDescriptions();
+    }
+
+    // Sets configuration for the current epoch by forwarding it to the next transformer.
+    virtual void StartEpoch(const EpochConfiguration &config) override
+    {
+        assert(m_next != nullptr);
+        m_next->StartEpoch(config);
+    }
+
+    // Description of streams that the transformer provides.
+    virtual std::vector<StreamDescriptionPtr> GetStreamDescriptions() const override
+    {
+        return this->GetOutputStreams();
+    }
+
+    // Gets next sequences up to a maximum count of samples.
+    // Sequences contains data for all streams; only the streams listed by
+    // GetAppliedStreamIds() are replaced with the result of Apply().
+    virtual Sequences GetNextSequences(size_t sampleCount) override
+    {
+        assert(m_next != nullptr);
+        Sequences samples = m_next->GetNextSequences(sampleCount);
+
+        // Skip the transformation only when there is nothing to transform.
+        // m_endOfEpoch may be set together with the last sequences of the
+        // epoch, and those still have to be transformed before being returned.
+        if (samples.m_data.empty())
+        {
+            return samples;
+        }
+
+        const auto &appliedStreamIds = GetAppliedStreamIds();
+        const auto &outputStreams = GetOutputStreams();
+        assert(m_inputStreams.size() == outputStreams.size());
+
+        // m_buffer is a member, so scratch objects persist between calls.
+        // NOTE(review): presumably this keeps data produced by Apply() alive
+        // until the next call - confirm against the Apply() implementations.
+        m_buffer.resize(samples.m_data.size());
+
+        // The parallel loop keeps a signed index as in the original code;
+        // convert the count once instead of comparing signed with unsigned.
+        const int sequenceCount = static_cast<int>(samples.m_data.size());
+
+        // Each iteration only touches samples.m_data[i] and m_buffer[i]. The
+        // body has no ordered region, so the 'ordered' clause is not needed.
+        // When OpenMP is enabled, Apply() is called concurrently for
+        // different sequences.
+#pragma omp parallel for schedule(dynamic)
+        for (int i = 0; i < sequenceCount; ++i)
+        {
+            auto &sample = samples.m_data[i];
+            assert(sample.size() == m_inputStreams.size());
+
+            m_buffer[i].resize(appliedStreamIds.size());
+            for (size_t j = 0; j < appliedStreamIds.size(); ++j)
+            {
+                const size_t id = appliedStreamIds[j];
+                assert(m_inputStreams[id]->m_storageType == StorageType::dense);
+                // NOTE(review): the cast relies on the stream being dense (see
+                // the assert above); the declaration of DenseSequenceData is
+                // not visible here, so the cast kind is left unchanged.
+                const DenseSequenceData &sequence =
+                    reinterpret_cast<DenseSequenceData &>(*sample[id]);
+                sample[id] = Apply(sequence, *m_inputStreams[id], m_buffer[i][j],
+                                   *outputStreams[id]);
+            }
+        }
+
+        return samples;
+    }
+
+protected:
+    // IDs of the streams Apply() has to be invoked for; the IDs are used as
+    // indices into the input/output stream descriptions and the sequence data.
+    virtual const std::vector<StreamId> &GetAppliedStreamIds() const = 0;
+
+    // Streams produced by this transformer. By default the output streams are
+    // the input streams.
+    virtual const std::vector<StreamDescriptionPtr> &GetOutputStreams() const
+    {
+        return m_inputStreams;
+    }
+
+    // Stream descriptions obtained from the next transformer in Initialize().
+    const std::vector<StreamDescriptionPtr> &GetInputStreams() const
+    {
+        return m_inputStreams;
+    }
+
+private:
+    // Applies transformation to the sequence.
+    // inputSequence: dense data of one stream of one sequence.
+    // inputStream / outputStream: descriptions of that stream before and
+    // after the transformation.
+    // buffer: scratch object dedicated to this (sequence, stream) pair.
+    // Returns the sequence data that replaces inputSequence in the result.
+    virtual SequenceDataPtr Apply(const DenseSequenceData &inputSequence,
+                                  const StreamDescription &inputStream,
+                                  TBufferElement &buffer,
+                                  const StreamDescription &outputStream) = 0;
+
+    // Transformer the input sequences are read from.
+    TransformerPtr m_next;
+    // Scratch objects, indexed by [sequence][position in GetAppliedStreamIds()].
+    std::vector<std::vector<TBufferElement>> m_buffer;
+    // Stream descriptions of m_next, cached in Initialize().
+    std::vector<StreamDescriptionPtr> m_inputStreams;
+};
+
+}}}
--- a/Tests/UnitTests/ReaderTests/ReaderLibTests.cpp
+++ b/Tests/UnitTests/ReaderTests/ReaderLibTests.cpp
@ -0,0 +1,65 @@
+//
+// Copyright (c) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
+//
+
+#include "stdafx.h"
+
+#include "BlockRandomizer.h"
+#include "DataDeserializer.h"
+
+using namespace Microsoft::MSR::CNTK;
+
+namespace Microsoft { namespace MSR { namespace CNTK { namespace Test {
+
+BOOST_AUTO_TEST_SUITE(ReaderLibTests)
+
+// Minimal DataDeserializer stub for tests: it reports no streams, returns a
+// default-constructed set of sequence descriptions and ignores every request.
+class MockDeserializer : public DataDeserializer
+{
+public:
+    // No streams are exposed.
+    std::vector<StreamDescriptionPtr> GetStreamDescriptions() const override
+    {
+        return std::vector<StreamDescriptionPtr>();
+    }
+
+    // Returns the member that is default-constructed and never modified.
+    const SequenceDescriptions& GetSequenceDescriptions() const override
+    {
+        return m_sequenceDescriptions;
+    }
+
+    // The epoch configuration is ignored.
+    void StartEpoch(const EpochConfiguration& /*config*/) override
+    {
+    }
+
+    // Always returns an empty result, regardless of the requested IDs.
+    std::vector<std::vector<SequenceDataPtr>> GetSequencesById(const std::vector<size_t>& /*ids*/) override
+    {
+        std::vector<std::vector<SequenceDataPtr>> nothing;
+        return nothing;
+    }
+
+    // Chunk requests are ignored.
+    void RequireChunk(size_t /*chunkIndex*/) override
+    {
+    }
+
+    // Chunk releases are ignored.
+    void ReleaseChunk(size_t /*chunkIndex*/) override
+    {
+    }
+
+private:
+    SequenceDescriptions m_sequenceDescriptions;
+};
+
+// Smoke test: a BlockRandomizer can be constructed on top of a deserializer
+// that provides no data. Construction is the whole test; nothing is checked
+// afterwards.
+BOOST_AUTO_TEST_CASE(BlockRandomizerInstantiate)
+{
+    auto deserializer = std::make_shared<MockDeserializer>();
+
+    // NOTE(review): the meaning of the first two constructor arguments is not
+    // visible in this file - see BlockRandomizer.h.
+    auto blockRandomizer = std::make_shared<BlockRandomizer>(0, SIZE_MAX, deserializer);
+}
+
+BOOST_AUTO_TEST_SUITE_END()
+
+} } } }
--- a/Tests/UnitTests/ReaderTests/ReaderTests.vcxproj
+++ b/Tests/UnitTests/ReaderTests/ReaderTests.vcxproj
@ -75,14 +75,14 @@
  <PropertyGroup Label="UserMacros" />
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64' Or '$(Configuration)|$(Platform)'=='Debug_CpuOnly|x64'">
    <LinkIncremental>false</LinkIncremental>
-    <IncludePath>..\..\..\Source\Common\include;..\..\..\Source\Math;$(IncludePath)</IncludePath>
+    <IncludePath>..\..\..\Source\Readers\Reader;..\..\..\Source\Common\include;..\..\..\Source\Math;$(IncludePath)</IncludePath>
    <LibraryPath>$(OutDir);$(LibraryPath)</LibraryPath>
    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\UnitTests\</OutDir>
    <IntDir>$(Platform)\$(Configuration)\$(ProjectName)\</IntDir>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64' Or '$(Configuration)|$(Platform)'=='Release_CpuOnly|x64'">
    <LinkIncremental>false</LinkIncremental>
-    <IncludePath>..\..\..\Source\Common\include;..\..\..\Source\Math;$(IncludePath)</IncludePath>
+    <IncludePath>..\..\..\Source\Readers\Reader;..\..\..\Source\Common\include;..\..\..\Source\Math;$(IncludePath)</IncludePath>
    <LibraryPath>$(OutDir);$(LibraryPath)</LibraryPath>
    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\UnitTests\</OutDir>
    <IntDir>$(Platform)\$(Configuration)\$(ProjectName)\</IntDir>
@ -102,7 +102,7 @@
      <SubSystem>Console</SubSystem>
      <GenerateDebugInformation>true</GenerateDebugInformation>
      <AdditionalLibraryDirectories>$(BOOST_LIB_PATH);$(OutDir)..\;</AdditionalLibraryDirectories>
-      <AdditionalDependencies>htkmlfreader.lib;Math.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <AdditionalDependencies>htkmlfreader.lib;Math.lib;Reader.lib;%(AdditionalDependencies)</AdditionalDependencies>
      <OptimizeReferences>true</OptimizeReferences>
    </Link>
  </ItemDefinitionGroup>
@ -127,7 +127,7 @@
      <EnableCOMDATFolding>true</EnableCOMDATFolding>
      <OptimizeReferences>true</OptimizeReferences>
      <AdditionalLibraryDirectories>$(BOOST_LIB_PATH);$(OutDir)..\;</AdditionalLibraryDirectories>
-      <AdditionalDependencies>htkmlfreader.lib;Math.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <AdditionalDependencies>htkmlfreader.lib;Math.lib;Reader.lib;%(AdditionalDependencies)</AdditionalDependencies>
    </Link>
  </ItemDefinitionGroup>
  <ItemDefinitionGroup Condition="!$(UsesCuda)">
@ -149,6 +149,7 @@
    <ClCompile Include="..\..\..\Source\Common\fileutil.cpp" />
    <ClCompile Include="..\..\..\Source\Common\TimerUtility.cpp" />
    <ClCompile Include="HTKLMFReaderTests.cpp" />
+    <ClCompile Include="ReaderLibTests.cpp" />
    <ClCompile Include="stdafx.cpp">
      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'  Or '$(Configuration)|$(Platform)'=='Debug_CpuOnly|x64'">Create</PrecompiledHeader>
      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64' Or '$(Configuration)|$(Platform)'=='Release_CpuOnly|x64'">Create</PrecompiledHeader>
--- a/Tests/UnitTests/ReaderTests/ReaderTests.vcxproj.filters
+++ b/Tests/UnitTests/ReaderTests/ReaderTests.vcxproj.filters
@ -32,6 +32,7 @@
    <ClCompile Include="..\..\..\Source\Common\Config.cpp">
      <Filter>Common</Filter>
    </ClCompile>
+    <ClCompile Include="ReaderLibTests.cpp" />
  </ItemGroup>
  <ItemGroup>
    <Filter Include="Common">