Add new image reader under new reader architecture

This commit is contained in:
Mark Hillebrand 2016-01-25 16:49:09 +01:00
Родитель 69112d13c8
Коммит 52d6444920
41 изменённых файлов: 3513 добавлений и 5 удалений

Просмотреть файл

@ -360,6 +360,7 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ReaderTests", "Tests\UnitTe
ProjectSection(ProjectDependencies) = postProject
{33D2FD22-DEF2-4507-A58A-368F641AEBE5} = {33D2FD22-DEF2-4507-A58A-368F641AEBE5}
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5} = {60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}
{F0A9637C-20DA-42F0-83D4-23B4704DE602} = {F0A9637C-20DA-42F0-83D4-23B4704DE602}
{E6646FFE-3588-4276-8A15-8D65C22711C1} = {E6646FFE-3588-4276-8A15-8D65C22711C1}
EndProjectSection
EndProject
@ -757,6 +758,17 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "03_ConvBatchNorm", "03_Conv
Tests\EndToEndTests\Examples\Image\MNIST\03_ConvBatchNorm\testcases.yml = Tests\EndToEndTests\Examples\Image\MNIST\03_ConvBatchNorm\testcases.yml
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "NewImageReader", "Source\Readers\NewImageReader\NewImageReader.vcxproj", "{9BD0A711-0BBD-45B6-B81C-053F03C26CFB}"
ProjectSection(ProjectDependencies) = postProject
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5} = {60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}
{F0A9637C-20DA-42F0-83D4-23B4704DE602} = {F0A9637C-20DA-42F0-83D4-23B4704DE602}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Reader", "Source\Readers\Reader\Reader.vcxproj", "{F0A9637C-20DA-42F0-83D4-23B4704DE602}"
ProjectSection(ProjectDependencies) = postProject
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5} = {60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}
EndProjectSection
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug_CpuOnly|x64 = Debug_CpuOnly|x64
@ -947,6 +959,18 @@ Global
{41E11A59-62B2-4927-A4F8-F40B1B612C6C}.Release_CpuOnly|x64.Build.0 = Release_CpuOnly|x64
{41E11A59-62B2-4927-A4F8-F40B1B612C6C}.Release|x64.ActiveCfg = Release|x64
{41E11A59-62B2-4927-A4F8-F40B1B612C6C}.Release|x64.Build.0 = Release|x64
{9BD0A711-0BBD-45B6-B81C-053F03C26CFB}.Debug_CpuOnly|x64.ActiveCfg = Debug|x64
{9BD0A711-0BBD-45B6-B81C-053F03C26CFB}.Debug|x64.ActiveCfg = Debug|x64
{9BD0A711-0BBD-45B6-B81C-053F03C26CFB}.Debug|x64.Build.0 = Debug|x64
{9BD0A711-0BBD-45B6-B81C-053F03C26CFB}.Release_CpuOnly|x64.ActiveCfg = Release|x64
{9BD0A711-0BBD-45B6-B81C-053F03C26CFB}.Release|x64.ActiveCfg = Release|x64
{9BD0A711-0BBD-45B6-B81C-053F03C26CFB}.Release|x64.Build.0 = Release|x64
{F0A9637C-20DA-42F0-83D4-23B4704DE602}.Debug_CpuOnly|x64.ActiveCfg = Debug|x64
{F0A9637C-20DA-42F0-83D4-23B4704DE602}.Debug|x64.ActiveCfg = Debug|x64
{F0A9637C-20DA-42F0-83D4-23B4704DE602}.Debug|x64.Build.0 = Debug|x64
{F0A9637C-20DA-42F0-83D4-23B4704DE602}.Release_CpuOnly|x64.ActiveCfg = Release|x64
{F0A9637C-20DA-42F0-83D4-23B4704DE602}.Release|x64.ActiveCfg = Release|x64
{F0A9637C-20DA-42F0-83D4-23B4704DE602}.Release|x64.Build.0 = Release|x64
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
@ -1050,5 +1074,7 @@ Global
{6F1D0CE1-0F18-4B4C-9581-1F2146C8D300} = {63C6816D-66BF-487E-B541-094142C8272B}
{A0B366FE-2EEA-4E32-9AED-12C46409C30C} = {63C6816D-66BF-487E-B541-094142C8272B}
{BD783D50-47E2-485F-BDAF-29BD40D84645} = {63C6816D-66BF-487E-B541-094142C8272B}
{9BD0A711-0BBD-45B6-B81C-053F03C26CFB} = {33EBFE78-A1A8-4961-8938-92A271941F94}
{F0A9637C-20DA-42F0-83D4-23B4704DE602} = {33EBFE78-A1A8-4961-8938-92A271941F94}
EndGlobalSection
EndGlobal

Просмотреть файл

@ -57,7 +57,7 @@ endif
CXX = mpic++
SOURCEDIR:= Source
INCLUDEPATH:= $(addprefix $(SOURCEDIR)/, Common/Include Math CNTK ActionsLib ComputationNetworkLib SGDLib SequenceTrainingLib CNTK/BrainScript)
INCLUDEPATH:= $(addprefix $(SOURCEDIR)/, Common/Include Math CNTK ActionsLib ComputationNetworkLib SGDLib SequenceTrainingLib CNTK/BrainScript Readers/Reader)
CPPFLAGS:= -D_POSIX_SOURCE -D_XOPEN_SOURCE=600 -D__USE_XOPEN2K
CXXFLAGS:= -msse3 -std=c++0x -std=c++11 -fopenmp -fpermissive -fPIC -Werror -fcheck-new
LIBPATH:=
@ -211,6 +211,12 @@ $(BUILDINFO): $(GENBUILD)
########################################
# Define all sources that need to be built
READER_SRC =\
$(SOURCEDIR)/Readers/Reader/SampleModePacker.cpp \
$(SOURCEDIR)/Readers/Reader/BlockRandomizer.cpp \
$(SOURCEDIR)/Readers/Reader/NoRandomizer.cpp \
$(SOURCEDIR)/Readers/Reader/ReaderShim.cpp \
COMMON_SRC =\
$(SOURCEDIR)/Common/Config.cpp \
$(SOURCEDIR)/Common/DataReader.cpp \
@ -249,6 +255,7 @@ MATH_SRC +=\
endif
MATH_SRC+=$(COMMON_SRC)
MATH_SRC+=$(READER_SRC)
MATH_OBJ := $(patsubst %.cu, $(OBJDIR)/%.o, $(patsubst %.cpp, $(OBJDIR)/%.o, $(MATH_SRC)))
@ -447,6 +454,32 @@ $(IMAGEREADER): $(IMAGEREADER_OBJ) | $(CNTKMATH_LIB)
$(CXX) $(LDFLAGS) -shared $(patsubst %,-L%, $(LIBDIR) $(LIBPATH)) $(patsubst %,$(RPATH)%, $(ORIGINDIR) $(LIBPATH)) -o $@ $^ -l$(CNTKMATH) -lopencv_core -lopencv_imgproc -lopencv_imgcodecs
endif
########################################
# NewImageReader plugin
########################################
ifdef OPENCV_PATH
NEWIMAGEREADER_SRC =\
$(SOURCEDIR)/Readers/NewImageReader/Exports.cpp \
$(SOURCEDIR)/Readers/NewImageReader/ImageConfigHelper.cpp \
$(SOURCEDIR)/Readers/NewImageReader/ImageDataDeserializer.cpp \
$(SOURCEDIR)/Readers/NewImageReader/ImageTransformers.cpp \
$(SOURCEDIR)/Readers/NewImageReader/ImageReader.cpp \
NEWIMAGEREADER_OBJ := $(patsubst %.cpp, $(OBJDIR)/%.o, $(NEWIMAGEREADER_SRC))
NEWIMAGEREADER:=$(LIBDIR)/NewImageReader.so
ALL += $(NEWIMAGEREADER)
SRC+=$(NEWIMAGEREADER_SRC)
INCLUDEPATH += $(OPENCV_PATH)/include
LIBPATH += $(OPENCV_PATH)/release/lib
$(NEWIMAGEREADER): $(NEWIMAGEREADER_OBJ) | $(CNTKMATH_LIB)
@echo $(SEPARATOR)
$(CXX) $(LDFLAGS) -shared $(patsubst %,-L%, $(LIBDIR) $(LIBPATH)) $(patsubst %,$(RPATH)%, $(ORIGINDIR) $(LIBPATH)) -o $@ $^ -l$(CNTKMATH) -lopencv_core -lopencv_imgproc -lopencv_imgcodecs
endif
########################################
# 1bit SGD setup
########################################

Просмотреть файл

@ -1,5 +1,7 @@
#pragma once
#include <string>
#define MILLI_PER_SEC 1000
#define MICRO_PER_SEC 1000000
#define NANO_PER_SEC 1000000000
@ -33,4 +35,32 @@ private:
long long m_start;
long long m_end;
};
// Measures the lifetime of the enclosing scope and reports it on stderr.
// Timing and reporting only happen when the verbosity passed to the
// constructor is greater than 2; otherwise the object does nothing.
// NOTE: 'message' is used as a printf-style format string and receives the
// elapsed time in seconds as its single (double) argument.
class ScopeTimer
{
    Timer m_aggregateTimer;
    size_t m_verbosity;
    std::string m_message;

    // True when the verbosity level asks for timing output.
    bool IsActive() const
    {
        return m_verbosity > 2;
    }

public:
    // Starts the timer right away if timing is active.
    ScopeTimer(size_t verbosity, const std::string& message)
        : m_verbosity(verbosity), m_message(message)
    {
        if (IsActive())
        {
            m_aggregateTimer.Start();
        }
    }

    // Stops the timer and prints the formatted message if timing is active.
    ~ScopeTimer()
    {
        if (!IsActive())
        {
            return;
        }

        m_aggregateTimer.Stop();
        const double elapsedSeconds = m_aggregateTimer.ElapsedSeconds();
        fprintf(stderr, m_message.c_str(), elapsedSeconds);
    }
};
} } }

Просмотреть файл

@ -0,0 +1,34 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
// Exports.cpp : Defines the exported functions for the DLL application.
//
#include "stdafx.h"
#define DATAREADER_EXPORTS
#include "DataReader.h"
#include "ReaderShim.h"
#include "ImageReader.h"
#include "HeapMemoryProvider.h"
#include "CudaMemoryProvider.h"
namespace Microsoft { namespace MSR { namespace CNTK {
// Factory handed to ReaderShim: creates an ImageReader for the given
// configuration, backed by a heap memory provider.
// TODO: Memory provider should be injected by SGD.
auto factory = [](const ConfigParameters& parameters) -> ReaderPtr
{
    auto memoryProvider = std::make_shared<HeapMemoryProvider>();
    return std::make_shared<ImageReader>(memoryProvider, parameters);
};
// Exported entry point: stores a newly allocated single-precision reader shim
// (wrapping the image reader factory) into *preader.
extern "C" DATAREADER_API void GetReaderF(IDataReader<float>** preader)
{
    IDataReader<float>* shim = new ReaderShim<float>(factory);
    *preader = shim;
}
// Exported entry point: stores a newly allocated double-precision reader shim
// (wrapping the image reader factory) into *preader.
extern "C" DATAREADER_API void GetReaderD(IDataReader<double>** preader)
{
    IDataReader<double>* shim = new ReaderShim<double>(factory);
    *preader = shim;
}
} } }

Просмотреть файл

@ -0,0 +1,134 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
#include "stdafx.h"
#include "ImageConfigHelper.h"
#include "StringUtils.h"
namespace Microsoft { namespace MSR { namespace CNTK {
std::vector<std::string> GetSectionsWithParameter(const ConfigParameters& config, const std::string& parameterName)
{
std::vector<std::string> sectionNames;
for (const std::pair<std::string, ConfigParameters>& section : config)
{
if (section.second.ExistsCurrent(parameterName))
{
sectionNames.push_back(section.first);
}
}
if (sectionNames.empty())
{
RuntimeError("ImageReader requires %s parameter.", parameterName.c_str());
}
return sectionNames;
}
// Parses the image reader configuration.
// - Locates the single feature section (the one defining "width") and the
//   single label section (the one defining "labelDim").
// - Builds two stream descriptions: id 0 for features, id 1 for labels.
// - Reads the map file path ("file"), the randomization mode ("randomize"),
//   the element precision ("precision") and the thread count ("numCPUThreads").
// Raises a runtime error on unsupported or missing values.
ImageConfigHelper::ImageConfigHelper(const ConfigParameters& config)
    : m_dataFormat(CHW)
{
    std::vector<std::string> featureSections = GetSectionsWithParameter(config, "width");
    std::vector<std::string> labelSections = GetSectionsWithParameter(config, "labelDim");

    // REVIEW alexeyk: currently support only one feature and label section.
    const bool singleFeature = featureSections.size() == 1;
    const bool singleLabel = labelSections.size() == 1;
    if (!(singleFeature && singleLabel))
    {
        RuntimeError(
            "ImageReader currently supports a single feature and label stream. '%d' features , '%d' labels found.",
            static_cast<int>(featureSections.size()),
            static_cast<int>(labelSections.size()));
    }

    // Feature section: image geometry and minibatch layout.
    ConfigParameters featureConfig = config(featureSections[0]);
    size_t width = featureConfig("width");
    size_t height = featureConfig("height");
    size_t channels = featureConfig("channels");

    std::string layoutName = featureConfig("mbFormat", "nchw");
    if (AreEqualIgnoreCase(layoutName, "nchw"))
    {
        // Default layout (CHW) was already set in the initializer list.
    }
    else if (AreEqualIgnoreCase(layoutName, "nhwc"))
    {
        m_dataFormat = HWC;
    }
    else
    {
        RuntimeError("ImageReader does not support the sample format '%s', only 'nchw' and 'nhwc' are supported.", layoutName.c_str());
    }

    auto featureStream = std::make_shared<StreamDescription>();
    featureStream->m_id = 0;
    featureStream->m_name = msra::strfun::utf16(featureConfig.ConfigName());
    featureStream->m_sampleLayout = std::make_shared<TensorShape>(ImageDimensions(width, height, channels).AsTensorShape(m_dataFormat));
    m_streams.push_back(featureStream);

    // Label section: only the label dimension is needed.
    ConfigParameters labelConfig = config(labelSections[0]);
    size_t labelDimension = labelConfig("labelDim");

    auto labelStream = std::make_shared<StreamDescription>();
    labelStream->m_id = 1;
    labelStream->m_name = msra::strfun::utf16(labelConfig.ConfigName());
    labelStream->m_sampleLayout = std::make_shared<TensorShape>(labelDimension);
    m_streams.push_back(labelStream);

    m_mapPath = config(L"file");

    // Randomization mode: "auto" enables it, "none" disables it.
    std::string randomizeMode = config(L"randomize", "auto");
    const bool randomizeAuto = AreEqualIgnoreCase(randomizeMode, "auto");
    if (!randomizeAuto && !AreEqualIgnoreCase(randomizeMode, "none"))
    {
        RuntimeError("'randomize' parameter must be set to 'auto' or 'none'");
    }
    m_randomize = randomizeAuto;

    // Identify precision; both streams share the same element type.
    std::string precision = config.Find("precision", "float");
    ElementType elementType = ElementType::tfloat;
    if (AreEqualIgnoreCase(precision, "float"))
    {
        elementType = ElementType::tfloat;
    }
    else if (AreEqualIgnoreCase(precision, "double"))
    {
        elementType = ElementType::tdouble;
    }
    else
    {
        RuntimeError("Not supported precision '%s'. Expected 'double' or 'float'.", precision.c_str());
    }
    featureStream->m_elementType = elementType;
    labelStream->m_elementType = elementType;

    m_cpuThreadCount = config(L"numCPUThreads", 0);
}
// Returns a copy of the stream descriptions built by the constructor:
// the feature stream first, followed by the label stream.
std::vector<StreamDescriptionPtr> ImageConfigHelper::GetStreams() const
{
return m_streams;
}
// Returns the index of the feature stream within GetStreams().
size_t ImageConfigHelper::GetFeatureStreamId() const
{
    // Only a single feature/label stream pair is supported at the moment,
    // so the feature stream always sits at a fixed position.
    const size_t featureStreamId = 0;
    return featureStreamId;
}
// Returns the index of the label stream within GetStreams().
size_t ImageConfigHelper::GetLabelStreamId() const
{
    // Only a single feature/label stream pair is supported at the moment,
    // so the label stream always sits at a fixed position.
    const size_t labelStreamId = 1;
    return labelStreamId;
}
// Returns the value of the "file" configuration parameter, i.e. the path of
// the map file read by the constructor.
std::string ImageConfigHelper::GetMapPath() const
{
return m_mapPath;
}
}}}

Просмотреть файл

@ -0,0 +1,61 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
#pragma once
#include <string>
#include <vector>
#include "Config.h"
#include "Reader.h"
namespace Microsoft { namespace MSR { namespace CNTK {
// A helper class for image specific parameters.
// A simple wrapper around CNTK ConfigParameters.
// All values are parsed once in the constructor; the accessors below only
// return the cached results. The class is non-copyable.
class ImageConfigHelper
{
public:
// Parses 'config'; raises a runtime error on missing or unsupported values.
explicit ImageConfigHelper(const ConfigParameters& config);
// Get all streams that are specified in the configuration.
std::vector<StreamDescriptionPtr> GetStreams() const;
// Get index of the feature stream.
size_t GetFeatureStreamId() const;
// Get index of the label stream.
size_t GetLabelStreamId() const;
// Get the map file path that describes mapping of images into their labels.
std::string GetMapPath() const;
// Get the sample layout selected by the "mbFormat" parameter
// ("nchw", the default, maps to CHW; "nhwc" maps to HWC).
ImageLayoutKind GetDataFormat() const
{
return m_dataFormat;
}
// Get the value of the "numCPUThreads" parameter (defaults to 0).
int GetCpuThreadCount() const
{
return m_cpuThreadCount;
}
// True when the "randomize" parameter is "auto" (the default), false when it is "none".
bool ShouldRandomize() const
{
return m_randomize;
}
private:
// Copying is disabled.
ImageConfigHelper(const ImageConfigHelper&) = delete;
ImageConfigHelper& operator=(const ImageConfigHelper&) = delete;
// Value of the "file" parameter.
std::string m_mapPath;
// Stream descriptions: features at index 0, labels at index 1.
std::vector<StreamDescriptionPtr> m_streams;
// Layout of the feature samples.
ImageLayoutKind m_dataFormat;
// Requested number of CPU threads.
int m_cpuThreadCount;
// Whether the input should be randomized.
bool m_randomize;
};
typedef std::shared_ptr<ImageConfigHelper> ImageConfigHelperPtr;
} } }

Просмотреть файл

@ -0,0 +1,209 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
#include "stdafx.h"
#include <opencv2/opencv.hpp>
#include "ImageDataDeserializer.h"
#include "ImageConfigHelper.h"
#ifndef UNREFERENCED_PARAMETER
#define UNREFERENCED_PARAMETER(P) (P)
#endif
namespace Microsoft { namespace MSR { namespace CNTK {
// Abstract interface for producing the label of a single image.
// Concrete implementations are chosen by the deserializer based on the
// element type of the label stream.
class ImageDataDeserializer::LabelGenerator
{
public:
// Fills 'data' with the sparse label corresponding to 'classId'.
virtual void CreateLabelFor(size_t classId, SparseSequenceData& data) = 0;
virtual ~LabelGenerator() { }
};
// A helper class to generate a typed label in a sparse format.
// A label is just a category/class the image belongs to.
// It is represented as an array indexed by the category with zero values for all categories the image does not belong to,
// and a single one for the category it belongs to: [ 0, .. 0.. 1 .. 0 ]
// The class is parameterized because the representation of 1 is type specific.
template <class TElement>
class TypedLabelGenerator : public ImageDataDeserializer::LabelGenerator
{
public:
    TypedLabelGenerator() : m_value(1)
    {
    }

    // Stores the index of the single non-zero entry (classId) and points the
    // data pointer at the shared constant "1" owned by this generator.
    virtual void CreateLabelFor(size_t classId, SparseSequenceData& data) override
    {
        std::vector<size_t> nonZeroIndex(1, classId);
        data.m_indices.resize(1);
        data.m_indices[0] = std::move(nonZeroIndex);
        data.m_data = &m_value;
    }

private:
    // The value "1" in the element type of the label stream.
    TElement m_value;
};
// Builds the deserializer from the reader configuration:
// - obtains the feature and label stream descriptions from ImageConfigHelper,
// - rewrites the feature sample layout to HWC (the layout images are delivered in),
// - marks labels as sparse (CSC) and features as dense,
// - selects a label generator matching the label element type,
// - loads all sequence descriptions from the map file.
// Raises a runtime error for an unsupported label element type or a bad map file.
ImageDataDeserializer::ImageDataDeserializer(const ConfigParameters& config)
{
    ImageConfigHelper configHelper(config);
    m_streams = configHelper.GetStreams();
    assert(m_streams.size() == 2);
    const auto& label = m_streams[configHelper.GetLabelStreamId()];
    const auto& feature = m_streams[configHelper.GetFeatureStreamId()];

    // Expect data in HWC.
    ImageDimensions dimensions(*feature->m_sampleLayout, configHelper.GetDataFormat());
    feature->m_sampleLayout = std::make_shared<TensorShape>(dimensions.AsTensorShape(HWC));

    label->m_storageType = StorageType::sparse_csc;
    feature->m_storageType = StorageType::dense;

    m_featureElementType = feature->m_elementType;
    size_t labelDimension = label->m_sampleLayout->GetDim(0);

    if (label->m_elementType == ElementType::tfloat)
    {
        m_labelGenerator = std::make_shared<TypedLabelGenerator<float>>();
    }
    else if (label->m_elementType == ElementType::tdouble)
    {
        m_labelGenerator = std::make_shared<TypedLabelGenerator<double>>();
    }
    else
    {
        // The enum is converted explicitly: '%d' expects an int, as for all
        // other formatted arguments in this file.
        RuntimeError("Unsupported label element type '%d'.", static_cast<int>(label->m_elementType));
    }

    CreateSequenceDescriptions(configHelper.GetMapPath(), labelDimension);
}
// Reads the map file and creates one sequence description per line.
// Each line must contain two tab-delimited columns: the image path and the
// integer class id. The (zero-based) line index is used both as sequence id
// and as chunk id.
// Raises a runtime error if the file cannot be opened, a line does not have
// two columns, or a class id is not smaller than 'labelDimension'.
void ImageDataDeserializer::CreateSequenceDescriptions(std::string mapPath, size_t labelDimension)
{
    std::ifstream mapFile(mapPath);
    if (!mapFile)
    {
        RuntimeError("Could not open %s for reading.", mapPath.c_str());
    }

    std::string line;
    ImageSequenceDescription description;
    description.m_numberOfSamples = 1;
    description.m_isValid = true;
    for (size_t lineIndex = 0; std::getline(mapFile, line); ++lineIndex)
    {
        std::stringstream ss(line);
        std::string imagePath;
        std::string classId;
        if (!std::getline(ss, imagePath, '\t') || !std::getline(ss, classId, '\t'))
        {
            RuntimeError("Invalid map file format, must contain 2 tab-delimited columns: %s, line: %d.",
                         mapPath.c_str(),
                         static_cast<int>(lineIndex));
        }

        description.m_id = lineIndex;
        description.m_chunkId = lineIndex;
        description.m_path = imagePath;
        // A negative id wraps around to a huge unsigned value when stored in
        // m_classId and is therefore rejected by the range check below.
        description.m_classId = std::stoi(classId);

        if (description.m_classId >= labelDimension)
        {
            // Report the offending image, not the map file it was listed in.
            RuntimeError(
                "Image '%s' has invalid class id '%d'. Expected label dimension is '%d'.",
                imagePath.c_str(),
                static_cast<int>(description.m_classId),
                static_cast<int>(labelDimension));
        }
        m_imageSequences.push_back(description);
    }
}
// Returns a copy of the stream descriptions stored by the constructor
// (as obtained from ImageConfigHelper, with the feature layout rewritten to HWC).
std::vector<StreamDescriptionPtr> ImageDataDeserializer::GetStreamDescriptions() const
{
return m_streams;
}
// Loads the images with the given sequence ids and returns, for every id (in
// the same order), a pair of sequences: { dense image data, sparse label }.
// The returned image data points into m_currentImages and the labels are the
// entries of m_labels; both buffers are reused by the next call.
// Raises a runtime error if 'ids' is empty, an id is out of range, or an
// image cannot be read.
std::vector<std::vector<SequenceDataPtr>> ImageDataDeserializer::GetSequencesById(const std::vector<size_t>& ids)
{
    if (ids.empty())
    {
        RuntimeError("Number of requested sequences cannot be zero.");
    }

    // Validate all ids up front: an exception must not propagate out of the
    // OpenMP parallel loop below, so all error reporting is done outside of it.
    for (size_t id : ids)
    {
        if (id >= m_imageSequences.size())
        {
            RuntimeError("Invalid sequence id is provided '%d', expected range [0..%d].",
                         static_cast<int>(id),
                         static_cast<int>(m_imageSequences.size()) - 1);
        }
    }

    m_currentImages.resize(ids.size());
    m_labels.resize(ids.size());

    std::vector<std::vector<SequenceDataPtr>> result;
    result.resize(ids.size());

    const int count = static_cast<int>(ids.size());

    // Smallest loop index whose image could not be read, or -1 if all succeeded.
    int failedIndex = -1;

#pragma omp parallel for ordered schedule(dynamic)
    for (int i = 0; i < count; ++i)
    {
        const auto& imageSequence = m_imageSequences[ids[i]];

        // Construct image
        m_currentImages[i] = cv::imread(imageSequence.m_path, cv::IMREAD_COLOR);
        cv::Mat& cvImage = m_currentImages[i];
        if (!cvImage.data)
        {
            // Remember the failure and report it once the loop has finished.
#pragma omp critical
            {
                if (failedIndex < 0 || i < failedIndex)
                {
                    failedIndex = i;
                }
            }
            continue;
        }

        // Convert element type.
        // TODO We should allow all native CV element types to be able to match the behavior of the old reader.
        int dataType = m_featureElementType == ElementType::tfloat ? CV_32F : CV_64F;
        if (cvImage.type() != CV_MAKETYPE(dataType, cvImage.channels()))
        {
            cvImage.convertTo(cvImage, dataType);
        }

        // The raw data pointer handed out below requires contiguous storage.
        if (!cvImage.isContinuous())
        {
            cvImage = cvImage.clone();
        }
        assert(cvImage.isContinuous());

        ImageDimensions dimensions(cvImage.cols, cvImage.rows, cvImage.channels());
        auto image = std::make_shared<DenseSequenceData>();
        image->m_data = cvImage.data;
        image->m_sampleLayout = std::make_shared<TensorShape>(dimensions.AsTensorShape(HWC));
        image->m_numberOfSamples = 1;

        if (m_labels[i] == nullptr)
        {
            m_labels[i] = std::make_shared<SparseSequenceData>();
        }
        m_labelGenerator->CreateLabelFor(imageSequence.m_classId, *m_labels[i]);

        result[i] = std::vector<SequenceDataPtr>{image, m_labels[i]};
    }

    if (failedIndex >= 0)
    {
        RuntimeError("Cannot open file '%s'", m_imageSequences[ids[failedIndex]].m_path.c_str());
    }

    return result;
}
// Fills 'timeline' with pointers to the stored image sequence descriptions,
// one entry per sequence, in the order they were read from the map file.
void ImageDataDeserializer::FillSequenceDescriptions(SequenceDescriptions& timeline) const
{
    timeline.resize(m_imageSequences.size());

    auto slot = timeline.begin();
    for (const ImageSequenceDescription& description : m_imageSequences)
    {
        *slot = &description;
        ++slot;
    }
}
}}}

Просмотреть файл

@ -0,0 +1,61 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
#pragma once
#include <opencv2/core/mat.hpp>
#include "DataDeserializerBase.h"
#include "Config.h"
namespace Microsoft { namespace MSR { namespace CNTK {
// Image data deserializer based on the OpenCV library.
// The deserializer currently supports two output streams only: a feature and a label stream.
// All sequences consist only of a single sample (image/label).
// For features it uses dense storage format with different layout (dimensions) per sequence.
// For labels it uses the csc sparse storage format.
class ImageDataDeserializer : public DataDeserializerBase
{
public:
// Parses the configuration and loads all sequence descriptions from the map file.
explicit ImageDataDeserializer(const ConfigParameters& config);
// Description of streams that this data deserializer provides.
std::vector<StreamDescriptionPtr> GetStreamDescriptions() const override;
// Get sequences by specified ids. Order of returned sequences corresponds to the order of provided ids.
// The returned data refers to internal buffers that are reused by the next call.
std::vector<std::vector<SequenceDataPtr>> GetSequencesById(const std::vector<size_t>& ids) override;
protected:
// Fills the timeline with pointers to the stored image sequence descriptions.
void FillSequenceDescriptions(SequenceDescriptions& timeline) const override;
private:
// Creates a set of sequence descriptions.
// Each line of the map file yields one description; class ids must be smaller than labelDimension.
void CreateSequenceDescriptions(std::string mapPath, size_t labelDimension);
// Image sequence descriptions. Currently, a sequence contains a single sample only.
struct ImageSequenceDescription : public SequenceDescription
{
// Path of the image file, as given in the map file.
std::string m_path;
// Class (category) id of the image, as given in the map file.
size_t m_classId;
};
// A helper class for generation of type specific labels (currently float/double only).
class LabelGenerator;
typedef std::shared_ptr<LabelGenerator> LabelGeneratorPtr;
LabelGeneratorPtr m_labelGenerator;
// Sequence descriptions for all input data.
std::vector<ImageSequenceDescription> m_imageSequences;
// Buffer to store label data.
std::vector<SparseSequenceDataPtr> m_labels;
// Buffer to store feature data.
std::vector<cv::Mat> m_currentImages;
// Element type of the feature/label stream (currently float/double only).
ElementType m_featureElementType;
};
}}}

Просмотреть файл

@ -0,0 +1,97 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
#include "stdafx.h"
#include "ImageReader.h"
#include "Config.h"
#include "ImageConfigHelper.h"
#include "ImageTransformers.h"
#include "BlockRandomizer.h"
#include "NoRandomizer.h"
#include "ImageDataDeserializer.h"
#include <omp.h>
namespace Microsoft { namespace MSR { namespace CNTK {
// Wires up the image reading pipeline:
//   deserializer -> randomizer -> crop -> scale -> mean [-> transpose]
// The transpose step is only appended when the configured data format is CHW.
// If a positive CPU thread count is configured, it is applied to OpenMP.
ImageReader::ImageReader(MemoryProviderPtr provider,
                         const ConfigParameters& config)
    : m_seed(0), m_provider(provider)
{
    // In the future, deserializers and transformers will be dynamically loaded
    // from external libraries based on the configuration/brain script.
    // We will provide ability to implement the transformer and
    // deserializer interface not only in C++ but in scripting languages as well.

    ImageConfigHelper configHelper(config);
    m_streams = configHelper.GetStreams();
    assert(m_streams.size() == 2);

    const int requestedThreads = configHelper.GetCpuThreadCount();
    if (requestedThreads > 0)
    {
        omp_set_num_threads(requestedThreads);
    }

    auto deserializer = std::make_shared<ImageDataDeserializer>(config);

    // The first stage of the chain either shuffles the input or passes it through.
    TransformerPtr randomizer;
    if (!configHelper.ShouldRandomize())
    {
        randomizer = std::make_shared<NoRandomizer>(deserializer);
    }
    else
    {
        randomizer = std::make_shared<BlockRandomizer>(0, SIZE_MAX, deserializer);
    }
    randomizer->Initialize(nullptr, config);

    auto cropper = std::make_shared<CropTransformer>();
    cropper->Initialize(randomizer, config);

    auto scaler = std::make_shared<ScaleTransformer>();
    scaler->Initialize(cropper, config);

    auto mean = std::make_shared<MeanTransformer>();
    mean->Initialize(scaler, config);

    if (configHelper.GetDataFormat() == CHW)
    {
        TransformerPtr transposer = std::make_shared<TransposeTransformer>();
        transposer->Initialize(mean, config);
        m_transformer = transposer;
    }
    else
    {
        m_transformer = mean;
    }
}
// Returns a copy of the stream descriptions obtained from the configuration
// in the constructor. The list is expected to be non-empty.
std::vector<StreamDescriptionPtr> ImageReader::GetStreamDescriptions()
{
assert(!m_streams.empty());
return m_streams;
}
// Starts a new epoch: forwards the epoch configuration to the transformer
// chain and creates a fresh packer for the requested minibatch size.
// Raises a runtime error if the total epoch size is not positive.
void ImageReader::StartEpoch(const EpochConfiguration& config)
{
    if (config.m_totalEpochSizeInSamples <= 0)
    {
        // The value being validated is the epoch size, so name it as such.
        RuntimeError("Unsupported epoch size '%d'.",
                     static_cast<int>(config.m_totalEpochSizeInSamples));
    }

    m_transformer->StartEpoch(config);
    m_packer = std::make_shared<SampleModePacker>(
        m_provider,
        m_transformer,
        config.m_minibatchSizeInSamples,
        m_streams);
}
// Reads the next minibatch from the packer.
// The packer is created in StartEpoch, which therefore has to be called first.
Minibatch ImageReader::ReadMinibatch()
{
assert(m_packer != nullptr);
return m_packer->ReadMinibatch();
}
} } }

Просмотреть файл

@ -0,0 +1,49 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
#pragma once
#include "Reader.h"
#include "ImageTransformers.h"
#include "SampleModePacker.h"
namespace Microsoft { namespace MSR { namespace CNTK {
// Implementation of the image reader.
// Effectively the class represents a factory for connecting the packer,
// transformers and deserializer together.
class ImageReader : public Reader
{
public:
// Builds the deserializer/transformer chain from the given configuration.
ImageReader(MemoryProviderPtr provider,
const ConfigParameters& parameters);
// Description of streams that this reader provides.
std::vector<StreamDescriptionPtr> GetStreamDescriptions() override;
// Starts a new epoch with the provided configuration.
void StartEpoch(const EpochConfiguration& config) override;
// Reads a single minibatch.
Minibatch ReadMinibatch() override;
private:
// All streams this reader provides.
std::vector<StreamDescriptionPtr> m_streams;
// A head transformer in a list of transformers.
TransformerPtr m_transformer;
// Packer.
SampleModePackerPtr m_packer;
// Seed for the random generator.
unsigned int m_seed;
// Memory provider (TODO: this will possibly change in the near future.)
MemoryProviderPtr m_provider;
};
}}}

Просмотреть файл

@ -0,0 +1,451 @@
//
// <copyright company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
#include "stdafx.h"
#include <algorithm>
#include <unordered_map>
#include <random>
#include "ImageTransformers.h"
#include "Config.h"
#include "ConcStack.h"
#include "ImageConfigHelper.h"
#include "StringUtils.h"
#include "ElementTypeUtils.h"
namespace Microsoft { namespace MSR { namespace CNTK
{
// Common initialization for all image transformers:
// - initializes the base transformer with the next stage,
// - reads the "seed" parameter (defaults to 0),
// - registers the feature stream as the only stream the transformer is applied to,
// - exposes the input streams unchanged as output streams.
void ImageTransformerBase::Initialize(TransformerPtr next,
                                      const ConfigParameters &readerConfig)
{
    Base::Initialize(next, readerConfig);
    m_seed = std::stoi(readerConfig(L"seed", "0"));

    ImageConfigHelper config(readerConfig);
    m_appliedStreamIds.push_back(config.GetFeatureStreamId());

    const auto &inputStreams = GetInputStreams();
    m_outputStreams.assign(inputStreams.begin(), inputStreams.end());
}
// Applies the image transformation to a single dense sequence.
// The input data is wrapped (without copying) into 'buffer', transformed in
// place by the derived class, made contiguous if necessary, and returned as a
// new dense sequence whose data pointer refers to 'buffer'.
// Raises a runtime error for element types other than float/double.
SequenceDataPtr
ImageTransformerBase::Apply(const DenseSequenceData &inputSequence,
                            const StreamDescription &inputStream, cv::Mat &buffer,
                            const StreamDescription & /*outputStream*/)
{
    ImageDimensions inputDimensions(*inputSequence.m_sampleLayout, HWC);
    const int width = static_cast<int>(inputDimensions.m_width);
    const int height = static_cast<int>(inputDimensions.m_height);
    const int channelCount = static_cast<int>(inputDimensions.m_numChannels);

    // Map the stream element type onto the corresponding OpenCV depth.
    int depth = 0;
    switch (inputStream.m_elementType)
    {
    case ElementType::tdouble:
        depth = CV_64F;
        break;
    case ElementType::tfloat:
        depth = CV_32F;
        break;
    default:
        RuntimeError("Unsupported type");
    }

    buffer = cv::Mat(height, width, CV_MAKETYPE(depth, channelCount), inputSequence.m_data);
    this->Apply(buffer);

    // The result exposes a raw pointer, so the matrix has to be contiguous.
    if (!buffer.isContinuous())
    {
        buffer = buffer.clone();
    }
    assert(buffer.isContinuous());

    ImageDimensions outputDimensions(buffer.cols, buffer.rows, buffer.channels());

    auto output = std::make_shared<DenseSequenceData>();
    output->m_sampleLayout = std::make_shared<TensorShape>(outputDimensions.AsTensorShape(HWC));
    output->m_numberOfSamples = inputSequence.m_numberOfSamples;
    output->m_data = buffer.ptr();
    return output;
}
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Initializes the crop transformer from the configuration section of the
// (single) feature stream. Raises a runtime error if the transformer is
// applied to anything other than exactly one stream.
void CropTransformer::Initialize(TransformerPtr next,
                                 const ConfigParameters &readerConfig)
{
    ImageTransformerBase::Initialize(next, readerConfig);

    auto appliedIds = GetAppliedStreamIds();
    if (appliedIds.size() != 1)
    {
        RuntimeError("Only a single feature stream is supported.");
    }

    const auto featureStream = GetInputStreams()[appliedIds[0]];
    InitFromConfig(readerConfig(featureStream->m_name));
}
void CropTransformer::InitFromConfig(const ConfigParameters &config)
{
m_cropType = ParseCropType(config(L"cropType", ""));
floatargvector cropRatio = config(L"cropRatio", "1.0");
m_cropRatioMin = cropRatio[0];
m_cropRatioMax = cropRatio[1];
if (!(0 < m_cropRatioMin && m_cropRatioMin <= 1.0) ||
!(0 < m_cropRatioMax && m_cropRatioMax <= 1.0) ||
m_cropRatioMin > m_cropRatioMax)
{
RuntimeError("Invalid cropRatio value, must be > 0 and <= 1. cropMin must "
"<= cropMax");
}
m_jitterType = ParseJitterType(config(L"jitterType", ""));
if (!config.ExistsCurrent(L"hflip"))
{
m_hFlip = m_cropType == CropType::Random;
}
else
{
m_hFlip = std::stoi(config(L"hflip")) != 0;
}
}
// Crops the image to a square region and optionally flips it horizontally.
// The crop ratio is either the configured minimum (no jitter) or drawn
// uniformly from [min, max) (uniform ratio jitter); other jitter types raise
// a runtime error. A random generator is borrowed from the pool for the
// duration of the call.
void CropTransformer::Apply(cv::Mat &mat)
{
    auto seed = GetSeed();
    auto rng = m_rngs.pop_or_create(
        [seed]()
        {
            return std::make_unique<std::mt19937>(seed);
        });

    double cropRatio = 1;
    if (m_jitterType == RatioJitterType::None)
    {
        cropRatio = m_cropRatioMin;
    }
    else if (m_jitterType == RatioJitterType::UniRatio)
    {
        if (m_cropRatioMin != m_cropRatioMax)
        {
            cropRatio = UniRealT(m_cropRatioMin, m_cropRatioMax)(*rng);
            assert(m_cropRatioMin <= cropRatio && cropRatio < m_cropRatioMax);
        }
        else
        {
            // Degenerate interval: nothing to sample.
            cropRatio = m_cropRatioMin;
        }
    }
    else
    {
        RuntimeError("Jitter type currently not implemented.");
    }

    const cv::Rect cropRegion = GetCropRect(m_cropType, mat.rows, mat.cols, cropRatio, *rng);
    mat = mat(cropRegion);

    // Flip with probability 1/2 when horizontal flipping is enabled.
    if (m_hFlip && std::bernoulli_distribution()(*rng))
    {
        cv::flip(mat, mat, 1);
    }

    m_rngs.push(std::move(rng));
}
// Converts the textual crop type (case-insensitive) into the enum value.
// An empty string or "center" selects a center crop, "random" a random crop;
// anything else raises a runtime error.
CropTransformer::CropType
CropTransformer::ParseCropType(const std::string &src)
{
    const bool isCenter = src.empty() || AreEqualIgnoreCase(src, "center");
    if (isCenter)
    {
        return CropType::Center;
    }

    const bool isRandom = AreEqualIgnoreCase(src, "random");
    if (isRandom)
    {
        return CropType::Random;
    }

    RuntimeError("Invalid crop type: %s.", src.c_str());
}
// Converts the textual jitter type (case-insensitive) into the enum value.
// An empty string or "none" disables jitter; "uniratio", "unilength" and
// "uniarea" select the respective jitter kinds; anything else raises a
// runtime error.
CropTransformer::RatioJitterType
CropTransformer::ParseJitterType(const std::string &src)
{
    const bool isNone = src.empty() || AreEqualIgnoreCase(src, "none");
    if (isNone)
    {
        return RatioJitterType::None;
    }
    else if (AreEqualIgnoreCase(src, "uniratio"))
    {
        return RatioJitterType::UniRatio;
    }
    else if (AreEqualIgnoreCase(src, "unilength"))
    {
        return RatioJitterType::UniLength;
    }
    else if (AreEqualIgnoreCase(src, "uniarea"))
    {
        return RatioJitterType::UniArea;
    }

    RuntimeError("Invalid jitter type: %s.", src.c_str());
}
// Computes the square crop region for an image with 'crow' rows and 'ccol'
// columns. The side length is the shorter image side scaled by 'cropRatio'.
// A center crop is placed in the middle of the image; a random crop draws the
// horizontal offset first and the vertical offset second from 'rng'.
cv::Rect CropTransformer::GetCropRect(CropType type, int crow, int ccol,
                                      double cropRatio, std::mt19937 &rng)
{
    assert(crow > 0);
    assert(ccol > 0);
    assert(0 < cropRatio && cropRatio <= 1.0);

    const int side = static_cast<int>(std::min(crow, ccol) * cropRatio);
    const int maxXOffset = ccol - side;
    const int maxYOffset = crow - side;

    int xOff = -1;
    int yOff = -1;
    if (type == CropType::Center)
    {
        xOff = maxXOffset / 2;
        yOff = maxYOffset / 2;
    }
    else if (type == CropType::Random)
    {
        xOff = UniIntT(0, maxXOffset)(rng);
        yOff = UniIntT(0, maxYOffset)(rng);
    }
    else
    {
        assert(false);
    }

    assert(0 <= xOff && xOff <= maxXOffset);
    assert(0 <= yOff && yOff <= maxYOffset);
    return cv::Rect(xOff, yOff, side, side);
}
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Initializes the scale transformer: registers the supported interpolation
// names, derives the OpenCV depth from the feature element type and reads the
// remaining settings from the configuration section of the feature stream.
// Raises a runtime error if the transformer is applied to anything other than
// exactly one stream.
void ScaleTransformer::Initialize(TransformerPtr next,
                                  const ConfigParameters &readerConfig)
{
    ImageTransformerBase::Initialize(next, readerConfig);

    // Names accepted in the "interpolations" parameter.
    m_interpMap.emplace("nearest", cv::INTER_NEAREST);
    m_interpMap.emplace("linear", cv::INTER_LINEAR);
    m_interpMap.emplace("cubic", cv::INTER_CUBIC);
    m_interpMap.emplace("lanczos", cv::INTER_LANCZOS4);

    auto appliedIds = GetAppliedStreamIds();
    if (appliedIds.size() != 1)
    {
        RuntimeError("Only a single feature stream is supported.");
    }

    const auto &featureStream = GetInputStreams()[appliedIds[0]];
    if (featureStream->m_elementType == ElementType::tfloat)
    {
        m_dataType = CV_32F;
    }
    else
    {
        m_dataType = CV_64F;
    }

    InitFromConfig(readerConfig(featureStream->m_name));
}
void ScaleTransformer::InitFromConfig(const ConfigParameters &config)
{
m_imgWidth = config(L"width");
m_imgHeight = config(L"height");
m_imgChannels = config(L"channels");
size_t cfeat = m_imgWidth * m_imgHeight * m_imgChannels;
if (cfeat == 0 || cfeat > std::numeric_limits<size_t>().max() / 2)
RuntimeError("Invalid image dimensions.");
m_interp.clear();
std::stringstream ss{config(L"interpolations", "")};
for (std::string token = ""; std::getline(ss, token, ':');)
{
// Explicit cast required for GCC.
std::transform(token.begin(), token.end(), token.begin(),
(int (*) (int)) std::tolower);
StrToIntMapT::const_iterator res = m_interpMap.find(token);
if (res != m_interpMap.end())
m_interp.push_back((*res).second);
}
if (m_interp.size() == 0)
m_interp.push_back(cv::INTER_LINEAR);
}
// Resizes the image to the configured width and height.
// The interpolation method is picked uniformly at random from the configured
// list, using a random generator borrowed from the pool for the duration of
// the call.
void ScaleTransformer::Apply(cv::Mat &mat)
{
    // If matrix has not been converted to the right type, do it now as rescaling
    // requires floating point type.
    //
    if (mat.type() != CV_MAKETYPE(m_dataType, m_imgChannels))
    {
        mat.convertTo(mat, m_dataType);
    }

    // Check the precondition before the list size is used as a distribution
    // bound: an empty list would yield the invalid range [0, -1].
    assert(m_interp.size() > 0);

    auto seed = GetSeed();
    auto rng = m_rngs.pop_or_create(
        [seed]()
        {
            return std::make_unique<std::mt19937>(seed);
        });

    auto index = UniIntT(0, static_cast<int>(m_interp.size()) - 1)(*rng);

    cv::resize(
        mat, mat,
        cv::Size(static_cast<int>(m_imgWidth), static_cast<int>(m_imgHeight)), 0,
        0, m_interp[index]);

    m_rngs.push(std::move(rng));
}
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Prepares the mean transformer and loads its settings from the configuration
// section named after the (single) feature stream.
void MeanTransformer::Initialize(TransformerPtr next,
                                 const ConfigParameters &readerConfig)
{
    ImageTransformerBase::Initialize(next, readerConfig);

    // Exactly one stream may be subject to this transformation.
    auto appliedIds = GetAppliedStreamIds();
    if (appliedIds.size() != 1)
    {
        RuntimeError("Only a single feature stream is supported.");
    }

    const auto &inputStreams = GetInputStreams();
    InitFromConfig(readerConfig(inputStreams[appliedIds[0]]->m_name));
}
// Loads the mean image named by the optional "meanFile" setting.
// Without a mean file the stored mean image is released (left empty). Otherwise
// the file is read through cv::FileStorage, the element count is checked against
// the stored "Channel"/"Row"/"Col" values and the image is reshaped accordingly.
// Raises a runtime error when the file cannot be opened or is inconsistent.
void MeanTransformer::InitFromConfig(const ConfigParameters &config)
{
    std::wstring meanFile = config(L"meanFile", L"");
    if (meanFile.empty())
    {
        m_meanImg.release();
        return;
    }

    // REVIEW alexeyk: this sort of defeats the purpose of using wstring at
    // all... [fseide] no, only OpenCV has this problem.
    cv::FileStorage fs;
    fs.open(msra::strfun::utf8(meanFile).c_str(), cv::FileStorage::READ);
    if (!fs.isOpened())
        RuntimeError("Could not open file: %ls", meanFile.c_str());

    fs["MeanImg"] >> m_meanImg;

    int cchan;
    int crow;
    int ccol;
    fs["Channel"] >> cchan;
    fs["Row"] >> crow;
    fs["Col"] >> ccol;

    // The declared geometry has to describe exactly as many elements as were read.
    const int declaredCount = cchan * crow * ccol;
    const int storedCount = m_meanImg.channels() * m_meanImg.rows * m_meanImg.cols;
    if (declaredCount != storedCount)
        RuntimeError("Invalid data in file: %ls", meanFile.c_str());

    fs.release();
    m_meanImg = m_meanImg.reshape(cchan, crow);
}
// Subtracts the mean image from the given image when both have the same size;
// otherwise the image is left untouched.
void MeanTransformer::Apply(cv::Mat &mat)
{
    // Either no mean image is loaded, or it matches the input exactly.
    assert(m_meanImg.size() == cv::Size(0, 0) ||
           (m_meanImg.size() == mat.size() &&
            m_meanImg.channels() == mat.channels()));

    // REVIEW alexeyk: check type conversion (float/double).
    if (!(m_meanImg.size() == mat.size()))
    {
        return;
    }

    mat = mat - m_meanImg;
}
// Prepares the transpose transformer: marks the feature stream as the stream to
// transform and publishes output stream descriptions in which that stream's
// sample layout is expressed as CHW instead of HWC.
void TransposeTransformer::Initialize(TransformerPtr next,
                                      const ConfigParameters &readerConfig)
{
    Base::Initialize(next, readerConfig);

    // Currently we only support a single stream.
    ImageConfigHelper config(readerConfig);
    m_appliedStreamIds.push_back(config.GetFeatureStreamId());

    // Output descriptions start out as a copy of the input descriptions.
    const auto &inputStreams = GetInputStreams();
    m_outputStreams.resize(inputStreams.size());
    std::copy(inputStreams.begin(), inputStreams.end(), m_outputStreams.begin());

    for (auto id : m_appliedStreamIds)
    {
        const auto &source = inputStreams[id];

        // Re-describe the sample layout: HWC on input, CHW on output.
        ImageDimensions dimensions(*source->m_sampleLayout, HWC);
        auto transposed = std::make_shared<StreamDescription>(*source);
        transposed->m_sampleLayout = std::make_shared<TensorShape>(dimensions.AsTensorShape(CHW));

        m_outputStreams[id] = transposed;
    }
}
// Dispatches to the typed implementation that matches the element type of the
// input stream (float or double). Any other element type is a runtime error.
SequenceDataPtr
TransposeTransformer::Apply(const DenseSequenceData &inputSequence,
                            const StreamDescription &inputStream,
                            vector<char> &buffer,
                            const StreamDescription &outputStream)
{
    const auto elementType = inputStream.m_elementType;

    if (elementType == ElementType::tfloat)
    {
        return TypedApply<float>(inputSequence, inputStream, buffer, outputStream);
    }

    if (elementType == ElementType::tdouble)
    {
        return TypedApply<double>(inputSequence, inputStream, buffer, outputStream);
    }

    RuntimeError("Unsupported type");
}
// Copies a single HWC sample into 'buffer' in CHW order and returns a sequence
// that points at that buffer and carries the output stream's sample layout.
// The returned data aliases 'buffer', so it is only valid while 'buffer' is
// neither resized nor destroyed.
template <class TElement>
SequenceDataPtr
TransposeTransformer::TypedApply(const DenseSequenceData &inputSequence,
                                 const StreamDescription &inputStream,
                                 vector<char> &buffer,
                                 const StreamDescription &outputStream)
{
    assert(inputSequence.m_numberOfSamples == 1);
    assert(inputStream.m_sampleLayout->GetNumElements() ==
           outputStream.m_sampleLayout->GetNumElements());

    // Size the byte buffer to hold every element of the sample.
    const size_t byteCount = inputStream.m_sampleLayout->GetNumElements() * GetSizeByType(inputStream.m_elementType);
    buffer.resize(byteCount);
    TElement* destination = reinterpret_cast<TElement*>(&buffer[0]);

    ImageDimensions dimensions(*inputStream.m_sampleLayout, ImageLayoutKind::HWC);
    const size_t pixelCount = dimensions.m_height * dimensions.m_width;
    const size_t channelCount = dimensions.m_numChannels;

    TElement* source = reinterpret_cast<TElement*>(inputSequence.m_data);

    // The input is walked linearly (pixel-major, channel-minor); each value is
    // scattered to the plane of its channel in the output.
    size_t sourceIndex = 0;
    for (size_t pixel = 0; pixel < pixelCount; pixel++)
    {
        for (size_t channel = 0; channel < channelCount; channel++, sourceIndex++)
        {
            destination[channel * pixelCount + pixel] = source[sourceIndex];
        }
    }

    auto result = std::make_shared<DenseSequenceData>();
    result->m_sampleLayout = outputStream.m_sampleLayout;
    result->m_data = &buffer[0];
    result->m_numberOfSamples = inputSequence.m_numberOfSamples;
    return result;
}
}}}

Просмотреть файл

@ -0,0 +1,177 @@
//
// <copyright company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
#pragma once
#include <unordered_map>
#include <random>
#include <opencv2/opencv.hpp>
#include "Transformer.h"
#include "ConcStack.h"
#include "TransformerBase.h"
namespace Microsoft { namespace MSR { namespace CNTK {
class ConfigParameters;
// Base class for image transformations based on OpenCV
// that helps to wrap the sequences into OpenCV::Mat class.
// Derived transformers only implement Apply(cv::Mat&); the sequence-level
// Apply() override declared here is defined in the implementation file.
class ImageTransformerBase : public TransformerBase<cv::Mat>
{
public:
// Initializes the transformer.
virtual void Initialize(TransformerPtr next,
const ConfigParameters &readerConfig) override;
protected:
// Ids of the streams this transformer is applied to.
virtual const std::vector<StreamId> &GetAppliedStreamIds() const override
{
return m_appliedStreamIds;
}
// Descriptions of the streams produced by this transformer.
virtual const std::vector<StreamDescriptionPtr>& GetOutputStreams() const override
{
return m_outputStreams;
}
// Seed getter.
unsigned int GetSeed() const
{
return m_seed;
}
// Shorthands for the base class and the random distributions used by derived classes.
using Base = TransformerBase<cv::Mat>;
using UniRealT = std::uniform_real_distribution<double>;
using UniIntT = std::uniform_int_distribution<int>;
// Applies transformation to the sequence.
SequenceDataPtr Apply(const DenseSequenceData &inputSequence,
const StreamDescription &inputStream, cv::Mat &buffer,
const StreamDescription &outputStream) override;
// The only function that should be redefined by the inherited classes.
virtual void Apply(cv::Mat &from) = 0;
private:
std::vector<StreamDescriptionPtr> m_outputStreams;
std::vector<StreamId> m_appliedStreamIds;
// Value returned by GetSeed(); derived classes use it to seed their random number generators.
// NOTE(review): not set in this header - presumably assigned in Initialize(); confirm.
unsigned int m_seed;
};
// Crop transformation of the image.
// Can work on images of any size.
class CropTransformer : public ImageTransformerBase
{
public:
// Initializes the transformer.
virtual void Initialize(TransformerPtr next,
const ConfigParameters &readerConfig) override;
protected:
// Applies the crop to the given image.
virtual void Apply(cv::Mat &mat) override;
private:
// Where the crop rectangle is placed inside the image.
enum class CropType
{
Center = 0,
Random = 1
};
// How the crop ratio is jittered.
// NOTE(review): the exact meaning of each mode is defined in the implementation file - confirm there.
enum class RatioJitterType
{
None = 0,
UniRatio = 1,
UniLength = 2,
UniArea = 3
};
// Reads the crop settings from the stream configuration.
void InitFromConfig(const ConfigParameters &config);
// Parse the textual configuration values into the enums above.
CropType ParseCropType(const std::string &src);
RatioJitterType ParseJitterType(const std::string &src);
// Computes the crop rectangle for an image with 'crow' rows and 'ccol' columns.
cv::Rect GetCropRect(CropType type, int crow, int ccol, double cropRatio,
std::mt19937 &rng);
// Stack of random number generators that are taken for use and put back afterwards.
conc_stack<std::unique_ptr<std::mt19937>> m_rngs;
CropType m_cropType;
double m_cropRatioMin;
double m_cropRatioMax;
RatioJitterType m_jitterType;
// NOTE(review): presumably enables random horizontal flipping - confirm in the implementation.
bool m_hFlip;
};
// Scale transformation of the image.
// Scales the image to the dimensions requested by the network.
class ScaleTransformer : public ImageTransformerBase
{
public:
// Initializes the transformer; requires exactly one applied feature stream.
virtual void Initialize(TransformerPtr next,
const ConfigParameters &readerConfig) override;
private:
// Reads width, height, channels and the interpolation list from the stream configuration.
void InitFromConfig(const ConfigParameters &config);
// Resizes the image in place to m_imgWidth x m_imgHeight.
virtual void Apply(cv::Mat &mat) override;
using StrToIntMapT = std::unordered_map<std::string, int>;
// Maps an interpolation name ("nearest", "linear", "cubic", "lanczos") to its OpenCV flag.
StrToIntMapT m_interpMap;
// Configured interpolation flags; Apply() picks one of them at random for each image.
std::vector<int> m_interp;
// Stack of random number generators that are taken for use and put back afterwards.
conc_stack<std::unique_ptr<std::mt19937>> m_rngs;
// OpenCV element depth of the feature stream (CV_32F or CV_64F).
int m_dataType;
// Target image geometry taken from the configuration.
size_t m_imgWidth;
size_t m_imgHeight;
size_t m_imgChannels;
};
// Mean transformation.
// Subtracts a mean image, loaded from the optional "meanFile" setting, from each image.
class MeanTransformer : public ImageTransformerBase
{
public:
// Initializes the transformer; requires exactly one applied feature stream.
virtual void Initialize(TransformerPtr next,
const ConfigParameters &readerConfig) override;
private:
// Subtracts m_meanImg from the image when their sizes match.
virtual void Apply(cv::Mat &mat) override;
// Loads the mean image from the file named in the stream configuration, if any.
void InitFromConfig(const ConfigParameters &config);
// Mean image; empty when no mean file is configured.
cv::Mat m_meanImg;
};
// Transpose transformation from HWC to CHW.
// Works on raw byte buffers (vector<char>) rather than on cv::Mat.
class TransposeTransformer : public TransformerBase<vector<char>>
{
public:
// Initializes the transformer and derives the CHW output stream descriptions.
virtual void Initialize(TransformerPtr next,
const ConfigParameters &readerConfig) override;
protected:
// Ids of the streams this transformer is applied to.
virtual const std::vector<StreamId>& GetAppliedStreamIds() const override
{
return m_appliedStreamIds;
}
// Descriptions of the streams produced by this transformer.
virtual const std::vector<StreamDescriptionPtr>& GetOutputStreams() const override
{
return m_outputStreams;
}
// Transposes one sequence; dispatches on the element type of the input stream.
SequenceDataPtr Apply(const DenseSequenceData &inputSequence,
const StreamDescription &inputStream,
vector<char> &buffer,
const StreamDescription &outputStream) override;
private:
using Base = TransformerBase<vector<char>>;
// Element-type specific implementation of Apply(); TElement is float or double.
template <class TElement>
SequenceDataPtr TypedApply(const DenseSequenceData &inputSequence,
const StreamDescription &inputStream,
vector<char> &buffer,
const StreamDescription &outputStream);
std::vector<StreamDescriptionPtr> m_outputStreams;
std::vector<StreamId> m_appliedStreamIds;
};
}}}

Просмотреть файл

@ -0,0 +1,145 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" InitialTargets="CheckDependencies" ToolsVersion="12.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<ProjectGuid>{9BD0A711-0BBD-45B6-B81C-053F03C26CFB}</ProjectGuid>
<Keyword>Win32Proj</Keyword>
<RootNamespace>ImageReader</RootNamespace>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Label="Configuration">
<ConfigurationType>DynamicLibrary</ConfigurationType>
<PlatformToolset>v120</PlatformToolset>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<Choose>
<When Condition="Exists('$(OPENCV_PATH)')">
<PropertyGroup>
<HasOpenCV>true</HasOpenCV>
</PropertyGroup>
</When>
<Otherwise>
<PropertyGroup>
<HasOpenCV>false</HasOpenCV>
</PropertyGroup>
</Otherwise>
</Choose>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
<UseDebugLibraries>true</UseDebugLibraries>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
<UseDebugLibraries>false</UseDebugLibraries>
<WholeProgramOptimization>true</WholeProgramOptimization>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup>
<IncludePath>..\..\common\include;..\..\math;$(OPENCV_PATH)\include;$(IncludePath);</IncludePath>
<LibraryPath>$(SolutionDir)$(Platform)\$(Configuration);$(OPENCV_PATH)\x64\vc12\lib;$(LibraryPath);</LibraryPath>
<IntDir>$(Platform)\$(Configuration)\$(ProjectName)\</IntDir>
<OpenCVLib Condition="$(HasOpenCV)">opencv_world300.lib</OpenCVLib>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<LinkIncremental>true</LinkIncremental>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<LinkIncremental>false</LinkIncremental>
</PropertyGroup>
<ItemDefinitionGroup>
<ClCompile>
<PrecompiledHeader>Use</PrecompiledHeader>
<WarningLevel>Level4</WarningLevel>
<PreprocessorDefinitions>WIN32;_WINDOWS;_USRDLL;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<SDLCheck>true</SDLCheck>
<TreatWarningAsError>true</TreatWarningAsError>
<OpenMPSupport>true</OpenMPSupport>
</ClCompile>
<Link>
<SubSystem>Windows</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalDependencies>Reader.lib;Math.lib;$(OpenCVLib);%(AdditionalDependencies)</AdditionalDependencies>
</Link>
<PostBuildEvent>
<Command Condition="$(HasOpenCV)">xcopy /Y $(OPENCV_PATH)\x64\vc12\bin\opencv_world300.dll $(TargetDir)</Command>
</PostBuildEvent>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<ClCompile>
<Optimization>Disabled</Optimization>
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>../Reader</AdditionalIncludeDirectories>
</ClCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<ClCompile>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalOptions>/d2Zi+ %(AdditionalOptions)</AdditionalOptions>
<AdditionalIncludeDirectories>../Reader</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<Profile>true</Profile>
</Link>
</ItemDefinitionGroup>
<ItemGroup>
<ClInclude Include="..\..\Common\Include\basetypes.h" />
<ClInclude Include="..\..\Common\Include\DataReader.h" />
<ClInclude Include="..\..\Common\Include\File.h" />
<ClInclude Include="..\..\Common\Include\fileutil.h" />
<ClInclude Include="ImageConfigHelper.h" />
<ClInclude Include="ImageDataDeserializer.h" />
<ClInclude Include="ImageReader.h" />
<ClInclude Include="ImageTransformers.h" />
<ClInclude Include="stdafx.h" />
<ClInclude Include="targetver.h" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="..\..\Common\DataReader.cpp" />
<ClCompile Include="..\..\Common\File.cpp">
<PrecompiledHeader>NotUsing</PrecompiledHeader>
</ClCompile>
<ClCompile Include="..\..\Common\DebugUtil.cpp">
<PrecompiledHeader>NotUsing</PrecompiledHeader>
</ClCompile>
<ClCompile Include="..\..\Common\fileutil.cpp">
<PrecompiledHeader>NotUsing</PrecompiledHeader>
</ClCompile>
<ClCompile Include="..\..\Common\Config.cpp">
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">NotUsing</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">NotUsing</PrecompiledHeader>
</ClCompile>
<ClCompile Include="ImageConfigHelper.cpp" />
<ClCompile Include="ImageDataDeserializer.cpp" />
<ClCompile Include="dllmain.cpp" />
<ClCompile Include="Exports.cpp">
<ExcludedFromBuild Condition="!$(HasOpenCV)">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="ImageReader.cpp" />
<ClCompile Include="ImageTransformers.cpp" />
<ClCompile Include="stdafx.cpp">
<PrecompiledHeader>Create</PrecompiledHeader>
</ClCompile>
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
<Target Name="CheckDependencies">
<Warning Condition="!$(HasOpenCV)" Text="ImageReader requires OpenCV library v3.0 or higher to build. Please install the library from http://opencv.org/downloads.html and set OPENCV_PATH environment variable to OpenCV build folder (e.g. C:\src\opencv\build)." />
</Target>
</Project>

Просмотреть файл

@ -0,0 +1,55 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup>
<ClCompile Include="Exports.cpp" />
<ClCompile Include="stdafx.cpp" />
<ClCompile Include="..\..\Common\DataReader.cpp">
<Filter>Common</Filter>
</ClCompile>
<ClCompile Include="..\..\Common\fileutil.cpp">
<Filter>Common</Filter>
</ClCompile>
<ClCompile Include="..\..\Common\File.cpp">
<Filter>Common</Filter>
</ClCompile>
<ClCompile Include="dllmain.cpp" />
<ClCompile Include="ImageTransformers.cpp" />
<ClCompile Include="ImageDataDeserializer.cpp" />
<ClCompile Include="ImageReader.cpp" />
<ClCompile Include="ImageConfigHelper.cpp" />
<ClCompile Include="..\..\Common\DebugUtil.cpp">
<Filter>Common</Filter>
</ClCompile>
<ClCompile Include="..\..\Common\Config.cpp">
<Filter>Common</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="stdafx.h" />
<ClInclude Include="targetver.h" />
<ClInclude Include="..\..\Common\Include\basetypes.h">
<Filter>Common\Include</Filter>
</ClInclude>
<ClInclude Include="..\..\Common\Include\DataReader.h">
<Filter>Common\Include</Filter>
</ClInclude>
<ClInclude Include="..\..\Common\Include\File.h">
<Filter>Common\Include</Filter>
</ClInclude>
<ClInclude Include="..\..\Common\Include\fileutil.h">
<Filter>Common\Include</Filter>
</ClInclude>
<ClInclude Include="ImageTransformers.h" />
<ClInclude Include="ImageDataDeserializer.h" />
<ClInclude Include="ImageReader.h" />
<ClInclude Include="ImageConfigHelper.h" />
</ItemGroup>
<ItemGroup>
<Filter Include="Common">
<UniqueIdentifier>{0D0EFA10-72A8-4078-840A-B7F76AFEC0A4}</UniqueIdentifier>
</Filter>
<Filter Include="Common\Include">
<UniqueIdentifier>{C6F55578-121A-4D7C-8F57-4172BC5C463B}</UniqueIdentifier>
</Filter>
</ItemGroup>
</Project>

Просмотреть файл

@ -0,0 +1,12 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
// dllmain.cpp : Defines the entry point for the DLL application.
//
#include "stdafx.h"
// DLL entry point. All arguments are ignored and success is always reported,
// i.e. no work is done here for any notification.
BOOL APIENTRY DllMain(HMODULE /*hModule*/, DWORD /*ul_reason_for_call*/, LPVOID /*lpReserved*/)
{
return TRUE;
}

Просмотреть файл

@ -0,0 +1,13 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
// stdafx.cpp : source file that includes just the standard includes
// <ProjectName>.pch will be the pre-compiled header
// stdafx.obj will contain the pre-compiled type information
//
#include "stdafx.h"
// TODO: reference any additional headers you need in STDAFX.H
// and not in this file

Просмотреть файл

@ -0,0 +1,21 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
// stdafx.h : include file for standard system include files,
// or project specific include files that are used frequently, but
// are changed infrequently
//
#pragma once
#include "Platform.h"
#define _CRT_SECURE_NO_WARNINGS // "secure" CRT not available on all platforms
#include "targetver.h"
#ifdef __WINDOWS__
#include "windows.h"
#endif
#include <stdio.h>
#include <math.h>
// TODO: reference additional headers your program requires here

Просмотреть файл

@ -0,0 +1,14 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
#pragma once
// Including SDKDDKVer.h defines the highest available Windows platform.
// If you wish to build your application for a previous Windows platform, include WinSDKVer.h and
// set the _WIN32_WINNT macro to the platform you wish to support before including SDKDDKVer.h.
#ifdef __WINDOWS__
#include <SDKDDKVer.h>
#endif

Просмотреть файл

@ -0,0 +1,398 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
#define _CRT_SECURE_NO_WARNINGS
#include "BlockRandomizer.h"
#include <algorithm>
#include <utility>
#include <iostream>
#include "DataReader.h"
#include <random>
#ifndef UNREFERENCED_PARAMETER
#define UNREFERENCED_PARAMETER(P) (P)
#endif
namespace Microsoft { namespace MSR { namespace CNTK {
// Returns a pseudo-random number in the half-open range [begin, end), built
// from two consecutive calls to the C library ::rand().
// Precondition: begin < end (an empty range would lead to a modulo by zero).
static inline size_t rand(const size_t begin, const size_t end)
{
    assert(begin < end);

    // eldak: this has already been changed by Alexey(alrezni)
    // still only covers 32-bit range
    // Widen to size_t before multiplying: as a plain int * int product the
    // expression overflows (undefined behavior) wherever RAND_MAX is 2^31 - 1.
    // For a 15-bit RAND_MAX the resulting values are unchanged.
    const size_t high = static_cast<size_t>(::rand());
    const size_t low = static_cast<size_t>(::rand());
    const size_t randomNumber = high * RAND_MAX + low;

    return begin + randomNumber % (end - begin);
}
bool BlockRandomizer::TimelineIsValidForRandomization(const SequenceDescriptions& timeline) const
{
SequenceDescription previous = { SIZE_MAX, 0, 0, true };
auto it = std::find_if_not(timeline.begin(), timeline.end(),
[&](const SequenceDescription* current)
{
bool result = current->m_isValid
&& previous.m_id + 1 == current->m_id
&& previous.m_chunkId <= current->m_chunkId
&& current->m_chunkId <= previous.m_chunkId + 1
&& 0 < current->m_numberOfSamples;
previous = *current;
return result;
});
return it == timeline.end();
}
// Shuffles the order of the chunks for the current sweep and rebuilds the
// per-sweep bookkeeping:
//  - m_randomizedChunks: the chunks in randomized order with their global start
//    positions, followed by a sentinel entry, plus a randomization window
//    [m_windowBegin, m_windowEnd) of randomized chunk positions for each chunk;
//  - m_sequencePositionToChunkIndex: randomized chunk index for each sequence position.
void BlockRandomizer::RandomizeChunks()
{
// Create vector of chunk indices and shuffle them using current sweep as seed
std::vector<size_t> randomizedChunkIndices;
randomizedChunkIndices.reserve(m_numChunks);
for (size_t i = 0; i < m_numChunks; i++)
{
randomizedChunkIndices.push_back(i);
}
// Note: despite the m_ prefix this generator is a local variable, re-seeded on every call.
std::mt19937 m_rng(static_cast<int>(m_sweep));
std::shuffle(randomizedChunkIndices.begin(), randomizedChunkIndices.end(), m_rng);
// Place randomized chunks on global time line
m_randomizedChunks.clear();
m_randomizedChunks.reserve(m_numChunks + 1);
size_t chunkId, samplePosition, sequencePosition;
for (chunkId = 0, samplePosition = m_sweepStartInSamples, sequencePosition = 0; chunkId < m_numChunks; chunkId++)
{
const size_t originalChunkIndex = randomizedChunkIndices[chunkId];
// Sizes are differences of consecutive start positions; the sentinel in
// m_chunkInformation makes this work for the last chunk as well.
const size_t numSequences =
m_chunkInformation[originalChunkIndex + 1].m_sequencePositionStart -
m_chunkInformation[originalChunkIndex].m_sequencePositionStart;
const size_t numSamples =
m_chunkInformation[originalChunkIndex + 1].m_samplePositionStart -
m_chunkInformation[originalChunkIndex].m_samplePositionStart;
m_randomizedChunks.push_back(RandomizedChunk { sequencePosition, samplePosition, originalChunkIndex });
samplePosition += numSamples;
sequencePosition += numSequences;
}
// Add sentinel
m_randomizedChunks.push_back(RandomizedChunk { sequencePosition, samplePosition, SIZE_MAX });
// For each chunk, compute the randomization range (w.r.t. the randomized chunk sequence)
size_t halfWindowRange = m_randomizationRangeInSamples / 2;
// (this loop variable shadows the function-scope chunkId declared above)
for (size_t chunkId = 0; chunkId < m_numChunks; chunkId++)
{
auto& chunk = m_randomizedChunks[chunkId];
// start with the range of left neighbor
if (chunkId == 0)
{
chunk.m_windowBegin = 0;
chunk.m_windowEnd = 1;
}
else
{
chunk.m_windowBegin = m_randomizedChunks[chunkId - 1].m_windowBegin; // might be too early
chunk.m_windowEnd = m_randomizedChunks[chunkId - 1].m_windowEnd; // might have more space
}
// Advance the window begin until it starts at most halfWindowRange samples before this chunk.
while (chunk.m_info.m_samplePositionStart - m_randomizedChunks[chunk.m_windowBegin].m_info.m_samplePositionStart > halfWindowRange)
chunk.m_windowBegin++; // too early
// TODO m_randomizedChunks[chunk.windowend + 1].info.samplePositionStart - m_randomizedChunks[chunk.windowbegin].info.samplePositionStart < m_randomizationRangeInSamples
// Extend the window end while the chunk after it still starts less than
// halfWindowRange samples after this chunk (index m_numChunks is the sentinel).
while (chunk.m_windowEnd < m_numChunks &&
m_randomizedChunks[chunk.m_windowEnd + 1].m_info.m_samplePositionStart - chunk.m_info.m_samplePositionStart < halfWindowRange)
chunk.m_windowEnd++; // got more space
}
// Compute the randomization range for sequence positions.
m_sequencePositionToChunkIndex.clear();
m_sequencePositionToChunkIndex.reserve(m_numSequences);
for (size_t k = 0; k < m_numChunks; k++)
{
const size_t numSequences =
m_randomizedChunks[k + 1].m_info.m_sequencePositionStart -
m_randomizedChunks[k].m_info.m_sequencePositionStart;
// Every sequence position covered by randomized chunk k maps back to k.
for (size_t i = 0; i < numSequences; i++)
{
m_sequencePositionToChunkIndex.push_back(k);
}
}
assert(m_sequencePositionToChunkIndex.size() == m_numSequences);
}
// Tells whether the given sequence may be placed at 'targetPosition': its chunk
// has to lie inside the randomization window [m_windowBegin, m_windowEnd) of the
// chunk that owns the target position.
bool BlockRandomizer::IsValidForPosition(size_t targetPosition, const SequenceDescription& seqDesc) const
{
    const size_t owningChunkIndex = m_sequencePositionToChunkIndex[targetPosition];
    const auto& owningChunk = m_randomizedChunks[owningChunkIndex];

    if (!(owningChunk.m_windowBegin <= seqDesc.m_chunkId))
    {
        return false;
    }

    return seqDesc.m_chunkId < owningChunk.m_windowEnd;
}
// Builds the randomized timeline for the current sweep: first the chunks are
// shuffled (RandomizeChunks), then the sequences are laid out chunk by chunk,
// and finally sequences are swapped at random subject to the constraint that
// each one stays inside the randomization window of its position.
void BlockRandomizer::Randomize()
{
const auto& timeline = m_deserializer->GetSequenceDescriptions();
RandomizeChunks();
// Set up m_randomTimeline, shuffled by chunks.
m_randomTimeline.clear();
m_randomTimeline.reserve(m_numSequences);
for (size_t chunkId = 0; chunkId < m_numChunks; chunkId++)
{
auto originalChunkIndex = m_randomizedChunks[chunkId].m_originalChunkIndex;
for (size_t sequencePosition = m_chunkInformation[originalChunkIndex].m_sequencePositionStart;
sequencePosition < m_chunkInformation[originalChunkIndex + 1].m_sequencePositionStart;
sequencePosition++)
{
// Copy the description and relabel it with its randomized chunk position;
// m_id is left unchanged.
SequenceDescription randomizedSeqDesc = *timeline[sequencePosition];
randomizedSeqDesc.m_chunkId = chunkId;
m_randomTimeline.push_back(randomizedSeqDesc);
}
}
assert(m_randomTimeline.size() == m_numSequences);
// Check we got those setup right
foreach_index (i, m_randomTimeline)
{
assert(IsValidForPosition(i, m_randomTimeline[i]));
}
// Now randomly shuffle m_randomTimeline, while considering the
// constraints of what chunk range needs to be in memory.
// The C library generator is re-seeded from the sweep number here.
srand(static_cast<unsigned int>(m_sweep + 1));
foreach_index (i, m_randomTimeline)
{
// Get valid randomization range, expressed in chunks
const size_t chunkId = m_sequencePositionToChunkIndex[i];
const size_t windowBegin = m_randomizedChunks[chunkId].m_windowBegin;
const size_t windowEnd = m_randomizedChunks[chunkId].m_windowEnd;
// Get valid randomization range, expressed in sequence positions.
size_t posBegin = m_randomizedChunks[windowBegin].m_info.m_sequencePositionStart;
size_t posEnd = m_randomizedChunks[windowEnd].m_info.m_sequencePositionStart;
// Rejection sampling: keep drawing candidates until a swap is found that is
// valid in both directions.
for (;;)
{
// Pick a sequence position from [posBegin, posEnd)
const size_t j = rand(posBegin, posEnd);
// Try again if the sequence currently at j cannot be placed at position i.
if (!IsValidForPosition(i, m_randomTimeline[j]))
continue;
// Try again if the sequence currently at i cannot be placed at position j.
if (!IsValidForPosition(j, m_randomTimeline[i]))
continue;
// Swap and break out.
std::swap(m_randomTimeline[i], m_randomTimeline[j]); // TODO old swap was perhaps more efficient
break;
}
}
// Verify that we got it right
foreach_index (i, m_randomTimeline)
{
// TODO assert only
if (!IsValidForPosition(i, m_randomTimeline[i]))
LogicError("BlockRandomizer::Randomize: randomization logic mangled!");
}
}
// Re-randomizes when the position inside the sweep has run past the last
// sequence: moves on to the next sweep, rebuilds the randomized timeline and
// wraps the position back into the new sweep.
void BlockRandomizer::RandomizeIfNewSweepIsEntered()
{
    // Check that StartEpoch() was called
    assert(m_sequencePositionInSweep != SIZE_MAX);

    // Still inside the current sweep - nothing to do.
    if (m_sequencePositionInSweep < m_numSequences)
    {
        return;
    }

    if (m_verbosity > 0)
    {
        std::cerr << __FUNCTION__ << ": re-randomizing for sweep " << m_sweep
                  << " in " << (m_frameMode ? "frame" : "utterance") << " mode" << endl;
    }

    m_sweep++;
    m_sweepStartInSamples += m_numSamples;
    Randomize();

    m_sequencePositionInSweep -= m_numSequences;
    assert(m_sequencePositionInSweep < m_numSequences); // cannot jump ahead more than a sweep
}
void BlockRandomizer::RandomizeForGlobalSamplePosition(const size_t samplePosition)
{
size_t sweep = samplePosition / m_numSamples;
if (m_sweep != sweep)
{
m_sweep = sweep;
m_sweepStartInSamples = sweep * m_numSamples;
Randomize();
}
m_sequencePositionInSweep = samplePosition % m_numSamples; // TODO only for m_frameMode
};
//
// Public methods
//
// Creates a block randomizer on top of the given deserializer.
//   verbosity                   : values > 0 enable a log line on re-randomization.
//   randomizationRangeInSamples : full width of the randomization window, in samples.
//   deserializer                : source of the sequence descriptions and data; must not be null.
// The constructor scans the deserializer's timeline once to collect the number
// of sequences, chunks and samples and the start positions of every chunk.
// The member initializers follow the declaration order of the class, and the
// per-epoch / per-sweep members that were previously left indeterminate
// (m_workerRank, m_numberOfWorkers, m_sweepStartInSamples) now get defined
// values until StartEpoch() overwrites them.
BlockRandomizer::BlockRandomizer(int verbosity, size_t randomizationRangeInSamples, DataDeserializerPtr deserializer)
    : m_verbosity(verbosity),
      m_randomizationRangeInSamples(randomizationRangeInSamples),
      m_distributionMode(DistributionMode::sequences_strides),
      m_deserializer(deserializer),
      m_workerRank(0),
      m_numberOfWorkers(1),
      m_epochSize(SIZE_MAX),
      m_samplePositionInEpoch(SIZE_MAX),
      m_sweep(SIZE_MAX),
      m_sweepStartInSamples(0),
      m_sequencePositionInSweep(SIZE_MAX)
{
    assert(deserializer != nullptr);

    const SequenceDescriptions& timeline = m_deserializer->GetSequenceDescriptions();
    assert(TimelineIsValidForRandomization(timeline));

    if (timeline.size() == 0)
    {
        m_numSequences = 0;
        m_numChunks = 0;
    }
    else
    {
        // TODO let timeline keep this info?
        // Ids are consecutive (see TimelineIsValidForRandomization), so the last
        // entry yields the counts.
        m_numSequences = timeline.back()->m_id + 1;
        m_numChunks = timeline.back()->m_chunkId + 1;
    }

    // Generate additional information about physical chunks
    assert(m_chunkInformation.size() == 0);
    m_chunkInformation.reserve(m_numChunks + 1);
    m_chunkInformation.insert(m_chunkInformation.begin(),
                              m_numChunks + 1,
                              ChunkInformation{SIZE_MAX, SIZE_MAX});

    size_t maxNumberOfSamples = 0;
    m_numSamples = 0;
    for (const auto& seqDesc : timeline)
    {
        // TODO let timeline keep this info?
        // Start positions begin at SIZE_MAX, so taking the minimum records the
        // first sequence / sample position seen for each chunk.
        auto& chunkInformation = m_chunkInformation[seqDesc->m_chunkId];
        chunkInformation.m_sequencePositionStart =
            min(chunkInformation.m_sequencePositionStart, seqDesc->m_id);
        chunkInformation.m_samplePositionStart =
            min(chunkInformation.m_samplePositionStart, m_numSamples);
        maxNumberOfSamples = max(maxNumberOfSamples, seqDesc->m_numberOfSamples);
        m_numSamples += seqDesc->m_numberOfSamples;
    }

    // Add sentinel
    m_chunkInformation[m_numChunks] = {m_numSequences, m_numSamples};

    // Frame mode to the randomizer just means there are only single-sample sequences
    m_frameMode = (maxNumberOfSamples == 1);
}
// Part of the Transformer interface; the block randomizer needs neither a next
// transformer nor the reader configuration, so both arguments are ignored.
void BlockRandomizer::Initialize(TransformerPtr /*next*/, const ConfigParameters& /*readerConfig*/)
{
}
// Starts a new epoch: forwards the configuration to the deserializer, records
// the worker layout and epoch size, and positions the randomizer at the global
// sample position at which the epoch begins.
void BlockRandomizer::StartEpoch(const EpochConfiguration& config)
{
    m_deserializer->StartEpoch(config);

    m_workerRank = config.m_workerRank;
    m_numberOfWorkers = config.m_numberOfWorkers;

    // eldak: check partial minibatches.
    // The special value requestDataSize selects one full pass over the data.
    const bool wholeDataRequested = (config.m_totalEpochSizeInSamples == requestDataSize);
    if (wholeDataRequested)
        m_epochSize = m_numSamples;
    else
        m_epochSize = config.m_totalEpochSizeInSamples;

    // TODO add some asserts on EpochConfiguration
    m_samplePositionInEpoch = 0;

    const size_t epochStartInSamples = m_epochSize * config.m_epochIndex;
    assert(m_frameMode);                      // TODO !m_frameMode needs fixes
    assert(epochStartInSamples != SIZE_MAX);  // used as special value for init
    RandomizeForGlobalSamplePosition(epochStartInSamples);
}
// Collects into 'originalIds' the ids of the sequences this worker should read
// for the next minibatch of up to 'sampleCount' samples, advancing the position
// in the epoch and in the sweep (re-randomizing when a sweep boundary is crossed).
// 'originalIds' must be empty on entry.
// Returns true when the end of the epoch has been reached.
bool BlockRandomizer::GetNextSequenceIds(size_t sampleCount, std::vector<size_t>& originalIds)
{
assert(m_frameMode); // TODO !m_frameMode not implemented yet
assert(originalIds.size() == 0);
assert(sampleCount < m_numSamples);
if (m_samplePositionInEpoch < m_epochSize)
{
if (m_distributionMode == DistributionMode::chunk_modulus)
{
// A sequence belongs to the worker whose rank equals its chunk id modulo the
// number of workers.
assert(m_numberOfWorkers == 1); // TODO needs implementation
while ((m_samplePositionInEpoch < m_epochSize) &&
(originalIds.size() < sampleCount))
{
RandomizeIfNewSweepIsEntered();
const auto& seqDesc = m_randomTimeline[m_sequencePositionInSweep];
if ((seqDesc.m_chunkId % m_numberOfWorkers) == m_workerRank)
{
// Got one, collect it
originalIds.push_back(seqDesc.m_id);
}
m_samplePositionInEpoch += seqDesc.m_numberOfSamples;
m_sequencePositionInSweep++;
}
}
else
{
assert(m_distributionMode == DistributionMode::sequences_strides);
// Clip the minibatch at the end of the epoch.
size_t nextSamplePositionInEpoch = std::min(m_epochSize, m_samplePositionInEpoch + sampleCount);
size_t distributedSampleCount = nextSamplePositionInEpoch - m_samplePositionInEpoch;
// This worker takes the contiguous slice [strideBegin, strideEnd) of the minibatch.
size_t strideBegin = distributedSampleCount * m_workerRank / m_numberOfWorkers;
size_t strideEnd = distributedSampleCount * (m_workerRank + 1) / m_numberOfWorkers;
// Positions advance over the whole minibatch; only the ids inside this
// worker's slice are collected.
for (size_t i = 0; i < distributedSampleCount; ++i, ++m_samplePositionInEpoch, ++m_sequencePositionInSweep)
{
RandomizeIfNewSweepIsEntered();
if (strideBegin <= i && i < strideEnd)
{
const auto& seqDesc = m_randomTimeline[m_sequencePositionInSweep];
originalIds.push_back(seqDesc.m_id);
}
}
assert(m_samplePositionInEpoch == nextSamplePositionInEpoch);
}
}
return m_epochSize <= m_samplePositionInEpoch;
}
// Returns the data of the next minibatch of up to 'sampleCount' samples for this
// worker, together with a flag that tells whether the end of the epoch has been
// reached. The data is left empty when no sequence falls to this worker.
Sequences BlockRandomizer::GetNextSequences(size_t sampleCount)
{
    assert(m_samplePositionInEpoch != SIZE_MAX); // StartEpoch() must be called first
    assert(m_frameMode);                         // TODO sequence mode not implemented yet

    std::vector<size_t> sequenceIds;
    Sequences minibatch;
    minibatch.m_endOfEpoch = GetNextSequenceIds(sampleCount, sequenceIds);

    // TODO implement require and release chunks from the data deserializer, but only for this worker
    // (probably in GetNextSequenceIds())
    if (sequenceIds.size() != 0)
    {
        minibatch.m_data = m_deserializer->GetSequencesById(sequenceIds);
    }

    return minibatch;
}
} } }

Просмотреть файл

@ -0,0 +1,104 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
#pragma once
#include <vector>
#include "Transformer.h"
#include "DataDeserializer.h"
namespace Microsoft { namespace MSR { namespace CNTK {
// This class represents a randomizer that randomizes chunks, and sequences within a window of chunks.
// TODO: currently this code has been moved over from the old block randomizer.
// The class will be refactored further, and a common base will be extracted together with NoRandomizer.
// Currently works only in frame mode (number of samples per sequence == 1).
class BlockRandomizer : public Transformer
{
public:
BlockRandomizer(int verbosity, size_t randomizationRangeInSamples, DataDeserializerPtr deserializer);
virtual ~BlockRandomizer()
{
}
virtual void Initialize(TransformerPtr next, const ConfigParameters& readerConfig) override;
virtual void StartEpoch(const EpochConfiguration& config) override;
virtual Sequences GetNextSequences(size_t sampleCount) override;
virtual std::vector<StreamDescriptionPtr> GetStreamDescriptions() const override
{
return m_deserializer->GetStreamDescriptions();
}
private:
enum class DistributionMode {
// TODO better names, description
chunk_modulus,
sequences_strides
};
// Structure for per-chunk information
struct ChunkInformation
{
size_t m_sequencePositionStart;
size_t m_samplePositionStart;
};
// Structure that will be maintained for each randomized chunk
struct RandomizedChunk
{
struct ChunkInformation m_info; // sample positions are global // TODO could drop 'global' requirement?
size_t m_originalChunkIndex;
// Randomization range (in randomized chunk positions; right-side open)
size_t m_windowBegin;
size_t m_windowEnd;
};
// General configuration
int m_verbosity;
size_t m_randomizationRangeInSamples; // full window
DistributionMode m_distributionMode;
// Deserializer and information on the original timeline
DataDeserializerPtr m_deserializer;
size_t m_numSequences;
size_t m_numChunks;
size_t m_numSamples;
bool m_frameMode; // true iff only single-sample sequences
std::vector<ChunkInformation> m_chunkInformation; // (includes a sentinel)
// Per-epoch configuration
size_t m_workerRank;
size_t m_numberOfWorkers;
size_t m_epochSize;
size_t m_samplePositionInEpoch;
// Per-randomization-sweep information
size_t m_sweep;
size_t m_sweepStartInSamples; // TODO do we need it?
size_t m_sequencePositionInSweep;
std::vector<RandomizedChunk> m_randomizedChunks; // (includes a sentinel)
std::vector<size_t> m_sequencePositionToChunkIndex; // TODO find on m_randomizedChunks instead?
std::vector<SequenceDescription> m_randomTimeline;
// Check that timeline has only valid sequences of non-zero length
// with incrementing IDs and non-decreasing chunk identifiers.
bool TimelineIsValidForRandomization(const SequenceDescriptions& timeline) const;
void RandomizeChunks();
bool IsValidForPosition(size_t targetPosition, const SequenceDescription& seqDesc) const;
void Randomize();
void RandomizeForGlobalSamplePosition(const size_t samplePosition);
void RandomizeIfNewSweepIsEntered();
bool GetNextSequenceIds(size_t sampleCount, std::vector<size_t>& ids);
};
} } }

Просмотреть файл

@ -0,0 +1,43 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
#pragma once
#include <memory>
#include <CUDAPageLockedMemAllocator.h>
#include "MemoryProvider.h"
namespace Microsoft { namespace MSR { namespace CNTK {
/// Memory provider that forwards every request to a CUDAPageLockedMemAllocator
/// created for the device id passed to the constructor.
/// TODO: Memory provider should reside on the matrix. It is responsibility of the network
/// to decide what memory to use per stream. This class will be moved in the near future.
class CudaMemoryProvider : public MemoryProvider
{
    std::unique_ptr<CUDAPageLockedMemAllocator> m_allocator;

public:
    CudaMemoryProvider(int deviceId)
        : m_allocator(std::make_unique<CUDAPageLockedMemAllocator>(deviceId))
    {
    }

    /// Allocates elementSize * numberOfElements bytes through the allocator.
    /// NOTE(review): the product is not checked for size_t overflow.
    virtual void* Alloc(size_t elementSize, size_t numberOfElements) override
    {
        const size_t byteCount = elementSize * numberOfElements;
        return m_allocator->Malloc(byteCount);
    }

    /// Returns the storage to the allocator; a null pointer is ignored.
    virtual void Free(void* p) override
    {
        if (p != nullptr)
        {
            m_allocator->Free(reinterpret_cast<char*>(p));
        }
    }
};
} } }

Просмотреть файл

@ -0,0 +1,105 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
#pragma once
#include <vector>
#include "Reader.h"
namespace Microsoft { namespace MSR { namespace CNTK {
// Defines the main properties of a sequence.
// Sequence descriptions are used by the randomizer to establish a global timeline for the complete input.
// A sequence is defined as an ordered set of samples (size == 1 is used for sample training).
struct SequenceDescription
{
size_t m_id; // Sequence id, uniquely identifies the sequence.
size_t m_numberOfSamples; // Number of samples in a sequence.
size_t m_chunkId; // Each sequence belongs to an I/O chunk; how a chunk is defined is specific to a
// particular data deserializer. The randomizer guarantees to request sequences
// from only a limited subset of chunks at any moment in time.
bool m_isValid; // Indicates whether the sequence is valid.
};
typedef std::vector<const SequenceDescription*> SequenceDescriptions;
// Defines sequence data and its layout.
// Currently CNTK supports dense and sparse sequences (csc).
// The storageType in the corresponding stream description identifies which type of SequenceData
// a data deserializer or transformer provides.
struct SequenceDataBase
{
    SequenceDataBase() : m_data(nullptr) { }

    // Derived sequence types own non-trivial members and are handled through
    // pointers to this base, so destruction through the base has to be well defined.
    virtual ~SequenceDataBase() { }

    // A non-owned pointer. The actual size is provided for particular sequences,
    // i.e. see DenseSequenceData, or SparseSequenceData.
    void* m_data;
};
typedef std::shared_ptr<SequenceDataBase> SequenceDataPtr;
// Dense sequence. Should be returned by the deserializer for streams with storage type StorageType::dense.
// All samples are stored in the 'm_data' member as a contiguous array.
// The layout of the samples is described by m_sampleLayout.
// All samples in the sequence should have the same layout.
struct DenseSequenceData : SequenceDataBase
{
DenseSequenceData() : m_numberOfSamples(0) { }
TensorShapePtr m_sampleLayout; // Sample layout, can be shared by several sequences.
size_t m_numberOfSamples; // Number of samples in the sequence
};
typedef std::shared_ptr<DenseSequenceData> DenseSequenceDataPtr;
// Sparse sequence. Should be returned by the deserializer for streams with storage type StorageType::sparse_csc.
// All non-zero values are stored in the 'm_data' member as a contiguous array.
// The corresponding row indices are stored in 'm_indices' per sample.
// All samples in the sequence should have the same layout.
struct SparseSequenceData : SequenceDataBase
{
std::vector<std::vector<size_t>> m_indices;
};
typedef std::shared_ptr<SparseSequenceData> SparseSequenceDataPtr;
//////////////////////////////////////////////////////////////////////////////////////////////////
// Interface all data deserializers should implement.
// Data deserializers are intimately familiar with a particular input format and are responsible for bringing
// the serialized data into sequences in memory. Very often data for different streams (i.e. features/lattices)
// reside in the same physical storage (file), so the data deserializer can expose not a single but several
// streams. Examples of data deserializers include the image data deserializer or the htkmlf data deserializer.
// TODO: This interface will become ABI and deserializers can be implemented in different languages, i.e. Python.
//////////////////////////////////////////////////////////////////////////////////////////////////
class DataDeserializer
{
public:
// Describes streams this data deserializer can produce. Streams correspond to network inputs.
virtual std::vector<StreamDescriptionPtr> GetStreamDescriptions() const = 0;
// Retrieves description of all sequences this data deserializer can produce.
virtual const SequenceDescriptions& GetSequenceDescriptions() const = 0;
// Sets epoch configuration.
virtual void StartEpoch(const EpochConfiguration& config) = 0;
// Gets sequences by id.
// The return value can be used until the next call to GetSequencesById.
// All non-owned pointers returned are valid till the next call to this method.
// NOTE(review): the nesting order of the returned vectors (per sequence vs. per stream)
// is not visible here - confirm against an implementation.
virtual std::vector<std::vector<SequenceDataPtr>> GetSequencesById(const std::vector<size_t>& ids) = 0;
// Requires the chunk. Each sequence is assigned to an I/O chunk by the data deserializer.
// This information is communicated through the GetSequenceDescriptions method.
// The randomizer guarantees that it accesses sequences only from a limited number of chunks.
// When the randomizer requires a sequence from a particular chunk it notifies the data deserializer,
// so that the data deserializer can load/cache sequences more efficiently (loading complete chunks in memory).
virtual void RequireChunk(size_t chunkIndex) = 0;
// Releases the chunk.
// When the randomizer has read all sequences from a particular chunk it notifies the data deserializer
// that the chunk can be freed.
virtual void ReleaseChunk(size_t chunkIndex) = 0;
virtual ~DataDeserializer() {};
};
typedef std::shared_ptr<DataDeserializer> DataDeserializerPtr;
} } }

Просмотреть файл

@ -0,0 +1,58 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
#pragma once
#include "DataDeserializer.h"
namespace Microsoft { namespace MSR { namespace CNTK {
// Convenience base for data deserializers: supplies no-op defaults for the optional
// notifications and caches the sequence descriptions produced by the derived class.
class DataDeserializerBase : public DataDeserializer
{
public:
    DataDeserializerBase()
        : m_sequencesInitialized(false)
    {
    }

    // Epoch configuration is ignored by default.
    void StartEpoch(const EpochConfiguration& /*config*/) override
    {
    }

    // Returns the descriptions of all sequences the deserializer can produce.
    // The derived class is asked to fill them on the first call only; later calls
    // return the cached timeline.
    const SequenceDescriptions& GetSequenceDescriptions() const override
    {
        if (m_sequencesInitialized)
        {
            return m_sequences;
        }

        FillSequenceDescriptions(m_sequences);
        m_sequencesInitialized = true;
        return m_sequences;
    }

    // Called by the randomizer to prefetch a chunk.
    // IO read-ahead is not implemented by default.
    void RequireChunk(size_t /*chunkIndex*/) override
    {
    }

    // Called by the randomizer to release a prefetched chunk.
    // IO read-ahead is not implemented by default.
    void ReleaseChunk(size_t /*chunkIndex*/) override
    {
    }

protected:
    // Fills 'timeline' with the sequence descriptions for all input data.
    // Must be provided by the derived class.
    virtual void FillSequenceDescriptions(SequenceDescriptions& timeline) const = 0;

    // Streams this data deserializer can produce.
    std::vector<StreamDescriptionPtr> m_streams;

private:
    // Non-copyable.
    DataDeserializerBase(const DataDeserializerBase&) = delete;
    DataDeserializerBase& operator=(const DataDeserializerBase&) = delete;

    // Lazily filled cache; mutable because it is populated from a const accessor.
    mutable SequenceDescriptions m_sequences;
    mutable bool m_sequencesInitialized;
};
}}}

Просмотреть файл

@ -0,0 +1,29 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
#pragma once
#include <vector>
#include <memory>
#include "Reader.h"
namespace Microsoft { namespace MSR { namespace CNTK {
// Returns the size in bytes of a single element of the given element type.
// Reports a runtime error for element types that are not handled below.
inline size_t GetSizeByType(ElementType type)
{
    switch (type)
    {
    case ElementType::tfloat:
        return sizeof(float);
    case ElementType::tdouble:
        return sizeof(double);
    case ElementType::tatom:
        return sizeof(char);
    default:
        // A scoped enum is not promoted to int when passed to a printf-style
        // formatter, so convert it explicitly to match the '%d' specifier.
        RuntimeError("Unsupported type '%d'", static_cast<int>(type));
    }
}
} } }

Просмотреть файл

@ -0,0 +1,30 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
#pragma once
#include <algorithm>
#include "MemoryProvider.h"
namespace Microsoft { namespace MSR { namespace CNTK {
// Memory provider backed by the global heap (::operator new / ::operator delete).
class HeapMemoryProvider : public MemoryProvider
{
public:
    // Allocates elementSize * numberOfElements bytes.
    // Currently not aligned beyond what ::operator new provides.
    // NOTE(review): the product is not checked for size_t overflow.
    virtual void* Alloc(size_t elementSize, size_t numberOfElements) override
    {
        return ::operator new(elementSize * numberOfElements);
    }

    // Releases storage previously obtained from Alloc.
    virtual void Free(void* p) override
    {
        ::operator delete(p);
    }
};
}}}

Просмотреть файл

@ -0,0 +1,29 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
#pragma once
#include <memory>
namespace Microsoft { namespace MSR { namespace CNTK {
//////////////////////////////////////////////////////////////////////////////////////////////////
// Interface used for allocating stream data returned by the reader.
// TODO: Should be injected by CNTK into the reader (will be a member of Matrix class).
//////////////////////////////////////////////////////////////////////////////////////////////////
class MemoryProvider
{
public:
// Allocates contiguous storage for the specified number of elements of the provided size (in bytes).
virtual void* Alloc(size_t elementSize, size_t numberOfElements) = 0;
// Frees contiguous storage previously returned by Alloc.
virtual void Free(void* ptr) = 0;
virtual ~MemoryProvider() { }
};
typedef std::shared_ptr<MemoryProvider> MemoryProviderPtr;
} } }

Просмотреть файл

@ -0,0 +1,86 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
#define _CRT_SECURE_NO_WARNINGS
#include <algorithm>
#include "NoRandomizer.h"
#include "DataReader.h"
namespace Microsoft { namespace MSR { namespace CNTK {
// Takes the timeline from the deserializer as is and verifies that every
// sequence consists of exactly one sample (frame mode).
NoRandomizer::NoRandomizer(DataDeserializerPtr deserializer)
    : m_deserializer(deserializer),
      m_config(), // zero-initialized so that GetNextSequences() before StartEpoch() sees an empty epoch
      m_samplePositionInEpoch(0),
      m_sequencePosition(0)
{
    assert(deserializer != nullptr);

    m_timeline = m_deserializer->GetSequenceDescriptions();
    for (const auto& sequence : m_timeline)
    {
        if (sequence->m_numberOfSamples != 1)
        {
            RuntimeError("Currently, no randomizer supports only frame mode. Received a sequence with %d number of samples.",
                         static_cast<int>(sequence->m_numberOfSamples));
        }
    }
}
// Nothing to set up: both the next transformer and the reader configuration are ignored.
void NoRandomizer::Initialize(TransformerPtr /*next*/, const ConfigParameters& /*readerConfig*/)
{
}
// Starts a new epoch: forwards the configuration to the deserializer, resolves the
// 'requestDataSize' placeholder to the size of the timeline and positions the
// reader at the sequence where the given epoch begins.
void NoRandomizer::StartEpoch(const EpochConfiguration& config)
{
    m_deserializer->StartEpoch(config);

    m_config = config;
    const size_t timelineSize = m_timeline.size();
    if (m_config.m_totalEpochSizeInSamples == requestDataSize)
    {
        // The whole data set was requested; in frame mode that is one sample per sequence.
        m_config.m_totalEpochSizeInSamples = timelineSize;
    }

    m_samplePositionInEpoch = 0;

    size_t globalSamplePosition = m_config.m_totalEpochSizeInSamples * config.m_epochIndex;
    // Guard against an empty timeline: the modulo below would divide by zero.
    m_sequencePosition = (timelineSize == 0) ? 0 : globalSamplePosition % timelineSize;
}
// Returns the sequences this worker should process out of the next 'sampleCount' samples.
// The batch is limited to the samples remaining in the epoch and then split into
// contiguous strides, one per worker; the timeline is traversed in its original order
// and wraps around at its end. m_endOfEpoch is set only when the epoch has already
// been consumed (or there is no data at all); in that case no data is returned.
Sequences NoRandomizer::GetNextSequences(size_t sampleCount)
{
    Sequences result;

    const size_t epochSize = m_config.m_totalEpochSizeInSamples;
    const size_t timelineSize = m_timeline.size();
    // An empty timeline cannot provide any sample; without this check the
    // modulo operations below would divide by zero.
    if (timelineSize == 0 || epochSize <= m_samplePositionInEpoch)
    {
        result.m_endOfEpoch = true;
        return result;
    }

    const size_t maxSampleCount = std::min(sampleCount, epochSize - m_samplePositionInEpoch);

    // Stride [start, end) of the batch that belongs to this worker.
    const size_t start = maxSampleCount * m_config.m_workerRank / m_config.m_numberOfWorkers;
    const size_t end = maxSampleCount * (m_config.m_workerRank + 1) / m_config.m_numberOfWorkers;
    const size_t subsetSize = end - start;

    std::vector<size_t> originalIds;
    originalIds.reserve(subsetSize);
    for (size_t i = start; i < end; ++i)
    {
        const auto& sequence = m_timeline[(m_sequencePosition + i) % timelineSize];
        assert(sequence->m_numberOfSamples == 1);
        originalIds.push_back(sequence->m_id);
    }

    // All workers advance by the complete batch, not only by their own stride.
    m_samplePositionInEpoch += maxSampleCount;
    m_sequencePosition = (m_sequencePosition + maxSampleCount) % timelineSize;

    if (originalIds.empty())
    {
        return result;
    }

    result.m_data = m_deserializer->GetSequencesById(originalIds);
    return result;
}
}}}

Просмотреть файл

@ -0,0 +1,45 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
#pragma once
#include <vector>
#include "Transformer.h"
#include "DataDeserializer.h"
namespace Microsoft { namespace MSR { namespace CNTK {
// The class represents a randomizer that does not randomize input (identity function over the original timeline).
// TODO: currently this code is moved over from the old block randomizer.
// The class will be refactored further and the parts in common with BlockRandomizer will be extracted.
// Currently works only for frame mode (number of samples in a sequence == 1) and without chunking.
class NoRandomizer : public Transformer
{
public:
// Reads the timeline from the deserializer; reports a runtime error for sequences with more than one sample.
NoRandomizer(DataDeserializerPtr deserializer);
// Does nothing; both arguments are ignored.
virtual void Initialize(TransformerPtr next, const ConfigParameters& readerConfig) override;
virtual void StartEpoch(const EpochConfiguration& config) override;
// Returns this worker's share of the next sampleCount samples in original timeline order.
virtual Sequences GetNextSequences(size_t sampleCount) override;
// Stream descriptions are forwarded unchanged from the underlying deserializer.
virtual std::vector<StreamDescriptionPtr> GetStreamDescriptions() const override
{
return m_deserializer->GetStreamDescriptions();
}
private:
// Deserializer and information on the original timeline
DataDeserializerPtr m_deserializer;
// Initial timeline.
SequenceDescriptions m_timeline;
// Epoch configuration
EpochConfiguration m_config;
// Number of samples of the current epoch handed out so far (over all workers).
size_t m_samplePositionInEpoch;
// Index into m_timeline of the next sequence to hand out.
size_t m_sequencePosition;
};
}}}

Просмотреть файл

@ -0,0 +1,108 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
#pragma once
#include <vector>
#include <memory>
#include "Sequences.h"
#include "TensorShape.h"
namespace Microsoft { namespace MSR { namespace CNTK {
typedef std::shared_ptr<TensorShape> TensorShapePtr;
struct MBLayout;
typedef std::shared_ptr<MBLayout> MBLayoutPtr;
// Configuration for the current epoch.
// Each time an epoch is started CNTK should provide the configuration to the reader using the StartEpoch method
// and the structure below.
struct EpochConfiguration
{
size_t m_numberOfWorkers; // Number of the Open MPI workers for the current epoch
size_t m_workerRank; // Rank of the Open MPI worker, worker rank has to be less than the number of workers
size_t m_minibatchSizeInSamples; // Maximum minibatch size for the epoch in samples
size_t m_totalEpochSizeInSamples; // Total size of the epoch in samples
size_t m_epochIndex; // Current epoch index [0 .. max number of epochs)
};
// Supported primitive element types, will be extended in the future.
enum class ElementType
{
tfloat, // single precision
tdouble, // double precision
tatom // sizeof(atom) == 1; building block of blobs, i.e. sequences of atoms (e.g. used for lattices, HMMs, etc.)
};
// Supported storage types, will be extended in the future.
enum class StorageType
{
dense, // see DenseSequenceData
sparse_csc, // sparse, csc format; see SparseSequenceData
};
typedef size_t StreamId;
// This structure describes a particular stream: its name, element type, storage, etc.
struct StreamDescription
{
std::wstring m_name; // Unique name of the stream
StreamId m_id; // Unique identifier of the stream
StorageType m_storageType; // Storage type of the stream
ElementType m_elementType; // Element type of the stream
TensorShapePtr m_sampleLayout; // Layout of the sample for the stream
// If not specified here, it can be specified per sequence
};
typedef std::shared_ptr<StreamDescription> StreamDescriptionPtr;
// Represents the minibatch data for a single stream, formatted according to the minibatch layout.
// This data is returned per stream as a part of Minibatch from the ReadMinibatch function.
// All raw non-owned pointers are valid till the next call to the ReadMinibatch function.
struct StreamMinibatch
{
void* m_data; // Contiguous array of data. Can be encoded in dense or sparse formats depending on the stream description.
size_t m_dataSize; // Data size in bytes.
MBLayoutPtr m_layout; // Layout of the data
};
typedef std::shared_ptr<StreamMinibatch> StreamMinibatchPtr;
// A single minibatch: the per-stream data plus the end-of-epoch marker.
struct Minibatch
{
    // A freshly created minibatch carries no data and does not mark the end of the epoch.
    Minibatch()
        : m_endOfEpoch(false)
    {
    }

    // Set to true for the last minibatch of an epoch.
    // m_data may still contain data when this flag is set.
    bool m_endOfEpoch;

    // Minibatch data, one entry per stream.
    std::vector<StreamMinibatchPtr> m_data;
};
//////////////////////////////////////////////////////////////////////////////////////////////////
// Main Reader interface. The border interface between the CNTK and reader libraries.
// TODO: Expected to change a little bit: stream matrices will be provided by the network as input.
//////////////////////////////////////////////////////////////////////////////////////////////////
class Reader
{
public:
// Describes the streams this reader produces.
virtual std::vector<StreamDescriptionPtr> GetStreamDescriptions() = 0;
// Starts a new epoch with the provided configuration
virtual void StartEpoch(const EpochConfiguration& config) = 0;
// Reads a minibatch that contains data across all streams.
// The returned Minibatch carries m_endOfEpoch to signal the last minibatch of the epoch.
virtual Minibatch ReadMinibatch() = 0;
virtual ~Reader() {};
};
typedef std::shared_ptr<Reader> ReaderPtr;
}}}

Просмотреть файл

@ -0,0 +1,93 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="12.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<ProjectGuid>{F0A9637C-20DA-42F0-83D4-23B4704DE602}</ProjectGuid>
<RootNamespace>Reader</RootNamespace>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Label="Configuration">
<ConfigurationType>StaticLibrary</ConfigurationType>
<PlatformToolset>v120</PlatformToolset>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
<UseDebugLibraries>true</UseDebugLibraries>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
<UseDebugLibraries>false</UseDebugLibraries>
<WholeProgramOptimization>true</WholeProgramOptimization>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
</ImportGroup>
<ImportGroup Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup>
<OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
<IntDir>$(Platform)\$(Configuration)\$(ProjectName)\</IntDir>
</PropertyGroup>
<ItemDefinitionGroup>
<ClCompile>
<WarningLevel>Level4</WarningLevel>
<SDLCheck>true</SDLCheck>
<TreatWarningAsError>true</TreatWarningAsError>
<AdditionalIncludeDirectories>..\..\common\include;..\..\math</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<ClCompile>
<Optimization>Disabled</Optimization>
</ClCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<ClCompile>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
</ClCompile>
<Link>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
</Link>
</ItemDefinitionGroup>
<ItemGroup>
<ClInclude Include="DataDeserializerBase.h" />
<ClInclude Include="TransformerBase.h" />
<ClInclude Include="BlockRandomizer.h" />
<ClInclude Include="NoRandomizer.h" />
<ClInclude Include="CudaMemoryProvider.h" />
<ClInclude Include="DataDeserializer.h" />
<ClInclude Include="ElementTypeUtils.h" />
<ClInclude Include="SampleModePacker.h" />
<ClInclude Include="HeapMemoryProvider.h" />
<ClInclude Include="MemoryProvider.h" />
<ClInclude Include="Reader.h" />
<ClInclude Include="ReaderShim.h" />
<ClInclude Include="StringUtils.h" />
<ClInclude Include="Transformer.h" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="BlockRandomizer.cpp" />
<ClCompile Include="NoRandomizer.cpp" />
<ClCompile Include="SampleModePacker.cpp" />
<ClCompile Include="ReaderShim.cpp" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>

Просмотреть файл

@ -0,0 +1,84 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup>
<ClInclude Include="BlockRandomizer.h">
<Filter>Randomizers</Filter>
</ClInclude>
<ClInclude Include="NoRandomizer.h">
<Filter>Randomizers</Filter>
</ClInclude>
<ClInclude Include="CudaMemoryProvider.h">
<Filter>MemoryProviders</Filter>
</ClInclude>
<ClInclude Include="HeapMemoryProvider.h">
<Filter>MemoryProviders</Filter>
</ClInclude>
<ClInclude Include="ReaderShim.h">
<Filter>Utils</Filter>
</ClInclude>
<ClInclude Include="Reader.h">
<Filter>Interfaces</Filter>
</ClInclude>
<ClInclude Include="DataDeserializer.h">
<Filter>Interfaces</Filter>
</ClInclude>
<ClInclude Include="MemoryProvider.h">
<Filter>Interfaces</Filter>
</ClInclude>
<ClInclude Include="Transformer.h">
<Filter>Interfaces</Filter>
</ClInclude>
<ClInclude Include="ElementTypeUtils.h">
<Filter>Utils</Filter>
</ClInclude>
<ClInclude Include="StringUtils.h">
<Filter>Utils</Filter>
</ClInclude>
<ClInclude Include="TransformerBase.h">
<Filter>Transformers</Filter>
</ClInclude>
<ClInclude Include="DataDeserializerBase.h">
<Filter>Deserializers</Filter>
</ClInclude>
<ClInclude Include="SampleModePacker.h">
<Filter>Packers</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<ClCompile Include="BlockRandomizer.cpp">
<Filter>Randomizers</Filter>
</ClCompile>
<ClCompile Include="NoRandomizer.cpp">
<Filter>Randomizers</Filter>
</ClCompile>
<ClCompile Include="ReaderShim.cpp">
<Filter>Utils</Filter>
</ClCompile>
<ClCompile Include="SampleModePacker.cpp">
<Filter>Packers</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<Filter Include="Interfaces">
<UniqueIdentifier>{0a7ac112-ad49-4d5e-83d8-a7640b199568}</UniqueIdentifier>
</Filter>
<Filter Include="Randomizers">
<UniqueIdentifier>{0e2c2f22-20f2-42c1-a5d1-d9ac7a4bd33a}</UniqueIdentifier>
</Filter>
<Filter Include="Packers">
<UniqueIdentifier>{b7203d1c-fd6e-40de-9680-ba4042ce7c7f}</UniqueIdentifier>
</Filter>
<Filter Include="MemoryProviders">
<UniqueIdentifier>{6ada575c-9f2c-476f-8c97-8617e1ecd01a}</UniqueIdentifier>
</Filter>
<Filter Include="Utils">
<UniqueIdentifier>{3cd09e43-18cb-48ad-a5f7-89553c063d8c}</UniqueIdentifier>
</Filter>
<Filter Include="Deserializers">
<UniqueIdentifier>{2f73a66b-9551-41c4-91c5-cf51537b9feb}</UniqueIdentifier>
</Filter>
<Filter Include="Transformers">
<UniqueIdentifier>{90d4b51b-73ae-47f5-9a9e-97ef287dcead}</UniqueIdentifier>
</Filter>
</ItemGroup>
</Project>

Просмотреть файл

@ -0,0 +1,158 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
// ReaderShim.cpp: implementation for shim wrapping the new reader interface
//
#define _CRT_SECURE_NO_WARNINGS
#ifdef _WIN32
#include <objbase.h>
#endif
#include "Basics.h"
#define DATAREADER_EXPORTS // creating the exports here
#include "DataReader.h"
//#include "commandArgUtil.h"
#include "ReaderShim.h"
namespace Microsoft { namespace MSR { namespace CNTK {
// Stores the factory used by Init() to create the actual reader.
// All scalar members get a defined value here: GetMinibatch() reads m_endOfEpoch and the
// prefetch code reads m_launchType, and neither is otherwise set before Init()/Start*MinibatchLoop().
// Initializers are listed in member declaration order.
template <class ElemType>
ReaderShim<ElemType>::ReaderShim(ReaderFactory factory)
    : m_factory(factory),
      m_endOfEpoch(false),
      m_layout(make_shared<MBLayout>()),
      m_launchType(launch::async) // same as the default of the 'prefetch' option in Init()
{
}
// Creates the underlying reader through the factory and caches its stream descriptions.
// Configuration values read here:
//   nbruttsineachrecurrentiter - number of parallel sequences (defaults to 1); only the first entry is used.
//   prefetch                   - when true (default) minibatches are read asynchronously ahead of time.
template <class ElemType>
void ReaderShim<ElemType>::Init(const ConfigParameters& config)
{
    intargvector numSeqsPerMBForAllEpochs =
        config(L"nbruttsineachrecurrentiter", ConfigParameters::Array(intargvector(vector<int> { 1 })));

    bool prefetch = config(L"prefetch", true);
    // if prefetch - launching asynchronously,
    // otherwise deferring - synchronous execution during .get() call
    m_launchType = prefetch ? launch::async : launch::deferred;

    m_layout->Init(numSeqsPerMBForAllEpochs[0], 0);

    m_reader = m_factory(config);
    m_streams = m_reader->GetStreamDescriptions();

    // Rebuild the name lookup from scratch so that a repeated Init() leaves no stale entries.
    m_nameToStreamId.clear();
    for (const auto& stream : m_streams)
    {
        m_nameToStreamId.insert(std::make_pair(stream->m_name, stream->m_id));
    }
}
// Non-distributed entry point: a single worker (rank 0 of 1) reads the whole epoch.
// Note: a default argument must not be repeated/added on the out-of-class definition of a
// member of a class template, so it is only documented here (as in StartDistributedMinibatchLoop).
template <class ElemType>
void ReaderShim<ElemType>::StartMinibatchLoop(size_t mbSize, size_t epoch, size_t requestedEpochSamples /*= requestDataSize*/)
{
    StartDistributedMinibatchLoop(mbSize, epoch, 0, 1, requestedEpochSamples);
}
// Starts a new epoch on the underlying reader and kicks off reading of the first minibatch.
//   requestedMBSize       - maximum minibatch size in samples
//   epoch                 - index of the epoch to start
//   subsetNum/numSubsets  - rank of this worker and total number of workers
//   requestedEpochSamples - size of the epoch in samples
template <class ElemType>
void ReaderShim<ElemType>::StartDistributedMinibatchLoop(
    size_t requestedMBSize,
    size_t epoch,
    size_t subsetNum,
    size_t numSubsets,
    size_t requestedEpochSamples /*= requestDataSize*/)
{
    // Make sure no read from the previous epoch is still running: StartEpoch() below must not
    // execute concurrently with ReadMinibatch() on the same reader. A deferred task has not
    // started and is simply dropped when m_prefetchTask is reassigned.
    if (m_prefetchTask.valid() && m_launchType == launch::async)
    {
        m_prefetchTask.wait();
    }

    EpochConfiguration config;
    config.m_workerRank = subsetNum;
    config.m_numberOfWorkers = numSubsets;
    config.m_minibatchSizeInSamples = requestedMBSize;
    config.m_totalEpochSizeInSamples = requestedEpochSamples;
    config.m_epochIndex = epoch;

    m_reader->StartEpoch(config);
    m_endOfEpoch = false;

    m_prefetchTask = std::async(m_launchType, [this]()
    {
        return m_reader->ReadMinibatch();
    });
}
// Copies the next (prefetched) minibatch into the given input matrices, keyed by stream name.
// Returns true when data was delivered and false when the epoch is over.
// Reports a runtime error if a requested input name does not correspond to any stream of the reader.
template <class ElemType>
bool ReaderShim<ElemType>::GetMinibatch(std::map<std::wstring, Matrix<ElemType>*>& matrices)
{
    if (m_endOfEpoch)
    {
        return false;
    }

    // Check that all matrices have the same device id.
    // If not we should inject the IMemoryProvider per stream.
    if (!matrices.empty())
    {
        int deviceId = matrices.begin()->second->GetDeviceId();
        for (const auto& mx : matrices)
        {
            if (mx.second->GetDeviceId() != deviceId)
            {
                assert(false);
            }
        }
    }

    assert(m_prefetchTask.valid());

    Minibatch minibatch = m_prefetchTask.get();
    if (minibatch.m_endOfEpoch)
    {
        m_endOfEpoch = true;
        if (minibatch.m_data.empty())
        {
            return false;
        }
    }

    if (!minibatch.m_data.empty())
    {
        // Copy returned minibatch to the matrices.
        for (const auto& mx : matrices)
        {
            // Look the stream up without operator[], which would silently insert id 0 for unknown names.
            const auto streamIdEntry = m_nameToStreamId.find(mx.first);
            if (streamIdEntry == m_nameToStreamId.end())
            {
                RuntimeError("GetMinibatch: the reader does not provide a stream for input '%ls'.", mx.first.c_str());
            }

            size_t streamId = streamIdEntry->second;
            const auto& stream = minibatch.m_data[streamId];
            m_layout = stream->m_layout;

            size_t columnNumber = m_layout->GetNumCols();
            size_t rowNumber = m_streams[streamId]->m_sampleLayout->GetNumElements();

            auto data = reinterpret_cast<const ElemType*>(stream->m_data);
            mx.second->SetValue(rowNumber, columnNumber, mx.second->GetDeviceId(), const_cast<ElemType*>(data), matrixFlagNormal);
        }
    }

    // Read ahead only while the epoch is still running; the next epoch schedules its own first read.
    if (!m_endOfEpoch)
    {
        m_prefetchTask = std::async(m_launchType, [this]()
        {
            return m_reader->ReadMinibatch();
        });
    }

    return !minibatch.m_data.empty();
}
// Always reports false, whatever kind of data end is asked about.
template <class ElemType>
bool ReaderShim<ElemType>::DataEnd(EndDataType /*endDataType*/)
{
    const bool endReached = false;
    return endReached;
}
// Copies the layout currently held by the shim into the layout supplied by the caller.
template <class ElemType>
void ReaderShim<ElemType>::CopyMBLayoutTo(MBLayoutPtr layout)
{
    const MBLayoutPtr& currentLayout = m_layout;
    layout->CopyFrom(currentLayout);
}
// Returns the number of parallel sequences recorded in the current layout.
template <class ElemType>
size_t ReaderShim<ElemType>::GetNumParallelSequences()
{
    const size_t parallelSequences = m_layout->GetNumParallelSequences();
    return parallelSequences;
}
template class ReaderShim<float>;
template class ReaderShim<double>;
} } }

Просмотреть файл

@ -0,0 +1,68 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
// ReaderShim.h: Currently we are preserving the old interface in SGD. So this shim exposes the old interface and calls into the
// reader implemented with the new interfaces (reader/packer/transforms/serializers)
//
#pragma once
#include <map>
#include <string>
#include "DataReader.h"
#include <future>
#include "Reader.h"
namespace Microsoft { namespace MSR { namespace CNTK {
typedef ReaderPtr (*ReaderFactory)(const ConfigParameters& parameters);
// Adapter that exposes the old IDataReader interface on top of a reader implemented
// with the new Reader interface. The actual reader is created by the factory in Init().
template <class ElemType>
class ReaderShim : public IDataReader<ElemType>
{
public:
    explicit ReaderShim(ReaderFactory factory);
    virtual ~ReaderShim() { }

    // Initialization from an IConfigRecord is not supported by the shim (asserts in debug builds).
    virtual void Init(const ScriptableObjects::IConfigRecord& /*config*/) override
    {
        assert(false);
    }

    // Creates the underlying reader and reads the shim options from the configuration.
    virtual void Init(const ConfigParameters& config) override;

    // The shim deletes itself when destroyed through the interface.
    virtual void Destroy() override
    {
        delete this;
    }

    virtual void StartMinibatchLoop(size_t mbSize, size_t epoch, size_t requestedEpochSamples) override;
    virtual void StartDistributedMinibatchLoop(size_t requestedMBSize, size_t epoch, size_t subsetNum, size_t numSubsets, size_t requestedEpochSamples) override;

    virtual bool SupportsDistributedMBRead() const override
    {
        return true;
    }

    // Copies the next minibatch into the matrices; returns false when the epoch is over.
    virtual bool GetMinibatch(std::map<std::wstring, Matrix<ElemType>*>& matrices) override;

    virtual bool DataEnd(EndDataType endDataType) override;

    void CopyMBLayoutTo(MBLayoutPtr) override;

    virtual size_t GetNumParallelSequences() override;

private:
    std::future<Minibatch> m_prefetchTask; // pending read of the next minibatch
    ReaderPtr m_reader;                    // reader created by m_factory in Init()
    ReaderFactory m_factory;
    bool m_endOfEpoch;                     // true once the reader reported the end of the current epoch
    MBLayoutPtr m_layout;                  // layout of the most recently delivered stream data
    std::map<std::wstring, size_t> m_nameToStreamId; // stream name -> stream id, filled in Init()
    std::vector<StreamDescriptionPtr> m_streams;
    std::launch m_launchType;              // async when prefetching, deferred otherwise
};
}}}

Просмотреть файл

@ -0,0 +1,152 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
#define _CRT_SECURE_NO_WARNINGS
#define _SCL_SECURE_NO_WARNINGS
#include "SampleModePacker.h"
#include "ElementTypeUtils.h"
namespace Microsoft { namespace MSR { namespace CNTK {
// Creates a packer on top of the given transformer.
//   memoryProvider - provider the per-stream minibatch buffers are obtained from.
//   transformer    - source of sequences; its stream descriptions become the packer's input streams.
//   minibatchSize  - number of samples every stream buffer is sized for; must be positive.
//   streams        - descriptions of the output streams, expected to match the input streams
//                    one to one (name, id, sample size) and to be non-sparse.
SampleModePacker::SampleModePacker(
    MemoryProviderPtr memoryProvider,
    TransformerPtr transformer,
    size_t minibatchSize,
    const std::vector<StreamDescriptionPtr>& streams)
    // Initializers are listed in member declaration order, which is the order they run in.
    : m_memoryProvider(memoryProvider),
      m_transformer(transformer),
      m_outputStreams(streams),
      m_minibatchLayout(std::make_shared<MBLayout>()),
      m_minibatchSize(minibatchSize)
{
    m_inputStreams = m_transformer->GetStreamDescriptions();
    assert(m_inputStreams.size() == m_outputStreams.size());

    // None of the output streams may be sparse.
    assert(
        std::find_if(
            m_outputStreams.begin(),
            m_outputStreams.end(),
            [](const StreamDescriptionPtr& s)
            {
                return s->m_storageType == StorageType::sparse_csc;
            }) == m_outputStreams.end());

    assert(m_minibatchSize > 0);

    m_streamBuffers.reserve(m_outputStreams.size());
    for (size_t i = 0; i < m_outputStreams.size(); ++i)
    {
        const auto& stream = m_outputStreams[i];

        // Input and output should match in everything except for sparse/dense.
        assert(stream->m_elementType == ElementType::tfloat || stream->m_elementType == ElementType::tdouble);
        assert(stream->m_name == m_inputStreams[i]->m_name);
        assert(stream->m_id == m_inputStreams[i]->m_id);
        assert(GetSampleSize(m_inputStreams[i]) == GetSampleSize(stream));

        // One buffer per stream, large enough for a full minibatch of samples.
        m_streamBuffers.push_back(
            AllocateBuffer(m_minibatchSize * stream->m_sampleLayout->GetNumElements(), GetSizeByType(stream->m_elementType)));
    }
}
// Pulls the next batch of sequences from the transformer and packs it into the stream buffers.
// The returned minibatch carries the end-of-epoch flag of the batch; when the batch holds data,
// it contains one entry per output stream pointing into the packer's buffer for that stream,
// all of them sharing a single frame-mode layout.
Minibatch SampleModePacker::ReadMinibatch()
{
    auto batch = m_transformer->GetNextSequences(m_minibatchSize);
    const auto& rows = batch.m_data;

    Minibatch result;
    result.m_endOfEpoch = batch.m_endOfEpoch;

    // No data delivered: only the end-of-epoch flag is reported.
    if (rows.empty())
    {
        return result;
    }

    // Walk over all sequences of the batch and copy the data of every stream into its buffer.
    const size_t sequenceCount = rows.size();
    for (size_t sequenceIndex = 0; sequenceIndex < sequenceCount; ++sequenceIndex)
    {
        const auto& perStream = rows[sequenceIndex];
        assert(m_streamBuffers.size() == perStream.size());
        for (size_t streamIndex = 0; streamIndex < perStream.size(); ++streamIndex)
        {
            CopySequenceToBuffer(sequenceIndex, streamIndex, rows);
        }
    }

    // Build the output minibatch; the layout is shared between all streams.
    m_minibatchLayout->InitAsFrameMode(sequenceCount);
    for (size_t i = 0; i < m_outputStreams.size(); ++i)
    {
        auto packed = std::make_shared<StreamMinibatch>();
        packed->m_data = m_streamBuffers[i].get();
        packed->m_dataSize = sequenceCount * GetSampleSize(m_outputStreams[i]);
        packed->m_layout = m_minibatchLayout;
        result.m_data.push_back(packed);
    }
    return result;
}
// Size in bytes of one sample of the given stream: the number of elements in the stream's
// sample layout multiplied by the byte size of the stream's element type.
size_t SampleModePacker::GetSampleSize(StreamDescriptionPtr stream)
{
    assert(stream != nullptr);
    const size_t bytesPerElement = GetSizeByType(stream->m_elementType);
    const size_t elementsPerSample = stream->m_sampleLayout->GetNumElements();
    return elementsPerSample * bytesPerElement;
}
void SampleModePacker::CopySequenceToBuffer(size_t sampleIndex, size_t streamIndex, const std::vector<std::vector<SequenceDataPtr>>& sequences)
{
// In framemode sequence just contains a single sample.
const auto& sample = sequences[sampleIndex][streamIndex];
size_t sampleSize = GetSampleSize(m_inputStreams[streamIndex]);
auto sampleData = reinterpret_cast<const char*>(sample->m_data);
const auto& stream = m_inputStreams[streamIndex];
auto elementSize = GetSizeByType(stream->m_elementType);
auto buffer = m_streamBuffers[streamIndex].get();
if (stream->m_storageType == StorageType::dense)
{
auto data = reinterpret_cast<DenseSequenceData&>(*sample);
// Expect single sample.
assert(data.m_numberOfSamples == 1);
// Copying the sequence to its position in the buffer. Effectivly a buffer contains concatenation of samples for a stream.
std::copy(sampleData, sampleData + sampleSize, buffer + sampleIndex * sampleSize);
}
else if (stream->m_storageType == StorageType::sparse_csc)
{
auto data = reinterpret_cast<SparseSequenceData&>(*sample);
// Expect single sample.
assert(data.m_indices.size() == 1);
// Currently sparse data has to be unpacked to the dense one. Possibly can be done later
// in the network or as a transformation.
// Fill it in with zeros.
std::fill(buffer + sampleIndex * sampleSize, buffer + (sampleIndex + 1) * sampleSize, 0);
// Copy the non zero data to the buffer.
size_t nonZeroCount = data.m_indices[0].size();
for (size_t nonZeroIndex = 0; nonZeroIndex < nonZeroCount; ++nonZeroIndex)
{
size_t rowIndex = data.m_indices[0][nonZeroIndex];
char* destination = buffer + sampleIndex * sampleSize + rowIndex * elementSize;
std::copy(sampleData + nonZeroIndex * elementSize, sampleData + (nonZeroIndex + 1) * elementSize, destination);
}
}
else
{
RuntimeError("Storage type %d is not supported.", m_inputStreams[streamIndex]->m_storageType);
}
}
std::shared_ptr<char> SampleModePacker::AllocateBuffer(size_t numElements, size_t elementSize)
{
return std::shared_ptr<char>(
reinterpret_cast<char*>(m_memoryProvider->Alloc(elementSize, numElements)),
[this](char* p)
{
m_memoryProvider->Free(p);
});
}
} } }

Просмотреть файл

@ -0,0 +1,42 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
#pragma once
#include "Reader.h"
#include "MemoryProvider.h"
#include "Transformer.h"
namespace Microsoft { namespace MSR { namespace CNTK {
// A sample packer that densely packs samples in parallel for GPU consumptions.
// It pulls sequences from a transformer and copies them into one contiguous buffer per stream.
class SampleModePacker
{
public:
// Takes the transformer's stream descriptions as input streams and allocates one buffer per
// output stream, sized for minibatchSize samples of that stream.
SampleModePacker(
MemoryProviderPtr memoryProvider,
TransformerPtr transformer,
size_t minibatchSize,
const std::vector<StreamDescriptionPtr>& streams);
// Requests up to m_minibatchSize samples from the transformer, copies them into the stream
// buffers and returns a minibatch whose streams point into those buffers and share one layout.
// The returned minibatch has no streams when the transformer delivered no data.
Minibatch ReadMinibatch();
private:
// Obtains a buffer from the memory provider; the buffer is released through the provider as well.
std::shared_ptr<char> AllocateBuffer(size_t numElements, size_t elementSize);
// Byte size of one sample of the stream (elements of the sample layout times element size).
size_t GetSampleSize(StreamDescriptionPtr stream);
// Copies the sample at [sequenceIndex][streamIndex] of 'sequences' into its slot of the stream buffer.
void CopySequenceToBuffer(size_t sequenceIndex, size_t streamIndex, const std::vector<std::vector<SequenceDataPtr>>& sequences);
// NOTE: data members are initialized in the order they are declared here.
MemoryProviderPtr m_memoryProvider;
TransformerPtr m_transformer;
// Stream descriptions passed to the constructor.
std::vector<StreamDescriptionPtr> m_outputStreams;
// Stream descriptions reported by the transformer.
std::vector<StreamDescriptionPtr> m_inputStreams;
// One buffer per stream, in the same order as the streams.
std::vector<std::shared_ptr<char>> m_streamBuffers;
// Layout shared by all streams of a returned minibatch.
MBLayoutPtr m_minibatchLayout;
size_t m_minibatchSize;
};
typedef std::shared_ptr<SampleModePacker> SampleModePackerPtr;
} } }

Просмотреть файл

@ -0,0 +1,22 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
#pragma once
#include <algorithm>
#include <cctype>
#include <locale>
#include <string>
namespace Microsoft { namespace MSR { namespace CNTK {
// Compares two ASCII strings ignoring the case.
// Returns true only if both strings have the same length and all characters match
// case-insensitively; strings of different length never compare equal.
// TODO: Should be moved to common CNTK library and after switching to boost, boost::iequal should be used instead.
inline bool AreEqualIgnoreCase(const std::string& s1, const std::string& s2)
{
    // The three-iterator form of std::equal assumes the second range is at least as long as
    // the first one, so the lengths have to be compared up front.
    if (s1.size() != s2.size())
    {
        return false;
    }

    return std::equal(s1.begin(), s1.end(), s2.begin(), [](const char& a, const char& b)
    {
        // std::tolower is only defined for values representable as unsigned char (or EOF).
        return std::tolower(static_cast<unsigned char>(a)) == std::tolower(static_cast<unsigned char>(b));
    });
}
} } }

Просмотреть файл

@ -0,0 +1,61 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
#pragma once
#include <vector>
#include "DataDeserializer.h"
namespace Microsoft { namespace MSR { namespace CNTK {
class ConfigParameters;
// A batch of sequences handed from one stage of the reader to the next.
struct Sequences
{
    Sequences() : m_endOfEpoch(false) {}

    // Data for up to the requested number of sequences.
    // Outer index: sequence inside the batch. Inner index: stream; it has to correspond
    // to the stream IDs given by GetStreamDescriptions().
    std::vector<std::vector<SequenceDataPtr>> m_data;

    // Set when the epoch ends with the data returned in this batch.
    bool m_endOfEpoch;
};
class Transformer;
typedef std::shared_ptr<Transformer> TransformerPtr;
// Defines a data transformation interface.
// Transformers are responsible for doing custom transformation of sequences.
// For example for images, there could be scale, crop, or median transformation.
// TODO: Adapt to the C#/Java iterator pattern.
class Transformer
{
public:
// Initialization.
// 'next' is another transformer this one is chained to; 'readerConfig' is the reader configuration.
virtual void Initialize(
TransformerPtr next,
const ConfigParameters& readerConfig) = 0;
// Describes streams the transformer produces.
virtual std::vector<StreamDescriptionPtr> GetStreamDescriptions() const = 0;
// Sets current epoch configuration.
virtual void StartEpoch(const EpochConfiguration& config) = 0;
// Gets next sequences up to a maximum count of samples.
// The return value can be used until the next call to GetNextSequences.
// The end-of-epoch flag of the result tells whether the epoch ends with the returned data.
virtual Sequences GetNextSequences(size_t sampleCount) = 0;
// Virtual destructor, so that implementations can be destroyed through the interface.
virtual ~Transformer()
{
}
};
} } }

Просмотреть файл

@ -0,0 +1,104 @@
//
// <copyright company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
#pragma once
#include <set>
#include "Transformer.h"
namespace Microsoft { namespace MSR { namespace CNTK {
// Base class for transformers that rewrite selected streams sequence by sequence.
// Derived classes name the streams they apply to (GetAppliedStreamIds) and implement Apply();
// chaining to the next transformer and iterating over a batch is done here.
// Currently supports only dense data format.
template <class TBufferElement>
class TransformerBase : public Transformer
{
public:
    // Initializes the transformer: remembers the next transformer in the chain and
    // takes its stream descriptions as the input streams of this transformer.
    virtual void Initialize(TransformerPtr next,
                            const ConfigParameters &) override
    {
        m_next = next;
        m_inputStreams = m_next->GetStreamDescriptions();
    }

    // Sets configuration for the current epoch by forwarding it to the next transformer.
    virtual void StartEpoch(const EpochConfiguration &config) override
    {
        assert(m_next != nullptr);
        m_next->StartEpoch(config);
    }

    // Description of streams that the transformer provides.
    virtual std::vector<StreamDescriptionPtr> GetStreamDescriptions() const override
    {
        return this->GetOutputStreams();
    }

    // Gets next sequences up to a maximum count of samples.
    // Sequences contains data for all streams; the applied streams are replaced
    // by the result of Apply(), all other streams are passed through unchanged.
    virtual Sequences GetNextSequences(size_t sampleCount) override
    {
        assert(m_next != nullptr);
        Sequences samples = m_next->GetNextSequences(sampleCount);

        // Only an empty batch may be passed through untouched. The end-of-epoch flag can
        // accompany the last data of the epoch, and that data still has to be transformed.
        if (samples.m_data.empty())
        {
            return samples;
        }

        const auto &appliedStreamIds = GetAppliedStreamIds();
        const auto &outputStreams = GetOutputStreams();
        assert(m_inputStreams.size() == outputStreams.size());
        m_buffer.resize(samples.m_data.size());

        // The parallel loop uses a signed counter, so the count is converted once up front.
        const int numSequences = static_cast<int>(samples.m_data.size());

#pragma omp parallel for ordered schedule(dynamic)
        for (int i = 0; i < numSequences; ++i)
        {
            auto &sample = samples.m_data[i];
            assert(sample.size() == m_inputStreams.size());

            // One scratch buffer element per applied stream of this sequence.
            m_buffer[i].resize(appliedStreamIds.size());
            for (size_t j = 0; j < appliedStreamIds.size(); ++j)
            {
                size_t id = appliedStreamIds[j];
                assert(m_inputStreams[id]->m_storageType == StorageType::dense);
                const DenseSequenceData &sequence =
                    reinterpret_cast<DenseSequenceData &>(*sample[id]);
                sample[id] = Apply(sequence, *m_inputStreams[id], m_buffer[i][j],
                                   *outputStreams[id]);
            }
        }

        return samples;
    }

protected:
    // IDs of the streams Apply() has to be called for.
    virtual const std::vector<StreamId> &GetAppliedStreamIds() const = 0;

    // Streams produced by the transformer; by default identical to the input streams.
    virtual const std::vector<StreamDescriptionPtr> &GetOutputStreams() const
    {
        return m_inputStreams;
    }

    // Streams delivered by the next transformer, as captured in Initialize().
    const std::vector<StreamDescriptionPtr> &GetInputStreams()
    {
        return m_inputStreams;
    }

private:
    // Applies transformation to the sequence.
    // 'buffer' is the scratch element reserved for this sequence and stream.
    virtual SequenceDataPtr Apply(const DenseSequenceData &inputSequence,
                                  const StreamDescription &inputStream,
                                  TBufferElement &buffer,
                                  const StreamDescription &outputStream) = 0;

    TransformerPtr m_next;
    // Not referenced by any code of this class.
    std::vector<StreamId> m_featureStreamIds;
    // Scratch buffers indexed as [sequence in batch][position in applied stream IDs].
    std::vector<std::vector<TBufferElement>> m_buffer;
    std::vector<StreamDescriptionPtr> m_inputStreams;
};
}}}

Просмотреть файл

@ -0,0 +1,65 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
#include "stdafx.h"
#include "BlockRandomizer.h"
#include "DataDeserializer.h"
using namespace Microsoft::MSR::CNTK;
namespace Microsoft { namespace MSR { namespace CNTK { namespace Test {
BOOST_AUTO_TEST_SUITE(ReaderLibTests)
// Minimal DataDeserializer stub for the tests: it exposes no streams, hands out an
// untouched set of sequence descriptions and ignores every request it receives.
class MockDeserializer : public DataDeserializer
{
public:
    std::vector<StreamDescriptionPtr> GetStreamDescriptions() const override
    {
        return std::vector<StreamDescriptionPtr>();
    }

    const SequenceDescriptions& GetSequenceDescriptions() const override
    {
        return m_sequenceDescriptions;
    }

    // The epoch configuration is ignored.
    void StartEpoch(const EpochConfiguration& /*config*/) override
    {
    }

    // Always returns an empty result, whatever IDs are requested.
    std::vector<std::vector<SequenceDataPtr>> GetSequencesById(const std::vector<size_t>& /*ids*/) override
    {
        std::vector<std::vector<SequenceDataPtr>> nothing;
        return nothing;
    }

    // Chunk requests are accepted and ignored.
    void RequireChunk(size_t /*chunkIndex*/) override
    {
    }

    void ReleaseChunk(size_t /*chunkIndex*/) override
    {
    }

private:
    // Default-constructed and never modified by the mock.
    SequenceDescriptions m_sequenceDescriptions;
};
// Smoke test: a BlockRandomizer can be constructed on top of the mock deserializer.
BOOST_AUTO_TEST_CASE(BlockRandomizerInstantiate)
{
    auto deserializer = std::make_shared<MockDeserializer>();
    auto blockRandomizer = std::make_shared<BlockRandomizer>(0, SIZE_MAX, deserializer);
}
BOOST_AUTO_TEST_SUITE_END()
} } } }

Просмотреть файл

@ -75,14 +75,14 @@
<PropertyGroup Label="UserMacros" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64' Or '$(Configuration)|$(Platform)'=='Debug_CpuOnly|x64'">
<LinkIncremental>false</LinkIncremental>
<IncludePath>..\..\..\Source\Common\include;..\..\..\Source\Math;$(IncludePath)</IncludePath>
<IncludePath>..\..\..\Source\Readers\Reader;..\..\..\Source\Common\include;..\..\..\Source\Math;$(IncludePath)</IncludePath>
<LibraryPath>$(OutDir);$(LibraryPath)</LibraryPath>
<OutDir>$(SolutionDir)$(Platform)\$(Configuration)\UnitTests\</OutDir>
<IntDir>$(Platform)\$(Configuration)\$(ProjectName)\</IntDir>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64' Or '$(Configuration)|$(Platform)'=='Release_CpuOnly|x64'">
<LinkIncremental>false</LinkIncremental>
<IncludePath>..\..\..\Source\Common\include;..\..\..\Source\Math;$(IncludePath)</IncludePath>
<IncludePath>..\..\..\Source\Readers\Reader;..\..\..\Source\Common\include;..\..\..\Source\Math;$(IncludePath)</IncludePath>
<LibraryPath>$(OutDir);$(LibraryPath)</LibraryPath>
<OutDir>$(SolutionDir)$(Platform)\$(Configuration)\UnitTests\</OutDir>
<IntDir>$(Platform)\$(Configuration)\$(ProjectName)\</IntDir>
@ -102,7 +102,7 @@
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalLibraryDirectories>$(BOOST_LIB_PATH);$(OutDir)..\;</AdditionalLibraryDirectories>
<AdditionalDependencies>htkmlfreader.lib;Math.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>htkmlfreader.lib;Math.lib;Reader.lib;%(AdditionalDependencies)</AdditionalDependencies>
<OptimizeReferences>true</OptimizeReferences>
</Link>
</ItemDefinitionGroup>
@ -127,7 +127,7 @@
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<AdditionalLibraryDirectories>$(BOOST_LIB_PATH);$(OutDir)..\;</AdditionalLibraryDirectories>
<AdditionalDependencies>htkmlfreader.lib;Math.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>htkmlfreader.lib;Math.lib;Reader.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="!$(UsesCuda)">
@ -149,6 +149,7 @@
<ClCompile Include="..\..\..\Source\Common\fileutil.cpp" />
<ClCompile Include="..\..\..\Source\Common\TimerUtility.cpp" />
<ClCompile Include="HTKLMFReaderTests.cpp" />
<ClCompile Include="ReaderLibTests.cpp" />
<ClCompile Include="stdafx.cpp">
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64' Or '$(Configuration)|$(Platform)'=='Debug_CpuOnly|x64'">Create</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64' Or '$(Configuration)|$(Platform)'=='Release_CpuOnly|x64'">Create</PrecompiledHeader>

Просмотреть файл

@ -32,6 +32,7 @@
<ClCompile Include="..\..\..\Source\Common\Config.cpp">
<Filter>Common</Filter>
</ClCompile>
<ClCompile Include="ReaderLibTests.cpp" />
</ItemGroup>
<ItemGroup>
<Filter Include="Common">