Adding support for uchar
This commit is contained in:
Parent
3734f7d229
Commit
789b079c6f
@@ -470,14 +470,12 @@ void TextParser<ElemType>::FillSequenceMetadata(SequenceBuffer& sequenceData, si
{
auto denseData = static_cast<DenseInputStreamBuffer*>(data);
denseData->m_sampleLayout = m_streams[j]->m_sampleLayout;
data->m_data = denseData->m_buffer.data();
}
else
{
auto sparseData = static_cast<SparseInputStreamBuffer*>(data);
sparseData->m_indices = sparseData->m_indicesBuffer.data();
assert(data->m_numberOfSamples == sparseData->m_nnzCounts.size());
data->m_data = sparseData->m_buffer.data();
}

data->m_id = sequenceId;

@@ -50,6 +50,11 @@ private:
m_buffer.reserve(capacity);
}

const void* GetDataBuffer() override
{
return m_buffer.data();
}

std::vector<ElemType> m_buffer;
};

@@ -63,6 +68,11 @@ private:
m_totalNnzCount = 0;
}

const void* GetDataBuffer() override
{
return m_buffer.data();
}

std::vector<IndexType> m_indicesBuffer;
std::vector<ElemType> m_buffer;
};

@@ -239,24 +239,28 @@ void CompositeDataReader::CreateTransforms(const ConfigParameters& deserializerC
argvector<ConfigParameters> transforms = input("transforms");
for (size_t j = 0; j < transforms.size(); ++j)
{
TransformerPtr transformer = CreateTransformer(transforms[j], defaultModule);
TransformerPtr transformer = CreateTransformer(transforms[j], defaultModule, std::wstring());
m_transforms.push_back(Transformation{transformer, inputName});
}
}

// Let's add a cast transformer by default. It is noop if the type provided by others is float
// or double, but will do a proper cast if the type is uchar.
auto cast = CreateTransformer(input, defaultModule, std::wstring(L"Cast"));
m_transforms.push_back(Transformation{ cast, inputName });
}
}

// Create a transformer for a particular configuration. Loading it from the module of the deserializer if module is not specified, i.e.
// transforms = [
// [type = "Scale" width=...]:...
TransformerPtr CompositeDataReader::CreateTransformer(const ConfigParameters& config, const string& defaultModule)
TransformerPtr CompositeDataReader::CreateTransformer(const ConfigParameters& config, const string& defaultModule, const std::wstring& type)
{
typedef bool(*TransformerFactory) (Transformer** t, const std::wstring& type, const ConfigParameters& cfg);

std::string transformerModule = config("module", defaultModule.c_str());
TransformerFactory f = (TransformerFactory)Plugin::Load(transformerModule, "CreateTransformer");

std::wstring transformerType = config("type");
std::wstring transformerType = type.empty() ? config("type") : type;
Transformer* t;
if (!f(&t, transformerType, config))
{

@@ -68,7 +68,7 @@ private:
void CreateTransforms(const ConfigParameters& deserializerConfig);

IDataDeserializerPtr CreateDeserializer(const ConfigParameters& readerConfig, bool primary);
TransformerPtr CreateTransformer(const ConfigParameters& config, const std::string& defaultModule);
TransformerPtr CreateTransformer(const ConfigParameters& config, const std::string& defaultModule, const std::wstring& transformerType);

enum class PackingMode
@@ -420,7 +420,11 @@ struct HTKFloatSequenceData : DenseSequenceData
{
RuntimeError("Maximum number of samples per sequence exceeded.");
}
m_data = m_buffer.GetData();
}

const void* GetDataBuffer() override
{
return m_buffer.GetData();
}

private:

@@ -437,7 +441,11 @@ struct HTKDoubleSequenceData : DenseSequenceData
{
RuntimeError("Maximum number of samples per sequence exceeded.");
}
m_data = m_buffer.data();
}

const void* GetDataBuffer() override
{
return m_buffer.data();
}

private:

@@ -9,10 +9,12 @@
#include <limits>
#include "MLFDataDeserializer.h"
#include "ConfigHelper.h"
#include "SequenceData.h"
#include "../HTKMLFReader/htkfeatio.h"
#include "../HTKMLFReader/msra_mgram.h"
#include "latticearchive.h"

#undef max // max is defined in minwindef.h

namespace Microsoft { namespace MSR { namespace CNTK {

@@ -201,7 +203,7 @@ void MLFDataDeserializer::InitializeChunkDescriptions(CorpusDescriptorPtr corpus
m_categoryIndices.reserve(dimension);
for (size_t i = 0; i < dimension; ++i)
{
SparseSequenceDataPtr category = make_shared<SparseSequenceData>();
auto category = make_shared<CategorySequenceData>();
m_categoryIndices.push_back(static_cast<IndexType>(i));
category->m_indices = &(m_categoryIndices[i]);
category->m_nnzCounts.resize(1);

@@ -283,7 +285,11 @@ struct MLFSequenceData : SparseSequenceData
m_numberOfSamples = (uint32_t) numberOfSamples;
m_totalNnzCount = static_cast<IndexType>(numberOfSamples);
m_indices = m_indicesPtr.get();
m_data = m_values.data();
}

const void* GetDataBuffer() override
{
return m_values.data();
}
};

@@ -34,6 +34,8 @@ extern "C" DATAREADER_API void GetReaderD(IDataReader** preader)
*preader = new ReaderShim<double>(factory);
}

//TODO: Names of transforms and deserializers should be case insensitive.

// TODO: Not safe from the ABI perspective. Will be uglified to make the interface ABI.
// A factory method for creating image deserializers.
extern "C" DATAREADER_API bool CreateDeserializer(IDataDeserializer** deserializer, const std::wstring& type, const ConfigParameters& deserializerConfig, CorpusDescriptorPtr corpus, bool)

@@ -63,6 +65,8 @@ extern "C" DATAREADER_API bool CreateTransformer(Transformer** transformer, cons
*transformer = new MeanTransformer(config);
else if (type == L"Transpose")
*transformer = new TransposeTransformer(config);
else if (type == L"Cast")
*transformer = new CastTransformer(config);
else
// Unknown type.
return false;
@@ -14,13 +14,16 @@
#include "StringUtil.h"
#include "ConfigUtil.h"
#include "TimerUtility.h"
#include "ImageTransformers.h"
#include "SequenceData.h"
#include "ImageUtil.h"

namespace Microsoft { namespace MSR { namespace CNTK {

class ImageDataDeserializer::LabelGenerator
{
public:
virtual void CreateLabelFor(size_t classId, SparseSequenceData& data) = 0;
virtual void CreateLabelFor(size_t classId, CategorySequenceData& data) = 0;
virtual ~LabelGenerator() { }
};

@@ -43,7 +46,7 @@ public:
iota(m_indices.begin(), m_indices.end(), 0);
}

virtual void CreateLabelFor(size_t classId, SparseSequenceData& data) override
virtual void CreateLabelFor(size_t classId, CategorySequenceData& data) override
{
data.m_nnzCounts.resize(1);
data.m_nnzCounts[0] = 1;

@@ -57,12 +60,6 @@ private:
vector<IndexType> m_indices;
};

// Used to keep track of the image. Accessed only using DenseSequenceData interface.
struct DeserializedImage : DenseSequenceData
{
cv::Mat m_image;
};

// For image, chunks correspond to a single image.
class ImageDataDeserializer::ImageChunk : public Chunk, public std::enable_shared_from_this<ImageChunk>
{

@@ -80,42 +77,47 @@ public:
assert(sequenceId == m_description.m_id);
const auto& imageSequence = m_description;

auto image = std::make_shared<DeserializedImage>();
auto image = std::make_shared<ImageSequenceData>();
image->m_image = std::move(m_parent.ReadImage(m_description.m_id, imageSequence.m_path, m_parent.m_grayscale));
auto& cvImage = image->m_image;

if (!cvImage.data)
{
RuntimeError("Cannot open file '%s'", imageSequence.m_path.c_str());
}

// Convert element type.
int dataType = m_parent.m_featureElementType == ElementType::tfloat ? CV_32F : CV_64F;
if (cvImage.type() != CV_MAKETYPE(dataType, cvImage.channels()))
{
cvImage.convertTo(cvImage, dataType);
}

ElementType dataType = ConvertImageToSupportedDataType(cvImage);
if (!cvImage.isContinuous())
{
cvImage = cvImage.clone();
}
assert(cvImage.isContinuous());

image->m_data = image->m_image.data;
ImageDimensions dimensions(cvImage.cols, cvImage.rows, cvImage.channels());
image->m_sampleLayout = std::make_shared<TensorShape>(dimensions.AsTensorShape(HWC));
image->m_id = sequenceId;
image->m_numberOfSamples = 1;
image->m_chunk = shared_from_this();
image->m_elementType = dataType;
result.push_back(image);

SparseSequenceDataPtr label = std::make_shared<SparseSequenceData>();
auto label = std::make_shared<CategorySequenceData>();
label->m_chunk = shared_from_this();
m_parent.m_labelGenerator->CreateLabelFor(imageSequence.m_classId, *label);
label->m_numberOfSamples = 1;
result.push_back(label);
}

private:
ElementType ConvertImageToSupportedDataType(cv::Mat& image)
{
ElementType resultType;
if (!IdentifyElementTypeFromOpenCVType(image.depth(), resultType))
{
// Could not identify element type.
// Natively unsupported image type. Let's convert it to required precision.
int requiredType = m_parent.m_precision == ElementType::tfloat ? CV_32F : CV_64F;
image.convertTo(image, requiredType);
resultType = m_parent.m_precision;
}
return resultType;
}
};

// A new constructor to support new compositional configuration,

@@ -136,6 +138,8 @@ ImageDataDeserializer::ImageDataDeserializer(CorpusDescriptorPtr corpus, const C
}

string precision = (ConfigValue)config("precision", "float");
m_precision = AreEqualIgnoreCase(precision, "float") ? ElementType::tfloat : ElementType::tdouble;

m_verbosity = config(L"verbosity", 0);

// Feature stream.

@@ -144,9 +148,10 @@ ImageDataDeserializer::ImageDataDeserializer(CorpusDescriptorPtr corpus, const C
features->m_id = 0;
features->m_name = msra::strfun::utf16(featureSection.ConfigName());
features->m_storageType = StorageType::dense;
features->m_elementType = AreEqualIgnoreCase(precision, "float") ? ElementType::tfloat : ElementType::tdouble;

// Due to performance, now we support images of different types.
features->m_elementType = ElementType::tvariant;
m_streams.push_back(features);
m_featureElementType = features->m_elementType;

// Label stream.
ConfigParameters label = inputs(labelNames[0]);

@@ -156,7 +161,7 @@ ImageDataDeserializer::ImageDataDeserializer(CorpusDescriptorPtr corpus, const C
labels->m_name = msra::strfun::utf16(label.ConfigName());
labels->m_sampleLayout = std::make_shared<TensorShape>(labelDimension);
labels->m_storageType = StorageType::sparse_csc;
labels->m_elementType = AreEqualIgnoreCase(precision, "float") ? ElementType::tfloat : ElementType::tdouble;
labels->m_elementType = m_precision;
m_streams.push_back(labels);

m_labelGenerator = labels->m_elementType == ElementType::tfloat ?

@@ -184,6 +189,9 @@ ImageDataDeserializer::ImageDataDeserializer(const ConfigParameters& config)

m_verbosity = config(L"verbosity", 0);

string precision = (ConfigValue)config("precision", "float");
m_precision = AreEqualIgnoreCase(precision, "float") ? ElementType::tfloat : ElementType::tdouble;

// Expect data in HWC.
ImageDimensions dimensions(*feature->m_sampleLayout, configHelper.GetDataFormat());
feature->m_sampleLayout = std::make_shared<TensorShape>(dimensions.AsTensorShape(HWC));

@@ -191,7 +199,9 @@ ImageDataDeserializer::ImageDataDeserializer(const ConfigParameters& config)
label->m_storageType = StorageType::sparse_csc;
feature->m_storageType = StorageType::dense;

m_featureElementType = feature->m_elementType;
// Due to performance, now we support images of different types.
feature->m_elementType = ElementType::tvariant;

size_t labelDimension = label->m_sampleLayout->GetDim(0);

if (label->m_elementType == ElementType::tfloat)

@@ -64,8 +64,8 @@ private:
// Mapping of logical sequence key into sequence description.
std::map<size_t, size_t> m_keyToSequence;

// Element type of the feature/label stream (currently float/double only).
ElementType m_featureElementType;
// Precision required by the network.
ElementType m_precision;

// whether images shall be loaded in grayscale
bool m_grayscale;

@@ -72,6 +72,10 @@ ImageReader::ImageReader(const ConfigParameters& config)
transformations.push_back(Transformation{ std::make_shared<TransposeTransformer>(featureStream), featureName });
}

// We should always have cast at the end.
// It is noop if the matrix element type is already expected by the packer.
transformations.push_back(Transformation{ std::make_shared<CastTransformer>(featureStream), featureName });

m_sequenceEnumerator = std::make_shared<TransformController>(transformations, randomizer);

m_packer = std::make_shared<FramePacker>(
@@ -109,6 +109,7 @@
<ClInclude Include="ImageDataDeserializer.h" />
<ClInclude Include="ImageReader.h" />
<ClInclude Include="ImageTransformers.h" />
<ClInclude Include="ImageUtil.h" />
<ClInclude Include="stdafx.h" />
<ClInclude Include="targetver.h" />
</ItemGroup>

@@ -27,6 +27,7 @@
<ClInclude Include="ImageReader.h" />
<ClInclude Include="ImageConfigHelper.h" />
<ClInclude Include="ByteReader.h" />
<ClInclude Include="ImageUtil.h" />
</ItemGroup>
<ItemGroup>
<Filter Include="Common">
@@ -13,79 +13,27 @@
#include "Config.h"
#include "ConcStack.h"
#include "StringUtil.h"
#include "ElementTypeUtils.h"
#include "SequenceData.h"
#include "ImageUtil.h"

namespace Microsoft { namespace MSR { namespace CNTK
{

struct ImageSequenceData : DenseSequenceData
{
cv::Mat m_image;
// In case we do not copy data - we have to preserve the original sequence.
SequenceDataPtr m_original;
};

ImageTransformerBase::ImageTransformerBase(const ConfigParameters& readerConfig) : m_imageElementType(0)
{
m_seed = readerConfig(L"seed", 0u);
}

// The method describes how input stream is transformed to the output stream. Called once per applied stream.
// Currently for image transformations we only support dense streams of type double or float.
StreamDescription ImageTransformerBase::Transform(const StreamDescription& inputStream)
{
m_inputStream = inputStream;
m_outputStream = m_inputStream;

if (m_inputStream.m_storageType != StorageType::dense)
{
LogicError("ImageTransformerBase supports only dense input streams.");
}

if (m_inputStream.m_elementType == ElementType::tdouble)
{
m_imageElementType = CV_64F;
}
else if (m_inputStream.m_elementType == ElementType::tfloat)
{
m_imageElementType = CV_32F;
}
else
{
RuntimeError("Unsupported type");
}

return m_outputStream;
}

// Transforms a single sequence as open cv dense image. Called once per sequence.
SequenceDataPtr ImageTransformerBase::Transform(SequenceDataPtr sequence)
{
auto inputSequence = static_cast<const DenseSequenceData&>(*sequence);

ImageDimensions dimensions(*inputSequence.m_sampleLayout, HWC);
int columns = static_cast<int>(dimensions.m_width);
int rows = static_cast<int>(dimensions.m_height);
int channels = static_cast<int>(dimensions.m_numChannels);
auto inputSequence = dynamic_cast<ImageSequenceData*>(sequence.get());
if (inputSequence == nullptr)
RuntimeError("Unexpected sequence provided");

auto result = std::make_shared<ImageSequenceData>();
int type = CV_MAKETYPE(m_imageElementType, channels);
cv::Mat buffer = cv::Mat(rows, columns, type, inputSequence.m_data);
Apply(sequence->m_id, buffer);
if (!buffer.isContinuous())
{
buffer = buffer.clone();
}
else
{
result->m_original = sequence;
}
assert(buffer.isContinuous());
result->m_image = buffer;
result->m_data = buffer.ptr();
result->m_numberOfSamples = inputSequence.m_numberOfSamples;
Apply(sequence->m_id, inputSequence->m_image);

ImageDimensions outputDimensions(buffer.cols, buffer.rows, buffer.channels());
result->m_image = inputSequence->m_image;
result->m_numberOfSamples = inputSequence->m_numberOfSamples;
result->m_elementType = GetElementTypeFromOpenCVType(inputSequence->m_image.depth());

ImageDimensions outputDimensions(inputSequence->m_image.cols, inputSequence->m_image.rows, inputSequence->m_image.channels());
result->m_sampleLayout = std::make_shared<TensorShape>(outputDimensions.AsTensorShape(HWC));
return result;
}

@@ -327,7 +275,7 @@ ScaleTransformer::ScaleTransformer(const ConfigParameters& config) : ImageTransf
// Scale transformer transforms the stream so that all samples are of the same size.
StreamDescription ScaleTransformer::Transform(const StreamDescription& inputStream)
{
ImageTransformerBase::Transform(inputStream);
TransformBase::Transform(inputStream);
m_outputStream.m_sampleLayout = std::make_shared<TensorShape>(ImageDimensions(m_imgWidth, m_imgHeight, m_imgChannels).AsTensorShape(HWC));
return m_outputStream;
}

@@ -336,19 +284,21 @@ void ScaleTransformer::Apply(size_t id, cv::Mat &mat)
{
UNUSED(id);

// If matrix has not been converted to the right type, do it now as rescaling
// requires floating point type.
if (mat.type() != CV_MAKETYPE(m_imageElementType, m_imgChannels))
{
mat.convertTo(mat, m_imageElementType);
}

auto seed = GetSeed();
auto rng = m_rngs.pop_or_create([seed]() { return std::make_unique<std::mt19937>(seed); });

auto index = UniIntT(0, static_cast<int>(m_interp.size()) - 1)(*rng);
assert(m_interp.size() > 0);
cv::resize(mat, mat, cv::Size((int)m_imgWidth, (int)m_imgHeight), 0, 0, m_interp[index]);

// Skip cv::resize depending on interpolation only
// There is no point in interpolation of the image of the same size, this
// will only lower its sharpness.
if (mat.cols != m_imgWidth || mat.rows != m_imgHeight)
{
// If matrix has not been converted to the right type, do it now as rescaling requires floating point type.
ConvertToFloatingPointIfRequired(mat);
cv::resize(mat, mat, cv::Size((int)m_imgWidth, (int)m_imgHeight), 0, 0, m_interp[index]);
}

m_rngs.push(std::move(rng));
}
@@ -390,100 +340,128 @@ void MeanTransformer::Apply(size_t id, cv::Mat &mat)
(m_meanImg.size() == mat.size() &&
m_meanImg.channels() == mat.channels()));

// REVIEW alexeyk: check type conversion (float/double).
if (m_meanImg.size() == mat.size())
{
// If matrix has not been converted to the right type, do it now as mean subtraction requires floating point type.
ConvertToFloatingPointIfRequired(mat);
mat = mat - m_meanImg;
}
}

TransposeTransformer::TransposeTransformer(const ConfigParameters& config) : TransformBase(config),
m_floatTransform(this), m_doubleTransform(this)
{}

// The method describes how input stream is transformed to the output stream. Called once per applied stream.
// Transpose transformer expects the dense input stream with samples as HWC and outputs CHW.
StreamDescription TransposeTransformer::Transform(const StreamDescription& inputStream)
{
m_inputStream = inputStream;
if (m_inputStream.m_storageType != StorageType::dense)
{
LogicError("Transpose transformer supports only dense streams.");
}
m_outputStream = TransformBase::Transform(inputStream);

// Changing from NHWC to NCHW
m_outputStream = m_inputStream;
m_outputStream.m_elementType = m_precision;
if (m_inputStream.m_sampleLayout != nullptr)
{
ImageDimensions dimensions(*m_inputStream.m_sampleLayout, HWC);
m_outputStream.m_sampleLayout = std::make_shared<TensorShape>(dimensions.AsTensorShape(CHW));
}

return m_outputStream;
}

// Transformation of the sequence.
SequenceDataPtr TransposeTransformer::Transform(SequenceDataPtr sequence)
{
if (m_inputStream.m_elementType == ElementType::tdouble)
{
return TypedTransform<double>(sequence);
}
auto inputSequence = dynamic_cast<ImageSequenceData*>(sequence.get());
if (inputSequence == nullptr)
RuntimeError("Currently Transpose transform only works with images.");

if (m_inputStream.m_elementType == ElementType::tfloat)
{
return TypedTransform<float>(sequence);
}
ElementType elementType = m_inputStream.m_elementType != ElementType::tvariant ?
m_inputStream.m_elementType :
sequence->m_elementType;

RuntimeError("Unsupported type");
switch (elementType)
{
case ElementType::tdouble:
if (m_precision == ElementType::tfloat)
return m_floatTransform.Apply<double>(inputSequence);
if (m_precision == ElementType::tdouble)
return m_doubleTransform.Apply<double>(inputSequence);
case ElementType::tfloat:
if (m_precision == ElementType::tdouble)
return m_doubleTransform.Apply<float>(inputSequence);
if (m_precision == ElementType::tfloat)
return m_floatTransform.Apply<float>(inputSequence);
case ElementType::tuchar:
if (m_precision == ElementType::tdouble)
return m_doubleTransform.Apply<unsigned char>(inputSequence);
if (m_precision == ElementType::tfloat)
return m_floatTransform.Apply<unsigned char>(inputSequence);
default:
RuntimeError("Unsupported type. Please apply a cast transform with 'double' or 'float' precision.");
}
return nullptr; // Make compiler happy
}

// The class represents a sequence that owns an internal data buffer.
// Passed from the TransposeTransformer.
// TODO: Transposition potentially could be done in place (alexeyk: performance might be much worse than of out-of-place transpose).
struct DenseSequenceWithBuffer : DenseSequenceData
template <class TElementTo>
template<class TElementFrom>
SequenceDataPtr TransposeTransformer::TypedTranspose<TElementTo>::Apply(ImageSequenceData* inputSequence)
{
std::vector<char> m_buffer;
};
TensorShapePtr shape = m_parent->m_inputStream.m_sampleLayout;
if (shape == nullptr) // Taking the shape from the sequence.
shape = inputSequence->m_sampleLayout;

template <class TElemType>
SequenceDataPtr TransposeTransformer::TypedTransform(SequenceDataPtr sequence)
{
TensorShapePtr shape = m_inputStream.m_sampleLayout;
if (shape == nullptr)
{
// Taking the shape from the sequence.
shape = sequence->m_sampleLayout;
}
if (!shape)
RuntimeError("Unknown shape of the sample in stream '%ls'.", m_parent->m_inputStream.m_name.c_str());

if (shape == nullptr)
{
RuntimeError("Unknown shape of the sample in stream '%ls'.", m_inputStream.m_name.c_str());
}
assert(inputSequence->m_numberOfSamples == 1);

auto inputSequence = static_cast<DenseSequenceData&>(*sequence);
assert(inputSequence.m_numberOfSamples == 1);

size_t count = shape->GetNumElements() * GetSizeByType(m_inputStream.m_elementType);

auto result = std::make_shared<DenseSequenceWithBuffer>();
result->m_buffer.resize(count);
size_t count = shape->GetNumElements();
auto result = std::make_shared<DenseSequenceWithBuffer<TElementTo>>(m_memBuffers, count);

ImageDimensions dimensions(*shape, ImageLayoutKind::HWC);
size_t rowCount = dimensions.m_height * dimensions.m_width;
size_t channelCount = dimensions.m_numChannels;

auto src = reinterpret_cast<TElemType*>(inputSequence.m_data);
auto dst = reinterpret_cast<TElemType*>(result->m_buffer.data());
auto dst = result->GetBuffer();

for (size_t irow = 0; irow < rowCount; irow++)
if (channelCount == 3) // Unrolling for BGR, the most common case.
{
for (size_t icol = 0; icol < channelCount; icol++)
size_t nRows = inputSequence->m_image.rows;
size_t nCols = inputSequence->m_image.cols;

TElementTo* b = dst;
TElementTo* g = dst + rowCount;
TElementTo* r = dst + 2 * rowCount;

for (size_t i = 0; i < nRows; ++i)
{
dst[icol * rowCount + irow] = src[irow * channelCount + icol];
auto* x = inputSequence->m_image.ptr<TElementFrom>((int)i);
for (size_t j = 0; j < nCols; ++j)
{
auto row = j * 3;
*b++ = static_cast<TElementTo>(x[row]);
*g++ = static_cast<TElementTo>(x[row + 1]);
*r++ = static_cast<TElementTo>(x[row + 2]);
}
}
}
else
{
auto src = reinterpret_cast<const TElementFrom*>(inputSequence->GetDataBuffer());
for (size_t irow = 0; irow < rowCount; irow++)
{
for (size_t icol = 0; icol < channelCount; icol++)
{
dst[icol * rowCount + irow] = static_cast<TElementTo>(src[irow * channelCount + icol]);
}
}
}

result->m_sampleLayout = m_outputStream.m_sampleLayout != nullptr ?
m_outputStream.m_sampleLayout :
std::make_shared<TensorShape>(dimensions.AsTensorShape(CHW));;
result->m_data = result->m_buffer.data();
result->m_numberOfSamples = inputSequence.m_numberOfSamples;
result->m_sampleLayout = m_parent->m_outputStream.m_sampleLayout != nullptr ?
m_parent->m_outputStream.m_sampleLayout :
std::make_shared<TensorShape>(dimensions.AsTensorShape(CHW));
result->m_numberOfSamples = inputSequence->m_numberOfSamples;
return result;
}
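For reference, a minimal standalone sketch (not part of this commit) of the interleaved-to-planar conversion that TypedTranspose::Apply performs, written for the generic path with a uchar source and float destination; the function name and parameters are illustrative only. The unrolled BGR branch above is simply a faster special case of the same loop.

#include <cstddef>
#include <vector>

// Interleaved HWC (e.g. BGRBGR...) to planar CHW, casting 8-bit pixels to float.
// Plane c of the destination holds rowCount = height * width contiguous values.
std::vector<float> HwcToChw(const unsigned char* src, size_t height, size_t width, size_t channels)
{
    const size_t rowCount = height * width;
    std::vector<float> dst(rowCount * channels);
    for (size_t pixel = 0; pixel < rowCount; ++pixel)
        for (size_t c = 0; c < channels; ++c)
            dst[c * rowCount + pixel] = static_cast<float>(src[pixel * channels + c]);
    return dst;
}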
@@ -527,6 +505,11 @@ void IntensityTransformer::Apply(size_t id, cv::Mat &mat)
if (m_eigVal.empty() || m_eigVec.empty() || m_curStdDev == 0)
return;

// Have to convert to float.
int type = m_precision == ElementType::tfloat ? CV_32F : CV_64F;
if (mat.type() != type)
mat.convertTo(mat, type);

if (mat.type() == CV_64FC(mat.channels()))
Apply<double>(mat);
else if (mat.type() == CV_32FC(mat.channels()))

@@ -601,6 +584,9 @@ void ColorTransformer::Apply(size_t id, cv::Mat &mat)
if (m_curBrightnessRadius == 0 && m_curContrastRadius == 0 && m_curSaturationRadius == 0)
return;

// Have to convert to float
ConvertToFloatingPointIfRequired(mat);

if (mat.type() == CV_64FC(mat.channels()))
Apply<double>(mat);
else if (mat.type() == CV_32FC(mat.channels()))

@@ -674,4 +660,81 @@ void ColorTransformer::Apply(cv::Mat &mat)
m_rngs.push(std::move(rng));
}

CastTransformer::CastTransformer(const ConfigParameters& config) : TransformBase(config), m_floatTransform(this), m_doubleTransform(this)
{
}

StreamDescription CastTransformer::Transform(const StreamDescription& inputStream)
{
m_outputStream = TransformBase::Transform(inputStream);
m_outputStream.m_elementType = m_precision;
return m_outputStream;
}

SequenceDataPtr CastTransformer::Transform(SequenceDataPtr sequence)
{
if (m_inputStream.m_elementType == m_precision || sequence->m_elementType == m_precision)
{
// No need to do anything, exit.
return sequence;
}

SequenceDataPtr result;
ElementType inputType = m_inputStream.m_elementType != ElementType::tvariant
? m_inputStream.m_elementType
: sequence->m_elementType;

switch (m_precision)
{
case ElementType::tdouble:
if (inputType == ElementType::tfloat)
result = m_doubleTransform.Apply<float>(sequence);
else if (inputType == ElementType::tuchar)
result = m_doubleTransform.Apply<unsigned char>(sequence);
else
RuntimeError("Unsupported type. Please apply a cast transform with 'double' or 'float' precision.");
break;
case ElementType::tfloat:
if (inputType == ElementType::tdouble)
result = m_floatTransform.Apply<double>(sequence);
else if (inputType == ElementType::tuchar)
|
||||
result = m_floatTransform.Apply<unsigned char>(sequence);
|
||||
else
|
||||
RuntimeError("Unsupported type. Please apply a cast transform with 'double' or 'float' precision.");
|
||||
break;
|
||||
default:
|
||||
RuntimeError("Unsupported type. Please apply a cast transform with 'double' or 'float' precision.");
|
||||
}
|
||||
result->m_elementType = m_precision;
|
||||
return result;
|
||||
}
|
||||
|
||||
template <class TElementTo>
|
||||
template<class TElementFrom>
|
||||
SequenceDataPtr CastTransformer::TypedCast<TElementTo>::Apply(SequenceDataPtr sequence)
|
||||
{
|
||||
TensorShapePtr shape = m_parent->m_inputStream.m_sampleLayout;
|
||||
if (!shape) // Taking the shape from the sequence.
|
||||
shape = sequence->m_sampleLayout;
|
||||
|
||||
if (!shape)
|
||||
RuntimeError("Unknown shape of the sample in stream '%ls'.", m_parent->m_inputStream.m_name.c_str());
|
||||
|
||||
auto& inputSequence = static_cast<DenseSequenceData&>(*sequence);
|
||||
size_t count = shape->GetNumElements() * sequence->m_numberOfSamples;
|
||||
auto result = std::make_shared<DenseSequenceWithBuffer<TElementTo>>(m_memBuffers, count);
|
||||
|
||||
auto src = reinterpret_cast<const TElementFrom*>(inputSequence.GetDataBuffer());
|
||||
auto dst = result->GetBuffer();
|
||||
|
||||
for (size_t i = 0; i < count; i++)
|
||||
{
|
||||
dst[i] = static_cast<TElementTo>(src[i]);
|
||||
}
|
||||
|
||||
result->m_sampleLayout = shape;
|
||||
result->m_numberOfSamples = inputSequence.m_numberOfSamples;
|
||||
return result;
|
||||
}
|
||||
|
||||
}}}
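As a side note, the dispatch in CastTransformer::Transform amounts to picking an element-wise cast based on a runtime type tag. A hedged, self-contained sketch of that idea follows; the PixelType enum and function name are hypothetical stand-ins, not CNTK types.

#include <cstddef>
#include <stdexcept>
#include <vector>

// Hypothetical runtime tag mirroring the role of ElementType in this sketch.
enum class PixelType { Float32, Float64, UChar8 };

// Casts a type-erased source buffer of n elements to float, dispatching on the tag,
// similar in spirit to CastTransformer choosing Apply<TElementFrom> per input type.
std::vector<float> CastToFloat(const void* src, PixelType type, size_t n)
{
    std::vector<float> dst(n);
    switch (type)
    {
    case PixelType::Float32:
    {
        auto p = static_cast<const float*>(src);
        for (size_t i = 0; i < n; ++i) dst[i] = p[i];
        break;
    }
    case PixelType::Float64:
    {
        auto p = static_cast<const double*>(src);
        for (size_t i = 0; i < n; ++i) dst[i] = static_cast<float>(p[i]);
        break;
    }
    case PixelType::UChar8:
    {
        auto p = static_cast<const unsigned char*>(src);
        for (size_t i = 0; i < n; ++i) dst[i] = static_cast<float>(p[i]);
        break;
    }
    default:
        throw std::runtime_error("Unsupported element type");
    }
    return dst;
}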
@@ -8,52 +8,70 @@
#include <unordered_map>
#include <random>
#include <opencv2/opencv.hpp>
#include <boost/random/uniform_int_distribution.hpp>
#include <boost/random/uniform_real_distribution.hpp>

#include "Transformer.h"
#include "ConcStack.h"
#include "Config.h"
#include "ImageConfigHelper.h"
#include <boost/random/uniform_int_distribution.hpp>
#include <boost/random/uniform_real_distribution.hpp>
#include "TransformBase.h"

namespace Microsoft { namespace MSR { namespace CNTK {

// Sequence data that is used for images.
struct ImageSequenceData : DenseSequenceData
{
cv::Mat m_image;

const void* GetDataBuffer() override
{
if (!m_image.isContinuous())
{
// According to the contract, dense sequence data
// should return continuous data buffer.
// TODO: This is potentially an expensive operation. Need to do some logging.
m_image = m_image.clone();
}

return m_image.ptr();
}
};

class ConfigParameters;

// Base class for image transformations based on OpenCV
// that helps to wrap the sequences into OpenCV::Mat class.
class ImageTransformerBase : public Transformer
class ImageTransformerBase : public TransformBase
{
public:
explicit ImageTransformerBase(const ConfigParameters& config);

void StartEpoch(const EpochConfiguration&) override {}

// Transformation of the stream.
StreamDescription Transform(const StreamDescription& inputStream) override;
explicit ImageTransformerBase(const ConfigParameters& config) : TransformBase(config)
{};

// Transformation of the sequence.
SequenceDataPtr Transform(SequenceDataPtr sequence) override;

protected:
// Seed getter.
unsigned int GetSeed() const
{
return m_seed;
}

using Base = Transformer;
using UniRealT = boost::random::uniform_real_distribution<double>;
using UniIntT = boost::random::uniform_int_distribution<int>;

int ExpectedOpenCVPrecision() const
{
assert(m_precision == ElementType::tfloat || m_precision == ElementType::tdouble);
return m_precision == ElementType::tfloat ? CV_32F : CV_64F;
}

void ConvertToFloatingPointIfRequired(cv::Mat& image)
{
int depth = ExpectedOpenCVPrecision();
if (image.depth() != depth)
image.convertTo(image, depth);
}

// The only function that should be redefined by the inherited classes.
virtual void Apply(size_t id, cv::Mat &from) = 0;

protected:
StreamDescription m_inputStream;
StreamDescription m_outputStream;
unsigned int m_seed;
int m_imageElementType;
conc_stack<std::unique_ptr<std::mt19937>> m_rngs;
};

@@ -126,12 +144,10 @@ private:
};

// Transpose transformation from HWC to CHW (note: row-major notation).
class TransposeTransformer : public Transformer
class TransposeTransformer : public TransformBase
{
public:
explicit TransposeTransformer(const ConfigParameters&) {}

void StartEpoch(const EpochConfiguration&) override {}
explicit TransposeTransformer(const ConfigParameters&);

// Transformation of the stream.
StreamDescription Transform(const StreamDescription& inputStream) override;

@@ -140,11 +156,24 @@ public:
SequenceDataPtr Transform(SequenceDataPtr sequence) override;

private:
template <class TElement>
SequenceDataPtr TypedTransform(SequenceDataPtr inputSequence);
// A helper class transposes images using a set of typed memory buffers.
template <class TElementTo>
struct TypedTranspose
{
TransposeTransformer* m_parent;

StreamDescription m_inputStream;
StreamDescription m_outputStream;
TypedTranspose(TransposeTransformer* parent) : m_parent(parent) {}

template <class TElementFrom>
SequenceDataPtr Apply(ImageSequenceData* inputSequence);
conc_stack<std::vector<TElementTo>> m_memBuffers;
};

// Auxiliary buffer to handle images of float type.
TypedTranspose<float> m_floatTransform;

// Auxiliary buffer to handle images of double type.
TypedTranspose<double> m_doubleTransform;
};

// Intensity jittering based on PCA transform as described in original AlexNet paper

@@ -198,4 +227,40 @@ private:
conc_stack<std::unique_ptr<cv::Mat>> m_hsvTemp;
};

// Cast the input to a particular type.
// Images coming from the deserializer/transformers could come in different types,
// i.e. as a uchar due to performance reasons. On the other hand, the packer/network
// currently only supports float and double. This transform is necessary to do a proper
// casting before the sequence data enters the packer.
class CastTransformer : public TransformBase
{
public:
explicit CastTransformer(const ConfigParameters&);

// Transformation of the stream.
StreamDescription Transform(const StreamDescription& inputStream) override;

// Transformation of the sequence.
SequenceDataPtr Transform(SequenceDataPtr sequence) override;

private:

// A helper class casts images using a set of typed memory buffers.
template <class TElementTo>
struct TypedCast
{
CastTransformer* m_parent;

TypedCast(CastTransformer* parent) : m_parent(parent) {}

template <class TElementFrom>
SequenceDataPtr Apply(SequenceDataPtr inputSequence);
conc_stack<std::vector<TElementTo>> m_memBuffers;
};

TypedCast<float> m_floatTransform;
TypedCast<double> m_doubleTransform;
};

}}}

@@ -0,0 +1,40 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//

#pragma once

#include <opencv2/opencv.hpp>
#include "Transformer.h"

namespace Microsoft { namespace MSR { namespace CNTK {

inline bool IdentifyElementTypeFromOpenCVType(int openCvType, ElementType& type)
{
type = ElementType::tvariant;
switch (openCvType)
{
case CV_64F:
type = ElementType::tdouble;
return true;
case CV_32F:
type = ElementType::tfloat;
return true;
case CV_8U:
type = ElementType::tuchar;
return true;
default:
return false;
}
}

inline ElementType GetElementTypeFromOpenCVType(int openCvType)
{
ElementType result;
if (!IdentifyElementTypeFromOpenCVType(openCvType, result))
RuntimeError("Unsupported OpenCV type '%d'", openCvType);
return result;
}

}}}
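A possible usage of the helpers above, assuming the sketch is compiled inside the same namespace (so ElementType and GetElementTypeFromOpenCVType are in scope) and that OpenCV is available; the function name and path handling are illustrative only.

#include <string>
#include <opencv2/opencv.hpp>

// Ordinary 8-bit images decode to CV_8U and therefore map to ElementType::tuchar;
// the Cast transform later converts such sequences to the packer's float/double precision.
ElementType PeekElementType(const std::string& path, bool grayscale)
{
    cv::Mat img = cv::imread(path, grayscale ? cv::IMREAD_GRAYSCALE : cv::IMREAD_COLOR);
    return GetElementTypeFromOpenCVType(img.depth());
}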
@@ -131,8 +131,7 @@ cv::Mat ZipByteReader::Read(size_t seqId, const std::string& path, bool grayscal
});
m_zips.push(std::move(zipFile));

cv::Mat img;
img = cv::imdecode(cv::Mat(1, (int)size, CV_8UC1, contents.data()), grayscale ? cv::IMREAD_GRAYSCALE : cv::IMREAD_COLOR);
cv::Mat img = cv::imdecode(contents, grayscale ? cv::IMREAD_GRAYSCALE : cv::IMREAD_COLOR);
assert(nullptr != img.data);
m_workspace.push(std::move(contents));
return img;

@@ -60,7 +60,7 @@ typedef std::shared_ptr<SequenceDescription> SequenceDescriptionPtr;
// TODO: add type casts (As<T>() or AsRef<>() or AsPtr<>()) to subclasses as members here.
struct SequenceDataBase
{
SequenceDataBase() : m_id(0), m_numberOfSamples(0), m_data(nullptr) {}
SequenceDataBase() : m_id(0), m_numberOfSamples(0), m_elementType(ElementType::tvariant) {}
virtual ~SequenceDataBase() = default;

// Sequence id.
@@ -68,10 +68,11 @@ struct SequenceDataBase
uint32_t m_numberOfSamples; // Number of samples in the sequence

ChunkPtr m_chunk;
// A non-owned pointer. The actual size is provided for particular sequences,
// i.e. see DenseSequenceData, or SparseSequenceData.
void* m_data;
// Returns a pointer to the data buffer.
// The actual size is provided for particular sequences, i.e. see DenseSequenceData, or SparseSequenceData.
virtual const void* GetDataBuffer() = 0;

ElementType m_elementType; // Sequence element type.
TensorShapePtr m_sampleLayout; // Sample layout, can be shared by several sequences.
};
typedef std::shared_ptr<SequenceDataBase> SequenceDataPtr;
@@ -5,8 +5,6 @@

#pragma once

#include <vector>
#include <memory>
#include "Reader.h"

namespace Microsoft { namespace MSR { namespace CNTK {

@@ -20,10 +18,9 @@ inline size_t GetSizeByType(ElementType type)
return sizeof(float);
case ElementType::tdouble:
return sizeof(double);
case ElementType::tatom:
return sizeof(char);
default:
RuntimeError("Unsupported type '%d'", type);
}
}
} } }

}}}

@@ -78,6 +78,13 @@ PackerBase::PackerBase(SequenceEnumeratorPtr sequenceEnumerator,
const auto& stream = m_outputStreamDescriptions[i];
UNUSED(stream);

// Check the input.
if(m_inputStreamDescriptions[i]->m_elementType != ElementType::tdouble &&
m_inputStreamDescriptions[i]->m_elementType != ElementType::tfloat)
{
RuntimeError("Please specify the type of the '%ls' stream. You can use 'Cast' transform for that.", m_inputStreamDescriptions[i]->m_name.c_str());
}

// Input and output should match in everything except for sparse/dense storage type.
assert(stream->m_elementType == ElementType::tfloat || stream->m_elementType == ElementType::tdouble);
assert(stream->m_name == m_inputStreamDescriptions[i]->m_name);
@@ -101,13 +101,14 @@ inline void PackerBase::PackSparseSampleAsDense(char* destination, SparseSequenc
// m_indices stores the corresponding indices for each element.
// Iterate through non zero elements and copy them from m_data into the
// destination at the offset given by the corresponding row index (m_index).
const void* buffer = sequence->GetDataBuffer();
for (size_t nonZeroIndex = 0; nonZeroIndex < nonZeroCount; ++nonZeroIndex)
{
auto sourceOffset = sampleOffset + nonZeroIndex;
auto elementIndex = sequence->m_indices[sourceOffset];
auto destinationOffset = elementIndex * elementSize;
assert(destinationOffset < sampleSize);
const auto* source = (const char*)(sequence->m_data) + (sourceOffset)* elementSize;
const auto* source = (const char*)buffer + (sourceOffset)* elementSize;
memcpy(destination + destinationOffset, source, elementSize);
}
}
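For clarity, a standalone sketch (not part of the diff) of the scatter PackSparseSampleAsDense performs: each packed value lands in the dense sample at the byte offset given by its row index. The names are hypothetical and the index type is assumed to be 32-bit here.

#include <cstddef>
#include <cstdint>
#include <cstring>

// destination: one dense sample of sampleDimension * elementSize bytes, pre-zeroed.
// values/indices: nonZeroCount packed values and their row indices.
void ScatterSparseSample(char* destination, const char* values, const int32_t* indices,
                         size_t nonZeroCount, size_t elementSize)
{
    for (size_t k = 0; k < nonZeroCount; ++k)
        std::memcpy(destination + indices[k] * elementSize, values + k * elementSize, elementSize);
}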
@@ -115,7 +116,7 @@ inline void PackerBase::PackDenseSample(char* destination, SequenceDataPtr sequence, size_t sampleOffset, size_t sampleSize)
inline void PackerBase::PackDenseSample(char* destination, SequenceDataPtr sequence, size_t sampleOffset, size_t sampleSize)
{
// Because the sample is dense - simply copying it to the output.
memcpy(destination, (const char*)(sequence->m_data) + sampleOffset, sampleSize);
memcpy(destination, (const char*)(sequence->GetDataBuffer()) + sampleOffset, sampleSize);
}

}}}
@@ -36,9 +36,12 @@ struct EpochConfiguration
// Supported primitive element types, will be extended in the future.
enum class ElementType
{
tvariant, // Used by stream definition if deserializer can expose sequences of different type.
// Before the sequence enters the network there should be a transform that
// casts all sequences from such stream to the same type (i.e. tdouble or tfloat).
tfloat, // single precision
tdouble, // double precision
tatom // sizeof(atom) == 1 constitute of blobs -> sequences of atoms (i.e. used for lattices, hmmm, etc.)
tuchar, // unsigned char
};

// Supported storage types, will be extended in the future.
@@ -47,6 +47,8 @@
<ClInclude Include="ChunkRandomizer.h" />
<ClInclude Include="ExceptionCapture.h" />
<ClInclude Include="ReaderBase.h" />
<ClInclude Include="SequenceData.h" />
<ClInclude Include="TransformBase.h" />
<ClInclude Include="TransformController.h" />
<ClInclude Include="DataDeserializerBase.h" />
<ClInclude Include="BlockRandomizer.h" />

@@ -82,6 +82,12 @@
<ClInclude Include="ReaderBase.h">
<Filter>Utils</Filter>
</ClInclude>
<ClInclude Include="SequenceData.h">
<Filter>Utils</Filter>
</ClInclude>
<ClInclude Include="TransformBase.h">
<Filter>Transformers</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<ClCompile Include="NoRandomizer.cpp">
@@ -0,0 +1,64 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
// Contains helper classes for exposing sequence data in deserializers.
//

#pragma once

#include "DataDeserializer.h"
#include "ConcStack.h"

namespace Microsoft { namespace MSR { namespace CNTK {

// Class represents a sparse sequence for category data.
// m_data is a non-owning pointer to some statically allocated category.
// TODO: Possibly introduce typed data here.
struct CategorySequenceData : SparseSequenceData
{
const void* GetDataBuffer() override
{
return m_data;
}

// Non-owning pointer to the static data describing the label.
void *m_data;
};

typedef std::shared_ptr<CategorySequenceData> CategorySequenceDataPtr;

// The class represents a sequence that returns the internal data buffer
// back to the stack when destroyed.
template<class TElemType>
struct DenseSequenceWithBuffer : DenseSequenceData
{
DenseSequenceWithBuffer(conc_stack<std::vector<TElemType>>& memBuffers, size_t numberOfElements) : m_memBuffers(memBuffers)
{
m_buffer = m_memBuffers.pop_or_create([numberOfElements]() { return vector<TElemType>(numberOfElements); });
m_buffer.resize(numberOfElements);
}

const void* GetDataBuffer() override
{
return m_buffer.data();
}

TElemType* GetBuffer()
{
return m_buffer.data();
}

~DenseSequenceWithBuffer()
{
// Giving the memory back.
m_memBuffers.push(std::move(m_buffer));
}

private:
std::vector<TElemType> m_buffer;
conc_stack<std::vector<TElemType>>& m_memBuffers;
DISABLE_COPY_AND_MOVE(DenseSequenceWithBuffer);
};

} } }
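DenseSequenceWithBuffer relies on conc_stack to recycle typed buffers between sequences instead of reallocating them each time. A minimal single-threaded sketch of that pooling pattern follows; the real conc_stack is thread-safe, and the BufferPool name and interface here are hypothetical.

#include <cstddef>
#include <stack>
#include <utility>
#include <vector>

template <class T>
class BufferPool
{
public:
    // Reuse a previously returned vector if one is available, otherwise allocate.
    std::vector<T> PopOrCreate(size_t numberOfElements)
    {
        if (m_free.empty())
            return std::vector<T>(numberOfElements);
        std::vector<T> buffer = std::move(m_free.top());
        m_free.pop();
        buffer.resize(numberOfElements); // mirrors the resize DenseSequenceWithBuffer performs
        return buffer;
    }

    // Called when a sequence is destroyed, handing its buffer back for reuse.
    void Push(std::vector<T>&& buffer) { m_free.push(std::move(buffer)); }

private:
    std::stack<std::vector<T>> m_free;
};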
@@ -276,7 +276,7 @@ MBLayoutPtr SequencePacker::PackSparseStream(const StreamBatch& batch, size_t st
// compute the sample offset in bytes.
size_t sampleOffset = sequenceOffset * elementSize;
// copy all nnz values from source sequence into the buffer.
const auto* dataSrc = reinterpret_cast<const char*>(sequence->m_data) + sampleOffset;
const auto* dataSrc = reinterpret_cast<const char*>(sequence->GetDataBuffer()) + sampleOffset;
memcpy(dataDst, dataSrc, nnz * elementSize);
dataDst += nnz * elementSize; // advance the destination pointer
@@ -0,0 +1,67 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//

#pragma once

#include <unordered_map>

#include "Transformer.h"
#include "Config.h"
#include "StringUtil.h"

namespace Microsoft { namespace MSR { namespace CNTK {

// Base class for transforms.
class TransformBase : public Transformer
{
public:
explicit TransformBase(const ConfigParameters& config)
{
m_seed = config(L"seed", 0u);
std::wstring precision = config(L"precision", L"float");
if (AreEqualIgnoreCase(precision, L"float"))
m_precision = ElementType::tfloat;
else if (AreEqualIgnoreCase(precision, L"double"))
m_precision = ElementType::tdouble;
else
RuntimeError("Unsupported precision type is specified, '%ls'", precision.c_str());
}

void StartEpoch(const EpochConfiguration&) override {}

// The method describes how input stream is transformed to the output stream. Called once per applied stream.
// Currently we only support transforms of dense streams.
StreamDescription Transform(const StreamDescription& inputStream) override
{
if (inputStream.m_storageType != StorageType::dense)
{
LogicError("The class currently only supports transforms on dense input streams.");
}

m_inputStream = inputStream;
m_outputStream = m_inputStream;
return m_outputStream;
}

virtual ~TransformBase() {}

protected:
// Seed getter.
unsigned int GetSeed() const
{
return m_seed;
}

// Input stream.
StreamDescription m_inputStream;
// Output stream.
StreamDescription m_outputStream;
// Seed.
unsigned int m_seed;
// Required precision.
ElementType m_precision;
};

}}}
@@ -293,8 +293,8 @@ BOOST_AUTO_TEST_CASE(ImageReaderInvalidEmptyTransforms)
[](const std::runtime_error& ex)
{
return string("Packer currently does not support samples with varying shapes."
"Please make sure there is a transform that unifies the shape of samples"
" for input stream 'features' or the deserializer provides samples with the same shape.") == ex.what();
"Please make sure there is a transform that unifies the shape of samples for input stream 'features' "
"or the deserializer provides samples with the same shape.") == ex.what();
});
}

@@ -46,7 +46,7 @@ public:
assert(m_chunkBegin <= sequenceId);
assert(sequenceId < m_chunkEnd);

auto data = make_shared<DenseSequenceData>();
auto data = make_shared<MockDenseSequenceData>();
data->m_data = &m_sequenceData[sequenceId][0];
data->m_numberOfSamples = m_sequenceLength;
data->m_sampleLayout = m_sampleLayout;

@@ -327,9 +327,9 @@ void BlockRandomizerOneEpochTest(bool prefetch)
BOOST_CHECK_EQUAL(sequences.m_data.size(), 1 - (i / data.size()));
if (i < data.size())
{
auto data = reinterpret_cast<DenseSequenceData&>(*sequences.m_data[0][0]);
auto& data = reinterpret_cast<DenseSequenceData&>(*sequences.m_data[0][0]);
BOOST_CHECK_EQUAL(data.m_numberOfSamples, 1u);
actual.push_back(*((float*)data.m_data));
actual.push_back(*((float*)data.GetDataBuffer()));
}
BOOST_CHECK_EQUAL(sequences.m_endOfEpoch, (data.size() <= i));
}

@@ -368,9 +368,9 @@ void BlockRandomizerOneEpochWithChunks1Test(bool prefetch)
BOOST_CHECK_EQUAL(sequences.m_data.size(), 1 - (i / data.size()));
if (i < data.size())
{
auto data = reinterpret_cast<DenseSequenceData&>(*sequences.m_data[0][0]);
auto& data = reinterpret_cast<DenseSequenceData&>(*sequences.m_data[0][0]);
BOOST_CHECK_EQUAL(data.m_numberOfSamples, 1u);
actual.push_back(*((float*)data.m_data));
actual.push_back(*((float*)data.GetDataBuffer()));
}
BOOST_CHECK_EQUAL(sequences.m_endOfEpoch, (data.size() <= i));
}

@@ -413,9 +413,9 @@ void BlockRandomizerOneEpochWithChunks2Test(bool prefetch)
BOOST_CHECK_EQUAL(sequences.m_data.size(), 1 - (i / data.size()));
if (i < data.size())
{
auto data = reinterpret_cast<DenseSequenceData&>(*sequences.m_data[0][0]);
auto& data = reinterpret_cast<DenseSequenceData&>(*sequences.m_data[0][0]);
BOOST_CHECK_EQUAL(data.m_numberOfSamples, 1u);
actual.push_back(*((float*)data.m_data));
actual.push_back(*((float*)data.GetDataBuffer()));
}
BOOST_CHECK_EQUAL(sequences.m_endOfEpoch, (data.size() <= i));
}

@@ -519,9 +519,9 @@ void BlockRandomizerOneEpochLegacyRandomizationTest(bool prefetch)
BOOST_CHECK_EQUAL(sequences.m_data.size(), 1 - (i / data.size()));
if (i < 10)
{
auto data = reinterpret_cast<DenseSequenceData&>(*sequences.m_data[0][0]);
auto& data = reinterpret_cast<DenseSequenceData&>(*sequences.m_data[0][0]);
BOOST_CHECK_EQUAL(data.m_numberOfSamples, 1u);
actual.push_back(*((float*)data.m_data));
actual.push_back(*((float*)data.GetDataBuffer()));
}
BOOST_CHECK_EQUAL(sequences.m_endOfEpoch, (data.size() <= i));

@@ -561,9 +561,9 @@ BOOST_AUTO_TEST_CASE(NoRandomizerOneEpoch)
BOOST_CHECK_EQUAL(sequences.m_data.size(), 1 - (i / data.size()));
if (i < data.size())
{
auto data = reinterpret_cast<DenseSequenceData&>(*sequences.m_data[0][0]);
auto& data = reinterpret_cast<DenseSequenceData&>(*sequences.m_data[0][0]);
BOOST_CHECK_EQUAL(data.m_numberOfSamples, 1u);
actual.push_back(*((float*)data.m_data));
actual.push_back(*((float*)data.GetDataBuffer()));
}

BOOST_CHECK_EQUAL(sequences.m_endOfEpoch, (data.size() <= i));

@@ -15,6 +15,16 @@

namespace Microsoft { namespace MSR { namespace CNTK { namespace Test {

struct MockDenseSequenceData : DenseSequenceData
{
const void* GetDataBuffer() override
{
return m_data;
}

void* m_data;
};

// A mock deserializer that produces N sequential samples
// with value from 0 .. N-1

@@ -59,7 +69,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { namespace Test {
{
const auto& data = m_data[sequenceId];

auto s = make_shared<DenseSequenceData>();
auto s = make_shared<MockDenseSequenceData>();
s->m_data = (void*)&data[0];
s->m_numberOfSamples = (uint32_t)data.size();
s->m_sampleLayout = m_sampleLayout;

@@ -215,7 +225,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { namespace Test {

for (auto& s : sequences.m_data[0])
{
float* casted = (float*)s->m_data;
float* casted = (float*)s->GetDataBuffer();
for (size_t i = 0; i < s->m_numberOfSamples; ++i)
{
epoch.push_back(casted[i]);