cleaned up ConvolutionNode vs. image interpretation of TensorShape;

TensorOp() optimization to use SGEMM disabled for 'double' in Debug builds, so we get our code path tested once in a while;
fixed ConvolutionEngineTests.cpp w.r.t. Create();
removed unused IsInputAnImage() methods
Frank Seide 2016-01-01 12:25:24 -08:00
Parent f369a8e94e
Commit 0c79c928ff
8 changed files: 146 additions and 154 deletions

View file

@@ -111,7 +111,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
size_t numImages = parameter.size() > 3 ? ((NDLNode<ElemType>*)params[3])->GetScalar() : 1; // BUGBUG: This comes through MBLayout, and should be forbidden.
ImageLayoutKind imageLayoutKind = ImageLayoutKindFrom(node->GetOptionalParameter("imageLayout", "HWC"));
nodePtr = builder.CreateInputNode(name, ImageLayout(imageWidth, imageHeight, imageChannels, imageLayoutKind), numImages);
nodePtr = builder.CreateInputNode(name, ImageDimensions::AsTensorShape(imageWidth, imageHeight, imageChannels, imageLayoutKind), numImages);
}
}
else if (cnNodeType == L"SparseImageInput")
@@ -129,7 +129,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
size_t numImages = parameter.size() > 3 ? ((NDLNode<ElemType>*)params[3])->GetScalar() : 1;
ImageLayoutKind imageLayoutKind = ImageLayoutKindFrom(node->GetOptionalParameter("imageLayout", "HWC"));
nodePtr = builder.CreateSparseInputNode(name, ImageLayout(imageWidth, imageHeight, imageChannels, imageLayoutKind), numImages);
nodePtr = builder.CreateSparseInputNode(name, ImageDimensions::AsTensorShape(imageWidth, imageHeight, imageChannels, imageLayoutKind), numImages);
}
}
else if (OperationNameOf(LearnableParameter) == cnNodeType)
@@ -325,7 +325,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
size_t img_channels = node->GetOptionalParameter("imageChannels", "0");
bool needGradient = node->GetOptionalParameter("needGradient", "false");
nodePtr = builder.Reshape(NULL, num_rows, ImageLayoutWHC(img_width, img_height, img_channels), name); // BUGBUG: use a tensor descriptor instead
nodePtr = builder.Reshape(NULL, num_rows, ImageDimensions::AsTensorShape(img_width, img_height, img_channels, ImageLayoutKind::HWC/*legacy*/), name); // BUGBUG: use a tensor descriptor instead
nodePtr->SetParameterUpdateRequired(needGradient);
}
}

View file

@@ -249,12 +249,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
const SmallVector<ptrdiff_t> & GetStrides() const { return m_strides; }
// interpretation as an image tensor
size_t GetNumChannels() const { if (m_dims.empty()) return 0; else return m_dims.size() > 0 ? m_dims[0] : 1; }
size_t GetWidth() const { if (m_dims.empty()) return 0; else return m_dims.size() > 1 ? m_dims[1] : 1; }
size_t GetHeight() const { if (m_dims.empty()) return 0; else return m_dims.size() > 2 ? m_dims[2] : 1; }
// heuristics used for pretty-printing
// TODO: This will go away.
bool IsInputAnImage() const { return GetRank() == 3 && (GetWidth() != 1 || GetNumChannels() != 1); }
//size_t GetNumChannels() const { if (m_dims.empty()) return 0; else return m_dims.size() > 0 ? m_dims[0] : 1; }
//size_t GetWidth() const { if (m_dims.empty()) return 0; else return m_dims.size() > 1 ? m_dims[1] : 1; }
//size_t GetHeight() const { if (m_dims.empty()) return 0; else return m_dims.size() > 2 ? m_dims[2] : 1; }
// legacy helper function for RowSliceNode. Will go away.
bool IsVectorStoredAsImage() const { return GetRank() == 3 && m_dims[0] == 1 && m_dims[1] == 1; }
// indexing
@@ -468,19 +466,40 @@ namespace Microsoft { namespace MSR { namespace CNTK {
else if (s == L"HWC" || s == L"legacy") return ImageLayoutKind::HWC;
else InvalidArgument("ImageLayoutKindFrom: Unknown ImageLayoutKind '%ls', must be 'CHW' (cudnn) or 'HWC' (CNTK legacy)", s.c_str());
}
static inline TensorShape ImageLayout(size_t width, size_t height, size_t channels, ImageLayoutKind imageLayoutKind)
{
if (imageLayoutKind == ImageLayoutKind::CHW) return TensorShape(width, height, channels);
else if (imageLayoutKind == ImageLayoutKind::HWC) return TensorShape(channels, width, height);
else LogicError("ImageLayout: Invalid ImageLayoutKind");
}
// When constructing an image tensor with the usual W, H, C format, use the following function instead.
// This will sort the three parameters into the correct order.
// BUGBUG: This only works for ImageLayoutKind::HWC. Also the naming is bad.
static inline TensorShape ImageLayoutWHC(size_t width, size_t height, size_t channels)
// interpret TensorShape as an image descriptor
// considering that we support two ways of storing images
struct ImageDimensions
{
return TensorShape(channels, width, height);
}
size_t m_width, m_height, m_numChannels;
// interpret TensorShape as image
ImageDimensions(const TensorShape & shape, ImageLayoutKind imageLayoutKind)
{
if (shape.GetRank() != 3)
InvalidArgument("Convolution operation currently only supports 1D or 2D convolution on 3D tensors.");
if (imageLayoutKind == ImageLayoutKind::CHW)
{
m_width = shape[0];
m_height = shape[1];
m_numChannels = shape[2];
}
else if (imageLayoutKind == ImageLayoutKind::HWC)
{
m_width = shape[1];
m_height = shape[2];
m_numChannels = shape[0];
}
else LogicError("WHC: Invalid ImageLayoutKind");
}
ImageDimensions(size_t width, size_t height, size_t numChannels) : m_width(width), m_height(height), m_numChannels(numChannels) {}
// interpret image as TensorShape
static TensorShape AsTensorShape(size_t width, size_t height, size_t numChannels, ImageLayoutKind imageLayoutKind/* = ImageLayoutKind::HWC*/)
{
if (imageLayoutKind == ImageLayoutKind::CHW) return TensorShape(width, height, numChannels);
else if (imageLayoutKind == ImageLayoutKind::HWC) return TensorShape(numChannels, width, height);
else LogicError("ImageLayout: Invalid ImageLayoutKind");
}
TensorShape AsTensorShape(ImageLayoutKind imageLayoutKind) { return AsTensorShape(m_width, m_height, m_numChannels, imageLayoutKind); }
};
}}}
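
For orientation, here is a minimal standalone sketch (not the CNTK header; Shape3 stands in for TensorShape) of the dimension ordering that ImageDimensions::AsTensorShape implements: the legacy HWC kind stores an image as [C, W, H], the cudnn CHW kind as [W, H, C].

#include <array>
#include <cassert>
#include <cstddef>

enum class ImageLayoutKind { CHW, HWC };

using Shape3 = std::array<size_t, 3>; // dims in storage order, stand-in for TensorShape

static Shape3 AsTensorShape(size_t w, size_t h, size_t c, ImageLayoutKind kind)
{
    return kind == ImageLayoutKind::CHW ? Shape3{ w, h, c }  // cudnn order
                                        : Shape3{ c, w, h }; // CNTK legacy order
}

int main()
{
    auto legacy = AsTensorShape(224, 224, 3, ImageLayoutKind::HWC); // a 224 x 224 RGB image
    assert(legacy[0] == 3 && legacy[1] == 224 && legacy[2] == 224);
    auto cudnn = AsTensorShape(224, 224, 3, ImageLayoutKind::CHW);
    assert(cudnn[0] == 224 && cudnn[1] == 224 && cudnn[2] == 3);
    return 0;
}

This matches the indexing used elsewhere in the commit, e.g. shape[1] for width and shape[0] for channels under the legacy layout.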

View file

@@ -503,9 +503,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
const char * mbSizeMark = child->m_pMBLayout ? "MBSize " : "";
if (child->m_sampleLayout.GetRank() == 3 && (child->m_sampleLayout.GetWidth() != 1 || child->m_sampleLayout.GetNumChannels() != 1)) // looks like an image: use WHC notation
if (child->m_sampleLayout.GetRank() == 3 && (child->m_sampleLayout[1] != 1 || child->m_sampleLayout[0] != 1)) // looks like an image: use WHC notation
fprintf(stderr, "%ls[%lu {W=%lu, H=%lu, C=%lu}, %s%lu]", child->NodeName().c_str(), child->GetNumRows(),
child->m_sampleLayout.GetWidth(), child->m_sampleLayout.GetHeight(), child->m_sampleLayout.GetNumChannels(), mbSizeMark, child->GetNumCols());
child->m_sampleLayout[1], child->m_sampleLayout[2], child->m_sampleLayout[0], mbSizeMark, child->GetNumCols());
//BUGBUG: This ^^ will print based on the old legacy layout, and we have no way of knowing here whether that is correct.
else if (child->m_sampleLayout.GetRank() > 1) // tensor: output the tensor dimensions --TODO: there will be no numRows in the future, only the tensor
fprintf(stderr, "%ls[%lu [%s], %s%lu]", child->NodeName().c_str(), child->GetNumRows(), string(child->m_sampleLayout).c_str(), mbSizeMark, child->GetNumCols());
else
@@ -540,14 +541,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
return !g_shareNodeValueMatrices || m_outputNeededDuringBackprop;
}
// TODO: Remove this.
// used from:
// - Plus/Minus/ElementTimesNode --> replace by max dim over inputs. Make this standard behavior for all binary element-wise ops.
bool IsInputAnImage(const size_t index) const
{
return m_inputs[index]->m_sampleLayout.IsInputAnImage();
}
const size_t GetNumInputs() const { return m_inputs.size(); }
virtual void SetInput(const size_t childIndex, const ComputationNodeBasePtr& node) = 0;
@@ -1528,7 +1521,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
#define UsingComputationNodeMembers /*without OperationName; needed to support inconsistent pattern of InputValue--TODO: This comment is out of date. */ \
protected: \
typedef shared_ptr<ComputationNode<ElemType>> ComputationNodePtr; \
using Base::m_deviceId; using Base::SetDims; using Base::SetDims1; using Base::SetNumCols; using Base::GetNumRows; using Base::GetNumCols; using Base::UpdateFunctionValuesSize; using Base::LoadValue; \
using Base::m_deviceId; using Base::GetDeviceId; using Base::SetDims; using Base::SetDims1; using Base::SetNumCols; using Base::GetNumRows; using Base::GetNumCols; using Base::UpdateFunctionValuesSize; using Base::LoadValue; \
using Base::m_pMBLayout; using Base::GetNumTimeSteps; using Base::GetNumParallelSequences; \
using Base::MaskMissingColumnsToZero; using Base::MaskMissingValueColumnsToZero; using Base::MaskMissingGradientColumnsToZero; using Base::InvalidateMissingValueColumns; using Base::InvalidateMissingGradientColumns; \
using Base::DataFor; using Base::ValueFor; using Base::Gradient; using Base::GradientFor; \
@@ -1547,7 +1540,7 @@ protected: \
using Base::DumpNodeInfo; using Base::EnumerateNodes; \
using Base::HasMBLayout; using Base::GetMBLayout; using Base::LinkToMBLayout; \
using Base::Input; using Base::SetInput; \
using Base::IsInputAnImage; using Base::IsEqualTo; using Base::IsOutputOlderThanInputs; using Base::IsLeaf; using Base::SetParameterUpdateRequired; \
using Base::IsEqualTo; using Base::IsOutputOlderThanInputs; using Base::IsLeaf; using Base::SetParameterUpdateRequired; \
using Base::Load; \
using Base::PrintNodeValuesToFile; using Base::PrintSelfBeforeValidation; \
using Base::Save; using Base::UpdateFunctionMBSize; \

View file

@@ -70,17 +70,18 @@ namespace Microsoft { namespace MSR { namespace CNTK {
m_zeroPadding(false), m_maxTempMemSizeInSamples(SIZE_MAX),
m_imageLayoutKind(ImageLayoutKind::HWC)
{
SetDims(ImageLayoutWHC(1, 1, 0), 0); // TODO: what is this magic #channels == 0? Can this even be initialized at this time, or only inferred?
SetDims(ImageDimensions::AsTensorShape(1, 1, 0, m_imageLayoutKind), 0);
}
ConvolutionNode(DEVICEID_TYPE deviceId, const wstring & name, const size_t kernelWidth, const size_t kernelHeight, const size_t outputChannels, const size_t horizontalSubsample, const size_t verticalSubsample,
const bool zeroPadding = false, const size_t maxTempMemSizeInSamples = 0, ImageLayoutKind imageLayoutKind = ImageLayoutKind::HWC) :
Base(deviceId, name),
m_outputChannels(outputChannels),
m_kernelWidth(kernelWidth), m_kernelHeight(kernelHeight),
m_horizontalSubsample(horizontalSubsample), m_verticalSubsample(verticalSubsample),
m_zeroPadding(zeroPadding), m_maxTempMemSizeInSamples(maxTempMemSizeInSamples),
m_imageLayoutKind(imageLayoutKind)
{
SetDims(ImageLayoutWHC(1, 1, outputChannels), 0);
SetDims(ImageDimensions::AsTensorShape(1, 1, m_outputChannels, m_imageLayoutKind), 0); // TODO: necessary?
m_factory = ConvolutionEngineFactory<ElemType>::Create(deviceId, ConvolutionEngineFactory<ElemType>::EngineType::Auto, m_imageLayoutKind);
}
ConvolutionNode(const ScriptableObjects::IConfigRecordPtr configp) :
@@ -97,7 +98,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Base::Save(fstream);
fstream << m_kernelWidth << m_kernelHeight << m_horizontalSubsample << m_verticalSubsample;
uint32_t imageLayoutKind = (uint32_t)m_imageLayoutKind;
uint32_t outputChannels = m_sampleLayout.GetNumChannels();
uint32_t outputChannels = (uint32_t)m_outputChannels;
fstream << imageLayoutKind << outputChannels;
fstream << m_zeroPadding << m_maxTempMemSizeInSamples;
}
@@ -109,7 +110,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
uint32_t imageLayoutKind, outputChannels;
fstream >> imageLayoutKind >> outputChannels;
m_imageLayoutKind = (ImageLayoutKind) imageLayoutKind;
SetDims(ImageLayoutWHC(1, 1, outputChannels), 0); // TODO: Save this separately.
m_outputChannels = outputChannels;
SetDims(ImageDimensions::AsTensorShape(1, 1, m_outputChannels, m_imageLayoutKind), 0); // TODO: needed?
fstream >> m_zeroPadding >> m_maxTempMemSizeInSamples;
m_factory = ConvolutionEngineFactory<ElemType>::Create(GetDeviceId(), ConvolutionEngineFactory<ElemType>::EngineType::Auto, m_imageLayoutKind);
}
@@ -165,20 +167,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
return false;
}
// memory allocation goes here, also tensor reshaping
virtual void UpdateFunctionMBSize() override
{
Base::UpdateFunctionMBSize();
// TODO: change tensor dims here (setN())
}
void ForwardProp(const FrameRange & fr) override
{
const Matrix<ElemType>& input0 = Input(0)->Value();
Matrix<ElemType> sliceInput1Value = Input(1)->ValueFor(fr);
Matrix<ElemType> sliceOutputValue = ValueFor(fr);
// REVIEW alexeyk: setting batch size, can it be done elsewhere in a single place? TODO: Yes, in UpdateFunctionMBSize().
// update the tensor dimension w.r.t. number of samples
size_t batchSize = sliceInput1Value.GetNumCols();
m_inT->setN(batchSize);
m_outT->setN(batchSize);
@@ -212,83 +207,60 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Base::Validate(isFinalValidationPass);
InferMBLayoutFromInputsForStandardCase();
// get input tensor shape
auto inputSampleLayout = GetInputSampleLayout(1);
auto sampleLayout = m_sampleLayout;
// get input and output tensor shape and interpret as image dimensions
auto inDims = ImageDimensions(GetInputSampleLayout(1), m_imageLayoutKind);
// the GetWidth() etc. functions below assume legacy format, so convert it if needed
// TODO: The right way is to swap this--use cudnn layout, which is our natural format (W,H,C),
// adapt GetWidth() etc. to that, and eliminate all notions of ImageLayoutWHC(), instead
// switch that for a legacy layout, but only for the purpose of this mangling right here.
if (m_imageLayoutKind == ImageLayoutKind::CHW)
{
inputSampleLayout = ImageLayoutWHC(inputSampleLayout[0], inputSampleLayout[1], inputSampleLayout[2]);
sampleLayout = ImageLayoutWHC(sampleLayout[0], sampleLayout[1], sampleLayout[2]);
}
if (inputSampleLayout.GetWidth() < m_kernelWidth || inputSampleLayout.GetHeight() < m_kernelHeight)
InvalidArgument("inputWidth must >= kernelWidth and inputHeight must >= kernelHeight.");
if (inDims.m_width < m_kernelWidth || inDims.m_height < m_kernelHeight)
InvalidArgument("%ls %ls operation requires that input width be >= kernelWidth and input height >= kernelHeight.", NodeName().c_str(), OperationName().c_str());
// determine output tensor shape
// BUGBUG: For cudnn, tensor is not WHC. How can we propagate this?
// TODO: This is the point where we need to know which tensor dimension refers to W,H,C.
// We should enforce rank and then somehow know which is which. But how? It's an option to the reader. Use the same option? Must be serialized...
// WATCH OUT: Number of channels is tucked away in sampleLayout and must be propagated.
TensorShape outputSampleLayout;
if (m_zeroPadding)
{
const int kernelWidthCenter = m_kernelWidth % 2;
const int kernelHeightCenter = m_kernelHeight % 2;
outputSampleLayout = ImageLayoutWHC(
(inputSampleLayout.GetWidth() - kernelWidthCenter) / m_horizontalSubsample + 1,
(inputSampleLayout.GetHeight() - kernelHeightCenter) / m_verticalSubsample + 1,
sampleLayout.GetNumChannels());
}
else
{
outputSampleLayout = ImageLayoutWHC(
(inputSampleLayout.GetWidth() - m_kernelWidth) / m_horizontalSubsample + 1,
(inputSampleLayout.GetHeight() - m_kernelHeight) / m_verticalSubsample + 1,
sampleLayout.GetNumChannels());
}
const int kernelWidthCenter = m_zeroPadding ? m_kernelWidth % 2 : m_kernelWidth;
const int kernelHeightCenter = m_zeroPadding ? m_kernelHeight % 2 : m_kernelHeight;
auto outDims = ImageDimensions(
(inDims.m_width - kernelWidthCenter) / m_horizontalSubsample + 1,
(inDims.m_height - kernelHeightCenter) / m_verticalSubsample + 1,
m_outputChannels);
size_t weightCols = m_kernelWidth * m_kernelHeight * inputSampleLayout.GetNumChannels();
size_t weightCols = m_kernelWidth * m_kernelHeight * inDims.m_numChannels;
// check/infer input [0] (weights)
if (Input(0)->Value().HasNoElements())
ValidateInferInputDims(0, outputSampleLayout.GetNumChannels(), weightCols);
ValidateInferInputDims(0, m_outputChannels, weightCols);
if (isFinalValidationPass && (Input(0)->GetNumCols() != weightCols || Input(0)->GetNumRows() != outputSampleLayout.GetNumChannels()))
LogicError("convolutionWeight matrix %ls should have dimension [%d, %d] which is [outputChannels, kernelWidth * kernelHeight * inputChannels]", Input(0)->NodeName().c_str(), (int)outputSampleLayout.GetNumChannels(), (int)weightCols);
if (isFinalValidationPass && (Input(0)->GetNumCols() != weightCols || Input(0)->GetNumRows() != m_outputChannels))
LogicError("convolutionWeight matrix %ls should have dimension [%d, %d] which is [outputChannels, kernelWidth * kernelHeight * inputChannels]", Input(0)->NodeName().c_str(), (int)m_outputChannels, (int)weightCols);
size_t inputDim = inputSampleLayout.GetWidth() * inputSampleLayout.GetHeight() * inputSampleLayout.GetNumChannels();
// check/infer input [1] (data)
size_t inputDim = inDims.m_width * inDims.m_height * inDims.m_numChannels;
if (Input(1)->GetNumRows() == 0)
ValidateInferInputDims(1, inputDim, Input(1)->GetNumCols());
if (isFinalValidationPass && Input(1)->GetNumRows() != inputDim)
LogicError("Each column of input to the convolution node %ls is a sample and should have dimension %d, which is inputWidth * inputHeight * inputChannels.", NodeName().c_str(), (int)inputDim);
LogicError("Each column of inDims to the convolution node %ls is a sample and should have dimension %d, which is inputWidth * inputHeight * inputChannels.", NodeName().c_str(), (int)inputDim);
// that's our dimension
SetDims(ImageLayout(outputSampleLayout.GetWidth(), outputSampleLayout.GetHeight(), outputSampleLayout.GetNumChannels(), m_imageLayoutKind), Input(1)->GetNumCols());
SetDims(outDims.AsTensorShape(m_imageLayoutKind), Input(1)->GetNumCols());
// set up the various engines and descriptor objects
// REVIEW alexeyk: is there a better place to create engines?
assert(m_factory);
//if (m_factory == nullptr)
// m_factory = ConvolutionEngineFactory<ElemType>::Create(m_deviceId, ConvolutionEngineFactory<ElemType>::EngineType::Auto, m_imageLayoutKind);
// TODO: This seems to expose too much internal knowledge of the engine to the ConvolutionNode().
// Why not just pass everything to the engine creator, and get one object that holds everything.
if (m_factory == nullptr)
m_factory = ConvolutionEngineFactory<ElemType>::Create(m_deviceId, ConvolutionEngineFactory<ElemType>::EngineType::Auto, m_imageLayoutKind);
if (m_convEng == nullptr)
m_convEng = m_factory->CreateConvEngine(m_deviceId, m_maxTempMemSizeInSamples);
if (m_inT == nullptr)
m_inT = m_factory->CreateTensor(inputSampleLayout.GetWidth(), inputSampleLayout.GetHeight(), inputSampleLayout.GetNumChannels(), 1);
m_inT = m_factory->CreateTensor(inDims.m_width, inDims.m_height, inDims.m_numChannels, 1);
if (m_filterT == nullptr)
m_filterT = m_factory->CreateFilter(m_kernelWidth, m_kernelHeight, inputSampleLayout.GetNumChannels(), sampleLayout.GetNumChannels());
m_filterT = m_factory->CreateFilter(m_kernelWidth, m_kernelHeight, inDims.m_numChannels, m_outputChannels);
if (m_outT == nullptr)
m_outT = m_factory->CreateTensor(sampleLayout.GetWidth(), sampleLayout.GetHeight(), sampleLayout.GetNumChannels(), 1);
m_outT = m_factory->CreateTensor(outDims.m_width, outDims.m_height, outDims.m_numChannels, 1);
if (m_convDesc == nullptr)
m_convDesc = m_factory->CreateConvDescriptor(*m_inT, *m_filterT, m_horizontalSubsample, m_verticalSubsample, m_zeroPadding);
// REVIEW alexeyk: create per-channel (shared) bias. Consider adding other types of biases.
// REVIEW alexeyk: create per-channel bias (shared across all pixels). Consider adding other types of biases.
if (m_biasT == nullptr)
m_biasT = m_factory->CreateTensor(1, 1, sampleLayout.GetNumChannels(), 1);
m_biasT = m_factory->CreateTensor(1, 1, outDims.m_numChannels, 1);
}
void DumpNodeInfo(const bool printValues, File& fstream) const override
@@ -298,11 +270,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
auto inputSampleLayout = GetInputSampleLayout(1);
char str[4096];
sprintf(str, "Input[Width:%lu, Height:%lu, Channels:%lu] \n", inputSampleLayout.GetWidth(), inputSampleLayout.GetHeight(), inputSampleLayout.GetNumChannels());
sprintf(str, "Input[Width:%lu, Height:%lu, Channels:%lu] \n", inputSampleLayout[1], inputSampleLayout[2], inputSampleLayout[0]);
fstream << string(str);
sprintf(str, "Kernel[Width:%lu, Height:%lu] SubSample[Horizontal:%lu, Vertical:%lu]\n", m_kernelWidth, m_kernelHeight, m_horizontalSubsample, m_verticalSubsample);
fstream << string(str);
sprintf(str, "Output[Width:%lu, Height:%lu, Channels:%lu] \n", m_sampleLayout.GetWidth(), m_sampleLayout.GetHeight(), m_sampleLayout.GetNumChannels());
sprintf(str, "Output[Width:%lu, Height:%lu, Channels:%lu] \n", m_sampleLayout[1], m_sampleLayout[2], m_sampleLayout[0]);
fstream << string(str);
sprintf(str, "ZeroPadding=%ls maxTempMemSizeInSamples=%lu\n", m_zeroPadding? L"true" : L"false", m_maxTempMemSizeInSamples);
fstream << string(str);
@@ -328,15 +300,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
private:
std::unique_ptr<ConvolutionEngineFactory<ElemType>> m_factory;
std::unique_ptr<ConvolutionEngine<ElemType>> m_convEng;
std::unique_ptr<ConvolutionTensor4D> m_inT;
std::unique_ptr<ConvolutionFilter> m_filterT;
std::unique_ptr<ConvolutionTensor4D> m_outT;
std::unique_ptr<ConvolutionDescriptor> m_convDesc;
std::unique_ptr<ConvolutionTensor4D> m_biasT;
size_t m_outputChannels;
size_t m_kernelWidth, m_kernelHeight;
size_t m_horizontalSubsample, m_verticalSubsample;
bool m_zeroPadding;
@@ -346,6 +310,15 @@ namespace Microsoft { namespace MSR { namespace CNTK {
size_t m_maxTempMemSizeInSamples; // can change during runtime
ImageLayoutKind m_imageLayoutKind; // how to interpret the tensor (which dimensions are X/Y and C)
std::unique_ptr<ConvolutionEngineFactory<ElemType>> m_factory;
std::unique_ptr<ConvolutionEngine<ElemType>> m_convEng;
std::unique_ptr<ConvolutionTensor4D> m_inT;
std::unique_ptr<ConvolutionFilter> m_filterT;
std::unique_ptr<ConvolutionTensor4D> m_outT;
std::unique_ptr<ConvolutionDescriptor> m_convDesc;
std::unique_ptr<ConvolutionTensor4D> m_biasT;
};
template class ConvolutionNode<float>;
@@ -372,7 +345,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
m_windowWidth(windowWidth), m_windowHeight(windowHeight),
m_horizontalSubsample(horizontalSubsample), m_verticalSubsample(verticalSubsample)
{
m_factory = ConvolutionEngineFactory<ElemType>::Create(deviceId, ConvolutionEngineFactory<ElemType>::EngineType::Auto);
m_factory = ConvolutionEngineFactory<ElemType>::Create(deviceId, ConvolutionEngineFactory<ElemType>::EngineType::Auto/*..., m_imageLayoutKind*/);
}
PoolingNodeBase(const ScriptableObjects::IConfigRecordPtr configp) :
PoolingNodeBase(configp->Get(L"deviceId"), L"<placeholder>", configp->Get(L"windowWidth"), configp->Get(L"windowHeight"), configp->Get(L"horizontalSubsample"), configp->Get(L"verticalSubsample"))
@@ -445,20 +418,20 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Base::Validate(isFinalValidationPass);
InferMBLayoutFromInputsForStandardCase();
// get input tensor shape
auto inputSampleLayout = GetInputSampleLayout(0);
// get input tensor shape and interpret as image dimensions
const auto m_imageLayoutKind = ImageLayoutKind::HWC; // BUGBUG: Finish this. Must be serialized.
auto inDims = ImageDimensions(GetInputSampleLayout(0), m_imageLayoutKind);
if (inputSampleLayout.GetWidth() < m_windowWidth || inputSampleLayout.GetHeight() < m_windowHeight)
if (inDims.m_width < m_windowWidth || inDims.m_height < m_windowHeight)
InvalidArgument("PoolingNodeBase: inputWidth must >= windowWidth and inputHeight must >= windowHeight.");
// determine output tensor shape
auto outputSampleLayout = ImageLayoutWHC(
(inputSampleLayout.GetWidth() - m_windowWidth) / m_horizontalSubsample + 1,
(inputSampleLayout.GetHeight() - m_windowHeight) / m_verticalSubsample + 1,
inputSampleLayout.GetNumChannels());
auto outDims = ImageDimensions(
(inDims.m_width - m_windowWidth) / m_horizontalSubsample + 1,
(inDims.m_height - m_windowHeight) / m_verticalSubsample + 1,
inDims.m_numChannels);
m_inputSizePerSample = inputSampleLayout.GetWidth() * inputSampleLayout.GetHeight() * inputSampleLayout.GetNumChannels();
//m_outputSizePerSample = outputSampleLayout.GetWidth() * outputSampleLayout.GetHeight() * outputSampleLayout.GetNumChannels();
m_inputSizePerSample = inDims.m_width * inDims.m_height * inDims.m_numChannels;
if (Input(0)->GetNumRows() == 0)
ValidateInferInputDims(0, m_inputSizePerSample, Input(0)->GetNumCols()); // TODO: We should infer a tensor dimension for the input instead.
@@ -466,18 +439,18 @@ namespace Microsoft { namespace MSR { namespace CNTK {
if (isFinalValidationPass && Input(0)->GetNumRows() != m_inputSizePerSample) // TODO: Can be removed once tensor shape and numRows are perfectly in sync.
LogicError("each column of input to the MaxPooling node %ls is a sample and should have dimension %d, which is inputWidth * inputHeight * inputChannels", NodeName().c_str(), (int)m_inputSizePerSample);
SetDims(outputSampleLayout, Input(0)->GetNumCols());
SetDims(outDims.AsTensorShape(m_imageLayoutKind), Input(0)->GetNumCols());
// set up various engines and descriptor objects
// REVIEW alexeyk: is there a better place to create engines?
if (m_factory == nullptr)
m_factory = ConvolutionEngineFactory<ElemType>::Create(m_deviceId, ConvolutionEngineFactory<ElemType>::EngineType::Auto);
m_factory = ConvolutionEngineFactory<ElemType>::Create(m_deviceId, ConvolutionEngineFactory<ElemType>::EngineType::Auto, m_imageLayoutKind);
if (m_poolEng == nullptr)
m_poolEng = m_factory->CreatePoolEngine(m_deviceId);
if (m_inT == nullptr)
m_inT = m_factory->CreateTensor(inputSampleLayout.GetWidth(), inputSampleLayout.GetHeight(), inputSampleLayout.GetNumChannels(), 1);
m_inT = m_factory->CreateTensor(inDims.m_width, inDims.m_height, inDims.m_numChannels, 1);
if (m_outT == nullptr)
m_outT = m_factory->CreateTensor(m_sampleLayout.GetWidth(), m_sampleLayout.GetHeight(), m_sampleLayout.GetNumChannels(), 1);
m_outT = m_factory->CreateTensor(m_sampleLayout[1], m_sampleLayout[2], m_sampleLayout[0], 1);
}
void DumpNodeInfo(const bool printValues, File& fstream) const override
@@ -487,11 +460,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
auto inputSampleLayout = GetInputSampleLayout(0);
char str[4096];
sprintf(str, "Input[Width:%lu, Height:%lu, Channels:%lu] \n", inputSampleLayout.GetWidth(), inputSampleLayout.GetHeight(), inputSampleLayout.GetNumChannels());
sprintf(str, "Input[Width:%lu, Height:%lu, Channels:%lu] \n", inputSampleLayout[1], inputSampleLayout[2], inputSampleLayout[0]);
fstream << string(str);
sprintf(str, "PoolingWindow[Width:%lu, Height:%lu] SubSampling[Horizontal:%lu, Vertical:%lu]\n", m_windowWidth, m_windowHeight, m_horizontalSubsample, m_verticalSubsample);
fstream << string(str);
sprintf(str, "Output[Width:%lu, Height:%lu, Channels:%lu] \n", m_sampleLayout.GetWidth(), m_sampleLayout.GetHeight(), m_sampleLayout.GetNumChannels());
sprintf(str, "Output[Width:%lu, Height:%lu, Channels:%lu] \n", m_sampleLayout[1], m_sampleLayout[2], m_sampleLayout[0]);
fstream << string(str);
sprintf(str, "TotalSizePerSample[Input:%lu, Output:%lu] \n", m_inputSizePerSample, m_outputSizePerSample);
fstream << string(str);
@@ -740,18 +713,21 @@ namespace Microsoft { namespace MSR { namespace CNTK {
SetDims(Input(0));
const auto m_imageLayoutKind = ImageLayoutKind::HWC; // BUGBUG: Finish this. Must be serialized.
auto dims = ImageDimensions(GetSampleLayout(), m_imageLayoutKind);
if (m_factory == nullptr)
m_factory = ConvolutionEngineFactory<ElemType>::Create(m_deviceId, ConvolutionEngineFactory<ElemType>::EngineType::Auto);
m_factory = ConvolutionEngineFactory<ElemType>::Create(m_deviceId, ConvolutionEngineFactory<ElemType>::EngineType::Auto, m_imageLayoutKind);
if (m_convEng == nullptr)
m_convEng = m_factory->CreateConvEngine(m_deviceId, 0);
if (m_inT == nullptr)
m_inT = m_factory->CreateTensor(m_sampleLayout.GetWidth(), m_sampleLayout.GetHeight(), m_sampleLayout.GetNumChannels(), 1);
m_inT = m_factory->CreateTensor(dims.m_width, dims.m_height, dims.m_numChannels, 1);
if (m_scaleBiasT == nullptr)
{
if (m_spatial)
m_scaleBiasT = m_factory->CreateTensor(1, 1, m_sampleLayout.GetNumChannels(), 1);
m_scaleBiasT = m_factory->CreateTensor(1, 1, dims.m_numChannels, 1);
else
m_scaleBiasT = m_factory->CreateTensor(m_sampleLayout.GetWidth(), m_sampleLayout.GetHeight(), m_sampleLayout.GetNumChannels(), 1);
m_scaleBiasT = m_factory->CreateTensor(dims.m_width, dims.m_height, dims.m_numChannels, 1);
}
}
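
As a sanity check on the unified padding arithmetic in Validate() above, here is a small self-contained sketch with illustrative numbers (28x28 input, 5x5 kernel, stride 2; not from the commit):

#include <cstddef>
#include <cstdio>

// mirrors: (inDims.m_width - kernelWidthCenter) / m_horizontalSubsample + 1,
// where kernelWidthCenter is kernelWidth % 2 with zero-padding, else kernelWidth
static size_t OutputDim(size_t inDim, size_t kernel, size_t subsample, bool zeroPadding)
{
    const size_t center = zeroPadding ? kernel % 2 : kernel;
    return (inDim - center) / subsample + 1;
}

int main()
{
    std::printf("no padding:   %zu\n", OutputDim(28, 5, 2, false)); // (28 - 5) / 2 + 1 = 12
    std::printf("zero padding: %zu\n", OutputDim(28, 5, 2, true));  // (28 - 1) / 2 + 1 = 14
    return 0;
}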

View file

@@ -279,7 +279,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
if (!isImage)
Init(configp->Get(L"shape"), isSparse);
else
Init(ImageLayout(configp->Get(L"imageWidth"), configp->Get(L"imageHeight"), configp->Get(L"imageChannels"), ImageLayoutKindFrom(configp->Get(L"imageLayout"))), isSparse);
Init(ImageDimensions::AsTensorShape(configp->Get(L"imageWidth"), configp->Get(L"imageHeight"), configp->Get(L"imageChannels"), ImageLayoutKindFrom(configp->Get(L"imageLayout"))), isSparse);
}
public:

View file

@@ -170,7 +170,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
m_targetImageLayout(imageLayout)
{ }
ReshapeNode(const ScriptableObjects::IConfigRecordPtr configp) :
ReshapeNode(configp->Get(L"deviceId"), L"<placeholder>", configp->Get(L"numRows"), ImageLayoutWHC(configp->Get(L"imageWidth"), configp->Get(L"imageHeight"), configp->Get(L"imageChannels")))
ReshapeNode(configp->Get(L"deviceId"), L"<placeholder>", configp->Get(L"numRows"), ImageDimensions::AsTensorShape(configp->Get(L"imageWidth"), configp->Get(L"imageHeight"), configp->Get(L"imageChannels"), ImageLayoutKind::HWC/*legacy*/))
{
// BUGBUG: We should not operate on image layouts here, but on a proper tensor layout.
AttachInputs(configp, this->GetExpectedNumInputs());
@@ -215,7 +215,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
else
fprintf(stderr, "%ls[%lu, %lu]", child->NodeName().c_str(), child->GetNumRows(), child->GetNumCols());
}
fprintf(stderr, ", NumOfRows=%lu, imageWidth=%lu, imageHeight=%lu, imageChannels=%lu)", m_numTargetRows, m_targetImageLayout.GetWidth(), m_targetImageLayout.GetHeight(), m_targetImageLayout.GetNumChannels());
fprintf(stderr, ", NumOfRows=%lu, imageWidth=%lu, imageHeight=%lu, imageChannels=%lu)", m_numTargetRows, m_targetImageLayout[1], m_targetImageLayout[2], m_targetImageLayout[0]);
// BUGBUG: This interpretation as image dims is only correct for the 'legacy' format, not for cudnn.
}
virtual void /*ComputationNodeBase::*/Validate(bool isFinalValidationPass) override
@@ -248,7 +249,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// setting any dimension to 0 means lose the tensor, flatten to vector
// TODO: We can use 0 to indicate "infer". One value can be 0. It will be filled in to match row dim.
if (m_targetImageLayout.GetWidth() == 0 || m_targetImageLayout.GetHeight() == 0 || m_targetImageLayout.GetNumChannels() == 0)
if (m_targetImageLayout[1] == 0 || m_targetImageLayout[2] == 0 || m_targetImageLayout[0] == 0)
{
if (Input(0)->HasSampleLayout())
fprintf(stderr, "WARNING: Reshape operation cannot inherit image size information from its child. Image size info is lost.\n");
@@ -382,32 +383,32 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// TODO: Say in one sentence what this logic does.
void InferTargetSampleLayout()
{
// BUGBUG: We should not operate on image layouts here, but on a proper tensor layout.
if (m_targetImageLayout.GetWidth() > 0)
// BUGBUG: Below is the result of refactoring and only works for rank-3 tensors. Generalize.
if (m_targetImageLayout[1] > 0)
{
if (m_targetImageLayout.GetHeight() > 0)
if (m_targetImageLayout[2] > 0)
{
if (m_targetImageLayout.GetNumChannels() > 0)
if (m_targetImageLayout[0] > 0)
{
if (m_targetImageLayout.GetNumElements() != m_numTargetRows)
RuntimeError("Image dimensions do not match row size.");
}
else
{
if (m_numTargetRows % (m_targetImageLayout.GetWidth() * m_targetImageLayout.GetHeight()) > 0)
if (m_numTargetRows % (m_targetImageLayout[1] * m_targetImageLayout[2]) > 0)
RuntimeError("Image row size is not a multiple of specified image dimensions.");
else
m_targetImageLayout = ImageLayoutWHC(m_targetImageLayout.GetWidth(), m_targetImageLayout.GetHeight(), m_numTargetRows / (m_targetImageLayout.GetWidth() * m_targetImageLayout.GetHeight()));
m_targetImageLayout = TensorShape(m_numTargetRows / (m_targetImageLayout[1] * m_targetImageLayout[2]), m_targetImageLayout[1], m_targetImageLayout[2]);
}
}
else
{
if (m_targetImageLayout.GetNumChannels() > 0)
if (m_targetImageLayout[0] > 0)
{
if (m_numTargetRows % (m_targetImageLayout.GetWidth() * m_targetImageLayout.GetNumChannels()) > 0)
if (m_numTargetRows % (m_targetImageLayout[1] * m_targetImageLayout[0]) > 0)
RuntimeError("Image row size is not a multiple of specified image dimensions.");
else
m_targetImageLayout = ImageLayoutWHC(m_targetImageLayout.GetWidth(), m_numTargetRows / (m_targetImageLayout.GetWidth() * m_targetImageLayout.GetNumChannels()), m_targetImageLayout.GetNumChannels());
m_targetImageLayout = TensorShape(m_targetImageLayout[0], m_targetImageLayout[1], m_numTargetRows / (m_targetImageLayout[1] * m_targetImageLayout[0]));
}
else
{
@@ -417,22 +418,22 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
else
{
if (m_targetImageLayout.GetHeight() > 0)
if (m_targetImageLayout[2] > 0)
{
if (m_targetImageLayout.GetNumChannels() > 0)
if (m_targetImageLayout[0] > 0)
{
if (m_numTargetRows % (m_targetImageLayout.GetHeight() * m_targetImageLayout.GetNumChannels()) > 0)
if (m_numTargetRows % (m_targetImageLayout[2] * m_targetImageLayout[0]) > 0)
RuntimeError("Image row size is not a multiple of specified image dimensions.");
else
m_targetImageLayout = ImageLayoutWHC(m_numTargetRows / (m_targetImageLayout.GetHeight() * m_targetImageLayout.GetNumChannels()), m_targetImageLayout.GetHeight(), m_targetImageLayout.GetNumChannels());
m_targetImageLayout = TensorShape(m_targetImageLayout[0], m_numTargetRows / (m_targetImageLayout[2] * m_targetImageLayout[0]), m_targetImageLayout[2]);
}
else
RuntimeError("At least two image dimensions must be specified.");
}
else if (m_targetImageLayout.GetNumChannels() > 0)
else if (m_targetImageLayout[0] > 0)
RuntimeError("At least two image dimensions must be specified.");
else
m_targetImageLayout = ImageLayoutWHC(m_numTargetRows, 1, 1);
m_targetImageLayout = TensorShape(1, m_numTargetRows, 1);
}
}
};
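
The inference rule above, for the case where width and height are given and the channel dimension is 0 ("infer"), reduces to C = numTargetRows / (W * H). A minimal sketch with illustrative numbers:

#include <cstddef>
#include <cstdio>
#include <stdexcept>

static size_t InferChannels(size_t numTargetRows, size_t width, size_t height)
{
    if (numTargetRows % (width * height) != 0)
        throw std::runtime_error("Image row size is not a multiple of specified image dimensions.");
    return numTargetRows / (width * height);
}

int main()
{
    // 1200 = 20 * 20 * 3, so a [20 x 20 x ?] target infers 3 channels
    std::printf("channels = %zu\n", InferChannels(1200, 20, 20));
    return 0;
}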

View file

@@ -4473,6 +4473,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// Note: A minor risk is that with this, our own reduction function will rarely be used.
// That function was tested to give the same results with 'double', and nearly the same with 'float' (different summation order matters).
else if (op == ElementWiseOperator::opCopy && // we are just adding to target without any further operation
#ifdef _DEBUG
sizeof(ElemType) == sizeof(float) && // in debug don't shortcut 'double' so we have some test of our own codepath
#endif
regularOpDims.size() == 1 && regularStrides[0][0] == 1 && regularStrides[1][0] == 1 && // we are processing a column
reducingOpDims.size() == 1 && reducingStrides[0][0] >= (ptrdiff_t)regularOpDims[0]) // reducing across columns and no overlap
{
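
The hunk above gates the SGEMM shortcut on sizeof(ElemType) == sizeof(float) in Debug builds, so the hand-written reduction still runs for 'double'. A standalone sketch of that gating pattern (names are illustrative, not CNTK's):

#include <cstdio>

template <typename ElemType>
ElemType Sum(const ElemType* x, int n)
{
    bool useFastPath = true;
#ifdef _DEBUG
    useFastPath = sizeof(ElemType) == sizeof(float); // keep the 'double' fallback exercised in Debug
#endif
    ElemType sum = 0;
    if (useFastPath)
        for (int i = 0; i < n; i++) sum += x[i];  // stands in for the SGEMM-based shortcut
    else
        for (int i = n; i-- > 0; ) sum += x[i];   // stands in for the hand-written reduction
    return sum;
}

int main()
{
    const double x[4] = { 1, 2, 3, 4 };
    std::printf("%g\n", Sum(x, 4)); // 10
    return 0;
}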

View file

@@ -26,7 +26,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { namespace Test
{
try
{
return ConvFact::Create(0, ConvFact::EngineType::CuDnn) != nullptr;
return ConvFact::Create(0, ConvFact::EngineType::CuDnn, ImageLayoutKind::CHW) != nullptr;
}
catch (std::runtime_error)
{
@@ -55,7 +55,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { namespace Test
for (int deviceId : { 0 })
{
auto fact = ConvFact::Create(deviceId);
auto fact = ConvFact::Create(deviceId, ConvFact::EngineType::Auto);
auto tt = typeid(fact).name();
UNUSED(tt);
auto eng = fact->CreateConvEngine(deviceId, 0);
@@ -128,7 +128,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { namespace Test
for (int deviceId : { -1, 0 })
{
auto fact = ConvFact::Create(deviceId);
auto fact = ConvFact::Create(deviceId, ConvFact::EngineType::Auto);
auto eng = fact->CreateConvEngine(deviceId, 0);
auto inT = fact->CreateTensor(inW, inH, cmapIn, n);
auto filtT = fact->CreateFilter(kW, kH, cmapIn, cmapOut);
@@ -175,7 +175,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { namespace Test
for (int deviceId : { 0 })
{
auto fact = ConvFact::Create(deviceId);
auto fact = ConvFact::Create(deviceId, ConvFact::EngineType::Auto);
auto eng = fact->CreateConvEngine(deviceId, 0);
auto srcGradT = fact->CreateTensor(outW, outH, cmapOut, n);
auto filtT = fact->CreateFilter(kW, kH, cmapIn, cmapOut);
@@ -231,7 +231,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { namespace Test
for (int deviceId : { 0 })
{
auto fact = ConvFact::Create(deviceId);
auto fact = ConvFact::Create(deviceId, ConvFact::EngineType::Auto);
auto eng = fact->CreateConvEngine(deviceId, 0);
auto srcGradT = fact->CreateTensor(outW, outH, cmapOut, n);
auto filtT = fact->CreateFilter(kW, kH, cmapIn, cmapOut);
@@ -296,7 +296,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { namespace Test
for (int deviceId : { 0 })
{
auto fact = ConvFact::Create(deviceId);
auto fact = ConvFact::Create(deviceId, ConvFact::EngineType::Auto);
auto eng = fact->CreatePoolEngine(deviceId);
auto inT = fact->CreateTensor(inW, inH, cmap, n);
auto outT = fact->CreateTensor(outW, outH, cmap, n);
@@ -346,7 +346,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { namespace Test
for (int deviceId : { 0 })
{
auto fact = ConvFact::Create(deviceId);
auto fact = ConvFact::Create(deviceId, ConvFact::EngineType::Auto);
auto eng = fact->CreatePoolEngine(deviceId);
auto inT = fact->CreateTensor(inW, inH, cmap, n);
auto outT = fact->CreateTensor(outW, outH, cmap, n);
@@ -406,7 +406,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { namespace Test
for (int deviceId : { 0 })
{
auto fact = ConvFact::Create(deviceId);
auto fact = ConvFact::Create(deviceId, ConvFact::EngineType::Auto);
auto eng = fact->CreatePoolEngine(deviceId);
auto inT = fact->CreateTensor(inW, inH, cmap, n);
auto outT = fact->CreateTensor(outW, outH, cmap, n);
@@ -456,7 +456,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { namespace Test
for (int deviceId : { 0 })
{
auto fact = ConvFact::Create(deviceId);
auto fact = ConvFact::Create(deviceId, ConvFact::EngineType::Auto);
auto eng = fact->CreatePoolEngine(deviceId);
auto inT = fact->CreateTensor(inW, inH, cmap, n);
auto outT = fact->CreateTensor(outW, outH, cmap, n);