disabled TensorView for PlusNode::BackpropTo(), since it causes a result difference in the Image/QuickE2E test;

GetTensorShape() now adds the column dimension as one more dimension
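For context, a minimal sketch of what "adds the column dimension" means (illustrative only, not CNTK source; the helper name and types are made up): for a node without an MBLayout, whose [rows x cols] matrix has its rows described by the sample layout, GetTensorShape() now appends the column count as one extra, last tensor dimension, so the resulting shape covers the whole matrix.

    #include <cstddef>
    #include <vector>

    // Hypothetical stand-in for TensorShape: just an ordered list of dimensions.
    using Dims = std::vector<size_t>;

    // Sketch of the new no-MBLayout behavior: the sample layout describes the
    // rows; the number of columns is appended as the last dimension.
    Dims GetTensorShapeSketch(Dims sampleDims, size_t numCols)
    {
        sampleDims.push_back(numCols); // last dim is the column dimension
        return sampleDims;             // e.g. sample layout (3, 4) with 5 columns -> (3, 4, 5)
    }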
Frank Seide 2015-12-30 16:40:49 -08:00
Parent 4ce3b1a8eb
Commit c87e2f7550
6 changed files with 21 additions and 12 deletions

View file

@@ -37,6 +37,7 @@ using namespace std;
wstring computationNodes = // TODO: use actual TypeName() here? would first need to make it a wide string; we should also extract those two methods into the base macro
L"LearnableParameter(rows, cols, needGradient = true, init = 'uniform'/*|fixedValue|gaussian|fromFile*/, initValueScale = 1, value = 0, initFromFilePath = '', initOnCPUOnly=true, randomSeed=-1, tag='') = new ComputationNode [ operation = 'LearnableParameter' ; shape = new TensorShape [ dims = (rows : cols) ] /*plus the function args*/ ]\n"
L"Parameter = LearnableParameter // deprecated \n"
L"ParameterTensor(dims, needGradient = true, init = 'uniform'/*|fixedValue|gaussian|fromFile*/, initValueScale = 1, value = 0, initFromFilePath = '', initOnCPUOnly=true, randomSeed=-1, tag='') = new ComputationNode [ operation = 'LearnableParameter' ; shape = new TensorShape [ /*dims*/ ] /*plus the function args*/ ]\n"
// ^^ already works; vv untested
L"Input(rows, tag='feature') = new ComputationNode [ operation = 'InputValue' ; shape = new TensorShape [ dims = (rows) ] ; isImage = false /*plus the function args*/ ]\n" // note: naming a little inconsistent // TODO: re-test after flag change
L"SparseInput(rows, tag='feature') = new ComputationNode [ operation = 'SparseInputValue' ; shape = new TensorShape [ dims = (rows) ] ; isImage = false /*plus the function args*/ ]\n"

View file

@@ -457,8 +457,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
};
static inline ImageLayoutKind ImageLayoutKindFrom(const wstring & s)
{
if (s == L"CHW") return ImageLayoutKind::CHW;
else if (s == L"HWC") return ImageLayoutKind::HWC;
if (s == L"CHW" || s == L"cudnn") return ImageLayoutKind::CHW;
else if (s == L"HWC" || s == L"legacy") return ImageLayoutKind::HWC;
else InvalidArgument("ImageLayoutKindFrom: Unknown ImageLayoutKind '%ls', must be 'CHW' (cudnn) or 'HWC' (CNTK legacy)", s.c_str());
}
static inline TensorShape ImageLayout(size_t width, size_t height, size_t channels, ImageLayoutKind imageLayoutKind)
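The practical effect of the change above, shown as an illustrative check (assumes the declarations in this file; not part of the commit): the cuDNN-style and legacy spellings now map to the same enum values as the canonical names.

    #include <cassert>

    // "cudnn" is now an alias for the CHW layout, "legacy" for HWC.
    void CheckImageLayoutAliases()
    {
        assert(ImageLayoutKindFrom(L"cudnn")  == ImageLayoutKindFrom(L"CHW"));
        assert(ImageLayoutKindFrom(L"legacy") == ImageLayoutKindFrom(L"HWC"));
    }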

View file

@@ -72,6 +72,7 @@ namespace Microsoft {
size_t rows0 = Input(0)->GetNumRows(), cols0 = Input(0)->GetNumCols();
size_t rows1 = Input(1)->GetNumRows(), cols1 = Input(1)->GetNumCols();
+ #if 1//ndef ENABLE_TENSORVIEW
// TODO: This test will go away once we switch to full tensor lib.
if (isFinalValidationPass && !(
(rows0 == rows1 && (Input(0)->GetMBLayout() == Input(1)->GetMBLayout() || cols0 == cols1)) || // matching size (obvious case)
@@ -81,6 +82,9 @@ namespace Microsoft {
{
LogicError("The Matrix dimensions in the %ls %ls operation do not match.", NodeName().c_str(), OperationName().c_str());
}
+ #else
+ rows0; rows1;
+ #endif
// result has tensor shape with dimensions being the max over both
let shape0 = GetInputSampleLayout(0);
@@ -204,6 +208,8 @@ namespace Microsoft {
for (size_t i = 0; i < GetNumInputs(); i++)
{
size_t rank = Input(i)->GetAndValidateSampleLayout().GetRank();
+ if (!HasMBLayout()) // no MBLayout: last dim is column dimension
+ rank++;
if (maxRank < rank)
maxRank = rank;
}
@@ -215,8 +221,9 @@ namespace Microsoft {
TensorShape ComputationNodeBase::GetTensorShape(size_t rank, const FrameRange & fr) const
{
//GetAndValidateSampleLayout(); // no need to validate because rank comes from DetermineElementwiseTensorRank() which validates all
- if (!HasMBLayout()) // no MBLayout: just return sample layout (if other participants have layout, tensor lib will broadcast)
- return GetSampleLayout(); // .Pad(rank); // no need for padding
+ if (!HasMBLayout())
+ return GetSampleLayout().Append(GetSampleLayout().GetRank(), GetNumCols()); // last dim is column dimension
+ // TODO: This is not nice! Instead, if no MBLayout, then have the sample layout explain the whole matrix.
else if (fr.IsAllFrames())
{
// we have an MBLayout, and for refers to the entire MB

View file

@@ -26,8 +26,8 @@
#include <sstream>
#include <iostream>
- #define ENABLE_TENSORVIEW // flip this switch once the tensor lib is confirmed to be working
- #define ENABLE_BROADCASTING_ELEMENTTIMES // if set then ScaleNode and Row/ColumnElementTimes are redirected to ElementTimes
+ #define ENABLE_TENSORVIEW // flip this switch once the tensor lib is confirmed to be working
+ #define ENABLE_BROADCASTING_ELEMENTTIMES // if set then ScaleNode and Row/ColumnElementTimes are redirected to ElementTimes
#define DEFAULT_HIDDEN_ACTIVATION 0.1

View file

@@ -43,7 +43,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
virtual void /*ComputationNode::*/BackpropTo(const size_t inputIndex, const FrameRange & fr) override
{
- #ifdef ENABLE_TENSORVIEW
+ #if 0//def ENABLE_TENSORVIEW
size_t rank = DetermineElementwiseTensorRank();
auto gradient = GradientTensorFor(rank, fr);
auto inputGradient = Input(inputIndex)->GradientTensorFor(rank, fr.AllowBroadcast());
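A note on the edit above: "#if 0//def ENABLE_TENSORVIEW" is a reversible toggle. The "//" comments out the rest of the line, so the directive reads as "#if 0" and the tensor path is forced off; deleting the "0//" restores the original "#ifdef" test. A minimal illustration of the idiom (not CNTK code):

    // Toggle idiom: the branch is forced off, but the original condition stays visible.
    #if 0//def ENABLE_TENSORVIEW
        // tensor-lib path (currently disabled for PlusNode::BackpropTo)
    #else
        // legacy matrix path runs unconditionally for now
    #endif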

View file

@@ -28,10 +28,11 @@ train = [
convW = Parameter(outMap, inWCount, init="uniform", initValueScale=wScale, initOnCPUOnly=false)
conv = Convolution(convW, inp, kW, kH, outMap, hStride, vStride, zeroPadding=false)
convB = Parameter(outMap, 1, init="fixedValue", value=bValue)
+ #convB = ParameterTensor((1 : 1 : outMap : 1/*col dim*/), init="fixedValue", value=bValue)
convPlusB = Plus(conv, convB);
out = RectifiedLinear(convPlusB);
]
DNNSigmoidLayer(inDim, outDim, x, parmScale) = [ // Sigmoid non-linearity
W = Parameter(outDim, inDim, init="uniform", initValueScale=parmScale, initOnCPUOnly=false)
b = Parameter(outDim, 1, init="uniform", initValueScale=parmScale, initOnCPUOnly=false)
@@ -39,7 +40,7 @@ train = [
z = Plus(t, b)
out = Sigmoid(z)
]
DNNLayer(inDim, outDim, x, parmScale) = [ //no non-linearity, as input for SoftMax
W = Parameter(outDim, inDim, init="uniform", initValueScale=parmScale, initOnCPUOnly=false)
b = Parameter(outDim, 1, init="uniform", initValueScale=parmScale, initOnCPUOnly=false)
@@ -50,8 +51,8 @@ train = [
imageW = 28
imageH = 28
labelDim = 10
features = ImageInput(imageW, imageH, 1, imageLayout="HWC", tag="feature")
features = ImageInput(imageW, imageH, 1, imageLayout="legacy", tag="feature")
featScale = Constant(0.00390625)
featScaled = Scale(featScale, features)
labels = Input(labelDim, tag="label")
@@ -94,7 +95,7 @@ train = [
# DNNSigmoidLayer and DNNLayer are defined in Macros.ndl
h1 = DNNSigmoidLayer(512, h1Dim, pool2, 1).out
ol = DNNLayer(h1Dim, labelDim, h1, 1).out
ce = CrossEntropyWithSoftmax(labels, ol, tag="criterion")
err = ErrorPrediction(labels, ol, tag="eval")
outputNodes = ol