Disabled TensorView for PlusNode::BackpropTo(), as it causes a result difference in the Image/QuickE2E test;
GetTensorShape() now adds the column dimension as one more tensor dimension.
Parent: 4ce3b1a8eb
Commit: c87e2f7550
@@ -37,6 +37,7 @@ using namespace std;
 wstring computationNodes = // TODO: use actual TypeName() here? would first need to make it a wide string; we should also extract those two methods into the base macro
     L"LearnableParameter(rows, cols, needGradient = true, init = 'uniform'/*|fixedValue|gaussian|fromFile*/, initValueScale = 1, value = 0, initFromFilePath = '', initOnCPUOnly=true, randomSeed=-1, tag='') = new ComputationNode [ operation = 'LearnableParameter' ; shape = new TensorShape [ dims = (rows : cols) ] /*plus the function args*/ ]\n"
     L"Parameter = LearnableParameter // deprecated \n"
     L"ParameterTensor(dims, needGradient = true, init = 'uniform'/*|fixedValue|gaussian|fromFile*/, initValueScale = 1, value = 0, initFromFilePath = '', initOnCPUOnly=true, randomSeed=-1, tag='') = new ComputationNode [ operation = 'LearnableParameter' ; shape = new TensorShape [ /*dims*/ ] /*plus the function args*/ ]\n"
     // ^^ already works; vv untested
     L"Input(rows, tag='feature') = new ComputationNode [ operation = 'InputValue' ; shape = new TensorShape [ dims = (rows) ] ; isImage = false /*plus the function args*/ ]\n" // note: naming a little inconsistent // TODO: re-test after flag change
     L"SparseInput(rows, tag='feature') = new ComputationNode [ operation = 'SparseInputValue' ; shape = new TensorShape [ dims = (rows) ] ; isImage = false /*plus the function args*/ ]\n"
@@ -457,8 +457,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 };
 static inline ImageLayoutKind ImageLayoutKindFrom(const wstring & s)
 {
-    if (s == L"CHW") return ImageLayoutKind::CHW;
-    else if (s == L"HWC") return ImageLayoutKind::HWC;
+    if (s == L"CHW" || s == L"cudnn") return ImageLayoutKind::CHW;
+    else if (s == L"HWC" || s == L"legacy") return ImageLayoutKind::HWC;
     else InvalidArgument("ImageLayoutKindFrom: Unknown ImageLayoutKind '%ls', must be 'CHW' (cudnn) or 'HWC' (CNTK legacy)", s.c_str());
 }
 static inline TensorShape ImageLayout(size_t width, size_t height, size_t channels, ImageLayoutKind imageLayoutKind)
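The hunk above accepts "cudnn" and "legacy" as aliases for the two layout names. A minimal standalone sketch of the resulting mapping (the enum and the error handling are stubbed here for illustration; the real ones live in the CNTK headers):

#include <cassert>
#include <stdexcept>
#include <string>

enum class ImageLayoutKind { CHW, HWC }; // stub of the CNTK enum

static ImageLayoutKind ImageLayoutKindFrom(const std::wstring& s)
{
    if (s == L"CHW" || s == L"cudnn")  return ImageLayoutKind::CHW;  // cudnn-style layout
    if (s == L"HWC" || s == L"legacy") return ImageLayoutKind::HWC;  // CNTK legacy layout
    throw std::invalid_argument("must be 'CHW' (cudnn) or 'HWC' (CNTK legacy)");
}

int main()
{
    assert(ImageLayoutKindFrom(L"cudnn") == ImageLayoutKind::CHW);
    assert(ImageLayoutKindFrom(L"legacy") == ImageLayoutKind::HWC);
    return 0;
}

This keeps configs such as the Image/QuickE2E one changed below valid whether they say imageLayout="HWC" or imageLayout="legacy".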
@@ -72,6 +72,7 @@ namespace Microsoft {
 size_t rows0 = Input(0)->GetNumRows(), cols0 = Input(0)->GetNumCols();
 size_t rows1 = Input(1)->GetNumRows(), cols1 = Input(1)->GetNumCols();

 #if 1//ndef ENABLE_TENSORVIEW
 // TODO: This test will go away once we switch to full tensor lib.
 if (isFinalValidationPass && !(
     (rows0 == rows1 && (Input(0)->GetMBLayout() == Input(1)->GetMBLayout() || cols0 == cols1)) || // matching size (obvious case)
@@ -81,6 +82,9 @@ namespace Microsoft {
 {
     LogicError("The Matrix dimensions in the %ls %ls operation do not match.", NodeName().c_str(), OperationName().c_str());
 }
+#else
+rows0; rows1;
+#endif

 // result has tensor shape with dimensions being the max over both
 let shape0 = GetInputSampleLayout(0);
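The comment "result has tensor shape with dimensions being the max over both" describes elementwise broadcasting: each output dimension is the maximum of the corresponding input dimensions, with missing or size-1 dimensions broadcasting. A rough sketch using plain vectors in place of TensorShape (not the actual CNTK implementation):

#include <algorithm>
#include <cstdio>
#include <vector>

// Toy stand-in for the broadcast-shape rule: pad the shorter shape with 1s,
// then take the per-dimension max. Size-1 dims thus broadcast against larger ones.
std::vector<size_t> BroadcastShape(std::vector<size_t> a, std::vector<size_t> b)
{
    size_t rank = std::max(a.size(), b.size());
    a.resize(rank, 1); // missing trailing dims act as 1
    b.resize(rank, 1);
    std::vector<size_t> result(rank);
    for (size_t k = 0; k < rank; k++)
        result[k] = std::max(a[k], b[k]);
    return result;
}

int main()
{
    auto r = BroadcastShape({ 3, 1 }, { 1, 5 }); // -> [3 x 5]
    printf("[%zu x %zu]\n", r[0], r[1]);
    return 0;
}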
@@ -204,6 +208,8 @@ namespace Microsoft {
 for (size_t i = 0; i < GetNumInputs(); i++)
 {
     size_t rank = Input(i)->GetAndValidateSampleLayout().GetRank();
+    if (!HasMBLayout()) // no MBLayout: last dim is column dimension
+        rank++;
     if (maxRank < rank)
         maxRank = rank;
 }
@@ -215,8 +221,9 @@ namespace Microsoft {
 TensorShape ComputationNodeBase::GetTensorShape(size_t rank, const FrameRange & fr) const
 {
     //GetAndValidateSampleLayout(); // no need to validate because rank comes from DetermineElementwiseTensorRank() which validates all
-    if (!HasMBLayout()) // no MBLayout: just return sample layout (if other participants have layout, tensor lib will broadcast)
-        return GetSampleLayout(); // .Pad(rank); // no need for padding
+    if (!HasMBLayout())
+        return GetSampleLayout().Append(GetSampleLayout().GetRank(), GetNumCols()); // last dim is column dimension
+    // TODO: This is not nice! Instead, if no MBLayout then have the sample layout explain the whole matrix.
     else if (fr.IsAllFrames())
     {
         // we have an MBLayout, and fr refers to the entire MB
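This hunk is the second half of the commit message: when a node has no MBLayout, GetTensorShape() now appends the matrix column count as one extra tensor dimension instead of returning the bare sample layout, e.g. a [3 x 4] sample layout on a node with 7 columns yields [3 x 4 x 7]. A toy illustration with a vector standing in for TensorShape::Append (the names here are placeholders, not the CNTK API):

#include <cstddef>
#include <vector>

// Append the column dimension as the last tensor dimension, mirroring what the
// new GetTensorShape() does for nodes without an MBLayout.
static std::vector<size_t> AppendColumnDim(std::vector<size_t> sampleLayout, size_t numCols)
{
    sampleLayout.push_back(numCols); // last dim is the column dimension
    return sampleLayout;
}

int main()
{
    auto shape = AppendColumnDim({ 3, 4 }, 7); // -> [3 x 4 x 7]
    return shape.size() == 3 ? 0 : 1;
}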
@@ -26,8 +26,8 @@
 #include <sstream>
 #include <iostream>

-#define ENABLE_TENSORVIEW // flip this switch once the tensor lib is confirmed to be working
-#define ENABLE_BROADCASTING_ELEMENTTIMES // if set then ScaleNode and Row/ColumnElementTimes are redirected to ElementTimes
+#define ENABLE_TENSORVIEW // flip this switch once the tensor lib is confirmed to be working
+#define ENABLE_BROADCASTING_ELEMENTTIMES // if set then ScaleNode and Row/ColumnElementTimes are redirected to ElementTimes

 #define DEFAULT_HIDDEN_ACTIVATION 0.1
@@ -43,7 +43,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {

 virtual void /*ComputationNode::*/BackpropTo(const size_t inputIndex, const FrameRange & fr) override
 {
-#ifdef ENABLE_TENSORVIEW
+#if 0//def ENABLE_TENSORVIEW
     size_t rank = DetermineElementwiseTensorRank();
     auto gradient = GradientTensorFor(rank, fr);
     auto inputGradient = Input(inputIndex)->GradientTensorFor(rank, fr.AllowBroadcast());
@@ -28,10 +28,11 @@ train = [
 convW = Parameter(outMap, inWCount, init="uniform", initValueScale=wScale, initOnCPUOnly=false)
 conv = Convolution(convW, inp, kW, kH, outMap, hStride, vStride, zeroPadding=false)
 convB = Parameter(outMap, 1, init="fixedValue", value=bValue)
+#convB = ParameterTensor((1 : 1 : outMap : 1/*col dim*/), init="fixedValue", value=bValue)
 convPlusB = Plus(conv, convB);
 out = RectifiedLinear(convPlusB);
 ]

 DNNSigmoidLayer(inDim, outDim, x, parmScale) = [ // Sigmoid non-linearity
 W = Parameter(outDim, inDim, init="uniform", initValueScale=parmScale, initOnCPUOnly=false)
 b = Parameter(outDim, 1, init="uniform", initValueScale=parmScale, initOnCPUOnly=false)
@@ -39,7 +40,7 @@ train = [
 z = Plus(t, b)
 out = Sigmoid(z)
 ]

 DNNLayer(inDim, outDim, x, parmScale) = [ // no non-linearity, as input for SoftMax
 W = Parameter(outDim, inDim, init="uniform", initValueScale=parmScale, initOnCPUOnly=false)
 b = Parameter(outDim, 1, init="uniform", initValueScale=parmScale, initOnCPUOnly=false)
@@ -50,8 +51,8 @@ train = [
 imageW = 28
 imageH = 28
 labelDim = 10

-features = ImageInput(imageW, imageH, 1, imageLayout="HWC", tag="feature")
+features = ImageInput(imageW, imageH, 1, imageLayout="legacy", tag="feature")
 featScale = Constant(0.00390625)
 featScaled = Scale(featScale, features)
 labels = Input(labelDim, tag="label")
@@ -94,7 +95,7 @@ train = [
 # DNNSigmoidLayer and DNNLayer are defined in Macros.ndl
 h1 = DNNSigmoidLayer(512, h1Dim, pool2, 1).out
 ol = DNNLayer(h1Dim, labelDim, h1, 1).out

 ce = CrossEntropyWithSoftmax(labels, ol, tag="criterion")
 err = ErrorPrediction(labels, ol, tag="eval")
 outputNodes = ol