full tensor support for Mean and InvStdDev operations
Parent: e0be5a1c58
Commit: 359d90ab09
@@ -47,7 +47,7 @@ public:
     virtual void /*ComputationNode::*/ ForwardProp(const FrameRange& fr) override
     {
         size_t rank = DetermineElementwiseTensorRank();
-        auto result = ValueTensorFor(rank, fr);
+        auto result =           ValueTensorFor(rank, fr);
         auto input0 = Input(0)->ValueTensorFor(rank, fr.AllowBroadcast());
         auto input1 = Input(1)->ValueTensorFor(rank, fr.AllowBroadcast());
         result.AssignSumOf(input0, input1);
@@ -56,7 +56,7 @@ public:
     virtual void /*ComputationNode::*/ BackpropTo(const size_t inputIndex, const FrameRange& fr) override
     {
         size_t rank = DetermineElementwiseTensorRank();
-        auto gradient = GradientTensorFor(rank, fr);
+        auto gradient      =                    GradientTensorFor(rank, fr);
         auto inputGradient = Input(inputIndex)->GradientTensorFor(rank, fr.AllowBroadcast());

         // if reduction then mask the respective input(s) (zero out the gaps)
@@ -77,12 +77,8 @@ template class PlusNode<double>;
 template <class ElemType>
 class MinusNode : public BinaryElementWiseNode<ElemType>
 {
-    typedef BinaryElementWiseNode<ElemType> Base;
-    UsingBinaryElementwiseNodeBaseMembers;
-    static const std::wstring TypeName()
-    {
-        return L"Minus";
-    }
+    typedef BinaryElementWiseNode<ElemType> Base; UsingBinaryElementwiseNodeBaseMembers;
+    static const std::wstring TypeName() { return L"Minus"; }

 public:
     DeclareConstructorFromConfigWithNumInputs(MinusNode);
@@ -95,7 +91,7 @@ public:
     {
         ElemType sign = inputIndex == 0 ? 1.0f : -1.0f;
         size_t rank = DetermineElementwiseTensorRank();
-        auto gradient = GradientTensorFor(rank, fr);
+        auto gradient      =                    GradientTensorFor(rank, fr);
         auto inputGradient = Input(inputIndex)->GradientTensorFor(rank, fr.AllowBroadcast());

         // if reduction then mask the respective input(s) (zero out the gaps)
@@ -108,7 +104,7 @@ public:
     virtual void /*ComputationNode::*/ ForwardProp(const FrameRange& fr) override
     {
         size_t rank = DetermineElementwiseTensorRank();
-        auto result = ValueTensorFor(rank, fr);
+        auto result =           ValueTensorFor(rank, fr);
         auto input0 = Input(0)->ValueTensorFor(rank, fr.AllowBroadcast());
         auto input1 = Input(1)->ValueTensorFor(rank, fr.AllowBroadcast());
         result.AssignDifferenceOf(input0, input1);
@@ -126,12 +122,8 @@ template class MinusNode<double>;
 template <class ElemType>
 class NegateNode : public ComputationNode<ElemType>, public NumInputs<1>
 {
-    typedef ComputationNode<ElemType> Base;
-    UsingComputationNodeMembersBoilerplate;
-    static const std::wstring TypeName()
-    {
-        return L"Negate";
-    }
+    typedef ComputationNode<ElemType> Base; UsingComputationNodeMembersBoilerplate;
+    static const std::wstring TypeName() { return L"Negate"; }

 public:
     DeclareConstructorFromConfigWithNumInputs(NegateNode);
@@ -138,8 +138,7 @@ public:
 template <class ElemType>
 class MeanInvStdDevNodeBase : public PreComputedNodeBase<ElemType>, public NumInputs<1>
 {
-    typedef PreComputedNodeBase<ElemType> Base;
-    UsingPreComputedNodeMembers;
+    typedef PreComputedNodeBase<ElemType> Base; UsingPreComputedNodeMembers;
     // static const std::wstring TypeName() { return L"MeanInvStdDev (base)"; }
 public:
     // DeclareConstructorFromConfigWithNumInputs(MeanInvStdDevNodeBase);
@@ -219,12 +218,8 @@ protected:
 template <class ElemType>
 class MeanNode : public MeanInvStdDevNodeBase<ElemType>
 {
-    typedef MeanInvStdDevNodeBase<ElemType> Base;
-    UsingMeanInvStdDevNodeBaseNodeMembers;
-    static const std::wstring TypeName()
-    {
-        return L"Mean";
-    }
+    typedef MeanInvStdDevNodeBase<ElemType> Base; UsingMeanInvStdDevNodeBaseNodeMembers;
+    static const std::wstring TypeName() { return L"Mean"; }

 public:
     DeclareConstructorFromConfigWithNumInputs(MeanNode);
@@ -232,11 +227,11 @@ public:
         : Base(deviceId, name)
     {
     }

     MeanNode(DEVICEID_TYPE deviceId, const wstring& name, size_t)
         : Base(deviceId, name)
     {
     }

     virtual void /*PreComputedNodeBase::*/ MarkComputed(const bool hasComputed)
     {
         Base::MarkComputed(hasComputed);
@@ -260,19 +255,27 @@ public:
         // set gaps to zero, since we are reducing in time
         Input(0)->MaskMissingValueColumnsToZero(fr);

-        auto& samples = Input(0)->Value();
-        auto& avg = Value();
-
-#if NANCHECK
-        samples.HasNan("Mean-Samples");
-#endif
         size_t numNewSamples = Input(0)->GetMBLayout()->GetActualNumSamples();
         size_t totalNumSamples = m_numSamples + numNewSamples;
         if (totalNumSamples == 0)
             totalNumSamples = 1; // 0/0=1 in this context
-        Matrix<ElemType>::MultiplyAndWeightedAdd(1.0f / totalNumSamples, samples, false,
+        ElemType alpha = 1.0f / totalNumSamples;
+        ElemType beta = (ElemType)m_numSamples / totalNumSamples;
+#if 1
+        size_t rank = DetermineElementwiseTensorRank();
+        auto mean  = ValueTensorFor(rank, FrameRange()); // mean is formed directly in our m_value
+        auto input = Input(0)->ValueTensorFor(rank, fr);
+
+        mean.DoCopyOf(beta, input, alpha);
+        // Note: We leverage that TensorView allows "broadcasting" the output,
+        // which really means a reduction.
+#else
+        auto& samples = Input(0)->Value();
+        auto& avg = Value();
+        Matrix<ElemType>::MultiplyAndWeightedAdd(alpha, samples, false,
                                                  ConstOnes(Input(0)->Value().GetNumCols(), 1, samples.GetDeviceId()),
-                                                 false, (ElemType) m_numSamples / totalNumSamples, avg);
+                                                 false, beta, avg);
+#endif
 #if NANCHECK
         avg.HasNan("Mean-avg");
 #endif
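Note: the tensor path above implements the standard incremental-mean recurrence. With m samples already accumulated and k new ones, beta = m/(m+k) and alpha = 1/(m+k), and mean' = beta*mean + alpha*sum(batch); the sum over the minibatch falls out of the "broadcasting the output" reduction the code comments mention. A minimal scalar sketch of the same rule (illustrative names, not CNTK API):

    #include <cassert>
    #include <cstddef>
    #include <vector>

    // mean' = beta * mean + alpha * sum(batch), alpha = 1/total, beta = numOld/total
    double UpdateRunningMean(double mean, std::size_t numOld, const std::vector<double>& batch)
    {
        std::size_t total = numOld + batch.size();
        if (total == 0)
            return mean; // matches the 0/0=1 convention above: nothing to fold in
        double alpha = 1.0 / total;
        double beta  = static_cast<double>(numOld) / total;
        double sum = 0;
        for (double x : batch)
            sum += x;
        return beta * mean + alpha * sum;
    }

    int main()
    {
        double mean = UpdateRunningMean(0.0, 0, {1, 2, 3}); // 2.0
        mean = UpdateRunningMean(mean, 3, {4, 5, 6});
        assert(mean == 3.5); // same as (1+2+...+6)/6
        return 0;
    }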
@@ -292,12 +295,8 @@ template class MeanNode<double>;
 template <class ElemType>
 class InvStdDevNode : public MeanInvStdDevNodeBase<ElemType>
 {
-    typedef MeanInvStdDevNodeBase<ElemType> Base;
-    UsingMeanInvStdDevNodeBaseNodeMembers;
-    static const std::wstring TypeName()
-    {
-        return L"InvStdDev";
-    }
+    typedef MeanInvStdDevNodeBase<ElemType> Base; UsingMeanInvStdDevNodeBaseNodeMembers;
+    static const std::wstring TypeName() { return L"InvStdDev"; }

 public:
     DeclareConstructorFromConfigWithNumInputs(InvStdDevNode);
@@ -316,31 +315,21 @@ public:
         if (!m_hasComputed) // initialize
         {
-            // reset accumulators
-            size_t inputDim = Input(0)->GetSampleMatrixNumRows();
-            m_mean.Resize(inputDim, 1);
-            m_var.Resize(inputDim, 1);
-            m_mean.SetValue(0);
-            m_var.SetValue(0);
             UpdateFunctionValuesSize();
-            Value().SetValue(0); // also set this because not doing it may flag during debugging; avoids special-casing this
+            Value().SetValue(0);    // Note: We must do this here already because dimensions are verified at places.
+            m_mean.Resize(Value()); // mean accumulator normalized by #samples in it
+            m_var .Resize(Value()); // likewise the variance
+            m_temp.Resize(Value()); // and a temp
+            m_mean.SetValue(0);     // reset the mean and var accumulators
+            m_var .SetValue(0);
         }
         else // finalize
         {
             // m_value <- 1/stddev
             ElemType sqrtFloor = 1e-10f;
             m_var.InplaceTruncateBottom(sqrtFloor); // prevent too small variance (and negative square roots due to numeric inaccuracy)
-#if NANCHECK
-            m_var.HasNan("MarkComputed-InplaceTruncateBottom");
-#endif
             m_var.InplaceSqrt();

-#if NANCHECK
-            m_var.HasNan("MarkComputed-InplaceSqrt");
-#endif
             m_var.ElementInverse();

-#if NANCHECK
-            m_var.HasNan("MarkComputed-ElementInverse()");
-#endif
             Value().SetValue(m_var);
         }
     }
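Note: the finalize branch computes Value = 1 / sqrt(max(var, 1e-10)); the floor guards against zero or slightly negative variances produced by numeric error before the square root and reciprocal are taken. A scalar equivalent of the three in-place ops, for reference (illustrative, not CNTK API):

    #include <algorithm>
    #include <cmath>

    // InplaceTruncateBottom -> InplaceSqrt -> ElementInverse, per element
    double Finalize(double var, double sqrtFloor = 1e-10)
    {
        return 1.0 / std::sqrt(std::max(var, sqrtFloor));
    }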
@@ -357,29 +346,55 @@ public:
         // set gaps to zero, since we are reducing in time
         Input(0)->MaskMissingValueColumnsToZero(fr);

-        auto& samples = Input(0)->Value();
-#if NANCHECK
-        samples.HasNan("InvStdDev-Samples");
-#endif
-        m_temp.SetValue(m_mean);
+        //m_temp.SetValue(m_mean); // old mean
         size_t numNewSamples = Input(0)->GetMBLayout()->GetActualNumSamples();
         size_t totalNumSamples = m_numSamples + numNewSamples;
         if (totalNumSamples == 0)
             totalNumSamples = 1; // 0/0=1 in this context
-        Matrix<ElemType>::MultiplyAndWeightedAdd(1.0f / totalNumSamples, samples, false,
-                                                 ConstOnes(Input(0)->Value().GetNumCols(), 1, samples.GetDeviceId()),
-                                                 false, (ElemType) m_numSamples / totalNumSamples, m_mean);
+        ElemType alpha = 1.0f / totalNumSamples;
+        ElemType beta = (ElemType)m_numSamples / totalNumSamples;
+#if 1
+        size_t rank = DetermineElementwiseTensorRank();
+        auto input = Input(0)->ValueTensorFor(rank, fr);
+        auto mean  = DataTensorFor(m_mean, rank, FrameRange());
+        auto temp  = DataTensorFor(m_temp, rank, FrameRange());
+        auto var   = DataTensorFor(m_var,  rank, FrameRange());
+
+        // preserve the old mean value for the next step
+        temp.AssignCopyOf(mean);
+
+        // accumulate the mean
+        mean.DoCopyOf(beta, input, alpha); // Note: This reduces over samples.
+#else
+        auto& samples = Input(0)->Value();
+        Matrix<ElemType>::MultiplyAndWeightedAdd(alpha, samples, false,
+                                                 ConstOnes(Input(0)->Value().GetNumCols(), 1, samples.GetDeviceId()),
+                                                 false, beta, m_mean);
+#endif

         // compute the correction term
+#if 1
+        // var += (oldMean - newMean)^2
+        temp.DoCopyOf(1.0f, mean, -1.0f); // subtract new 'mean' from the old one
+        var .DoSqrOf (1.0f, temp, 1.0f);  // add the square
+
+        // var += (input - mean)^2
+        auto& temp2 = temp; // another temp variable, for which we can reuse the first one
+        temp2.AssignDifferenceOf(input, mean); // Note: This also reduces over samples.
+        var.DoSqrOf(beta, temp2, alpha);
+#else
         // var += (oldMean - newMean)^2
         m_temp -= m_mean;
         m_temp.AssignElementPowerOf(m_temp, 2);
         m_var += m_temp;

-        m_temp.AssignDifferenceOf(samples, m_mean);
+        m_temp.AssignDifferenceOf(Input(0)->Value(), m_mean);
         m_temp.AssignElementPowerOf(m_temp, 2);

-        Matrix<ElemType>::MultiplyAndWeightedAdd(1.0f / totalNumSamples, m_temp, false,
+        Matrix<ElemType>::MultiplyAndWeightedAdd(alpha, m_temp, false,
                                                  ConstOnes(Input(0)->Value().GetNumCols(), 1, samples.GetDeviceId()),
-                                                 false, (ElemType) m_numSamples / totalNumSamples, m_var);
+                                                 false, beta, m_var);
+#endif

 #if NANCHECK
         m_var.HasNan("InvStdDev-m_var");
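Note: the two DoSqrOf calls implement the exact update for a running (population) variance when the mean shifts. With m old samples, k new ones, n = m + k, beta = m/n, alpha = 1/n:

    var' = beta * (var + (oldMean - newMean)^2) + alpha * sum((x - newMean)^2)

The first call adds the mean-shift correction; the second folds in the new batch and rescales by beta/alpha. A scalar sketch that checks the recurrence against a direct computation (illustrative names, not part of the commit):

    #include <cassert>
    #include <cmath>
    #include <cstddef>
    #include <vector>

    // One accumulation step for running mean/variance, as in the tensor path above.
    void Accumulate(double& mean, double& var, std::size_t numOld, const std::vector<double>& batch)
    {
        std::size_t total = numOld + batch.size();
        if (total == 0)
            return;
        double alpha = 1.0 / total, beta = static_cast<double>(numOld) / total;

        double oldMean = mean, sum = 0;
        for (double x : batch)
            sum += x;
        mean = beta * oldMean + alpha * sum; // accumulate the mean

        double corr = (oldMean - mean) * (oldMean - mean); // (oldMean - newMean)^2
        double sqSum = 0;
        for (double x : batch)
            sqSum += (x - mean) * (x - mean);
        var = beta * (var + corr) + alpha * sqSum;
    }

    int main()
    {
        double mean = 0, var = 0;
        Accumulate(mean, var, 0, {1, 2, 3});
        Accumulate(mean, var, 3, {4, 5, 6});
        assert(std::fabs(var - 35.0 / 12.0) < 1e-12); // population variance of 1..6
        return 0;
    }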
@@ -415,6 +430,11 @@ template class InvStdDevNode<double>;

 // -----------------------------------------------------------------------
 // PerDimMeanVarNormalizationNode (feature, mean, invStdDev)
+// Computes
+//   output = (feature - mean) .* invStdDev
+// where mean and invStdDev are meant to be single elements while features
+// is minibatch data.
+// TODO: Why do we need this? Why not use Plus and ElementTimes?
 // -----------------------------------------------------------------------

 template <class ElemType>
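Concretely: for a feature column [2, 4] with mean [1, 1] and invStdDev [0.5, 0.5], the node produces [(2-1)*0.5, (4-1)*0.5] = [0.5, 1.5].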
@@ -441,34 +461,24 @@ public:

     virtual void /*ComputationNode::*/ ForwardProp(const FrameRange& fr) override
     {
+#if 1
+        size_t rank = DetermineElementwiseTensorRank();
+        auto output    =           ValueTensorFor(rank, fr);
+        auto input     = Input(0)->ValueTensorFor(rank, fr);
+        auto mean      = Input(1)->ValueTensorFor(rank, fr.AllowBroadcast());
+        auto invStdDev = Input(2)->ValueTensorFor(rank, fr.AllowBroadcast());
+
+        output.AssignDifferenceOf(input, mean);               // output = input - mean
+        output.AssignElementwiseProductOf(output, invStdDev); // output *= invStdDev
+#else
         // only feature (input0) and output needs to be sliced
-        auto sliceInput0Value = Input(0)->ValueFor(fr);
-        auto sliceOutputValue = ValueFor(fr);
-
-        ForwardPropS(sliceOutputValue, sliceInput0Value, Input(1)->Value(), Input(2)->Value());
-    }
-
-    /*TODO: merge with call site*/ void ForwardPropS(Matrix<ElemType>& functionValues, const Matrix<ElemType>& input0,
-                                                     const Matrix<ElemType>& input1, const Matrix<ElemType>& input2)
-    {
+        auto functionValues = Input(0)->ValueFor(fr);
+        auto input0 = ValueFor(fr);
+        const auto& input1 = Input(1)->Value(); // mean
+        const auto& input2 = Input(2)->Value(); // inv stddev
+
 #if DUMPOUTPUT
         //input0.Print("PerDimMeanVarNormalization-input0");
         //input1.Print("PerDimMeanVarNormalization-input1");
         //input2.Print("PerDimMeanVarNormalization-input2");
 #endif

 #if NANCHECK
         input0.HasNan("PerDimMeanVarNormalization-input0");
         input1.HasNan("PerDimMeanVarNormalization-input1");
         input2.HasNan("PerDimMeanVarNormalization-input2");
 #endif
         functionValues.AssignDifferenceOf(input0, input1);
         functionValues.ColumnElementMultiplyWith(input2);
 #if NANCHECK
         functionValues.HasNan("PerDimMeanVarNormalization");
 #endif
 #if DUMPOUTPUT
         functionValues.Print("PerDimMeanVarNormalizationNode");
 #endif
+#endif
     }
@@ -477,31 +487,11 @@ public:
         Base::Validate(isFinalValidationPass);
         InferMBLayoutFromInputsForStandardCase();

-        if (Input(0)->RequiresPreCompute())
-        {
-            LogicError(
-                "PerDimMeanVarNormalizationNode criterion forbids first input from being a pre-compute node. "
-                "The first input should be the node whose output should be normalized, and the second and third inputs "
-                "should be LearnableParameter type or (Mean, InvStdDev) so that the values will be saved.");
-        }
-
-        if (!(Input(1)->OperationName() == OperationNameOf(LearnableParameter) &&
-              Input(2)->OperationName() == OperationNameOf(LearnableParameter)) &&
-            !(Input(1)->OperationName() == OperationNameOf(MeanNode) &&
-              Input(2)->OperationName() == OperationNameOf(InvStdDevNode)))
-        {
-            LogicError(
-                "PerDimMeanVarNormalizationNode criterion requires the last two inputs to be LearnableParameter "
-                "type or (Mean, InvStdDev) so that the values will be saved.");
-        }
-
         Input(1)->ValidateInferInputDimsFrom(Input(0)->GetSampleLayout());
         Input(2)->ValidateInferInputDimsFrom(Input(0)->GetSampleLayout());

         if (isFinalValidationPass)
         {
+            if (!Input(0)->HasMBLayout() || Input(1)->HasMBLayout() || Input(2)->HasMBLayout())
+                InvalidArgument("PerDimMeanVarNormalizationNode: Inputs must be data, while mean and InvStdDev must be column vectors.");
+            if (!Input(0)->GetSampleLayout().IsElementwiseCompatibleWith(Input(1)->GetSampleLayout()) || !Input(0)->GetSampleLayout().IsElementwiseCompatibleWith(Input(2)->GetSampleLayout()))
+                InvalidArgument("PerDimMeanVarNormalizationNode: All inputs should have same sample layout.");
         }
@@ -515,17 +505,17 @@ template class PerDimMeanVarNormalizationNode<double>;

 // -----------------------------------------------------------------------
 // PerDimMeanVarDeNormalizationNode (feature, mean, invStdDev)
+// Computes
+//   output = feature ./ invStdDev + mean
+// with parameters the same as PerDimMeanVarNormalizationNode.
+// TODO: Why do we need this? Why not use Plus and ElementDividedBy?
 // -----------------------------------------------------------------------

 template <class ElemType>
 class PerDimMeanVarDeNormalizationNode : public ComputationNode<ElemType>, public NumInputs<3>
 {
-    typedef ComputationNode<ElemType> Base;
-    UsingComputationNodeMembersBoilerplate;
-    static const std::wstring TypeName()
-    {
-        return L"PerDimMeanVarDeNormalization";
-    }
+    typedef ComputationNode<ElemType> Base; UsingComputationNodeMembersBoilerplate;
+    static const std::wstring TypeName() { return L"PerDimMeanVarDeNormalization"; }

 public:
     DeclareConstructorFromConfigWithNumInputs(PerDimMeanVarDeNormalizationNode);
@@ -539,44 +529,28 @@ public:
             InvalidArgument("PerDimMeanVarDeNormalizationNode should only be called in the evaluation stage. Is any of its descendents a learnable parameter that requires gradient?");
         }

-    // (feature-mean).*InvStdDev
+    // feature ./ invStdDev + mean
     virtual void /*ComputationNode::*/ ForwardProp(const FrameRange& fr) override
     {
+#if 1
+        size_t rank = DetermineElementwiseTensorRank();
+        auto output    =           ValueTensorFor(rank, fr);
+        auto input     = Input(0)->ValueTensorFor(rank, fr);
+        auto mean      = Input(1)->ValueTensorFor(rank, fr.AllowBroadcast());
+        auto invStdDev = Input(2)->ValueTensorFor(rank, fr.AllowBroadcast());
+
+        output.AssignElementwiseQuotientOf(input, invStdDev); // output = input / invStdDev
+        output.AssignDifferenceOf(output, mean);              // output += mean
+#else
         // only feature (input0) and output needs to be sliced
-        auto sliceInput0Value = Input(0)->ValueFor(fr);
-        auto sliceOutputValue = ValueFor(fr);
-
-        ForwardPropS(sliceOutputValue, sliceInput0Value, Input(1)->Value(), Input(2)->Value());
-    }
-
-    /*TODO: merge with call site*/ void ForwardPropS(Matrix<ElemType>& functionValues, const Matrix<ElemType>& input0,
-                                                     const Matrix<ElemType>& input1, const Matrix<ElemType>& input2)
-    {
+        auto functionValues = Input(0)->ValueFor(fr);
+        auto input0 = ValueFor(fr);
+        const auto& input1 = Input(1)->Value(); // mean
+        const auto& input2 = Input(2)->Value(); // inv stddev
+
 #if DUMPOUTPUT
         //input0.Print("PerDimMeanVarDeNormalization-input0");
         //input1.Print("PerDimMeanVarDeNormalization-input1");
         //input2.Print("PerDimMeanVarDeNormalization-input2");
 #endif

 #if NANCHECK
         input0.HasNan("PerDimMeanVarDeNormalization-input0");
         input1.HasNan("PerDimMeanVarDeNormalization-input1");
         input2.HasNan("PerDimMeanVarDeNormalization-input2");
 #endif
         // functionValues.AssignDifferenceOf(input0, input1);
         // functionValues.ColumnElementMultiplyWith(input2);
         // functionValues.AssignDifferenceOf(input0, input0);
         // functionValues += input2;
         // functionValues.ElementInverse();
         // functionValues.ElementMultiplyWith(input0);
         functionValues.SetValue(input0);
         functionValues.ColumnElementDivideBy(input2);
         functionValues += input1;
 #if NANCHECK
         functionValues.HasNan("PerDimMeanVarDeNormalization");
 #endif
 #if DUMPOUTPUT
         functionValues.Print("PerDimMeanVarDeNormalizationNode");
 #endif
+#endif
     }
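Note: per the header comments, this node is the exact inverse of PerDimMeanVarNormalizationNode: ((x - mean) .* invStdDev) ./ invStdDev + mean = x for nonzero invStdDev, so chaining the two on the same (mean, invStdDev) pair should reproduce the input. One caveat in the tensor path above: AssignDifferenceOf(output, mean) subtracts, while its trailing comment says "output += mean"; AssignSumOf would match the documented formula.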
@@ -585,31 +559,11 @@ public:
         Base::Validate(isFinalValidationPass);
         InferMBLayoutFromInputsForStandardCase();

-        if (Input(0)->RequiresPreCompute())
-        {
-            LogicError(
-                "PerDimMeanVarDeNormalizationNode criterion forbids first input from being a pre-compute node. "
-                "The first input should be the node whose output should be de-normalized, and the second and third inputs "
-                "should be LearnableParameter type or (Mean, InvStdDev) so that the values will be saved.");
-        }
-
-        if (!(Input(1)->OperationName() == OperationNameOf(LearnableParameter) &&
-              Input(2)->OperationName() == OperationNameOf(LearnableParameter)) &&
-            !(Input(1)->OperationName() == OperationNameOf(MeanNode) &&
-              Input(2)->OperationName() == OperationNameOf(InvStdDevNode)))
-        {
-            LogicError(
-                "PerDimMeanVarDeNormalizationNode criterion requires the last two inputs to be "
-                "LearnableParameter type or (Mean, InvStdDev) so that the values will be saved.");
-        }
-
         Input(1)->ValidateInferInputDimsFrom(Input(0)->GetSampleLayout());
         Input(2)->ValidateInferInputDimsFrom(Input(0)->GetSampleLayout());

         if (isFinalValidationPass)
         {
+            if (!Input(0)->HasMBLayout() || Input(1)->HasMBLayout() || Input(2)->HasMBLayout())
+                InvalidArgument("PerDimMeanVarDeNormalizationNode: Inputs must be data, while mean and InvStdDev must be column vectors.");
+            if (!Input(0)->GetSampleLayout().IsElementwiseCompatibleWith(Input(1)->GetSampleLayout()) || !Input(0)->GetSampleLayout().IsElementwiseCompatibleWith(Input(2)->GetSampleLayout()))
+                InvalidArgument("PerDimMeanVarDeNormalizationNode: All inputs should have same sample layout.");
         }
@@ -78,6 +78,7 @@ enum ElementWiseOperator
     opAbs,
     opSigmoid,
     opTanh,
+    opSqr,
     opSqrt,
     opExp,
     opLog,
@@ -129,6 +130,7 @@ enum ElementWiseOperator
     Macro(Abs); \
     Macro(Sigmoid); \
     Macro(Tanh); \
+    Macro(Sqr); \
     Macro(Sqrt); \
     Macro(Exp); \
     Macro(Log); \
@@ -357,6 +357,8 @@ public:
     Matrix<ElemType>& InplaceAbs();
     Matrix<ElemType>& AssignAbsOf(const Matrix<ElemType>& a);

+    // TODO: rename these to InPlaceFloor() and -Ceil() (I never know what it means to truncate a bottom)
+    // And also document and implement that sparse matrices can only truncate towards 0.
     Matrix<ElemType>& InplaceTruncateBottom(const ElemType threshold);
     Matrix<ElemType>& AssignTruncateBottomOf(const Matrix<ElemType>& a, const ElemType threshold);
     Matrix<ElemType>& InplaceTruncateTop(const ElemType threshold);
@@ -96,6 +96,12 @@ DECL ElemType LinearRectifierDerivative(ElemType z)
     return z > 0 ? (ElemType) 1 : 0;
 }

+template <class ElemType>
+DECL ElemType Sqr(ElemType z)
+{
+    return z * z;
+}
+
 template <class ElemType>
 DECL ElemType Sqrt(ElemType z)
 {
@@ -143,12 +149,6 @@ DECL ElemType LogAdd(ElemType x, ElemType y)
     }
 }

-template <class ElemType>
-DECL ElemType Sqr(ElemType z)
-{
-    return z * z;
-}
-
 // IndexElement reindexes a tensor along one dimension.
 // For the indexed dimension, the tensor op is prepared by setting 'a' to be broadcasting along the indexed dimension.
 // I.e. pa = &a points to the first element (as if index == 0).
@@ -188,6 +188,7 @@ DefUnaryOp(Not, !a);
 DefUnaryOp(Abs, fabs_(a));
 DefUnaryOp(Sigmoid, Sigmoid(a));
 DefUnaryOp(Tanh, tanh_(a));
+DefUnaryOp(Sqr, Sqr(a));
 DefUnaryOp(Sqrt, Sqrt(a));
 DefUnaryOp(Exp, exp_(a));
 DefUnaryOp(Log, ClippedLog(a));
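Note: the opSqr change above illustrates the full wiring for a new unary elementwise op in this code base: an ElementWiseOperator enum entry, a matching entry in the op-list macro (which keeps dispatch tables in sync), a scalar kernel in TensorOps.h, and a DefUnaryOp binding. A hypothetical opCube following the same four touch points (a sketch only, not part of this commit):

    // 1. enum ElementWiseOperator gains a value, in the same position as the macro entry:
    //        opCube,
    // 2. the unary-op macro list gains:
    //        Macro(Cube); \
    // 3. TensorOps.h gains the scalar kernel:
    template <class ElemType>
    DECL ElemType Cube(ElemType z)
    {
        return z * z * z;
    }
    // 4. the op table binds the enum value to the kernel:
    //        DefUnaryOp(Cube, Cube(a));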
@@ -56,7 +56,7 @@ public:
     // c.AssignDiffOf(c,a) means c -= a,
     // and c.AddElementwiseProductOf(a, b, 1) means c += a .* b.
     // All operators support elementwise in-place operations, i.e. a, b, and c
-    // may all reference the same underlying SOB, with onee exception:
+    // may all reference the same underlying SOB, with one exception:
     // The output cannot be in-place and inverse-broadcasting at the same time.
     // E.g. with c=[10] and a=[10 x 20], c.AssignDiffOf(c,a) will fail.
     // In that case, you can use c.AddCopyOf(a,-1).
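Note: the restriction that the corrected comment documents is real, not stylistic: an op whose output is both in-place and inverse-broadcasting (i.e. reduced) would have to read each output element while also accumulating over it. In the example, c=[10] must absorb a reduction over the 20 columns of a=[10 x 20], so c.AssignDiffOf(c, a) is rejected; c.AddCopyOf(a, -1) expresses the same c -= (reduction of a) as a pure accumulation into c instead.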
@@ -1747,8 +1747,9 @@ size_t BatchSequenceReader<ElemType>::DetermineSequencesToProcess()
         // and count tokens
         numTokens += m_parser.mSentenceIndex2SentenceInfo[seq].sLen;
     }
-    // if all are already done, we will return sln=0
-    fprintf(stderr, "DetermineSequencesToProcess: %d sequences of len %d, %d tokens\n", (int) mToProcess.size(), (int) sln, (int) numTokens);
+    // if all were already done, we will get here with sln=0 and return that
+
+    //fprintf(stderr, "DetermineSequencesToProcess: %d sequences of len %d, %d tokens\n", (int) mToProcess.size(), (int) sln, (int) numTokens);

     return sln;
 }
@@ -54,7 +54,7 @@ COMMAND: currentDirectory=\\storage.ccp.philly.selfhost.corp.microsoft.com\pu

 COMMAND: configFile=$(SolutionDir)Examples/Image/MNIST/Config/01_OneHidden.cntk currentDirectory=$(SolutionDir)Tests/EndToEndTests/Image/Data RunDir=$(SolutionDir)Tests/EndToEndTests/RunDir/Image/MNIST_01_OneHidden DataDir=$(SolutionDir)Tests/EndToEndTests/Image/Data ConfigDir=$(SolutionDir)Examples/Image/MNIST/Config OutputDir=$(SolutionDir)Tests/EndToEndTests/RunDir/Image/MNIST_01_OneHidden DeviceId=0 MNISTtrain=[reader=[file=$(SolutionDir)Tests/EndToEndTests/Image/Data/Train.txt]] MNISTtest=[reader=[file=$(SolutionDir)Tests/EndToEndTests/Image/Data/Test.txt]] MNISTtrain=[SGD=[maxEpochs=1]] MNISTtrain=[SGD=[epochSize=100]] MNISTtrain=[reader=[randomize=none]] imageLayout="cudnn" makeMode=false

-COMMAND: configFile=$(SolutionDir)Examples/Image/MNIST/Config/02_Convolution.cntk currentDirectory=$(SolutionDir)Tests/EndToEndTests/Image/Data RunDir=$(SolutionDir)Tests/EndToEndTests/RunDir/Image/MNIST_02_Convolution DataDir=$(SolutionDir)Tests/EndToEndTests/Image/Data ConfigDir=$(SolutionDir)Examples/Image/MNIST/Config OutputDir=$(SolutionDir)Tests/EndToEndTests/RunDir/Image/MNIST_02_Convolution DeviceId=0 MNISTtrain=[reader=[file=$(SolutionDir)Tests/EndToEndTests/Image/Data/Train.txt]] MNISTtest=[reader=[file=$(SolutionDir)Tests/EndToEndTests/Image/Data/Test.txt]] MNISTtrain=[SGD=[maxEpochs=1]] MNISTtrain=[SGD=[epochSize=100]] MNISTtrain=[reader=[randomize=none]] imageLayout="cudnn" makeMode=false
+COMMAND: configFile=$(SolutionDir)Examples/Image/MNIST/Config/02_Convolution.cntk currentDirectory=$(SolutionDir)Tests/EndToEndTests/Image/Data RunDir=$(SolutionDir)Tests/EndToEndTests/RunDir/Image/MNIST_02_Convolution DataDir=$(SolutionDir)Tests/EndToEndTests/Image/Data ConfigDir=$(SolutionDir)Examples/Image/MNIST/Config OutputDir=$(SolutionDir)Tests/EndToEndTests/RunDir/Image/MNIST_02_Convolution DeviceId=0 train=[reader=[file=$(SolutionDir)Tests/EndToEndTests/Image/Data/Train.txt]] MNISTtest=[reader=[file=$(SolutionDir)Tests/EndToEndTests/Image/Data/Test.txt]] train=[SGD=[maxEpochs=1]] train=[SGD=[epochSize=100]] train=[reader=[randomize=none]] imageLayout="cudnn" makeMode=false

 TODO out-of-date:
 COMMAND: currentDirectory=$(SolutionDir)ExampleSetups\Image\MNIST configFile=02_Conv.cntk configName=02_Conv