changed inferInputRank to inferInputRankToMap, and updated {Linear,Dense}Layer{} to accept the mutually exclusive inputRank and mapRank parameters

This commit is contained in:
Frank Seide 2016-08-23 17:59:31 -07:00
Родитель de864bffb4
Коммит 3c5aa75a1a
3 изменённых файлов: 38 добавлений и 36 удалений

Просмотреть файл

@ -335,7 +335,8 @@ static ConfigValuePtr NodeOp(const ExpressionPtr &e, ConfigValuePtr leftVal, Con
{
let one = MakePrimitiveConfigValuePtr(1.0, leftFailFn, exprPath);
config->Add(L"outputRank", leftFailFn, one);
config->Add(L"inferInputRank", leftFailFn, one);
let minusOne = MakePrimitiveConfigValuePtr(-1.0, leftFailFn, exprPath);
config->Add(L"inferInputRankToMap", leftFailFn, minusOne);
}
// instantiate the ComputationNode
let value = ConfigValuePtr(rtInfo->construct(config), MakeFailFn(e->location), exprPath);

Просмотреть файл

@ -30,19 +30,24 @@
# LinearLayer -- create a fully-connected linear projection layer
# Note: outDim may describe a tensor as well.
LinearLayer {outDim, bias = true, init='heNormal', initValueScale=1, inputRank=0} =
LinearLayer {outDim, bias = true, init='heNormal', initValueScale=1, inputRank=None, mapRank=None} =
{
W = ParameterTensor {_ConcatArrays (outDim, Inferred), init=init, initValueScale=initValueScale}
inputShape = if BS.Constants.IsNone (inputRank) then Inferred else Repeat (inputRank, Inferred)
W = ParameterTensor {_ConcatArrays (outDim, inputShape), init=init, initValueScale=initValueScale}
b = ParameterTensor {outDim, initValue=0}
outputRank = Length (_AsArray (outDim)) # support outputs with tensor layouts
inferInputRankToMap =
if BS.Constants.IsNone (mapRank) then -1 # means not specified
else if !BS.Constants.IsNone (inputRank) Fail ("'inputRank' and 'mapRank' cannot be specified at the same time.")
else mapRank
apply (x) =
if bias
then Times (W, x, outputRank=outputRank, inferInputRank=inputRank) + b
else Times (W, x, outputRank=outputRank, inferInputRank=inputRank)
then Times (W, x, outputRank=outputRank, inferInputRankToMap=inferInputRankToMap) + b
else Times (W, x, outputRank=outputRank, inferInputRankToMap=inferInputRankToMap)
}.apply
# DenseLayer -- create a fully-connected layer with optional non-linearity
DenseLayer{outDim, bias = true, activation=(x=>x), init='heNormal', initValueScale=1, inputRank=0} = Sequential ( LinearLayer{outDim, bias=bias, init=init, initValueScale=initValueScale, inferInputRank=inputRank} : activation )
DenseLayer{outDim, bias = true, activation=(x=>x), init='heNormal', initValueScale=1, inputRank=None, mapRank=None} = Sequential ( LinearLayer{outDim, bias=bias, init=init, initValueScale=initValueScale, inputRank=inputRank, mapRank=mapRank} : activation )
# EmbeddingLayer -- create a linear embedding layer
EmbeddingLayer {outDim, # dimension of embedding
@ -326,7 +331,7 @@ CNTK2 = [
// 4. Tensor operations
// Changes: Matrix -> Tensor. A -> x, B -> y. Data must come on y ("default parameter") hence not using _
Times(x, y, outputRank=1, inferInputRank=1, tag='') = new ComputationNode [ operation = 'Times' ; inputs = ( x : y ) /*plus the function args*/ ]
Times(x, y, outputRank=1, inferInputRankToMap=-1, tag='') = new ComputationNode [ operation = 'Times' ; inputs = ( x : y ) /*plus the function args*/ ]
// 5. Elementwise operations.
// Changes: "Matrix" -> "Tensor"; left input -> _; Clip: move input to front. ElementDivide/Times: anotherTensor -> y

Просмотреть файл

@ -238,8 +238,8 @@ class TimesNodeBase : public ComputationNode<ElemType>, public NumInputs<2>
typedef ComputationNode<ElemType> Base; UsingComputationNodeMembers; using Base::OperationName; \
public:
TimesNodeBase(DEVICEID_TYPE deviceId, const wstring& name, size_t outputRank = 1, int inferInputRank = 1)
: Base(deviceId, name), m_outputRank(outputRank), m_inferInputRank(inferInputRank)
TimesNodeBase(DEVICEID_TYPE deviceId, const wstring& name, size_t outputRank = 1, int inferInputRankToMap = 1)
: Base(deviceId, name), m_outputRank(outputRank), m_inferInputRankToMap(inferInputRankToMap)
{
}
@ -250,7 +250,7 @@ public:
{
auto node = dynamic_pointer_cast<TimesNodeBase<ElemType, m_transpose>>(nodeP);
node->m_outputRank = m_outputRank;
node->m_inferInputRank = m_inferInputRank;
node->m_inferInputRankToMap = m_inferInputRankToMap;
}
}
@ -258,7 +258,7 @@ public:
{
Base::Save(fstream);
fstream << m_outputRank;
fstream << m_inferInputRank;
fstream << m_inferInputRankToMap;
}
virtual void Load(File& fstream, size_t modelVersion) override
@ -269,9 +269,9 @@ public:
else
m_outputRank = 1;
if (modelVersion >= CNTK_MODEL_VERSION_11)
fstream >> m_inferInputRank;
fstream >> m_inferInputRankToMap;
else
m_inferInputRank = 1;
m_inferInputRankToMap = 1;
}
private:
@ -427,28 +427,24 @@ public:
InvalidArgument("%ls %ls operation: The outputRank (%d) dimensions in left argument's shape [%s] must not be 0.", NodeName().c_str(), OperationName().c_str(), (int)m_outputRank, dimsAstring.c_str());
// infer rank of dimsA
// For purpose of dimension inference, Times() accepts an optional parameter inferInputRank (default 1).
// The first 'inferInputRank' axes are considered those that the matrix product should reduce over,
// while the remaining axes are kept (Times() is applied one by one, like a "map" operation).
// Importantly, inferInputRank <= 0 will be interpreted from the end. Hence, inferInputRank=-1 denotes
// that the one last axis will not be reduced over.
// And inferInputRank=0 means to reduce over all input axes, e.g. for an image input that
// For purpose of dimension inference, Times() accepts an optional parameter inferInputRankToMap (default -1=unspecified).
// The last 'inferInputRankToMap' axes are considered those that the matrix product should keep (Times()
// is applied one by one, like a "map" operation) rather than reducing over.
// Specifically, inferInputRankToMap=0 means to reduce over all input axes, e.g. for an image input that
// should be flattened.
// Examples:
// [I x Inferred] * [J x K], inferInputRank=1 --> Inferred := J, result is [I x K]
// [I x Inferred] * [W x H x C], inferInputRank=1 --> Inferred := W, result is [I x H x C] (not desired)
// [I x Inferred] * [W x H x C], inferInputRank=0 --> Inferred := W x H x C, result is [I] (desired)
// [I x Inferred] * [W x H x C x R], inferInputRank=-1 --> Inferred := W x H x C, result is [I x R] (desired)
// In each case,
// * if the output tensor is too short *and* the last dimension is 0, it will be extended
// * output tensor dimensions that are not 0 are not touched
if (dimsA.back() == 0) // if last entry is 0, we infer the tensor rank as well
// [I x Inferred] * [J x K], inferInputRankToMap=n/a --> Inferred := J, result is [I x K]
// [I x Inferred] * [W x H x C], inferInputRankToMap=n/a --> Inferred := W, result is [I x H x C] (not desired)
// [I x Inferred x Inferred] * [W x H x C], inferInputRankToMap=n/a --> Inf x Inf := [W x H], result is [I x C]
// [I x Inferred] * [W x H x C], inferInputRankToMap=0 --> Inferred := W x H x C, result is [I] (desired)
// [I x Inferred] * [W x H x C x R], inferInputRankToMap=1 --> Inferred := W x H x C, result is [I x R] (desired)
// If W's shape is too short, it will be padded with 0 (i.e. inferred in a subsequent step).
if (m_inferInputRankToMap >= 0) // if given, we pad if needed
{
if (abs(m_inferInputRank) > dimsB.size())
InvalidArgument("%ls %ls operation: 'inputDims' argument %d exceeds rank of second operand [%s].", NodeName().c_str(), OperationName().c_str(), m_inferInputRank, dimsBstring.c_str());
size_t inferInputRank = (size_t)(m_inferInputRank > 0 ? m_inferInputRank : (int)dimsB.size() + m_inferInputRank);
if ((size_t)m_inferInputRankToMap >= dimsB.size() && isFinalValidationPass) // at least one axis must be left to reduce over
InvalidArgument("%ls %ls operation: 'inferInputRankToMap' argument %d must be less than rank of second operand [%s].", NodeName().c_str(), OperationName().c_str(), m_inferInputRankToMap, dimsBstring.c_str());
assert(dimsA.size() == m_outputRank + numReductionDims);
while (numReductionDims < inferInputRank)
while (numReductionDims + (size_t)m_inferInputRankToMap < dimsB.size())
{
dimsA.push_back(0);
numReductionDims++;
@ -502,7 +498,7 @@ public:
private:
size_t m_outputRank;
int m_inferInputRank; // can be negative to indicate counting from end
int m_inferInputRankToMap; // -1 (not specified) or says how to expand shape of W, to keep this many mapping dims
};
// -----------------------------------------------------------------------
@ -529,12 +525,12 @@ class TimesNode : public TimesNodeBase<ElemType, false>
static const std::wstring TypeName() { return L"Times"; }
public:
TimesNode(DEVICEID_TYPE deviceId, const wstring& name, size_t outputRank = 1, int inferInputRank = 1)
: Base(deviceId, name, outputRank, inferInputRank)
TimesNode(DEVICEID_TYPE deviceId, const wstring& name, size_t outputRank = 1, int inferInputRankToMap = 1)
: Base(deviceId, name, outputRank, inferInputRankToMap)
{
}
TimesNode(const ScriptableObjects::IConfigRecordPtr configp)
: TimesNode(configp->Get(L"deviceId"), L"<placeholder>", configp->Get(L"outputRank"), configp->Get(L"inferInputRank"))
: TimesNode(configp->Get(L"deviceId"), L"<placeholder>", configp->Get(L"outputRank"), configp->Get(L"inferInputRankToMap"))
{
AttachInputsFromConfig(configp, this->GetExpectedNumInputs());
}
@ -562,7 +558,7 @@ class TransposeTimesNode : public TimesNodeBase<ElemType, true>
public:
DeclareConstructorFromConfigWithNumInputs(TransposeTimesNode);
TransposeTimesNode(DEVICEID_TYPE deviceId, const wstring& name, size_t outputRank = 1)
: Base(deviceId, name, outputRank, /*inferInputRank=*/1)
: Base(deviceId, name, outputRank, /*inferInputRankToMap=*/1)
{
}
};