addressed CR feedback; modified an e2e test to cover inferInputRankToMap
This commit is contained in:
Parent: 7aa9b83f40
Commit: bd5f9cb3f4
@@ -32,13 +32,19 @@
 # Note: outDim may describe a tensor as well.
 LinearLayer {outDim, bias = true, init='heNormal', initValueScale=1, inputRank=None, mapRank=None} =
 {
-    inputShape = if BS.Constants.IsNone (inputRank) then Inferred else Repeat (inputRank, Inferred)
+    # inputRank given: number of zeroes to add to W (mapRank must not be given)
+    # mapRank given: expand W to leave exactly mapRank axes (inputRank must not be given)
+    # none given: expand W to all (same as mapRank=0)
+    inputShape =
+        if BS.Constants.IsNone (inputRank) then Inferred # not given: one Inferred, which will get expanded
+        else if !BS.Constants.IsNone (mapRank) then Fail ("'inputRank' and 'mapRank' cannot be specified at the same time.")
+        else Repeat (inputRank, Inferred)
     W = ParameterTensor {_ConcatArrays (outDim, inputShape), init=init, initValueScale=initValueScale}
     b = ParameterTensor {outDim, initValue=0}
     outputRank = Length (_AsArray (outDim)) # support outputs with tensor layouts
     inferInputRankToMap =
-        if BS.Constants.IsNone (mapRank) then -1 # means not specified
-        else if !BS.Constants.IsNone (inputRank) Fail ("'inputRank' and 'mapRank' cannot be specified at the same time.")
+        if !BS.Constants.IsNone (inputRank) then -1 # means not specified
+        else if BS.Constants.IsNone (mapRank) then 0 # default to 'use all input dims'
         else mapRank
     apply (x) =
         if bias
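
Note: as a usage sketch of the two mutually exclusive options (the layer size 1024 and the input x are hypothetical, not from this commit):

    h1 = LinearLayer {1024, inputRank=2} (x)  # W becomes [1024 x Inferred x Inferred]: two input axes absorbed
    h2 = LinearLayer {1024, mapRank=1} (x)    # W expands over all input axes except one, which is mapped over
    # giving both fails: "'inputRank' and 'mapRank' cannot be specified at the same time."
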
@@ -238,7 +238,7 @@ class TimesNodeBase : public ComputationNode<ElemType>, public NumInputs<2>
     typedef ComputationNode<ElemType> Base; UsingComputationNodeMembers; using Base::OperationName; \

 public:
-    TimesNodeBase(DEVICEID_TYPE deviceId, const wstring& name, size_t outputRank = 1, int inferInputRankToMap = 1)
+    TimesNodeBase(DEVICEID_TYPE deviceId, const wstring& name, size_t outputRank = 1, int inferInputRankToMap = -1)
         : Base(deviceId, name), m_outputRank(outputRank), m_inferInputRankToMap(inferInputRankToMap)
     {
     }
@@ -249,8 +249,8 @@
     if (flags & CopyNodeFlags::copyNodeValue)
     {
         auto node = dynamic_pointer_cast<TimesNodeBase<ElemType, m_transpose>>(nodeP);
         node->m_outputRank = m_outputRank;
         node->m_inferInputRankToMap = m_inferInputRankToMap;
     }
 }

@@ -271,7 +271,7 @@ public:
         if (modelVersion >= CNTK_MODEL_VERSION_11)
             fstream >> m_inferInputRankToMap;
         else
-            m_inferInputRankToMap = 1;
+            m_inferInputRankToMap = -1;
     }

 private:
@@ -525,7 +525,7 @@ class TimesNode : public TimesNodeBase<ElemType, false>
     static const std::wstring TypeName() { return L"Times"; }

 public:
-    TimesNode(DEVICEID_TYPE deviceId, const wstring& name, size_t outputRank = 1, int inferInputRankToMap = 1)
+    TimesNode(DEVICEID_TYPE deviceId, const wstring& name, size_t outputRank = 1, int inferInputRankToMap = -1)
         : Base(deviceId, name, outputRank, inferInputRankToMap)
     {
     }
@@ -558,7 +558,7 @@ class TransposeTimesNode : public TimesNodeBase<ElemType, true>
 public:
     DeclareConstructorFromConfigWithNumInputs(TransposeTimesNode);
     TransposeTimesNode(DEVICEID_TYPE deviceId, const wstring& name, size_t outputRank = 1)
-        : Base(deviceId, name, outputRank, /*inferInputRankToMap=*/1)
+        : Base(deviceId, name, outputRank, /*inferInputRankToMap=*/-1)
     {
     }
 };
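
Note: the new C++ default of -1 is the 'not specified' sentinel matching the BrainScript logic above; dimension inference then falls back to the declared shape of W. A rough BrainScript-level sketch of the distinction (W's 512 rows and the input x are hypothetical):

    W  = ParameterTensor {(512 : Inferred)}
    y1 = Times (W, x)                          # default -1: infer W's input dims from its declared shape
    y2 = Times (W, x, inferInputRankToMap=0)   # 0: expand W's Inferred dims over all axes of x
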
@@ -90,8 +90,8 @@ speechTrain = [
     // parameter macros--these carry their own weight matrices
     B() = BS.Parameters.BiasParam (cellDim)

-    W(v) = BS.Parameters.WeightParam (cellDim, inputDim) * Stabilize (v, enabled=enableSelfStabilization)  // input-to-hidden
-    H(h) = BS.Parameters.WeightParam (cellDim, outputDim) * Stabilize (h, enabled=enableSelfStabilization) // hidden-to-hidden
+    W(v) = BS.Parameters.WeightParam (cellDim, Inferred/*inputDim*/) * Stabilize (v, enabled=enableSelfStabilization)  // input-to-hidden
+    H(h) = BS.Parameters.WeightParam (cellDim, Inferred/*outputDim*/) * Stabilize (h, enabled=enableSelfStabilization) // hidden-to-hidden
     C(c) = BS.Parameters.DiagWeightParam (cellDim) .* Stabilize (c, enabled=enableSelfStabilization)        // cell-to-hidden (note: applied elementwise)

     // note: the W(x) here are all different, they all come with their own set of weights; same for H(dh), C(dc), and B()
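
Note: with Inferred in place of inputDim/outputDim, the second dimension of each weight is deduced on first use, so the macros no longer depend on those variables. A minimal sketch of the effect (the 40-dim input is made up):

    W(v) = BS.Parameters.WeightParam (cellDim, Inferred) * v
    h = W (features)   # if features has 40 dims, W is inferred as [cellDim x 40]
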
@@ -133,10 +133,11 @@ speechTrain = [
             else /*BS.RNNs.*/ RecurrentLSTMP (hiddenDim, cellDim=innerCellDim, /*S*/ (LSTMoutput[k-1]), inputDim=hiddenDim, enableSelfStabilization=useSelfStabilization).h

     // and add a softmax layer on top
-    W = BS.Parameters.WeightParam (labelDim, hiddenDim)
+    W = BS.Parameters.WeightParam (labelDim, Inferred)
     B = BS.Parameters.BiasParam (labelDim)

-    z = W * S(LSTMoutput[numLSTMLayers]) + B; // top-level input to Softmax
+    // (unnecessarily using explicit Times with inferInputRankToMap in order to have a test for the inferInputRankToMap parameter)
+    z = Times (W, S(LSTMoutput[numLSTMLayers]), inferInputRankToMap=0) + B; // top-level input to Softmax
 ].z

     // features
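
Note: since inferInputRankToMap=0 means 'expand W over all input axes' and W's second dimension is now Inferred, the explicit Times should compute the same product as the plain W * S(...) it replaces; it is there only so the e2e run exercises the parameter. A hedged equivalence check one could add (not part of the commit):

    zRef = W * S(LSTMoutput[numLSTMLayers]) + B                                # previous formulation
    z    = Times (W, S(LSTMoutput[numLSTMLayers]), inferInputRankToMap=0) + B  # should match zRef once W is resolved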