addressed CR feedback; modified an e2e test to cover inferInputRankToMap
Parent: 7aa9b83f40
Commit: bd5f9cb3f4
@@ -32,13 +32,19 @@
 # Note: outDim may describe a tensor as well.
 LinearLayer {outDim, bias = true, init='heNormal', initValueScale=1, inputRank=None, mapRank=None} =
 {
-    inputShape = if BS.Constants.IsNone (inputRank) then Inferred else Repeat (inputRank, Inferred)
+    # inputRank given: number of zeroes to add to W (mapRank must not be given)
+    # mapRank given: expand W to leave exactly mapRank axes (inputRank must not be given)
+    # none given: expand W to all (same as mapRank=0)
+    inputShape =
+        if      BS.Constants.IsNone (inputRank) then Inferred # not given: one Inferred, which will get expanded
+        else if !BS.Constants.IsNone (mapRank)  then Fail ("'inputRank' and 'mapRank' cannot be specified at the same time.")
+        else Repeat (inputRank, Inferred)
     W = ParameterTensor {_ConcatArrays (outDim, inputShape), init=init, initValueScale=initValueScale}
     b = ParameterTensor {outDim, initValue=0}
     outputRank = Length (_AsArray (outDim)) # support outputs with tensor layouts
     inferInputRankToMap =
-        if      BS.Constants.IsNone (mapRank)    then -1 # means not specified
-        else if !BS.Constants.IsNone (inputRank) then Fail ("'inputRank' and 'mapRank' cannot be specified at the same time.")
+        if      !BS.Constants.IsNone (inputRank) then -1 # means not specified
+        else if  BS.Constants.IsNone (mapRank)   then 0  # default to 'use all input dims'
         else mapRank
     apply (x) =
         if bias
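inputRank and mapRank are mutually exclusive by design: inputRank says how many input axes W absorbs, while mapRank says how many input axes are left over as map axes. A minimal BrainScript sketch of the intended call sites (the dimensions and variable names below are illustrative assumptions, not part of this commit):

    h0 = LinearLayer {1024} (features)               # neither given: same as mapRank=0, W expands over all input axes
    h1 = LinearLayer {1024, inputRank=1} (features)  # W absorbs exactly one input axis
    h2 = LinearLayer {1024, mapRank=1} (features)    # leave exactly one input axis un-absorbed
    #bad = LinearLayer {1024, inputRank=1, mapRank=1} (features)  # would hit the Fail() branch above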
@@ -238,7 +238,7 @@ class TimesNodeBase : public ComputationNode<ElemType>, public NumInputs<2>
     typedef ComputationNode<ElemType> Base; UsingComputationNodeMembers; using Base::OperationName; \

 public:
-    TimesNodeBase(DEVICEID_TYPE deviceId, const wstring& name, size_t outputRank = 1, int inferInputRankToMap = 1)
+    TimesNodeBase(DEVICEID_TYPE deviceId, const wstring& name, size_t outputRank = 1, int inferInputRankToMap = -1)
         : Base(deviceId, name), m_outputRank(outputRank), m_inferInputRankToMap(inferInputRankToMap)
     {
     }
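Changing the constructor default from 1 to -1 makes "not specified" representable: -1 keeps the legacy shape-inference behavior, while 0 and positive values are explicit requests. A hedged sketch of what the three kinds of values would mean at a BrainScript Times call site (the parameter shapes here are assumptions for illustration):

    W  = ParameterTensor {(512:Inferred)}       # second axis left to be inferred
    y0 = Times (W, x)                           # default -1: legacy inference, no rank expansion
    y1 = Times (W, x, inferInputRankToMap=0)    # 0: infer W so that all static axes of x are absorbed
    y2 = Times (W, x, inferInputRankToMap=1)    # 1: leave one axis of x un-absorbed as a map axis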
@@ -249,8 +249,8 @@ public:
         if (flags & CopyNodeFlags::copyNodeValue)
         {
             auto node = dynamic_pointer_cast<TimesNodeBase<ElemType, m_transpose>>(nodeP);
-            node->m_outputRank = m_outputRank;
-            node->m_inferInputRankToMap = m_inferInputRankToMap;
+            node->m_outputRank = m_outputRank;
+            node->m_inferInputRankToMap = m_inferInputRankToMap;
         }
     }
@@ -271,7 +271,7 @@ public:
         if (modelVersion >= CNTK_MODEL_VERSION_11)
             fstream >> m_inferInputRankToMap;
         else
-            m_inferInputRankToMap = 1;
+            m_inferInputRankToMap = -1;
     }

 private:
@@ -525,7 +525,7 @@ class TimesNode : public TimesNodeBase<ElemType, false>
     static const std::wstring TypeName() { return L"Times"; }

 public:
-    TimesNode(DEVICEID_TYPE deviceId, const wstring& name, size_t outputRank = 1, int inferInputRankToMap = 1)
+    TimesNode(DEVICEID_TYPE deviceId, const wstring& name, size_t outputRank = 1, int inferInputRankToMap = -1)
         : Base(deviceId, name, outputRank, inferInputRankToMap)
     {
     }
@@ -558,7 +558,7 @@ class TransposeTimesNode : public TimesNodeBase<ElemType, true>
 public:
     DeclareConstructorFromConfigWithNumInputs(TransposeTimesNode);
     TransposeTimesNode(DEVICEID_TYPE deviceId, const wstring& name, size_t outputRank = 1)
-        : Base(deviceId, name, outputRank, /*inferInputRankToMap=*/1)
+        : Base(deviceId, name, outputRank, /*inferInputRankToMap=*/-1)
     {
     }
 };
@@ -90,8 +90,8 @@ speechTrain = [
     // parameter macros--these carry their own weight matrices
     B() = BS.Parameters.BiasParam (cellDim)

-    W(v) = BS.Parameters.WeightParam (cellDim, inputDim) * Stabilize (v, enabled=enableSelfStabilization)              // input-to-hidden
-    H(h) = BS.Parameters.WeightParam (cellDim, outputDim) * Stabilize (h, enabled=enableSelfStabilization)             // hidden-to-hidden
+    W(v) = BS.Parameters.WeightParam (cellDim, Inferred/*inputDim*/) * Stabilize (v, enabled=enableSelfStabilization)  // input-to-hidden
+    H(h) = BS.Parameters.WeightParam (cellDim, Inferred/*outputDim*/) * Stabilize (h, enabled=enableSelfStabilization) // hidden-to-hidden
     C(c) = BS.Parameters.DiagWeightParam (cellDim) .* Stabilize (c, enabled=enableSelfStabilization)                   // cell-to-hidden (note: applied elementwise)

     // note: the W(x) here are all different, they all come with their own set of weights; same for H(dh), C(dc), and B()
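Replacing the explicit inputDim/outputDim with Inferred means each instantiation of these macros gets its weight dimension from whatever tensor is actually multiplied into it during validation, which is exactly the inference path this commit changes. A short sketch of the effect (the dimension names are assumed from the surrounding config, not asserted):

    W(v) = BS.Parameters.WeightParam (cellDim, Inferred) * Stabilize (v, enabled=enableSelfStabilization)
    # used as W(x)  where x has dim featDim:    that instance's weight is inferred as (cellDim x featDim)
    # used as W(dh) where dh has dim hiddenDim: that instance's weight is inferred as (cellDim x hiddenDim)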
@@ -133,10 +133,11 @@ speechTrain = [
         else /*BS.RNNs.*/ RecurrentLSTMP (hiddenDim, cellDim=innerCellDim, /*S*/ (LSTMoutput[k-1]), inputDim=hiddenDim, enableSelfStabilization=useSelfStabilization).h

     // and add a softmax layer on top
-    W = BS.Parameters.WeightParam (labelDim, hiddenDim)
+    W = BS.Parameters.WeightParam (labelDim, Inferred)
     B = BS.Parameters.BiasParam (labelDim)

-    z = W * S(LSTMoutput[numLSTMLayers]) + B; // top-level input to Softmax
+    // (unnecessarily using explicit Times with inferInputRankToMap in order to have a test for inferInputRankToMap parameter)
+    z = Times (W, S(LSTMoutput[numLSTMLayers]), inferInputRankToMap=0) + B; // top-level input to Softmax
     ].z

     // features
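This is the e2e coverage mentioned in the commit message: W's input dimension is now Inferred, and the explicit Times with inferInputRankToMap=0 asks the node to absorb all static axes of the LSTM output when filling that dimension in. Assuming S(LSTMoutput[numLSTMLayers]) is a per-frame vector of dimension hiddenDim, the new form reduces to the old one:

    # old, fully explicit:  W = BS.Parameters.WeightParam (labelDim, hiddenDim)
    #                       z = W * S(LSTMoutput[numLSTMLayers]) + B
    # new: Inferred is resolved to hiddenDim because all input axes are mapped
    W = BS.Parameters.WeightParam (labelDim, Inferred)
    z = Times (W, S(LSTMoutput[numLSTMLayers]), inferInputRankToMap=0) + B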