Added cumulative moving average (CMA) to the BatchNormalization (BN) node; updated samples.
This commit is contained in:
Parent
f76412385d
Commit
f52e80cf8f
|
@ -34,7 +34,7 @@ Train=[
|
|||
minibatchSize=128
|
||||
learningRatesPerMB=0.1*80:0.01*40:0.001
|
||||
momentumPerMB=0.9
|
||||
maxEpochs=10
|
||||
maxEpochs=120
|
||||
L2RegWeight=0.0001
|
||||
dropoutRate=0
|
||||
|
||||
|
@ -57,7 +57,7 @@ Train=[
|
|||
height=32
|
||||
channels=3
|
||||
cropType=Random
|
||||
cropRatio=1
|
||||
cropRatio=0.8
|
||||
jitterType=UniRatio
|
||||
interpolations=Linear
|
||||
#meanFile=
|
||||
|
|
|
@ -48,14 +48,14 @@ DNN=[
|
|||
rn3_2 = ResNetNode2(rn3_1, cMap3, 576, kW, kH, convWScale, convBValue, scValue)
|
||||
rn3_3 = ResNetNode2(rn3_2, cMap3, 576, kW, kH, convWScale, convBValue, scValue)
|
||||
|
||||
# pool
|
||||
poolW = 3
|
||||
poolH = 3
|
||||
poolhStride = 2
|
||||
poolvStride = 2
|
||||
# Global average pooling
|
||||
poolW = 8
|
||||
poolH = 8
|
||||
poolhStride = 1
|
||||
poolvStride = 1
|
||||
pool = AveragePooling(rn3_3, poolW, poolH, poolhStride, poolvStride, imageLayout = "cudnn")
|
||||
|
||||
ol = DnnLastLayer(576, labelDim, pool, fc1WScale, fc1BValue)
|
||||
ol = DnnLastLayer(cMap3, labelDim, pool, fc1WScale, fc1BValue)
|
||||
|
||||
CE = CrossEntropyWithSoftmax(labels, ol, tag = Criteria)
|
||||
Err = ErrorPrediction(labels, ol, tag = Eval)
|
||||
|
|
|
@ -11,7 +11,8 @@ ConvBNReLULayer(inp, outMap, inWCount, kW, kH, hStride, vStride, wScale, bValue,
|
|||
{
|
||||
W = Parameter(outMap, inWCount, init = Gaussian, initValueScale = wScale)
|
||||
b = Parameter(outMap, 1, init = fixedValue, value = bValue)
|
||||
sc = Parameter(outMap, 1, init = Gaussian, initValueScale = scScale)
|
||||
#sc = Parameter(outMap, 1, init = Gaussian, initValueScale = scScale)
|
||||
sc = Parameter(outMap, 1, init = fixedValue, value = 1)
|
||||
m = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
|
||||
isd = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
|
||||
|
||||
|
@ -24,7 +25,8 @@ ResNetNode2(inp, outMap, inWCount, kW, kH, wScale, bValue, scScale)
|
|||
{
|
||||
W1 = Parameter(outMap, inWCount, init = Gaussian, initValueScale = wScale)
|
||||
b1 = Parameter(outMap, 1, init = fixedValue, value = bValue)
|
||||
sc1 = Parameter(outMap, 1, init = Gaussian, initValueScale = scScale)
|
||||
#sc1 = Parameter(outMap, 1, init = Gaussian, initValueScale = scScale)
|
||||
sc1 = Parameter(outMap, 1, init = fixedValue, value = 1)
|
||||
m1 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
|
||||
isd1 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
|
||||
|
||||
|
@ -34,7 +36,8 @@ ResNetNode2(inp, outMap, inWCount, kW, kH, wScale, bValue, scScale)
|
|||
|
||||
W2 = Parameter(outMap, inWCount, init = Gaussian, initValueScale = wScale)
|
||||
b2 = Parameter(outMap, 1, init = fixedValue, value = bValue)
|
||||
sc2 = Parameter(outMap, 1, init = Gaussian, initValueScale = scScale)
|
||||
#sc2 = Parameter(outMap, 1, init = Gaussian, initValueScale = scScale)
|
||||
sc2 = Parameter(outMap, 1, init = fixedValue, value = 1)
|
||||
m2 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
|
||||
isd2 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
|
||||
|
||||
|
@ -48,7 +51,8 @@ ResNetNode2Inc(inp, outMap, inWCount, wCount, kW, kH, wScale, bValue, scScale, W
|
|||
{
|
||||
W1 = Parameter(outMap, inWCount, init = Gaussian, initValueScale = wScale)
|
||||
b1 = Parameter(outMap, 1, init = fixedValue, value = bValue)
|
||||
sc1 = Parameter(outMap, 1, init = Gaussian, initValueScale = scScale)
|
||||
#sc1 = Parameter(outMap, 1, init = Gaussian, initValueScale = scScale)
|
||||
sc1 = Parameter(outMap, 1, init = fixedValue, value = 1)
|
||||
m1 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
|
||||
isd1 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
|
||||
|
||||
|
@ -58,7 +62,8 @@ ResNetNode2Inc(inp, outMap, inWCount, wCount, kW, kH, wScale, bValue, scScale, W
|
|||
|
||||
W2 = Parameter(outMap, wCount, init = Gaussian, initValueScale = wScale)
|
||||
b2 = Parameter(outMap, 1, init = fixedValue, value = bValue)
|
||||
sc2 = Parameter(outMap, 1, init = Gaussian, initValueScale = scScale)
|
||||
#sc2 = Parameter(outMap, 1, init = Gaussian, initValueScale = scScale)
|
||||
sc2 = Parameter(outMap, 1, init = fixedValue, value = 1)
|
||||
m2 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
|
||||
isd2 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
|
||||
|
||||
|
|
|
@ -452,8 +452,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
bool eval = node->GetOptionalParameter("eval", "false");
|
||||
bool spatial = node->GetOptionalParameter("spatial", "false");
|
||||
double expAvgFactor = node->GetOptionalParameter("expAvgFactor", "1.0");
|
||||
ImageLayoutKind imageLayoutKind = ImageLayoutKindFrom(node->GetOptionalParameter("imageLayout", "CHW"));
|
||||
|
||||
nodePtr = builder.BatchNormalization(nullptr, nullptr, nullptr, nullptr, nullptr, eval, spatial, expAvgFactor, name);
|
||||
nodePtr = builder.BatchNormalization(nullptr, nullptr, nullptr, nullptr, nullptr, eval, spatial, expAvgFactor, imageLayoutKind, name);
|
||||
}
|
||||
}
|
||||
else
|
||||
|
|
|
@ -610,9 +610,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
|
||||
template<class ElemType> shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::BatchNormalization(const ComputationNodePtr input,
|
||||
const ComputationNodePtr scale, const ComputationNodePtr bias, const ComputationNodePtr runMean, const ComputationNodePtr runInvStdDev,
|
||||
bool eval, bool spatial, double expAvgFactor, const std::wstring nodeName)
|
||||
bool eval, bool spatial, double expAvgFactor, ImageLayoutKind imageLayoutKind, const std::wstring nodeName)
|
||||
{
|
||||
return net.AddNodeToNetAndAttachInputs(New<BatchNormalizationNode<ElemType>>(net.GetDeviceId(), nodeName, eval, spatial, expAvgFactor),
|
||||
return net.AddNodeToNetAndAttachInputs(New<BatchNormalizationNode<ElemType>>(net.GetDeviceId(), nodeName, eval, spatial, expAvgFactor, imageLayoutKind),
|
||||
input, scale, bias, runMean, runInvStdDev);
|
||||
}
|
||||
|
||||
|
|
|
@ -132,7 +132,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
ComputationNodePtr TimeReverse(const ComputationNodePtr input, const std::wstring nodeName = L"");
|
||||
ComputationNodePtr LookupTable(const ComputationNodePtr dictionary, const ComputationNodePtr input, const std::wstring nodeName = L"");
|
||||
ComputationNodePtr BatchNormalization(const ComputationNodePtr input, const ComputationNodePtr scale, const ComputationNodePtr bias,
|
||||
const ComputationNodePtr runMean, const ComputationNodePtr runInvStdDev, bool eval = false, bool spatial = false, double expAvgFactor = 1, const std::wstring nodeName = L"");
|
||||
const ComputationNodePtr runMean, const ComputationNodePtr runInvStdDev, bool eval = false, bool spatial = false, double expAvgFactor = 1, ImageLayoutKind imageLayoutKind = ImageLayoutKind::CHW, const std::wstring nodeName = L"");
|
||||
};
|
||||
|
||||
// create a new from config
|
||||
|
|
|
@ -591,15 +591,16 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
static const std::wstring TypeName() { return L"BatchNormalization"; }
|
||||
public:
|
||||
BatchNormalizationNode(DEVICEID_TYPE deviceId, const wstring & name) :
|
||||
Base(deviceId, name), m_eval(false), m_spatial(false), m_expAvgFactor(0)
|
||||
Base(deviceId, name), m_eval(false), m_spatial(false), m_expAvgFactor(0), m_sampleCount(0), m_imageLayoutKind(ImageLayoutKind::CHW)
|
||||
{
|
||||
}
|
||||
BatchNormalizationNode(DEVICEID_TYPE deviceId, const wstring & name, bool eval, bool spatial, double expAvgFactor) :
|
||||
Base(deviceId, name), m_eval(eval), m_spatial(spatial), m_expAvgFactor(expAvgFactor)
|
||||
BatchNormalizationNode(DEVICEID_TYPE deviceId, const wstring & name, bool eval, bool spatial, double expAvgFactor, ImageLayoutKind imageLayoutKind) :
|
||||
Base(deviceId, name), m_eval(eval), m_spatial(spatial), m_expAvgFactor(expAvgFactor), m_imageLayoutKind(imageLayoutKind), m_sampleCount(0)
|
||||
{
|
||||
}
|
||||
BatchNormalizationNode(const ScriptableObjects::IConfigRecordPtr configp) :
|
||||
BatchNormalizationNode(configp->Get(L"deviceId"), L"<placeholder>", configp->Get(L"eval"), configp->Get(L"spatial"), configp->Get(L"expAvgFactor"))
|
||||
BatchNormalizationNode(configp->Get(L"deviceId"), L"<placeholder>", configp->Get(L"eval"), configp->Get(L"spatial"), configp->Get(L"expAvgFactor"),
|
||||
ImageLayoutKindFrom(configp->Get(L"imageLayout")))
|
||||
{
|
||||
AttachInputs(configp, this->GetExpectedNumInputs());
|
||||
}
|
||||
|
@ -612,6 +613,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
fstream << m_eval;
|
||||
fstream << m_spatial;
|
||||
fstream << m_expAvgFactor;
|
||||
fstream << (int32_t)m_imageLayoutKind;
|
||||
fstream << m_sampleCount;
|
||||
}
|
||||
|
||||
void Load(File& fstream, size_t modelVersion) override
|
||||
|
@ -635,6 +638,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
fstream >> m_eval;
|
||||
fstream >> m_spatial;
|
||||
fstream >> m_expAvgFactor;
|
||||
if (verWritten >= 0x00010002)
|
||||
{
|
||||
fstream >> m_imageLayoutKind;
|
||||
fstream >> m_sampleCount;
|
||||
}
|
||||
}
|
||||
|
||||
void CopyTo(ComputationNodeBasePtr nodeP, const std::wstring& newName, const CopyNodeFlags flags) const override
|
||||
|
@ -733,8 +741,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
|
||||
if (isFinalValidationPass)
|
||||
{
|
||||
const auto m_imageLayoutKind = ImageLayoutKind::CHW; // BUGBUG: Finish this. Must be serialized.
|
||||
|
||||
auto shape = GetSampleLayout();
|
||||
|
||||
if (m_factory == nullptr)
|
||||
|
@ -794,8 +800,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
private:
|
||||
struct VersionInfo
|
||||
{
|
||||
int32_t VerWrittenCur() const { return 0x00010001; } // Initial
|
||||
int32_t VerReadableCur() const { return 0x00010001; }
|
||||
//int32_t VerWrittenCur() const { return 0x00010001; } // Initial
|
||||
int32_t VerWrittenCur() const { return 0x00010002; } // Added m_imageLayoutKind and m_sampleCount
|
||||
int32_t VerReadableCur() const { return 0x00010002; }
|
||||
int32_t VerWeCanReadBack() const { return 0x00010001; }
|
||||
};
|
||||
VersionInfo m_version;
|
||||
|
@ -808,6 +815,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
bool m_spatial;
|
||||
// Smoothing factor.
|
||||
double m_expAvgFactor;
|
||||
// Layout (e.g. CHW).
|
||||
ImageLayoutKind m_imageLayoutKind;
|
||||
// Sample count, used to compute cumulative moving average.
|
||||
size_t m_sampleCount;
|
||||
|
||||
// Stores pre-computed on forward pass mean values that are used in gradient computation.
|
||||
shared_ptr<Matrix<ElemType>> m_saveMean;
|
||||
// Stores pre-computed on forward pass InvStdDev values that are used in gradient computation.
|
||||
|
|
Loading…
Reference in new issue