Added CMA to BN node, updated samples.

This commit is contained in:
Alexey Kamenev 2016-01-08 11:39:52 -08:00
Родитель f76412385d
Коммит f52e80cf8f
7 изменённых файлов: 43 добавления и 25 удалений

Просмотреть файл

@ -34,7 +34,7 @@ Train=[
minibatchSize=128
learningRatesPerMB=0.1*80:0.01*40:0.001
momentumPerMB=0.9
maxEpochs=10
maxEpochs=120
L2RegWeight=0.0001
dropoutRate=0
@ -57,7 +57,7 @@ Train=[
height=32
channels=3
cropType=Random
cropRatio=1
cropRatio=0.8
jitterType=UniRatio
interpolations=Linear
#meanFile=

Просмотреть файл

@ -48,14 +48,14 @@ DNN=[
rn3_2 = ResNetNode2(rn3_1, cMap3, 576, kW, kH, convWScale, convBValue, scValue)
rn3_3 = ResNetNode2(rn3_2, cMap3, 576, kW, kH, convWScale, convBValue, scValue)
# pool
poolW = 3
poolH = 3
poolhStride = 2
poolvStride = 2
# Global average pooling
poolW = 8
poolH = 8
poolhStride = 1
poolvStride = 1
pool = AveragePooling(rn3_3, poolW, poolH, poolhStride, poolvStride, imageLayout = "cudnn")
ol = DnnLastLayer(576, labelDim, pool, fc1WScale, fc1BValue)
ol = DnnLastLayer(cMap3, labelDim, pool, fc1WScale, fc1BValue)
CE = CrossEntropyWithSoftmax(labels, ol, tag = Criteria)
Err = ErrorPrediction(labels, ol, tag = Eval)

Просмотреть файл

@ -11,7 +11,8 @@ ConvBNReLULayer(inp, outMap, inWCount, kW, kH, hStride, vStride, wScale, bValue,
{
W = Parameter(outMap, inWCount, init = Gaussian, initValueScale = wScale)
b = Parameter(outMap, 1, init = fixedValue, value = bValue)
sc = Parameter(outMap, 1, init = Gaussian, initValueScale = scScale)
#sc = Parameter(outMap, 1, init = Gaussian, initValueScale = scScale)
sc = Parameter(outMap, 1, init = fixedValue, value = 1)
m = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
isd = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
@ -24,7 +25,8 @@ ResNetNode2(inp, outMap, inWCount, kW, kH, wScale, bValue, scScale)
{
W1 = Parameter(outMap, inWCount, init = Gaussian, initValueScale = wScale)
b1 = Parameter(outMap, 1, init = fixedValue, value = bValue)
sc1 = Parameter(outMap, 1, init = Gaussian, initValueScale = scScale)
#sc1 = Parameter(outMap, 1, init = Gaussian, initValueScale = scScale)
sc1 = Parameter(outMap, 1, init = fixedValue, value = 1)
m1 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
isd1 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
@ -34,7 +36,8 @@ ResNetNode2(inp, outMap, inWCount, kW, kH, wScale, bValue, scScale)
W2 = Parameter(outMap, inWCount, init = Gaussian, initValueScale = wScale)
b2 = Parameter(outMap, 1, init = fixedValue, value = bValue)
sc2 = Parameter(outMap, 1, init = Gaussian, initValueScale = scScale)
#sc2 = Parameter(outMap, 1, init = Gaussian, initValueScale = scScale)
sc2 = Parameter(outMap, 1, init = fixedValue, value = 1)
m2 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
isd2 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
@ -48,7 +51,8 @@ ResNetNode2Inc(inp, outMap, inWCount, wCount, kW, kH, wScale, bValue, scScale, W
{
W1 = Parameter(outMap, inWCount, init = Gaussian, initValueScale = wScale)
b1 = Parameter(outMap, 1, init = fixedValue, value = bValue)
sc1 = Parameter(outMap, 1, init = Gaussian, initValueScale = scScale)
#sc1 = Parameter(outMap, 1, init = Gaussian, initValueScale = scScale)
sc1 = Parameter(outMap, 1, init = fixedValue, value = 1)
m1 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
isd1 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
@ -58,7 +62,8 @@ ResNetNode2Inc(inp, outMap, inWCount, wCount, kW, kH, wScale, bValue, scScale, W
W2 = Parameter(outMap, wCount, init = Gaussian, initValueScale = wScale)
b2 = Parameter(outMap, 1, init = fixedValue, value = bValue)
sc2 = Parameter(outMap, 1, init = Gaussian, initValueScale = scScale)
#sc2 = Parameter(outMap, 1, init = Gaussian, initValueScale = scScale)
sc2 = Parameter(outMap, 1, init = fixedValue, value = 1)
m2 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
isd2 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)

Просмотреть файл

@ -452,8 +452,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
bool eval = node->GetOptionalParameter("eval", "false");
bool spatial = node->GetOptionalParameter("spatial", "false");
double expAvgFactor = node->GetOptionalParameter("expAvgFactor", "1.0");
ImageLayoutKind imageLayoutKind = ImageLayoutKindFrom(node->GetOptionalParameter("imageLayout", "CHW"));
nodePtr = builder.BatchNormalization(nullptr, nullptr, nullptr, nullptr, nullptr, eval, spatial, expAvgFactor, name);
nodePtr = builder.BatchNormalization(nullptr, nullptr, nullptr, nullptr, nullptr, eval, spatial, expAvgFactor, imageLayoutKind, name);
}
}
else

Просмотреть файл

@ -610,9 +610,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
template<class ElemType> shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::BatchNormalization(const ComputationNodePtr input,
const ComputationNodePtr scale, const ComputationNodePtr bias, const ComputationNodePtr runMean, const ComputationNodePtr runInvStdDev,
bool eval, bool spatial, double expAvgFactor, const std::wstring nodeName)
bool eval, bool spatial, double expAvgFactor, ImageLayoutKind imageLayoutKind, const std::wstring nodeName)
{
return net.AddNodeToNetAndAttachInputs(New<BatchNormalizationNode<ElemType>>(net.GetDeviceId(), nodeName, eval, spatial, expAvgFactor),
return net.AddNodeToNetAndAttachInputs(New<BatchNormalizationNode<ElemType>>(net.GetDeviceId(), nodeName, eval, spatial, expAvgFactor, imageLayoutKind),
input, scale, bias, runMean, runInvStdDev);
}

Просмотреть файл

@ -132,7 +132,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
ComputationNodePtr TimeReverse(const ComputationNodePtr input, const std::wstring nodeName = L"");
ComputationNodePtr LookupTable(const ComputationNodePtr dictionary, const ComputationNodePtr input, const std::wstring nodeName = L"");
ComputationNodePtr BatchNormalization(const ComputationNodePtr input, const ComputationNodePtr scale, const ComputationNodePtr bias,
const ComputationNodePtr runMean, const ComputationNodePtr runInvStdDev, bool eval = false, bool spatial = false, double expAvgFactor = 1, const std::wstring nodeName = L"");
const ComputationNodePtr runMean, const ComputationNodePtr runInvStdDev, bool eval = false, bool spatial = false, double expAvgFactor = 1, ImageLayoutKind imageLayoutKind = ImageLayoutKind::CHW, const std::wstring nodeName = L"");
};
// create a new from config

Просмотреть файл

@ -591,15 +591,16 @@ namespace Microsoft { namespace MSR { namespace CNTK {
static const std::wstring TypeName() { return L"BatchNormalization"; }
public:
BatchNormalizationNode(DEVICEID_TYPE deviceId, const wstring & name) :
Base(deviceId, name), m_eval(false), m_spatial(false), m_expAvgFactor(0)
Base(deviceId, name), m_eval(false), m_spatial(false), m_expAvgFactor(0), m_sampleCount(0), m_imageLayoutKind(ImageLayoutKind::CHW)
{
}
BatchNormalizationNode(DEVICEID_TYPE deviceId, const wstring & name, bool eval, bool spatial, double expAvgFactor) :
Base(deviceId, name), m_eval(eval), m_spatial(spatial), m_expAvgFactor(expAvgFactor)
BatchNormalizationNode(DEVICEID_TYPE deviceId, const wstring & name, bool eval, bool spatial, double expAvgFactor, ImageLayoutKind imageLayoutKind) :
Base(deviceId, name), m_eval(eval), m_spatial(spatial), m_expAvgFactor(expAvgFactor), m_imageLayoutKind(imageLayoutKind), m_sampleCount(0)
{
}
BatchNormalizationNode(const ScriptableObjects::IConfigRecordPtr configp) :
BatchNormalizationNode(configp->Get(L"deviceId"), L"<placeholder>", configp->Get(L"eval"), configp->Get(L"spatial"), configp->Get(L"expAvgFactor"))
BatchNormalizationNode(configp->Get(L"deviceId"), L"<placeholder>", configp->Get(L"eval"), configp->Get(L"spatial"), configp->Get(L"expAvgFactor"),
ImageLayoutKindFrom(configp->Get(L"imageLayout")))
{
AttachInputs(configp, this->GetExpectedNumInputs());
}
@ -612,6 +613,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
fstream << m_eval;
fstream << m_spatial;
fstream << m_expAvgFactor;
fstream << (int32_t)m_imageLayoutKind;
fstream << m_sampleCount;
}
void Load(File& fstream, size_t modelVersion) override
@ -635,6 +638,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
fstream >> m_eval;
fstream >> m_spatial;
fstream >> m_expAvgFactor;
if (verWritten >= 0x00010002)
{
fstream >> m_imageLayoutKind;
fstream >> m_sampleCount;
}
}
void CopyTo(ComputationNodeBasePtr nodeP, const std::wstring& newName, const CopyNodeFlags flags) const override
@ -733,8 +741,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
if (isFinalValidationPass)
{
const auto m_imageLayoutKind = ImageLayoutKind::CHW; // BUGBUG: Finish this. Must be serialized.
auto shape = GetSampleLayout();
if (m_factory == nullptr)
@ -794,8 +800,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
private:
struct VersionInfo
{
int32_t VerWrittenCur() const { return 0x00010001; } // Initial
int32_t VerReadableCur() const { return 0x00010001; }
//int32_t VerWrittenCur() const { return 0x00010001; } // Initial
int32_t VerWrittenCur() const { return 0x00010002; } // Added m_imageLayoutKind and m_sampleCount
int32_t VerReadableCur() const { return 0x00010002; }
int32_t VerWeCanReadBack() const { return 0x00010001; }
};
VersionInfo m_version;
@ -808,6 +815,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
bool m_spatial;
// Smoothing factor.
double m_expAvgFactor;
// Layout (e.g. CHW).
ImageLayoutKind m_imageLayoutKind;
// Sample count, used to compute cumulative moving average.
size_t m_sampleCount;
// Stores pre-computed on forward pass mean values that are used in gradient computation.
shared_ptr<Matrix<ElemType>> m_saveMean;
// Stores pre-computed on forward pass InvStdDev values that are used in gradient computation.