From d01c6ed712ba8285ed2b9b4c922e03ffc9fb2929 Mon Sep 17 00:00:00 2001 From: Sergey Karayev Date: Tue, 29 Apr 2014 00:21:15 -0700 Subject: [PATCH] fwd/back math docs for neuron layers --- include/caffe/neuron_layers.hpp | 55 ++++++++++++++++++++++++++---- src/caffe/layers/dropout_layer.cpp | 2 ++ 2 files changed, 50 insertions(+), 7 deletions(-) diff --git a/include/caffe/neuron_layers.hpp b/include/caffe/neuron_layers.hpp index 68ebc2bc..e5108416 100644 --- a/include/caffe/neuron_layers.hpp +++ b/include/caffe/neuron_layers.hpp @@ -22,7 +22,10 @@ namespace caffe { -// NeuronLayer is an interface for layers that work on single elements. +/* NeuronLayer + An interface for layers that take one blob as input (x), + and produce one blob as output (y). +*/ template class NeuronLayer : public Layer { public: @@ -32,7 +35,13 @@ class NeuronLayer : public Layer { vector*>* top); }; -// BNLLLayer +/* BNLLLayer + + y = x + log(1 + exp(-x)) if x > 0 + y = log(1 + exp(x)) if x <= 0 + + y' = exp(x) / (exp(x) + 1) +*/ template class BNLLLayer : public NeuronLayer { public: @@ -50,7 +59,16 @@ class BNLLLayer : public NeuronLayer { const bool propagate_down, vector*>* bottom); }; -// DropoutLayer sets random portion of its input to 0. +/* DropoutLayer + During training only, sets some portion of x to 0, adjusting the + vector magnitude accordingly. 
+ + mask = bernoulli(1 - threshold) + scale = 1 / (1 - threshold) + y = x * mask * scale + + y' = mask * scale +*/ template class DropoutLayer : public NeuronLayer { public: @@ -75,7 +93,12 @@ class DropoutLayer : public NeuronLayer { unsigned int uint_thres_; }; -// PowerLayer computes y = (shift + scale * x)^power +/* PowerLayer + y = (shift + scale * x) ^ power + + y' = scale * power * (shift + scale * x) ^ (power - 1) + = scale * power * y / (shift + scale * x) +*/ template class PowerLayer : public NeuronLayer { public: @@ -100,7 +123,13 @@ class PowerLayer : public NeuronLayer { Dtype diff_scale_; }; -// ReLULayer computes y = max(0, x). +/* ReLULayer + Rectified Linear Unit non-linearity: fast and stable. + + y = max(0, x) + + y' = 1 if x > 0, 0 otherwise +*/ template class ReLULayer : public NeuronLayer { public: @@ -119,7 +148,13 @@ class ReLULayer : public NeuronLayer { const bool propagate_down, vector*>* bottom); }; -// SigmoidLayer computes y = 1. / (1 + exp(-x)) +/* SigmoidLayer + Sigmoid function non-linearity: a classic. + + y = 1. / (1 + exp(-x)) + + y' = y * (1 - y) +*/ template class SigmoidLayer : public NeuronLayer { public: @@ -137,7 +172,13 @@ class SigmoidLayer : public NeuronLayer { const bool propagate_down, vector*>* bottom); }; -// TanHLayer: computes y = 1. * (exp(2 * x) - 1) / (exp(2 * x) + 1) +/* TanHLayer + Hyperbolic tangent non-linearity. + + y = 1. * (exp(2x) - 1) / (exp(2x) + 1) + + y' = 1 - [(exp(2x) - 1) / (exp(2x) + 1)] ^ 2 = 1 - y ^ 2 +*/ template class TanHLayer : public NeuronLayer { public: diff --git a/src/caffe/layers/dropout_layer.cpp b/src/caffe/layers/dropout_layer.cpp index e28cab33..e1b69f36 100644 --- a/src/caffe/layers/dropout_layer.cpp +++ b/src/caffe/layers/dropout_layer.cpp @@ -1,5 +1,7 @@ // Copyright 2014 BVLC and contributors. +// TODO(sergeyk): effect should not depend on phase; avoid the wasted memcpy. + #include #include "caffe/common.hpp"