Mirror of https://github.com/microsoft/caffe.git

Merge pull request #796 from jeffdonahue/solver-debug-info

Print blob debug info during training if SolverParameter "debug_info" field is set

Commit 0e3d9701b7

@@ -76,6 +76,10 @@ class Blob {
   void FromProto(const BlobProto& proto);
   void ToProto(BlobProto* proto, bool write_diff = false) const;
 
+  // Compute the sum of absolute values (L1 norm) of the data or diff.
+  Dtype asum_data() const;
+  Dtype asum_diff() const;
+
   // Set the data_/diff_ shared_ptr to point to the SyncedMemory holding the
   // data_/diff_ of Blob other -- useful in layers which simply perform a copy
   // in their forward or backward pass.

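
A side note for readers of this hunk: "asum" is the BLAS-style absolute sum, i.e. the L1 norm sum_i |x_i| over the blob's data or diff; the debug helpers added later in this change divide it by the element count to report a mean absolute value. A minimal standalone illustration of that quantity (plain C++ for illustration only, not Caffe code):

#include <cmath>
#include <cstddef>
#include <iostream>

// Sum of absolute values (L1 norm) -- the quantity asum_data()/asum_diff() return.
double asum(const float* x, std::size_t n) {
  double sum = 0.0;
  for (std::size_t i = 0; i < n; ++i) { sum += std::fabs(x[i]); }
  return sum;
}

int main() {
  const float data[] = {0.5f, -1.5f, 2.0f};
  const std::size_t n = sizeof(data) / sizeof(data[0]);
  std::cout << "asum = " << asum(data, n)             // 4
            << ", mean |x| = " << asum(data, n) / n   // ~1.33, the value the debug log reports
            << std::endl;
  return 0;
}
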
@@ -95,8 +95,9 @@ class Net {
   // returns the parameters
   inline vector<shared_ptr<Blob<Dtype> > >& params() { return params_; }
   // returns the parameter learning rate multipliers
-  inline vector<float>& params_lr() {return params_lr_; }
+  inline vector<float>& params_lr() { return params_lr_; }
   inline vector<float>& params_weight_decay() { return params_weight_decay_; }
+  const map<string, int>& param_names_index() { return param_names_index_; }
   // Input and output blob numbers
   inline int num_inputs() { return net_input_blobs_.size(); }
   inline int num_outputs() { return net_output_blobs_.size(); }

@@ -111,7 +112,8 @@ class Net {
   const shared_ptr<Blob<Dtype> > blob_by_name(const string& blob_name);
   bool has_layer(const string& layer_name);
   const shared_ptr<Layer<Dtype> > layer_by_name(const string& layer_name);
-  const map<string, int>& param_names_index() { return param_names_index_; }
 
+  void set_debug_info(const bool value) { debug_info_ = value; }
+
  protected:
   // Helpers for Init.

@@ -125,6 +127,12 @@ class Net {
                  map<string, int>* blob_name_to_idx);
   void AppendParam(const NetParameter& param, const int layer_id,
                    const int param_id);
+
+  // Helpers for displaying debug info.
+  void ForwardDebugInfo(const int layer_id);
+  void BackwardDebugInfo(const int layer_id);
+  void UpdateDebugInfo(const int param_id);
+
   // Function to get misc parameters, e.g. the learning rate multiplier and
   // weight decay.
   void GetLearningRateAndWeightDecay();

@@ -150,7 +158,8 @@ class Net {
   vector<vector<Blob<Dtype>*> > top_vecs_;
   vector<vector<int> > top_id_vecs_;
   vector<int> param_owners_;
-  vector<pair<int, int> > layer_param_indices_;
+  vector<string> param_display_names_;
+  vector<pair<int, int> > param_layer_indices_;
   map<string, int> param_names_index_;
   // blob indices for the input and the output of the net
   vector<int> net_input_blob_indices_;

@@ -166,6 +175,9 @@ class Net {
   vector<float> params_weight_decay_;
   // The bytes of memory used by this net
   size_t memory_used_;
+  // Whether to compute and display debug info for the net.
+  bool debug_info_;
+
   DISABLE_COPY_AND_ASSIGN(Net);
 };
 
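
Usage note (not part of the patch): with the accessor above, debug logging can also be switched on directly on a Net, independent of the solver. A rough sketch against the headers in this change; "train.prototxt" is a placeholder net definition and error handling is omitted:

#include <iostream>
#include <vector>
#include "caffe/blob.hpp"
#include "caffe/net.hpp"

int main() {
  // Build a net from its prototxt definition ("train.prototxt" is a placeholder path).
  caffe::Net<float> net("train.prototxt");
  // Ask the net to log per-blob statistics during Forward/Backward.
  net.set_debug_info(true);

  // Dummy bottom vector, as in Solver::Solve, for nets whose inputs come from data layers.
  std::vector<caffe::Blob<float>*> bottom_vec;
  const float loss = net.ForwardBackward(bottom_vec);
  std::cout << "loss = " << loss << std::endl;
  return 0;
}
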
@@ -137,6 +137,76 @@ void Blob<Dtype>::Update() {
   }
 }
 
+template <> unsigned int Blob<unsigned int>::asum_data() const {
+  NOT_IMPLEMENTED;
+  return 0;
+}
+
+template <> int Blob<int>::asum_data() const {
+  NOT_IMPLEMENTED;
+  return 0;
+}
+
+template <typename Dtype>
+Dtype Blob<Dtype>::asum_data() const {
+  if (!data_) { return 0; }
+  switch (data_->head()) {
+  case SyncedMemory::HEAD_AT_CPU:
+    return caffe_cpu_asum(count_, cpu_data());
+  case SyncedMemory::HEAD_AT_GPU:
+  case SyncedMemory::SYNCED:
+#ifndef CPU_ONLY
+  {
+    Dtype asum;
+    caffe_gpu_asum(count_, gpu_data(), &asum);
+    return asum;
+  }
+#else
+    NO_GPU;
+#endif
+  case SyncedMemory::UNINITIALIZED:
+    return 0;
+  default:
+    LOG(FATAL) << "Unknown SyncedMemory head state: " << data_->head();
+  }
+  return 0;
+}
+
+template <> unsigned int Blob<unsigned int>::asum_diff() const {
+  NOT_IMPLEMENTED;
+  return 0;
+}
+
+template <> int Blob<int>::asum_diff() const {
+  NOT_IMPLEMENTED;
+  return 0;
+}
+
+template <typename Dtype>
+Dtype Blob<Dtype>::asum_diff() const {
+  if (!diff_) { return 0; }
+  switch (diff_->head()) {
+  case SyncedMemory::HEAD_AT_CPU:
+    return caffe_cpu_asum(count_, cpu_diff());
+  case SyncedMemory::HEAD_AT_GPU:
+  case SyncedMemory::SYNCED:
+#ifndef CPU_ONLY
+  {
+    Dtype asum;
+    caffe_gpu_asum(count_, gpu_diff(), &asum);
+    return asum;
+  }
+#else
+    NO_GPU;
+#endif
+  case SyncedMemory::UNINITIALIZED:
+    return 0;
+  default:
+    LOG(FATAL) << "Unknown SyncedMemory head state: " << diff_->head();
+  }
+  return 0;
+}
+
 template <typename Dtype>
 void Blob<Dtype>::CopyFrom(const Blob& source, bool copy_diff, bool reshape) {
   if (num_ != source.num() || channels_ != source.channels() ||

@@ -163,6 +163,8 @@ void Net<Dtype>::Init(const NetParameter& in_param) {
   GetLearningRateAndWeightDecay();
   LOG(INFO) << "Network initialization done.";
   LOG(INFO) << "Memory required for data: " << memory_used_ * sizeof(Dtype);
+  // Don't display debug info by default.
+  debug_info_ = false;
 }
 
 // Helper for Net::Init: add a new input or top blob to the net. (Inputs have

@@ -242,13 +244,17 @@ void Net<Dtype>::AppendParam(const NetParameter& param, const int layer_id,
                              const int param_id) {
   const LayerParameter& layer_param = layers_[layer_id]->layer_param();
   const int param_size = layer_param.param_size();
-  string param_name;
-  if (param_size) {
-    param_name = layer_param.param(param_id);
+  string param_name = param_size ? layer_param.param(param_id) : "";
+  if (param_name.size()) {
+    param_display_names_.push_back(param_name);
+  } else {
+    ostringstream param_display_name;
+    param_display_name << param_id;
+    param_display_names_.push_back(param_display_name.str());
   }
   const int net_param_id = params_.size();
   params_.push_back(layers_[layer_id]->blobs()[param_id]);
-  layer_param_indices_.push_back(make_pair(layer_id, param_id));
+  param_layer_indices_.push_back(make_pair(layer_id, param_id));
   if (!param_size || !param_name.size() || (param_name.size() &&
       param_names_index_.find(param_name) == param_names_index_.end())) {
     // This layer "owns" this parameter blob -- it is either anonymous

@@ -263,7 +269,7 @@ void Net<Dtype>::AppendParam(const NetParameter& param, const int layer_id,
     const int owner_net_param_id = param_names_index_[param_name];
     param_owners_.push_back(owner_net_param_id);
     const pair<int, int>& owner_index =
-        layer_param_indices_[owner_net_param_id];
+        param_layer_indices_[owner_net_param_id];
     const int owner_layer_id = owner_index.first;
     const int owner_param_id = owner_index.second;
     LOG(INFO) << "Sharing parameters '" << param_name << "' owned by "

@@ -339,6 +345,7 @@ Dtype Net<Dtype>::ForwardFromTo(int start, int end) {
     // LOG(ERROR) << "Forwarding " << layer_names_[i];
     Dtype layer_loss = layers_[i]->Forward(bottom_vecs_[i], &top_vecs_[i]);
     loss += layer_loss;
+    if (debug_info_) { ForwardDebugInfo(i); }
   }
   return loss;
 }

@@ -402,10 +409,69 @@ void Net<Dtype>::BackwardFromTo(int start, int end) {
     if (layer_need_backward_[i]) {
       layers_[i]->Backward(
           top_vecs_[i], bottom_need_backward_[i], &bottom_vecs_[i]);
+      if (debug_info_) { BackwardDebugInfo(i); }
     }
   }
 }
 
+template <typename Dtype>
+void Net<Dtype>::ForwardDebugInfo(const int layer_id) {
+  for (int top_id = 0; top_id < top_vecs_[layer_id].size(); ++top_id) {
+    const Blob<Dtype>& blob = *top_vecs_[layer_id][top_id];
+    const string& blob_name = blob_names_[top_id_vecs_[layer_id][top_id]];
+    const Dtype asum_mean = blob.asum_data() / blob.count();
+    LOG(INFO) << "    [Forward] "
+        << "Layer " << layer_names_[layer_id] << ", top blob " << blob_name
+        << " data: " << asum_mean;
+  }
+}
+
+template <typename Dtype>
+void Net<Dtype>::BackwardDebugInfo(const int layer_id) {
+  const vector<Blob<Dtype>*>& bottom_vec = bottom_vecs_[layer_id];
+  for (int bottom_id = 0; bottom_id < bottom_vec.size(); ++bottom_id) {
+    if (!bottom_need_backward_[layer_id][bottom_id]) { continue; }
+    const Blob<Dtype>& blob = *bottom_vec[bottom_id];
+    const string& blob_name = blob_names_[bottom_id_vecs_[layer_id][bottom_id]];
+    const Dtype asum_mean = blob.asum_diff() / blob.count();
+    LOG(INFO) << "    [Backward] "
+        << "Layer " << layer_names_[layer_id] << ", bottom blob " << blob_name
+        << " diff: " << asum_mean;
+  }
+  for (int param_id = 0; param_id < layers_[layer_id]->blobs().size();
+       ++param_id) {
+    if (!layers_[layer_id]->param_propagate_down(param_id)) { continue; }
+    const Blob<Dtype>& blob = *layers_[layer_id]->blobs()[param_id];
+    const Dtype asum_mean = blob.asum_diff() / blob.count();
+    LOG(INFO) << "    [Backward] "
+        << "Layer " << layer_names_[layer_id] << ", param blob " << param_id
+        << " diff: " << asum_mean;
+  }
+}
+
+template <typename Dtype>
+void Net<Dtype>::UpdateDebugInfo(const int param_id) {
+  const Blob<Dtype>& blob = *params_[param_id];
+  const int param_owner = param_owners_[param_id];
+  const string& layer_name = layer_names_[param_layer_indices_[param_id].first];
+  const string& param_display_name = param_display_names_[param_id];
+  const Dtype asum_diff_mean = blob.asum_diff() / blob.count();
+  if (param_owner < 0) {
+    const Dtype asum_data_mean = blob.asum_data() / blob.count();
+    LOG(INFO) << "    [Update] Layer " << layer_name
+        << ", param " << param_display_name
+        << " data: " << asum_data_mean << "; diff: " << asum_diff_mean;
+  } else {
+    const string& owner_layer_name =
+        layer_names_[param_layer_indices_[param_owner].first];
+    LOG(INFO) << "    [Update] Layer " << layer_name
+        << ", param blob " << param_display_name
+        << " (owned by layer " << owner_layer_name << ", "
+        << "param " << param_display_names_[param_owners_[param_id]] << ")"
+        << " diff: " << asum_diff_mean;
+  }
+}
+
 template <typename Dtype>
 void Net<Dtype>::ShareTrainedLayersWith(Net* other) {
   int num_source_layers = other->layers().size();

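
Reading the output of these helpers: for a blob with N elements the logged value is asum / count = (1/N) * sum_i |x_i|, the mean absolute value of the data or diff. For example, a parameter diff of {0.01, -0.03, 0.02} is reported as 0.06 / 3 = 0.02. Values collapsing toward zero or blowing up from layer to layer are the kind of learning problem this output is meant to expose.
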
@@ -516,9 +582,8 @@ void Net<Dtype>::Update() {
   // diff. (Assumes that the learning rate, weight decay, etc. have already been
   // accounted for in the current diff.)
   for (int i = 0; i < params_.size(); ++i) {
-    if (param_owners_[i] < 0) {
-      continue;
-    }
+    if (param_owners_[i] < 0) { continue; }
+    if (debug_info_) { UpdateDebugInfo(i); }
     const int count = params_[i]->count();
     const Dtype* this_diff;
     Dtype* owner_diff;

@@ -534,6 +599,8 @@ void Net<Dtype>::Update() {
       owner_diff = params_[param_owners_[i]]->mutable_gpu_diff();
       caffe_gpu_add(count, this_diff, owner_diff, owner_diff);
       break;
 #else
       NO_GPU;
 #endif
+    default:
+      LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode();

@@ -541,9 +608,9 @@ void Net<Dtype>::Update() {
   }
   // Now, update the owned parameters.
   for (int i = 0; i < params_.size(); ++i) {
-    if (param_owners_[i] < 0) {
-      params_[i]->Update();
-    }
+    if (param_owners_[i] >= 0) { continue; }
+    if (debug_info_) { UpdateDebugInfo(i); }
+    params_[i]->Update();
   }
 }
 
@@ -105,6 +105,10 @@ message SolverParameter {
   // random number generator -- useful for reproducible results. Otherwise,
   // (and by default) initialize using a seed derived from the system clock.
   optional int64 random_seed = 20 [default = -1];
+
+  // If true, print information about the state of the net that may help with
+  // debugging learning problems.
+  optional bool debug_info = 23 [default = false];
 }
 
 // A message that stores the solver snapshots

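
With the field defined above, enabling the option is a one-line addition to a solver definition (debug_info: true), which Solver::Init then forwards to the net (next hunk). A hedged sketch of the equivalent programmatic route, assuming the protoc-generated caffe.pb.h for this proto and the standard generated accessors:

#include <google/protobuf/text_format.h>
#include <iostream>
#include "caffe/proto/caffe.pb.h"

int main() {
  caffe::SolverParameter solver_param;
  // Same syntax as the line one would add to a solver prototxt file: debug_info: true
  const bool parsed =
      google::protobuf::TextFormat::ParseFromString("debug_info: true", &solver_param);
  // Equivalent route via the protoc-generated setter:
  solver_param.set_debug_info(true);
  std::cout << std::boolalpha << "parsed: " << parsed
            << ", debug_info: " << solver_param.debug_info() << std::endl;
  return 0;
}
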
@@ -53,6 +53,8 @@ void Solver<Dtype>::Init(const SolverParameter& param) {
     LOG(INFO) << "Creating training net from file: " << param_.train_net();
     net_.reset(new Net<Dtype>(param_.train_net()));
   }
+  CHECK(net_) << "Training net uninitialized.";
+  net_->set_debug_info(param_.debug_info());
   const int num_test_net_params = param_.test_net_param_size();
   const int num_test_net_files = param_.test_net_size();
   const int num_test_nets = num_test_net_params + num_test_net_files;

@@ -100,11 +102,17 @@ void Solver<Dtype>::Solve(const char* resume_file) {
   // should be given, and we will just provide dummy vecs.
   vector<Blob<Dtype>*> bottom_vec;
   while (iter_++ < param_.max_iter()) {
+    const bool display = param_.display() && iter_ % param_.display() == 0;
+    if (display) {
+      net_->set_debug_info(param_.debug_info());
+    } else {
+      net_->set_debug_info(false);
+    }
     Dtype loss = net_->ForwardBackward(bottom_vec);
     ComputeUpdateValue();
     net_->Update();
 
-    if (param_.display() && iter_ % param_.display() == 0) {
+    if (display) {
       LOG(INFO) << "Iteration " << iter_ << ", loss = " << loss;
     }
     if (param_.test_interval() && iter_ % param_.test_interval() == 0) {

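
Note on the Solve() loop above: although Solver::Init() hands debug_info to the net up front, the loop re-sets it every iteration, so the per-blob statistics are computed and logged only on display iterations (e.g. with display: 20 in the solver, every 20th iteration) and the extra asum work is skipped the rest of the time.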