зеркало из https://github.com/microsoft/caffe.git
Replace the clock()-based CPU timing with the newly added Timer class in the net speed benchmark
This commit is contained in:
Родитель
263ceea2e7
Коммит
cd84539806
|
@ -1,31 +1,29 @@
|
||||||
// Copyright 2013 Yangqing Jia
|
// Copyright 2013 Yangqing Jia
|
||||||
|
|
||||||
#include <ctime>
|
#include <cuda_runtime.h>
|
||||||
#include <string>
|
#include <fcntl.h>
|
||||||
#include <vector>
|
#include <google/protobuf/text_format.h>
|
||||||
|
|
||||||
#include "cuda_runtime.h"
|
#include <cstring>
|
||||||
#include "fcntl.h"
|
#include <ctime>
|
||||||
#include "google/protobuf/text_format.h"
|
|
||||||
|
|
||||||
#include "caffe/blob.hpp"
|
#include "caffe/blob.hpp"
|
||||||
#include "caffe/common.hpp"
|
#include "caffe/common.hpp"
|
||||||
#include "caffe/net.hpp"
|
#include "caffe/net.hpp"
|
||||||
#include "caffe/filler.hpp"
|
#include "caffe/filler.hpp"
|
||||||
#include "caffe/proto/caffe.pb.h"
|
#include "caffe/proto/caffe.pb.h"
|
||||||
|
#include "caffe/util/benchmark.hpp"
|
||||||
#include "caffe/util/io.hpp"
|
#include "caffe/util/io.hpp"
|
||||||
#include "caffe/solver.hpp"
|
#include "caffe/solver.hpp"
|
||||||
|
|
||||||
using boost::shared_ptr;
|
using namespace caffe;
|
||||||
|
|
||||||
using namespace caffe; // NOLINT(build/namespaces)
|
|
||||||
|
|
||||||
int main(int argc, char** argv) {
|
int main(int argc, char** argv) {
|
||||||
|
|
||||||
int total_iter = 50;
|
int total_iter = 50;
|
||||||
|
|
||||||
if (argc < 2) {
|
if (argc < 2) {
|
||||||
LOG(ERROR) << "net_speed_benchmark net_proto [iterations=50] [CPU/GPU] "
|
LOG(ERROR) << "net_speed_benchmark net_proto [iterations=50] [CPU/GPU] [Device_id=0]";
|
||||||
<< "[Device_id=0]";
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -67,52 +65,38 @@ int main(int argc, char** argv) {
|
||||||
vector<vector<Blob<float>*> >& bottom_vecs = caffe_net.bottom_vecs();
|
vector<vector<Blob<float>*> >& bottom_vecs = caffe_net.bottom_vecs();
|
||||||
vector<vector<Blob<float>*> >& top_vecs = caffe_net.top_vecs();
|
vector<vector<Blob<float>*> >& top_vecs = caffe_net.top_vecs();
|
||||||
LOG(ERROR) << "*** Benchmark begins ***";
|
LOG(ERROR) << "*** Benchmark begins ***";
|
||||||
if (Caffe::mode() == Caffe::GPU) {
|
Timer total_timer;
|
||||||
cudaDeviceSynchronize();
|
total_timer.Start();
|
||||||
}
|
Timer forward_timer;
|
||||||
clock_t forward_start = clock();
|
forward_timer.Start();
|
||||||
|
Timer timer;
|
||||||
for (int i = 0; i < layers.size(); ++i) {
|
for (int i = 0; i < layers.size(); ++i) {
|
||||||
const string& layername = layers[i]->layer_param().name();
|
const string& layername = layers[i]->layer_param().name();
|
||||||
if (Caffe::mode() == Caffe::GPU) {
|
timer.Start();
|
||||||
cudaDeviceSynchronize();
|
|
||||||
}
|
|
||||||
clock_t start = clock();
|
|
||||||
for (int j = 0; j < total_iter; ++j) {
|
for (int j = 0; j < total_iter; ++j) {
|
||||||
layers[i]->Forward(bottom_vecs[i], &top_vecs[i]);
|
layers[i]->Forward(bottom_vecs[i], &top_vecs[i]);
|
||||||
}
|
}
|
||||||
if (Caffe::mode() == Caffe::GPU) {
|
timer.Stop();
|
||||||
cudaDeviceSynchronize();
|
LOG(ERROR) << layername << "\tforward: " << timer.ElapsedSeconds() << " seconds.";
|
||||||
}
|
|
||||||
LOG(ERROR) << layername << "\tforward: "
|
|
||||||
<< static_cast<float>(clock() - start) / CLOCKS_PER_SEC
|
|
||||||
<< " seconds.";
|
|
||||||
}
|
}
|
||||||
LOG(ERROR) << "Forward pass: "
|
forward_timer.Stop();
|
||||||
<< static_cast<float>(clock() - forward_start) / CLOCKS_PER_SEC
|
LOG(ERROR) << "Forward pass: " << forward_timer.ElapsedSeconds() << " seconds.";
|
||||||
<< " seconds.";
|
Timer backward_timer;
|
||||||
clock_t backward_start = clock();
|
backward_timer.Start();
|
||||||
for (int i = layers.size() - 1; i >= 0; --i) {
|
for (int i = layers.size() - 1; i >= 0; --i) {
|
||||||
const string& layername = layers[i]->layer_param().name();
|
const string& layername = layers[i]->layer_param().name();
|
||||||
if (Caffe::mode() == Caffe::GPU) {
|
timer.Start();
|
||||||
cudaDeviceSynchronize();
|
|
||||||
}
|
|
||||||
clock_t start = clock();
|
|
||||||
for (int j = 0; j < total_iter; ++j) {
|
for (int j = 0; j < total_iter; ++j) {
|
||||||
layers[i]->Backward(top_vecs[i], true, &bottom_vecs[i]);
|
layers[i]->Backward(top_vecs[i], true, &bottom_vecs[i]);
|
||||||
}
|
}
|
||||||
if (Caffe::mode() == Caffe::GPU) {
|
timer.Stop();
|
||||||
cudaDeviceSynchronize();
|
|
||||||
}
|
|
||||||
LOG(ERROR) << layername << "\tbackward: "
|
LOG(ERROR) << layername << "\tbackward: "
|
||||||
<< static_cast<float>(clock() - start) / CLOCKS_PER_SEC
|
<< timer.ElapsedSeconds() << " seconds.";
|
||||||
<< " seconds.";
|
|
||||||
}
|
}
|
||||||
LOG(ERROR) << "Backward pass: "
|
backward_timer.Stop();
|
||||||
<< static_cast<float>(clock() - backward_start) / CLOCKS_PER_SEC
|
LOG(ERROR) << "Backward pass: " << backward_timer.ElapsedSeconds() << " seconds.";
|
||||||
<< " seconds.";
|
total_timer.Stop();
|
||||||
LOG(ERROR) << "Total Time: "
|
LOG(ERROR) << "Total Time: " << total_timer.ElapsedSeconds() << " seconds.";
|
||||||
<< static_cast<float>(clock() - forward_start) / CLOCKS_PER_SEC
|
|
||||||
<< " seconds.";
|
|
||||||
LOG(ERROR) << "*** Benchmark ends ***";
|
LOG(ERROR) << "*** Benchmark ends ***";
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
Загрузка…
Ссылка в новой задаче