diff --git a/tools/net_speed_benchmark.cpp b/tools/net_speed_benchmark.cpp index 83fba147..a0e589e5 100644 --- a/tools/net_speed_benchmark.cpp +++ b/tools/net_speed_benchmark.cpp @@ -1,31 +1,29 @@ // Copyright 2013 Yangqing Jia -#include -#include -#include +#include +#include +#include -#include "cuda_runtime.h" -#include "fcntl.h" -#include "google/protobuf/text_format.h" +#include +#include #include "caffe/blob.hpp" #include "caffe/common.hpp" #include "caffe/net.hpp" #include "caffe/filler.hpp" #include "caffe/proto/caffe.pb.h" +#include "caffe/util/benchmark.hpp" #include "caffe/util/io.hpp" #include "caffe/solver.hpp" -using boost::shared_ptr; - -using namespace caffe; // NOLINT(build/namespaces) +using namespace caffe; int main(int argc, char** argv) { + int total_iter = 50; if (argc < 2) { - LOG(ERROR) << "net_speed_benchmark net_proto [iterations=50] [CPU/GPU] " - << "[Device_id=0]"; + LOG(ERROR) << "net_speed_benchmark net_proto [iterations=50] [CPU/GPU] [Device_id=0]"; return 0; } @@ -67,52 +65,38 @@ int main(int argc, char** argv) { vector*> >& bottom_vecs = caffe_net.bottom_vecs(); vector*> >& top_vecs = caffe_net.top_vecs(); LOG(ERROR) << "*** Benchmark begins ***"; - if (Caffe::mode() == Caffe::GPU) { - cudaDeviceSynchronize(); - } - clock_t forward_start = clock(); + Timer total_timer; + total_timer.Start(); + Timer forward_timer; + forward_timer.Start(); + Timer timer; for (int i = 0; i < layers.size(); ++i) { const string& layername = layers[i]->layer_param().name(); - if (Caffe::mode() == Caffe::GPU) { - cudaDeviceSynchronize(); - } - clock_t start = clock(); + timer.Start(); for (int j = 0; j < total_iter; ++j) { layers[i]->Forward(bottom_vecs[i], &top_vecs[i]); } - if (Caffe::mode() == Caffe::GPU) { - cudaDeviceSynchronize(); - } - LOG(ERROR) << layername << "\tforward: " - << static_cast(clock() - start) / CLOCKS_PER_SEC - << " seconds."; + timer.Stop(); + LOG(ERROR) << layername << "\tforward: " << timer.ElapsedSeconds() << " seconds."; } - LOG(ERROR) << "Forward pass: " - << static_cast(clock() - forward_start) / CLOCKS_PER_SEC - << " seconds."; - clock_t backward_start = clock(); + forward_timer.Stop(); + LOG(ERROR) << "Forward pass: " << forward_timer.ElapsedSeconds() << " seconds."; + Timer backward_timer; + backward_timer.Start(); for (int i = layers.size() - 1; i >= 0; --i) { const string& layername = layers[i]->layer_param().name(); - if (Caffe::mode() == Caffe::GPU) { - cudaDeviceSynchronize(); - } - clock_t start = clock(); + timer.Start(); for (int j = 0; j < total_iter; ++j) { layers[i]->Backward(top_vecs[i], true, &bottom_vecs[i]); } - if (Caffe::mode() == Caffe::GPU) { - cudaDeviceSynchronize(); - } + timer.Stop(); LOG(ERROR) << layername << "\tbackward: " - << static_cast(clock() - start) / CLOCKS_PER_SEC - << " seconds."; + << timer.ElapsedSeconds() << " seconds."; } - LOG(ERROR) << "Backward pass: " - << static_cast(clock() - backward_start) / CLOCKS_PER_SEC - << " seconds."; - LOG(ERROR) << "Total Time: " - << static_cast(clock() - forward_start) / CLOCKS_PER_SEC - << " seconds."; + backward_timer.Stop(); + LOG(ERROR) << "Backward pass: " << backward_timer.ElapsedSeconds() << " seconds."; + total_timer.Stop(); + LOG(ERROR) << "Total Time: " << total_timer.ElapsedSeconds() << " seconds."; LOG(ERROR) << "*** Benchmark ends ***"; return 0; }