зеркало из https://github.com/microsoft/caffe.git
Test for im2col kernel
With associated Makefile changes for .cu tests This tests that the grid-stride loop works for im2col, using the CPU version as a reference.
This commit is contained in:
Родитель
2e23f28f62
Коммит
6450e0f0c8
14
Makefile
14
Makefile
|
@ -30,11 +30,12 @@ CXX_SRCS := $(shell find src/$(PROJECT) ! -name "test_*.cpp" -name "*.cpp")
|
|||
# HXX_SRCS are the header files
|
||||
HXX_SRCS := $(shell find include/$(PROJECT) -name "*.hpp")
|
||||
# CU_SRCS are the cuda source files
|
||||
CU_SRCS := $(shell find src/$(PROJECT) -name "*.cu")
|
||||
CU_SRCS := $(shell find src/$(PROJECT) ! -name "test_*.cu" -name "*.cu")
|
||||
# TEST_SRCS are the test source files
|
||||
TEST_MAIN_SRC := src/$(PROJECT)/test/test_caffe_main.cpp
|
||||
TEST_SRCS := $(shell find src/$(PROJECT) -name "test_*.cpp")
|
||||
TEST_SRCS := $(filter-out $(TEST_MAIN_SRC), $(TEST_SRCS))
|
||||
TEST_CU_SRCS := $(shell find src/$(PROJECT) -name "test_*.cu")
|
||||
GTEST_SRC := src/gtest/gtest-all.cpp
|
||||
# TEST_HDRS are the test header files
|
||||
TEST_HDRS := $(shell find src/$(PROJECT) -name "test_*.hpp")
|
||||
|
@ -101,7 +102,9 @@ OBJS := $(PROTO_OBJS) $(CXX_OBJS) $(CU_OBJS)
|
|||
TOOL_OBJS := $(addprefix $(BUILD_DIR)/, ${TOOL_SRCS:.cpp=.o})
|
||||
TOOL_BUILD_DIR := $(BUILD_DIR)/tools
|
||||
TEST_BUILD_DIR := $(BUILD_DIR)/src/$(PROJECT)/test
|
||||
TEST_OBJS := $(addprefix $(BUILD_DIR)/, ${TEST_SRCS:.cpp=.o})
|
||||
TEST_CXX_OBJS := $(addprefix $(BUILD_DIR)/, ${TEST_SRCS:.cpp=.o})
|
||||
TEST_CU_OBJS := $(addprefix $(BUILD_DIR)/, ${TEST_CU_SRCS:.cu=.cuo})
|
||||
TEST_OBJS := $(TEST_CXX_OBJS) $(TEST_CU_OBJS)
|
||||
GTEST_OBJ := $(addprefix $(BUILD_DIR)/, ${GTEST_SRC:.cpp=.o})
|
||||
GTEST_BUILD_DIR := $(dir $(GTEST_OBJ))
|
||||
EXAMPLE_OBJS := $(addprefix $(BUILD_DIR)/, ${EXAMPLE_SRCS:.cpp=.o})
|
||||
|
@ -329,13 +332,18 @@ $(TEST_BUILD_DIR)/%.o: src/$(PROJECT)/test/%.cpp $(HXX_SRCS) $(TEST_HDRS) \
|
|||
$(CXX) $< $(CXXFLAGS) -c -o $@
|
||||
@ echo
|
||||
|
||||
$(TEST_BUILD_DIR)/%.cuo: src/$(PROJECT)/test/%.cu $(HXX_SRCS) $(TEST_HDRS) \
|
||||
| $(TEST_BUILD_DIR)
|
||||
$(CUDA_DIR)/bin/nvcc $(NVCCFLAGS) $(CUDA_ARCH) -c $< -o $@
|
||||
@ echo
|
||||
|
||||
$(TEST_ALL_BIN): $(TEST_MAIN_SRC) $(TEST_OBJS) $(GTEST_OBJ) $(STATIC_NAME) \
|
||||
| $(TEST_BIN_DIR)
|
||||
$(CXX) $(TEST_MAIN_SRC) $(TEST_OBJS) $(GTEST_OBJ) $(STATIC_NAME) \
|
||||
-o $@ $(LINKFLAGS) $(LDFLAGS)
|
||||
@ echo
|
||||
|
||||
$(TEST_BIN_DIR)/%.testbin: $(TEST_BUILD_DIR)/%.o $(GTEST_OBJ) $(STATIC_NAME) \
|
||||
$(TEST_BIN_DIR)/%.testbin: $(TEST_BUILD_DIR)/%.*o $(GTEST_OBJ) $(STATIC_NAME) \
|
||||
| $(TEST_BIN_DIR)
|
||||
$(CXX) $(TEST_MAIN_SRC) $< $(GTEST_OBJ) $(STATIC_NAME) \
|
||||
-o $@ $(LINKFLAGS) $(LDFLAGS)
|
||||
|
|
|
@ -0,0 +1,125 @@
|
|||
// Copyright 2014 BVLC and contributors.
|
||||
|
||||
#include <cstring>
|
||||
#include <vector>
|
||||
|
||||
#include "cuda_runtime.h"
|
||||
#include "gtest/gtest.h"
|
||||
#include "caffe/blob.hpp"
|
||||
#include "caffe/common.hpp"
|
||||
#include "caffe/filler.hpp"
|
||||
#include "caffe/vision_layers.hpp"
|
||||
#include "caffe/util/im2col.hpp"
|
||||
|
||||
#include "caffe/test/test_caffe_main.hpp"
|
||||
|
||||
namespace caffe {
|
||||
|
||||
// Forward declare kernel functions
|
||||
template <typename Dtype>
|
||||
__global__ void im2col_gpu_kernel(const int n, const Dtype* data_im,
|
||||
const int height, const int width, const int ksize, const int pad,
|
||||
const int stride, const int height_col, const int width_col,
|
||||
Dtype* data_col);
|
||||
|
||||
extern cudaDeviceProp CAFFE_TEST_CUDA_PROP;
|
||||
|
||||
template <typename Dtype>
|
||||
class Im2colKernelTest : public ::testing::Test {
|
||||
protected:
|
||||
Im2colKernelTest()
|
||||
// big so launches > 1024 threads
|
||||
: blob_bottom_(new Blob<Dtype>(5, 500, 10, 10)),
|
||||
blob_top_(new Blob<Dtype>()),
|
||||
blob_top_cpu_(new Blob<Dtype>()) {
|
||||
FillerParameter filler_param;
|
||||
GaussianFiller<Dtype> filler(filler_param);
|
||||
filler.Fill(this->blob_bottom_);
|
||||
|
||||
height_ = blob_bottom_->height();
|
||||
width_ = blob_bottom_->width();
|
||||
channels_ = blob_bottom_->channels();
|
||||
pad_ = 0;
|
||||
stride_ = 2;
|
||||
kernel_size_ = 3;
|
||||
height_col_ = (height_ + 2 * pad_ - kernel_size_) / stride_ + 1;
|
||||
width_col_ = (width_ + 2 * pad_ - kernel_size_) / stride_ + 1;
|
||||
}
|
||||
|
||||
virtual ~Im2colKernelTest() {
|
||||
delete blob_bottom_;
|
||||
delete blob_top_;
|
||||
delete blob_top_cpu_;
|
||||
}
|
||||
|
||||
Blob<Dtype>* const blob_bottom_;
|
||||
Blob<Dtype>* const blob_top_;
|
||||
Blob<Dtype>* const blob_top_cpu_;
|
||||
int height_;
|
||||
int width_;
|
||||
int channels_;
|
||||
int pad_;
|
||||
int stride_;
|
||||
int kernel_size_;
|
||||
int height_col_;
|
||||
int width_col_;
|
||||
};
|
||||
|
||||
typedef ::testing::Types<float, double> Dtypes;
|
||||
TYPED_TEST_CASE(Im2colKernelTest, Dtypes);
|
||||
|
||||
TYPED_TEST(Im2colKernelTest, TestGPU) {
|
||||
Caffe::set_mode(Caffe::GPU);
|
||||
|
||||
// Reshape the blobs to correct size for im2col output
|
||||
this->blob_top_->Reshape(this->blob_bottom_->num(),
|
||||
this->channels_ * this->kernel_size_ * this->kernel_size_,
|
||||
this->height_col_,
|
||||
this->width_col_);
|
||||
|
||||
this->blob_top_cpu_->Reshape(this->blob_bottom_->num(),
|
||||
this->channels_ * this->kernel_size_ * this->kernel_size_,
|
||||
this->height_col_,
|
||||
this->width_col_);
|
||||
|
||||
const TypeParam* bottom_data = this->blob_bottom_->gpu_data();
|
||||
TypeParam* top_data = this->blob_top_->mutable_gpu_data();
|
||||
TypeParam* cpu_data = this->blob_top_cpu_->mutable_cpu_data();
|
||||
|
||||
// CPU Version
|
||||
for (int n = 0; n < this->blob_bottom_->num(); ++n) {
|
||||
im2col_cpu(this->blob_bottom_->cpu_data() + this->blob_bottom_->offset(n),
|
||||
this->channels_, this->height_, this->width_, this->kernel_size_,
|
||||
this->pad_, this->stride_, cpu_data + this->blob_top_cpu_->offset(n));
|
||||
}
|
||||
|
||||
// GPU version
|
||||
int num_kernels = this->channels_ * this->height_col_ * this->width_col_;
|
||||
int default_grid_dim = CAFFE_GET_BLOCKS(num_kernels);
|
||||
|
||||
// Launch with different grid sizes
|
||||
for (int grid_div = 2; grid_div <= 8; grid_div++) {
|
||||
for (int n = 0; n < this->blob_bottom_->num(); ++n) {
|
||||
int grid_dim = default_grid_dim/grid_div;
|
||||
// NOLINT_NEXT_LINE(whitespace/operators)
|
||||
im2col_gpu_kernel<TypeParam><<<grid_dim, CAFFE_CUDA_NUM_THREADS>>>(
|
||||
num_kernels, bottom_data + this->blob_bottom_->offset(n),
|
||||
this->height_, this->width_, this->kernel_size_, this->pad_,
|
||||
this->stride_, this->height_col_, this->width_col_,
|
||||
top_data + this->blob_top_->offset(n));
|
||||
CUDA_POST_KERNEL_CHECK;
|
||||
}
|
||||
|
||||
// Compare results against CPU version
|
||||
for (int i = 0; i < this->blob_top_->count(); ++i) {
|
||||
TypeParam cpuval = cpu_data[i];
|
||||
TypeParam gpuval = this->blob_top_->cpu_data()[i];
|
||||
EXPECT_EQ(cpuval, gpuval);
|
||||
if (cpuval != gpuval) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace caffe
|
Загрузка…
Ссылка в новой задаче