diff --git a/include/caffe/util/math_functions.hpp b/include/caffe/util/math_functions.hpp index 5d4a8e93..268cb2bd 100644 --- a/include/caffe/util/math_functions.hpp +++ b/include/caffe/util/math_functions.hpp @@ -5,6 +5,7 @@ #define CAFFE_UTIL_MATH_FUNCTIONS_H_ #include // for std::fabs +#include // for signbit #include #include "caffe/util/mkl_alternate.hpp" @@ -147,11 +148,38 @@ inline char caffe_sign(Dtype val) { template <> \ void caffe_cpu_##name(const int n, const double* x, double* y) + +#define DEFINE_AND_INSTANTIATE_GPU_UNARY_FUNC(name, operation) \ +template \ +__global__ void name##_kernel(const int n, const Dtype* x, Dtype* y) { \ + int index = threadIdx.x + blockIdx.x * blockDim.x; \ + if (index < n) { \ + operation; \ + } \ +} \ +template <> \ +void caffe_gpu_##name(const int n, const float* x, float* y) { \ + name##_kernel<<>>( \ + n, x, y); \ +} \ +template <> \ +void caffe_gpu_##name(const int n, const double* x, double* y) { \ + name##_kernel<<>>( \ + n, x, y); \ +} + +// output is 1 for the positives, 0 for zero, and -1 for the negatives DEFINE_CAFFE_CPU_UNARY_FUNC(sign, y[i] = caffe_sign(x[i])); template void caffe_gpu_sign(const int n, const Dtype* x, Dtype* y); +// returns a nonzero value if the input has its sign bit set. 
+DEFINE_CAFFE_CPU_UNARY_FUNC(signbit, y[i] = std::signbit(x[i])); + +template +void caffe_gpu_signbit(const int n, const Dtype* x, Dtype* y); + DEFINE_CAFFE_CPU_UNARY_FUNC(fabs, y[i] = std::fabs(x[i])); template diff --git a/src/caffe/test/test_math_functions.cpp b/src/caffe/test/test_math_functions.cpp index 00f28bad..d314d73b 100644 --- a/src/caffe/test/test_math_functions.cpp +++ b/src/caffe/test/test_math_functions.cpp @@ -119,6 +119,27 @@ TYPED_TEST(MathFunctionsTest, TestSignGPU){ } } +TYPED_TEST(MathFunctionsTest, TestSignbitCPU){ + int n = this->blob_bottom_->count(); + const TypeParam* x = this->blob_bottom_->cpu_data(); + caffe_cpu_signbit(n, x, this->blob_bottom_->mutable_cpu_diff()); + const TypeParam* signbits = this->blob_bottom_->cpu_diff(); + for (int i = 0; i < n; ++i) { + CHECK_EQ(signbits[i], x[i] < 0 ? 1 : 0); + } +} + +TYPED_TEST(MathFunctionsTest, TestSignbitGPU){ + int n = this->blob_bottom_->count(); + caffe_gpu_signbit(n, this->blob_bottom_->gpu_data(), + this->blob_bottom_->mutable_gpu_diff()); + const TypeParam* signbits = this->blob_bottom_->cpu_diff(); + const TypeParam* x = this->blob_bottom_->cpu_data(); + for (int i = 0; i < n; ++i) { + CHECK_EQ(signbits[i], x[i] < 0 ? 
1 : 0); + } +} + TYPED_TEST(MathFunctionsTest, TestFabsCPU){ int n = this->blob_bottom_->count(); const TypeParam* x = this->blob_bottom_->cpu_data(); diff --git a/src/caffe/util/math_functions.cpp b/src/caffe/util/math_functions.cpp index ef347a1f..ad83a998 100644 --- a/src/caffe/util/math_functions.cpp +++ b/src/caffe/util/math_functions.cpp @@ -411,6 +411,7 @@ void caffe_gpu_asum(const int n, const double* x, double* y) { } INSTANTIATE_CAFFE_CPU_UNARY_FUNC(sign); +INSTANTIATE_CAFFE_CPU_UNARY_FUNC(signbit); INSTANTIATE_CAFFE_CPU_UNARY_FUNC(fabs); template <> diff --git a/src/caffe/util/math_functions.cu b/src/caffe/util/math_functions.cu index 72cbb00c..e3eaacc1 100644 --- a/src/caffe/util/math_functions.cu +++ b/src/caffe/util/math_functions.cu @@ -4,7 +4,7 @@ #include #include #include -#include // CUDA's, not caffe's, for fabs +#include // CUDA's, not caffe's, for fabs, signbit #include "caffe/common.hpp" #include "caffe/util/math_functions.hpp" @@ -35,44 +35,8 @@ void caffe_gpu_mul(const int N, const double* a, N, a, b, y); } -template -__global__ void sign_kernel(const int n, const Dtype* x, Dtype* y) { - int index = threadIdx.x + blockIdx.x * blockDim.x; - if (index < n) { - y[index] = (Dtype(0) < x[index]) - (x[index] < Dtype(0)); - } -} - -template <> -void caffe_gpu_sign(const int n, const float* x, float* y) { - sign_kernel<<>>( - n, x, y); -} - -template <> -void caffe_gpu_sign(const int n, const double* x, double* y) { - sign_kernel<<>>( - n, x, y); -} - -template -__global__ void fabs_kernel(const int n, const Dtype* x, Dtype* y) { - int index = threadIdx.x + blockIdx.x * blockDim.x; - if (index < n) { - y[index] = fabs(x[index]); - } -} - -template <> -void caffe_gpu_fabs(const int n, const float* x, float* y) { - fabs_kernel<<>>( - n, x, y); -} - -template <> -void caffe_gpu_fabs(const int n, const double* x, double* y) { - fabs_kernel<<>>( - n, x, y); -} +DEFINE_AND_INSTANTIATE_GPU_UNARY_FUNC(sign, y[index] = (Dtype(0) < x[index]) - (x[index] < 
Dtype(0))); +DEFINE_AND_INSTANTIATE_GPU_UNARY_FUNC(signbit, y[index] = signbit(x[index])); +DEFINE_AND_INSTANTIATE_GPU_UNARY_FUNC(fabs, y[index] = fabs(x[index])); } // namespace caffe