Mirror of https://github.com/microsoft/O-CNN.git
Update octree
Parent: e2a39b11ef
Commit: f74c9539a5
@@ -1244,7 +1244,8 @@ void Octree::octree2mesh(vector<float>& V, vector<int>& F, int depth_start,
     vector<float> pts, normals, pts_ref;
     for (int i = 0; i < num; ++i) {
-      if (node_type(child_d[i]) == kInternelNode && d != depth) continue;
+      if ((node_type(child_d[i]) == kInternelNode && d != depth) ||
+          (node_type(child_d[i]) == kLeaf && d == depth)) continue;
 
       float n[3], pt[3], pt_ref[3];
       node_normal(n, i, d);
 
@@ -3,11 +3,14 @@
 #include "logs.h"
+#include <algorithm>
 
 
 namespace octree {
 
 template <typename Dtype>
-void OctreeBaseConv<Dtype>::setup(const vector<int>& kernel_size, const int stride,
-    const int curr_depth, const int channel_in, const int channel_out) {
+void OctreeBaseConv<Dtype>::setup(const vector<int>& kernel_size,
+                                  const int stride, const int curr_depth,
+                                  const int channel_in, const int channel_out,
+                                  const bool nempty) {
   // kernel size
   kernel_size_ = kernel_size;
   CHECK(kernel_size_[0] < 4 && kernel_size_[1] < 4 && kernel_size_[2] < 4)
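The only behavioral change to setup() is the trailing nempty flag (it defaults to false in the header). A minimal usage sketch follows; it is not part of the diff, MyOctreeConvLayer is a hypothetical subclass of octree::OctreeBaseConv<float>, and the numeric values are made up:

    // Hypothetical caller; only the final `nempty` argument is new in this commit.
    MyOctreeConvLayer conv;
    conv.setup(std::vector<int>{3, 3, 3},             // 3x3x3 kernel
               /*stride=*/1, /*curr_depth=*/5,        // octree depth of the input
               /*channel_in=*/32, /*channel_out=*/64,
               /*nempty=*/true);                      // convolve non-empty nodes only
    // As the header notes: set engine_cpu_/engine_gpu_, octree_ and ni_gpu_ptr_
    // after setup() and before reshape().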
@@ -31,98 +34,155 @@ void OctreeBaseConv<Dtype>::setup(const vector<int>& kernel_size, const int stri
     std::swap(conv_out_channels_, conv_in_channels_);
   }
 
+  // !!! perform the convolution on non-empty octree nodes or not
+  nempty_ = nempty;
+
   kernel_sdim_ = kernel_size_[0] * kernel_size_[1] * kernel_size_[2];
   kernel_dim_ = kernel_sdim_ * conv_in_channels_;
 
   ni_cpu_ptr_ = NeighHelper::get_ni(kernel_size_).data();
-  ni_gpu_ptr_ = nullptr;   // must be set before using
+  ni_gpu_ptr_ = nullptr;  // must be set before using
 }
 
 template <typename Dtype>
 void OctreeBaseConv<Dtype>::reshape() {
-  // weight shape
-  weights_shape_ = vector<int> {conv_out_channels_, conv_in_channels_ * kernel_sdim_};
-
-  // compute top shape
-  int btm_h = octree_.info().node_num(curr_depth_);
-  int top_blob_depth = curr_depth_, top_h = btm_h;
+  // assign depth for different blobs
+  // curr_depth_ and top_depth are the octree depth of the input and output
+  // data; workspace_depth_ is the octree depth of the `col` data, different
+  // from top_depth, workspace_depth_ is always the same as the depth of larger
+  // data when doing octree2col or col2octree
+  int top_depth = workspace_depth_ = curr_depth_;
   if (stride_ == 2) {
     if (is_deconvolution_layer()) {
-      top_blob_depth++;
-      top_h = octree_.info().node_num(top_blob_depth);
+      top_depth = workspace_depth_ = curr_depth_ + 1;
     } else {
-      top_blob_depth--;
-      top_h = octree_.info().node_num_nempty(top_blob_depth);
+      top_depth = curr_depth_ - 1;
     }
-    CHECK(0 <= top_blob_depth && top_blob_depth <= octree_.info().depth());
+    CHECK(0 <= top_depth && top_depth <= octree_.info().depth());
   }
-  if (top_h == 0) top_h = 1; // avoid degenerated case
-  top_shape_ = vector<int> { 1, num_output_, top_h, 1 };
-
-  // reshape workspce
-  workspace_depth_ = curr_depth_; // the depth value used for octree2col
-  if (is_deconvolution_layer() && stride_ == 2) workspace_depth_++;
-  workspace_h_ = btm_h;
-  if (stride_ == 2) {
-    if (is_deconvolution_layer()) { workspace_h_ = top_h >> 3; }
-    else { workspace_h_ = btm_h >> 3; }
+
+  // weight shape
+  weights_shape_ =
+      vector<int>{conv_out_channels_, conv_in_channels_ * kernel_sdim_};
+
+  // top shape
+  int top_h = 0;
+  if (!nempty_) {
+    top_h = octree_.info().node_num(top_depth);
+    if (stride_ == 2 && !is_deconvolution_layer()) {
+      // In this case, the octree_pad is needed to pad the output data,
+      // so the top_h is equal to the non-empty node number.
+      top_h = octree_.info().node_num_nempty(top_depth);
+    }
+  } else {
+    top_h = octree_.info().node_num_nempty(top_depth);
+  }
+  if (top_h == 0) top_h = 1;  // avoid degenerated case
+  top_shape_ = vector<int>{1, num_output_, top_h, 1};
+
+  // workspce shape
+  workspace_h_ = top_h;  // equals to the output height if stride is 1
+  if (stride_ == 2) {
+    if (is_deconvolution_layer()) {
+      workspace_h_ = octree_.info().node_num(top_depth) / 8;
+    } else {
+      workspace_h_ = octree_.info().node_num(curr_depth_) / 8;
+    }
   }
 
+  // child_h_, ichild_h_, octree_h_ are used for octree2col/col2octree
+  // only if nempty_ is True.
+  if (nempty_) {
+    child_h_ = octree_.info().node_num(workspace_depth_);
+    ichild_h_ = octree_.info().node_num_nempty(workspace_depth_);
+
+    // octree_h is the height of octree data for octree2col/col2octree
+    octree_h_ = octree_.info().node_num_nempty(curr_depth_);
+    if (stride_ == 2 && is_deconvolution_layer()) {
+      octree_h_ = octree_.info().node_num_nempty(top_depth);
+    }
+  }
+
   // workspace number and workspace actual shape
   workspace_n_ = 1;
   workspace_ha_ = workspace_h_;
-  uint64 ideal_size = (uint64) workspace_h_ * (uint64) kernel_dim_;
+  uint64 ideal_size = (uint64)workspace_h_ * (uint64)kernel_dim_;
   if (ideal_size > MAX_SIZE && !is_1x1_) {
     workspace_n_ = (ideal_size + MAX_SIZE - 1) / MAX_SIZE;
     workspace_ha_ = (workspace_h_ + workspace_n_ - 1) / workspace_n_;
   }
-  workspace_shape_ = vector<int> { kernel_dim_, workspace_ha_};
+  workspace_shape_ = vector<int>{kernel_dim_, workspace_ha_};
 
-  // reshape result_buffer_
+  // result_buffer_ shape
  if (workspace_n_ > 1) {
-    result_buffer_shape_ = vector<int> { conv_out_channels_, workspace_ha_ };
+    result_buffer_shape_ = vector<int>{conv_out_channels_, workspace_ha_};
   } else {
     result_buffer_shape_.clear();
   }
+}
 
-  // reshape data_buffer_
-  if (stride_ == 2) {
-    data_buffer_shape_ = vector<int> { 1, conv_out_channels_, workspace_h_, 1 };
+template <typename Dtype>
+void OctreeBaseConv<Dtype>::octree2col_cpu_wrapper(Dtype* workspace,
+                                                   const Dtype* bottom_data,
+                                                   const int n) {
+  if (!nempty_) {
+    octree2col_cpu<Dtype>(workspace, bottom_data, conv_in_channels_,
+                          workspace_h_, kernel_sdim_, stride_,
+                          octree_.neighbor_cpu(workspace_depth_), ni_cpu_ptr_,
+                          workspace_ha_, n);
   } else {
-    data_buffer_shape_.clear();
+    octree2colP_cpu<Dtype>(workspace, bottom_data, conv_in_channels_,
+                           workspace_h_, octree_h_, kernel_sdim_, stride_,
+                           octree_.neighbor_gpu(workspace_depth_), ni_cpu_ptr_,
+                           child_, ichild_, workspace_ha_, n);
   }
 }
 
+template <typename Dtype>
+void OctreeBaseConv<Dtype>::col2octree_cpu_wrapper(const Dtype* col_diff,
+                                                   Dtype* bottom_diff, int n) {
+  if (!nempty_) {
+    col2octree_cpu<Dtype>(col_diff, bottom_diff, conv_in_channels_,
+                          workspace_h_, kernel_sdim_, stride_,
+                          octree_.neighbor_cpu(workspace_depth_), ni_cpu_ptr_,
+                          workspace_ha_, n);
+  } else {
+    col2octreeP_cpu<Dtype>(col_diff, bottom_diff, conv_in_channels_,
+                           workspace_h_, octree_h_, kernel_sdim_, stride_,
+                           octree_.neighbor_gpu(workspace_depth_), ni_cpu_ptr_,
+                           child_, ichild_, workspace_ha_, n);
+  }
+}
+
 template <typename Dtype>
 void OctreeBaseConv<Dtype>::forward_cpu_gemm(Dtype* top_data,
-    const Dtype* bottom_data, const Dtype* weights) {
+                                             const Dtype* bottom_data,
+                                             const Dtype* weights) {
   const Dtype* col_data = bottom_data;
   Dtype* result_data = workspace_n_ == 1 ? top_data : result_buffer_;
   for (int n = 0; n < workspace_n_; ++n) {
     if (!is_1x1_) {
-      octree2col_cpu<Dtype>(workspace_,
-          bottom_data, conv_in_channels_, workspace_h_, kernel_sdim_,
-          stride_, octree_.neighbor_cpu(workspace_depth_),
-          ni_cpu_ptr_, workspace_ha_, n);
+      octree2col_cpu_wrapper(workspace_, bottom_data, n);
       col_data = workspace_;
     }
 
-    engine_cpu_->gemm(false, false, conv_out_channels_,
-        workspace_ha_, kernel_dim_, Dtype(1.0), weights, col_data,
-        Dtype(0), result_data);
+    engine_cpu_->gemm(false, false, conv_out_channels_, workspace_ha_,
+                      kernel_dim_, Dtype(1.0), weights, col_data, Dtype(0),
+                      result_data);
 
     if (workspace_n_ == 1) return;
     int num = std::min(workspace_ha_, workspace_h_ - n * workspace_ha_);
     for (int c = 0; c < conv_out_channels_; ++c) {
       memcpy_cpu(num, result_data + c * workspace_ha_,
-          top_data + c * workspace_h_ + n * workspace_ha_);
+                 top_data + c * workspace_h_ + n * workspace_ha_);
     }
   }
 }
 
 template <typename Dtype>
 void OctreeBaseConv<Dtype>::backward_cpu_gemm(Dtype* bottom_diff,
-    const Dtype* top_diff, const Dtype* weights) {
+                                              const Dtype* top_diff,
+                                              const Dtype* weights) {
   Dtype* col_diff = is_1x1_ ? bottom_diff : workspace_;
   for (int n = 0; n < workspace_n_; ++n) {
     const Dtype* result_buffer = top_diff;
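The chunking arithmetic in reshape() above is easy to misread, so here is a standalone restatement (an illustration, not repository code) of how the col buffer is split once it would exceed MAX_SIZE elements, followed by a worked example with made-up numbers:

    #include <cstdint>

    // Same rule as in reshape(): if the full col buffer (kernel_dim x workspace_h)
    // would exceed max_size elements, process it in workspace_n chunks of
    // workspace_ha rows each (the last chunk may be shorter).
    void split_workspace(int64_t workspace_h, int64_t kernel_dim, int64_t max_size,
                         bool is_1x1, int& workspace_n, int& workspace_ha) {
      workspace_n = 1;
      workspace_ha = static_cast<int>(workspace_h);
      int64_t ideal_size = workspace_h * kernel_dim;
      if (ideal_size > max_size && !is_1x1) {
        workspace_n = static_cast<int>((ideal_size + max_size - 1) / max_size);  // ceil
        workspace_ha = static_cast<int>((workspace_h + workspace_n - 1) / workspace_n);
      }
    }
    // Example (illustrative values): workspace_h = 500000 and kernel_dim = 27 * 64 = 1728
    // give ideal_size = 864000000; with max_size = 256 * 1024 * 1024 this yields
    // workspace_n = 4 and workspace_ha = 125000.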
@@ -131,28 +191,25 @@ void OctreeBaseConv<Dtype>::backward_cpu_gemm(Dtype* bottom_diff,
       int num = std::min(workspace_ha_, workspace_h_ - n * workspace_ha_);
       for (int c = 0; c < conv_out_channels_; ++c) {
         memcpy_cpu(num, top_diff + c * workspace_h_ + n * workspace_ha_,
-            buffer_ + c * workspace_ha_);
+                   buffer_ + c * workspace_ha_);
       }
       result_buffer = result_buffer_;
     }
 
-    engine_cpu_->gemm(true, false, kernel_dim_,
-        workspace_ha_, conv_out_channels_, Dtype(1.0), weights,
-        result_buffer, Dtype(0.0), col_diff);
+    engine_cpu_->gemm(true, false, kernel_dim_, workspace_ha_,
+                      conv_out_channels_, Dtype(1.0), weights, result_buffer,
+                      Dtype(0.0), col_diff);
 
     if (!is_1x1_) {
-      col2octree_cpu<Dtype>(col_diff, bottom_diff,
-          conv_in_channels_, workspace_h_, kernel_sdim_,
-          stride_, octree_.neighbor_cpu(workspace_depth_),
-          ni_cpu_ptr_, workspace_ha_, n);
+      col2octree_cpu_wrapper(col_diff, bottom_diff, n);
     }
   }
 }
 
-
 template <typename Dtype>
 void OctreeBaseConv<Dtype>::weight_cpu_gemm(Dtype* weights_diff,
-    const Dtype* bottom_data, const Dtype* top_diff) {
+                                            const Dtype* bottom_data,
+                                            const Dtype* top_diff) {
   int num = num_elements(weights_shape_);
   memset_cpu(num, Dtype(0), weights_diff);
 
@@ -160,10 +217,7 @@ void OctreeBaseConv<Dtype>::weight_cpu_gemm(Dtype* weights_diff,
   const Dtype* result_buffer = top_diff;
   for (int n = 0; n < workspace_n_; ++n) {
     if (!is_1x1_) {
-      octree2col_cpu<Dtype>(workspace_,
-          bottom_data, conv_in_channels_, workspace_h_, kernel_sdim_,
-          stride_, octree_.neighbor_cpu(workspace_depth_),
-          ni_cpu_ptr_, workspace_ha_, n);
+      octree2col_cpu_wrapper(workspace_, bottom_data, n);
       col_data = workspace_;
     }
 
@@ -172,49 +226,81 @@ void OctreeBaseConv<Dtype>::weight_cpu_gemm(Dtype* weights_diff,
       Dtype* buffer = result_buffer_;
       for (int c = 0; c < conv_out_channels_; ++c) {
         memcpy_cpu(num, top_diff + c * workspace_h_ + n * workspace_ha_,
-            buffer + c * workspace_ha_);
+                   buffer + c * workspace_ha_);
       }
       result_buffer = result_buffer_;
     }
 
-    engine_cpu_->gemm(false, true, conv_out_channels_,
-        kernel_dim_, workspace_ha_, Dtype(1.0), result_buffer, col_data,
-        Dtype(1.0), weights_diff);
+    engine_cpu_->gemm(false, true, conv_out_channels_, kernel_dim_,
+                      workspace_ha_, Dtype(1.0), result_buffer, col_data,
+                      Dtype(1.0), weights_diff);
   }
 }
 
 #ifdef USE_CUDA
 
+template <typename Dtype>
+void OctreeBaseConv<Dtype>::octree2col_gpu_wrapper(Dtype* workspace,
+                                                   const Dtype* bottom_data,
+                                                   const int n) {
+  if (!nempty_) {
+    octree2col_gpu<Dtype>(workspace, bottom_data, conv_in_channels_,
+                          workspace_h_, kernel_sdim_, stride_,
+                          octree_.neighbor_gpu(workspace_depth_), ni_gpu_ptr_,
+                          workspace_ha_, n);
+  } else {
+    octree2colP_gpu<Dtype>(workspace, bottom_data, conv_in_channels_,
+                           workspace_h_, octree_h_, kernel_sdim_, stride_,
+                           octree_.neighbor_gpu(workspace_depth_), ni_gpu_ptr_,
+                           child_, ichild_, workspace_ha_, n);
+  }
+}
+
+template <typename Dtype>
+void OctreeBaseConv<Dtype>::col2octree_gpu_wrapper(const Dtype* col_diff,
+                                                   Dtype* bottom_diff, int n) {
+  if (!nempty_) {
+    col2octree_gpu<Dtype>(col_diff, bottom_diff, conv_in_channels_,
+                          workspace_h_, kernel_sdim_, stride_,
+                          octree_.neighbor_gpu(workspace_depth_), ni_gpu_ptr_,
+                          workspace_ha_, n);
+  } else {
+    col2octreeP_gpu<Dtype>(col_diff, bottom_diff, conv_in_channels_,
+                           workspace_h_, octree_h_, kernel_sdim_, stride_,
+                           octree_.neighbor_gpu(workspace_depth_), ni_gpu_ptr_,
+                           child_, ichild_, workspace_ha_, n);
+  }
+}
+
 template <typename Dtype>
 void OctreeBaseConv<Dtype>::forward_gpu_gemm(Dtype* top_data,
-    const Dtype* bottom_data, const Dtype* weights) {
+                                             const Dtype* bottom_data,
+                                             const Dtype* weights) {
   const Dtype* col_data = bottom_data;
   Dtype* result_data = workspace_n_ == 1 ? top_data : result_buffer_;
   for (int n = 0; n < workspace_n_; ++n) {
     if (!is_1x1_) {
-      octree2col_gpu<Dtype>(workspace_,
-          bottom_data, conv_in_channels_, workspace_h_, kernel_sdim_,
-          stride_, octree_.neighbor_gpu(workspace_depth_),
-          ni_gpu_ptr_, workspace_ha_, n);
+      octree2col_gpu_wrapper(workspace_, bottom_data, n);
       col_data = workspace_;
     }
 
-    engine_gpu_->gemm(false, false, conv_out_channels_,
-        workspace_ha_, kernel_dim_, Dtype(1.0), weights, col_data,
-        Dtype(0), result_data);
+    engine_gpu_->gemm(false, false, conv_out_channels_, workspace_ha_,
+                      kernel_dim_, Dtype(1.0), weights, col_data, Dtype(0),
+                      result_data);
 
     if (workspace_n_ == 1) return;
     int num = std::min(workspace_ha_, workspace_h_ - n * workspace_ha_);
     for (int c = 0; c < conv_out_channels_; ++c) {
       memcpy_gpu(num, result_data + c * workspace_ha_,
-          top_data + c * workspace_h_ + n * workspace_ha_);
+                 top_data + c * workspace_h_ + n * workspace_ha_);
     }
   }
 }
 
 template <typename Dtype>
 void OctreeBaseConv<Dtype>::backward_gpu_gemm(Dtype* bottom_diff,
-    const Dtype* top_diff, const Dtype* weights) {
+                                              const Dtype* top_diff,
+                                              const Dtype* weights) {
   Dtype* col_diff = is_1x1_ ? bottom_diff : workspace_;
   for (int n = 0; n < workspace_n_; ++n) {
     const Dtype* result_buffer = top_diff;
@@ -223,28 +309,25 @@ void OctreeBaseConv<Dtype>::backward_gpu_gemm(Dtype* bottom_diff,
       int num = std::min(workspace_ha_, workspace_h_ - n * workspace_ha_);
       for (int c = 0; c < conv_out_channels_; ++c) {
         memcpy_gpu(num, top_diff + c * workspace_h_ + n * workspace_ha_,
-            buffer_ + c * workspace_ha_);
+                   buffer_ + c * workspace_ha_);
       }
       result_buffer = result_buffer_;
     }
 
-    engine_gpu_->gemm(true, false, kernel_dim_,
-        workspace_ha_, conv_out_channels_, Dtype(1.0), weights,
-        result_buffer, Dtype(0.0), col_diff);
+    engine_gpu_->gemm(true, false, kernel_dim_, workspace_ha_,
+                      conv_out_channels_, Dtype(1.0), weights, result_buffer,
+                      Dtype(0.0), col_diff);
 
     if (!is_1x1_) {
-      col2octree_gpu<Dtype>(col_diff, bottom_diff,
-          conv_in_channels_, workspace_h_, kernel_sdim_,
-          stride_, octree_.neighbor_gpu(workspace_depth_),
-          ni_gpu_ptr_, workspace_ha_, n);
+      col2octree_gpu_wrapper(col_diff, bottom_diff, n);
     }
   }
 }
 
-
 template <typename Dtype>
 void OctreeBaseConv<Dtype>::weight_gpu_gemm(Dtype* weights_diff,
-    const Dtype* bottom_data, const Dtype* top_diff) {
+                                            const Dtype* bottom_data,
+                                            const Dtype* top_diff) {
   int num = num_elements(weights_shape_);
   memset_gpu(num, Dtype(0), weights_diff);
 
@@ -252,10 +335,7 @@ void OctreeBaseConv<Dtype>::weight_gpu_gemm(Dtype* weights_diff,
   const Dtype* result_buffer = top_diff;
   for (int n = 0; n < workspace_n_; ++n) {
     if (!is_1x1_) {
-      octree2col_gpu<Dtype>(workspace_,
-          bottom_data, conv_in_channels_, workspace_h_, kernel_sdim_,
-          stride_, octree_.neighbor_gpu(workspace_depth_),
-          ni_gpu_ptr_, workspace_ha_, n);
+      octree2col_gpu_wrapper(workspace_, bottom_data, n);
       col_data = workspace_;
     }
 
@@ -264,18 +344,18 @@ void OctreeBaseConv<Dtype>::weight_gpu_gemm(Dtype* weights_diff,
       Dtype* buffer = result_buffer_;
       for (int c = 0; c < conv_out_channels_; ++c) {
         memcpy_gpu(num, top_diff + c * workspace_h_ + n * workspace_ha_,
-            buffer + c * workspace_ha_);
+                   buffer + c * workspace_ha_);
       }
       result_buffer = result_buffer_;
     }
 
-    engine_gpu_->gemm(false, true, conv_out_channels_,
-        kernel_dim_, workspace_ha_, Dtype(1.0), result_buffer, col_data,
-        Dtype(1.0), weights_diff);
+    engine_gpu_->gemm(false, true, conv_out_channels_, kernel_dim_,
+                      workspace_ha_, Dtype(1.0), result_buffer, col_data,
+                      Dtype(1.0), weights_diff);
   }
 }
 
-#endif // USE_CUDA
+#endif  // USE_CUDA
 
 template class OctreeBaseConv<float>;
 template class OctreeBaseConv<double>;
@@ -13,11 +13,13 @@ template <typename Dtype>
 class OctreeBaseConv {
  public:
   explicit OctreeBaseConv(int max_size = 256 * 1024 * 1024)
-      : MAX_SIZE(max_size), engine_cpu_(nullptr), engine_gpu_(nullptr) {}
+      : MAX_SIZE(max_size), engine_cpu_(nullptr), engine_gpu_(nullptr),
+        nempty_(false), child_(nullptr), ichild_(nullptr) {}
   void setup(const vector<int>& kernel_size, const int stride,
-      const int curr_depth, const int channel_in, const int channel_out);
-  // after setup() and before reshpae(),
-  // please set engine_cpu/gpu_, octree_ and ni_gpu_ptr_
+             const int curr_depth, const int channel_in, const int channel_out,
+             const bool nempty = false);
+  // !!! Please set engine_cpu/gpu_, octree_ and ni_gpu_ptr_
+  // after calling setup() and before reshpae(),
   void reshape();
 
  protected:
@@ -40,11 +42,16 @@ class OctreeBaseConv {
   void weight_gpu_gemm(Dtype* weights_diff, const Dtype* bottom_data,
                        const Dtype* top_diff);
 
+  void octree2col_cpu_wrapper(Dtype* workspace, const Dtype* bottom_data, int n);
+  void col2octree_cpu_wrapper(const Dtype* workspace, Dtype* bottom_data, int n);
+  void octree2col_gpu_wrapper(Dtype* workspace, const Dtype* bottom_data, int n);
+  void col2octree_gpu_wrapper(const Dtype* workspace, Dtype* bottom_data, int n);
+
  protected:
   int stride_;
   vector<int> kernel_size_;
   int kernel_dim_;
-  int kernel_sdim_; // spatial dim of the kernel
+  int kernel_sdim_;  // spatial dim of the kernel
   bool is_1x1_;
 
   // input channel & output channel
@@ -59,19 +66,17 @@ class OctreeBaseConv {
   OctreeParser octree_;
 
   int workspace_n_;
-  int workspace_ha_; // actual worksapce h
-  int workspace_h_; // ideal workspace h
-  int workspace_depth_;
+  int workspace_ha_;     // actual worksapce h, the height of `col` data
+  int workspace_h_;      // ideal workspace h
+  int workspace_depth_;  // the depth value used for octree2col
 
   vector<int> top_shape_;
   vector<int> weights_shape_;
   vector<int> workspace_shape_;
-  vector<int> data_buffer_shape_;
   vector<int> result_buffer_shape_;
 
   Dtype* workspace_;
-  Dtype* data_buffer_;
-  Dtype* result_buffer_;
+  Dtype* result_buffer_;  // hold the temporary result of octree2col
 
   const int* ni_cpu_ptr_; // hold cpu data from NeighHelper::get_ni(kernel_size_)
   const int* ni_gpu_ptr_; // hold gpu data from NeighHelper::get_ni(kernel_size_)
@@ -80,6 +85,15 @@ class OctreeBaseConv {
 
   GEMMEngine<Dtype>* engine_cpu_;
   GEMMEngine<Dtype>* engine_gpu_;
+
+  bool nempty_;  // perform convolution on non-empty voxels
+
+  // used for octree2col and col2octree on non-empty voxels
+  int octree_h_;  // the height of octree data
+  int child_h_;
+  int ichild_h_;
+  const int* child_;
+  const int* ichild_;
 };
 
 }  // namespace octree
@@ -41,6 +41,9 @@ class OctreeInfo {
   int node_num(int d) const { return nnum_[d]; }
   int node_num_cum(int d) const { return nnum_cum_[d]; }
   int node_num_nempty(int d) const { return nnum_nempty_[d]; }
+  const int* node_num_ptr() const { return nnum_; }
+  const int* node_nempty_ptr() const { return nnum_nempty_; }
+  const int* node_num_cum_ptr() const { return nnum_cum_; }
   int total_nnum() const { return nnum_cum_[depth_ + 1]; }
   int total_nnum_capacity() const { return nnum_cum_[depth_ + 2]; }
   int content_flags() const { return content_flags_; }
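A short, hedged usage sketch of the new accessors (a fragment, not from the diff; `info` stands for an OctreeInfo instance obtained elsewhere): they expose the per-depth counter arrays as raw pointers, e.g. for copying all depths at once instead of querying node_num(d) in a loop.

    // Copy the per-depth node counts for depths 0..info.depth() in one go.
    // The pointer-based accessors return the same data that node_num(d),
    // node_num_nempty(d) and node_num_cum(d) read element-wise.
    const int* nnum = info.node_num_ptr();
    const int* nnum_nempty = info.node_nempty_ptr();
    std::vector<int> counts(nnum, nnum + info.depth() + 1);
    std::vector<int> counts_nempty(nnum_nempty, nnum_nempty + info.depth() + 1);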
@@ -10,7 +10,7 @@ void NeighHelper::init_neigh_index() {
     { "331", 6 }, { "313", 7 }, { "133", 8 } };
 
   const vector<vector<int> > vec{ {} /* 333, 27 */, { 13 } /* 111, 1 */,
-    { 13, 14, 16, 17, 22, 23, 25, 26 } /* 222, 8 */,
+    { 13, 14, 16, 17, 22, 23, 25, 26 } /* 222, 8, 8 octants */,
     { 4, 13, 22 } /* 311, 3 */,
     { 10, 13, 16 } /* 131, 3 */,
     { 12, 13, 14 } /* 113, 3 */,
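The index lists above address positions inside the 3x3x3 neighborhood, flattened with the first kernel dimension as the slowest-varying axis (index = a * 9 + b * 3 + c, each coordinate in {0, 1, 2}; the center is 13). A tiny standalone check (not repository code) reproduces the "222" entry:

    #include <cstdio>
    int main() {
      // The "222" kernel keeps the eight offsets with every coordinate in {1, 2}.
      for (int a = 1; a <= 2; ++a)
        for (int b = 1; b <= 2; ++b)
          for (int c = 1; c <= 2; ++c)
            printf("%d ", a * 9 + b * 3 + c);  // prints: 13 14 16 17 22 23 25 26
      return 0;
    }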
@@ -825,6 +825,7 @@ template void memset_gpu<double>(const size_t N, const double alpha, double* Y);
 template void memset_gpu<char>(const size_t N, const char alpha, char* Y);
 template void memset_gpu<int8_t>(const size_t N, const int8_t alpha, int8_t* Y);
+template void memset_gpu<uint8_t>(const size_t N, const uint8_t alpha, uint8_t* Y);
 template void memcpy_gpu<char>(const size_t N, const char* X, char* Y);
 template void memcpy_gpu<int>(const size_t N, const int* X, int* Y);
 template void memcpy_gpu<int64_t>(const size_t N, const int64_t* X, int64_t* Y);
 template void memcpy_gpu<int16_t>(const size_t N, const int16_t* X, int16_t* Y);
@@ -844,6 +845,8 @@ template void pad_backward_gpu<double>(double* X, const int Hx, const int Cx,
     const double* Y, const int Hy, const int* label);
 template void pad_backward_gpu<int>(int* X, const int Hx, const int Cx,
     const int* Y, const int Hy, const int* label);
+template void pad_backward_gpu<uintk>(uintk* X, const int Hx, const int Cx,
+    const uintk* Y, const int Hy, const int* label);
 template void octree2col_gpu<float>(float* data_col, const float* data_octree,
     const int channel, const int height, const int kernel_sdim, const int stride,
     const int* neigh, const int* ni, const int height_col, const int n);
@@ -150,7 +150,7 @@ void PointsParser::transform(const float* mat) {
   }
 }
 
-void PointsParser::clip(const float* bbmin, const float* bbmax) {
+vector<int> PointsParser::clip(const float* bbmin, const float* bbmax) {
   int npt = info_->pt_num(), npt_in_bbox = 0;
   float* pts = mutable_points();
   vector<int> in_bbox(npt, 0);
@@ -162,7 +162,10 @@ void PointsParser::clip(const float* bbmin, const float* bbmax) {
     npt_in_bbox += in_bbox[i];
   }
 
-  if (npt_in_bbox == npt) return; // early stop
+  if (npt_in_bbox == npt) {  // early stop
+    return in_bbox;
+  }
+
   if (npt_in_bbox == 0) { // no points
     // just keep one point to avoid the degenerated case
     npt_in_bbox = 1;
@@ -171,7 +174,7 @@ void PointsParser::clip(const float* bbmin, const float* bbmax) {
     for (int i = 0; i < 3; ++i) { p[i] = bbmin[i]; }
   }
 
-  // Just discard the points which are out of the bbox
+  // discard the points which are out of the bbox
   for (int t = 0; t < PointsInfo::kPTypeNum; ++t) {
     auto ptype = static_cast<PointsInfo::PropType>(1 << t);
     int channel = info_->channel(ptype);
@@ -188,6 +191,7 @@ void PointsParser::clip(const float* bbmin, const float* bbmax) {
   }
 
   info_->set_pt_num(npt_in_bbox);
+  return in_bbox;
 }
 
 void PointsParser::add_noise(const float std_pt, const float std_nm) {
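Since clip() now returns the in_bbox mask, a caller can keep parallel per-point data in sync with the clipped point set. A hedged sketch, not from the diff; parser, bbmin, bbmax and labels are assumed to exist in the caller, and a 1 marks a point inside the bbox that is kept, in the original point order (the degenerate all-outside case, where one fallback point is kept, is ignored here):

    std::vector<int> mask = parser.clip(bbmin, bbmax);
    std::vector<int> kept_labels;
    for (size_t i = 0; i < mask.size(); ++i) {
      if (mask[i] != 0) { kept_labels.push_back(labels[i]); }
    }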
@@ -39,7 +39,7 @@ class PointsParser {
   void rotate(const float angle, const float* axis); // angle in radian
   void rotate(const float* angles);
   void transform(const float* trans_matrix);
-  void clip(const float* bbmin, const float* bbmax);
+  vector<int> clip(const float* bbmin, const float* bbmax);
   void add_noise(const float std_pt, const float std_nm);
   void normalize(); // translate and scale the points to unit sphere
   void orient_normal(const string axis);