This commit is contained in:
wang-ps 2021-08-24 09:44:19 +08:00
Родитель e2a39b11ef
Коммит f74c9539a5
8 изменённых файлов: 211 добавлений и 106 удалений

Просмотреть файл

@ -1244,7 +1244,8 @@ void Octree::octree2mesh(vector<float>& V, vector<int>& F, int depth_start,
vector<float> pts, normals, pts_ref;
for (int i = 0; i < num; ++i) {
if (node_type(child_d[i]) == kInternelNode && d != depth) continue;
if ((node_type(child_d[i]) == kInternelNode && d != depth) ||
(node_type(child_d[i]) == kLeaf && d == depth)) continue;
float n[3], pt[3], pt_ref[3];
node_normal(n, i, d);

Просмотреть файл

@ -3,11 +3,14 @@
#include "logs.h"
#include <algorithm>
namespace octree {
template <typename Dtype>
void OctreeBaseConv<Dtype>::setup(const vector<int>& kernel_size, const int stride,
const int curr_depth, const int channel_in, const int channel_out) {
void OctreeBaseConv<Dtype>::setup(const vector<int>& kernel_size,
const int stride, const int curr_depth,
const int channel_in, const int channel_out,
const bool nempty) {
// kernel size
kernel_size_ = kernel_size;
CHECK(kernel_size_[0] < 4 && kernel_size_[1] < 4 && kernel_size_[2] < 4)
@ -31,98 +34,155 @@ void OctreeBaseConv<Dtype>::setup(const vector<int>& kernel_size, const int stri
std::swap(conv_out_channels_, conv_in_channels_);
}
// !!! perform the convolution on non-empty octree nodes or not
nempty_ = nempty;
kernel_sdim_ = kernel_size_[0] * kernel_size_[1] * kernel_size_[2];
kernel_dim_ = kernel_sdim_ * conv_in_channels_;
ni_cpu_ptr_ = NeighHelper::get_ni(kernel_size_).data();
ni_gpu_ptr_ = nullptr; // must be set before using
ni_gpu_ptr_ = nullptr; // must be set before using
}
// Recomputes all octree-dependent shapes used by the convolution: the depth of
// the output and `col` data, the weight/top/workspace/result-buffer shapes and,
// when nempty_ is set, the heights consumed by octree2colP/col2octreeP.
// engine_cpu/gpu_, octree_ and ni_gpu_ptr_ must be set before calling this.
// NOTE(review): child_ and ichild_ are not assigned here; presumably the
// caller sets them before the non-empty kernels run -- confirm.
template <typename Dtype>
void OctreeBaseConv<Dtype>::reshape() {
  // Assign the depth for the different blobs.
  // curr_depth_ and top_depth are the octree depths of the input and output
  // data; workspace_depth_ is the octree depth of the `col` data. Unlike
  // top_depth, workspace_depth_ is always the depth of the larger data when
  // doing octree2col or col2octree.
  int top_depth = workspace_depth_ = curr_depth_;
  if (stride_ == 2) {
    if (is_deconvolution_layer()) {
      top_depth = workspace_depth_ = curr_depth_ + 1;
    } else {
      top_depth = curr_depth_ - 1;
    }
    CHECK(0 <= top_depth && top_depth <= octree_.info().depth());
  }

  // weight shape
  weights_shape_ =
      vector<int>{conv_out_channels_, conv_in_channels_ * kernel_sdim_};

  // top shape
  int top_h = 0;
  if (!nempty_) {
    top_h = octree_.info().node_num(top_depth);
    if (stride_ == 2 && !is_deconvolution_layer()) {
      // In this case, the octree_pad is needed to pad the output data,
      // so the top_h is equal to the non-empty node number.
      top_h = octree_.info().node_num_nempty(top_depth);
    }
  } else {
    top_h = octree_.info().node_num_nempty(top_depth);
  }
  if (top_h == 0) top_h = 1;  // avoid degenerated case
  top_shape_ = vector<int>{1, num_output_, top_h, 1};

  // workspace shape
  workspace_h_ = top_h;  // equals the output height if stride is 1
  if (stride_ == 2) {
    if (is_deconvolution_layer()) {
      workspace_h_ = octree_.info().node_num(top_depth) / 8;
    } else {
      workspace_h_ = octree_.info().node_num(curr_depth_) / 8;
    }
  }

  // child_h_, ichild_h_ and octree_h_ are used for octree2col/col2octree
  // only if nempty_ is true.
  if (nempty_) {
    child_h_ = octree_.info().node_num(workspace_depth_);
    ichild_h_ = octree_.info().node_num_nempty(workspace_depth_);
    // octree_h_ is the height of the octree data for octree2col/col2octree
    octree_h_ = octree_.info().node_num_nempty(curr_depth_);
    if (stride_ == 2 && is_deconvolution_layer()) {
      octree_h_ = octree_.info().node_num_nempty(top_depth);
    }
  }

  // workspace number and actual workspace shape: when the ideal `col` buffer
  // exceeds MAX_SIZE elements it is processed in workspace_n_ slices of
  // workspace_ha_ columns each.
  workspace_n_ = 1;
  workspace_ha_ = workspace_h_;
  uint64 ideal_size = (uint64)workspace_h_ * (uint64)kernel_dim_;
  if (ideal_size > MAX_SIZE && !is_1x1_) {
    workspace_n_ = (ideal_size + MAX_SIZE - 1) / MAX_SIZE;
    workspace_ha_ = (workspace_h_ + workspace_n_ - 1) / workspace_n_;
  }
  workspace_shape_ = vector<int>{kernel_dim_, workspace_ha_};

  // result_buffer_ shape: only needed when the workspace is sliced
  if (workspace_n_ > 1) {
    result_buffer_shape_ = vector<int>{conv_out_channels_, workspace_ha_};
  } else {
    result_buffer_shape_.clear();
  }
}
// Expands `bottom_data` into the column buffer `workspace` on the CPU for the
// n-th workspace slice. Dispatches to octree2col_cpu when the convolution runs
// on all nodes, or to octree2colP_cpu when nempty_ is set (non-empty nodes
// only, which additionally needs octree_h_, child_ and ichild_).
template <typename Dtype>
void OctreeBaseConv<Dtype>::octree2col_cpu_wrapper(Dtype* workspace,
                                                   const Dtype* bottom_data,
                                                   const int n) {
  if (!nempty_) {
    octree2col_cpu<Dtype>(workspace, bottom_data, conv_in_channels_,
                          workspace_h_, kernel_sdim_, stride_,
                          octree_.neighbor_cpu(workspace_depth_), ni_cpu_ptr_,
                          workspace_ha_, n);
  } else {
    // This is a CPU kernel, so the neighbor table must be the host one
    // (neighbor_cpu), exactly as in the branch above.
    octree2colP_cpu<Dtype>(workspace, bottom_data, conv_in_channels_,
                           workspace_h_, octree_h_, kernel_sdim_, stride_,
                           octree_.neighbor_cpu(workspace_depth_), ni_cpu_ptr_,
                           child_, ichild_, workspace_ha_, n);
  }
}
// Scatters the column gradient `col_diff` back into `bottom_diff` on the CPU
// for the n-th workspace slice. Dispatches to col2octree_cpu when the
// convolution runs on all nodes, or to col2octreeP_cpu when nempty_ is set
// (non-empty nodes only, which additionally needs octree_h_, child_ and
// ichild_).
template <typename Dtype>
void OctreeBaseConv<Dtype>::col2octree_cpu_wrapper(const Dtype* col_diff,
                                                   Dtype* bottom_diff, int n) {
  if (!nempty_) {
    col2octree_cpu<Dtype>(col_diff, bottom_diff, conv_in_channels_,
                          workspace_h_, kernel_sdim_, stride_,
                          octree_.neighbor_cpu(workspace_depth_), ni_cpu_ptr_,
                          workspace_ha_, n);
  } else {
    // This is a CPU kernel, so the neighbor table must be the host one
    // (neighbor_cpu), exactly as in the branch above.
    col2octreeP_cpu<Dtype>(col_diff, bottom_diff, conv_in_channels_,
                           workspace_h_, octree_h_, kernel_sdim_, stride_,
                           octree_.neighbor_cpu(workspace_depth_), ni_cpu_ptr_,
                           child_, ichild_, workspace_ha_, n);
  }
}
// Forward pass on the CPU: top_data = weights * col(bottom_data).
// For a 1x1 kernel the bottom data is used directly as the column matrix;
// otherwise it is expanded slice by slice through octree2col_cpu_wrapper.
// When the workspace is split into several slices (workspace_n_ > 1) each
// partial result is produced in result_buffer_ and then copied, channel by
// channel, into its column range of top_data.
template <typename Dtype>
void OctreeBaseConv<Dtype>::forward_cpu_gemm(Dtype* top_data,
                                             const Dtype* bottom_data,
                                             const Dtype* weights) {
  const Dtype* col_data = bottom_data;
  Dtype* result_data = workspace_n_ == 1 ? top_data : result_buffer_;
  for (int n = 0; n < workspace_n_; ++n) {
    if (!is_1x1_) {
      octree2col_cpu_wrapper(workspace_, bottom_data, n);
      col_data = workspace_;
    }

    engine_cpu_->gemm(false, false, conv_out_channels_, workspace_ha_,
                      kernel_dim_, Dtype(1.0), weights, col_data, Dtype(0),
                      result_data);

    // Single slice: the GEMM already wrote straight into top_data.
    if (workspace_n_ == 1) return;

    // The last slice may be shorter than workspace_ha_.
    int num = std::min(workspace_ha_, workspace_h_ - n * workspace_ha_);
    for (int c = 0; c < conv_out_channels_; ++c) {
      memcpy_cpu(num, result_data + c * workspace_ha_,
                 top_data + c * workspace_h_ + n * workspace_ha_);
    }
  }
}
template <typename Dtype>
void OctreeBaseConv<Dtype>::backward_cpu_gemm(Dtype* bottom_diff,
const Dtype* top_diff, const Dtype* weights) {
const Dtype* top_diff,
const Dtype* weights) {
Dtype* col_diff = is_1x1_ ? bottom_diff : workspace_;
for (int n = 0; n < workspace_n_; ++n) {
const Dtype* result_buffer = top_diff;
@ -131,28 +191,25 @@ void OctreeBaseConv<Dtype>::backward_cpu_gemm(Dtype* bottom_diff,
int num = std::min(workspace_ha_, workspace_h_ - n * workspace_ha_);
for (int c = 0; c < conv_out_channels_; ++c) {
memcpy_cpu(num, top_diff + c * workspace_h_ + n * workspace_ha_,
buffer_ + c * workspace_ha_);
buffer_ + c * workspace_ha_);
}
result_buffer = result_buffer_;
}
engine_cpu_->gemm(true, false, kernel_dim_,
workspace_ha_, conv_out_channels_, Dtype(1.0), weights,
result_buffer, Dtype(0.0), col_diff);
engine_cpu_->gemm(true, false, kernel_dim_, workspace_ha_,
conv_out_channels_, Dtype(1.0), weights, result_buffer,
Dtype(0.0), col_diff);
if (!is_1x1_) {
col2octree_cpu<Dtype>(col_diff, bottom_diff,
conv_in_channels_, workspace_h_, kernel_sdim_,
stride_, octree_.neighbor_cpu(workspace_depth_),
ni_cpu_ptr_, workspace_ha_, n);
col2octree_cpu_wrapper(col_diff, bottom_diff, n);
}
}
}
template <typename Dtype>
void OctreeBaseConv<Dtype>::weight_cpu_gemm(Dtype* weights_diff,
const Dtype* bottom_data, const Dtype* top_diff) {
const Dtype* bottom_data,
const Dtype* top_diff) {
int num = num_elements(weights_shape_);
memset_cpu(num, Dtype(0), weights_diff);
@ -160,10 +217,7 @@ void OctreeBaseConv<Dtype>::weight_cpu_gemm(Dtype* weights_diff,
const Dtype* result_buffer = top_diff;
for (int n = 0; n < workspace_n_; ++n) {
if (!is_1x1_) {
octree2col_cpu<Dtype>(workspace_,
bottom_data, conv_in_channels_, workspace_h_, kernel_sdim_,
stride_, octree_.neighbor_cpu(workspace_depth_),
ni_cpu_ptr_, workspace_ha_, n);
octree2col_cpu_wrapper(workspace_, bottom_data, n);
col_data = workspace_;
}
@ -172,49 +226,81 @@ void OctreeBaseConv<Dtype>::weight_cpu_gemm(Dtype* weights_diff,
Dtype* buffer = result_buffer_;
for (int c = 0; c < conv_out_channels_; ++c) {
memcpy_cpu(num, top_diff + c * workspace_h_ + n * workspace_ha_,
buffer + c * workspace_ha_);
buffer + c * workspace_ha_);
}
result_buffer = result_buffer_;
}
engine_cpu_->gemm(false, true, conv_out_channels_,
kernel_dim_, workspace_ha_, Dtype(1.0), result_buffer, col_data,
Dtype(1.0), weights_diff);
engine_cpu_->gemm(false, true, conv_out_channels_, kernel_dim_,
workspace_ha_, Dtype(1.0), result_buffer, col_data,
Dtype(1.0), weights_diff);
}
}
#ifdef USE_CUDA
// GPU counterpart of the octree2col dispatch: fills `workspace` with the
// column data of `bottom_data` for the n-th workspace slice, using the
// non-empty-node kernel (octree2colP_gpu) when nempty_ is set and the regular
// kernel (octree2col_gpu) otherwise.
template <typename Dtype>
void OctreeBaseConv<Dtype>::octree2col_gpu_wrapper(Dtype* workspace,
                                                   const Dtype* bottom_data,
                                                   const int n) {
  if (nempty_) {
    octree2colP_gpu<Dtype>(workspace, bottom_data, conv_in_channels_,
                           workspace_h_, octree_h_, kernel_sdim_, stride_,
                           octree_.neighbor_gpu(workspace_depth_), ni_gpu_ptr_,
                           child_, ichild_, workspace_ha_, n);
    return;
  }

  octree2col_gpu<Dtype>(workspace, bottom_data, conv_in_channels_,
                        workspace_h_, kernel_sdim_, stride_,
                        octree_.neighbor_gpu(workspace_depth_), ni_gpu_ptr_,
                        workspace_ha_, n);
}
// GPU counterpart of the col2octree dispatch: accumulates the column gradient
// `col_diff` of the n-th workspace slice into `bottom_diff`, using the
// non-empty-node kernel (col2octreeP_gpu) when nempty_ is set and the regular
// kernel (col2octree_gpu) otherwise.
template <typename Dtype>
void OctreeBaseConv<Dtype>::col2octree_gpu_wrapper(const Dtype* col_diff,
                                                   Dtype* bottom_diff, int n) {
  if (nempty_) {
    col2octreeP_gpu<Dtype>(col_diff, bottom_diff, conv_in_channels_,
                           workspace_h_, octree_h_, kernel_sdim_, stride_,
                           octree_.neighbor_gpu(workspace_depth_), ni_gpu_ptr_,
                           child_, ichild_, workspace_ha_, n);
    return;
  }

  col2octree_gpu<Dtype>(col_diff, bottom_diff, conv_in_channels_,
                        workspace_h_, kernel_sdim_, stride_,
                        octree_.neighbor_gpu(workspace_depth_), ni_gpu_ptr_,
                        workspace_ha_, n);
}
// Forward pass on the GPU: top_data = weights * col(bottom_data).
// For a 1x1 kernel the bottom data is used directly as the column matrix;
// otherwise it is expanded slice by slice through octree2col_gpu_wrapper.
// When the workspace is split into several slices (workspace_n_ > 1) each
// partial result is produced in result_buffer_ and then copied, channel by
// channel, into its column range of top_data.
template <typename Dtype>
void OctreeBaseConv<Dtype>::forward_gpu_gemm(Dtype* top_data,
                                             const Dtype* bottom_data,
                                             const Dtype* weights) {
  const Dtype* col_data = bottom_data;
  Dtype* result_data = workspace_n_ == 1 ? top_data : result_buffer_;
  for (int n = 0; n < workspace_n_; ++n) {
    if (!is_1x1_) {
      octree2col_gpu_wrapper(workspace_, bottom_data, n);
      col_data = workspace_;
    }

    engine_gpu_->gemm(false, false, conv_out_channels_, workspace_ha_,
                      kernel_dim_, Dtype(1.0), weights, col_data, Dtype(0),
                      result_data);

    // Single slice: the GEMM already wrote straight into top_data.
    if (workspace_n_ == 1) return;

    // The last slice may be shorter than workspace_ha_.
    int num = std::min(workspace_ha_, workspace_h_ - n * workspace_ha_);
    for (int c = 0; c < conv_out_channels_; ++c) {
      memcpy_gpu(num, result_data + c * workspace_ha_,
                 top_data + c * workspace_h_ + n * workspace_ha_);
    }
  }
}
template <typename Dtype>
void OctreeBaseConv<Dtype>::backward_gpu_gemm(Dtype* bottom_diff,
const Dtype* top_diff, const Dtype* weights) {
const Dtype* top_diff,
const Dtype* weights) {
Dtype* col_diff = is_1x1_ ? bottom_diff : workspace_;
for (int n = 0; n < workspace_n_; ++n) {
const Dtype* result_buffer = top_diff;
@ -223,28 +309,25 @@ void OctreeBaseConv<Dtype>::backward_gpu_gemm(Dtype* bottom_diff,
int num = std::min(workspace_ha_, workspace_h_ - n * workspace_ha_);
for (int c = 0; c < conv_out_channels_; ++c) {
memcpy_gpu(num, top_diff + c * workspace_h_ + n * workspace_ha_,
buffer_ + c * workspace_ha_);
buffer_ + c * workspace_ha_);
}
result_buffer = result_buffer_;
}
engine_gpu_->gemm(true, false, kernel_dim_,
workspace_ha_, conv_out_channels_, Dtype(1.0), weights,
result_buffer, Dtype(0.0), col_diff);
engine_gpu_->gemm(true, false, kernel_dim_, workspace_ha_,
conv_out_channels_, Dtype(1.0), weights, result_buffer,
Dtype(0.0), col_diff);
if (!is_1x1_) {
col2octree_gpu<Dtype>(col_diff, bottom_diff,
conv_in_channels_, workspace_h_, kernel_sdim_,
stride_, octree_.neighbor_gpu(workspace_depth_),
ni_gpu_ptr_, workspace_ha_, n);
col2octree_gpu_wrapper(col_diff, bottom_diff, n);
}
}
}
template <typename Dtype>
void OctreeBaseConv<Dtype>::weight_gpu_gemm(Dtype* weights_diff,
const Dtype* bottom_data, const Dtype* top_diff) {
const Dtype* bottom_data,
const Dtype* top_diff) {
int num = num_elements(weights_shape_);
memset_gpu(num, Dtype(0), weights_diff);
@ -252,10 +335,7 @@ void OctreeBaseConv<Dtype>::weight_gpu_gemm(Dtype* weights_diff,
const Dtype* result_buffer = top_diff;
for (int n = 0; n < workspace_n_; ++n) {
if (!is_1x1_) {
octree2col_gpu<Dtype>(workspace_,
bottom_data, conv_in_channels_, workspace_h_, kernel_sdim_,
stride_, octree_.neighbor_gpu(workspace_depth_),
ni_gpu_ptr_, workspace_ha_, n);
octree2col_gpu_wrapper(workspace_, bottom_data, n);
col_data = workspace_;
}
@ -264,18 +344,18 @@ void OctreeBaseConv<Dtype>::weight_gpu_gemm(Dtype* weights_diff,
Dtype* buffer = result_buffer_;
for (int c = 0; c < conv_out_channels_; ++c) {
memcpy_gpu(num, top_diff + c * workspace_h_ + n * workspace_ha_,
buffer + c * workspace_ha_);
buffer + c * workspace_ha_);
}
result_buffer = result_buffer_;
}
engine_gpu_->gemm(false, true, conv_out_channels_,
kernel_dim_, workspace_ha_, Dtype(1.0), result_buffer, col_data,
Dtype(1.0), weights_diff);
engine_gpu_->gemm(false, true, conv_out_channels_, kernel_dim_,
workspace_ha_, Dtype(1.0), result_buffer, col_data,
Dtype(1.0), weights_diff);
}
}
#endif // USE_CUDA
#endif // USE_CUDA
template class OctreeBaseConv<float>;
template class OctreeBaseConv<double>;

Просмотреть файл

@ -13,11 +13,13 @@ template <typename Dtype>
class OctreeBaseConv {
public:
explicit OctreeBaseConv(int max_size = 256 * 1024 * 1024)
: MAX_SIZE(max_size), engine_cpu_(nullptr), engine_gpu_(nullptr) {}
: MAX_SIZE(max_size), engine_cpu_(nullptr), engine_gpu_(nullptr),
nempty_(false), child_(nullptr), ichild_(nullptr) {}
void setup(const vector<int>& kernel_size, const int stride,
const int curr_depth, const int channel_in, const int channel_out);
// after setup() and before reshape(),
// please set engine_cpu/gpu_, octree_ and ni_gpu_ptr_
const int curr_depth, const int channel_in, const int channel_out,
const bool nempty = false);
// !!! Please set engine_cpu/gpu_, octree_ and ni_gpu_ptr_
// after calling setup() and before reshape().
void reshape();
protected:
@ -40,11 +42,16 @@ class OctreeBaseConv {
void weight_gpu_gemm(Dtype* weights_diff, const Dtype* bottom_data,
const Dtype* top_diff);
void octree2col_cpu_wrapper(Dtype* workspace, const Dtype* bottom_data, int n);
void col2octree_cpu_wrapper(const Dtype* workspace, Dtype* bottom_data, int n);
void octree2col_gpu_wrapper(Dtype* workspace, const Dtype* bottom_data, int n);
void col2octree_gpu_wrapper(const Dtype* workspace, Dtype* bottom_data, int n);
protected:
int stride_;
vector<int> kernel_size_;
int kernel_dim_;
int kernel_sdim_; // spatial dim of the kernel
int kernel_sdim_; // spatial dim of the kernel
bool is_1x1_;
// input channel & output channel
@ -59,19 +66,17 @@ class OctreeBaseConv {
OctreeParser octree_;
int workspace_n_;
int workspace_ha_; // actual worksapce h
int workspace_h_; // ideal workspace h
int workspace_depth_;
int workspace_ha_; // actual worksapce h, the height of `col` data
int workspace_h_; // ideal workspace h
int workspace_depth_; // the depth value used for octree2col
vector<int> top_shape_;
vector<int> weights_shape_;
vector<int> workspace_shape_;
vector<int> data_buffer_shape_;
vector<int> result_buffer_shape_;
Dtype* workspace_;
Dtype* data_buffer_;
Dtype* result_buffer_;
Dtype* result_buffer_; // hold the temporary result of octree2col
const int* ni_cpu_ptr_; // hold cpu data from NeighHelper::get_ni(kernel_size_)
const int* ni_gpu_ptr_; // hold gpu data from NeighHelper::get_ni(kernel_size_)
@ -80,6 +85,15 @@ class OctreeBaseConv {
GEMMEngine<Dtype>* engine_cpu_;
GEMMEngine<Dtype>* engine_gpu_;
bool nempty_; // perform convolution on non-empty voxels
// used for octree2col and col2octree on non-empty voxels
int octree_h_; // the height of octree data
int child_h_;
int ichild_h_;
const int* child_;
const int* ichild_;
};
} // namespace octree

Просмотреть файл

@ -41,6 +41,9 @@ class OctreeInfo {
int node_num(int d) const { return nnum_[d]; }
int node_num_cum(int d) const { return nnum_cum_[d]; }
int node_num_nempty(int d) const { return nnum_nempty_[d]; }
const int* node_num_ptr() const { return nnum_; }
const int* node_nempty_ptr() const { return nnum_nempty_; }
const int* node_num_cum_ptr() const { return nnum_cum_; }
int total_nnum() const { return nnum_cum_[depth_ + 1]; }
int total_nnum_capacity() const { return nnum_cum_[depth_ + 2]; }
int content_flags() const { return content_flags_; }

Просмотреть файл

@ -10,7 +10,7 @@ void NeighHelper::init_neigh_index() {
{ "331", 6 }, { "313", 7 }, { "133", 8 } };
const vector<vector<int> > vec{ {} /* 333, 27 */, { 13 } /* 111, 1 */,
{ 13, 14, 16, 17, 22, 23, 25, 26 } /* 222, 8 */,
{ 13, 14, 16, 17, 22, 23, 25, 26 } /* 222, 8, 8 octants */,
{ 4, 13, 22 } /* 311, 3 */,
{ 10, 13, 16 } /* 131, 3 */,
{ 12, 13, 14 } /* 113, 3 */,

Просмотреть файл

@ -825,6 +825,7 @@ template void memset_gpu<double>(const size_t N, const double alpha, double* Y);
template void memset_gpu<char>(const size_t N, const char alpha, char* Y);
template void memset_gpu<int8_t>(const size_t N, const int8_t alpha, int8_t* Y);
template void memset_gpu<uint8_t>(const size_t N, const uint8_t alpha, uint8_t* Y);
template void memcpy_gpu<char>(const size_t N, const char* X, char* Y);
template void memcpy_gpu<int>(const size_t N, const int* X, int* Y);
template void memcpy_gpu<int64_t>(const size_t N, const int64_t* X, int64_t* Y);
template void memcpy_gpu<int16_t>(const size_t N, const int16_t* X, int16_t* Y);
@ -844,6 +845,8 @@ template void pad_backward_gpu<double>(double* X, const int Hx, const int Cx,
const double* Y, const int Hy, const int* label);
template void pad_backward_gpu<int>(int* X, const int Hx, const int Cx,
const int* Y, const int Hy, const int* label);
template void pad_backward_gpu<uintk>(uintk* X, const int Hx, const int Cx,
const uintk* Y, const int Hy, const int* label);
template void octree2col_gpu<float>(float* data_col, const float* data_octree,
const int channel, const int height, const int kernel_sdim, const int stride,
const int* neigh, const int* ni, const int height_col, const int n);

Просмотреть файл

@ -150,7 +150,7 @@ void PointsParser::transform(const float* mat) {
}
}
void PointsParser::clip(const float* bbmin, const float* bbmax) {
vector<int> PointsParser::clip(const float* bbmin, const float* bbmax) {
int npt = info_->pt_num(), npt_in_bbox = 0;
float* pts = mutable_points();
vector<int> in_bbox(npt, 0);
@ -162,7 +162,10 @@ void PointsParser::clip(const float* bbmin, const float* bbmax) {
npt_in_bbox += in_bbox[i];
}
if (npt_in_bbox == npt) return; // early stop
if (npt_in_bbox == npt) { // early stop
return in_bbox;
}
if (npt_in_bbox == 0) { // no points
// just keep one point to avoid the degenerated case
npt_in_bbox = 1;
@ -171,7 +174,7 @@ void PointsParser::clip(const float* bbmin, const float* bbmax) {
for (int i = 0; i < 3; ++i) { p[i] = bbmin[i]; }
}
// Just discard the points which are out of the bbox
// discard the points which are out of the bbox
for (int t = 0; t < PointsInfo::kPTypeNum; ++t) {
auto ptype = static_cast<PointsInfo::PropType>(1 << t);
int channel = info_->channel(ptype);
@ -188,6 +191,7 @@ void PointsParser::clip(const float* bbmin, const float* bbmax) {
}
info_->set_pt_num(npt_in_bbox);
return in_bbox;
}
void PointsParser::add_noise(const float std_pt, const float std_nm) {

Просмотреть файл

@ -39,7 +39,7 @@ class PointsParser {
void rotate(const float angle, const float* axis); // angle in radian
void rotate(const float* angles);
void transform(const float* trans_matrix);
void clip(const float* bbmin, const float* bbmax);
vector<int> clip(const float* bbmin, const float* bbmax);
void add_noise(const float std_pt, const float std_nm);
void normalize(); // translate and scale the points to unit sphere
void orient_normal(const string axis);