Convert palette from double to float.
About 20% less coding time is spent in vp10_k_means().
Change-Id: I5cf7605cde869a269776197bace70de353b07d83
This commit is contained in:
Родитель
d1327aec1b
Коммит
bb0e692151
|
@ -64,7 +64,7 @@ typedef struct {
|
|||
|
||||
typedef struct {
|
||||
uint8_t best_palette_color_map[MAX_SB_SQUARE];
|
||||
double kmeans_data_buf[2 * MAX_SB_SQUARE];
|
||||
float kmeans_data_buf[2 * MAX_SB_SQUARE];
|
||||
uint8_t kmeans_indices_buf[MAX_SB_SQUARE];
|
||||
uint8_t kmeans_pre_indices_buf[MAX_SB_SQUARE];
|
||||
} PALETTE_BUFFER;
|
||||
|
|
|
@ -11,20 +11,21 @@
|
|||
#include <math.h>
|
||||
#include "vp10/encoder/palette.h"
|
||||
|
||||
static double calc_dist(const double *p1, const double *p2, int dim) {
|
||||
double dist = 0;
|
||||
static float calc_dist(const float *p1, const float *p2, int dim) {
|
||||
float dist = 0;
|
||||
int i = 0;
|
||||
|
||||
for (i = 0; i < dim; ++i) {
|
||||
dist = dist + (p1[i] - round(p2[i])) * (p1[i] - round(p2[i]));
|
||||
float diff = p1[i] - roundf(p2[i]);
|
||||
dist += diff * diff;
|
||||
}
|
||||
return dist;
|
||||
}
|
||||
|
||||
void vp10_calc_indices(const double *data, const double *centroids,
|
||||
void vp10_calc_indices(const float *data, const float *centroids,
|
||||
uint8_t *indices, int n, int k, int dim) {
|
||||
int i, j;
|
||||
double min_dist, this_dist;
|
||||
float min_dist, this_dist;
|
||||
|
||||
for (i = 0; i < n; ++i) {
|
||||
min_dist = calc_dist(data + i * dim, centroids, dim);
|
||||
|
@ -45,7 +46,7 @@ static unsigned int lcg_rand16(unsigned int *state) {
|
|||
return *state / 65536 % 32768;
|
||||
}
|
||||
|
||||
static void calc_centroids(const double *data, double *centroids,
|
||||
static void calc_centroids(const float *data, float *centroids,
|
||||
const uint8_t *indices, int n, int k, int dim) {
|
||||
int i, j, index;
|
||||
int count[PALETTE_MAX_SIZE];
|
||||
|
@ -70,16 +71,16 @@ static void calc_centroids(const double *data, double *centroids,
|
|||
memcpy(centroids + i * dim, data + (lcg_rand16(&rand_state) % n) * dim,
|
||||
sizeof(centroids[0]) * dim);
|
||||
} else {
|
||||
const double norm = 1.0 / count[i];
|
||||
const float norm = 1.0f / count[i];
|
||||
for (j = 0; j < dim; ++j)
|
||||
centroids[i * dim + j] *= norm;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static double calc_total_dist(const double *data, const double *centroids,
|
||||
static float calc_total_dist(const float *data, const float *centroids,
|
||||
const uint8_t *indices, int n, int k, int dim) {
|
||||
double dist = 0;
|
||||
float dist = 0;
|
||||
int i;
|
||||
(void) k;
|
||||
|
||||
|
@ -89,11 +90,11 @@ static double calc_total_dist(const double *data, const double *centroids,
|
|||
return dist;
|
||||
}
|
||||
|
||||
int vp10_k_means(const double *data, double *centroids, uint8_t *indices,
|
||||
int vp10_k_means(const float *data, float *centroids, uint8_t *indices,
|
||||
uint8_t *pre_indices, int n, int k, int dim, int max_itr) {
|
||||
int i = 0;
|
||||
double pre_dist, this_dist;
|
||||
double pre_centroids[2 * PALETTE_MAX_SIZE];
|
||||
float pre_dist, this_dist;
|
||||
float pre_centroids[2 * PALETTE_MAX_SIZE];
|
||||
|
||||
vp10_calc_indices(data, centroids, indices, n, k, dim);
|
||||
pre_dist = calc_total_dist(data, centroids, indices, n, k, dim);
|
||||
|
@ -121,9 +122,9 @@ int vp10_k_means(const double *data, double *centroids, uint8_t *indices,
|
|||
return i;
|
||||
}
|
||||
|
||||
void vp10_insertion_sort(double *data, int n) {
|
||||
void vp10_insertion_sort(float *data, int n) {
|
||||
int i, j, k;
|
||||
double val;
|
||||
float val;
|
||||
|
||||
if (n <= 1)
|
||||
return;
|
||||
|
|
|
@ -17,10 +17,10 @@
|
|||
extern "C" {
|
||||
#endif
|
||||
|
||||
void vp10_insertion_sort(double *data, int n);
|
||||
void vp10_calc_indices(const double *data, const double *centroids,
|
||||
void vp10_insertion_sort(float *data, int n);
|
||||
void vp10_calc_indices(const float *data, const float *centroids,
|
||||
uint8_t *indices, int n, int k, int dim);
|
||||
int vp10_k_means(const double *data, double *centroids, uint8_t *indices,
|
||||
int vp10_k_means(const float *data, float *centroids, uint8_t *indices,
|
||||
uint8_t *pre_indices, int n, int k, int dim, int max_itr);
|
||||
int vp10_count_colors(const uint8_t *src, int stride, int rows, int cols);
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
|
|
|
@ -1774,12 +1774,12 @@ static int rd_pick_palette_intra_sby(VP10_COMP *cpi, MACROBLOCK *x,
|
|||
const int max_itr = 50;
|
||||
int color_ctx, color_idx = 0;
|
||||
int color_order[PALETTE_MAX_SIZE];
|
||||
double *const data = x->palette_buffer->kmeans_data_buf;
|
||||
float *const data = x->palette_buffer->kmeans_data_buf;
|
||||
uint8_t *const indices = x->palette_buffer->kmeans_indices_buf;
|
||||
uint8_t *const pre_indices = x->palette_buffer->kmeans_pre_indices_buf;
|
||||
double centroids[PALETTE_MAX_SIZE];
|
||||
float centroids[PALETTE_MAX_SIZE];
|
||||
uint8_t *const color_map = xd->plane[0].color_index_map;
|
||||
double lb, ub, val;
|
||||
float lb, ub, val;
|
||||
MB_MODE_INFO *const mbmi = &mic->mbmi;
|
||||
PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
|
@ -1834,7 +1834,7 @@ static int rd_pick_palette_intra_sby(VP10_COMP *cpi, MACROBLOCK *x,
|
|||
n, 1, max_itr);
|
||||
vp10_insertion_sort(centroids, n);
|
||||
for (i = 0; i < n; ++i)
|
||||
centroids[i] = round(centroids[i]);
|
||||
centroids[i] = roundf(centroids[i]);
|
||||
// remove duplicates
|
||||
i = 1;
|
||||
k = n;
|
||||
|
@ -1854,12 +1854,12 @@ static int rd_pick_palette_intra_sby(VP10_COMP *cpi, MACROBLOCK *x,
|
|||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if (cpi->common.use_highbitdepth)
|
||||
for (i = 0; i < k; ++i)
|
||||
pmi->palette_colors[i] = clip_pixel_highbd((int)round(centroids[i]),
|
||||
pmi->palette_colors[i] = clip_pixel_highbd((int)lroundf(centroids[i]),
|
||||
cpi->common.bit_depth);
|
||||
else
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
for (i = 0; i < k; ++i)
|
||||
pmi->palette_colors[i] = clip_pixel((int)round(centroids[i]));
|
||||
pmi->palette_colors[i] = clip_pixel((int)lroundf(centroids[i]));
|
||||
pmi->palette_size[0] = k;
|
||||
|
||||
vp10_calc_indices(data, centroids, indices, rows * cols, k, 1);
|
||||
|
@ -3583,12 +3583,12 @@ static void rd_pick_palette_intra_sbuv(VP10_COMP *cpi, MACROBLOCK *x,
|
|||
int color_ctx, color_idx = 0;
|
||||
int color_order[PALETTE_MAX_SIZE];
|
||||
int64_t this_sse;
|
||||
double lb_u, ub_u, val_u;
|
||||
double lb_v, ub_v, val_v;
|
||||
double *const data = x->palette_buffer->kmeans_data_buf;
|
||||
float lb_u, ub_u, val_u;
|
||||
float lb_v, ub_v, val_v;
|
||||
float *const data = x->palette_buffer->kmeans_data_buf;
|
||||
uint8_t *const indices = x->palette_buffer->kmeans_indices_buf;
|
||||
uint8_t *const pre_indices = x->palette_buffer->kmeans_pre_indices_buf;
|
||||
double centroids[2 * PALETTE_MAX_SIZE];
|
||||
float centroids[2 * PALETTE_MAX_SIZE];
|
||||
uint8_t *const color_map = xd->plane[1].color_index_map;
|
||||
PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
|
||||
|
||||
|
@ -3657,12 +3657,12 @@ static void rd_pick_palette_intra_sbuv(VP10_COMP *cpi, MACROBLOCK *x,
|
|||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if (cpi->common.use_highbitdepth)
|
||||
pmi->palette_colors[i * PALETTE_MAX_SIZE + j] =
|
||||
clip_pixel_highbd(round(centroids[j * 2 + i - 1]),
|
||||
clip_pixel_highbd(roundf(centroids[j * 2 + i - 1]),
|
||||
cpi->common.bit_depth);
|
||||
else
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
pmi->palette_colors[i * PALETTE_MAX_SIZE + j] =
|
||||
clip_pixel(round(centroids[j * 2 + i - 1]));
|
||||
clip_pixel(roundf(centroids[j * 2 + i - 1]));
|
||||
}
|
||||
}
|
||||
for (r = 0; r < rows; ++r)
|
||||
|
@ -7475,9 +7475,9 @@ static void restore_uv_color_map(VP10_COMP *cpi, MACROBLOCK *x) {
|
|||
int src_stride = x->plane[1].src.stride;
|
||||
const uint8_t *const src_u = x->plane[1].src.buf;
|
||||
const uint8_t *const src_v = x->plane[2].src.buf;
|
||||
double *const data = x->palette_buffer->kmeans_data_buf;
|
||||
float *const data = x->palette_buffer->kmeans_data_buf;
|
||||
uint8_t *const indices = x->palette_buffer->kmeans_indices_buf;
|
||||
double centroids[2 * PALETTE_MAX_SIZE];
|
||||
float centroids[2 * PALETTE_MAX_SIZE];
|
||||
uint8_t *const color_map = xd->plane[1].color_index_map;
|
||||
int r, c;
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
|
|
Загрузка…
Ссылка в новой задаче