Convert palette from double to float.

Encoding spends about 20% less time in vp10_k_means().

Change-Id: I5cf7605cde869a269776197bace70de353b07d83
This commit is contained in:
Alex Converse 2016-04-07 11:13:32 -07:00
Родитель d1327aec1b
Коммит bb0e692151
4 изменённых файла: 33 добавления и 32 удаления

Просмотреть файл

@ -64,7 +64,7 @@ typedef struct {
/*
 * Buffers grouped for palette processing. The encoder code shown further
 * down in this diff reaches them through x->palette_buffer.
 *
 * NOTE: this span is a diff hunk. The `double kmeans_data_buf` line is the
 * declaration removed by the commit and the `float kmeans_data_buf` line is
 * its replacement; only one of the two exists in any real revision.
 */
typedef struct {
/* NOTE(review): usage is not visible in this diff; presumably holds the
 * color index map of the best palette candidate -- confirm. */
uint8_t best_palette_color_map[MAX_SB_SQUARE];
/* Removed version: sample data for the k-means routines, in double. */
double kmeans_data_buf[2 * MAX_SB_SQUARE];
/* Added version: the same sample buffer in single precision. It is passed
 * as `data` to vp10_calc_indices(). The factor 2 presumably leaves room for
 * two components (U and V) per sample -- TODO confirm. */
float kmeans_data_buf[2 * MAX_SB_SQUARE];
/* Per-sample centroid indices, written by vp10_calc_indices(). */
uint8_t kmeans_indices_buf[MAX_SB_SQUARE];
/* Handed out as `pre_indices` next to the buffer above; presumably the
 * indices from the previous k-means iteration -- confirm in vp10_k_means(). */
uint8_t kmeans_pre_indices_buf[MAX_SB_SQUARE];
} PALETTE_BUFFER;

Просмотреть файл

@ -11,20 +11,21 @@
#include <math.h>
#include "vp10/encoder/palette.h"
/*
 * Returns the sum, over the first `dim` components, of the squared
 * difference between p1[i] and p2[i] rounded to the nearest integer.
 * Only p2 is rounded; p1 is used as given.
 *
 * NOTE: this span is a diff hunk with removed and added lines interleaved.
 * The double-based lines are the version removed by the commit; the
 * float-based lines are their replacements.
 */
static double calc_dist(const double *p1, const double *p2, int dim) {
double dist = 0;
static float calc_dist(const float *p1, const float *p2, int dim) {
float dist = 0;
int i = 0;
for (i = 0; i < dim; ++i) {
/* Removed version: evaluates the rounded difference twice in double. */
dist = dist + (p1[i] - round(p2[i])) * (p1[i] - round(p2[i]));
/* Added version: computes the rounded difference once, in float, and
 * accumulates its square. */
float diff = p1[i] - roundf(p2[i]);
dist += diff * diff;
}
return dist;
}
void vp10_calc_indices(const double *data, const double *centroids,
void vp10_calc_indices(const float *data, const float *centroids,
uint8_t *indices, int n, int k, int dim) {
int i, j;
double min_dist, this_dist;
float min_dist, this_dist;
for (i = 0; i < n; ++i) {
min_dist = calc_dist(data + i * dim, centroids, dim);
@ -45,7 +46,7 @@ static unsigned int lcg_rand16(unsigned int *state) {
return *state / 65536 % 32768;
}
static void calc_centroids(const double *data, double *centroids,
static void calc_centroids(const float *data, float *centroids,
const uint8_t *indices, int n, int k, int dim) {
int i, j, index;
int count[PALETTE_MAX_SIZE];
@ -70,16 +71,16 @@ static void calc_centroids(const double *data, double *centroids,
memcpy(centroids + i * dim, data + (lcg_rand16(&rand_state) % n) * dim,
sizeof(centroids[0]) * dim);
} else {
const double norm = 1.0 / count[i];
const float norm = 1.0f / count[i];
for (j = 0; j < dim; ++j)
centroids[i * dim + j] *= norm;
}
}
}
static double calc_total_dist(const double *data, const double *centroids,
static float calc_total_dist(const float *data, const float *centroids,
const uint8_t *indices, int n, int k, int dim) {
double dist = 0;
float dist = 0;
int i;
(void) k;
@ -89,11 +90,11 @@ static double calc_total_dist(const double *data, const double *centroids,
return dist;
}
int vp10_k_means(const double *data, double *centroids, uint8_t *indices,
int vp10_k_means(const float *data, float *centroids, uint8_t *indices,
uint8_t *pre_indices, int n, int k, int dim, int max_itr) {
int i = 0;
double pre_dist, this_dist;
double pre_centroids[2 * PALETTE_MAX_SIZE];
float pre_dist, this_dist;
float pre_centroids[2 * PALETTE_MAX_SIZE];
vp10_calc_indices(data, centroids, indices, n, k, dim);
pre_dist = calc_total_dist(data, centroids, indices, n, k, dim);
@ -121,9 +122,9 @@ int vp10_k_means(const double *data, double *centroids, uint8_t *indices,
return i;
}
void vp10_insertion_sort(double *data, int n) {
void vp10_insertion_sort(float *data, int n) {
int i, j, k;
double val;
float val;
if (n <= 1)
return;

Просмотреть файл

@ -17,10 +17,10 @@
extern "C" {
#endif
void vp10_insertion_sort(double *data, int n);
void vp10_calc_indices(const double *data, const double *centroids,
void vp10_insertion_sort(float *data, int n);
void vp10_calc_indices(const float *data, const float *centroids,
uint8_t *indices, int n, int k, int dim);
int vp10_k_means(const double *data, double *centroids, uint8_t *indices,
int vp10_k_means(const float *data, float *centroids, uint8_t *indices,
uint8_t *pre_indices, int n, int k, int dim, int max_itr);
int vp10_count_colors(const uint8_t *src, int stride, int rows, int cols);
#if CONFIG_VP9_HIGHBITDEPTH

Просмотреть файл

@ -1774,12 +1774,12 @@ static int rd_pick_palette_intra_sby(VP10_COMP *cpi, MACROBLOCK *x,
const int max_itr = 50;
int color_ctx, color_idx = 0;
int color_order[PALETTE_MAX_SIZE];
double *const data = x->palette_buffer->kmeans_data_buf;
float *const data = x->palette_buffer->kmeans_data_buf;
uint8_t *const indices = x->palette_buffer->kmeans_indices_buf;
uint8_t *const pre_indices = x->palette_buffer->kmeans_pre_indices_buf;
double centroids[PALETTE_MAX_SIZE];
float centroids[PALETTE_MAX_SIZE];
uint8_t *const color_map = xd->plane[0].color_index_map;
double lb, ub, val;
float lb, ub, val;
MB_MODE_INFO *const mbmi = &mic->mbmi;
PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
#if CONFIG_VP9_HIGHBITDEPTH
@ -1834,7 +1834,7 @@ static int rd_pick_palette_intra_sby(VP10_COMP *cpi, MACROBLOCK *x,
n, 1, max_itr);
vp10_insertion_sort(centroids, n);
for (i = 0; i < n; ++i)
centroids[i] = round(centroids[i]);
centroids[i] = roundf(centroids[i]);
// remove duplicates
i = 1;
k = n;
@ -1854,12 +1854,12 @@ static int rd_pick_palette_intra_sby(VP10_COMP *cpi, MACROBLOCK *x,
#if CONFIG_VP9_HIGHBITDEPTH
if (cpi->common.use_highbitdepth)
for (i = 0; i < k; ++i)
pmi->palette_colors[i] = clip_pixel_highbd((int)round(centroids[i]),
pmi->palette_colors[i] = clip_pixel_highbd((int)lroundf(centroids[i]),
cpi->common.bit_depth);
else
#endif // CONFIG_VP9_HIGHBITDEPTH
for (i = 0; i < k; ++i)
pmi->palette_colors[i] = clip_pixel((int)round(centroids[i]));
pmi->palette_colors[i] = clip_pixel((int)lroundf(centroids[i]));
pmi->palette_size[0] = k;
vp10_calc_indices(data, centroids, indices, rows * cols, k, 1);
@ -3583,12 +3583,12 @@ static void rd_pick_palette_intra_sbuv(VP10_COMP *cpi, MACROBLOCK *x,
int color_ctx, color_idx = 0;
int color_order[PALETTE_MAX_SIZE];
int64_t this_sse;
double lb_u, ub_u, val_u;
double lb_v, ub_v, val_v;
double *const data = x->palette_buffer->kmeans_data_buf;
float lb_u, ub_u, val_u;
float lb_v, ub_v, val_v;
float *const data = x->palette_buffer->kmeans_data_buf;
uint8_t *const indices = x->palette_buffer->kmeans_indices_buf;
uint8_t *const pre_indices = x->palette_buffer->kmeans_pre_indices_buf;
double centroids[2 * PALETTE_MAX_SIZE];
float centroids[2 * PALETTE_MAX_SIZE];
uint8_t *const color_map = xd->plane[1].color_index_map;
PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
@ -3657,12 +3657,12 @@ static void rd_pick_palette_intra_sbuv(VP10_COMP *cpi, MACROBLOCK *x,
#if CONFIG_VP9_HIGHBITDEPTH
if (cpi->common.use_highbitdepth)
pmi->palette_colors[i * PALETTE_MAX_SIZE + j] =
clip_pixel_highbd(round(centroids[j * 2 + i - 1]),
clip_pixel_highbd(roundf(centroids[j * 2 + i - 1]),
cpi->common.bit_depth);
else
#endif // CONFIG_VP9_HIGHBITDEPTH
pmi->palette_colors[i * PALETTE_MAX_SIZE + j] =
clip_pixel(round(centroids[j * 2 + i - 1]));
clip_pixel(roundf(centroids[j * 2 + i - 1]));
}
}
for (r = 0; r < rows; ++r)
@ -7475,9 +7475,9 @@ static void restore_uv_color_map(VP10_COMP *cpi, MACROBLOCK *x) {
int src_stride = x->plane[1].src.stride;
const uint8_t *const src_u = x->plane[1].src.buf;
const uint8_t *const src_v = x->plane[2].src.buf;
double *const data = x->palette_buffer->kmeans_data_buf;
float *const data = x->palette_buffer->kmeans_data_buf;
uint8_t *const indices = x->palette_buffer->kmeans_indices_buf;
double centroids[2 * PALETTE_MAX_SIZE];
float centroids[2 * PALETTE_MAX_SIZE];
uint8_t *const color_map = xd->plane[1].color_index_map;
int r, c;
#if CONFIG_VP9_HIGHBITDEPTH