Merge "Make coefficient skip condition an explicit RD choice."

Ronald S. Bultje 2013-06-28 11:54:28 -07:00, committed by Gerrit Code Review
Parents: 0345fc3ad9 af660715c0
Commit: ec5d09b950
6 changed files with 115 additions and 91 deletions
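
In short: the quantizer's zero-run zbin boost (zrun_zbin_boost), which raised the dead-zone threshold after runs of zero coefficients, is removed, leaving a flat zbin. In its place, vp9_block_error() gains an int64_t *ssz output that returns the sum of squared source coefficients, i.e. the distortion a block would incur if all of its coefficients were zeroed out. The inter-mode RD loop in vp9_rd_pick_inter_mode_sb() uses that value to compare coding the coefficients against skipping the block outright, and sets the skip flag whenever skipping has the lower rate-distortion cost.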

View file

@@ -558,7 +558,7 @@ prototype unsigned int vp9_get_mb_ss "const int16_t *"
 specialize vp9_get_mb_ss mmx sse2
 
 # ENCODEMB INVOKE
-prototype int64_t vp9_block_error "int16_t *coeff, int16_t *dqcoeff, intptr_t block_size"
+prototype int64_t vp9_block_error "int16_t *coeff, int16_t *dqcoeff, intptr_t block_size, int64_t *ssz"
 specialize vp9_block_error sse2
 
 prototype void vp9_subtract_block "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride"

View file

@@ -68,7 +68,6 @@ struct macroblock_plane {
   int16_t *quant;
   uint8_t *quant_shift;
   int16_t *zbin;
-  int16_t *zrun_zbin_boost;
   int16_t *round;
 
   // Zbin Over Quant value

View file

@@ -268,11 +268,7 @@ typedef struct VP9_COMP {
   DECLARE_ALIGNED(16, unsigned char, a_quant_shift[QINDEX_RANGE][16]);
   DECLARE_ALIGNED(16, short, a_zbin[QINDEX_RANGE][16]);
   DECLARE_ALIGNED(16, short, a_round[QINDEX_RANGE][16]);
-  DECLARE_ALIGNED(16, short, zrun_zbin_boost_a[QINDEX_RANGE][16]);
 #endif
-
-  DECLARE_ALIGNED(16, short, zrun_zbin_boost_y[QINDEX_RANGE][16]);
-  DECLARE_ALIGNED(16, short, zrun_zbin_boost_uv[QINDEX_RANGE][16]);
 
   MACROBLOCK mb;
   VP9_COMMON common;

View file

@@ -21,8 +21,7 @@
 extern int enc_debug;
 #endif
 
-static void quantize(int16_t *zbin_boost_orig_ptr,
-                     int16_t *coeff_ptr, int n_coeffs, int skip_block,
+static void quantize(int16_t *coeff_ptr, int n_coeffs, int skip_block,
                      int16_t *zbin_ptr, int16_t *round_ptr, int16_t *quant_ptr,
                      uint8_t *quant_shift_ptr,
                      int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr,
@@ -31,8 +30,6 @@ static void quantize(int16_t *zbin_boost_orig_ptr,
   int i, rc, eob;
   int zbins[2], nzbins[2], zbin;
   int x, y, z, sz;
-  int zero_run = 0;
-  int16_t *zbin_boost_ptr = zbin_boost_orig_ptr;
   int zero_flag = n_coeffs;
 
   vpx_memset(qcoeff_ptr, 0, n_coeffs*sizeof(int16_t));
@@ -65,8 +62,7 @@ static void quantize(int16_t *zbin_boost_orig_ptr,
       rc = scan[i];
       z = coeff_ptr[rc];
 
-      zbin = (zbins[rc != 0] + zbin_boost_ptr[zero_run]);
-      zero_run += (zero_run < 15);
+      zbin = (zbins[rc != 0]);
 
       sz = (z >> 31);  // sign of z
       x = (z ^ sz) - sz;
@@ -81,7 +77,6 @@ static void quantize(int16_t *zbin_boost_orig_ptr,
 
         if (y) {
           eob = i;  // last nonzero coeffs
-          zero_run = 0;  // set zero_run
         }
       }
     }
@@ -90,8 +85,7 @@ static void quantize(int16_t *zbin_boost_orig_ptr,
 }
 
 // This function works well for large transform size.
-static void quantize_sparse(int16_t *zbin_boost_orig_ptr,
-                            int16_t *coeff_ptr, int n_coeffs, int skip_block,
+static void quantize_sparse(int16_t *coeff_ptr, int n_coeffs, int skip_block,
                             int16_t *zbin_ptr, int16_t *round_ptr,
                             int16_t *quant_ptr, uint8_t *quant_shift_ptr,
                             int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr,
@@ -101,10 +95,7 @@ static void quantize_sparse(int16_t *zbin_boost_orig_ptr,
   int i, rc, eob;
   int zbins[2], nzbins[2], zbin;
   int x, y, z, sz;
-  int zero_run = 0;
-  int16_t *zbin_boost_ptr = zbin_boost_orig_ptr;
   int idx = 0;
-  int pre_idx = 0;
 
   vpx_memset(qcoeff_ptr, 0, n_coeffs*sizeof(int16_t));
   vpx_memset(dqcoeff_ptr, 0, n_coeffs*sizeof(int16_t));
@@ -135,11 +126,8 @@ static void quantize_sparse(int16_t *zbin_boost_orig_ptr,
       rc = scan[idx_arr[i]];
 
       // Calculate ZBIN
-      zero_run += idx_arr[i] - pre_idx;
-      if(zero_run > 15) zero_run = 15;
-      zbin = (zbins[rc != 0] + zbin_boost_ptr[zero_run]);
-      pre_idx = idx_arr[i];
+      zbin = (zbins[rc != 0]);
 
       z = coeff_ptr[rc] * 2;
       sz = (z >> 31);  // sign of z
       x = (z ^ sz) - sz;  // x = abs(z)
@@ -155,7 +143,6 @@ static void quantize_sparse(int16_t *zbin_boost_orig_ptr,
 
       if (y) {
         eob = idx_arr[i];  // last nonzero coeffs
-        zero_run = -1;  // set zero_run
       }
     }
   }
@@ -189,8 +176,7 @@ void vp9_quantize(MACROBLOCK *mb, int plane, int block, int n_coeffs,
     // Save index of picked coefficient in pre-scan pass.
     int idx_arr[1024];
 
-    quantize_sparse(mb->plane[plane].zrun_zbin_boost,
-                    BLOCK_OFFSET(mb->plane[plane].coeff, block, 16),
+    quantize_sparse(BLOCK_OFFSET(mb->plane[plane].coeff, block, 16),
                     n_coeffs, mb->skip_block,
                     mb->plane[plane].zbin,
                     mb->plane[plane].round,
@@ -204,8 +190,7 @@ void vp9_quantize(MACROBLOCK *mb, int plane, int block, int n_coeffs,
                     scan, idx_arr);
   }
   else {
-    quantize(mb->plane[plane].zrun_zbin_boost,
-             BLOCK_OFFSET(mb->plane[plane].coeff, block, 16),
+    quantize(BLOCK_OFFSET(mb->plane[plane].coeff, block, 16),
              n_coeffs, mb->skip_block,
              mb->plane[plane].zbin,
             mb->plane[plane].round,
@@ -226,8 +211,7 @@ void vp9_regular_quantize_b_4x4(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type,
   const struct plane_block_idx pb_idx = plane_block_idx(y_blocks, b_idx);
   const int *pt_scan = get_scan_4x4(tx_type);
 
-  quantize(mb->plane[pb_idx.plane].zrun_zbin_boost,
-           BLOCK_OFFSET(mb->plane[pb_idx.plane].coeff, pb_idx.block, 16),
+  quantize(BLOCK_OFFSET(mb->plane[pb_idx.plane].coeff, pb_idx.block, 16),
            16, mb->skip_block,
            mb->plane[pb_idx.plane].zbin,
            mb->plane[pb_idx.plane].round,
@@ -261,9 +245,6 @@ void vp9_init_quantizer(VP9_COMP *cpi) {
 #endif
   int q;
 
-  static const int zbin_boost[16] = { 0, 0, 0, 8, 8, 8, 10, 12,
-                                      14, 16, 20, 24, 28, 32, 36, 40 };
-
   for (q = 0; q < QINDEX_RANGE; q++) {
     int qzbin_factor = (vp9_dc_quant(q, 0) < 148) ? 84 : 80;
     int qrounding_factor = 48;
@@ -277,14 +258,12 @@ void vp9_init_quantizer(VP9_COMP *cpi) {
     cpi->y_zbin[q][0] = ROUND_POWER_OF_TWO(qzbin_factor * quant_val, 7);
     cpi->y_round[q][0] = (qrounding_factor * quant_val) >> 7;
     cpi->common.y_dequant[q][0] = quant_val;
-    cpi->zrun_zbin_boost_y[q][0] = (quant_val * zbin_boost[0]) >> 7;
 
     quant_val = vp9_dc_quant(q, cpi->common.uv_dc_delta_q);
     invert_quant(cpi->uv_quant[q] + 0, cpi->uv_quant_shift[q] + 0, quant_val);
     cpi->uv_zbin[q][0] = ROUND_POWER_OF_TWO(qzbin_factor * quant_val, 7);
     cpi->uv_round[q][0] = (qrounding_factor * quant_val) >> 7;
     cpi->common.uv_dequant[q][0] = quant_val;
-    cpi->zrun_zbin_boost_uv[q][0] = (quant_val * zbin_boost[0]) >> 7;
 
 #if CONFIG_ALPHA
     quant_val = vp9_dc_quant(q, cpi->common.a_dc_delta_q);
@@ -292,7 +271,6 @@ void vp9_init_quantizer(VP9_COMP *cpi) {
     cpi->a_zbin[q][0] = ROUND_POWER_OF_TWO(qzbin_factor * quant_val, 7);
     cpi->a_round[q][0] = (qrounding_factor * quant_val) >> 7;
     cpi->common.a_dequant[q][0] = quant_val;
-    cpi->zrun_zbin_boost_a[q][0] = (quant_val * zbin_boost[0]) >> 7;
 #endif
 
     quant_val = vp9_ac_quant(q, 0);
@@ -310,15 +288,11 @@ void vp9_init_quantizer(VP9_COMP *cpi) {
       invert_quant(cpi->y_quant[q] + rc, cpi->y_quant_shift[q] + rc, quant_val);
       cpi->y_zbin[q][rc] = ROUND_POWER_OF_TWO(qzbin_factor * quant_val, 7);
       cpi->y_round[q][rc] = (qrounding_factor * quant_val) >> 7;
-      cpi->zrun_zbin_boost_y[q][i] =
-          ROUND_POWER_OF_TWO(quant_val * zbin_boost[i], 7);
 
       invert_quant(cpi->uv_quant[q] + rc, cpi->uv_quant_shift[q] + rc,
                    quant_uv_val);
       cpi->uv_zbin[q][rc] = ROUND_POWER_OF_TWO(qzbin_factor * quant_uv_val, 7);
       cpi->uv_round[q][rc] = (qrounding_factor * quant_uv_val) >> 7;
-      cpi->zrun_zbin_boost_uv[q][i] =
-          ROUND_POWER_OF_TWO(quant_uv_val * zbin_boost[i], 7);
 
 #if CONFIG_ALPHA
       invert_quant(cpi->a_quant[q] + rc, cpi->a_quant_shift[q] + rc,
@@ -326,8 +300,6 @@ void vp9_init_quantizer(VP9_COMP *cpi) {
       cpi->a_zbin[q][rc] =
           ROUND_POWER_OF_TWO(qzbin_factor * quant_alpha_val, 7);
       cpi->a_round[q][rc] = (qrounding_factor * quant_alpha_val) >> 7;
-      cpi->zrun_zbin_boost_a[q][i] =
-          ROUND_POWER_OF_TWO(quant_alpha_val * zbin_boost[i], 7);
 #endif
     }
   }
@@ -348,7 +320,6 @@ void vp9_mb_init_quantizer(VP9_COMP *cpi, MACROBLOCK *x) {
   x->plane[0].quant_shift = cpi->y_quant_shift[qindex];
   x->plane[0].zbin = cpi->y_zbin[qindex];
   x->plane[0].round = cpi->y_round[qindex];
-  x->plane[0].zrun_zbin_boost = cpi->zrun_zbin_boost_y[qindex];
   x->plane[0].zbin_extra = (int16_t)zbin_extra;
   x->e_mbd.plane[0].dequant = cpi->common.y_dequant[qindex];
 
@@ -361,7 +332,6 @@ void vp9_mb_init_quantizer(VP9_COMP *cpi, MACROBLOCK *x) {
     x->plane[i].quant_shift = cpi->uv_quant_shift[qindex];
     x->plane[i].zbin = cpi->uv_zbin[qindex];
     x->plane[i].round = cpi->uv_round[qindex];
-    x->plane[i].zrun_zbin_boost = cpi->zrun_zbin_boost_uv[qindex];
     x->plane[i].zbin_extra = (int16_t)zbin_extra;
     x->e_mbd.plane[i].dequant = cpi->common.uv_dequant[qindex];
   }
@@ -371,7 +341,6 @@ void vp9_mb_init_quantizer(VP9_COMP *cpi, MACROBLOCK *x) {
   x->plane[3].quant_shift = cpi->a_quant_shift[qindex];
   x->plane[3].zbin = cpi->a_zbin[qindex];
   x->plane[3].round = cpi->a_round[qindex];
-  x->plane[3].zrun_zbin_boost = cpi->zrun_zbin_boost_a[qindex];
   x->plane[3].zbin_extra = (int16_t)zbin_extra;
   x->e_mbd.plane[3].dequant = cpi->common.a_dequant[qindex];
 #endif

View file

@@ -283,15 +283,17 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) {
 }
 
 int64_t vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff,
-                          intptr_t block_size) {
+                          intptr_t block_size, int64_t *ssz) {
   int i;
-  int64_t error = 0;
+  int64_t error = 0, sqcoeff = 0;
 
   for (i = 0; i < block_size; i++) {
     int this_diff = coeff[i] - dqcoeff[i];
     error += (unsigned)this_diff * this_diff;
+    sqcoeff += (unsigned) coeff[i] * coeff[i];
   }
 
+  *ssz = sqcoeff;
   return error;
 }
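
For reference, the new C implementation from the hunk above, restated as a clean listing. The only behavioral addition is the *ssz output, the sum of squared source coefficients, which is exactly the distortion the block would incur if it were coded as all zeros:

#include <stdint.h>

int64_t vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
  int i;
  int64_t error = 0, sqcoeff = 0;

  for (i = 0; i < block_size; i++) {
    int this_diff = coeff[i] - dqcoeff[i];
    // The unsigned multiply is exact: a squared 17-bit difference fits in
    // 32 bits, and the result zero-extends into the 64-bit accumulator.
    error += (unsigned)this_diff * this_diff;
    sqcoeff += (unsigned)coeff[i] * coeff[i];
  }

  *ssz = sqcoeff;  // distortion of the all-zero (skip) alternative
  return error;
}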
@@ -501,27 +503,31 @@ static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
 }
 
 static int64_t block_error_sby(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize,
-                               int shift) {
+                               int shift, int64_t *sse) {
   struct macroblockd_plane *p = &x->e_mbd.plane[0];
   const int bw = plane_block_width(bsize, p);
   const int bh = plane_block_height(bsize, p);
-  return vp9_block_error(x->plane[0].coeff, x->e_mbd.plane[0].dqcoeff,
-                         bw * bh) >> shift;
+  int64_t e = vp9_block_error(x->plane[0].coeff, x->e_mbd.plane[0].dqcoeff,
+                              bw * bh, sse) >> shift;
+  *sse >>= shift;
+  return e;
 }
 
 static int64_t block_error_sbuv(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize,
-                                int shift) {
-  int64_t sum = 0;
+                                int shift, int64_t *sse) {
+  int64_t sum = 0, this_sse;
   int plane;
 
+  *sse = 0;
   for (plane = 1; plane < MAX_MB_PLANE; plane++) {
     struct macroblockd_plane *p = &x->e_mbd.plane[plane];
     const int bw = plane_block_width(bsize, p);
     const int bh = plane_block_height(bsize, p);
     sum += vp9_block_error(x->plane[plane].coeff, x->e_mbd.plane[plane].dqcoeff,
-                           bw * bh);
+                           bw * bh, &this_sse);
+    *sse += this_sse;
   }
+  *sse >>= shift;
   return sum >> shift;
 }
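
Note that both helpers scale the new *sse output by the same shift as the returned distortion, so the skip term stays in the same units as the coded distortion when the two are later compared with RDCOST().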
@@ -581,7 +587,7 @@ static int rdcost_uv(VP9_COMMON *const cm, MACROBLOCK *x,
 
 static void super_block_yrd_for_txfm(VP9_COMMON *const cm, MACROBLOCK *x,
                                      int *rate, int64_t *distortion,
-                                     int *skippable,
+                                     int *skippable, int64_t *sse,
                                      BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
   MACROBLOCKD *const xd = &x->e_mbd;
   xd->mode_info_context->mbmi.txfm_size = tx_size;
@@ -591,18 +597,18 @@ static void super_block_yrd_for_txfm(VP9_COMMON *const cm, MACROBLOCK *x,
   else
     vp9_xform_quant_sby(cm, x, bsize);
 
-  *distortion = block_error_sby(x, bsize, tx_size == TX_32X32 ? 0 : 2);
+  *distortion = block_error_sby(x, bsize, tx_size == TX_32X32 ? 0 : 2, sse);
   *rate = rdcost_plane(cm, x, 0, bsize, tx_size);
   *skippable = vp9_sby_is_skippable(xd, bsize);
 }
 
 static void super_block_yrd(VP9_COMP *cpi,
                             MACROBLOCK *x, int *rate, int64_t *distortion,
-                            int *skip, BLOCK_SIZE_TYPE bs,
+                            int *skip, int64_t *psse, BLOCK_SIZE_TYPE bs,
                             int64_t txfm_cache[NB_TXFM_MODES]) {
   VP9_COMMON *const cm = &cpi->common;
   int r[TX_SIZE_MAX_SB][2], s[TX_SIZE_MAX_SB];
-  int64_t d[TX_SIZE_MAX_SB];
+  int64_t d[TX_SIZE_MAX_SB], sse[TX_SIZE_MAX_SB];
   MACROBLOCKD *xd = &x->e_mbd;
   MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
 
@@ -621,25 +627,27 @@ static void super_block_yrd(VP9_COMP *cpi,
       mbmi->txfm_size = TX_4X4;
     }
     vpx_memset(txfm_cache, 0, NB_TXFM_MODES * sizeof(int64_t));
-    super_block_yrd_for_txfm(cm, x, rate, distortion, skip, bs,
+    super_block_yrd_for_txfm(cm, x, rate, distortion, skip, &sse[0], bs,
                              mbmi->txfm_size);
     return;
   }
 
   if (bs >= BLOCK_SIZE_SB32X32)
     super_block_yrd_for_txfm(cm, x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32],
-                             bs, TX_32X32);
+                             &sse[TX_32X32], bs, TX_32X32);
   if (bs >= BLOCK_SIZE_MB16X16)
     super_block_yrd_for_txfm(cm, x, &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16],
-                             bs, TX_16X16);
+                             &sse[TX_16X16], bs, TX_16X16);
-  super_block_yrd_for_txfm(cm, x, &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8], bs,
-                           TX_8X8);
-  super_block_yrd_for_txfm(cm, x, &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4], bs,
-                           TX_4X4);
+  super_block_yrd_for_txfm(cm, x, &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8],
+                           &sse[TX_8X8], bs, TX_8X8);
+  super_block_yrd_for_txfm(cm, x, &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4],
+                           &sse[TX_4X4], bs, TX_4X4);
 
   choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s,
                            skip, txfm_cache,
                            TX_32X32 - (bs < BLOCK_SIZE_SB32X32)
                            - (bs < BLOCK_SIZE_MB16X16));
+
+  if (psse)
+    *psse = sse[mbmi->txfm_size];
 }
 
 static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
@@ -688,6 +696,8 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
 
     for (idy = 0; idy < bh; ++idy) {
       for (idx = 0; idx < bw; ++idx) {
+        int64_t ssz;
+
         block = ib + idy * 2 + idx;
         xd->mode_info_context->bmi[block].as_mode.first = mode;
         src = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block,
@@ -718,7 +728,8 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
         ratey += cost_coeffs(cm, x, 0, block, PLANE_TYPE_Y_WITH_DC,
                              tempa + idx, templ + idy, TX_4X4, 16);
         distortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff,
-                                                          block, 16), 16) >> 2;
+                                                          block, 16),
+                                      16, &ssz) >> 2;
 
         if (best_tx_type != DCT_DCT)
           vp9_short_iht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block, 16),
@@ -881,7 +892,7 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
     }
 
     x->e_mbd.mode_info_context->mbmi.mode = mode;
-    super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s,
+    super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s, NULL,
                     bsize, local_txfm_cache);
 
     this_rate = this_rate_tokenonly + bmode_costs[mode];
@@ -914,22 +925,25 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
 
 static void super_block_uvrd_for_txfm(VP9_COMMON *const cm, MACROBLOCK *x,
                                       int *rate, int64_t *distortion,
-                                      int *skippable, BLOCK_SIZE_TYPE bsize,
+                                      int *skippable, int64_t *sse,
+                                      BLOCK_SIZE_TYPE bsize,
                                       TX_SIZE uv_tx_size) {
   MACROBLOCKD *const xd = &x->e_mbd;
+  int64_t dummy;
   if (xd->mode_info_context->mbmi.ref_frame[0] == INTRA_FRAME)
     vp9_encode_intra_block_uv(cm, x, bsize);
   else
     vp9_xform_quant_sbuv(cm, x, bsize);
 
-  *distortion = block_error_sbuv(x, bsize, uv_tx_size == TX_32X32 ? 0 : 2);
+  *distortion = block_error_sbuv(x, bsize, uv_tx_size == TX_32X32 ? 0 : 2,
+                                 sse ? sse : &dummy);
   *rate = rdcost_uv(cm, x, bsize, uv_tx_size);
   *skippable = vp9_sbuv_is_skippable(xd, bsize);
 }
 
 static void super_block_uvrd(VP9_COMMON *const cm, MACROBLOCK *x,
                              int *rate, int64_t *distortion, int *skippable,
-                             BLOCK_SIZE_TYPE bsize) {
+                             int64_t *sse, BLOCK_SIZE_TYPE bsize) {
   MACROBLOCKD *const xd = &x->e_mbd;
   MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
   TX_SIZE uv_txfm_size = get_uv_tx_size(mbmi);
@@ -937,7 +951,7 @@ static void super_block_uvrd(VP9_COMMON *const cm, MACROBLOCK *x,
   if (mbmi->ref_frame[0] > INTRA_FRAME)
     vp9_subtract_sbuv(x, bsize);
 
-  super_block_uvrd_for_txfm(cm, x, rate, distortion, skippable, bsize,
+  super_block_uvrd_for_txfm(cm, x, rate, distortion, skippable, sse, bsize,
                             uv_txfm_size);
 }
@@ -954,7 +968,7 @@ static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x,
   for (mode = DC_PRED; mode <= TM_PRED; mode++) {
     x->e_mbd.mode_info_context->mbmi.uv_mode = mode;
     super_block_uvrd(&cpi->common, x, &this_rate_tokenonly,
-                     &this_distortion, &s, bsize);
+                     &this_distortion, &s, NULL, bsize);
     this_rate = this_rate_tokenonly +
                 x->intra_uv_mode_cost[x->e_mbd.frame_type][mode];
     this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
@@ -1151,6 +1165,8 @@ static int64_t encode_inter_mb_segment(VP9_COMMON *const cm,
   k = i;
   for (idy = 0; idy < bh / 4; ++idy) {
     for (idx = 0; idx < bw / 4; ++idx) {
+      int64_t ssz;
+
       k += (idy * 2 + idx);
       src_diff = raster_block_offset_int16(xd, BLOCK_SIZE_SB8X8, 0, k,
                                            x->plane[0].src_diff);
@@ -1159,7 +1175,7 @@ static int64_t encode_inter_mb_segment(VP9_COMMON *const cm,
       x->quantize_b_4x4(x, k, DCT_DCT, 16);
       thisdistortion += vp9_block_error(coeff,
                                         BLOCK_OFFSET(xd->plane[0].dqcoeff,
-                                                     k, 16), 16);
+                                                     k, 16), 16, &ssz);
       thisrate += cost_coeffs(cm, x, 0, k, PLANE_TYPE_Y_WITH_DC,
                               ta + (k & 1),
                               tl + (k >> 1), TX_4X4, 16);
@@ -2238,7 +2254,8 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
                                  INTERPOLATIONFILTERTYPE *best_filter,
                                  int_mv *frame_mv,
                                  int mi_row, int mi_col,
-                                 int_mv single_newmv[MAX_REF_FRAMES]) {
+                                 int_mv single_newmv[MAX_REF_FRAMES],
+                                 int64_t *psse) {
   VP9_COMMON *cm = &cpi->common;
   MACROBLOCKD *xd = &x->e_mbd;
   MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
@@ -2467,17 +2484,19 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
 
   if (!x->skip) {
     int skippable_y, skippable_uv;
+    int64_t sseuv = INT_MAX;
 
     // Y cost and distortion
-    super_block_yrd(cpi, x, rate_y, distortion_y, &skippable_y,
+    super_block_yrd(cpi, x, rate_y, distortion_y, &skippable_y, psse,
                     bsize, txfm_cache);
 
     *rate2 += *rate_y;
     *distortion += *distortion_y;
 
     super_block_uvrd(cm, x, rate_uv, distortion_uv,
-                     &skippable_uv, bsize);
+                     &skippable_uv, &sseuv, bsize);
+
+    *psse += sseuv;
     *rate2 += *rate_uv;
     *distortion += *distortion_uv;
     *skippable = skippable_y && skippable_uv;
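
With this change, *psse carries the full skip distortion for the block: super_block_yrd() writes the luma sum of squared coefficients for the chosen transform size, and the chroma part is accumulated on top via sseuv.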
@@ -2611,6 +2630,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
   int bws = (1 << bwsl) / 4;  // mode_info step for subsize
   int bhsl = b_height_log2(bsize);
   int bhs = (1 << bhsl) / 4;  // mode_info step for subsize
+  int best_skip2 = 0;
 
   for (i = 0; i < 4; i++) {
     int j;
@@ -2702,6 +2722,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
     int skippable;
     int64_t txfm_cache[NB_TXFM_MODES];
     int i;
+    int this_skip2 = 0;
+    int64_t total_sse = INT_MAX;
 
     for (i = 0; i < NB_TXFM_MODES; ++i)
       txfm_cache[i] = INT64_MAX;
@@ -2863,7 +2885,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
         txfm_cache[i] = txfm_cache[ONLY_4X4];
     } else if (ref_frame == INTRA_FRAME) {
       TX_SIZE uv_tx;
-      super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable,
+      super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable, NULL,
                       bsize, txfm_cache);
 
       uv_tx = mbmi->txfm_size;
@@ -2989,7 +3011,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
                                               BLOCK_SIZE_SB8X8);
         vp9_subtract_sbuv(x, BLOCK_SIZE_SB8X8);
         super_block_uvrd_for_txfm(cm, x, &rate_uv, &distortion_uv,
-                                  &uv_skippable, BLOCK_SIZE_SB8X8, TX_4X4);
+                                  &uv_skippable, NULL, BLOCK_SIZE_SB8X8, TX_4X4);
         rate2 += rate_uv;
         distortion2 += distortion_uv;
         skippable = skippable && uv_skippable;
@@ -3017,7 +3039,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
                                   &mode_excluded, &disable_skip,
                                   &tmp_best_filter, frame_mv[this_mode],
                                   mi_row, mi_col,
-                                  single_newmv);
+                                  single_newmv, &total_sse);
       if (this_rd == INT64_MAX)
         continue;
     }
@@ -3062,10 +3084,29 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
           rate2 += prob_skip_cost;
         }
       }
+    } else if (mb_skip_allowed && ref_frame != INTRA_FRAME &&
+               this_mode != SPLITMV) {
+      if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
+          RDCOST(x->rdmult, x->rddiv, 0, total_sse)) {
+        // Add in the cost of the no skip flag.
+        int prob_skip_cost = vp9_cost_bit(vp9_get_pred_prob(cm, xd,
+                                                            PRED_MBSKIP), 0);
+        rate2 += prob_skip_cost;
+      } else {
+        int prob_skip_cost = vp9_cost_bit(vp9_get_pred_prob(cm, xd,
+                                                            PRED_MBSKIP), 1);
+        rate2 += prob_skip_cost;
+        distortion2 = total_sse;
+        assert(total_sse >= 0);
+        rate2 -= (rate_y + rate_uv);
+        rate_y = 0;
+        rate_uv = 0;
+        this_skip2 = 1;
+      }
     } else if (mb_skip_allowed) {
       // Add in the cost of the no skip flag.
       int prob_skip_cost = vp9_cost_bit(vp9_get_pred_prob(cm, xd,
                                                           PRED_MBSKIP), 0);
      rate2 += prob_skip_cost;
    }
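
This hunk is the explicit RD choice named in the commit title: instead of biasing coefficients toward zero inside the quantizer, the encoder now prices both alternatives and picks the cheaper one. A condensed sketch of the rule follows; RDCOST is paraphrased here from its usual lambda-weighted form in the encoder's RD header, so treat the macro body as an assumption:

#include <stdint.h>

/* Paraphrased RD cost: lambda-weighted rate plus shifted distortion.
 * The real macro lives in the encoder headers; this is a sketch. */
#define RDCOST(rm, dm, r, d) \
  ((((int64_t)(r) * (rm) + 128) >> 8) + ((int64_t)(d) << (dm)))

/* The new choice: code the coefficients (pay rate_y + rate_uv, get
 * distortion2) or skip them (pay no coefficient bits, get total_sse). */
static int skip_wins(int rdmult, int rddiv, int rate_y, int rate_uv,
                     int64_t distortion2, int64_t total_sse) {
  return RDCOST(rdmult, rddiv, 0, total_sse) <=
         RDCOST(rdmult, rddiv, rate_y + rate_uv, distortion2);
}

When skipping wins, the hunk refunds rate_y and rate_uv from rate2, charges the skip flag instead, and replaces distortion2 with total_sse; best_skip2 then propagates the decision out of the mode loop into x->skip.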
@@ -3119,6 +3160,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
         *returndistortion = distortion2;
         best_rd = this_rd;
         best_mbmode = *mbmi;
+        best_skip2 = this_skip2;
         best_partition = *x->partition_info;
 
         if (this_mode == I4X4_PRED || this_mode == SPLITMV)
@@ -3301,6 +3343,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
   // macroblock modes
   *mbmi = best_mbmode;
+  x->skip |= best_skip2;
 
   if (best_mbmode.ref_frame[0] == INTRA_FRAME &&
       best_mbmode.sb_type < BLOCK_SIZE_SB8X8) {
     for (i = 0; i < 4; i++)

View file

@@ -12,45 +12,62 @@
 SECTION .text
 
-; void vp9_block_error(int16_t *coeff, int16_t *dqcoeff, intptr_t block_size)
+; int64_t vp9_block_error(int16_t *coeff, int16_t *dqcoeff, intptr_t block_size,
+;                         int64_t *ssz)
 
 INIT_XMM sse2
-cglobal block_error, 3, 3, 6, uqc, dqc, size
-  pxor      m4, m4                 ; accumulator
+cglobal block_error, 3, 3, 8, uqc, dqc, size, ssz
+  pxor      m4, m4                 ; sse accumulator
+  pxor      m6, m6                 ; ssz accumulator
   pxor      m5, m5                 ; dedicated zero register
   lea     uqcq, [uqcq+sizeq*2]
   lea     dqcq, [dqcq+sizeq*2]
   neg    sizeq
 .loop:
-  mova      m0, [uqcq+sizeq*2]
-  mova      m2, [dqcq+sizeq*2]
-  mova      m1, [uqcq+sizeq*2+mmsize]
-  mova      m3, [dqcq+sizeq*2+mmsize]
+  mova      m2, [uqcq+sizeq*2]
+  mova      m0, [dqcq+sizeq*2]
+  mova      m3, [uqcq+sizeq*2+mmsize]
+  mova      m1, [dqcq+sizeq*2+mmsize]
   psubw     m0, m2
   psubw     m1, m3
   ; individual errors are max. 15bit+sign, so squares are 30bit, and
   ; thus the sum of 2 should fit in a 31bit integer (+ unused sign bit)
   pmaddwd   m0, m0
   pmaddwd   m1, m1
+  pmaddwd   m2, m2
+  pmaddwd   m3, m3
   ; accumulate in 64bit
-  punpckldq m2, m0, m5
-  punpckhdq m0, m5
-  punpckldq m3, m1, m5
-  punpckhdq m1, m5
-  paddq     m4, m2
-  paddq     m4, m0
-  paddq     m4, m3
-  paddq     m4, m1
+  punpckldq m7, m0, m5
+  punpckhdq m0, m5
+  paddq     m4, m7
+  punpckldq m7, m1, m5
+  paddq     m4, m0
+  punpckhdq m1, m5
+  paddq     m4, m7
+  punpckldq m7, m2, m5
+  paddq     m4, m1
+  punpckhdq m2, m5
+  paddq     m6, m7
+  punpckldq m7, m3, m5
+  paddq     m6, m2
+  punpckhdq m3, m5
+  paddq     m6, m7
+  paddq     m6, m3
   add    sizeq, mmsize
   jl .loop
 
   ; accumulate horizontally and store in return value
   movhlps   m5, m4
+  movhlps   m7, m6
   paddq     m4, m5
+  paddq     m6, m7
 %if ARCH_X86_64
   movq    rax, m4
+  movq [sszq], m6
 %else
+  mov     eax, sszm
   pshufd   m5, m4, 0x1
+  movq  [eax], m6
   movd    eax, m4
   movd    edx, m5
 %endif
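
The reworked kernel now needs eight xmm registers (m6 accumulates ssz, m7 is scratch) and must stay bit-exact with the C reference on both outputs. A minimal cross-check, assuming the RTCD-generated symbol names vp9_block_error_c and vp9_block_error_sse2 (inferred from the specialize line in the first file, so treat the names as an assumption):

#include <assert.h>
#include <stdint.h>

/* Assumed RTCD-generated symbols for the two implementations. */
int64_t vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz);
int64_t vp9_block_error_sse2(int16_t *coeff, int16_t *dqcoeff,
                             intptr_t block_size, int64_t *ssz);

/* Both the returned error and the *ssz side output must agree. */
static void check_block_error(int16_t *coeff, int16_t *dqcoeff, intptr_t n) {
  int64_t ssz_c, ssz_simd;
  int64_t err_c = vp9_block_error_c(coeff, dqcoeff, n, &ssz_c);
  int64_t err_simd = vp9_block_error_sse2(coeff, dqcoeff, n, &ssz_simd);
  assert(err_c == err_simd);
  assert(ssz_c == ssz_simd);
}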