From 6f43ff5824e16a4c0bbbcc890c5e54e74755447f Mon Sep 17 00:00:00 2001 From: Jingning Han Date: Mon, 15 Apr 2013 09:31:27 -0700 Subject: [PATCH] Make the use of pred buffers consistent in MB/SB Use in-place buffers (dst of MACROBLOCKD) for macroblock prediction. This makes the macroblock buffer handling consistent with that of superblocks. Remove the predictor buffer from MACROBLOCKD. Change-Id: Id1bcd898961097b1e6230c10f0130753a59fc6df --- vp9/common/vp9_blockd.h | 2 - vp9/common/vp9_mbpitch.c | 3 - vp9/common/vp9_recon.c | 19 +-- vp9/common/vp9_reconinter.c | 70 ++--------- vp9/common/vp9_reconinter.h | 26 ++-- vp9/common/vp9_reconintra.c | 37 +----- vp9/common/vp9_rtcd_defs.sh | 35 +++--- vp9/common/x86/vp9_recon_wrapper_sse2.c | 8 +- vp9/decoder/vp9_decodframe.c | 2 +- vp9/encoder/vp9_encodeframe.c | 15 +-- vp9/encoder/vp9_encodeintra.c | 34 ++--- vp9/encoder/vp9_encodemb.c | 45 +++---- vp9/encoder/vp9_encodemb.h | 7 -- vp9/encoder/vp9_mbgraph.c | 99 ++++++++------- vp9/encoder/vp9_rdopt.c | 137 +++++++++------------ vp9/encoder/x86/vp9_x86_csystemdependent.c | 8 +- 16 files changed, 212 insertions(+), 335 deletions(-) diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h index 8d67402c8..9525a0e14 100644 --- a/vp9/common/vp9_blockd.h +++ b/vp9/common/vp9_blockd.h @@ -290,7 +290,6 @@ typedef struct { } MODE_INFO; typedef struct blockd { - uint8_t *predictor; int16_t *diff; int16_t *dequant; @@ -354,7 +353,6 @@ struct mb_plane { typedef struct macroblockd { DECLARE_ALIGNED(16, int16_t, diff[64*64+32*32*2]); /* from idct diff */ - DECLARE_ALIGNED(16, uint8_t, predictor[384]); // unused for superblocks #if CONFIG_CODE_NONZEROCOUNT DECLARE_ALIGNED(16, uint16_t, nzcs[256+64*2]); #endif diff --git a/vp9/common/vp9_mbpitch.c b/vp9/common/vp9_mbpitch.c index aba950e9a..6ed5f27d9 100644 --- a/vp9/common/vp9_mbpitch.c +++ b/vp9/common/vp9_mbpitch.c @@ -78,7 +78,6 @@ void vp9_setup_block_dptrs(MACROBLOCKD *mb) { const int to = r * 4 + c; const int from = r * 4 * 16 + c *
4; blockd[to].diff = &mb->diff[from]; - blockd[to].predictor = &mb->predictor[from]; } } @@ -87,7 +86,6 @@ void vp9_setup_block_dptrs(MACROBLOCKD *mb) { const int to = 16 + r * 2 + c; const int from = 256 + r * 4 * 8 + c * 4; blockd[to].diff = &mb->diff[from]; - blockd[to].predictor = &mb->predictor[from]; } } @@ -96,7 +94,6 @@ void vp9_setup_block_dptrs(MACROBLOCKD *mb) { const int to = 20 + r * 2 + c; const int from = 320 + r * 4 * 8 + c * 4; blockd[to].diff = &mb->diff[from]; - blockd[to].predictor = &mb->predictor[from]; } } diff --git a/vp9/common/vp9_recon.c b/vp9/common/vp9_recon.c index 0625ccb8e..121776c69 100644 --- a/vp9/common/vp9_recon.c +++ b/vp9/common/vp9_recon.c @@ -32,22 +32,22 @@ static INLINE void recon(int rows, int cols, void vp9_recon_b_c(uint8_t *pred_ptr, int16_t *diff_ptr, uint8_t *dst_ptr, int stride) { - recon(4, 4, pred_ptr, 16, diff_ptr, 16, dst_ptr, stride); + recon(4, 4, pred_ptr, stride, diff_ptr, 16, dst_ptr, stride); } void vp9_recon_uv_b_c(uint8_t *pred_ptr, int16_t *diff_ptr, uint8_t *dst_ptr, int stride) { - recon(4, 4, pred_ptr, 8, diff_ptr, 8, dst_ptr, stride); + recon(4, 4, pred_ptr, stride, diff_ptr, 8, dst_ptr, stride); } void vp9_recon4b_c(uint8_t *pred_ptr, int16_t *diff_ptr, uint8_t *dst_ptr, int stride) { - recon(4, 16, pred_ptr, 16, diff_ptr, 16, dst_ptr, stride); + recon(4, 16, pred_ptr, stride, diff_ptr, 16, dst_ptr, stride); } void vp9_recon2b_c(uint8_t *pred_ptr, int16_t *diff_ptr, uint8_t *dst_ptr, int stride) { - recon(4, 8, pred_ptr, 8, diff_ptr, 8, dst_ptr, stride); + recon(4, 8, pred_ptr, stride, diff_ptr, 8, dst_ptr, stride); } void vp9_recon_sby_s_c(MACROBLOCKD *mb, uint8_t *dst, @@ -95,7 +95,8 @@ void vp9_recon_mby_c(MACROBLOCKD *xd) { for (i = 0; i < 16; i += 4) { BLOCKD *b = &xd->block[i]; - vp9_recon4b(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); + vp9_recon4b(*(b->base_dst) + b->dst, b->diff, + *(b->base_dst) + b->dst, b->dst_stride); } } @@ -104,13 +105,13 @@ void 
vp9_recon_mb_c(MACROBLOCKD *xd) { for (i = 0; i < 16; i += 4) { BLOCKD *b = &xd->block[i]; - - vp9_recon4b(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); + vp9_recon4b(*(b->base_dst) + b->dst, b->diff, + *(b->base_dst) + b->dst, b->dst_stride); } for (i = 16; i < 24; i += 2) { BLOCKD *b = &xd->block[i]; - - vp9_recon2b(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); + vp9_recon2b(*(b->base_dst) + b->dst, b->diff, + *(b->base_dst) + b->dst, b->dst_stride); } } diff --git a/vp9/common/vp9_reconinter.c b/vp9/common/vp9_reconinter.c index 79ca0b4e5..71be77df1 100644 --- a/vp9/common/vp9_reconinter.c +++ b/vp9/common/vp9_reconinter.c @@ -399,7 +399,7 @@ static void build_2x1_inter_predictor_wh(const BLOCKD *d0, const BLOCKD *d1, int row, int col) { struct scale_factors * scale = &s[which_mv]; - assert(d1->predictor - d0->predictor == block_size); + assert(d1->dst - d0->dst == block_size); assert(d1->pre == d0->pre + block_size); scale->set_scaled_offsets(scale, row, col); @@ -446,11 +446,11 @@ static void build_2x1_inter_predictor(const BLOCKD *d0, const BLOCKD *d1, int block_size, int stride, int which_mv, int weight, const struct subpix_fn_table *subpix, - int row, int col, int use_dst) { - uint8_t *d0_predictor = use_dst ? *(d0->base_dst) + d0->dst : d0->predictor; - uint8_t *d1_predictor = use_dst ? *(d1->base_dst) + d1->dst : d1->predictor; + int row, int col) { + uint8_t *d0_predictor = *(d0->base_dst) + d0->dst; + uint8_t *d1_predictor = *(d1->base_dst) + d1->dst; struct scale_factors * scale = &s[which_mv]; - stride = use_dst ? 
d0->dst_stride : stride; + stride = d0->dst_stride; assert(d1_predictor - d0_predictor == block_size); assert(d1->pre == d0->pre + block_size); @@ -1338,8 +1338,7 @@ void vp9_build_inter_predictors_sb(MACROBLOCKD *mb, } static void build_inter4x4_predictors_mb(MACROBLOCKD *xd, - int mb_row, int mb_col, - int use_dst) { + int mb_row, int mb_col) { int i; MB_MODE_INFO * mbmi = &xd->mode_info_context->mbmi; BLOCKD *blockd = xd->block; @@ -1368,8 +1367,7 @@ static void build_inter4x4_predictors_mb(MACROBLOCKD *xd, build_2x1_inter_predictor(d0, d1, xd->scale_factor, 8, 16, which_mv, which_mv ? weight : 0, - &xd->subpix, mb_row * 16 + y, mb_col * 16, - use_dst); + &xd->subpix, mb_row * 16 + y, mb_col * 16); } } } else { @@ -1386,8 +1384,7 @@ static void build_inter4x4_predictors_mb(MACROBLOCKD *xd, build_2x1_inter_predictor(d0, d1, xd->scale_factor, 4, 16, which_mv, which_mv ? weight : 0, &xd->subpix, - mb_row * 16 + y, mb_col * 16 + x, - use_dst); + mb_row * 16 + y, mb_col * 16 + x); } } } @@ -1405,8 +1402,7 @@ static void build_inter4x4_predictors_mb(MACROBLOCKD *xd, for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) { build_2x1_inter_predictor(d0, d1, xd->scale_factor_uv, 4, 8, which_mv, which_mv ? 
weight : 0, &xd->subpix, - mb_row * 8 + y, mb_col * 8 + x, - use_dst); + mb_row * 8 + y, mb_col * 8 + x); } } } @@ -1493,58 +1489,17 @@ static void build_4x4uvmvs(MACROBLOCKD *xd) { } } -void vp9_build_inter16x16_predictors_mb(MACROBLOCKD *xd, - uint8_t *dst_y, - uint8_t *dst_u, - uint8_t *dst_v, - int dst_ystride, - int dst_uvstride, - int mb_row, - int mb_col) { - vp9_build_inter16x16_predictors_mby(xd, dst_y, dst_ystride, mb_row, mb_col); - vp9_build_inter16x16_predictors_mbuv(xd, dst_u, dst_v, dst_uvstride, - mb_row, mb_col); -#if CONFIG_COMP_INTERINTRA_PRED - if (xd->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) { - vp9_build_interintra_16x16_predictors_mb(xd, dst_y, dst_u, dst_v, - dst_ystride, dst_uvstride); - } -#endif -} - void vp9_build_inter_predictors_mb(MACROBLOCKD *xd, int mb_row, int mb_col) { if (xd->mode_info_context->mbmi.mode != SPLITMV) { - // TODO(jingning): to be replaced with vp9_build_inter_predictors_sb() when - // converting buffers from predictors to dst. 
- vp9_build_inter16x16_predictors_mb(xd, xd->predictor, - &xd->predictor[256], - &xd->predictor[320], 16, 8, - mb_row, mb_col); - + vp9_build_inter_predictors_sb(xd, mb_row, mb_col, BLOCK_SIZE_MB16X16); } else { build_4x4uvmvs(xd); - build_inter4x4_predictors_mb(xd, mb_row, mb_col, 0); + build_inter4x4_predictors_mb(xd, mb_row, mb_col); } } -void vp9_build_inter_predictors_mb_s(MACROBLOCKD *xd, - int mb_row, - int mb_col) { - if (xd->mode_info_context->mbmi.mode != SPLITMV) { - vp9_build_inter16x16_predictors_mb(xd, xd->dst.y_buffer, - xd->dst.u_buffer, - xd->dst.v_buffer, - xd->dst.y_stride, - xd->dst.uv_stride, - mb_row, mb_col); - - } else { - build_4x4uvmvs(xd); - build_inter4x4_predictors_mb(xd, mb_row, mb_col, 1); - } -} /*encoder only*/ void vp9_build_inter4x4_predictors_mbuv(MACROBLOCKD *xd, int mb_row, int mb_col) { @@ -1593,8 +1548,7 @@ void vp9_build_inter4x4_predictors_mbuv(MACROBLOCKD *xd, for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) { build_2x1_inter_predictor(d0, d1, xd->scale_factor_uv, 4, 8, which_mv, which_mv ? 
weight : 0, - &xd->subpix, mb_row * 8 + y, mb_col * 8 + x, - 0); + &xd->subpix, mb_row * 8 + y, mb_col * 8 + x); } } } diff --git a/vp9/common/vp9_reconinter.h b/vp9/common/vp9_reconinter.h index 068853d1d..533d30466 100644 --- a/vp9/common/vp9_reconinter.h +++ b/vp9/common/vp9_reconinter.h @@ -29,14 +29,20 @@ void vp9_build_inter16x16_predictors_mbuv(MACROBLOCKD *xd, int mb_row, int mb_col); -void vp9_build_inter16x16_predictors_mb(MACROBLOCKD *xd, - uint8_t *dst_y, - uint8_t *dst_u, - uint8_t *dst_v, - int dst_ystride, - int dst_uvstride, - int mb_row, - int mb_col); +void vp9_build_inter_predictors_sby(MACROBLOCKD *x, + uint8_t *dst_y, + int dst_ystride, + int mb_row, + int mb_col, + BLOCK_SIZE_TYPE bsize); + +void vp9_build_inter_predictors_sbuv(MACROBLOCKD *x, + uint8_t *dst_u, + uint8_t *dst_v, + int dst_uvstride, + int mb_row, + int mb_col, + BLOCK_SIZE_TYPE bsize); void vp9_build_inter_predictors_sb(MACROBLOCKD *mb, int mb_row, int mb_col, @@ -46,10 +52,6 @@ void vp9_build_inter_predictors_mb(MACROBLOCKD *xd, int mb_row, int mb_col); -void vp9_build_inter_predictors_mb_s(MACROBLOCKD *xd, - int mb_row, - int mb_col); - void vp9_build_inter4x4_predictors_mbuv(MACROBLOCKD *xd, int mb_row, int mb_col); diff --git a/vp9/common/vp9_reconintra.c b/vp9/common/vp9_reconintra.c index 632191183..88c3f191e 100644 --- a/vp9/common/vp9_reconintra.c +++ b/vp9/common/vp9_reconintra.c @@ -273,7 +273,8 @@ void vp9_recon_intra_mbuv(MACROBLOCKD *xd) { int i; for (i = 16; i < 24; i += 2) { BLOCKD *b = &xd->block[i]; - vp9_recon2b(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); + vp9_recon2b(*(b->base_dst) + b->dst, b->diff, + *(b->base_dst) + b->dst, b->dst_stride); } } @@ -758,40 +759,6 @@ void vp9_build_intra_predictors_sbuv_s(MACROBLOCKD *xd, xd->left_available, xd->right_available); } -// TODO(jingning): merge mby and mbuv into the above sby and sbmu functions -void vp9_build_intra_predictors_mby(MACROBLOCKD *xd) { - 
vp9_build_intra_predictors(xd->dst.y_buffer, xd->dst.y_stride, - xd->predictor, 16, - xd->mode_info_context->mbmi.mode, - 16, 16, - xd->up_available, xd->left_available, - xd->right_available); -} - -void vp9_build_intra_predictors_mbuv_internal(MACROBLOCKD *xd, - uint8_t *upred_ptr, - uint8_t *vpred_ptr, - int uv_stride, - int mode, int bsize) { - vp9_build_intra_predictors(xd->dst.u_buffer, xd->dst.uv_stride, - upred_ptr, uv_stride, mode, - bsize, bsize, - xd->up_available, xd->left_available, - xd->right_available); - vp9_build_intra_predictors(xd->dst.v_buffer, xd->dst.uv_stride, - vpred_ptr, uv_stride, mode, - bsize, bsize, - xd->up_available, xd->left_available, - xd->right_available); -} - -void vp9_build_intra_predictors_mbuv(MACROBLOCKD *xd) { - vp9_build_intra_predictors_mbuv_internal(xd, &xd->predictor[256], - &xd->predictor[320], 8, - xd->mode_info_context->mbmi.uv_mode, - 8); -} - void vp9_intra8x8_predict(MACROBLOCKD *xd, BLOCKD *b, int mode, diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh index ae5b7fbc4..f9f2395f3 100644 --- a/vp9/common/vp9_rtcd_defs.sh +++ b/vp9/common/vp9_rtcd_defs.sh @@ -68,11 +68,15 @@ specialize vp9_recon_b prototype void vp9_recon_uv_b "uint8_t *pred_ptr, int16_t *diff_ptr, uint8_t *dst_ptr, int stride" specialize vp9_recon_uv_b +# TODO(jingning): The prototype functions in c are modified to enable block-size configurable +# operations. Need to change the sse2 accordingly.
prototype void vp9_recon2b "uint8_t *pred_ptr, int16_t *diff_ptr, uint8_t *dst_ptr, int stride" -specialize vp9_recon2b sse2 +specialize vp9_recon2b +# specialize vp9_recon2b sse2 prototype void vp9_recon4b "uint8_t *pred_ptr, int16_t *diff_ptr, uint8_t *dst_ptr, int stride" -specialize vp9_recon4b sse2 +specialize vp9_recon4b +# specialize vp9_recon4b sse2 prototype void vp9_recon_mb "struct macroblockd *x" specialize vp9_recon_mb @@ -86,17 +90,14 @@ specialize vp9_recon_sby_s prototype void vp9_recon_sbuv_s "struct macroblockd *x, uint8_t *udst, uint8_t *vdst, enum BLOCK_SIZE_TYPE bsize" specialize void vp9_recon_sbuv_s +prototype void vp9_build_intra_predictors "uint8_t *src, int src_stride, uint8_t *pred, int y_stride, int mode, int bw, int bh, int up_available, int left_available, int right_available" +specialize void vp9_build_intra_predictors + prototype void vp9_build_intra_predictors_sby_s "struct macroblockd *x, enum BLOCK_SIZE_TYPE bsize" -specialize vp9_build_intra_predictors_sby_s; +specialize vp9_build_intra_predictors_sby_s prototype void vp9_build_intra_predictors_sbuv_s "struct macroblockd *x, enum BLOCK_SIZE_TYPE bsize" -specialize vp9_build_intra_predictors_sbuv_s; - -prototype void vp9_build_intra_predictors_mby "struct macroblockd *x" -specialize vp9_build_intra_predictors_mby; - -prototype void vp9_build_intra_predictors_mbuv "struct macroblockd *x" -specialize vp9_build_intra_predictors_mbuv; +specialize vp9_build_intra_predictors_sbuv_s prototype void vp9_intra4x4_predict "struct macroblockd *xd, struct blockd *x, int b_mode, uint8_t *predictor, int pre_stride" specialize vp9_intra4x4_predict; @@ -620,16 +621,10 @@ specialize vp9_block_error mmx sse2 vp9_block_error_sse2=vp9_block_error_xmm prototype void vp9_subtract_b "struct block *be, struct blockd *bd, int pitch" -specialize vp9_subtract_b mmx sse2 - -prototype void vp9_subtract_b "struct block *be, struct blockd *bd, int pitch" -specialize vp9_subtract_b mmx sse2 - -prototype void 
vp9_subtract_mby "int16_t *diff, uint8_t *src, uint8_t *pred, int stride" -specialize vp9_subtract_mby mmx sse2 - -prototype void vp9_subtract_mbuv "int16_t *diff, uint8_t *usrc, uint8_t *vsrc, uint8_t *pred, int stride" -specialize vp9_subtract_mbuv mmx sse2 +# TODO(jingning): The prototype function in c has been changed to remove +# the use of predictor buffer in MACROBLOCKD. Need to modify the mmx and sse2 +# versions accordingly. +specialize vp9_subtract_b # # Structured Similarity (SSIM) diff --git a/vp9/common/x86/vp9_recon_wrapper_sse2.c b/vp9/common/x86/vp9_recon_wrapper_sse2.c index bb7baf8a0..12d2f970c 100644 --- a/vp9/common/x86/vp9_recon_wrapper_sse2.c +++ b/vp9/common/x86/vp9_recon_wrapper_sse2.c @@ -73,15 +73,15 @@ static void build_intra_predictors_mbuv_x86(MACROBLOCKD *xd, } void vp9_build_intra_predictors_mbuv_sse2(MACROBLOCKD *xd) { - build_intra_predictors_mbuv_x86(xd, &xd->predictor[256], - &xd->predictor[320], 8, + build_intra_predictors_mbuv_x86(xd, xd->dst.u_buffer, + xd->dst.v_buffer, xd->dst.uv_stride, vp9_intra_pred_uv_tm_sse2, vp9_intra_pred_uv_ho_mmx2); } void vp9_build_intra_predictors_mbuv_ssse3(MACROBLOCKD *xd) { - build_intra_predictors_mbuv_x86(xd, &xd->predictor[256], - &xd->predictor[320], 8, + build_intra_predictors_mbuv_x86(xd, xd->dst.u_buffer, + xd->dst.v_buffer, xd->dst.uv_stride, vp9_intra_pred_uv_tm_ssse3, vp9_intra_pred_uv_ho_ssse3); } diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c index d3b18d765..20d4f19c0 100644 --- a/vp9/decoder/vp9_decodframe.c +++ b/vp9/decoder/vp9_decodframe.c @@ -645,7 +645,7 @@ static void decode_mb(VP9D_COMP *pbi, MACROBLOCKD *xd, xd->mode_info_context->mbmi.mode, tx_size, xd->mode_info_context->mbmi.interp_filter); #endif - vp9_build_inter_predictors_mb_s(xd, mb_row, mb_col); + vp9_build_inter_predictors_mb(xd, mb_row, mb_col); } if (xd->mode_info_context->mbmi.mb_skip_coeff) { diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 
6f0e8c7f4..8db2796db 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -1405,9 +1405,9 @@ static void encode_frame_internal(VP9_COMP *cpi) { MACROBLOCKD *const xd = &x->e_mbd; int totalrate; -// fprintf(stderr, "encode_frame_internal frame %d (%d) type %d\n", -// cpi->common.current_video_frame, cpi->common.show_frame, -// cm->frame_type); +// fprintf(stderr, "encode_frame_internal frame %d (%d) type %d\n", +// cpi->common.current_video_frame, cpi->common.show_frame, +// cm->frame_type); // Compute a modified set of reference frame probabilities to use when // prediction fails. These are based on the current general estimates for @@ -2230,15 +2230,8 @@ static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t, if (!x->skip) { vp9_encode_inter16x16(cm, x, mb_row, mb_col); - } else { - vp9_build_inter16x16_predictors_mb(xd, - xd->dst.y_buffer, - xd->dst.u_buffer, - xd->dst.v_buffer, - xd->dst.y_stride, - xd->dst.uv_stride, - mb_row, mb_col); + vp9_build_inter_predictors_sb(xd, mb_row, mb_col, BLOCK_SIZE_MB16X16); #if CONFIG_COMP_INTERINTRA_PRED if (xd->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) { vp9_build_interintra_16x16_predictors_mb(xd, diff --git a/vp9/encoder/vp9_encodeintra.c b/vp9/encoder/vp9_encodeintra.c index 355867ba7..bccd22bf9 100644 --- a/vp9/encoder/vp9_encodeintra.c +++ b/vp9/encoder/vp9_encodeintra.c @@ -52,7 +52,8 @@ static void encode_intra4x4block(MACROBLOCK *x, int ib) { b->bmi.as_mode.context = vp9_find_bpred_context(&x->e_mbd, b); #endif - vp9_intra4x4_predict(&x->e_mbd, b, b->bmi.as_mode.first, b->predictor, 16); + vp9_intra4x4_predict(&x->e_mbd, b, b->bmi.as_mode.first, + *(b->base_dst) + b->dst, b->dst_stride); vp9_subtract_b(be, b, 16); tx_type = get_tx_type_4x4(&x->e_mbd, ib); @@ -69,7 +70,8 @@ static void encode_intra4x4block(MACROBLOCK *x, int ib) { b->diff, 32); } - vp9_recon_b(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); + vp9_recon_b(*(b->base_dst) + b->dst, b->diff, 
+ *(b->base_dst) + b->dst, b->dst_stride); } void vp9_encode_intra4x4mby(MACROBLOCK *mb) { @@ -81,12 +83,13 @@ void vp9_encode_intra4x4mby(MACROBLOCK *mb) { void vp9_encode_intra16x16mby(VP9_COMMON *const cm, MACROBLOCK *x) { MACROBLOCKD *xd = &x->e_mbd; - BLOCK *b = &x->block[0]; TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size; - vp9_build_intra_predictors_mby(xd); - - vp9_subtract_mby(x->src_diff, *(b->base_src), xd->predictor, b->src_stride); + vp9_build_intra_predictors_sby_s(xd, BLOCK_SIZE_MB16X16); + vp9_subtract_sby_s_c(x->src_diff, + x->src.y_buffer, x->src.y_stride, + xd->dst.y_buffer, xd->dst.y_stride, + BLOCK_SIZE_MB16X16); switch (tx_size) { case TX_16X16: @@ -119,10 +122,11 @@ void vp9_encode_intra16x16mbuv(VP9_COMMON *const cm, MACROBLOCK *x) { MACROBLOCKD *xd = &x->e_mbd; TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size; - vp9_build_intra_predictors_mbuv(xd); - - vp9_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer, - xd->predictor, x->src.uv_stride); + vp9_build_intra_predictors_sbuv_s(xd, BLOCK_SIZE_MB16X16); + vp9_subtract_sbuv_s_c(x->src_diff, + x->src.u_buffer, x->src.v_buffer, x->src.uv_stride, + xd->dst.u_buffer, xd->dst.v_buffer, xd->dst.uv_stride, + BLOCK_SIZE_MB16X16); switch (tx_size) { case TX_4X4: @@ -152,7 +156,8 @@ void vp9_encode_intra8x8(MACROBLOCK *x, int ib) { int i; TX_TYPE tx_type; - vp9_intra8x8_predict(xd, b, b->bmi.as_mode.first, b->predictor, 16); + vp9_intra8x8_predict(xd, b, b->bmi.as_mode.first, + *(b->base_dst) + b->dst, b->dst_stride); // generate residual blocks vp9_subtract_4b_c(be, b, 16); @@ -206,7 +211,7 @@ void vp9_encode_intra8x8(MACROBLOCK *x, int ib) { // reconstruct submacroblock for (i = 0; i < 4; i++) { b = &xd->block[ib + iblock[i]]; - vp9_recon_b_c(b->predictor, b->diff, *(b->base_dst) + b->dst, + vp9_recon_b_c(*(b->base_dst) + b->dst, b->diff, *(b->base_dst) + b->dst, b->dst_stride); } } @@ -227,7 +232,8 @@ static void encode_intra_uv4x4(MACROBLOCK *x, int ib, int mode) { const 
int block = ib < 20 ? ib - 16 : ib - 20; assert(ib >= 16 && ib < 24); - vp9_intra_uv4x4_predict(&x->e_mbd, b, mode, b->predictor, 8); + vp9_intra_uv4x4_predict(&x->e_mbd, b, mode, + *(b->base_dst) + b->dst, b->dst_stride); vp9_subtract_b(be, b, 8); @@ -236,7 +242,7 @@ static void encode_intra_uv4x4(MACROBLOCK *x, int ib, int mode) { vp9_inverse_transform_b_4x4(&x->e_mbd, xd->plane[plane].eobs[block], dqcoeff, b->diff, 16); - vp9_recon_uv_b_c(b->predictor, b->diff, *(b->base_dst) + b->dst, + vp9_recon_uv_b_c(*(b->base_dst) + b->dst, b->diff, *(b->base_dst) + b->dst, b->dst_stride); } diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index 6ecaaa509..e786532a1 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c @@ -23,8 +23,9 @@ void vp9_subtract_b_c(BLOCK *be, BLOCKD *bd, int pitch) { uint8_t *src_ptr = (*(be->base_src) + be->src); int16_t *diff_ptr = be->src_diff; - uint8_t *pred_ptr = bd->predictor; + uint8_t *pred_ptr = *(bd->base_dst) + bd->dst; int src_stride = be->src_stride; + int dst_stride = bd->dst_stride; int r, c; @@ -33,7 +34,7 @@ void vp9_subtract_b_c(BLOCK *be, BLOCKD *bd, int pitch) { diff_ptr[c] = src_ptr[c] - pred_ptr[c]; diff_ptr += pitch; - pred_ptr += pitch; + pred_ptr += dst_stride; src_ptr += src_stride; } } @@ -41,8 +42,9 @@ void vp9_subtract_b_c(BLOCK *be, BLOCKD *bd, int pitch) { void vp9_subtract_4b_c(BLOCK *be, BLOCKD *bd, int pitch) { uint8_t *src_ptr = (*(be->base_src) + be->src); int16_t *diff_ptr = be->src_diff; - uint8_t *pred_ptr = bd->predictor; + uint8_t *pred_ptr = *(bd->base_dst) + bd->dst; int src_stride = be->src_stride; + int dst_stride = bd->dst_stride; int r, c; for (r = 0; r < 8; r++) { @@ -50,7 +52,7 @@ void vp9_subtract_4b_c(BLOCK *be, BLOCKD *bd, int pitch) { diff_ptr[c] = src_ptr[c] - pred_ptr[c]; diff_ptr += pitch; - pred_ptr += pitch; + pred_ptr += dst_stride; src_ptr += src_stride; } } @@ -102,25 +104,15 @@ void vp9_subtract_sbuv_s_c(int16_t *diff, const uint8_t *usrc, } } 
-void vp9_subtract_mby_c(int16_t *diff, uint8_t *src, - uint8_t *pred, int stride) { - vp9_subtract_sby_s_c(diff, src, stride, pred, 16, BLOCK_SIZE_MB16X16); -} - -void vp9_subtract_mbuv_c(int16_t *diff, uint8_t *usrc, - uint8_t *vsrc, uint8_t *pred, int stride) { - uint8_t *upred = pred + 256; - uint8_t *vpred = pred + 320; - - vp9_subtract_sbuv_s_c(diff, usrc, vsrc, stride, upred, vpred, 8, - BLOCK_SIZE_MB16X16); -} - static void subtract_mb(MACROBLOCK *x) { - vp9_subtract_mby(x->src_diff, x->src.y_buffer, x->e_mbd.predictor, - x->src.y_stride); - vp9_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer, - x->e_mbd.predictor, x->src.uv_stride); + MACROBLOCKD *xd = &x->e_mbd; + vp9_subtract_sby_s_c(x->src_diff, x->src.y_buffer, x->src.y_stride, + xd->dst.y_buffer, xd->dst.y_stride, + BLOCK_SIZE_MB16X16); + vp9_subtract_sbuv_s_c(x->src_diff, x->src.u_buffer, x->src.v_buffer, + x->src.uv_stride, + xd->dst.u_buffer, xd->dst.v_buffer, xd->dst.uv_stride, + BLOCK_SIZE_MB16X16); } void vp9_transform_sby_32x32(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { @@ -920,11 +912,12 @@ void vp9_encode_inter16x16(VP9_COMMON *const cm, MACROBLOCK *x, /* this function is used by first pass only */ void vp9_encode_inter16x16y(MACROBLOCK *x, int mb_row, int mb_col) { MACROBLOCKD *xd = &x->e_mbd; - BLOCK *b = &x->block[0]; - vp9_build_inter16x16_predictors_mby(xd, xd->predictor, 16, mb_row, mb_col); - - vp9_subtract_mby(x->src_diff, *(b->base_src), xd->predictor, b->src_stride); + vp9_build_inter_predictors_sby(xd, xd->dst.y_buffer, xd->dst.y_stride, + mb_row, mb_col, BLOCK_SIZE_MB16X16); + vp9_subtract_sby_s_c(x->src_diff, x->src.y_buffer, x->src.y_stride, + xd->dst.y_buffer, xd->dst.y_stride, + BLOCK_SIZE_MB16X16); vp9_transform_sby_4x4(x, BLOCK_SIZE_MB16X16); vp9_quantize_sby_4x4(x, BLOCK_SIZE_MB16X16); diff --git a/vp9/encoder/vp9_encodemb.h b/vp9/encoder/vp9_encodemb.h index 76fb0f7cd..3c0d760a1 100644 --- a/vp9/encoder/vp9_encodemb.h +++ b/vp9/encoder/vp9_encodemb.h @@ -58,13 
+58,6 @@ void vp9_fidct_mb(VP9_COMMON *const cm, MACROBLOCK *x); void vp9_subtract_4b_c(BLOCK *be, BLOCKD *bd, int pitch); -void vp9_subtract_mbuv_s_c(int16_t *diff, const uint8_t *usrc, - const uint8_t *vsrc, int src_stride, - const uint8_t *upred, - const uint8_t *vpred, int dst_stride); -void vp9_subtract_mby_s_c(int16_t *diff, const uint8_t *src, - int src_stride, const uint8_t *pred, - int dst_stride); void vp9_subtract_sby_s_c(int16_t *diff, const uint8_t *src, int src_stride, const uint8_t *pred, int dst_stride, BLOCK_SIZE_TYPE bsize); diff --git a/vp9/encoder/vp9_mbgraph.c b/vp9/encoder/vp9_mbgraph.c index 715d68377..e9da395bc 100644 --- a/vp9/encoder/vp9_mbgraph.c +++ b/vp9/encoder/vp9_mbgraph.c @@ -71,9 +71,10 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi, } vp9_set_mbmode_and_mvs(x, NEWMV, dst_mv); - vp9_build_inter16x16_predictors_mby(xd, xd->predictor, 16, mb_row, mb_col); - best_err = vp9_sad16x16(xd->dst.y_buffer, xd->dst.y_stride, - xd->predictor, 16, INT_MAX); + vp9_build_inter_predictors_sby(xd, xd->dst.y_buffer, xd->dst.y_stride, + mb_row, mb_col, BLOCK_SIZE_MB16X16); + best_err = vp9_sad16x16(x->src.y_buffer, x->src.y_stride, + xd->dst.y_buffer, xd->dst.y_stride, INT_MAX); /* restore UMV window */ x->mv_col_min = tmp_col_min; @@ -105,21 +106,19 @@ static int do_16x16_motion_search BLOCKD *d = &xd->block[n]; BLOCK *b = &x->block[n]; - b->base_src = &buf->y_buffer; - b->src_stride = buf->y_stride; - b->src = buf->y_stride * (n & 12) + (n & 3) * 4 + buf_mb_y_offset; + b->base_src = &x->src.y_buffer; + b->src_stride = x->src.y_stride; + b->src = x->src.y_stride * (n & 12) + (n & 3) * 4; - d->base_pre = &ref->y_buffer; - d->pre_stride = ref->y_stride; - d->pre = ref->y_stride * (n & 12) + (n & 3) * 4 + mb_y_offset; + d->base_pre = &xd->pre.y_buffer; + d->pre_stride = xd->pre.y_stride; + d->pre = xd->pre.y_stride * (n & 12) + (n & 3) * 4; } // Try zero MV first // FIXME should really use something like near/nearest MV and/or MV 
prediction - xd->pre.y_buffer = ref->y_buffer + mb_y_offset; - xd->pre.y_stride = ref->y_stride; - err = vp9_sad16x16(ref->y_buffer + mb_y_offset, ref->y_stride, - xd->dst.y_buffer, xd->dst.y_stride, INT_MAX); + err = vp9_sad16x16(x->src.y_buffer, x->src.y_stride, + xd->pre.y_buffer, xd->pre.y_stride, INT_MAX); dst_mv->as_int = 0; // Test last reference frame using the previous best mv as the @@ -159,27 +158,11 @@ static int do_16x16_zerozero_search MACROBLOCK *const x = &cpi->mb; MACROBLOCKD *const xd = &x->e_mbd; unsigned int err; - int n; - - for (n = 0; n < 16; n++) { - BLOCKD *d = &xd->block[n]; - BLOCK *b = &x->block[n]; - - b->base_src = &buf->y_buffer; - b->src_stride = buf->y_stride; - b->src = buf->y_stride * (n & 12) + (n & 3) * 4 + buf_mb_y_offset; - - d->base_pre = &ref->y_buffer; - d->pre_stride = ref->y_stride; - d->pre = ref->y_stride * (n & 12) + (n & 3) * 4 + mb_y_offset; - } // Try zero MV first // FIXME should really use something like near/nearest MV and/or MV prediction - xd->pre.y_buffer = ref->y_buffer + mb_y_offset; - xd->pre.y_stride = ref->y_stride; - err = vp9_sad16x16(ref->y_buffer + mb_y_offset, ref->y_stride, - xd->dst.y_buffer, xd->dst.y_stride, INT_MAX); + err = vp9_sad16x16(x->src.y_buffer, x->src.y_stride, + xd->pre.y_buffer, xd->pre.y_stride, INT_MAX); dst_mv->as_int = 0; @@ -201,11 +184,19 @@ static int find_best_16x16_intra // we're intentionally not doing 4x4, we just want a rough estimate for (mode = DC_PRED; mode <= TM_PRED; mode++) { unsigned int err; + const int bwl = b_width_log2(BLOCK_SIZE_MB16X16), bw = 4 << bwl; + const int bhl = b_height_log2(BLOCK_SIZE_MB16X16), bh = 4 << bhl; xd->mode_info_context->mbmi.mode = mode; - vp9_build_intra_predictors_mby(xd); - err = vp9_sad16x16(xd->predictor, 16, buf->y_buffer + mb_y_offset, - buf->y_stride, best_err); + vp9_build_intra_predictors(x->src.y_buffer, x->src.y_stride, + xd->dst.y_buffer, xd->dst.y_stride, + xd->mode_info_context->mbmi.mode, + bw, bh, + xd->up_available, 
xd->left_available, + xd->right_available); + err = vp9_sad16x16(x->src.y_buffer, x->src.y_stride, + xd->dst.y_buffer, xd->dst.y_stride, best_err); + // find best if (err < best_err) { best_err = err; @@ -237,23 +228,32 @@ static void update_mbgraph_mb_stats MACROBLOCK *const x = &cpi->mb; MACROBLOCKD *const xd = &x->e_mbd; int intra_error; + VP9_COMMON *cm = &cpi->common; // FIXME in practice we're completely ignoring chroma here - xd->dst.y_buffer = buf->y_buffer + mb_y_offset; + x->src.y_buffer = buf->y_buffer + mb_y_offset; + x->src.y_stride = buf->y_stride; + + xd->dst.y_buffer = cm->yv12_fb[cm->new_fb_idx].y_buffer + mb_y_offset; + xd->dst.y_stride = cm->yv12_fb[cm->new_fb_idx].y_stride; // do intra 16x16 prediction - intra_error = find_best_16x16_intra(cpi, buf, mb_y_offset, &stats->ref[INTRA_FRAME].m.mode); + intra_error = find_best_16x16_intra(cpi, buf, mb_y_offset, + &stats->ref[INTRA_FRAME].m.mode); if (intra_error <= 0) intra_error = 1; stats->ref[INTRA_FRAME].err = intra_error; // Golden frame MV search, if it exists and is different than last frame if (golden_ref) { - int g_motion_error = do_16x16_motion_search(cpi, prev_golden_ref_mv, - &stats->ref[GOLDEN_FRAME].m.mv, - buf, mb_y_offset, - golden_ref, gld_y_offset, - mb_row, mb_col); + int g_motion_error; + xd->pre.y_buffer = golden_ref->y_buffer + mb_y_offset; + xd->pre.y_stride = golden_ref->y_stride; + g_motion_error = do_16x16_motion_search(cpi, prev_golden_ref_mv, + &stats->ref[GOLDEN_FRAME].m.mv, + buf, mb_y_offset, + golden_ref, gld_y_offset, + mb_row, mb_col); stats->ref[GOLDEN_FRAME].err = g_motion_error; } else { stats->ref[GOLDEN_FRAME].err = INT_MAX; @@ -262,16 +262,13 @@ static void update_mbgraph_mb_stats // Alt-ref frame MV search, if it exists and is different than last/golden frame if (alt_ref) { - // int a_motion_error = do_16x16_motion_search(cpi, prev_alt_ref_mv, - // &stats->ref[ALTREF_FRAME].m.mv, - // buf, mb_y_offset, - // alt_ref, arf_y_offset); - - int a_motion_error = - 
do_16x16_zerozero_search(cpi, - &stats->ref[ALTREF_FRAME].m.mv, - buf, mb_y_offset, - alt_ref, arf_y_offset); + int a_motion_error; + xd->pre.y_buffer = alt_ref->y_buffer + mb_y_offset; + xd->pre.y_stride = alt_ref->y_stride; + a_motion_error = do_16x16_zerozero_search(cpi, + &stats->ref[ALTREF_FRAME].m.mv, + buf, mb_y_offset, + alt_ref, arf_y_offset); stats->ref[ALTREF_FRAME].err = a_motion_error; } else { diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 2f29b1dc3..73c126912 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -638,15 +638,6 @@ static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, rd[TX_4X4][1] : rd[TX_8X8][1]; } -static void copy_predictor(uint8_t *dst, const uint8_t *predictor) { - const unsigned int *p = (const unsigned int *)predictor; - unsigned int *d = (unsigned int *)dst; - d[0] = p[0]; - d[4] = p[4]; - d[8] = p[8]; - d[12] = p[12]; -} - static int vp9_sb_block_error_c(int16_t *coeff, int16_t *dqcoeff, int block_size, int shift) { int i; @@ -849,13 +840,7 @@ static void super_block_yrd(VP9_COMP *cpi, uint8_t *src = x->src.y_buffer, *dst = xd->dst.y_buffer; int src_y_stride = x->src.y_stride, dst_y_stride = xd->dst.y_stride; - // FIXME(rbultje): mb code still predicts into xd->predictor - if (bs == BLOCK_SIZE_MB16X16) { - vp9_subtract_mby(x->src_diff, src, xd->predictor, src_y_stride); - } else { - vp9_subtract_sby_s_c(x->src_diff, src, src_y_stride, dst, dst_y_stride, - bs); - } + vp9_subtract_sby_s_c(x->src_diff, src, src_y_stride, dst, dst_y_stride, bs); if (bs >= BLOCK_SIZE_SB32X32) super_block_yrd_32x32(cm, x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32], @@ -892,7 +877,6 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, * a temp buffer that meets the stride requirements, but we are only * interested in the left 4x4 block * */ - DECLARE_ALIGNED_ARRAY(16, uint8_t, best_predictor, 16 * 4); DECLARE_ALIGNED_ARRAY(16, int16_t, best_dqcoeff, 16); assert(ib < 16); @@ 
-922,7 +906,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, rate = bmode_costs[mode]; #endif - vp9_intra4x4_predict(xd, b, mode, b->predictor, 16); + vp9_intra4x4_predict(xd, b, mode, *(b->base_dst) + b->dst, b->dst_stride); vp9_subtract_b(be, b, 16); b->bmi.as_mode.first = mode; @@ -956,7 +940,6 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, best_tx_type = tx_type; *a = tempa; *l = templ; - copy_predictor(best_predictor, b->predictor); vpx_memcpy(best_dqcoeff, BLOCK_OFFSET(xd->plane[0].dqcoeff, ib, 16), 32); } } @@ -968,7 +951,10 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, else xd->inv_txm4x4(best_dqcoeff, b->diff, 32); - vp9_recon_b(best_predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); + vp9_intra4x4_predict(xd, b, *best_mode, + *(b->base_dst) + b->dst, b->dst_stride); + vp9_recon_b(*(b->base_dst) + b->dst, b->diff, + *(b->base_dst) + b->dst, b->dst_stride); return best_rd; } @@ -1063,11 +1049,7 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x, int64_t local_txfm_cache[NB_TXFM_MODES]; x->e_mbd.mode_info_context->mbmi.mode = mode; - if (bsize == BLOCK_SIZE_MB16X16) { - vp9_build_intra_predictors_mby(&x->e_mbd); - } else { - vp9_build_intra_predictors_sby_s(&x->e_mbd, bsize); - } + vp9_build_intra_predictors_sby_s(&x->e_mbd, bsize); super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s, bsize, local_txfm_cache); @@ -1129,7 +1111,7 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib, rate = mode_costs[mode]; b->bmi.as_mode.first = mode; - vp9_intra8x8_predict(xd, b, mode, b->predictor, 16); + vp9_intra8x8_predict(xd, b, mode, *(b->base_dst) + b->dst, b->dst_stride); vp9_subtract_4b_c(be, b, 16); @@ -1543,14 +1525,8 @@ static void super_block_uvrd(VP9_COMMON *const cm, MACROBLOCK *x, uint8_t *vsrc = x->src.v_buffer, *vdst = xd->dst.v_buffer; int src_uv_stride = x->src.uv_stride, dst_uv_stride = 
xd->dst.uv_stride; - // FIXME(rbultje): mb code still predicts into xd->predictor - if (bsize == BLOCK_SIZE_MB16X16) { - vp9_subtract_mbuv(x->src_diff, usrc, vsrc, xd->predictor, - x->src.uv_stride); - } else { - vp9_subtract_sbuv_s_c(x->src_diff, usrc, vsrc, src_uv_stride, - udst, vdst, dst_uv_stride, bsize); - } + vp9_subtract_sbuv_s_c(x->src_diff, usrc, vsrc, src_uv_stride, + udst, vdst, dst_uv_stride, bsize); if (mbmi->txfm_size >= TX_32X32 && bsize >= BLOCK_SIZE_SB64X64) { super_block_uvrd_32x32(cm, x, rate, distortion, skippable, bsize); @@ -1576,10 +1552,7 @@ static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x, for (mode = DC_PRED; mode <= TM_PRED; mode++) { x->e_mbd.mode_info_context->mbmi.uv_mode = mode; - if (bsize == BLOCK_SIZE_MB16X16) - vp9_build_intra_predictors_mbuv(&x->e_mbd); - else - vp9_build_intra_predictors_sbuv_s(&x->e_mbd, bsize); + vp9_build_intra_predictors_sbuv_s(&x->e_mbd, bsize); super_block_uvrd(&cpi->common, x, &this_rate_tokenonly, &this_distortion, &s, bsize); @@ -1759,7 +1732,8 @@ static int64_t encode_inter_mb_segment(VP9_COMMON *const cm, vp9_build_inter_predictor(*(bd->base_pre) + bd->pre, bd->pre_stride, - bd->predictor, 16, + *(bd->base_dst) + bd->dst, + bd->dst_stride, &bd->bmi.as_mv[0], &xd->scale_factor[0], 4, 4, 0 /* no avg */, &xd->subpix); @@ -1769,7 +1743,8 @@ static int64_t encode_inter_mb_segment(VP9_COMMON *const cm, // weighting for splitmv modes is turned on. 
if (xd->mode_info_context->mbmi.second_ref_frame > 0) { vp9_build_inter_predictor( - *(bd->base_second_pre) + bd->pre, bd->pre_stride, bd->predictor, 16, + *(bd->base_second_pre) + bd->pre, bd->pre_stride, + *(bd->base_dst) + bd->dst, bd->dst_stride, &bd->bmi.as_mv[1], &xd->scale_factor[1], 4, 4, 1 << (2 * CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT) /* avg */, &xd->subpix); @@ -1834,7 +1809,8 @@ static int64_t encode_inter_mb_segment_8x8(VP9_COMMON *const cm, // implicit-compoundinter-weight experiment when implicit // weighting for splitmv modes is turned on. vp9_build_inter_predictor( - *base_pre + bd->pre, bd->pre_stride, bd->predictor, 16, + *base_pre + bd->pre, bd->pre_stride, + *(bd->base_dst) + bd->dst, bd->dst_stride, &bd->bmi.as_mv[which_mv], &xd->scale_factor[which_mv], 8, 8, which_mv << (2 * CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT), &xd->subpix); @@ -3144,23 +3120,20 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, unsigned int sse, var; int tmp_rate_y, tmp_rate_u, tmp_rate_v; int tmp_dist_y, tmp_dist_u, tmp_dist_v; - vp9_build_inter16x16_predictors_mb(xd, xd->predictor, - xd->predictor + 256, - xd->predictor + 320, - 16, 8, mb_row, mb_col); + vp9_build_inter_predictors_sb(xd, mb_row, mb_col, BLOCK_SIZE_MB16X16); var = vp9_variance16x16(*(b->base_src), b->src_stride, - xd->predictor, 16, &sse); + xd->dst.y_buffer, xd->dst.y_stride, &sse); // Note our transform coeffs are 8 times an orthogonal transform. // Hence quantizer step is also 8 times. To get effective quantizer // we need to divide by 8 before sending to modeling function. 
model_rd_from_var_lapndz(var, 16 * 16, xd->block[0].dequant[1] >> 3, &tmp_rate_y, &tmp_dist_y); var = vp9_variance8x8(x->src.u_buffer, x->src.uv_stride, - &xd->predictor[256], 8, &sse); + xd->dst.u_buffer, xd->dst.uv_stride, &sse); model_rd_from_var_lapndz(var, 8 * 8, xd->block[16].dequant[1] >> 3, &tmp_rate_u, &tmp_dist_u); var = vp9_variance8x8(x->src.v_buffer, x->src.uv_stride, - &xd->predictor[320], 8, &sse); + xd->dst.v_buffer, xd->dst.uv_stride, &sse); model_rd_from_var_lapndz(var, 8 * 8, xd->block[20].dequant[1] >> 3, &tmp_rate_v, &tmp_dist_v); rd = RDCOST(x->rdmult, x->rddiv, @@ -3184,9 +3157,19 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, if ((cm->mcomp_filter_type == SWITCHABLE && newbest) || (cm->mcomp_filter_type != SWITCHABLE && cm->mcomp_filter_type == mbmi->interp_filter)) { - vpx_memcpy(tmp_ybuf, xd->predictor, sizeof(unsigned char) * 256); - vpx_memcpy(tmp_ubuf, xd->predictor + 256, sizeof(unsigned char) * 64); - vpx_memcpy(tmp_vbuf, xd->predictor + 320, sizeof(unsigned char) * 64); + int i; + for (i = 0; i < 16 * bh; ++i) + vpx_memcpy(tmp_ybuf + i * 16 * bw, + xd->dst.y_buffer + i * xd->dst.y_stride, + sizeof(unsigned char) * 16 * bw); + for (i = 0; i < 8 * bh; ++i) + vpx_memcpy(tmp_ubuf + i * 8 * bw, + xd->dst.u_buffer + i * xd->dst.uv_stride, + sizeof(unsigned char) * 8 * bw); + for (i = 0; i < 8 * bh; ++i) + vpx_memcpy(tmp_vbuf + i * 8 * bw, + xd->dst.v_buffer + i * xd->dst.uv_stride, + sizeof(unsigned char) * 8 * bw); pred_exists = 1; } interpolating_intpel_seen |= @@ -3203,32 +3186,19 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, if (pred_exists) { // FIXME(rbultje): mb code still predicts into xd->predictor - if (bsize != BLOCK_SIZE_MB16X16) { - for (i = 0; i < bh * 16; ++i) - vpx_memcpy(xd->dst.y_buffer + i * xd->dst.y_stride, - tmp_ybuf + i * bw * 16, sizeof(unsigned char) * bw * 16); - for (i = 0; i < bh * 8; ++i) - vpx_memcpy(xd->dst.u_buffer + i * xd->dst.uv_stride, - tmp_ubuf + i * bw * 8, 
sizeof(unsigned char) * bw * 8); - for (i = 0; i < bh * 8; ++i) - vpx_memcpy(xd->dst.v_buffer + i * xd->dst.uv_stride, - tmp_vbuf + i * bw * 8, sizeof(unsigned char) * bw * 8); - } else { - vpx_memcpy(xd->predictor, tmp_ybuf, sizeof(unsigned char) * 256); - vpx_memcpy(xd->predictor + 256, tmp_ubuf, sizeof(unsigned char) * 64); - vpx_memcpy(xd->predictor + 320, tmp_vbuf, sizeof(unsigned char) * 64); - } + for (i = 0; i < bh * 16; ++i) + vpx_memcpy(xd->dst.y_buffer + i * xd->dst.y_stride, + tmp_ybuf + i * bw * 16, sizeof(unsigned char) * bw * 16); + for (i = 0; i < bh * 8; ++i) + vpx_memcpy(xd->dst.u_buffer + i * xd->dst.uv_stride, + tmp_ubuf + i * bw * 8, sizeof(unsigned char) * bw * 8); + for (i = 0; i < bh * 8; ++i) + vpx_memcpy(xd->dst.v_buffer + i * xd->dst.uv_stride, + tmp_vbuf + i * bw * 8, sizeof(unsigned char) * bw * 8); } else { // Handles the special case when a filter that is not in the // switchable list (ex. bilinear, 6-tap) is indicated at the frame level - if (bsize > BLOCK_SIZE_MB16X16) { - vp9_build_inter_predictors_sb(xd, mb_row, mb_col, bsize); - } else { - vp9_build_inter16x16_predictors_mb(xd, xd->predictor, - xd->predictor + 256, - xd->predictor + 320, - 16, 8, mb_row, mb_col); - } + vp9_build_inter_predictors_sb(xd, mb_row, mb_col, bsize); } if (cpi->common.mcomp_filter_type == SWITCHABLE) { @@ -3253,7 +3223,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, &sse); } else { var = vp9_variance16x16(*(b->base_src), b->src_stride, - xd->predictor, 16, &sse); + xd->dst.y_buffer, xd->dst.y_stride, &sse); } if ((int)sse < threshold) { @@ -3278,9 +3248,9 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, } else { unsigned int sse2u, sse2v; var = vp9_variance8x8(x->src.u_buffer, x->src.uv_stride, - xd->predictor + 256, 8, &sse2u); + xd->dst.u_buffer, xd->dst.uv_stride, &sse2u); var = vp9_variance8x8(x->src.v_buffer, x->src.uv_stride, - xd->predictor + 320, 8, &sse2v); + xd->dst.v_buffer, xd->dst.uv_stride, &sse2v); sse2 
= sse2u + sse2v; } @@ -3614,7 +3584,8 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, case TM_PRED: mbmi->ref_frame = INTRA_FRAME; // FIXME compound intra prediction - vp9_build_intra_predictors_mby(&x->e_mbd); + vp9_build_intra_predictors_sby_s(&x->e_mbd, BLOCK_SIZE_MB16X16); + // vp9_build_intra_predictors_mby(&x->e_mbd); super_block_yrd(cpi, x, &rate_y, &distortion, &skippable, BLOCK_SIZE_MB16X16, txfm_cache); rate2 += rate_y; @@ -3790,8 +3761,14 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, int uv_skippable; vp9_build_inter4x4_predictors_mbuv(&x->e_mbd, mb_row, mb_col); - vp9_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer, - x->e_mbd.predictor, x->src.uv_stride); + + vp9_subtract_sbuv_s_c(x->src_diff, + x->src.u_buffer, + x->src.v_buffer, x->src.uv_stride, + xd->dst.u_buffer, + xd->dst.v_buffer, xd->dst.uv_stride, + BLOCK_SIZE_MB16X16); + super_block_uvrd_4x4(cm, x, &rate_uv, &distortion_uv, &uv_skippable, BLOCK_SIZE_MB16X16); rate2 += rate_uv; diff --git a/vp9/encoder/x86/vp9_x86_csystemdependent.c b/vp9/encoder/x86/vp9_x86_csystemdependent.c index 310f0d99d..04383fcb4 100644 --- a/vp9/encoder/x86/vp9_x86_csystemdependent.c +++ b/vp9/encoder/x86/vp9_x86_csystemdependent.c @@ -30,7 +30,9 @@ void vp9_subtract_b_mmx(BLOCK *be, BLOCKD *bd, int pitch) { unsigned char *z = *(be->base_src) + be->src; unsigned int src_stride = be->src_stride; short *diff = &be->src_diff[0]; - unsigned char *predictor = &bd->predictor[0]; + unsigned char *predictor = *(bd->base_dst) + bd->dst; + // TODO(jingning): The prototype function in c has been changed. Need to + // modify the mmx and sse versions. 
vp9_subtract_b_mmx_impl(z, src_stride, diff, predictor, pitch); } @@ -44,7 +46,9 @@ void vp9_subtract_b_sse2(BLOCK *be, BLOCKD *bd, int pitch) { unsigned char *z = *(be->base_src) + be->src; unsigned int src_stride = be->src_stride; short *diff = &be->src_diff[0]; - unsigned char *predictor = &bd->predictor[0]; + unsigned char *predictor = *(bd->base_dst) + bd->dst; + // TODO(jingning): The prototype of the C version has changed. The MMX and + // SSE2 versions need to be updated to match. vp9_subtract_b_sse2_impl(z, src_stride, diff, predictor, pitch); }