From b6b91432188a4773456b31a38194799b7b71e9c6 Mon Sep 17 00:00:00 2001 From: Jingning Han Date: Thu, 7 Nov 2013 14:56:58 -0800 Subject: [PATCH] Dual buffer encoding for intra modes Overall change (using dual buffer scheme for superblocks of both inter and intra modes) reduces speed 2 runtime: bluesky_1080p at 6000kbps: 263553ms -> 257441ms riverbed_1080p at 8000kbps: 233230ms -> 225308ms. Change-Id: Idf8d70f768a4b0d97b2a8506372c57b7b4022119 --- vp9/encoder/vp9_block.h | 17 +++--- vp9/encoder/vp9_encodeframe.c | 13 ++++- vp9/encoder/vp9_encodemb.c | 102 +++++++++++++++++++--------------- vp9/encoder/vp9_firstpass.c | 1 + vp9/encoder/vp9_onyx_if.c | 4 +- vp9/encoder/vp9_rdopt.c | 59 +++++++++++++++----- 6 files changed, 124 insertions(+), 72 deletions(-) diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h index 6427f7f36..3c98f7d5f 100644 --- a/vp9/encoder/vp9_block.h +++ b/vp9/encoder/vp9_block.h @@ -27,16 +27,16 @@ typedef struct { typedef struct { MODE_INFO mic; uint8_t *zcoeff_blk; - int16_t *coeff[MAX_MB_PLANE][2]; - int16_t *qcoeff[MAX_MB_PLANE][2]; - int16_t *dqcoeff[MAX_MB_PLANE][2]; - uint16_t *eobs[MAX_MB_PLANE][2]; + int16_t *coeff[MAX_MB_PLANE][3]; + int16_t *qcoeff[MAX_MB_PLANE][3]; + int16_t *dqcoeff[MAX_MB_PLANE][3]; + uint16_t *eobs[MAX_MB_PLANE][3]; // dual buffer pointers, 0: in use, 1: best in store - int16_t *coeff_pbuf[MAX_MB_PLANE][2]; - int16_t *qcoeff_pbuf[MAX_MB_PLANE][2]; - int16_t *dqcoeff_pbuf[MAX_MB_PLANE][2]; - uint16_t *eobs_pbuf[MAX_MB_PLANE][2]; + int16_t *coeff_pbuf[MAX_MB_PLANE][3]; + int16_t *qcoeff_pbuf[MAX_MB_PLANE][3]; + int16_t *dqcoeff_pbuf[MAX_MB_PLANE][3]; + uint16_t *eobs_pbuf[MAX_MB_PLANE][3]; int is_coded; int num_4x4_blk; @@ -94,6 +94,7 @@ struct macroblock { MACROBLOCKD e_mbd; int skip_block; int select_txfm_size; + int skip_recode; int skip_optimize; int q_index; diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index e9b68cc03..3e75f3b28 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -377,6 +377,7 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, const int mis = cm->mode_info_stride; const int mi_width = num_8x8_blocks_wide_lookup[bsize]; const int mi_height = num_8x8_blocks_high_lookup[bsize]; + int max_plane; assert(mi->mbmi.mode < MB_MODE_COUNT); assert(mi->mbmi.ref_frame[0] < MAX_REF_FRAMES); @@ -385,13 +386,21 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, *mi_addr = *mi; - for (i = 0; i < MAX_MB_PLANE; ++i) { + max_plane = is_inter_block(mbmi) ? MAX_MB_PLANE : 1; + for (i = 0; i < max_plane; ++i) { p[i].coeff = ctx->coeff_pbuf[i][1]; pd[i].qcoeff = ctx->qcoeff_pbuf[i][1]; pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][1]; pd[i].eobs = ctx->eobs_pbuf[i][1]; } + for (i = max_plane; i < MAX_MB_PLANE; ++i) { + p[i].coeff = ctx->coeff_pbuf[i][2]; + pd[i].qcoeff = ctx->qcoeff_pbuf[i][2]; + pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][2]; + pd[i].eobs = ctx->eobs_pbuf[i][2]; + } + // Restore the coding context of the MB to that that was in place // when the mode was picked for it for (y = 0; y < mi_height; y++) @@ -619,6 +628,7 @@ static void pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile, pd[i].eobs = ctx->eobs_pbuf[i][0]; } ctx->is_coded = 0; + x->skip_recode = 0; // Set to zero to make sure we do not use the previous encoded frame stats xd->mi_8x8[0]->mbmi.skip_coeff = 0; @@ -2406,6 +2416,7 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled, const int mis = cm->mode_info_stride; const int mi_width = num_8x8_blocks_wide_lookup[bsize]; const int mi_height = num_8x8_blocks_high_lookup[bsize]; + x->skip_recode = !x->select_txfm_size && mbmi->sb_type >= BLOCK_8X8; x->skip_optimize = ctx->is_coded; ctx->is_coded = 1; x->use_lp32x32fdct = cpi->sf.use_lp32x32fdct; diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index 7afed26d6..a73c21a42 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c @@ -432,19 +432,18 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize, // TODO(jingning): per transformed block zero forcing only enabled for // luma component. will integrate chroma components as well. if (x->zcoeff_blk[tx_size][block] && plane == 0) { - int i, k; + int i, j; pd->eobs[block] = 0; - txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &k); + txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j); ctx->ta[plane][i] = 0; - ctx->tl[plane][k] = 0; + ctx->tl[plane][j] = 0; return; } - if (x->select_txfm_size || xd->mi_8x8[0]->mbmi.sb_type < BLOCK_8X8) + if (!x->skip_recode) vp9_xform_quant(plane, block, plane_bsize, tx_size, arg); - if (x->optimize && (x->select_txfm_size || - xd->mi_8x8[0]->mbmi.sb_type < BLOCK_8X8|| !x->skip_optimize)) { + if (x->optimize && (!x->skip_recode || !x->skip_optimize)) { vp9_optimize_b(plane, block, plane_bsize, tx_size, x, ctx); } else { int i, k; @@ -515,10 +514,10 @@ void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) { struct optimize_ctx ctx; struct encode_b_args arg = {x, &ctx}; - if (x->select_txfm_size || xd->mi_8x8[0]->mbmi.sb_type < BLOCK_8X8) + if (!x->skip_recode) vp9_subtract_sb(x, bsize); - if (x->optimize) { + if (x->optimize && (!x->skip_recode || !x->skip_optimize)) { int i; for (i = 0; i < MAX_MB_PLANE; ++i) optimize_init_b(i, bsize, &arg); @@ -563,19 +562,22 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, xoff = 32 * (block & twmask); yoff = 32 * (block >> twl); dst = pd->dst.buf + yoff * pd->dst.stride + xoff; - src = p->src.buf + yoff * p->src.stride + xoff; - src_diff = p->src_diff + 4 * bw * yoff + xoff; vp9_predict_intra_block(xd, block, bwl, TX_32X32, mode, dst, pd->dst.stride, dst, pd->dst.stride); - vp9_subtract_block(32, 32, src_diff, bw * 4, - src, p->src.stride, dst, pd->dst.stride); - if (x->use_lp32x32fdct) - vp9_fdct32x32_rd(src_diff, coeff, bw * 4); - else - vp9_fdct32x32(src_diff, coeff, bw * 4); - vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round, - p->quant, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, p->zbin_extra, eob, scan, iscan); + + if (!x->skip_recode) { + src = p->src.buf + yoff * p->src.stride + xoff; + src_diff = p->src_diff + 4 * bw * yoff + xoff; + vp9_subtract_block(32, 32, src_diff, bw * 4, + src, p->src.stride, dst, pd->dst.stride); + if (x->use_lp32x32fdct) + vp9_fdct32x32_rd(src_diff, coeff, bw * 4); + else + vp9_fdct32x32(src_diff, coeff, bw * 4); + vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round, + p->quant, p->quant_shift, qcoeff, dqcoeff, + pd->dequant, p->zbin_extra, eob, scan, iscan); + } if (!x->skip_encode && *eob) vp9_idct32x32_add(dqcoeff, dst, pd->dst.stride, *eob); break; @@ -588,16 +590,18 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, xoff = 16 * (block & twmask); yoff = 16 * (block >> twl); dst = pd->dst.buf + yoff * pd->dst.stride + xoff; - src = p->src.buf + yoff * p->src.stride + xoff; - src_diff = p->src_diff + 4 * bw * yoff + xoff; vp9_predict_intra_block(xd, block, bwl, TX_16X16, mode, dst, pd->dst.stride, dst, pd->dst.stride); - vp9_subtract_block(16, 16, src_diff, bw * 4, - src, p->src.stride, dst, pd->dst.stride); - vp9_fht16x16(tx_type, src_diff, coeff, bw * 4); - vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, - p->quant, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, p->zbin_extra, eob, scan, iscan); + if (!x->skip_recode) { + src = p->src.buf + yoff * p->src.stride + xoff; + src_diff = p->src_diff + 4 * bw * yoff + xoff; + vp9_subtract_block(16, 16, src_diff, bw * 4, + src, p->src.stride, dst, pd->dst.stride); + vp9_fht16x16(tx_type, src_diff, coeff, bw * 4); + vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, + p->quant, p->quant_shift, qcoeff, dqcoeff, + pd->dequant, p->zbin_extra, eob, scan, iscan); + } if (!x->skip_encode && *eob) vp9_iht16x16_add(tx_type, dqcoeff, dst, pd->dst.stride, *eob); break; @@ -610,16 +614,18 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, xoff = 8 * (block & twmask); yoff = 8 * (block >> twl); dst = pd->dst.buf + yoff * pd->dst.stride + xoff; - src = p->src.buf + yoff * p->src.stride + xoff; - src_diff = p->src_diff + 4 * bw * yoff + xoff; vp9_predict_intra_block(xd, block, bwl, TX_8X8, mode, dst, pd->dst.stride, dst, pd->dst.stride); - vp9_subtract_block(8, 8, src_diff, bw * 4, - src, p->src.stride, dst, pd->dst.stride); - vp9_fht8x8(tx_type, src_diff, coeff, bw * 4); - vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant, - p->quant_shift, qcoeff, dqcoeff, - pd->dequant, p->zbin_extra, eob, scan, iscan); + if (!x->skip_recode) { + src = p->src.buf + yoff * p->src.stride + xoff; + src_diff = p->src_diff + 4 * bw * yoff + xoff; + vp9_subtract_block(8, 8, src_diff, bw * 4, + src, p->src.stride, dst, pd->dst.stride); + vp9_fht8x8(tx_type, src_diff, coeff, bw * 4); + vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant, + p->quant_shift, qcoeff, dqcoeff, + pd->dequant, p->zbin_extra, eob, scan, iscan); + } if (!x->skip_encode && *eob) vp9_iht8x8_add(tx_type, dqcoeff, dst, pd->dst.stride, *eob); break; @@ -635,19 +641,23 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, xoff = 4 * (block & twmask); yoff = 4 * (block >> twl); dst = pd->dst.buf + yoff * pd->dst.stride + xoff; - src = p->src.buf + yoff * p->src.stride + xoff; - src_diff = p->src_diff + 4 * bw * yoff + xoff; vp9_predict_intra_block(xd, block, bwl, TX_4X4, mode, dst, pd->dst.stride, dst, pd->dst.stride); - vp9_subtract_block(4, 4, src_diff, bw * 4, - src, p->src.stride, dst, pd->dst.stride); - if (tx_type != DCT_DCT) - vp9_short_fht4x4(src_diff, coeff, bw * 4, tx_type); - else - x->fwd_txm4x4(src_diff, coeff, bw * 4); - vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant, - p->quant_shift, qcoeff, dqcoeff, - pd->dequant, p->zbin_extra, eob, scan, iscan); + + if (!x->skip_recode) { + src = p->src.buf + yoff * p->src.stride + xoff; + src_diff = p->src_diff + 4 * bw * yoff + xoff; + vp9_subtract_block(4, 4, src_diff, bw * 4, + src, p->src.stride, dst, pd->dst.stride); + if (tx_type != DCT_DCT) + vp9_short_fht4x4(src_diff, coeff, bw * 4, tx_type); + else + x->fwd_txm4x4(src_diff, coeff, bw * 4); + vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant, + p->quant_shift, qcoeff, dqcoeff, + pd->dequant, p->zbin_extra, eob, scan, iscan); + } + if (!x->skip_encode && *eob) { if (tx_type == DCT_DCT) // this is like vp9_short_idct4x4 but has a special case around eob<=1 diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c index b5428df28..974c300e6 100644 --- a/vp9/encoder/vp9_firstpass.c +++ b/vp9/encoder/vp9_firstpass.c @@ -535,6 +535,7 @@ void vp9_first_pass(VP9_COMP *cpi) { pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][1]; pd[i].eobs = ctx->eobs_pbuf[i][1]; } + x->skip_recode = 0; // Initialise the MV cost table to the defaults diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index 8a9834310..66919de24 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -1452,7 +1452,7 @@ static void alloc_mode_context(VP9_COMMON *cm, int num_4x4_blk, CHECK_MEM_ERROR(cm, ctx->zcoeff_blk, vpx_calloc(num_4x4_blk, sizeof(uint8_t))); for (i = 0; i < MAX_MB_PLANE; ++i) { - for (k = 0; k < 2; ++k) { + for (k = 0; k < 3; ++k) { CHECK_MEM_ERROR(cm, ctx->coeff[i][k], vpx_memalign(16, num_pix * sizeof(int16_t))); CHECK_MEM_ERROR(cm, ctx->qcoeff[i][k], @@ -1474,7 +1474,7 @@ static void free_mode_context(PICK_MODE_CONTEXT *ctx) { vpx_free(ctx->zcoeff_blk); ctx->zcoeff_blk = 0; for (i = 0; i < MAX_MB_PLANE; ++i) { - for (k = 0; k < 2; ++k) { + for (k = 0; k < 3; ++k) { vpx_free(ctx->coeff[i][k]); ctx->coeff[i][k] = 0; vpx_free(ctx->qcoeff[i][k]); diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 695a2e249..78cb06bc5 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -246,7 +246,8 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi) { vp9_set_speed_features(cpi); - cpi->mb.select_txfm_size = cpi->sf.tx_size_search_method == USE_LARGESTALL ? + cpi->mb.select_txfm_size = (cpi->sf.tx_size_search_method == USE_LARGESTALL && + cm->frame_type != KEY_FRAME) ? 0 : 1; set_block_thresholds(cpi); @@ -1329,6 +1330,7 @@ static void super_block_uvrd(VP9_COMP *const cpi, MACROBLOCK *x, } static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x, + PICK_MODE_CONTEXT *ctx, int *rate, int *rate_tokenonly, int64_t *distortion, int *skippable, BLOCK_SIZE bsize) { @@ -1364,6 +1366,27 @@ static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x, *rate_tokenonly = this_rate_tokenonly; *distortion = this_distortion; *skippable = s; + if (!x->select_txfm_size) { + int i; + struct macroblock_plane *const p = x->plane; + struct macroblockd_plane *const pd = x->e_mbd.plane; + for (i = 1; i < MAX_MB_PLANE; ++i) { + p[i].coeff = ctx->coeff_pbuf[i][2]; + pd[i].qcoeff = ctx->qcoeff_pbuf[i][2]; + pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][2]; + pd[i].eobs = ctx->eobs_pbuf[i][2]; + + ctx->coeff_pbuf[i][2] = ctx->coeff_pbuf[i][0]; + ctx->qcoeff_pbuf[i][2] = ctx->qcoeff_pbuf[i][0]; + ctx->dqcoeff_pbuf[i][2] = ctx->dqcoeff_pbuf[i][0]; + ctx->eobs_pbuf[i][2] = ctx->eobs_pbuf[i][0]; + + ctx->coeff_pbuf[i][0] = p[i].coeff; + ctx->qcoeff_pbuf[i][0] = pd[i].qcoeff; + ctx->dqcoeff_pbuf[i][0] = pd[i].dqcoeff; + ctx->eobs_pbuf[i][0] = pd[i].eobs; + } + } } } @@ -1389,8 +1412,9 @@ static int64_t rd_sbuv_dcpred(VP9_COMP *cpi, MACROBLOCK *x, return this_rd; } -static void choose_intra_uv_mode(VP9_COMP *cpi, BLOCK_SIZE bsize, - int *rate_uv, int *rate_uv_tokenonly, +static void choose_intra_uv_mode(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, + BLOCK_SIZE bsize, int *rate_uv, + int *rate_uv_tokenonly, int64_t *dist_uv, int *skip_uv, MB_PREDICTION_MODE *mode_uv) { MACROBLOCK *const x = &cpi->mb; @@ -1403,7 +1427,7 @@ static void choose_intra_uv_mode(VP9_COMP *cpi, BLOCK_SIZE bsize, // Else do a proper rd search for each possible transform size that may // be considered in the main rd loop. } else { - rd_pick_intra_sbuv_mode(cpi, x, + rd_pick_intra_sbuv_mode(cpi, x, ctx, rate_uv, rate_uv_tokenonly, dist_uv, skip_uv, bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize); } @@ -3033,12 +3057,13 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, return this_rd; // if 0, this will be re-calculated by caller } -static void swap_block_ptr(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) { - int i; +static void swap_block_ptr(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, + int max_plane) { struct macroblock_plane *const p = x->plane; struct macroblockd_plane *const pd = x->e_mbd.plane; + int i; - for (i = 0; i < MAX_MB_PLANE; ++i) { + for (i = 0; i < max_plane; ++i) { p[i].coeff = ctx->coeff_pbuf[i][1]; pd[i].qcoeff = ctx->qcoeff_pbuf[i][1]; pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][1]; @@ -3075,7 +3100,7 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, *returnrate = INT_MAX; return; } - rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly, + rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly, &dist_uv, &uv_skip, bsize); } else { y_skip = 0; @@ -3084,7 +3109,7 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, *returnrate = INT_MAX; return; } - rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly, + rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly, &dist_uv, &uv_skip, BLOCK_8X8); } @@ -3450,7 +3475,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, uv_tx = MIN(mbmi->tx_size, max_uv_txsize_lookup[bsize]); if (rate_uv_intra[uv_tx] == INT_MAX) { - choose_intra_uv_mode(cpi, bsize, &rate_uv_intra[uv_tx], + choose_intra_uv_mode(cpi, ctx, bsize, &rate_uv_intra[uv_tx], &rate_uv_tokenonly[uv_tx], &dist_uv[uv_tx], &skip_uv[uv_tx], &mode_uv[uv_tx]); @@ -3584,6 +3609,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, // Did this mode help.. i.e. is it the new best mode if (this_rd < best_rd || x->skip) { + int max_plane = MAX_MB_PLANE; if (!mode_excluded) { // Note index of best mode so far best_mode_index = mode_index; @@ -3591,6 +3617,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, if (ref_frame == INTRA_FRAME) { /* required for left and above block mv */ mbmi->mv[0].as_int = 0; + max_plane = 1; } *returnrate = rate2; @@ -3599,7 +3626,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, best_mbmode = *mbmi; best_skip2 = this_skip2; if (!x->select_txfm_size) - swap_block_ptr(x, ctx); + swap_block_ptr(x, ctx, max_plane); vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size], sizeof(uint8_t) * ctx->num_4x4_blk); @@ -3706,7 +3733,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, // Do Intra UV best rd mode selection if best mode choice above was intra. if (vp9_mode_order[best_mode_index].ref_frame == INTRA_FRAME) { TX_SIZE uv_tx_size = get_uv_tx_size(mbmi); - rd_pick_intra_sbuv_mode(cpi, x, &rate_uv_intra[uv_tx_size], + rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra[uv_tx_size], &rate_uv_tokenonly[uv_tx_size], &dist_uv[uv_tx_size], &skip_uv[uv_tx_size], @@ -4075,7 +4102,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, distortion2 += distortion_y; if (rate_uv_intra[TX_4X4] == INT_MAX) { - choose_intra_uv_mode(cpi, bsize, &rate_uv_intra[TX_4X4], + choose_intra_uv_mode(cpi, ctx, bsize, &rate_uv_intra[TX_4X4], &rate_uv_tokenonly[TX_4X4], &dist_uv[TX_4X4], &skip_uv[TX_4X4], &mode_uv[TX_4X4]); @@ -4329,12 +4356,14 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, // Did this mode help.. i.e. is it the new best mode if (this_rd < best_rd || x->skip) { if (!mode_excluded) { + int max_plane = MAX_MB_PLANE; // Note index of best mode so far best_mode_index = mode_index; if (ref_frame == INTRA_FRAME) { /* required for left and above block mv */ mbmi->mv[0].as_int = 0; + max_plane = 1; } *returnrate = rate2; @@ -4345,7 +4374,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, best_mbmode = *mbmi; best_skip2 = this_skip2; if (!x->select_txfm_size) - swap_block_ptr(x, ctx); + swap_block_ptr(x, ctx, max_plane); vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size], sizeof(uint8_t) * ctx->num_4x4_blk); @@ -4452,7 +4481,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, // Do Intra UV best rd mode selection if best mode choice above was intra. if (vp9_ref_order[best_mode_index].ref_frame == INTRA_FRAME) { TX_SIZE uv_tx_size = get_uv_tx_size(mbmi); - rd_pick_intra_sbuv_mode(cpi, x, &rate_uv_intra[uv_tx_size], + rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra[uv_tx_size], &rate_uv_tokenonly[uv_tx_size], &dist_uv[uv_tx_size], &skip_uv[uv_tx_size],