diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h index b88506b3b..98662c94a 100644 --- a/vp9/encoder/vp9_block.h +++ b/vp9/encoder/vp9_block.h @@ -32,12 +32,13 @@ typedef struct { int16_t *dqcoeff[MAX_MB_PLANE][2]; uint16_t *eobs[MAX_MB_PLANE][2]; - // dual buffer pointers + // dual buffer pointers, 0: in use, 1: best in store int16_t *coeff_pbuf[MAX_MB_PLANE][2]; int16_t *qcoeff_pbuf[MAX_MB_PLANE][2]; int16_t *dqcoeff_pbuf[MAX_MB_PLANE][2]; uint16_t *eobs_pbuf[MAX_MB_PLANE][2]; + int is_coded; int num_4x4_blk; int skip; int_mv best_ref_mv; @@ -92,6 +93,8 @@ struct macroblock { MACROBLOCKD e_mbd; int skip_block; + int select_txfm_size; + int skip_optimize; search_site *ss; int ss_count; diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 2e2ecf8cd..9d806508e 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -613,11 +613,12 @@ static void pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile, xd->mi_8x8[0]->mbmi.sb_type = bsize; for (i = 0; i < MAX_MB_PLANE; ++i) { - p[i].coeff = ctx->coeff_pbuf[i][1]; - pd[i].qcoeff = ctx->qcoeff_pbuf[i][1]; - pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][1]; - pd[i].eobs = ctx->eobs_pbuf[i][1]; + p[i].coeff = ctx->coeff_pbuf[i][0]; + pd[i].qcoeff = ctx->qcoeff_pbuf[i][0]; + pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][0]; + pd[i].eobs = ctx->eobs_pbuf[i][0]; } + ctx->is_coded = 0; // Set to zero to make sure we do not use the previous encoded frame stats xd->mi_8x8[0]->mbmi.skip_coeff = 0; @@ -2400,10 +2401,13 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled, MODE_INFO **mi_8x8 = xd->mi_8x8; MODE_INFO *mi = mi_8x8[0]; MB_MODE_INFO *mbmi = &mi->mbmi; + PICK_MODE_CONTEXT *ctx = get_block_context(x, bsize); unsigned int segment_id = mbmi->segment_id; const int mis = cm->mode_info_stride; const int mi_width = num_8x8_blocks_wide_lookup[bsize]; const int mi_height = num_8x8_blocks_high_lookup[bsize]; + x->skip_optimize = ctx->is_coded; + ctx->is_coded = 1; x->use_lp32x32fdct = cpi->sf.use_lp32x32fdct; x->skip_encode = (!output_enabled && cpi->sf.skip_encode_frame && xd->q_index < QIDX_SKIP_THRESH); diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index 70008103e..7afed26d6 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c @@ -432,18 +432,26 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize, // TODO(jingning): per transformed block zero forcing only enabled for // luma component. will integrate chroma components as well. if (x->zcoeff_blk[tx_size][block] && plane == 0) { - int i, j; + int i, k; pd->eobs[block] = 0; - txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j); + txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &k); ctx->ta[plane][i] = 0; - ctx->tl[plane][j] = 0; + ctx->tl[plane][k] = 0; return; } - vp9_xform_quant(plane, block, plane_bsize, tx_size, arg); + if (x->select_txfm_size || xd->mi_8x8[0]->mbmi.sb_type < BLOCK_8X8) + vp9_xform_quant(plane, block, plane_bsize, tx_size, arg); - if (x->optimize) + if (x->optimize && (x->select_txfm_size || + xd->mi_8x8[0]->mbmi.sb_type < BLOCK_8X8|| !x->skip_optimize)) { vp9_optimize_b(plane, block, plane_bsize, tx_size, x, ctx); + } else { + int i, k; + txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &k); + ctx->ta[plane][i] = pd->eobs[block] > 0; + ctx->tl[plane][k] = pd->eobs[block] > 0; + } if (x->skip_encode || pd->eobs[block] == 0) return; @@ -507,7 +515,8 @@ void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) { struct optimize_ctx ctx; struct encode_b_args arg = {x, &ctx}; - vp9_subtract_sb(x, bsize); + if (x->select_txfm_size || xd->mi_8x8[0]->mbmi.sb_type < BLOCK_8X8) + vp9_subtract_sb(x, bsize); if (x->optimize) { int i; diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index 651ce5ed8..8a9834310 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -834,6 +834,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->adaptive_rd_thresh = 2; sf->recode_loop = 2; + sf->use_lp32x32fdct = 1; sf->mode_skip_start = 11; sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V; sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V; diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 2155a9c11..3565c0762 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -246,6 +246,9 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi) { vp9_set_speed_features(cpi); + cpi->mb.select_txfm_size = cpi->sf.tx_size_search_method == USE_LARGESTALL ? + 0 : 1; + set_block_thresholds(cpi); fill_token_costs(cpi->mb.token_costs, cm->fc.coef_probs); @@ -3030,6 +3033,29 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, return this_rd; // if 0, this will be re-calculated by caller } +static void swap_block_ptr(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) { + int i; + struct macroblock_plane *const p = x->plane; + struct macroblockd_plane *const pd = x->e_mbd.plane; + + for (i = 0; i < MAX_MB_PLANE; ++i) { + p[i].coeff = ctx->coeff_pbuf[i][1]; + pd[i].qcoeff = ctx->qcoeff_pbuf[i][1]; + pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][1]; + pd[i].eobs = ctx->eobs_pbuf[i][1]; + + ctx->coeff_pbuf[i][1] = ctx->coeff_pbuf[i][0]; + ctx->qcoeff_pbuf[i][1] = ctx->qcoeff_pbuf[i][0]; + ctx->dqcoeff_pbuf[i][1] = ctx->dqcoeff_pbuf[i][0]; + ctx->eobs_pbuf[i][1] = ctx->eobs_pbuf[i][0]; + + ctx->coeff_pbuf[i][0] = p[i].coeff; + ctx->qcoeff_pbuf[i][0] = pd[i].qcoeff; + ctx->dqcoeff_pbuf[i][0] = pd[i].dqcoeff; + ctx->eobs_pbuf[i][0] = pd[i].eobs; + } +} + void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, int *returnrate, int64_t *returndist, BLOCK_SIZE bsize, @@ -3572,6 +3598,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, best_rd = this_rd; best_mbmode = *mbmi; best_skip2 = this_skip2; + if (!x->select_txfm_size) + swap_block_ptr(x, ctx); vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size], sizeof(uint8_t) * ctx->num_4x4_blk); @@ -4316,6 +4344,8 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, RDCOST(x->rdmult, x->rddiv, rate_uv, distortion_uv); best_mbmode = *mbmi; best_skip2 = this_skip2; + if (!x->select_txfm_size) + swap_block_ptr(x, ctx); vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size], sizeof(uint8_t) * ctx->num_4x4_blk);