diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h index 3eaa9deb8..147743e8d 100644 --- a/vp9/encoder/vp9_block.h +++ b/vp9/encoder/vp9_block.h @@ -65,8 +65,14 @@ struct macroblock { int skip_optimize; int q_index; + // The equivalent error at the current rdmult of one whole bit (not one + // bitcost unit). int errorperbit; + // The equivalent SAD error of one (whole) bit at the current quantizer + // for large blocks. int sadperbit16; + // The equivalent SAD error of one (whole) bit at the current quantizer + // for sub-8x8 blocks. int sadperbit4; int rddiv; int rdmult; diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c index 607941cfa..8b7825e7b 100644 --- a/vp9/encoder/vp9_mcomp.c +++ b/vp9/encoder/vp9_mcomp.c @@ -80,27 +80,29 @@ int vp9_mv_bit_cost(const MV *mv, const MV *ref, return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) * weight, 7); } -static int mv_err_cost(const MV *mv, const MV *ref, - const int *mvjcost, int *mvcost[2], - int error_per_bit) { +#define PIXEL_TRANSFORM_ERROR_SCALE 4 +static int mv_err_cost(const MV *mv, const MV *ref, const int *mvjcost, + int *mvcost[2], int error_per_bit) { if (mvcost) { - const MV diff = { mv->row - ref->row, - mv->col - ref->col }; - // TODO(aconverse): See if this shift needs to be tied to - // VP9_PROB_COST_SHIFT. - return ROUND_POWER_OF_TWO((unsigned)mv_cost(&diff, mvjcost, mvcost) * - error_per_bit, 13); + const MV diff = {mv->row - ref->row, mv->col - ref->col}; + // This product sits at a 32-bit ceiling right now and any additional + // accuracy in either bit cost or error cost will cause it to overflow. 
+ return ROUND_POWER_OF_TWO( + (unsigned)mv_cost(&diff, mvjcost, mvcost) * error_per_bit, + RDDIV_BITS + VP9_PROB_COST_SHIFT - RD_EPB_SHIFT + + PIXEL_TRANSFORM_ERROR_SCALE); } return 0; } static int mvsad_err_cost(const MACROBLOCK *x, const MV *mv, const MV *ref, - int error_per_bit) { + int sad_per_bit) { const MV diff = { mv->row - ref->row, mv->col - ref->col }; - // TODO(aconverse): See if this shift needs to be tied to VP9_PROB_COST_SHIFT. - return ROUND_POWER_OF_TWO((unsigned)mv_cost(&diff, x->nmvjointsadcost, - x->nmvsadcost) * error_per_bit, 8); + return ROUND_POWER_OF_TWO( + (unsigned)mv_cost(&diff, x->nmvjointsadcost, x->nmvsadcost) * + sad_per_bit, + VP9_PROB_COST_SHIFT); } void vp9_init_dsmotion_compensation(search_site_config *cfg, int stride) { @@ -152,12 +154,13 @@ void vp9_init3smotion_compensation(search_site_config *cfg, int stride) { * could reduce the area. */ -/* estimated cost of a motion vector (r,c) */ +/* Estimated (square) error cost of a motion vector (r,c). The 14 scale comes + * from the same math as in mv_err_cost(). */ #define MVC(r, c) \ (mvcost ? 
\ ((unsigned)(mvjcost[((r) != rr) * 2 + ((c) != rc)] + \ mvcost[0][((r) - rr)] + mvcost[1][((c) - rc)]) * \ - error_per_bit + 4096) >> 13 : 0) + error_per_bit + 8192) >> 14 : 0) // convert motion vector component to offset for sv[a]f calc diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c index 980a49f0a..91f877ed7 100644 --- a/vp9/encoder/vp9_quantize.c +++ b/vp9/encoder/vp9_quantize.c @@ -342,8 +342,7 @@ void vp9_init_plane_quantizers(VP9_COMP *cpi, MACROBLOCK *x) { x->skip_block = segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP); x->q_index = qindex; - x->errorperbit = rdmult >> 6; - x->errorperbit += (x->errorperbit == 0); + set_error_per_bit(x, rdmult); vp9_initialize_me_consts(cpi, x, x->q_index); } diff --git a/vp9/encoder/vp9_rd.c b/vp9/encoder/vp9_rd.c index a8a939ee4..fc32d1911 100644 --- a/vp9/encoder/vp9_rd.c +++ b/vp9/encoder/vp9_rd.c @@ -41,7 +41,6 @@ #include "vp9/encoder/vp9_tokenize.h" #define RD_THRESH_POW 1.25 -#define RD_MULT_EPB_RATIO 64 // Factor to weigh the rate for switchable interp filters. #define SWITCHABLE_INTERP_RATE_FACTOR 1 @@ -279,8 +278,7 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi) { rd->RDDIV = RDDIV_BITS; // In bits (to multiply D by 128). rd->RDMULT = vp9_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q); - x->errorperbit = rd->RDMULT / RD_MULT_EPB_RATIO; - x->errorperbit += (x->errorperbit == 0); + set_error_per_bit(x, rd->RDMULT); x->select_tx_size = (cpi->sf.tx_size_search_method == USE_LARGESTALL && cm->frame_type != KEY_FRAME) ? 
0 : 1; diff --git a/vp9/encoder/vp9_rd.h b/vp9/encoder/vp9_rd.h index a92b14edf..9b8e2732c 100644 --- a/vp9/encoder/vp9_rd.h +++ b/vp9/encoder/vp9_rd.h @@ -24,6 +24,7 @@ extern "C" { #endif #define RDDIV_BITS 7 +#define RD_EPB_SHIFT 6 #define RDCOST(RM, DM, R, D) \ (ROUND_POWER_OF_TWO(((int64_t)R) * (RM), VP9_PROB_COST_SHIFT) + (D << DM)) @@ -168,6 +169,11 @@ static INLINE int rd_less_than_thresh(int64_t best_rd, int thresh, return best_rd < ((int64_t)thresh * thresh_fact >> 5) || thresh == INT_MAX; } +static INLINE void set_error_per_bit(MACROBLOCK *x, int rdmult) { + x->errorperbit = rdmult >> RD_EPB_SHIFT; + x->errorperbit += (x->errorperbit == 0); +} + void vp9_mv_pred(struct VP9_COMP *cpi, MACROBLOCK *x, uint8_t *ref_y_buffer, int ref_y_stride, int ref_frame, BLOCK_SIZE block_size); diff --git a/vp9/encoder/x86/vp9_diamond_search_sad_avx.c b/vp9/encoder/x86/vp9_diamond_search_sad_avx.c index b475f8db1..0bc417fc1 100644 --- a/vp9/encoder/x86/vp9_diamond_search_sad_avx.c +++ b/vp9/encoder/x86/vp9_diamond_search_sad_avx.c @@ -47,12 +47,12 @@ static INLINE int mv_cost(const int_mv mv, } static int mvsad_err_cost(const MACROBLOCK *x, const int_mv mv, const MV *ref, - int error_per_bit) { + int sad_per_bit) { const int_mv diff = pack_int_mv(mv.as_mv.row - ref->row, mv.as_mv.col - ref->col); return ROUND_POWER_OF_TWO((unsigned)mv_cost(diff, x->nmvjointsadcost, x->nmvsadcost) * - error_per_bit, 8); + sad_per_bit, VP9_PROB_COST_SHIFT); } /*****************************************************************************