Add single motion search for OBMC predictor

A weighted single motion search is implemented for the OBMC predictor.
When NEWMV mode is used, to determine the MV for the current block we
run a weighted motion search: each candidate prediction is compared
against (source - weighted prediction using the neighbors' MVs), so the
distortion measured is the actual prediction error of the final OBMC
prediction.
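
For intuition, here is how the weighted SAD reduces to the OBMC prediction
error (a sketch, assuming the two 1-D masks returned by setup_obmc_mask()
are complementary neighbor/self weights with w + m == 64, ignoring rounding):

/* OBMC blends the single prediction p with the above/left predictions:
 *   p1 = (w_a * p_above + m_a * p) / 64           <- vertical pass
 *   p2 = (w_l * p_left  + m_l * p1) / 64          <- horizontal pass
 * calc_target_weighted_pred() precomputes, per pixel,
 *   wsrc = 4096 * src - (m_l * w_a * p_above + 64 * w_l * p_left)
 *   mask = m_a * m_l
 * so that wsrc - mask * p == 4096 * (src - p2), and the weighted SAD
 *   (|wsrc - mask * p| + 2048) >> 12
 * is the per-pixel OBMC prediction error at the original pixel scale. */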

Coding gain: 0.404/0.425/0.366 for lowres/midres/hdres
Speed impact: +14% encoding time
              (obmc w/o mv search 13% -> obmc w/ mv search 27%)

Change-Id: Id7ad3fc6ba295b23d9c53c8a16a4ac1677ad835c
Yue Chen 2016-04-22 15:09:12 -07:00
Parent 1d2d1e752e
Commit 370f203a40
10 changed files with 1753 additions and 11 deletions

View file

@@ -462,6 +462,7 @@ static INLINE int vp10_is_interp_needed(const MACROBLOCKD *const xd) {
#endif // CONFIG_EXT_INTERP
#if CONFIG_OBMC
void setup_obmc_mask(int length, const uint8_t *mask[2]);
void vp10_build_obmc_inter_prediction(VP10_COMMON *cm,
MACROBLOCKD *xd, int mi_row, int mi_col,
int use_tmp_dst_buf,

View file

@@ -1219,6 +1219,49 @@ MAKE_MBFP_SAD_WRAPPER(vpx_highbd_masked_sad4x8)
MAKE_MBFP_SAD_WRAPPER(vpx_highbd_masked_sad4x4)
#endif // CONFIG_EXT_INTER
#if CONFIG_OBMC
#define HIGHBD_OBFP(BT, OSDF, OVF, OSVF) \
cpi->fn_ptr[BT].osdf = OSDF; \
cpi->fn_ptr[BT].ovf = OVF; \
cpi->fn_ptr[BT].osvf = OSVF;
#define MAKE_OBFP_SAD_WRAPPER(fnname) \
static unsigned int fnname##_bits8(const uint8_t *ref, int ref_stride, \
const int *wsrc, int wsrc_stride, \
const int *msk, int msk_stride) { \
return fnname(ref, ref_stride, wsrc, wsrc_stride, msk, msk_stride); \
} \
static unsigned int fnname##_bits10(const uint8_t *ref, int ref_stride, \
const int *wsrc, int wsrc_stride, \
const int *msk, int msk_stride) { \
return fnname(ref, ref_stride, wsrc, wsrc_stride, msk, msk_stride) >> 2; \
} \
static unsigned int fnname##_bits12(const uint8_t *ref, int ref_stride, \
const int *wsrc, int wsrc_stride, \
const int *msk, int msk_stride) { \
return fnname(ref, ref_stride, wsrc, wsrc_stride, msk, msk_stride) >> 4; \
}
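/* Like the plain highbd SAD wrappers, these normalize the weighted SAD back
 * to the 8-bit scale so RD costs stay comparable across bit depths.  Sanity
 * check with illustrative values:
 *   8-bit:  |diff| = 255            -> sad 255
 *   10-bit: |diff| = 1023, sad >> 2 -> 255
 *   12-bit: |diff| = 4095, sad >> 4 -> 255  */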
#if CONFIG_EXT_PARTITION
MAKE_OBFP_SAD_WRAPPER(vpx_highbd_obmc_sad128x128)
MAKE_OBFP_SAD_WRAPPER(vpx_highbd_obmc_sad128x64)
MAKE_OBFP_SAD_WRAPPER(vpx_highbd_obmc_sad64x128)
#endif // CONFIG_EXT_PARTITION
MAKE_OBFP_SAD_WRAPPER(vpx_highbd_obmc_sad64x64)
MAKE_OBFP_SAD_WRAPPER(vpx_highbd_obmc_sad64x32)
MAKE_OBFP_SAD_WRAPPER(vpx_highbd_obmc_sad32x64)
MAKE_OBFP_SAD_WRAPPER(vpx_highbd_obmc_sad32x32)
MAKE_OBFP_SAD_WRAPPER(vpx_highbd_obmc_sad32x16)
MAKE_OBFP_SAD_WRAPPER(vpx_highbd_obmc_sad16x32)
MAKE_OBFP_SAD_WRAPPER(vpx_highbd_obmc_sad16x16)
MAKE_OBFP_SAD_WRAPPER(vpx_highbd_obmc_sad16x8)
MAKE_OBFP_SAD_WRAPPER(vpx_highbd_obmc_sad8x16)
MAKE_OBFP_SAD_WRAPPER(vpx_highbd_obmc_sad8x8)
MAKE_OBFP_SAD_WRAPPER(vpx_highbd_obmc_sad8x4)
MAKE_OBFP_SAD_WRAPPER(vpx_highbd_obmc_sad4x8)
MAKE_OBFP_SAD_WRAPPER(vpx_highbd_obmc_sad4x4)
#endif // CONFIG_OBMC
static void highbd_set_var_fns(VP10_COMP *const cpi) {
VP10_COMMON *const cm = &cpi->common;
if (cm->use_highbitdepth) {
@@ -1454,6 +1497,74 @@ static void highbd_set_var_fns(VP10_COMP *const cpi) {
vpx_highbd_masked_variance4x4,
vpx_highbd_masked_sub_pixel_variance4x4)
#endif // CONFIG_EXT_INTER
#if CONFIG_OBMC
#if CONFIG_EXT_PARTITION
HIGHBD_OBFP(BLOCK_128X128,
vpx_highbd_obmc_sad128x128_bits8,
vpx_highbd_obmc_variance128x128,
vpx_highbd_obmc_sub_pixel_variance128x128)
HIGHBD_OBFP(BLOCK_128X64,
vpx_highbd_obmc_sad128x64_bits8,
vpx_highbd_obmc_variance128x64,
vpx_highbd_obmc_sub_pixel_variance128x64)
HIGHBD_OBFP(BLOCK_64X128,
vpx_highbd_obmc_sad64x128_bits8,
vpx_highbd_obmc_variance64x128,
vpx_highbd_obmc_sub_pixel_variance64x128)
#endif // CONFIG_EXT_PARTITION
HIGHBD_OBFP(BLOCK_64X64,
vpx_highbd_obmc_sad64x64_bits8,
vpx_highbd_obmc_variance64x64,
vpx_highbd_obmc_sub_pixel_variance64x64)
HIGHBD_OBFP(BLOCK_64X32,
vpx_highbd_obmc_sad64x32_bits8,
vpx_highbd_obmc_variance64x32,
vpx_highbd_obmc_sub_pixel_variance64x32)
HIGHBD_OBFP(BLOCK_32X64,
vpx_highbd_obmc_sad32x64_bits8,
vpx_highbd_obmc_variance32x64,
vpx_highbd_obmc_sub_pixel_variance32x64)
HIGHBD_OBFP(BLOCK_32X32,
vpx_highbd_obmc_sad32x32_bits8,
vpx_highbd_obmc_variance32x32,
vpx_highbd_obmc_sub_pixel_variance32x32)
HIGHBD_OBFP(BLOCK_32X16,
vpx_highbd_obmc_sad32x16_bits8,
vpx_highbd_obmc_variance32x16,
vpx_highbd_obmc_sub_pixel_variance32x16)
HIGHBD_OBFP(BLOCK_16X32,
vpx_highbd_obmc_sad16x32_bits8,
vpx_highbd_obmc_variance16x32,
vpx_highbd_obmc_sub_pixel_variance16x32)
HIGHBD_OBFP(BLOCK_16X16,
vpx_highbd_obmc_sad16x16_bits8,
vpx_highbd_obmc_variance16x16,
vpx_highbd_obmc_sub_pixel_variance16x16)
HIGHBD_OBFP(BLOCK_8X16,
vpx_highbd_obmc_sad8x16_bits8,
vpx_highbd_obmc_variance8x16,
vpx_highbd_obmc_sub_pixel_variance8x16)
HIGHBD_OBFP(BLOCK_16X8,
vpx_highbd_obmc_sad16x8_bits8,
vpx_highbd_obmc_variance16x8,
vpx_highbd_obmc_sub_pixel_variance16x8)
HIGHBD_OBFP(BLOCK_8X8,
vpx_highbd_obmc_sad8x8_bits8,
vpx_highbd_obmc_variance8x8,
vpx_highbd_obmc_sub_pixel_variance8x8)
HIGHBD_OBFP(BLOCK_4X8,
vpx_highbd_obmc_sad4x8_bits8,
vpx_highbd_obmc_variance4x8,
vpx_highbd_obmc_sub_pixel_variance4x8)
HIGHBD_OBFP(BLOCK_8X4,
vpx_highbd_obmc_sad8x4_bits8,
vpx_highbd_obmc_variance8x4,
vpx_highbd_obmc_sub_pixel_variance8x4)
HIGHBD_OBFP(BLOCK_4X4,
vpx_highbd_obmc_sad4x4_bits8,
vpx_highbd_obmc_variance4x4,
vpx_highbd_obmc_sub_pixel_variance4x4)
#endif // CONFIG_OBMC
break;
case VPX_BITS_10:
@@ -1687,6 +1798,74 @@ static void highbd_set_var_fns(VP10_COMP *const cpi) {
vpx_highbd_10_masked_variance4x4,
vpx_highbd_10_masked_sub_pixel_variance4x4)
#endif // CONFIG_EXT_INTER
#if CONFIG_OBMC
#if CONFIG_EXT_PARTITION
HIGHBD_OBFP(BLOCK_128X128,
vpx_highbd_obmc_sad128x128_bits10,
vpx_highbd_10_obmc_variance128x128,
vpx_highbd_10_obmc_sub_pixel_variance128x128)
HIGHBD_OBFP(BLOCK_128X64,
vpx_highbd_obmc_sad128x64_bits10,
vpx_highbd_10_obmc_variance128x64,
vpx_highbd_10_obmc_sub_pixel_variance128x64)
HIGHBD_OBFP(BLOCK_64X128,
vpx_highbd_obmc_sad64x128_bits10,
vpx_highbd_10_obmc_variance64x128,
vpx_highbd_10_obmc_sub_pixel_variance64x128)
#endif // CONFIG_EXT_PARTITION
HIGHBD_OBFP(BLOCK_64X64,
vpx_highbd_obmc_sad64x64_bits10,
vpx_highbd_10_obmc_variance64x64,
vpx_highbd_10_obmc_sub_pixel_variance64x64)
HIGHBD_OBFP(BLOCK_64X32,
vpx_highbd_obmc_sad64x32_bits10,
vpx_highbd_10_obmc_variance64x32,
vpx_highbd_10_obmc_sub_pixel_variance64x32)
HIGHBD_OBFP(BLOCK_32X64,
vpx_highbd_obmc_sad32x64_bits10,
vpx_highbd_10_obmc_variance32x64,
vpx_highbd_10_obmc_sub_pixel_variance32x64)
HIGHBD_OBFP(BLOCK_32X32,
vpx_highbd_obmc_sad32x32_bits10,
vpx_highbd_10_obmc_variance32x32,
vpx_highbd_10_obmc_sub_pixel_variance32x32)
HIGHBD_OBFP(BLOCK_32X16,
vpx_highbd_obmc_sad32x16_bits10,
vpx_highbd_10_obmc_variance32x16,
vpx_highbd_10_obmc_sub_pixel_variance32x16)
HIGHBD_OBFP(BLOCK_16X32,
vpx_highbd_obmc_sad16x32_bits10,
vpx_highbd_10_obmc_variance16x32,
vpx_highbd_10_obmc_sub_pixel_variance16x32)
HIGHBD_OBFP(BLOCK_16X16,
vpx_highbd_obmc_sad16x16_bits10,
vpx_highbd_10_obmc_variance16x16,
vpx_highbd_10_obmc_sub_pixel_variance16x16)
HIGHBD_OBFP(BLOCK_8X16,
vpx_highbd_obmc_sad8x16_bits10,
vpx_highbd_10_obmc_variance8x16,
vpx_highbd_10_obmc_sub_pixel_variance8x16)
HIGHBD_OBFP(BLOCK_16X8,
vpx_highbd_obmc_sad16x8_bits10,
vpx_highbd_10_obmc_variance16x8,
vpx_highbd_10_obmc_sub_pixel_variance16x8)
HIGHBD_OBFP(BLOCK_8X8,
vpx_highbd_obmc_sad8x8_bits10,
vpx_highbd_10_obmc_variance8x8,
vpx_highbd_10_obmc_sub_pixel_variance8x8)
HIGHBD_OBFP(BLOCK_4X8,
vpx_highbd_obmc_sad4x8_bits10,
vpx_highbd_10_obmc_variance4x8,
vpx_highbd_10_obmc_sub_pixel_variance4x8)
HIGHBD_OBFP(BLOCK_8X4,
vpx_highbd_obmc_sad8x4_bits10,
vpx_highbd_10_obmc_variance8x4,
vpx_highbd_10_obmc_sub_pixel_variance8x4)
HIGHBD_OBFP(BLOCK_4X4,
vpx_highbd_obmc_sad4x4_bits10,
vpx_highbd_10_obmc_variance4x4,
vpx_highbd_10_obmc_sub_pixel_variance4x4)
#endif // CONFIG_OBMC
break;
case VPX_BITS_12:
@@ -1920,6 +2099,75 @@ static void highbd_set_var_fns(VP10_COMP *const cpi) {
vpx_highbd_12_masked_variance4x4,
vpx_highbd_12_masked_sub_pixel_variance4x4)
#endif // CONFIG_EXT_INTER
#if CONFIG_OBMC
#if CONFIG_EXT_PARTITION
HIGHBD_OBFP(BLOCK_128X128,
vpx_highbd_obmc_sad128x128_bits12,
vpx_highbd_12_obmc_variance128x128,
vpx_highbd_12_obmc_sub_pixel_variance128x128)
HIGHBD_OBFP(BLOCK_128X64,
vpx_highbd_obmc_sad128x64_bits12,
vpx_highbd_12_obmc_variance128x64,
vpx_highbd_12_obmc_sub_pixel_variance128x64)
HIGHBD_OBFP(BLOCK_64X128,
vpx_highbd_obmc_sad64x128_bits12,
vpx_highbd_12_obmc_variance64x128,
vpx_highbd_12_obmc_sub_pixel_variance64x128)
#endif // CONFIG_EXT_PARTITION
HIGHBD_OBFP(BLOCK_64X64,
vpx_highbd_obmc_sad64x64_bits12,
vpx_highbd_12_obmc_variance64x64,
vpx_highbd_12_obmc_sub_pixel_variance64x64)
HIGHBD_OBFP(BLOCK_64X32,
vpx_highbd_obmc_sad64x32_bits12,
vpx_highbd_12_obmc_variance64x32,
vpx_highbd_12_obmc_sub_pixel_variance64x32)
HIGHBD_OBFP(BLOCK_32X64,
vpx_highbd_obmc_sad32x64_bits12,
vpx_highbd_12_obmc_variance32x64,
vpx_highbd_12_obmc_sub_pixel_variance32x64)
HIGHBD_OBFP(BLOCK_32X32,
vpx_highbd_obmc_sad32x32_bits12,
vpx_highbd_12_obmc_variance32x32,
vpx_highbd_12_obmc_sub_pixel_variance32x32)
HIGHBD_OBFP(BLOCK_32X16,
vpx_highbd_obmc_sad32x16_bits12,
vpx_highbd_12_obmc_variance32x16,
vpx_highbd_12_obmc_sub_pixel_variance32x16)
HIGHBD_OBFP(BLOCK_16X32,
vpx_highbd_obmc_sad16x32_bits12,
vpx_highbd_12_obmc_variance16x32,
vpx_highbd_12_obmc_sub_pixel_variance16x32)
HIGHBD_OBFP(BLOCK_16X16,
vpx_highbd_obmc_sad16x16_bits12,
vpx_highbd_12_obmc_variance16x16,
vpx_highbd_12_obmc_sub_pixel_variance16x16)
HIGHBD_OBFP(BLOCK_8X16,
vpx_highbd_obmc_sad8x16_bits12,
vpx_highbd_12_obmc_variance8x16,
vpx_highbd_12_obmc_sub_pixel_variance8x16)
HIGHBD_OBFP(BLOCK_16X8,
vpx_highbd_obmc_sad16x8_bits12,
vpx_highbd_12_obmc_variance16x8,
vpx_highbd_12_obmc_sub_pixel_variance16x8)
HIGHBD_OBFP(BLOCK_8X8,
vpx_highbd_obmc_sad8x8_bits12,
vpx_highbd_12_obmc_variance8x8,
vpx_highbd_12_obmc_sub_pixel_variance8x8)
HIGHBD_OBFP(BLOCK_4X8,
vpx_highbd_obmc_sad4x8_bits12,
vpx_highbd_12_obmc_variance4x8,
vpx_highbd_12_obmc_sub_pixel_variance4x8)
HIGHBD_OBFP(BLOCK_8X4,
vpx_highbd_obmc_sad8x4_bits12,
vpx_highbd_12_obmc_variance8x4,
vpx_highbd_12_obmc_sub_pixel_variance8x4)
HIGHBD_OBFP(BLOCK_4X4,
vpx_highbd_obmc_sad4x4_bits12,
vpx_highbd_12_obmc_variance4x4,
vpx_highbd_12_obmc_sub_pixel_variance4x4)
#endif // CONFIG_OBMC
break;
default:
@@ -2415,6 +2663,48 @@ VP10_COMP *vp10_create_compressor(VP10EncoderConfig *oxcf,
vpx_sub_pixel_avg_variance4x4,
vpx_sad4x4x3, vpx_sad4x4x8, vpx_sad4x4x4d)
#if CONFIG_OBMC
#define OBFP(BT, OSDF, OVF, OSVF) \
cpi->fn_ptr[BT].osdf = OSDF; \
cpi->fn_ptr[BT].ovf = OVF; \
cpi->fn_ptr[BT].osvf = OSVF;
#if CONFIG_EXT_PARTITION
OBFP(BLOCK_128X128, vpx_obmc_sad128x128, vpx_obmc_variance128x128,
vpx_obmc_sub_pixel_variance128x128)
OBFP(BLOCK_128X64, vpx_obmc_sad128x64, vpx_obmc_variance128x64,
vpx_obmc_sub_pixel_variance128x64)
OBFP(BLOCK_64X128, vpx_obmc_sad64x128, vpx_obmc_variance64x128,
vpx_obmc_sub_pixel_variance64x128)
#endif // CONFIG_EXT_PARTITION
OBFP(BLOCK_64X64, vpx_obmc_sad64x64, vpx_obmc_variance64x64,
vpx_obmc_sub_pixel_variance64x64)
OBFP(BLOCK_64X32, vpx_obmc_sad64x32, vpx_obmc_variance64x32,
vpx_obmc_sub_pixel_variance64x32)
OBFP(BLOCK_32X64, vpx_obmc_sad32x64, vpx_obmc_variance32x64,
vpx_obmc_sub_pixel_variance32x64)
OBFP(BLOCK_32X32, vpx_obmc_sad32x32, vpx_obmc_variance32x32,
vpx_obmc_sub_pixel_variance32x32)
OBFP(BLOCK_32X16, vpx_obmc_sad32x16, vpx_obmc_variance32x16,
vpx_obmc_sub_pixel_variance32x16)
OBFP(BLOCK_16X32, vpx_obmc_sad16x32, vpx_obmc_variance16x32,
vpx_obmc_sub_pixel_variance16x32)
OBFP(BLOCK_16X16, vpx_obmc_sad16x16, vpx_obmc_variance16x16,
vpx_obmc_sub_pixel_variance16x16)
OBFP(BLOCK_16X8, vpx_obmc_sad16x8, vpx_obmc_variance16x8,
vpx_obmc_sub_pixel_variance16x8)
OBFP(BLOCK_8X16, vpx_obmc_sad8x16, vpx_obmc_variance8x16,
vpx_obmc_sub_pixel_variance8x16)
OBFP(BLOCK_8X8, vpx_obmc_sad8x8, vpx_obmc_variance8x8,
vpx_obmc_sub_pixel_variance8x8)
OBFP(BLOCK_4X8, vpx_obmc_sad4x8, vpx_obmc_variance4x8,
vpx_obmc_sub_pixel_variance4x8)
OBFP(BLOCK_8X4, vpx_obmc_sad8x4, vpx_obmc_variance8x4,
vpx_obmc_sub_pixel_variance8x4)
OBFP(BLOCK_4X4, vpx_obmc_sad4x4, vpx_obmc_variance4x4,
vpx_obmc_sub_pixel_variance4x4)
#endif // CONFIG_OBMC
#if CONFIG_EXT_INTER
#define MBFP(BT, MSDF, MVF, MSVF) \
cpi->fn_ptr[BT].msdf = MSDF; \

View file

@@ -3253,3 +3253,544 @@ int vp10_masked_full_pixel_diamond(const VP10_COMP *cpi, MACROBLOCK *x,
return bestsme;
}
#endif // CONFIG_EXT_INTER
#if CONFIG_OBMC
/* returns subpixel variance error function */
#define DIST(r, c) \
vfp->osvf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \
src_stride, mask, mask_stride, &sse)
/* checks if (r, c) has better score than previous best */
#define MVC(r, c) \
(mvcost ? \
((mvjcost[((r) != rr) * 2 + ((c) != rc)] + \
mvcost[0][((r) - rr)] + mvcost[1][((c) - rc)]) * \
error_per_bit + 4096) >> 13 : 0)
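/* The "(... * error_per_bit + 4096) >> 13" in MVC is a round-to-nearest
 * fixed-point division by 2^13, matching mv_err_cost()'s
 * ROUND_POWER_OF_TWO(..., 13); e.g. for a scaled cost x = 12288
 * (i.e. 1.5 * 8192), (x + 4096) >> 13 == 2. */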
#define CHECK_BETTER(v, r, c) \
if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
thismse = (DIST(r, c)); \
if ((v = MVC(r, c) + thismse) < besterr) { \
besterr = v; \
br = r; \
bc = c; \
*distortion = thismse; \
*sse1 = sse; \
} \
} else { \
v = INT_MAX; \
}
#undef CHECK_BETTER0
#define CHECK_BETTER0(v, r, c) CHECK_BETTER(v, r, c)
#undef CHECK_BETTER1
#define CHECK_BETTER1(v, r, c) \
if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
thismse = upsampled_obmc_pref_error(xd, \
mask, mask_stride, \
vfp, z, src_stride, \
upre(y, y_stride, r, c), \
y_stride, \
w, h, &sse); \
if ((v = MVC(r, c) + thismse) < besterr) { \
besterr = v; \
br = r; \
bc = c; \
*distortion = thismse; \
*sse1 = sse; \
} \
} else { \
v = INT_MAX; \
}
static unsigned int setup_obmc_center_error(const int *mask,
int mask_stride,
const MV *bestmv,
const MV *ref_mv,
int error_per_bit,
const vp10_variance_fn_ptr_t *vfp,
const int *const wsrc,
const int wsrc_stride,
const uint8_t *const y,
int y_stride,
int offset,
int *mvjcost, int *mvcost[2],
unsigned int *sse1,
int *distortion) {
unsigned int besterr;
besterr = vfp->ovf(y + offset, y_stride, wsrc, wsrc_stride,
mask, mask_stride, sse1);
*distortion = besterr;
besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
return besterr;
}
static int upsampled_obmc_pref_error(const MACROBLOCKD *xd,
const int *mask, int mask_stride,
const vp10_variance_fn_ptr_t *vfp,
const int *const wsrc,
const int wsrc_stride,
const uint8_t *const y, int y_stride,
int w, int h, unsigned int *sse) {
unsigned int besterr;
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
DECLARE_ALIGNED(16, uint16_t, pred16[MAX_SB_SQUARE]);
vpx_highbd_upsampled_pred(pred16, w, h, y, y_stride);
besterr = vfp->ovf(CONVERT_TO_BYTEPTR(pred16), w, wsrc, wsrc_stride,
mask, mask_stride, sse);
} else {
DECLARE_ALIGNED(16, uint8_t, pred[MAX_SB_SQUARE]);
#else
DECLARE_ALIGNED(16, uint8_t, pred[MAX_SB_SQUARE]);
(void) xd;
#endif // CONFIG_VP9_HIGHBITDEPTH
vpx_upsampled_pred(pred, w, h, y, y_stride);
besterr = vfp->ovf(pred, w, wsrc, wsrc_stride, mask, mask_stride, sse);
#if CONFIG_VP9_HIGHBITDEPTH
}
#endif
return besterr;
}
static unsigned int upsampled_setup_obmc_center_error(
const MACROBLOCKD *xd,
const int *mask, int mask_stride,
const MV *bestmv, const MV *ref_mv,
int error_per_bit, const vp10_variance_fn_ptr_t *vfp,
const int *const wsrc, const int wsrc_stride,
const uint8_t *const y, int y_stride,
int w, int h, int offset, int *mvjcost, int *mvcost[2],
unsigned int *sse1, int *distortion) {
unsigned int besterr = upsampled_obmc_pref_error(xd, mask, mask_stride, vfp,
wsrc, wsrc_stride,
y + offset, y_stride,
w, h, sse1);
*distortion = besterr;
besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
return besterr;
}
int vp10_find_best_obmc_sub_pixel_tree_up(VP10_COMP *cpi, MACROBLOCK *x,
const int *wsrc, int wsrc_stride,
const int *mask, int mask_stride,
int mi_row, int mi_col,
MV *bestmv, const MV *ref_mv,
int allow_hp, int error_per_bit,
const vp10_variance_fn_ptr_t *vfp,
int forced_stop, int iters_per_step,
int *mvjcost, int *mvcost[2],
int *distortion, unsigned int *sse1,
int is_second,
int use_upsampled_ref) {
const int *const z = wsrc;
const int *const src_address = z;
const int src_stride = wsrc_stride;
MACROBLOCKD *xd = &x->e_mbd;
struct macroblockd_plane *const pd = &xd->plane[0];
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
unsigned int besterr = INT_MAX;
unsigned int sse;
unsigned int thismse;
int rr = ref_mv->row;
int rc = ref_mv->col;
int br = bestmv->row * 8;
int bc = bestmv->col * 8;
int hstep = 4;
int iter;
int round = 3 - forced_stop;
const int minc = VPXMAX(x->mv_col_min * 8, ref_mv->col - MV_MAX);
const int maxc = VPXMIN(x->mv_col_max * 8, ref_mv->col + MV_MAX);
const int minr = VPXMAX(x->mv_row_min * 8, ref_mv->row - MV_MAX);
const int maxr = VPXMIN(x->mv_row_max * 8, ref_mv->row + MV_MAX);
int tr = br;
int tc = bc;
const MV *search_step = search_step_table;
int idx, best_idx = -1;
unsigned int cost_array[5];
int kr, kc;
const int w = 4 * num_4x4_blocks_wide_lookup[mbmi->sb_type];
const int h = 4 * num_4x4_blocks_high_lookup[mbmi->sb_type];
int offset;
int y_stride;
const uint8_t *y;
const struct buf_2d backup_pred = pd->pre[is_second];
if (use_upsampled_ref) {
int ref = xd->mi[0]->mbmi.ref_frame[is_second];
const YV12_BUFFER_CONFIG *upsampled_ref = get_upsampled_ref(cpi, ref);
setup_pred_plane(&pd->pre[is_second], upsampled_ref->y_buffer,
upsampled_ref->y_stride, (mi_row << 3), (mi_col << 3),
NULL, pd->subsampling_x, pd->subsampling_y);
}
y = pd->pre[is_second].buf;
y_stride = pd->pre[is_second].stride;
offset = bestmv->row * y_stride + bestmv->col;
if (!(allow_hp && vp10_use_mv_hp(ref_mv)))
if (round == 3)
round = 2;
bestmv->row *= 8;
bestmv->col *= 8;
// use_upsampled_ref can be 0 or 1
if (use_upsampled_ref)
besterr = upsampled_setup_obmc_center_error(
xd, mask, mask_stride, bestmv, ref_mv, error_per_bit,
vfp, z, src_stride, y, y_stride,
w, h, (offset << 3),
mvjcost, mvcost, sse1, distortion);
else
besterr = setup_obmc_center_error(
mask, mask_stride, bestmv, ref_mv, error_per_bit,
vfp, z, src_stride, y, y_stride,
offset, mvjcost, mvcost, sse1, distortion);
for (iter = 0; iter < round; ++iter) {
// Check vertical and horizontal sub-pixel positions.
for (idx = 0; idx < 4; ++idx) {
tr = br + search_step[idx].row;
tc = bc + search_step[idx].col;
if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
MV this_mv = {tr, tc};
if (use_upsampled_ref) {
const uint8_t *const pre_address = y + tr * y_stride + tc;
thismse = upsampled_obmc_pref_error(xd, mask, mask_stride,
vfp, src_address, src_stride,
pre_address, y_stride,
w, h, &sse);
} else {
const uint8_t *const pre_address = y + (tr >> 3) * y_stride +
(tc >> 3);
thismse = vfp->osvf(pre_address, y_stride, sp(tc), sp(tr),
src_address, src_stride,
mask, mask_stride, &sse);
}
cost_array[idx] = thismse +
mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit);
if (cost_array[idx] < besterr) {
best_idx = idx;
besterr = cost_array[idx];
*distortion = thismse;
*sse1 = sse;
}
} else {
cost_array[idx] = INT_MAX;
}
}
// Check diagonal sub-pixel position
kc = (cost_array[0] <= cost_array[1] ? -hstep : hstep);
kr = (cost_array[2] <= cost_array[3] ? -hstep : hstep);
tc = bc + kc;
tr = br + kr;
if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
MV this_mv = {tr, tc};
if (use_upsampled_ref) {
const uint8_t *const pre_address = y + tr * y_stride + tc;
thismse = upsampled_obmc_pref_error(xd, mask, mask_stride,
vfp, src_address, src_stride,
pre_address, y_stride,
w, h, &sse);
} else {
const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);
thismse = vfp->osvf(pre_address, y_stride, sp(tc), sp(tr),
src_address, src_stride, mask, mask_stride, &sse);
}
cost_array[4] = thismse +
mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit);
if (cost_array[4] < besterr) {
best_idx = 4;
besterr = cost_array[4];
*distortion = thismse;
*sse1 = sse;
}
} else {
cost_array[idx] = INT_MAX;
}
if (best_idx < 4 && best_idx >= 0) {
br += search_step[best_idx].row;
bc += search_step[best_idx].col;
} else if (best_idx == 4) {
br = tr;
bc = tc;
}
if (iters_per_step > 1 && best_idx != -1) {
if (use_upsampled_ref) {
SECOND_LEVEL_CHECKS_BEST(1);
} else {
SECOND_LEVEL_CHECKS_BEST(0);
}
}
tr = br;
tc = bc;
search_step += 4;
hstep >>= 1;
best_idx = -1;
}
// These lines ensure static analysis doesn't warn that
// tr and tc aren't used after the above point.
(void) tr;
(void) tc;
bestmv->row = br;
bestmv->col = bc;
if (use_upsampled_ref) {
pd->pre[is_second] = backup_pred;
}
if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
(abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
return INT_MAX;
return besterr;
}
#undef DIST
#undef MVC
#undef CHECK_BETTER
static int get_obmc_mvpred_var(const MACROBLOCK *x,
const int *wsrc, int wsrc_stride,
const int *mask, int mask_stride,
const MV *best_mv, const MV *center_mv,
const vp10_variance_fn_ptr_t *vfp,
int use_mvcost, int is_second) {
const MACROBLOCKD *const xd = &x->e_mbd;
const struct buf_2d *const in_what = &xd->plane[0].pre[is_second];
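// best_mv is in full-pel units; scale to 1/8-pel for the MV rate cost.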
const MV mv = {best_mv->row * 8, best_mv->col * 8};
unsigned int unused;
return vfp->ovf(get_buf_from_mv(in_what, best_mv), in_what->stride,
wsrc, wsrc_stride, mask, mask_stride, &unused) +
(use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost,
x->mvcost, x->errorperbit) : 0);
}
int obmc_refining_search_sad(const MACROBLOCK *x,
const int *wsrc, int wsrc_stride,
const int *mask, int mask_stride,
MV *ref_mv, int error_per_bit,
int search_range,
const vp10_variance_fn_ptr_t *fn_ptr,
const MV *center_mv, int is_second) {
const MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}};
const MACROBLOCKD *const xd = &x->e_mbd;
const struct buf_2d *const in_what = &xd->plane[0].pre[is_second];
const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
unsigned int best_sad = fn_ptr->osdf(get_buf_from_mv(in_what, ref_mv),
in_what->stride,
wsrc, wsrc_stride, mask, mask_stride) +
mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit);
int i, j;
for (i = 0; i < search_range; i++) {
int best_site = -1;
for (j = 0; j < 4; j++) {
const MV mv = {ref_mv->row + neighbors[j].row,
ref_mv->col + neighbors[j].col};
if (is_mv_in(x, &mv)) {
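// Compute the raw weighted SAD first and fold in the MV rate cost only
// when the candidate already beats the current best (same early-out as
// the regular refining search).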
unsigned int sad = fn_ptr->osdf(get_buf_from_mv(in_what, &mv),
in_what->stride, wsrc, wsrc_stride,
mask, mask_stride);
if (sad < best_sad) {
sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
if (sad < best_sad) {
best_sad = sad;
best_site = j;
}
}
}
}
if (best_site == -1) {
break;
} else {
ref_mv->row += neighbors[best_site].row;
ref_mv->col += neighbors[best_site].col;
}
}
return best_sad;
}
int obmc_diamond_search_sad(const MACROBLOCK *x,
const search_site_config *cfg,
const int *wsrc, int wsrc_stride,
const int *mask, int mask_stride,
MV *ref_mv, MV *best_mv,
int search_param,
int sad_per_bit, int *num00,
const vp10_variance_fn_ptr_t *fn_ptr,
const MV *center_mv, int is_second) {
const MACROBLOCKD *const xd = &x->e_mbd;
const struct buf_2d *const in_what = &xd->plane[0].pre[is_second];
// search_param determines the length of the initial step and hence the number
// of iterations
// 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 =
// (MAX_FIRST_STEP/4) pel... etc.
const search_site *const ss = &cfg->ss[search_param * cfg->searches_per_step];
const int tot_steps = (cfg->ss_count / cfg->searches_per_step) - search_param;
const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
const uint8_t *best_address, *in_what_ref;
int best_sad = INT_MAX;
int best_site = 0;
int last_site = 0;
int i, j, step;
clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
in_what_ref = in_what->buf + ref_mv->row * in_what->stride + ref_mv->col;
best_address = in_what_ref;
*num00 = 0;
*best_mv = *ref_mv;
// Check the starting position
best_sad = fn_ptr->osdf(best_address, in_what->stride,
wsrc, wsrc_stride, mask, mask_stride) +
mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit);
i = 1;
for (step = 0; step < tot_steps; step++) {
for (j = 0; j < cfg->searches_per_step; j++) {
const MV mv = {best_mv->row + ss[i].mv.row,
best_mv->col + ss[i].mv.col};
if (is_mv_in(x, &mv)) {
int sad = fn_ptr->osdf(best_address + ss[i].offset, in_what->stride,
wsrc, wsrc_stride, mask, mask_stride);
if (sad < best_sad) {
sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
if (sad < best_sad) {
best_sad = sad;
best_site = i;
}
}
}
i++;
}
if (best_site != last_site) {
best_mv->row += ss[best_site].mv.row;
best_mv->col += ss[best_site].mv.col;
best_address += ss[best_site].offset;
last_site = best_site;
#if defined(NEW_DIAMOND_SEARCH)
while (1) {
const MV this_mv = {best_mv->row + ss[best_site].mv.row,
best_mv->col + ss[best_site].mv.col};
if (is_mv_in(x, &this_mv)) {
int sad = fn_ptr->osdf(best_address + ss[best_site].offset,
in_what->stride, wsrc, wsrc_stride,
mask, mask_stride);
if (sad < best_sad) {
sad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
if (sad < best_sad) {
best_sad = sad;
best_mv->row += ss[best_site].mv.row;
best_mv->col += ss[best_site].mv.col;
best_address += ss[best_site].offset;
continue;
}
}
}
break;
}
#endif
} else if (best_address == in_what_ref) {
(*num00)++;
}
}
return best_sad;
}
int vp10_obmc_full_pixel_diamond(const VP10_COMP *cpi, MACROBLOCK *x,
const int *wsrc, int wsrc_stride,
const int *mask, int mask_stride,
MV *mvp_full, int step_param,
int sadpb, int further_steps, int do_refine,
const vp10_variance_fn_ptr_t *fn_ptr,
const MV *ref_mv, MV *dst_mv,
int is_second) {
MV temp_mv;
int thissme, n, num00 = 0;
int bestsme = obmc_diamond_search_sad(x, &cpi->ss_cfg,
wsrc, wsrc_stride,
mask, mask_stride,
mvp_full, &temp_mv,
step_param, sadpb, &n,
fn_ptr, ref_mv, is_second);
if (bestsme < INT_MAX)
bestsme = get_obmc_mvpred_var(x, wsrc, wsrc_stride, mask, mask_stride,
&temp_mv, ref_mv, fn_ptr, 1, is_second);
*dst_mv = temp_mv;
// If there won't be more n-step search, check to see if refining search is
// needed.
if (n > further_steps)
do_refine = 0;
while (n < further_steps) {
++n;
if (num00) {
num00--;
} else {
thissme = obmc_diamond_search_sad(x, &cpi->ss_cfg,
wsrc, wsrc_stride,
mask, mask_stride,
mvp_full, &temp_mv,
step_param + n, sadpb, &num00,
fn_ptr, ref_mv, is_second);
if (thissme < INT_MAX)
thissme = get_obmc_mvpred_var(x, wsrc, wsrc_stride, mask, mask_stride,
&temp_mv, ref_mv, fn_ptr, 1, is_second);
// check to see if refining search is needed.
if (num00 > further_steps - n)
do_refine = 0;
if (thissme < bestsme) {
bestsme = thissme;
*dst_mv = temp_mv;
}
}
}
// final 1-away diamond refining search
if (do_refine) {
const int search_range = 8;
MV best_mv = *dst_mv;
thissme = obmc_refining_search_sad(x, wsrc, wsrc_stride, mask, mask_stride,
&best_mv, sadpb, search_range,
fn_ptr, ref_mv, is_second);
if (thissme < INT_MAX)
thissme = get_obmc_mvpred_var(x, wsrc, wsrc_stride, mask, mask_stride,
&best_mv, ref_mv, fn_ptr, 1, is_second);
if (thissme < bestsme) {
bestsme = thissme;
*dst_mv = best_mv;
}
}
return bestsme;
}
#endif // CONFIG_OBMC

View file

@@ -195,6 +195,29 @@ int vp10_masked_full_pixel_diamond(const struct VP10_COMP *cpi, MACROBLOCK *x,
const MV *ref_mv, MV *dst_mv,
int is_second);
#endif // CONFIG_EXT_INTER
#if CONFIG_OBMC
int vp10_obmc_full_pixel_diamond(const struct VP10_COMP *cpi, MACROBLOCK *x,
const int *wsrc, int wsrc_stride,
const int *mask, int mask_stride,
MV *mvp_full, int step_param,
int sadpb, int further_steps, int do_refine,
const vp10_variance_fn_ptr_t *fn_ptr,
const MV *ref_mv, MV *dst_mv,
int is_second);
int vp10_find_best_obmc_sub_pixel_tree_up(struct VP10_COMP *cpi, MACROBLOCK *x,
const int *wsrc, int wsrc_stride,
const int *mask, int mask_stride,
int mi_row, int mi_col,
MV *bestmv, const MV *ref_mv,
int allow_hp, int error_per_bit,
const vp10_variance_fn_ptr_t *vfp,
int forced_stop, int iters_per_step,
int *mvjcost, int *mvcost[2],
int *distortion, unsigned int *sse1,
int is_second,
int use_upsampled_ref);
#endif // CONFIG_OBMC
#ifdef __cplusplus
} // extern "C"
#endif

View file

@@ -5980,6 +5980,149 @@ static INLINE void restore_dst_buf(MACROBLOCKD *xd,
}
}
#if CONFIG_OBMC
static void single_motion_search_obmc(VP10_COMP *cpi, MACROBLOCK *x,
BLOCK_SIZE bsize, int mi_row, int mi_col,
const int* wsrc, int wsrc_stride,
const int* mask, int mask_stride,
#if CONFIG_EXT_INTER
int ref_idx,
int mv_idx,
#endif // CONFIG_EXT_INTER
int_mv *tmp_mv, int_mv pred_mv,
int *rate_mv) {
MACROBLOCKD *xd = &x->e_mbd;
const VP10_COMMON *cm = &cpi->common;
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0, 0}};
int bestsme = INT_MAX;
int step_param;
int sadpb = x->sadperbit16;
MV mvp_full;
#if CONFIG_EXT_INTER
int ref = mbmi->ref_frame[ref_idx];
MV ref_mv = x->mbmi_ext->ref_mvs[ref][mv_idx].as_mv;
#else
int ref = mbmi->ref_frame[0];
MV ref_mv = x->mbmi_ext->ref_mvs[ref][0].as_mv;
int ref_idx = 0;
#endif // CONFIG_EXT_INTER
int tmp_col_min = x->mv_col_min;
int tmp_col_max = x->mv_col_max;
int tmp_row_min = x->mv_row_min;
int tmp_row_max = x->mv_row_max;
const YV12_BUFFER_CONFIG *scaled_ref_frame = vp10_get_scaled_ref_frame(cpi,
ref);
#if CONFIG_REF_MV
vp10_set_mvcost(x, ref);
#endif
if (scaled_ref_frame) {
int i;
// Swap out the reference frame for a version that's been scaled to
// match the resolution of the current frame, allowing the existing
// motion search code to be used without additional modifications.
for (i = 0; i < MAX_MB_PLANE; i++)
backup_yv12[i] = xd->plane[i].pre[ref_idx];
vp10_setup_pre_planes(xd, ref_idx, scaled_ref_frame, mi_row, mi_col, NULL);
}
vp10_set_mv_search_range(x, &ref_mv);
// Work out the size of the first step in the mv step search.
// 0 here is maximum length first step. 1 is VPXMAX >> 1 etc.
if (cpi->sf.mv.auto_mv_step_size && cm->show_frame) {
// Take wtd average of the step_params based on the last frame's
// max mv magnitude and that based on the best ref mvs of the current
// block for the given reference.
step_param = (vp10_init_search_range(x->max_mv_context[ref]) +
cpi->mv_step_param) / 2;
} else {
step_param = cpi->mv_step_param;
}
if (cpi->sf.adaptive_motion_search && bsize < cm->sb_size) {
int boffset = 2 * (b_width_log2_lookup[cm->sb_size] -
VPXMIN(b_height_log2_lookup[bsize], b_width_log2_lookup[bsize]));
step_param = VPXMAX(step_param, boffset);
}
if (cpi->sf.adaptive_motion_search) {
int bwl = b_width_log2_lookup[bsize];
int bhl = b_height_log2_lookup[bsize];
int tlevel = x->pred_mv_sad[ref] >> (bwl + bhl + 4);
if (tlevel < 5)
step_param += 2;
// prev_mv_sad is not set up for dynamically scaled frames.
if (cpi->oxcf.resize_mode != RESIZE_DYNAMIC) {
int i;
for (i = LAST_FRAME; i <= ALTREF_FRAME && cm->show_frame; ++i) {
if ((x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[i]) {
x->pred_mv[ref].row = 0;
x->pred_mv[ref].col = 0;
tmp_mv->as_int = INVALID_MV;
if (scaled_ref_frame) {
int i;
for (i = 0; i < MAX_MB_PLANE; ++i)
xd->plane[i].pre[ref_idx] = backup_yv12[i];
}
return;
}
}
}
}
mvp_full = pred_mv.as_mv;
mvp_full.col >>= 3;
mvp_full.row >>= 3;
bestsme = vp10_obmc_full_pixel_diamond(cpi, x, wsrc, wsrc_stride,
mask, mask_stride,
&mvp_full, step_param, sadpb,
MAX_MVSEARCH_STEPS - 1 - step_param,
1, &cpi->fn_ptr[bsize],
&ref_mv, &tmp_mv->as_mv, ref_idx);
x->mv_col_min = tmp_col_min;
x->mv_col_max = tmp_col_max;
x->mv_row_min = tmp_row_min;
x->mv_row_max = tmp_row_max;
if (bestsme < INT_MAX) {
int dis;
vp10_find_best_obmc_sub_pixel_tree_up(cpi, x,
wsrc, wsrc_stride,
mask, mask_stride,
mi_row, mi_col,
&tmp_mv->as_mv, &ref_mv,
cm->allow_high_precision_mv,
x->errorperbit,
&cpi->fn_ptr[bsize],
cpi->sf.mv.subpel_force_stop,
cpi->sf.mv.subpel_iters_per_step,
x->nmvjointcost, x->mvcost,
&dis, &x->pred_sse[ref],
ref_idx,
cpi->sf.use_upsampled_references);
}
*rate_mv = vp10_mv_bit_cost(&tmp_mv->as_mv, &ref_mv,
x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
if (scaled_ref_frame) {
int i;
for (i = 0; i < MAX_MB_PLANE; i++)
xd->plane[i].pre[ref_idx] = backup_yv12[i];
}
}
#endif // CONFIG_OBMC
#if CONFIG_EXT_INTER
static void do_masked_motion_search(VP10_COMP *cpi, MACROBLOCK *x,
const uint8_t *mask, int mask_stride,
@@ -6314,10 +6457,10 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
int_mv (*mode_mv)[MAX_REF_FRAMES],
int mi_row, int mi_col,
#if CONFIG_OBMC
uint8_t *dst_buf1[3],
int dst_stride1[3],
uint8_t *dst_buf2[3],
int dst_stride2[3],
uint8_t *dst_buf1[3], int dst_stride1[3],
uint8_t *dst_buf2[3], int dst_stride2[3],
int *wsrc, int wsrc_strides,
int *mask2d, int mask2d_strides,
#endif // CONFIG_OBMC
#if CONFIG_EXT_INTER
int_mv single_newmvs[2][MAX_REF_FRAMES],
@@ -6379,6 +6522,7 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
MB_MODE_INFO best_mbmi;
#if CONFIG_EXT_INTER
int rate2_bmc_nocoeff;
int rate_mv_bmc;
MB_MODE_INFO best_bmc_mbmi;
#endif // CONFIG_EXT_INTER
#endif // CONFIG_OBMC
@@ -6817,6 +6961,7 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
#if CONFIG_EXT_INTER
#if CONFIG_OBMC
best_bmc_mbmi = *mbmi;
rate_mv_bmc = rate_mv;
rate2_bmc_nocoeff = *rate2;
if (cm->interp_filter == SWITCHABLE)
rate2_bmc_nocoeff += rs;
@@ -7294,14 +7439,45 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
for (mbmi->obmc = 0; mbmi->obmc <= allow_obmc; mbmi->obmc++) {
int64_t tmp_rd, tmp_dist;
int tmp_rate;
#if CONFIG_EXT_INTER
int tmp_rate2 = mbmi->obmc ? rate2_bmc_nocoeff : rate2_nocoeff;
#else
int tmp_rate2 = rate2_nocoeff;
#endif // CONFIG_EXT_INTER
if (mbmi->obmc) {
#if CONFIG_EXT_INTER
*mbmi = best_bmc_mbmi;
assert(!mbmi->use_wedge_interinter);
vp10_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
mbmi->obmc = 1;
#endif // CONFIG_EXT_INTER
if (!is_comp_pred && have_newmv_in_inter_mode(this_mode)) {
int_mv tmp_mv;
int_mv pred_mv;
int tmp_rate_mv = 0;
pred_mv.as_int = mbmi->mv[0].as_int;
single_motion_search_obmc(cpi, x, bsize, mi_row, mi_col,
wsrc, wsrc_strides,
mask2d, mask2d_strides,
#if CONFIG_EXT_INTER
0, mv_idx,
#endif // CONFIG_EXT_INTER
&tmp_mv, pred_mv, &tmp_rate_mv);
mbmi->mv[0].as_int = tmp_mv.as_int;
if (discount_newmv_test(cpi, this_mode, tmp_mv, mode_mv, refs[0])) {
tmp_rate_mv = VPXMAX((tmp_rate_mv / NEW_MV_DISCOUNT_FACTOR), 1);
}
#if CONFIG_EXT_INTER
tmp_rate2 = rate2_bmc_nocoeff - rate_mv_bmc + tmp_rate_mv;
#else
tmp_rate2 = rate2_nocoeff - rate_mv + tmp_rate_mv;
#endif // CONFIG_EXT_INTER
vp10_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
#if CONFIG_EXT_INTER
} else {
vp10_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
#endif // CONFIG_EXT_INTER
}
vp10_build_obmc_inter_prediction(cm, xd, mi_row, mi_col, 0,
NULL, NULL,
dst_buf1, dst_stride1,
@@ -7323,11 +7499,7 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
#endif // CONFIG_VP9_HIGHBITDEPTH
x->skip = 0;
#if CONFIG_EXT_INTER
*rate2 = mbmi->obmc ? rate2_bmc_nocoeff : rate2_nocoeff;
#else
*rate2 = rate2_nocoeff;
#endif // CONFIG_EXT_INTER
*rate2 = tmp_rate2;
if (allow_obmc)
*rate2 += cpi->obmc_cost[bsize][mbmi->obmc];
*distortion = 0;
@@ -7835,9 +8007,13 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi,
DECLARE_ALIGNED(16, uint8_t, tmp_buf1[MAX_MB_PLANE * MAX_SB_SQUARE]);
DECLARE_ALIGNED(16, uint8_t, tmp_buf2[MAX_MB_PLANE * MAX_SB_SQUARE]);
#endif // CONFIG_VP9_HIGHBITDEPTH
DECLARE_ALIGNED(16, int, weighted_src_buf[MAX_SB_SQUARE]);
DECLARE_ALIGNED(16, int, mask2d_buf[MAX_SB_SQUARE]);
uint8_t *dst_buf1[MAX_MB_PLANE], *dst_buf2[MAX_MB_PLANE];
int dst_stride1[MAX_MB_PLANE] = {MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE};
int dst_stride2[MAX_MB_PLANE] = {MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE};
int weighted_src_stride = MAX_SB_SIZE;
int mask2d_stride = MAX_SB_SIZE;
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
@@ -7939,6 +8115,11 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi,
vp10_build_prediction_by_left_preds(cm, xd, mi_row, mi_col, dst_buf2,
dst_stride2);
vp10_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row, mi_col);
calc_target_weighted_pred(cm, x, xd, mi_row, mi_col,
dst_buf1[0], dst_stride1[0],
dst_buf2[0], dst_stride2[0],
mask2d_buf, mask2d_stride,
weighted_src_buf, weighted_src_stride);
#endif // CONFIG_OBMC
for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
@@ -8485,6 +8666,8 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi,
#if CONFIG_OBMC
dst_buf1, dst_stride1,
dst_buf2, dst_stride2,
weighted_src_buf, weighted_src_stride,
mask2d_buf, mask2d_stride,
#endif // CONFIG_OBMC
#if CONFIG_EXT_INTER
single_newmvs,
@@ -8596,6 +8779,9 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi,
#if CONFIG_OBMC
dst_buf1, dst_stride1,
dst_buf2, dst_stride2,
weighted_src_buf,
weighted_src_stride,
mask2d_buf, mask2d_stride,
#endif // CONFIG_OBMC
#if CONFIG_EXT_INTER
dummy_single_newmvs,
@@ -10153,3 +10339,194 @@ void vp10_rd_pick_inter_mode_sub8x8(struct VP10_COMP *cpi,
store_coding_context(x, ctx, best_ref_index,
best_pred_diff, 0);
}
#if CONFIG_OBMC
void calc_target_weighted_pred(VP10_COMMON *cm,
MACROBLOCK *x,
MACROBLOCKD *xd,
int mi_row, int mi_col,
uint8_t *above_buf, int above_stride,
uint8_t *left_buf, int left_stride,
int *mask_buf, int mask_stride,
int *weighted_src_buf, int weighted_src_stride) {
const TileInfo *const tile = &xd->tile;
BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
int row, col, i, mi_step;
int bw = 8 * xd->n8_w;
int bh = 8 * xd->n8_h;
int *dst = weighted_src_buf;
int *mask2d = mask_buf;
uint8_t *src;
#if CONFIG_VP9_HIGHBITDEPTH
int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
#endif // CONFIG_VP9_HIGHBITDEPTH
for (row = 0; row < bh; ++row) {
for (col = 0; col < bw; ++col) {
dst[col] = 0;
mask2d[col] = 64;
}
dst += weighted_src_stride;
mask2d += mask_stride;
}
// handle above row
#if CONFIG_EXT_TILE
if (mi_row > 0 && (mi_row - 1 >= tile->mi_row_start)) {
#else
if (mi_row > 0) {
#endif // CONFIG_EXT_TILE
for (i = 0; i < VPXMIN(xd->n8_w, cm->mi_cols - mi_col); i += mi_step) {
int mi_row_offset = -1;
int mi_col_offset = i;
MODE_INFO *above_mi = xd->mi[mi_col_offset +
mi_row_offset * xd->mi_stride];
MB_MODE_INFO *above_mbmi = &above_mi->mbmi;
int overlap = num_4x4_blocks_high_lookup[bsize] << 1;
mi_step = VPXMIN(xd->n8_w,
num_8x8_blocks_wide_lookup[above_mbmi->sb_type]);
if (is_neighbor_overlappable(above_mbmi)) {
const struct macroblockd_plane *pd = &xd->plane[0];
int bw = (mi_step * MI_SIZE) >> pd->subsampling_x;
int bh = overlap >> pd->subsampling_y;
int dst_stride = weighted_src_stride;
int *dst = weighted_src_buf + (i * MI_SIZE >> pd->subsampling_x);
int tmp_stride = above_stride;
uint8_t *tmp = above_buf + (i * MI_SIZE >> pd->subsampling_x);
int mask2d_stride = mask_stride;
int *mask2d = mask_buf + (i * MI_SIZE >> pd->subsampling_x);
const uint8_t *mask1d[2];
setup_obmc_mask(bh, mask1d);
#if CONFIG_VP9_HIGHBITDEPTH
if (is_hbd) {
uint16_t *tmp16 = CONVERT_TO_SHORTPTR(tmp);
for (row = 0; row < bh; ++row) {
for (col = 0; col < bw; ++col) {
dst[col] = mask1d[1][row] * tmp16[col];
mask2d[col] = mask1d[0][row];
}
dst += dst_stride;
tmp16 += tmp_stride;
mask2d += mask2d_stride;
}
} else {
#endif // CONFIG_VP9_HIGHBITDEPTH
for (row = 0; row < bh; ++row) {
for (col = 0; col < bw; ++col) {
dst[col] = mask1d[1][row] * tmp[col];
mask2d[col] = mask1d[0][row];
}
dst += dst_stride;
tmp += tmp_stride;
mask2d += mask2d_stride;
}
#if CONFIG_VP9_HIGHBITDEPTH
}
#endif // CONFIG_VP9_HIGHBITDEPTH
}
} // each mi in the above row
}
// handle left column
dst = weighted_src_buf;
mask2d = mask_buf;
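// Promote the above-row pass from 6-bit to 12-bit weight precision (<< 6);
// the left-column pass below divides by 64 (>> 6) again as it blends.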
for (row = 0; row < bh; ++row) {
for (col = 0; col < bw; ++col) {
dst[col] = dst[col] << 6;
mask2d[col] = mask2d[col] << 6;
}
dst += weighted_src_stride;
mask2d += mask_stride;
}
if (mi_col > 0 && (mi_col - 1 >= tile->mi_col_start)) {
for (i = 0; i < VPXMIN(xd->n8_h, cm->mi_rows - mi_row); i += mi_step) {
int mi_row_offset = i;
int mi_col_offset = -1;
int overlap = num_4x4_blocks_wide_lookup[bsize] << 1;
MODE_INFO *left_mi = xd->mi[mi_col_offset +
mi_row_offset * xd->mi_stride];
MB_MODE_INFO *left_mbmi = &left_mi->mbmi;
mi_step = VPXMIN(xd->n8_h,
num_8x8_blocks_high_lookup[left_mbmi->sb_type]);
if (is_neighbor_overlappable(left_mbmi)) {
const struct macroblockd_plane *pd = &xd->plane[0];
int bw = overlap >> pd->subsampling_x;
int bh = (mi_step * MI_SIZE) >> pd->subsampling_y;
int dst_stride = weighted_src_stride;
int *dst = weighted_src_buf +
(i * MI_SIZE * dst_stride >> pd->subsampling_y);
int tmp_stride = left_stride;
uint8_t *tmp = left_buf +
(i * MI_SIZE * tmp_stride >> pd->subsampling_y);
int mask2d_stride = mask_stride;
int *mask2d = mask_buf +
(i * MI_SIZE * mask2d_stride >> pd->subsampling_y);
const uint8_t *mask1d[2];
setup_obmc_mask(bw, mask1d);
#if CONFIG_VP9_HIGHBITDEPTH
if (is_hbd) {
uint16_t *tmp16 = CONVERT_TO_SHORTPTR(tmp);
for (row = 0; row < bh; ++row) {
for (col = 0; col < bw; ++col) {
dst[col] = (dst[col] >> 6) * mask1d[0][col] +
(tmp16[col] << 6) * mask1d[1][col];
mask2d[col] = (mask2d[col] >> 6) * mask1d[0][col];
}
dst += dst_stride;
tmp16 += tmp_stride;
mask2d += mask2d_stride;
}
} else {
#endif // CONFIG_VP9_HIGHBITDEPTH
for (row = 0; row < bh; ++row) {
for (col = 0; col < bw; ++col) {
dst[col] = (dst[col] >> 6) * mask1d[0][col] +
(tmp[col] << 6) * mask1d[1][col];
mask2d[col] = (mask2d[col] >> 6) * mask1d[0][col];
}
dst += dst_stride;
tmp += tmp_stride;
mask2d += mask2d_stride;
}
#if CONFIG_VP9_HIGHBITDEPTH
}
#endif // CONFIG_VP9_HIGHBITDEPTH
}
} // each mi in the left column
}
dst = weighted_src_buf;
src = x->plane[0].src.buf;
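// Finally form wsrc = (src << 12) - weighted neighbor prediction, so that
// (wsrc - mask2d * pred) is 4096 times the OBMC prediction residual.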
#if CONFIG_VP9_HIGHBITDEPTH
if (is_hbd) {
uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
for (row = 0; row < bh; ++row) {
for (col = 0; col < bw; ++col)
dst[col] = (src16[col] << 12) - dst[col];
dst += weighted_src_stride;
src16 += x->plane[0].src.stride;
}
} else {
#endif // CONFIG_VP9_HIGHBITDEPTH
for (row = 0; row < bh; ++row) {
for (col = 0; col < bw; ++col)
dst[col] = (src[col] << 12) - dst[col];
dst += weighted_src_stride;
src += x->plane[0].src.stride;
}
#if CONFIG_VP9_HIGHBITDEPTH
}
#endif // CONFIG_VP9_HIGHBITDEPTH
}
#endif // CONFIG_OBMC

View file

@@ -108,6 +108,17 @@ static INLINE const YV12_BUFFER_CONFIG *get_upsampled_ref(VP10_COMP *cpi,
return &cpi->upsampled_ref_bufs[cpi->upsampled_ref_idx[ref_idx]].buf;
}
#if CONFIG_OBMC
void calc_target_weighted_pred(VP10_COMMON *cm,
MACROBLOCK *x,
MACROBLOCKD *xd,
int mi_row, int mi_col,
uint8_t *above_buf, int above_stride,
uint8_t *left_buf, int left_stride,
int *mask_buf, int mask_stride,
int *weighted_src_buf, int weighted_src_stride);
#endif // CONFIG_OBMC
#ifdef __cplusplus
} // extern "C"
#endif

View file

@@ -450,3 +450,109 @@ HIGHBD_MASKSADMXN(4, 8)
HIGHBD_MASKSADMXN(4, 4)
#endif // CONFIG_VP9_HIGHBITDEPTH
#endif // CONFIG_VP10 && CONFIG_EXT_INTER
#if CONFIG_VP10 && CONFIG_OBMC
// a: pred
// b: target weighted prediction (has been *4096 to keep precision)
// m: 2d weights (scaled by 4096)
static INLINE unsigned int obmc_sad(const uint8_t *a, int a_stride,
const int *b, int b_stride,
const int *m, int m_stride,
int width, int height) {
int y, x;
unsigned int sad = 0;
for (y = 0; y < height; y++) {
for (x = 0; x < width; x++) {
int abs_diff = abs(b[x] - a[x] * m[x]);
sad += (abs_diff + 2048) >> 12;
}
a += a_stride;
b += b_stride;
m += m_stride;
}
return sad;
}
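/* A quick sanity check of the rounding, with illustrative values: for a
 * pixel with no overlap, m = 4096 and b = src << 12.  With src = 100 and a
 * candidate prediction a = 98:
 *   abs_diff = |409600 - 98 * 4096| = 8192
 *   (8192 + 2048) >> 12 == 2 == |src - a|  */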
#define OBMCSADMxN(m, n) \
unsigned int vpx_obmc_sad##m##x##n##_c(const uint8_t *ref, int ref_stride, \
const int *wsrc, int wsrc_stride, \
const int *msk, int msk_stride) { \
return obmc_sad(ref, ref_stride, wsrc, wsrc_stride, msk, msk_stride, m, n); \
}
#if CONFIG_EXT_PARTITION
OBMCSADMxN(128, 128)
OBMCSADMxN(128, 64)
OBMCSADMxN(64, 128)
#endif // CONFIG_EXT_PARTITION
OBMCSADMxN(64, 64)
OBMCSADMxN(64, 32)
OBMCSADMxN(32, 64)
OBMCSADMxN(32, 32)
OBMCSADMxN(32, 16)
OBMCSADMxN(16, 32)
OBMCSADMxN(16, 16)
OBMCSADMxN(16, 8)
OBMCSADMxN(8, 16)
OBMCSADMxN(8, 8)
OBMCSADMxN(8, 4)
OBMCSADMxN(4, 8)
OBMCSADMxN(4, 4)
#if CONFIG_VP9_HIGHBITDEPTH
static INLINE unsigned int highbd_obmc_sad(const uint8_t *a8, int a_stride,
const int *b, int b_stride,
const int *m, int m_stride,
int width, int height) {
int y, x;
unsigned int sad = 0;
const uint16_t *a = CONVERT_TO_SHORTPTR(a8);
for (y = 0; y < height; y++) {
for (x = 0; x < width; x++) {
int abs_diff = abs(b[x] - a[x] * m[x]);
sad += (abs_diff + 2048) >> 12;
}
a += a_stride;
b += b_stride;
m += m_stride;
}
return sad;
}
#define HIGHBD_OBMCSADMXN(m, n) \
unsigned int vpx_highbd_obmc_sad##m##x##n##_c(const uint8_t *ref, \
int ref_stride, \
const int *wsrc, \
int wsrc_stride, \
const int *msk, \
int msk_stride) { \
return highbd_obmc_sad(ref, ref_stride, wsrc, wsrc_stride, \
msk, msk_stride, m, n); \
}
#if CONFIG_EXT_PARTITION
HIGHBD_OBMCSADMXN(128, 128)
HIGHBD_OBMCSADMXN(128, 64)
HIGHBD_OBMCSADMXN(64, 128)
#endif // CONFIG_EXT_PARTITION
HIGHBD_OBMCSADMXN(64, 64)
HIGHBD_OBMCSADMXN(64, 32)
HIGHBD_OBMCSADMXN(32, 64)
HIGHBD_OBMCSADMXN(32, 32)
HIGHBD_OBMCSADMXN(32, 16)
HIGHBD_OBMCSADMXN(16, 32)
HIGHBD_OBMCSADMXN(16, 16)
HIGHBD_OBMCSADMXN(16, 8)
HIGHBD_OBMCSADMXN(8, 16)
HIGHBD_OBMCSADMXN(8, 8)
HIGHBD_OBMCSADMXN(8, 4)
HIGHBD_OBMCSADMXN(4, 8)
HIGHBD_OBMCSADMXN(4, 4)
#endif // CONFIG_VP9_HIGHBITDEPTH
#endif // CONFIG_VP10 && CONFIG_OBMC

View file

@@ -7,6 +7,7 @@
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <stdlib.h>
#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"
@@ -1022,3 +1023,322 @@ HIGHBD_MASK_SUBPIX_VAR(128, 128)
#endif // CONFIG_EXT_PARTITION
#endif // CONFIG_VP9_HIGHBITDEPTH
#endif // CONFIG_VP10 && CONFIG_EXT_INTER
#if CONFIG_VP10 && CONFIG_OBMC
void obmc_variance(const uint8_t *a, int a_stride,
const int *b, int b_stride,
const int *m, int m_stride,
int w, int h, unsigned int *sse, int *sum) {
int i, j;
*sse = 0;
*sum = 0;
for (i = 0; i < h; i++) {
for (j = 0; j < w; j++) {
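// Round the 12-bit-scaled residual back to pixel scale, keeping its sign,
// so sum and sse stay on the same scale as the regular variance functions.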
int scaled_diff = b[j] - a[j] * m[j];
int abs_diff = (abs(scaled_diff) + 2048) >> 12;
int diff = (scaled_diff >= 0) ? abs_diff : -abs_diff;
*sum += diff;
*sse += diff * diff;
}
a += a_stride;
b += b_stride;
m += m_stride;
}
}
#define OBMC_VAR(W, H) \
unsigned int vpx_obmc_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
const int *b, int b_stride, \
const int *m, int m_stride, \
unsigned int *sse) { \
int sum; \
obmc_variance(a, a_stride, b, b_stride, m, m_stride, W, H, sse, &sum); \
return *sse - (((int64_t)sum * sum) / (W * H)); \
}
#define OBMC_SUBPIX_VAR(W, H) \
unsigned int vpx_obmc_sub_pixel_variance##W##x##H##_c( \
const uint8_t *pre, int pre_stride, \
int xoffset, int yoffset, \
const int *wsrc, int wsrc_stride, \
const int *msk, int msk_stride, \
unsigned int *sse) { \
uint16_t fdata3[(H + 1) * W]; \
uint8_t temp2[H * W]; \
\
var_filter_block2d_bil_first_pass(pre, fdata3, pre_stride, 1, H + 1, W, \
bilinear_filters_2t[xoffset]); \
var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
bilinear_filters_2t[yoffset]); \
\
return vpx_obmc_variance##W##x##H##_c(temp2, W, wsrc, wsrc_stride, \
msk, msk_stride, sse); \
}
OBMC_VAR(4, 4)
OBMC_SUBPIX_VAR(4, 4)
OBMC_VAR(4, 8)
OBMC_SUBPIX_VAR(4, 8)
OBMC_VAR(8, 4)
OBMC_SUBPIX_VAR(8, 4)
OBMC_VAR(8, 8)
OBMC_SUBPIX_VAR(8, 8)
OBMC_VAR(8, 16)
OBMC_SUBPIX_VAR(8, 16)
OBMC_VAR(16, 8)
OBMC_SUBPIX_VAR(16, 8)
OBMC_VAR(16, 16)
OBMC_SUBPIX_VAR(16, 16)
OBMC_VAR(16, 32)
OBMC_SUBPIX_VAR(16, 32)
OBMC_VAR(32, 16)
OBMC_SUBPIX_VAR(32, 16)
OBMC_VAR(32, 32)
OBMC_SUBPIX_VAR(32, 32)
OBMC_VAR(32, 64)
OBMC_SUBPIX_VAR(32, 64)
OBMC_VAR(64, 32)
OBMC_SUBPIX_VAR(64, 32)
OBMC_VAR(64, 64)
OBMC_SUBPIX_VAR(64, 64)
#if CONFIG_EXT_PARTITION
OBMC_VAR(64, 128)
OBMC_SUBPIX_VAR(64, 128)
OBMC_VAR(128, 64)
OBMC_SUBPIX_VAR(128, 64)
OBMC_VAR(128, 128)
OBMC_SUBPIX_VAR(128, 128)
#endif // CONFIG_EXT_PARTITION
#if CONFIG_VP9_HIGHBITDEPTH
void highbd_obmc_variance64(const uint8_t *a8, int a_stride,
const int *b, int b_stride,
const int *m, int m_stride,
int w, int h, uint64_t *sse, int64_t *sum) {
int i, j;
uint16_t *a = CONVERT_TO_SHORTPTR(a8);
*sse = 0;
*sum = 0;
for (i = 0; i < h; i++) {
for (j = 0; j < w; j++) {
int scaled_diff = b[j] - a[j] * m[j];
int abs_diff = (abs(scaled_diff) + 2048) >> 12;
int diff = (scaled_diff >= 0) ? abs_diff : -abs_diff;
*sum += diff;
*sse += diff * diff;
}
a += a_stride;
b += b_stride;
m += m_stride;
}
}
void highbd_obmc_variance(const uint8_t *a8, int a_stride,
const int *b, int b_stride,
const int *m, int m_stride,
int w, int h, unsigned int *sse, int *sum) {
int64_t sum64;
uint64_t sse64;
highbd_obmc_variance64(a8, a_stride, b, b_stride, m, m_stride,
w, h, &sse64, &sum64);
*sum = (int)sum64;
*sse = (unsigned int)sse64;
}
void highbd_10_obmc_variance(const uint8_t *a8, int a_stride,
const int *b, int b_stride,
const int *m, int m_stride,
int w, int h, unsigned int *sse, int *sum) {
int64_t sum64;
uint64_t sse64;
highbd_obmc_variance64(a8, a_stride, b, b_stride, m, m_stride,
w, h, &sse64, &sum64);
*sum = (int)ROUND_POWER_OF_TWO(sum64, 2);
*sse = (unsigned int)ROUND_POWER_OF_TWO(sse64, 4);
}
void highbd_12_obmc_variance(const uint8_t *a8, int a_stride,
const int *b, int b_stride,
const int *m, int m_stride,
int w, int h, unsigned int *sse, int *sum) {
int64_t sum64;
uint64_t sse64;
highbd_obmc_variance64(a8, a_stride, b, b_stride, m, m_stride,
w, h, &sse64, &sum64);
*sum = (int)ROUND_POWER_OF_TWO(sum64, 4);
*sse = (unsigned int)ROUND_POWER_OF_TWO(sse64, 8);
}
#define HIGHBD_OBMC_VAR(W, H) \
unsigned int vpx_highbd_obmc_variance##W##x##H##_c(const uint8_t *a, \
int a_stride, \
const int *b, \
int b_stride, \
const int *m, \
int m_stride, \
unsigned int *sse) { \
int sum; \
highbd_obmc_variance(a, a_stride, b, b_stride, m, m_stride, \
W, H, sse, &sum); \
return *sse - (((int64_t)sum * sum) / (W * H)); \
} \
\
unsigned int vpx_highbd_10_obmc_variance##W##x##H##_c(const uint8_t *a, \
int a_stride, \
const int *b, \
int b_stride, \
const int *m, \
int m_stride, \
unsigned int *sse) { \
int sum; \
highbd_10_obmc_variance(a, a_stride, b, b_stride, m, m_stride, \
W, H, sse, &sum); \
return *sse - (((int64_t)sum * sum) / (W * H)); \
} \
\
unsigned int vpx_highbd_12_obmc_variance##W##x##H##_c(const uint8_t *a, \
int a_stride, \
const int *b, \
int b_stride, \
const int *m, \
int m_stride, \
unsigned int *sse) { \
int sum; \
highbd_12_obmc_variance(a, a_stride, b, b_stride, m, m_stride, \
W, H, sse, &sum); \
return *sse - (((int64_t)sum * sum) / (W * H)); \
}
#define HIGHBD_OBMC_SUBPIX_VAR(W, H) \
unsigned int vpx_highbd_obmc_sub_pixel_variance##W##x##H##_c( \
const uint8_t *pre, int pre_stride, \
int xoffset, int yoffset, \
const int *wsrc, int wsrc_stride, \
const int *msk, int msk_stride, \
unsigned int *sse) { \
uint16_t fdata3[(H + 1) * W]; \
uint16_t temp2[H * W]; \
\
vpx_highbd_var_filter_block2d_bil_first_pass(pre, fdata3, pre_stride, 1, \
H + 1, W, \
bilinear_filters_2t[xoffset]); \
vpx_highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
bilinear_filters_2t[yoffset]); \
\
return vpx_highbd_obmc_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \
W, wsrc, wsrc_stride, \
msk, msk_stride, sse); \
} \
\
unsigned int vpx_highbd_10_obmc_sub_pixel_variance##W##x##H##_c( \
const uint8_t *pre, int pre_stride, \
int xoffset, int yoffset, \
const int *wsrc, int wsrc_stride, \
const int *msk, int msk_stride, \
unsigned int *sse) { \
uint16_t fdata3[(H + 1) * W]; \
uint16_t temp2[H * W]; \
\
vpx_highbd_var_filter_block2d_bil_first_pass(pre, fdata3, pre_stride, 1, \
H + 1, W, \
bilinear_filters_2t[xoffset]); \
vpx_highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
bilinear_filters_2t[yoffset]); \
\
return vpx_highbd_10_obmc_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \
W, wsrc, wsrc_stride, \
msk, msk_stride, sse); \
} \
\
unsigned int vpx_highbd_12_obmc_sub_pixel_variance##W##x##H##_c( \
const uint8_t *pre, int pre_stride, \
int xoffset, int yoffset, \
const int *wsrc, int wsrc_stride, \
const int *msk, int msk_stride, \
unsigned int *sse) { \
uint16_t fdata3[(H + 1) * W]; \
uint16_t temp2[H * W]; \
\
vpx_highbd_var_filter_block2d_bil_first_pass(pre, fdata3, pre_stride, 1, \
H + 1, W, \
bilinear_filters_2t[xoffset]); \
vpx_highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
bilinear_filters_2t[yoffset]); \
\
return vpx_highbd_12_obmc_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \
W, wsrc, wsrc_stride, \
msk, msk_stride, sse); \
}
HIGHBD_OBMC_VAR(4, 4)
HIGHBD_OBMC_SUBPIX_VAR(4, 4)
HIGHBD_OBMC_VAR(4, 8)
HIGHBD_OBMC_SUBPIX_VAR(4, 8)
HIGHBD_OBMC_VAR(8, 4)
HIGHBD_OBMC_SUBPIX_VAR(8, 4)
HIGHBD_OBMC_VAR(8, 8)
HIGHBD_OBMC_SUBPIX_VAR(8, 8)
HIGHBD_OBMC_VAR(8, 16)
HIGHBD_OBMC_SUBPIX_VAR(8, 16)
HIGHBD_OBMC_VAR(16, 8)
HIGHBD_OBMC_SUBPIX_VAR(16, 8)
HIGHBD_OBMC_VAR(16, 16)
HIGHBD_OBMC_SUBPIX_VAR(16, 16)
HIGHBD_OBMC_VAR(16, 32)
HIGHBD_OBMC_SUBPIX_VAR(16, 32)
HIGHBD_OBMC_VAR(32, 16)
HIGHBD_OBMC_SUBPIX_VAR(32, 16)
HIGHBD_OBMC_VAR(32, 32)
HIGHBD_OBMC_SUBPIX_VAR(32, 32)
HIGHBD_OBMC_VAR(32, 64)
HIGHBD_OBMC_SUBPIX_VAR(32, 64)
HIGHBD_OBMC_VAR(64, 32)
HIGHBD_OBMC_SUBPIX_VAR(64, 32)
HIGHBD_OBMC_VAR(64, 64)
HIGHBD_OBMC_SUBPIX_VAR(64, 64)
#if CONFIG_EXT_PARTITION
HIGHBD_OBMC_VAR(64, 128)
HIGHBD_OBMC_SUBPIX_VAR(64, 128)
HIGHBD_OBMC_VAR(128, 64)
HIGHBD_OBMC_SUBPIX_VAR(128, 64)
HIGHBD_OBMC_VAR(128, 128)
HIGHBD_OBMC_SUBPIX_VAR(128, 128)
#endif // CONFIG_EXT_PARTITION
#endif // CONFIG_VP9_HIGHBITDEPTH
#endif // CONFIG_VP10 && CONFIG_OBMC

View file

@@ -98,6 +98,30 @@ typedef unsigned int (*vpx_masked_subpixvariance_fn_t)(const uint8_t *src,
unsigned int *sse);
#endif // CONFIG_VP10 && CONFIG_EXT_INTER
#if CONFIG_VP10 && CONFIG_OBMC
typedef unsigned int(*vpx_obmc_sad_fn_t)(const uint8_t *pred,
int pred_stride,
const int *wsrc,
int wsrc_stride,
const int *msk,
int msk_stride);
typedef unsigned int (*vpx_obmc_variance_fn_t)(const uint8_t *pred,
int pred_stride,
const int *wsrc,
int wsrc_stride,
const int *msk,
int msk_stride,
unsigned int *sse);
typedef unsigned int (*vpx_obmc_subpixvariance_fn_t)(const uint8_t *pred,
int pred_stride,
int xoffset, int yoffset,
const int *wsrc,
int wsrc_stride,
const int *msk,
int msk_stride,
unsigned int *sse);
#endif // CONFIG_VP10 && CONFIG_OBMC
#if CONFIG_VP9
typedef struct vp9_variance_vtable {
vpx_sad_fn_t sdf;
@@ -126,6 +150,11 @@ typedef struct vp10_variance_vtable {
vpx_masked_variance_fn_t mvf;
vpx_masked_subpixvariance_fn_t msvf;
#endif // CONFIG_EXT_INTER
#if CONFIG_OBMC
vpx_obmc_sad_fn_t osdf;
vpx_obmc_variance_fn_t ovf;
vpx_obmc_subpixvariance_fn_t osvf;
#endif // CONFIG_OBMC
} vp10_variance_fn_ptr_t;
#endif // CONFIG_VP10

View file

@@ -1094,6 +1094,25 @@ if (vpx_config("CONFIG_EXT_INTER") eq "yes") {
}
}
#
# OBMC SAD
#
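# Note: specialize() without an architecture list registers no SIMD
# variants, so these RTCD entries resolve to the C implementations for now.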
if (vpx_config("CONFIG_OBMC") eq "yes") {
foreach (@block_sizes) {
($w, $h) = @$_;
add_proto qw/unsigned int/, "vpx_obmc_sad${w}x${h}", "const uint8_t *ref_ptr, int ref_stride, const int *wsrc_ptr, int wsrc_stride, const int *mask, int mask_stride";
specialize "vpx_obmc_sad${w}x${h}";
}
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
foreach (@block_sizes) {
($w, $h) = @$_;
add_proto qw/unsigned int/, "vpx_highbd_obmc_sad${w}x${h}", "const uint8_t *ref_ptr, int ref_stride, const int *wsrc_ptr, int wsrc_stride, const int *mask, int mask_stride";
specialize "vpx_highbd_obmc_sad${w}x${h}";
}
}
}
#
# Multi-block SAD, comparing a reference to N blocks 1 pixel apart horizontally
#
@@ -1364,6 +1383,31 @@ if (vpx_config("CONFIG_EXT_INTER") eq "yes") {
}
}
#
# OBMC Variance / OBMC Subpixel Variance
#
if (vpx_config("CONFIG_OBMC") eq "yes") {
foreach (@block_sizes) {
($w, $h) = @$_;
add_proto qw/unsigned int/, "vpx_obmc_variance${w}x${h}", "const uint8_t *pre_ptr, int pre_stride, const int *wsrc_ptr, int wsrc_stride, const int *mask, int mask_stride, unsigned int *sse";
add_proto qw/unsigned int/, "vpx_obmc_sub_pixel_variance${w}x${h}", "const uint8_t *pre_ptr, int pre_stride, int xoffset, int yoffset, const int *wsrc_ptr, int wsrc_stride, const int *mask, int mask_stride, unsigned int *sse";
specialize "vpx_obmc_variance${w}x${h}";
specialize "vpx_obmc_sub_pixel_variance${w}x${h}";
}
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
foreach $bd ("_", "_10_", "_12_") {
foreach (@block_sizes) {
($w, $h) = @$_;
add_proto qw/unsigned int/, "vpx_highbd${bd}obmc_variance${w}x${h}", "const uint8_t *pre_ptr, int pre_stride, const int *wsrc_ptr, int wsrc_stride, const int *mask, int mask_stride, unsigned int *sse";
add_proto qw/unsigned int/, "vpx_highbd${bd}obmc_sub_pixel_variance${w}x${h}", "const uint8_t *pre_ptr, int pre_stride, int xoffset, int yoffset, const int *wsrc_ptr, int wsrc_stride, const int *mask, int mask_stride, unsigned int *sse";
specialize "vpx_highbd${bd}obmc_variance${w}x${h}";
specialize "vpx_highbd${bd}obmc_sub_pixel_variance${w}x${h}";
}
}
}
}
#
# Specialty Subpixel
#