Compute compound average in warp_plane only for COMPOUND_AVERAGE
This fixes a mismatch which occurs when global/warped motion and a masked compound type are used together. Change-Id: I08b2702cdb3b85f8d8817b9286a73951c97cf379
This commit is contained in:
Родитель
f533400a2d
Коммит
7afb8b758a
|
@ -608,11 +608,11 @@ if (aom_config("CONFIG_PVQ") eq "yes") {
|
|||
|
||||
if ((aom_config("CONFIG_WARPED_MOTION") eq "yes") ||
|
||||
(aom_config("CONFIG_GLOBAL_MOTION") eq "yes")) {
|
||||
add_proto qw/void av1_warp_affine/, "const int32_t *mat, const uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int ref_frm, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta";
|
||||
add_proto qw/void av1_warp_affine/, "const int32_t *mat, const uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int comp_avg, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta";
|
||||
specialize qw/av1_warp_affine sse2 ssse3/;
|
||||
|
||||
if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
|
||||
add_proto qw/void av1_highbd_warp_affine/, "const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, int ref_frm, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta";
|
||||
add_proto qw/void av1_highbd_warp_affine/, "const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, int comp_avg, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta";
|
||||
specialize qw/av1_highbd_warp_affine ssse3/;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -415,13 +415,19 @@ static INLINE void av1_make_inter_predictor(
|
|||
if (do_warp) {
|
||||
const struct macroblockd_plane *const pd = &xd->plane[plane];
|
||||
const struct buf_2d *const pre_buf = &pd->pre[ref];
|
||||
#if CONFIG_EXT_INTER
|
||||
int compute_avg =
|
||||
ref && mi->mbmi.interinter_compound_type == COMPOUND_AVERAGE;
|
||||
#else
|
||||
int compute_avg = ref;
|
||||
#endif // CONFIG_EXT_INTER
|
||||
av1_warp_plane(&final_warp_params,
|
||||
#if CONFIG_HIGHBITDEPTH
|
||||
xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH, xd->bd,
|
||||
#endif // CONFIG_HIGHBITDEPTH
|
||||
pre_buf->buf0, pre_buf->width, pre_buf->height,
|
||||
pre_buf->stride, dst, p_col, p_row, w, h, dst_stride,
|
||||
pd->subsampling_x, pd->subsampling_y, xs, ys, ref);
|
||||
pd->subsampling_x, pd->subsampling_y, xs, ys, compute_avg);
|
||||
return;
|
||||
}
|
||||
#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
|
||||
|
|
|
@ -909,7 +909,7 @@ static void highbd_warp_plane_old(WarpedMotionParams *wm, uint8_t *ref8,
|
|||
int p_width, int p_height, int p_stride,
|
||||
int subsampling_x, int subsampling_y,
|
||||
int x_scale, int y_scale, int bd,
|
||||
int ref_frm) {
|
||||
int comp_avg) {
|
||||
int i, j;
|
||||
ProjectPointsFunc projectpoints = get_project_points_type(wm->wmtype);
|
||||
uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
|
||||
|
@ -923,7 +923,7 @@ static void highbd_warp_plane_old(WarpedMotionParams *wm, uint8_t *ref8,
|
|||
projectpoints(wm->wmmat, in, out, 1, 2, 2, subsampling_x, subsampling_y);
|
||||
out[0] = ROUND_POWER_OF_TWO_SIGNED(out[0] * x_scale, 4);
|
||||
out[1] = ROUND_POWER_OF_TWO_SIGNED(out[1] * y_scale, 4);
|
||||
if (ref_frm)
|
||||
if (comp_avg)
|
||||
pred[(j - p_col) + (i - p_row) * p_stride] = ROUND_POWER_OF_TWO(
|
||||
pred[(j - p_col) + (i - p_row) * p_stride] +
|
||||
highbd_warp_interpolate(ref, out[0], out[1], width, height,
|
||||
|
@ -953,7 +953,7 @@ void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref,
|
|||
int width, int height, int stride, uint16_t *pred,
|
||||
int p_col, int p_row, int p_width, int p_height,
|
||||
int p_stride, int subsampling_x,
|
||||
int subsampling_y, int bd, int ref_frm,
|
||||
int subsampling_y, int bd, int comp_avg,
|
||||
int16_t alpha, int16_t beta, int16_t gamma,
|
||||
int16_t delta) {
|
||||
#if HORSHEAR_REDUCE_PREC_BITS >= 5
|
||||
|
@ -1059,7 +1059,7 @@ void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref,
|
|||
}
|
||||
sum = clip_pixel_highbd(
|
||||
ROUND_POWER_OF_TWO(sum, VERSHEAR_REDUCE_PREC_BITS), bd);
|
||||
if (ref_frm)
|
||||
if (comp_avg)
|
||||
*p = ROUND_POWER_OF_TWO(*p + sum, 1);
|
||||
else
|
||||
*p = sum;
|
||||
|
@ -1075,7 +1075,7 @@ static void highbd_warp_plane(WarpedMotionParams *wm, uint8_t *ref8, int width,
|
|||
int p_row, int p_width, int p_height,
|
||||
int p_stride, int subsampling_x,
|
||||
int subsampling_y, int x_scale, int y_scale,
|
||||
int bd, int ref_frm) {
|
||||
int bd, int comp_avg) {
|
||||
if (wm->wmtype == ROTZOOM) {
|
||||
wm->wmmat[5] = wm->wmmat[2];
|
||||
wm->wmmat[4] = -wm->wmmat[3];
|
||||
|
@ -1092,12 +1092,12 @@ static void highbd_warp_plane(WarpedMotionParams *wm, uint8_t *ref8, int width,
|
|||
uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
|
||||
av1_highbd_warp_affine(mat, ref, width, height, stride, pred, p_col, p_row,
|
||||
p_width, p_height, p_stride, subsampling_x,
|
||||
subsampling_y, bd, ref_frm, alpha, beta, gamma,
|
||||
subsampling_y, bd, comp_avg, alpha, beta, gamma,
|
||||
delta);
|
||||
} else {
|
||||
highbd_warp_plane_old(wm, ref8, width, height, stride, pred8, p_col, p_row,
|
||||
p_width, p_height, p_stride, subsampling_x,
|
||||
subsampling_y, x_scale, y_scale, bd, ref_frm);
|
||||
subsampling_y, x_scale, y_scale, bd, comp_avg);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1138,7 +1138,7 @@ static void warp_plane_old(WarpedMotionParams *wm, uint8_t *ref, int width,
|
|||
int height, int stride, uint8_t *pred, int p_col,
|
||||
int p_row, int p_width, int p_height, int p_stride,
|
||||
int subsampling_x, int subsampling_y, int x_scale,
|
||||
int y_scale, int ref_frm) {
|
||||
int y_scale, int comp_avg) {
|
||||
int i, j;
|
||||
ProjectPointsFunc projectpoints = get_project_points_type(wm->wmtype);
|
||||
if (projectpoints == NULL) return;
|
||||
|
@ -1150,7 +1150,7 @@ static void warp_plane_old(WarpedMotionParams *wm, uint8_t *ref, int width,
|
|||
projectpoints(wm->wmmat, in, out, 1, 2, 2, subsampling_x, subsampling_y);
|
||||
out[0] = ROUND_POWER_OF_TWO_SIGNED(out[0] * x_scale, 4);
|
||||
out[1] = ROUND_POWER_OF_TWO_SIGNED(out[1] * y_scale, 4);
|
||||
if (ref_frm)
|
||||
if (comp_avg)
|
||||
pred[(j - p_col) + (i - p_row) * p_stride] = ROUND_POWER_OF_TWO(
|
||||
pred[(j - p_col) + (i - p_row) * p_stride] +
|
||||
warp_interpolate(ref, out[0], out[1], width, height, stride),
|
||||
|
@ -1202,7 +1202,7 @@ static void warp_plane_old(WarpedMotionParams *wm, uint8_t *ref, int width,
|
|||
void av1_warp_affine_c(const int32_t *mat, const uint8_t *ref, int width,
|
||||
int height, int stride, uint8_t *pred, int p_col,
|
||||
int p_row, int p_width, int p_height, int p_stride,
|
||||
int subsampling_x, int subsampling_y, int ref_frm,
|
||||
int subsampling_x, int subsampling_y, int comp_avg,
|
||||
int16_t alpha, int16_t beta, int16_t gamma,
|
||||
int16_t delta) {
|
||||
int16_t tmp[15 * 8];
|
||||
|
@ -1316,7 +1316,7 @@ void av1_warp_affine_c(const int32_t *mat, const uint8_t *ref, int width,
|
|||
sum += tmp[(k + m + 4) * 8 + (l + 4)] * coeffs[m];
|
||||
}
|
||||
sum = clip_pixel(ROUND_POWER_OF_TWO(sum, VERSHEAR_REDUCE_PREC_BITS));
|
||||
if (ref_frm)
|
||||
if (comp_avg)
|
||||
*p = ROUND_POWER_OF_TWO(*p + sum, 1);
|
||||
else
|
||||
*p = sum;
|
||||
|
@ -1331,7 +1331,7 @@ static void warp_plane(WarpedMotionParams *wm, uint8_t *ref, int width,
|
|||
int height, int stride, uint8_t *pred, int p_col,
|
||||
int p_row, int p_width, int p_height, int p_stride,
|
||||
int subsampling_x, int subsampling_y, int x_scale,
|
||||
int y_scale, int ref_frm) {
|
||||
int y_scale, int comp_avg) {
|
||||
if (wm->wmtype == ROTZOOM) {
|
||||
wm->wmmat[5] = wm->wmmat[2];
|
||||
wm->wmmat[4] = -wm->wmmat[3];
|
||||
|
@ -1346,11 +1346,11 @@ static void warp_plane(WarpedMotionParams *wm, uint8_t *ref, int width,
|
|||
|
||||
av1_warp_affine(mat, ref, width, height, stride, pred, p_col, p_row,
|
||||
p_width, p_height, p_stride, subsampling_x, subsampling_y,
|
||||
ref_frm, alpha, beta, gamma, delta);
|
||||
comp_avg, alpha, beta, gamma, delta);
|
||||
} else {
|
||||
warp_plane_old(wm, ref, width, height, stride, pred, p_col, p_row, p_width,
|
||||
p_height, p_stride, subsampling_x, subsampling_y, x_scale,
|
||||
y_scale, ref_frm);
|
||||
y_scale, comp_avg);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1409,17 +1409,17 @@ void av1_warp_plane(WarpedMotionParams *wm,
|
|||
uint8_t *ref, int width, int height, int stride,
|
||||
uint8_t *pred, int p_col, int p_row, int p_width,
|
||||
int p_height, int p_stride, int subsampling_x,
|
||||
int subsampling_y, int x_scale, int y_scale, int ref_frm) {
|
||||
int subsampling_y, int x_scale, int y_scale, int comp_avg) {
|
||||
#if CONFIG_HIGHBITDEPTH
|
||||
if (use_hbd)
|
||||
highbd_warp_plane(wm, ref, width, height, stride, pred, p_col, p_row,
|
||||
p_width, p_height, p_stride, subsampling_x, subsampling_y,
|
||||
x_scale, y_scale, bd, ref_frm);
|
||||
x_scale, y_scale, bd, comp_avg);
|
||||
else
|
||||
#endif // CONFIG_HIGHBITDEPTH
|
||||
warp_plane(wm, ref, width, height, stride, pred, p_col, p_row, p_width,
|
||||
p_height, p_stride, subsampling_x, subsampling_y, x_scale,
|
||||
y_scale, ref_frm);
|
||||
y_scale, comp_avg);
|
||||
}
|
||||
|
||||
#if CONFIG_WARPED_MOTION
|
||||
|
|
|
@ -87,7 +87,7 @@ void av1_warp_plane(WarpedMotionParams *wm,
|
|||
uint8_t *ref, int width, int height, int stride,
|
||||
uint8_t *pred, int p_col, int p_row, int p_width,
|
||||
int p_height, int p_stride, int subsampling_x,
|
||||
int subsampling_y, int x_scale, int y_scale, int ref_frm);
|
||||
int subsampling_y, int x_scale, int y_scale, int comp_avg);
|
||||
|
||||
int find_projection(int np, int *pts1, int *pts2, BLOCK_SIZE bsize, int mvy,
|
||||
int mvx, WarpedMotionParams *wm_params, int mi_row,
|
||||
|
|
|
@ -20,7 +20,7 @@ void av1_highbd_warp_affine_ssse3(const int32_t *mat, const uint16_t *ref,
|
|||
uint16_t *pred, int p_col, int p_row,
|
||||
int p_width, int p_height, int p_stride,
|
||||
int subsampling_x, int subsampling_y, int bd,
|
||||
int ref_frm, int16_t alpha, int16_t beta,
|
||||
int comp_avg, int16_t alpha, int16_t beta,
|
||||
int16_t gamma, int16_t delta) {
|
||||
#if HORSHEAR_REDUCE_PREC_BITS >= 5
|
||||
__m128i tmp[15];
|
||||
|
@ -304,10 +304,12 @@ void av1_highbd_warp_affine_ssse3(const int32_t *mat, const uint16_t *ref,
|
|||
// to only output 4 pixels at this point, to avoid encode/decode
|
||||
// mismatches when encoding with multiple threads.
|
||||
if (p_width == 4) {
|
||||
if (ref_frm) res_16bit = _mm_avg_epu16(res_16bit, _mm_loadl_epi64(p));
|
||||
if (comp_avg)
|
||||
res_16bit = _mm_avg_epu16(res_16bit, _mm_loadl_epi64(p));
|
||||
_mm_storel_epi64(p, res_16bit);
|
||||
} else {
|
||||
if (ref_frm) res_16bit = _mm_avg_epu16(res_16bit, _mm_loadu_si128(p));
|
||||
if (comp_avg)
|
||||
res_16bit = _mm_avg_epu16(res_16bit, _mm_loadu_si128(p));
|
||||
_mm_storeu_si128(p, res_16bit);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -18,7 +18,7 @@
|
|||
void av1_warp_affine_sse2(const int32_t *mat, const uint8_t *ref, int width,
|
||||
int height, int stride, uint8_t *pred, int p_col,
|
||||
int p_row, int p_width, int p_height, int p_stride,
|
||||
int subsampling_x, int subsampling_y, int ref_frm,
|
||||
int subsampling_x, int subsampling_y, int comp_avg,
|
||||
int16_t alpha, int16_t beta, int16_t gamma,
|
||||
int16_t delta) {
|
||||
__m128i tmp[15];
|
||||
|
@ -296,13 +296,13 @@ void av1_warp_affine_sse2(const int32_t *mat, const uint8_t *ref, int width,
|
|||
// to only output 4 pixels at this point, to avoid encode/decode
|
||||
// mismatches when encoding with multiple threads.
|
||||
if (p_width == 4) {
|
||||
if (ref_frm) {
|
||||
if (comp_avg) {
|
||||
const __m128i orig = _mm_cvtsi32_si128(*(uint32_t *)p);
|
||||
res_8bit = _mm_avg_epu8(res_8bit, orig);
|
||||
}
|
||||
*(uint32_t *)p = _mm_cvtsi128_si32(res_8bit);
|
||||
} else {
|
||||
if (ref_frm) res_8bit = _mm_avg_epu8(res_8bit, _mm_loadl_epi64(p));
|
||||
if (comp_avg) res_8bit = _mm_avg_epu8(res_8bit, _mm_loadl_epi64(p));
|
||||
_mm_storel_epi64(p, res_8bit);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -205,7 +205,7 @@ static const uint8_t odd_mask[16] = { 1, 3, 3, 5, 5, 7, 7, 9,
|
|||
void av1_warp_affine_ssse3(const int32_t *mat, const uint8_t *ref, int width,
|
||||
int height, int stride, uint8_t *pred, int p_col,
|
||||
int p_row, int p_width, int p_height, int p_stride,
|
||||
int subsampling_x, int subsampling_y, int ref_frm,
|
||||
int subsampling_x, int subsampling_y, int comp_avg,
|
||||
int16_t alpha, int16_t beta, int16_t gamma,
|
||||
int16_t delta) {
|
||||
__m128i tmp[15];
|
||||
|
@ -473,13 +473,13 @@ void av1_warp_affine_ssse3(const int32_t *mat, const uint8_t *ref, int width,
|
|||
// to only output 4 pixels at this point, to avoid encode/decode
|
||||
// mismatches when encoding with multiple threads.
|
||||
if (p_width == 4) {
|
||||
if (ref_frm) {
|
||||
if (comp_avg) {
|
||||
const __m128i orig = _mm_cvtsi32_si128(*(uint32_t *)p);
|
||||
res_8bit = _mm_avg_epu8(res_8bit, orig);
|
||||
}
|
||||
*(uint32_t *)p = _mm_cvtsi128_si32(res_8bit);
|
||||
} else {
|
||||
if (ref_frm) res_8bit = _mm_avg_epu8(res_8bit, _mm_loadl_epi64(p));
|
||||
if (comp_avg) res_8bit = _mm_avg_epu8(res_8bit, _mm_loadl_epi64(p));
|
||||
_mm_storel_epi64(p, res_8bit);
|
||||
}
|
||||
}
|
||||
|
|
Загрузка…
Ссылка в новой задаче