Previously, the projected positions of chroma pixels would effectively
undergo double rounding, since we round both when calculating x4 and y4
and when calculating the filter index. Further, the two roundings
were different: x4 and y4 used ROUND_POWER_OF_TWO_SIGNED, whereas
the filter index uses ROUND_POWER_OF_TWO.

It is slightly more accurate (and faster) to replace the first
rounding by a shift; this is motivated by the fact that, for a
flooring (arithmetic) right shift,

    ROUND_POWER_OF_TWO(x >> a, b) == ROUND_POWER_OF_TWO(x, a + b)

Change-Id: Ia52b05745168d0aeb05f0af4c75ff33eee791d82
This commit is contained in:
David Barker 2017-05-05 11:18:14 +01:00 committed by Debargha Mukherjee
Parent 40f22ef85b
Commit f7a5ee536b
4 changed files: 30 additions and 40 deletions

Просмотреть файл

@ -984,18 +984,16 @@ void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref,
for (j = p_col; j < p_col + p_width; j += 8) {
int32_t x4, y4, ix4, sx4, iy4, sy4;
if (subsampling_x)
x4 = ROUND_POWER_OF_TWO_SIGNED(
mat[2] * 2 * (j + 4) + mat[3] * 2 * (i + 4) + mat[0] +
(mat[2] + mat[3] - (1 << WARPEDMODEL_PREC_BITS)) / 2,
1);
x4 = (mat[2] * 4 * (j + 4) + mat[3] * 4 * (i + 4) + mat[0] * 2 +
(mat[2] + mat[3] - (1 << WARPEDMODEL_PREC_BITS))) /
4;
else
x4 = mat[2] * (j + 4) + mat[3] * (i + 4) + mat[0];
if (subsampling_y)
y4 = ROUND_POWER_OF_TWO_SIGNED(
mat[4] * 2 * (j + 4) + mat[5] * 2 * (i + 4) + mat[1] +
(mat[4] + mat[5] - (1 << WARPEDMODEL_PREC_BITS)) / 2,
1);
y4 = (mat[4] * 4 * (j + 4) + mat[5] * 4 * (i + 4) + mat[1] * 2 +
(mat[4] + mat[5] - (1 << WARPEDMODEL_PREC_BITS))) /
4;
else
y4 = mat[4] * (j + 4) + mat[5] * (i + 4) + mat[1];
@ -1229,18 +1227,16 @@ void av1_warp_affine_c(const int32_t *mat, const uint8_t *ref, int width,
for (j = p_col; j < p_col + p_width; j += 8) {
int32_t x4, y4, ix4, sx4, iy4, sy4;
if (subsampling_x)
x4 = ROUND_POWER_OF_TWO_SIGNED(
mat[2] * 2 * (j + 4) + mat[3] * 2 * (i + 4) + mat[0] +
(mat[2] + mat[3] - (1 << WARPEDMODEL_PREC_BITS)) / 2,
1);
x4 = (mat[2] * 4 * (j + 4) + mat[3] * 4 * (i + 4) + mat[0] * 2 +
(mat[2] + mat[3] - (1 << WARPEDMODEL_PREC_BITS))) /
4;
else
x4 = mat[2] * (j + 4) + mat[3] * (i + 4) + mat[0];
if (subsampling_y)
y4 = ROUND_POWER_OF_TWO_SIGNED(
mat[4] * 2 * (j + 4) + mat[5] * 2 * (i + 4) + mat[1] +
(mat[4] + mat[5] - (1 << WARPEDMODEL_PREC_BITS)) / 2,
1);
y4 = (mat[4] * 4 * (j + 4) + mat[5] * 4 * (i + 4) + mat[1] * 2 +
(mat[4] + mat[5] - (1 << WARPEDMODEL_PREC_BITS))) /
4;
else
y4 = mat[4] * (j + 4) + mat[5] * (i + 4) + mat[1];

Просмотреть файл

@ -50,18 +50,16 @@ void av1_highbd_warp_affine_ssse3(const int32_t *mat, const uint16_t *ref,
int32_t x4, y4, ix4, sx4, iy4, sy4;
if (subsampling_x)
x4 = ROUND_POWER_OF_TWO_SIGNED(
mat[2] * 2 * dst_x + mat[3] * 2 * dst_y + mat[0] +
(mat[2] + mat[3] - (1 << WARPEDMODEL_PREC_BITS)) / 2,
1);
x4 = (mat[2] * 4 * dst_x + mat[3] * 4 * dst_y + mat[0] * 2 +
(mat[2] + mat[3] - (1 << WARPEDMODEL_PREC_BITS))) /
4;
else
x4 = mat[2] * dst_x + mat[3] * dst_y + mat[0];
if (subsampling_y)
y4 = ROUND_POWER_OF_TWO_SIGNED(
mat[4] * 2 * dst_x + mat[5] * 2 * dst_y + mat[1] +
(mat[4] + mat[5] - (1 << WARPEDMODEL_PREC_BITS)) / 2,
1);
y4 = (mat[4] * 4 * dst_x + mat[5] * 4 * dst_y + mat[1] * 2 +
(mat[4] + mat[5] - (1 << WARPEDMODEL_PREC_BITS))) /
4;
else
y4 = mat[4] * dst_x + mat[5] * dst_y + mat[1];

Просмотреть файл

@ -45,18 +45,16 @@ void av1_warp_affine_sse2(const int32_t *mat, const uint8_t *ref, int width,
int32_t x4, y4, ix4, sx4, iy4, sy4;
if (subsampling_x)
x4 = ROUND_POWER_OF_TWO_SIGNED(
mat[2] * 2 * dst_x + mat[3] * 2 * dst_y + mat[0] +
(mat[2] + mat[3] - (1 << WARPEDMODEL_PREC_BITS)) / 2,
1);
x4 = (mat[2] * 4 * dst_x + mat[3] * 4 * dst_y + mat[0] * 2 +
(mat[2] + mat[3] - (1 << WARPEDMODEL_PREC_BITS))) /
4;
else
x4 = mat[2] * dst_x + mat[3] * dst_y + mat[0];
if (subsampling_y)
y4 = ROUND_POWER_OF_TWO_SIGNED(
mat[4] * 2 * dst_x + mat[5] * 2 * dst_y + mat[1] +
(mat[4] + mat[5] - (1 << WARPEDMODEL_PREC_BITS)) / 2,
1);
y4 = (mat[4] * 4 * dst_x + mat[5] * 4 * dst_y + mat[1] * 2 +
(mat[4] + mat[5] - (1 << WARPEDMODEL_PREC_BITS))) /
4;
else
y4 = mat[4] * dst_x + mat[5] * dst_y + mat[1];

Просмотреть файл

@ -232,18 +232,16 @@ void av1_warp_affine_ssse3(const int32_t *mat, const uint8_t *ref, int width,
int32_t x4, y4, ix4, sx4, iy4, sy4;
if (subsampling_x)
x4 = ROUND_POWER_OF_TWO_SIGNED(
mat[2] * 2 * dst_x + mat[3] * 2 * dst_y + mat[0] +
(mat[2] + mat[3] - (1 << WARPEDMODEL_PREC_BITS)) / 2,
1);
x4 = (mat[2] * 4 * dst_x + mat[3] * 4 * dst_y + mat[0] * 2 +
(mat[2] + mat[3] - (1 << WARPEDMODEL_PREC_BITS))) /
4;
else
x4 = mat[2] * dst_x + mat[3] * dst_y + mat[0];
if (subsampling_y)
y4 = ROUND_POWER_OF_TWO_SIGNED(
mat[4] * 2 * dst_x + mat[5] * 2 * dst_y + mat[1] +
(mat[4] + mat[5] - (1 << WARPEDMODEL_PREC_BITS)) / 2,
1);
y4 = (mat[4] * 4 * dst_x + mat[5] * 4 * dst_y + mat[1] * 2 +
(mat[4] + mat[5] - (1 << WARPEDMODEL_PREC_BITS))) /
4;
else
y4 = mat[4] * dst_x + mat[5] * dst_y + mat[1];