Improve rectangular transform accuracy

By adjusting the internal scaling and rounding in the transforms,
we can reduce the maximum round-trip errors to:
* 8x16 and 16x8: 0 pixel values (i.e., the transforms are exact)
* 16x32: 1 pixel value
* 32x16: 2 pixel values

Change-Id: I0ba691a8d27042dcf1dd5ae81568d07a92d68781
This commit is contained in:
David Barker 2016-11-22 10:59:33 +00:00 коммит произвёл Debargha Mukherjee
Родитель 29c61068d7
Коммит 838a53d623
6 изменённых файлов: 124 добавлений и 54 удалений

Просмотреть файл

@ -1337,7 +1337,9 @@ void av1_fht8x16_c(const int16_t *input, tran_low_t *output, int stride,
for (i = 0; i < n2; ++i) { for (i = 0; i < n2; ++i) {
for (j = 0; j < n; ++j) temp_in[j] = out[j + i * n]; for (j = 0; j < n; ++j) temp_in[j] = out[j + i * n];
ht.rows(temp_in, temp_out); ht.rows(temp_in, temp_out);
for (j = 0; j < n; ++j) output[j + i * n] = temp_out[j] >> 2; for (j = 0; j < n; ++j)
output[j + i * n] =
saturate_int16(temp_out[j] + 1 + (temp_out[j] < 0)) >> 2;
} }
// Note: overall scale factor of transform is 8 times unitary // Note: overall scale factor of transform is 8 times unitary
} }
@ -1388,7 +1390,9 @@ void av1_fht16x8_c(const int16_t *input, tran_low_t *output, int stride,
for (i = 0; i < n; ++i) { for (i = 0; i < n; ++i) {
for (j = 0; j < n2; ++j) temp_in[j] = out[j + i * n2]; for (j = 0; j < n2; ++j) temp_in[j] = out[j + i * n2];
ht.rows(temp_in, temp_out); ht.rows(temp_in, temp_out);
for (j = 0; j < n2; ++j) output[j + i * n2] = temp_out[j] >> 2; for (j = 0; j < n2; ++j)
output[j + i * n2] =
saturate_int16(temp_out[j] + 1 + (temp_out[j] < 0)) >> 2;
} }
// Note: overall scale factor of transform is 8 times unitary // Note: overall scale factor of transform is 8 times unitary
} }
@ -1429,16 +1433,20 @@ void av1_fht16x32_c(const int16_t *input, tran_low_t *output, int stride,
// Columns // Columns
for (i = 0; i < n; ++i) { for (i = 0; i < n; ++i) {
for (j = 0; j < n2; ++j) for (j = 0; j < n2; ++j)
temp_in[j] = (tran_low_t)fdct_round_shift(input[j * stride + i] * Sqrt2); temp_in[j] =
(tran_low_t)fdct_round_shift(input[j * stride + i] * 4 * Sqrt2);
ht.cols(temp_in, temp_out); ht.cols(temp_in, temp_out);
for (j = 0; j < n2; ++j) out[j * n + i] = temp_out[j]; for (j = 0; j < n2; ++j)
out[j * n + i] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2;
} }
// Rows // Rows
for (i = 0; i < n2; ++i) { for (i = 0; i < n2; ++i) {
for (j = 0; j < n; ++j) temp_in[j] = out[j + i * n]; for (j = 0; j < n; ++j) temp_in[j] = out[j + i * n];
ht.rows(temp_in, temp_out); ht.rows(temp_in, temp_out);
for (j = 0; j < n; ++j) output[j + i * n] = temp_out[j] >> 2; for (j = 0; j < n; ++j)
output[j + i * n] =
saturate_int16(temp_out[j] + 1 + (temp_out[j] < 0)) >> 2;
} }
// Note: overall scale factor of transform is 4 times unitary // Note: overall scale factor of transform is 4 times unitary
} }
@ -1479,16 +1487,20 @@ void av1_fht32x16_c(const int16_t *input, tran_low_t *output, int stride,
// Columns // Columns
for (i = 0; i < n2; ++i) { for (i = 0; i < n2; ++i) {
for (j = 0; j < n; ++j) for (j = 0; j < n; ++j)
temp_in[j] = (tran_low_t)fdct_round_shift(input[j * stride + i] * Sqrt2); temp_in[j] =
(tran_low_t)fdct_round_shift(input[j * stride + i] * 4 * Sqrt2);
ht.cols(temp_in, temp_out); ht.cols(temp_in, temp_out);
for (j = 0; j < n; ++j) out[j * n2 + i] = temp_out[j]; for (j = 0; j < n; ++j)
out[j * n2 + i] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2;
} }
// Rows // Rows
for (i = 0; i < n; ++i) { for (i = 0; i < n; ++i) {
for (j = 0; j < n2; ++j) temp_in[j] = out[j + i * n2]; for (j = 0; j < n2; ++j) temp_in[j] = out[j + i * n2];
ht.rows(temp_in, temp_out); ht.rows(temp_in, temp_out);
for (j = 0; j < n2; ++j) output[j + i * n2] = temp_out[j] >> 2; for (j = 0; j < n2; ++j)
output[j + i * n2] =
saturate_int16(temp_out[j] + 1 + (temp_out[j] < 0)) >> 2;
} }
// Note: overall scale factor of transform is 4 times unitary // Note: overall scale factor of transform is 4 times unitary
} }

Просмотреть файл

@ -796,14 +796,14 @@ static INLINE void right_shift_8x8(__m128i *res, const int bit) {
if (bit == 2) { if (bit == 2) {
const __m128i const_rounding = _mm_set1_epi16(1); const __m128i const_rounding = _mm_set1_epi16(1);
res[0] = _mm_add_epi16(res[0], const_rounding); res[0] = _mm_adds_epi16(res[0], const_rounding);
res[1] = _mm_add_epi16(res[1], const_rounding); res[1] = _mm_adds_epi16(res[1], const_rounding);
res[2] = _mm_add_epi16(res[2], const_rounding); res[2] = _mm_adds_epi16(res[2], const_rounding);
res[3] = _mm_add_epi16(res[3], const_rounding); res[3] = _mm_adds_epi16(res[3], const_rounding);
res[4] = _mm_add_epi16(res[4], const_rounding); res[4] = _mm_adds_epi16(res[4], const_rounding);
res[5] = _mm_add_epi16(res[5], const_rounding); res[5] = _mm_adds_epi16(res[5], const_rounding);
res[6] = _mm_add_epi16(res[6], const_rounding); res[6] = _mm_adds_epi16(res[6], const_rounding);
res[7] = _mm_add_epi16(res[7], const_rounding); res[7] = _mm_adds_epi16(res[7], const_rounding);
} }
res[0] = _mm_sub_epi16(res[0], sign0); res[0] = _mm_sub_epi16(res[0], sign0);
@ -3140,14 +3140,6 @@ static INLINE void load_buffer_8x16(const int16_t *input, __m128i *in,
scale_sqrt2_8x8_signed(in + 8); scale_sqrt2_8x8_signed(in + 8);
} }
static INLINE void right_shift(__m128i *in, int size, int bit) {
int i = 0;
while (i < size) {
in[i] = _mm_srai_epi16(in[i], bit);
i += 1;
}
}
void av1_fht8x16_sse2(const int16_t *input, tran_low_t *output, int stride, void av1_fht8x16_sse2(const int16_t *input, tran_low_t *output, int stride,
int tx_type) { int tx_type) {
__m128i in[16]; __m128i in[16];
@ -3288,8 +3280,8 @@ void av1_fht8x16_sse2(const int16_t *input, tran_low_t *output, int stride,
#endif #endif
default: assert(0); break; default: assert(0); break;
} }
right_shift(t, 8, 2); right_shift_8x8(t, 2);
right_shift(b, 8, 2); right_shift_8x8(b, 2);
write_buffer_8x8(output, t, 8); write_buffer_8x8(output, t, 8);
write_buffer_8x8(output + 64, b, 8); write_buffer_8x8(output + 64, b, 8);
} }
@ -3424,8 +3416,8 @@ void av1_fht16x8_sse2(const int16_t *input, tran_low_t *output, int stride,
} }
array_transpose_8x8(l, l); array_transpose_8x8(l, l);
array_transpose_8x8(r, r); array_transpose_8x8(r, r);
right_shift(l, 8, 2); right_shift_8x8(l, 2);
right_shift(r, 8, 2); right_shift_8x8(r, 2);
write_buffer_8x8(output, l, 16); write_buffer_8x8(output, l, 16);
write_buffer_8x8(output + 8, r, 16); write_buffer_8x8(output + 8, r, 16);
} }
@ -3496,12 +3488,14 @@ static INLINE void load_buffer_16x32(const int16_t *input, __m128i *intl,
} }
for (i = 0; i < 16; ++i) { for (i = 0; i < 16; ++i) {
intl[i + 0] = _mm_load_si128((const __m128i *)(input + i * stride + 0)); intl[i] = _mm_slli_epi16(
intr[i + 0] = _mm_load_si128((const __m128i *)(input + i * stride + 8)); _mm_load_si128((const __m128i *)(input + i * stride + 0)), 2);
inbl[i + 0] = intr[i] = _mm_slli_epi16(
_mm_load_si128((const __m128i *)(input + (i + 16) * stride + 0)); _mm_load_si128((const __m128i *)(input + i * stride + 8)), 2);
inbr[i + 0] = inbl[i] = _mm_slli_epi16(
_mm_load_si128((const __m128i *)(input + (i + 16) * stride + 8)); _mm_load_si128((const __m128i *)(input + (i + 16) * stride + 0)), 2);
inbr[i] = _mm_slli_epi16(
_mm_load_si128((const __m128i *)(input + (i + 16) * stride + 8)), 2);
} }
if (fliplr) { if (fliplr) {
@ -3526,10 +3520,8 @@ static INLINE void write_buffer_16x32(tran_low_t *output, __m128i *restl,
__m128i *restr, __m128i *resbl, __m128i *restr, __m128i *resbl,
__m128i *resbr) { __m128i *resbr) {
int i; int i;
right_shift(restl, 16, 2); right_shift_16x16(restl, restr);
right_shift(restr, 16, 2); right_shift_16x16(resbl, resbr);
right_shift(resbl, 16, 2);
right_shift(resbr, 16, 2);
for (i = 0; i < 16; ++i) { for (i = 0; i < 16; ++i) {
store_output(&restl[i], output + i * 16 + 0); store_output(&restl[i], output + i * 16 + 0);
store_output(&restr[i], output + i * 16 + 8); store_output(&restr[i], output + i * 16 + 8);
@ -3551,24 +3543,32 @@ void av1_fht16x32_sse2(const int16_t *input, tran_low_t *output, int stride,
case DCT_DCT: case DCT_DCT:
load_buffer_16x32(input, intl, intr, inbl, inbr, stride, 0, 0); load_buffer_16x32(input, intl, intr, inbl, inbr, stride, 0, 0);
fdct32_16col(intl, intr, inbl, inbr); fdct32_16col(intl, intr, inbl, inbr);
right_shift_16x16(intl, intr);
right_shift_16x16(inbl, inbr);
fdct16_sse2(intl, intr); fdct16_sse2(intl, intr);
fdct16_sse2(inbl, inbr); fdct16_sse2(inbl, inbr);
break; break;
case ADST_DCT: case ADST_DCT:
load_buffer_16x32(input, intl, intr, inbl, inbr, stride, 0, 0); load_buffer_16x32(input, intl, intr, inbl, inbr, stride, 0, 0);
fhalfright32_16col(intl, intr, inbl, inbr); fhalfright32_16col(intl, intr, inbl, inbr);
right_shift_16x16(intl, intr);
right_shift_16x16(inbl, inbr);
fdct16_sse2(intl, intr); fdct16_sse2(intl, intr);
fdct16_sse2(inbl, inbr); fdct16_sse2(inbl, inbr);
break; break;
case DCT_ADST: case DCT_ADST:
load_buffer_16x32(input, intl, intr, inbl, inbr, stride, 0, 0); load_buffer_16x32(input, intl, intr, inbl, inbr, stride, 0, 0);
fdct32_16col(intl, intr, inbl, inbr); fdct32_16col(intl, intr, inbl, inbr);
right_shift_16x16(intl, intr);
right_shift_16x16(inbl, inbr);
fadst16_sse2(intl, intr); fadst16_sse2(intl, intr);
fadst16_sse2(inbl, inbr); fadst16_sse2(inbl, inbr);
break; break;
case ADST_ADST: case ADST_ADST:
load_buffer_16x32(input, intl, intr, inbl, inbr, stride, 0, 0); load_buffer_16x32(input, intl, intr, inbl, inbr, stride, 0, 0);
fhalfright32_16col(intl, intr, inbl, inbr); fhalfright32_16col(intl, intr, inbl, inbr);
right_shift_16x16(intl, intr);
right_shift_16x16(inbl, inbr);
fadst16_sse2(intl, intr); fadst16_sse2(intl, intr);
fadst16_sse2(inbl, inbr); fadst16_sse2(inbl, inbr);
break; break;
@ -3576,72 +3576,96 @@ void av1_fht16x32_sse2(const int16_t *input, tran_low_t *output, int stride,
case FLIPADST_DCT: case FLIPADST_DCT:
load_buffer_16x32(input, intl, intr, inbl, inbr, stride, 1, 0); load_buffer_16x32(input, intl, intr, inbl, inbr, stride, 1, 0);
fhalfright32_16col(intl, intr, inbl, inbr); fhalfright32_16col(intl, intr, inbl, inbr);
right_shift_16x16(intl, intr);
right_shift_16x16(inbl, inbr);
fdct16_sse2(intl, intr); fdct16_sse2(intl, intr);
fdct16_sse2(inbl, inbr); fdct16_sse2(inbl, inbr);
break; break;
case DCT_FLIPADST: case DCT_FLIPADST:
load_buffer_16x32(input, intl, intr, inbl, inbr, stride, 0, 1); load_buffer_16x32(input, intl, intr, inbl, inbr, stride, 0, 1);
fdct32_16col(intl, intr, inbl, inbr); fdct32_16col(intl, intr, inbl, inbr);
right_shift_16x16(intl, intr);
right_shift_16x16(inbl, inbr);
fadst16_sse2(intl, intr); fadst16_sse2(intl, intr);
fadst16_sse2(inbl, inbr); fadst16_sse2(inbl, inbr);
break; break;
case FLIPADST_FLIPADST: case FLIPADST_FLIPADST:
load_buffer_16x32(input, intl, intr, inbl, inbr, stride, 1, 1); load_buffer_16x32(input, intl, intr, inbl, inbr, stride, 1, 1);
fhalfright32_16col(intl, intr, inbl, inbr); fhalfright32_16col(intl, intr, inbl, inbr);
right_shift_16x16(intl, intr);
right_shift_16x16(inbl, inbr);
fadst16_sse2(intl, intr); fadst16_sse2(intl, intr);
fadst16_sse2(inbl, inbr); fadst16_sse2(inbl, inbr);
break; break;
case ADST_FLIPADST: case ADST_FLIPADST:
load_buffer_16x32(input, intl, intr, inbl, inbr, stride, 0, 1); load_buffer_16x32(input, intl, intr, inbl, inbr, stride, 0, 1);
fhalfright32_16col(intl, intr, inbl, inbr); fhalfright32_16col(intl, intr, inbl, inbr);
right_shift_16x16(intl, intr);
right_shift_16x16(inbl, inbr);
fadst16_sse2(intl, intr); fadst16_sse2(intl, intr);
fadst16_sse2(inbl, inbr); fadst16_sse2(inbl, inbr);
break; break;
case FLIPADST_ADST: case FLIPADST_ADST:
load_buffer_16x32(input, intl, intr, inbl, inbr, stride, 1, 0); load_buffer_16x32(input, intl, intr, inbl, inbr, stride, 1, 0);
fhalfright32_16col(intl, intr, inbl, inbr); fhalfright32_16col(intl, intr, inbl, inbr);
right_shift_16x16(intl, intr);
right_shift_16x16(inbl, inbr);
fadst16_sse2(intl, intr); fadst16_sse2(intl, intr);
fadst16_sse2(inbl, inbr); fadst16_sse2(inbl, inbr);
break; break;
case IDTX: case IDTX:
load_buffer_16x32(input, intl, intr, inbl, inbr, stride, 0, 0); load_buffer_16x32(input, intl, intr, inbl, inbr, stride, 0, 0);
fidtx32_16col(intl, intr, inbl, inbr); fidtx32_16col(intl, intr, inbl, inbr);
right_shift_16x16(intl, intr);
right_shift_16x16(inbl, inbr);
fidtx16_sse2(intl, intr); fidtx16_sse2(intl, intr);
fidtx16_sse2(inbl, inbr); fidtx16_sse2(inbl, inbr);
break; break;
case V_DCT: case V_DCT:
load_buffer_16x32(input, intl, intr, inbl, inbr, stride, 0, 0); load_buffer_16x32(input, intl, intr, inbl, inbr, stride, 0, 0);
fdct32_16col(intl, intr, inbl, inbr); fdct32_16col(intl, intr, inbl, inbr);
right_shift_16x16(intl, intr);
right_shift_16x16(inbl, inbr);
fidtx16_sse2(intl, intr); fidtx16_sse2(intl, intr);
fidtx16_sse2(inbl, inbr); fidtx16_sse2(inbl, inbr);
break; break;
case H_DCT: case H_DCT:
load_buffer_16x32(input, intl, intr, inbl, inbr, stride, 0, 0); load_buffer_16x32(input, intl, intr, inbl, inbr, stride, 0, 0);
fidtx32_16col(intl, intr, inbl, inbr); fidtx32_16col(intl, intr, inbl, inbr);
right_shift_16x16(intl, intr);
right_shift_16x16(inbl, inbr);
fdct16_sse2(intl, intr); fdct16_sse2(intl, intr);
fdct16_sse2(inbl, inbr); fdct16_sse2(inbl, inbr);
break; break;
case V_ADST: case V_ADST:
load_buffer_16x32(input, intl, intr, inbl, inbr, stride, 0, 0); load_buffer_16x32(input, intl, intr, inbl, inbr, stride, 0, 0);
fhalfright32_16col(intl, intr, inbl, inbr); fhalfright32_16col(intl, intr, inbl, inbr);
right_shift_16x16(intl, intr);
right_shift_16x16(inbl, inbr);
fidtx16_sse2(intl, intr); fidtx16_sse2(intl, intr);
fidtx16_sse2(inbl, inbr); fidtx16_sse2(inbl, inbr);
break; break;
case H_ADST: case H_ADST:
load_buffer_16x32(input, intl, intr, inbl, inbr, stride, 0, 0); load_buffer_16x32(input, intl, intr, inbl, inbr, stride, 0, 0);
fidtx32_16col(intl, intr, inbl, inbr); fidtx32_16col(intl, intr, inbl, inbr);
right_shift_16x16(intl, intr);
right_shift_16x16(inbl, inbr);
fadst16_sse2(intl, intr); fadst16_sse2(intl, intr);
fadst16_sse2(inbl, inbr); fadst16_sse2(inbl, inbr);
break; break;
case V_FLIPADST: case V_FLIPADST:
load_buffer_16x32(input, intl, intr, inbl, inbr, stride, 1, 0); load_buffer_16x32(input, intl, intr, inbl, inbr, stride, 1, 0);
fhalfright32_16col(intl, intr, inbl, inbr); fhalfright32_16col(intl, intr, inbl, inbr);
right_shift_16x16(intl, intr);
right_shift_16x16(inbl, inbr);
fidtx16_sse2(intl, intr); fidtx16_sse2(intl, intr);
fidtx16_sse2(inbl, inbr); fidtx16_sse2(inbl, inbr);
break; break;
case H_FLIPADST: case H_FLIPADST:
load_buffer_16x32(input, intl, intr, inbl, inbr, stride, 0, 1); load_buffer_16x32(input, intl, intr, inbl, inbr, stride, 0, 1);
fidtx32_16col(intl, intr, inbl, inbr); fidtx32_16col(intl, intr, inbl, inbr);
right_shift_16x16(intl, intr);
right_shift_16x16(inbl, inbr);
fadst16_sse2(intl, intr); fadst16_sse2(intl, intr);
fadst16_sse2(inbl, inbr); fadst16_sse2(inbl, inbr);
break; break;
@ -3661,10 +3685,14 @@ static INLINE void load_buffer_32x16(const int16_t *input, __m128i *in0,
} }
for (i = 0; i < 16; ++i) { for (i = 0; i < 16; ++i) {
in0[i] = _mm_load_si128((const __m128i *)(input + i * stride + 0)); in0[i] = _mm_slli_epi16(
in1[i] = _mm_load_si128((const __m128i *)(input + i * stride + 8)); _mm_load_si128((const __m128i *)(input + i * stride + 0)), 2);
in2[i] = _mm_load_si128((const __m128i *)(input + i * stride + 16)); in1[i] = _mm_slli_epi16(
in3[i] = _mm_load_si128((const __m128i *)(input + i * stride + 24)); _mm_load_si128((const __m128i *)(input + i * stride + 8)), 2);
in2[i] = _mm_slli_epi16(
_mm_load_si128((const __m128i *)(input + i * stride + 16)), 2);
in3[i] = _mm_slli_epi16(
_mm_load_si128((const __m128i *)(input + i * stride + 24)), 2);
} }
if (fliplr) { if (fliplr) {
@ -3688,10 +3716,8 @@ static INLINE void write_buffer_32x16(tran_low_t *output, __m128i *res0,
__m128i *res1, __m128i *res2, __m128i *res1, __m128i *res2,
__m128i *res3) { __m128i *res3) {
int i; int i;
right_shift(res0, 16, 2); right_shift_16x16(res0, res1);
right_shift(res1, 16, 2); right_shift_16x16(res2, res3);
right_shift(res2, 16, 2);
right_shift(res3, 16, 2);
for (i = 0; i < 16; ++i) { for (i = 0; i < 16; ++i) {
store_output(&res0[i], output + i * 32 + 0); store_output(&res0[i], output + i * 32 + 0);
store_output(&res1[i], output + i * 32 + 8); store_output(&res1[i], output + i * 32 + 8);
@ -3709,21 +3735,29 @@ void av1_fht32x16_sse2(const int16_t *input, tran_low_t *output, int stride,
case DCT_DCT: case DCT_DCT:
fdct16_sse2(in0, in1); fdct16_sse2(in0, in1);
fdct16_sse2(in2, in3); fdct16_sse2(in2, in3);
right_shift_16x16(in0, in1);
right_shift_16x16(in2, in3);
fdct32_16col(in0, in1, in2, in3); fdct32_16col(in0, in1, in2, in3);
break; break;
case ADST_DCT: case ADST_DCT:
fadst16_sse2(in0, in1); fadst16_sse2(in0, in1);
fadst16_sse2(in2, in3); fadst16_sse2(in2, in3);
right_shift_16x16(in0, in1);
right_shift_16x16(in2, in3);
fdct32_16col(in0, in1, in2, in3); fdct32_16col(in0, in1, in2, in3);
break; break;
case DCT_ADST: case DCT_ADST:
fdct16_sse2(in0, in1); fdct16_sse2(in0, in1);
fdct16_sse2(in2, in3); fdct16_sse2(in2, in3);
right_shift_16x16(in0, in1);
right_shift_16x16(in2, in3);
fhalfright32_16col(in0, in1, in2, in3); fhalfright32_16col(in0, in1, in2, in3);
break; break;
case ADST_ADST: case ADST_ADST:
fadst16_sse2(in0, in1); fadst16_sse2(in0, in1);
fadst16_sse2(in2, in3); fadst16_sse2(in2, in3);
right_shift_16x16(in0, in1);
right_shift_16x16(in2, in3);
fhalfright32_16col(in0, in1, in2, in3); fhalfright32_16col(in0, in1, in2, in3);
break; break;
#if CONFIG_EXT_TX #if CONFIG_EXT_TX
@ -3731,72 +3765,96 @@ void av1_fht32x16_sse2(const int16_t *input, tran_low_t *output, int stride,
load_buffer_32x16(input, in0, in1, in2, in3, stride, 1, 0); load_buffer_32x16(input, in0, in1, in2, in3, stride, 1, 0);
fadst16_sse2(in0, in1); fadst16_sse2(in0, in1);
fadst16_sse2(in2, in3); fadst16_sse2(in2, in3);
right_shift_16x16(in0, in1);
right_shift_16x16(in2, in3);
fdct32_16col(in0, in1, in2, in3); fdct32_16col(in0, in1, in2, in3);
break; break;
case DCT_FLIPADST: case DCT_FLIPADST:
load_buffer_32x16(input, in0, in1, in2, in3, stride, 0, 1); load_buffer_32x16(input, in0, in1, in2, in3, stride, 0, 1);
fdct16_sse2(in0, in1); fdct16_sse2(in0, in1);
fdct16_sse2(in2, in3); fdct16_sse2(in2, in3);
right_shift_16x16(in0, in1);
right_shift_16x16(in2, in3);
fhalfright32_16col(in0, in1, in2, in3); fhalfright32_16col(in0, in1, in2, in3);
break; break;
case FLIPADST_FLIPADST: case FLIPADST_FLIPADST:
load_buffer_32x16(input, in0, in1, in2, in3, stride, 1, 1); load_buffer_32x16(input, in0, in1, in2, in3, stride, 1, 1);
fadst16_sse2(in0, in1); fadst16_sse2(in0, in1);
fadst16_sse2(in2, in3); fadst16_sse2(in2, in3);
right_shift_16x16(in0, in1);
right_shift_16x16(in2, in3);
fhalfright32_16col(in0, in1, in2, in3); fhalfright32_16col(in0, in1, in2, in3);
break; break;
case ADST_FLIPADST: case ADST_FLIPADST:
load_buffer_32x16(input, in0, in1, in2, in3, stride, 0, 1); load_buffer_32x16(input, in0, in1, in2, in3, stride, 0, 1);
fadst16_sse2(in0, in1); fadst16_sse2(in0, in1);
fadst16_sse2(in2, in3); fadst16_sse2(in2, in3);
right_shift_16x16(in0, in1);
right_shift_16x16(in2, in3);
fhalfright32_16col(in0, in1, in2, in3); fhalfright32_16col(in0, in1, in2, in3);
break; break;
case FLIPADST_ADST: case FLIPADST_ADST:
load_buffer_32x16(input, in0, in1, in2, in3, stride, 1, 0); load_buffer_32x16(input, in0, in1, in2, in3, stride, 1, 0);
fadst16_sse2(in0, in1); fadst16_sse2(in0, in1);
fadst16_sse2(in2, in3); fadst16_sse2(in2, in3);
right_shift_16x16(in0, in1);
right_shift_16x16(in2, in3);
fhalfright32_16col(in0, in1, in2, in3); fhalfright32_16col(in0, in1, in2, in3);
break; break;
case IDTX: case IDTX:
load_buffer_32x16(input, in0, in1, in2, in3, stride, 0, 0); load_buffer_32x16(input, in0, in1, in2, in3, stride, 0, 0);
fidtx16_sse2(in0, in1); fidtx16_sse2(in0, in1);
fidtx16_sse2(in2, in3); fidtx16_sse2(in2, in3);
right_shift_16x16(in0, in1);
right_shift_16x16(in2, in3);
fidtx32_16col(in0, in1, in2, in3); fidtx32_16col(in0, in1, in2, in3);
break; break;
case V_DCT: case V_DCT:
load_buffer_32x16(input, in0, in1, in2, in3, stride, 0, 0); load_buffer_32x16(input, in0, in1, in2, in3, stride, 0, 0);
fdct16_sse2(in0, in1); fdct16_sse2(in0, in1);
fdct16_sse2(in2, in3); fdct16_sse2(in2, in3);
right_shift_16x16(in0, in1);
right_shift_16x16(in2, in3);
fidtx32_16col(in0, in1, in2, in3); fidtx32_16col(in0, in1, in2, in3);
break; break;
case H_DCT: case H_DCT:
load_buffer_32x16(input, in0, in1, in2, in3, stride, 0, 0); load_buffer_32x16(input, in0, in1, in2, in3, stride, 0, 0);
fidtx16_sse2(in0, in1); fidtx16_sse2(in0, in1);
fidtx16_sse2(in2, in3); fidtx16_sse2(in2, in3);
right_shift_16x16(in0, in1);
right_shift_16x16(in2, in3);
fdct32_16col(in0, in1, in2, in3); fdct32_16col(in0, in1, in2, in3);
break; break;
case V_ADST: case V_ADST:
load_buffer_32x16(input, in0, in1, in2, in3, stride, 0, 0); load_buffer_32x16(input, in0, in1, in2, in3, stride, 0, 0);
fadst16_sse2(in0, in1); fadst16_sse2(in0, in1);
fadst16_sse2(in2, in3); fadst16_sse2(in2, in3);
right_shift_16x16(in0, in1);
right_shift_16x16(in2, in3);
fidtx32_16col(in0, in1, in2, in3); fidtx32_16col(in0, in1, in2, in3);
break; break;
case H_ADST: case H_ADST:
load_buffer_32x16(input, in0, in1, in2, in3, stride, 0, 0); load_buffer_32x16(input, in0, in1, in2, in3, stride, 0, 0);
fidtx16_sse2(in0, in1); fidtx16_sse2(in0, in1);
fidtx16_sse2(in2, in3); fidtx16_sse2(in2, in3);
right_shift_16x16(in0, in1);
right_shift_16x16(in2, in3);
fhalfright32_16col(in0, in1, in2, in3); fhalfright32_16col(in0, in1, in2, in3);
break; break;
case V_FLIPADST: case V_FLIPADST:
load_buffer_32x16(input, in0, in1, in2, in3, stride, 1, 0); load_buffer_32x16(input, in0, in1, in2, in3, stride, 1, 0);
fadst16_sse2(in0, in1); fadst16_sse2(in0, in1);
fadst16_sse2(in2, in3); fadst16_sse2(in2, in3);
right_shift_16x16(in0, in1);
right_shift_16x16(in2, in3);
fidtx32_16col(in0, in1, in2, in3); fidtx32_16col(in0, in1, in2, in3);
break; break;
case H_FLIPADST: case H_FLIPADST:
load_buffer_32x16(input, in0, in1, in2, in3, stride, 0, 1); load_buffer_32x16(input, in0, in1, in2, in3, stride, 0, 1);
fidtx16_sse2(in0, in1); fidtx16_sse2(in0, in1);
fidtx16_sse2(in2, in3); fidtx16_sse2(in2, in3);
right_shift_16x16(in0, in1);
right_shift_16x16(in2, in3);
fhalfright32_16col(in0, in1, in2, in3); fhalfright32_16col(in0, in1, in2, in3);
break; break;
#endif #endif

Просмотреть файл

@ -69,11 +69,11 @@ class AV1Trans16x32HT : public libaom_test::TransformTestBase,
IhtFunc inv_txfm_; IhtFunc inv_txfm_;
}; };
TEST_P(AV1Trans16x32HT, AccuracyCheck) { RunAccuracyCheck(48); } TEST_P(AV1Trans16x32HT, AccuracyCheck) { RunAccuracyCheck(1); }
TEST_P(AV1Trans16x32HT, CoeffCheck) { RunCoeffCheck(); } TEST_P(AV1Trans16x32HT, CoeffCheck) { RunCoeffCheck(); }
TEST_P(AV1Trans16x32HT, MemCheck) { RunMemCheck(); } TEST_P(AV1Trans16x32HT, MemCheck) { RunMemCheck(); }
TEST_P(AV1Trans16x32HT, InvCoeffCheck) { RunInvCoeffCheck(); } TEST_P(AV1Trans16x32HT, InvCoeffCheck) { RunInvCoeffCheck(); }
TEST_P(AV1Trans16x32HT, InvAccuracyCheck) { RunInvAccuracyCheck(9); } TEST_P(AV1Trans16x32HT, InvAccuracyCheck) { RunInvAccuracyCheck(1); }
using std::tr1::make_tuple; using std::tr1::make_tuple;
const Ht16x32Param kArrayHt16x32Param_c[] = { const Ht16x32Param kArrayHt16x32Param_c[] = {

Просмотреть файл

@ -69,11 +69,11 @@ class AV1Trans16x8HT : public libaom_test::TransformTestBase,
IhtFunc inv_txfm_; IhtFunc inv_txfm_;
}; };
TEST_P(AV1Trans16x8HT, AccuracyCheck) { RunAccuracyCheck(1); } TEST_P(AV1Trans16x8HT, AccuracyCheck) { RunAccuracyCheck(0); }
TEST_P(AV1Trans16x8HT, CoeffCheck) { RunCoeffCheck(); } TEST_P(AV1Trans16x8HT, CoeffCheck) { RunCoeffCheck(); }
TEST_P(AV1Trans16x8HT, MemCheck) { RunMemCheck(); } TEST_P(AV1Trans16x8HT, MemCheck) { RunMemCheck(); }
TEST_P(AV1Trans16x8HT, InvCoeffCheck) { RunInvCoeffCheck(); } TEST_P(AV1Trans16x8HT, InvCoeffCheck) { RunInvCoeffCheck(); }
TEST_P(AV1Trans16x8HT, InvAccuracyCheck) { RunInvAccuracyCheck(1); } TEST_P(AV1Trans16x8HT, InvAccuracyCheck) { RunInvAccuracyCheck(0); }
using std::tr1::make_tuple; using std::tr1::make_tuple;

Просмотреть файл

@ -70,10 +70,10 @@ class AV1Trans32x16HT : public libaom_test::TransformTestBase,
}; };
TEST_P(AV1Trans32x16HT, MemCheck) { RunMemCheck(); } TEST_P(AV1Trans32x16HT, MemCheck) { RunMemCheck(); }
TEST_P(AV1Trans32x16HT, AccuracyCheck) { RunAccuracyCheck(43); } TEST_P(AV1Trans32x16HT, AccuracyCheck) { RunAccuracyCheck(2); }
TEST_P(AV1Trans32x16HT, CoeffCheck) { RunCoeffCheck(); } TEST_P(AV1Trans32x16HT, CoeffCheck) { RunCoeffCheck(); }
TEST_P(AV1Trans32x16HT, InvCoeffCheck) { RunInvCoeffCheck(); } TEST_P(AV1Trans32x16HT, InvCoeffCheck) { RunInvCoeffCheck(); }
TEST_P(AV1Trans32x16HT, InvAccuracyCheck) { RunInvAccuracyCheck(9); } TEST_P(AV1Trans32x16HT, InvAccuracyCheck) { RunInvAccuracyCheck(1); }
using std::tr1::make_tuple; using std::tr1::make_tuple;
const Ht32x16Param kArrayHt32x16Param_c[] = { const Ht32x16Param kArrayHt32x16Param_c[] = {

Просмотреть файл

@ -70,10 +70,10 @@ class AV1Trans8x16HT : public libaom_test::TransformTestBase,
}; };
TEST_P(AV1Trans8x16HT, MemCheck) { RunMemCheck(); } TEST_P(AV1Trans8x16HT, MemCheck) { RunMemCheck(); }
TEST_P(AV1Trans8x16HT, AccuracyCheck) { RunAccuracyCheck(1); } TEST_P(AV1Trans8x16HT, AccuracyCheck) { RunAccuracyCheck(0); }
TEST_P(AV1Trans8x16HT, CoeffCheck) { RunCoeffCheck(); } TEST_P(AV1Trans8x16HT, CoeffCheck) { RunCoeffCheck(); }
TEST_P(AV1Trans8x16HT, InvCoeffCheck) { RunInvCoeffCheck(); } TEST_P(AV1Trans8x16HT, InvCoeffCheck) { RunInvCoeffCheck(); }
TEST_P(AV1Trans8x16HT, InvAccuracyCheck) { RunInvAccuracyCheck(1); } TEST_P(AV1Trans8x16HT, InvAccuracyCheck) { RunInvAccuracyCheck(0); }
using std::tr1::make_tuple; using std::tr1::make_tuple;