Skip adding zero signal to prediction with DC-only idct

If the DC-only idct yields zero, we can skip the steps that
add the (all-zero) residual signal to the predicted signal.
DC only idct cases will occur more frequently at lower bit rates.

Similar changes can be made to the C versions of the high-bit-depth idct functions.

Change-Id: I53af22904568f7043091710da70ca8299bf361c5
This commit is contained in:
Yushin Cho 2017-03-30 11:58:20 -07:00
Родитель b5bf51ec82
Коммит 27acc47869
2 изменённых файлов: 13 добавлений и 0 удалений

Просмотреть файл

@ -145,6 +145,8 @@ void aom_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest,
out = WRAPLOW(dct_const_round_shift(out * cospi_16_64));
a1 = ROUND_POWER_OF_TWO(out, 4);
if (a1 == 0) return;
for (i = 0; i < 4; i++) {
dest[0] = clip_pixel_add(dest[0], a1);
dest[1] = clip_pixel_add(dest[1], a1);
@ -238,6 +240,7 @@ void aom_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64));
out = WRAPLOW(dct_const_round_shift(out * cospi_16_64));
a1 = ROUND_POWER_OF_TWO(out, 5);
if (a1 == 0) return;
for (j = 0; j < 8; ++j) {
for (i = 0; i < 8; ++i) dest[i] = clip_pixel_add(dest[i], a1);
dest += stride;
@ -776,6 +779,7 @@ void aom_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64));
out = WRAPLOW(dct_const_round_shift(out * cospi_16_64));
a1 = ROUND_POWER_OF_TWO(out, 6);
if (a1 == 0) return;
for (j = 0; j < 16; ++j) {
for (i = 0; i < 16; ++i) dest[i] = clip_pixel_add(dest[i], a1);
dest += stride;
@ -1245,6 +1249,7 @@ void aom_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64));
out = WRAPLOW(dct_const_round_shift(out * cospi_16_64));
a1 = ROUND_POWER_OF_TWO(out, 6);
if (a1 == 0) return;
for (j = 0; j < 32; ++j) {
for (i = 0; i < 32; ++i) dest[i] = clip_pixel_add(dest[i], a1);

Просмотреть файл

@ -163,6 +163,8 @@ void aom_idct4x4_1_add_sse2(const tran_low_t *input, uint8_t *dest,
a = (int)dct_const_round_shift(a * cospi_16_64);
a = ROUND_POWER_OF_TWO(a, 4);
if (a == 0) return;
dc_value = _mm_set1_epi16(a);
RECON_AND_STORE4X4(dest + 0 * stride, dc_value);
@ -521,6 +523,8 @@ void aom_idct8x8_1_add_sse2(const tran_low_t *input, uint8_t *dest,
a = (int)dct_const_round_shift(a * cospi_16_64);
a = ROUND_POWER_OF_TWO(a, 5);
if (a == 0) return;
dc_value = _mm_set1_epi16(a);
RECON_AND_STORE(dest + 0 * stride, dc_value);
@ -1291,6 +1295,8 @@ void aom_idct16x16_1_add_sse2(const tran_low_t *input, uint8_t *dest,
a = (int)dct_const_round_shift(a * cospi_16_64);
a = ROUND_POWER_OF_TWO(a, 6);
if (a == 0) return;
dc_value = _mm_set1_epi16(a);
for (i = 0; i < 16; ++i) {
@ -3437,6 +3443,8 @@ void aom_idct32x32_1_add_sse2(const tran_low_t *input, uint8_t *dest,
a = (int)dct_const_round_shift(a * cospi_16_64);
a = ROUND_POWER_OF_TWO(a, 6);
if (a == 0) return;
dc_value = _mm_set1_epi16(a);
for (j = 0; j < 32; ++j) {