optimize 8x8 fdct rounding for accuracy

The commit added a final rounding choice for 8x8 forward dct to get
rid of a sign bias at DC position and improve the accuracy in terms
of round trip error for 8x8 fDCT/iDCT.

This commit also enabled forward 8x8 dct test.

Change-Id: Ib67f99b0a24d513e230c7812bc04569d472fdc50
This commit is contained in:
Yaowu Xu 2013-02-22 11:14:04 -08:00
Родитель 4e2697f5cd
Коммит 22012ee994
3 изменённых файлов: 3 добавлений и 246 удалений

Просмотреть файл

@ -141,7 +141,7 @@ TEST(VP9Fdct8x8Test, ExtremalCheck) {
// Initialize a test block with input range {-255, 255}.
for (int j = 0; j < 64; ++j)
test_input_block[j] = rnd.Rand8() % 2 ? 255 : -255;
test_input_block[j] = rnd.Rand8() % 2 ? 255 : -256;
const int pitch = 16;
vp9_short_fdct8x8_c(test_input_block, test_temp_block, pitch);

Просмотреть файл

@ -72,7 +72,7 @@ endif
LIBVPX_TEST_SRCS-$(CONFIG_VP9) += convolve_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct4x4_test.cc
#LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct8x8_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct8x8_test.cc
#LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += dct16x16_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += variance_test.cc
#LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += dct32x32_test.cc

Просмотреть файл

@ -323,247 +323,6 @@ static const int16_t adst_i16[256] = {
};
#endif
/* Build-time switch between the 8x8 forward DCT implementations in this
 * file: a non-zero value compiles out the legacy code guarded by
 * `#if !NEW_FDCT8x8` and enables the code guarded by `#if NEW_FDCT8x8`. */
#define NEW_FDCT8x8 1
#if !NEW_FDCT8x8
/* Fixed-point rotation factors for the legacy 8x8 forward DCT.
 * xC<a>S<b> is approximately cos(a * pi / 16) (== sin(b * pi / 16)) scaled
 * by 2^SHIFT_BITS, e.g. 11585 / 16384 ~= cos(pi / 4). */
static const int xC1S7 = 16069;
static const int xC2S6 = 15137;
static const int xC3S5 = 13623;
static const int xC4S4 = 11585;
static const int xC5S3 = 9102;
static const int xC6S2 = 6270;
static const int xC7S1 = 3196;

/* Number of fractional bits carried by the xC*S* factors. */
#define SHIFT_BITS 14
/* Adds half of 2^SHIFT_BITS to X so that the following right shift rounds
 * instead of truncating.  Written as a single statement: callers must
 * terminate it with ';'. */
#define DOROUND(X) do { (X) += 1 << (SHIFT_BITS - 1); } while (0)
/* Extra precision bits removed (with rounding) after the column pass. */
#define FINAL_SHIFT 3
#define FINAL_ROUNDING (1 << (FINAL_SHIFT - 1))
/* Precision bits added to the samples before the row pass. */
#define IN_SHIFT (FINAL_SHIFT + 1)

/* Returns coeff * value with the SHIFT_BITS fractional bits rounded away.
 * Like the code it replaces, this relies on '>>' being an arithmetic shift
 * for negative products. */
static int legacy_fdct8_mul_round(int coeff, int value) {
  int product = coeff * value;
  DOROUND(product);
  return product >> SHIFT_BITS;
}

/* One 8-point forward DCT stage shared by the row and the column pass.
 * Reads in[0..7] and writes the eight frequency terms to out[0..7]. */
static void legacy_fdct8_1d(const int *in, int *out) {
  /* Sums and differences of the sample pairs used by the butterflies. */
  const int is07 = in[0] + in[7];
  const int is12 = in[1] + in[2];
  const int is34 = in[3] + in[4];
  const int is56 = in[5] + in[6];
  const int id07 = in[0] - in[7];
  const int id12 = in[1] - in[2];
  const int id34 = in[3] - in[4];
  const int id56 = in[5] - in[6];
  const int is0734 = is07 + is34;
  const int is1256 = is12 + is56;
  /* Products re-used by the odd outputs:
   * c4s4 * (s12 - s56) and c4s4 * (d12 + d56). */
  const int common1 = legacy_fdct8_mul_round(xC4S4, is12 - is56);
  const int common2 = legacy_fdct8_mul_round(xC4S4, id12 + id56);
  int rot_x, rot_y;

  out[0] = legacy_fdct8_mul_round(xC4S4, is0734 + is1256);
  out[4] = legacy_fdct8_mul_round(xC4S4, is0734 - is1256);

  /* Rotation for outputs 2 and 6. */
  rot_x = id12 - id56;
  rot_y = is07 - is34;
  out[2] = legacy_fdct8_mul_round(xC6S2, rot_x) +
           legacy_fdct8_mul_round(xC2S6, rot_y);
  out[6] = legacy_fdct8_mul_round(xC6S2, rot_y) -
           legacy_fdct8_mul_round(xC2S6, rot_x);

  /* Rotation for outputs 1 and 7. */
  rot_x = common1 + id07;
  rot_y = -(id34 + common2);
  out[1] = legacy_fdct8_mul_round(xC1S7, rot_x) -
           legacy_fdct8_mul_round(xC7S1, rot_y);
  out[7] = legacy_fdct8_mul_round(xC7S1, rot_x) +
           legacy_fdct8_mul_round(xC1S7, rot_y);

  /* Rotation for outputs 3 and 5. */
  rot_x = id07 - common1;
  rot_y = id34 - common2;
  out[3] = legacy_fdct8_mul_round(xC3S5, rot_x) -
           legacy_fdct8_mul_round(xC5S3, rot_y);
  out[5] = legacy_fdct8_mul_round(xC5S3, rot_x) +
           legacy_fdct8_mul_round(xC3S5, rot_y);
}

/* Legacy integer 8x8 forward DCT.
 *
 * InputData   8 rows of 8 samples; consecutive rows are (pitch >> 1) shorts
 *             apart (so pitch is presumably a byte stride - confirm with
 *             the callers).
 * OutputData  receives the 64 coefficients; the term for vertical frequency
 *             k of column c is stored at OutputData[k * 8 + c].
 * pitch       input row stride, halved before being applied to InputData.
 *
 * The rows are transformed first with IN_SHIFT extra bits of precision, then
 * the columns; each result is rounded and scaled down by FINAL_SHIFT bits. */
void vp9_short_fdct8x8_c(short *InputData, short *OutputData, int pitch) {
  const int short_pitch = pitch >> 1;
  int InterData[64];
  int line_in[8];
  int line_out[8];
  int row, col, k;

  /* Row pass. */
  for (row = 0; row < 8; row++) {
    const short *src = InputData + row * short_pitch;
    /* Scale by multiplication: left-shifting a negative value is undefined
     * behavior, and (a + b) << s equals a * 2^s + b * 2^s for these ranges. */
    for (k = 0; k < 8; k++)
      line_in[k] = src[k] * (1 << IN_SHIFT);
    legacy_fdct8_1d(line_in, &InterData[row * 8]);
  }

  /* Performed DCT on rows, now transform the columns. */
  for (col = 0; col < 8; col++) {
    for (k = 0; k < 8; k++)
      line_in[k] = InterData[k * 8 + col];
    legacy_fdct8_1d(line_in, line_out);
    for (k = 0; k < 8; k++)
      OutputData[k * 8 + col] =
          (line_out[k] + FINAL_ROUNDING) >> FINAL_SHIFT;
  }
}
#endif
/* For test */
#define TEST_INT 1
#if TEST_INT
@ -918,7 +677,6 @@ void vp9_short_fdct8x4_c(short *input, short *output, int pitch)
vp9_short_fdct4x4_c(input + 4, output + 16, pitch);
}
#if NEW_FDCT8x8
static void fdct8_1d(int16_t *input, int16_t *output) {
int16_t step[8];
int temp1, temp2;
@ -986,10 +744,9 @@ void vp9_short_fdct8x8_c(int16_t *input, int16_t *output, int pitch) {
temp_in[j] = out[j + i * 8];
fdct8_1d(temp_in, temp_out);
for (j = 0; j < 8; ++j)
output[j + i * 8] = temp_out[j] >> 1;
output[j + i * 8] = temp_out[j] / 2;
}
}
#endif
#if CONFIG_INTHT
static void fadst8_1d(int16_t *input, int16_t *output) {