optimize 8x8 fdct rounding for accuracy
This commit changes the final rounding step of the 8x8 forward DCT (the last scaling now uses `/ 2` instead of `>> 1`) to remove a sign bias at the DC position and to improve accuracy in terms of round-trip error for the 8x8 fDCT/iDCT pair. It also enables the forward 8x8 DCT test. Change-Id: Ib67f99b0a24d513e230c7812bc04569d472fdc50
This commit is contained in:
Родитель
4e2697f5cd
Коммит
22012ee994
|
@ -141,7 +141,7 @@ TEST(VP9Fdct8x8Test, ExtremalCheck) {
|
|||
|
||||
// Initialize a test block with input range {-255, 255}.
|
||||
for (int j = 0; j < 64; ++j)
|
||||
test_input_block[j] = rnd.Rand8() % 2 ? 255 : -255;
|
||||
test_input_block[j] = rnd.Rand8() % 2 ? 255 : -256;
|
||||
|
||||
const int pitch = 16;
|
||||
vp9_short_fdct8x8_c(test_input_block, test_temp_block, pitch);
|
||||
|
|
|
@ -72,7 +72,7 @@ endif
|
|||
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_VP9) += convolve_test.cc
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct4x4_test.cc
|
||||
#LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct8x8_test.cc
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct8x8_test.cc
|
||||
#LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += dct16x16_test.cc
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += variance_test.cc
|
||||
#LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += dct32x32_test.cc
|
||||
|
|
|
@ -323,247 +323,6 @@ static const int16_t adst_i16[256] = {
|
|||
};
|
||||
#endif
|
||||
|
||||
#define NEW_FDCT8x8 1
|
||||
#if !NEW_FDCT8x8
|
||||
static const int xC1S7 = 16069;
|
||||
static const int xC2S6 = 15137;
|
||||
static const int xC3S5 = 13623;
|
||||
static const int xC4S4 = 11585;
|
||||
static const int xC5S3 = 9102;
|
||||
static const int xC6S2 = 6270;
|
||||
static const int xC7S1 = 3196;
|
||||
|
||||
#define SHIFT_BITS 14
|
||||
#define DOROUND(X) X += (1<<(SHIFT_BITS-1));
|
||||
|
||||
#define FINAL_SHIFT 3
|
||||
#define FINAL_ROUNDING (1<<(FINAL_SHIFT -1))
|
||||
#define IN_SHIFT (FINAL_SHIFT+1)
|
||||
|
||||
|
||||
void vp9_short_fdct8x8_c(short *InputData, short *OutputData, int pitch) {
|
||||
int loop;
|
||||
int short_pitch = pitch >> 1;
|
||||
int is07, is12, is34, is56;
|
||||
int is0734, is1256;
|
||||
int id07, id12, id34, id56;
|
||||
int irot_input_x, irot_input_y;
|
||||
int icommon_product1; // Re-used product (c4s4 * (s12 - s56))
|
||||
int icommon_product2; // Re-used product (c4s4 * (d12 + d56))
|
||||
int temp1, temp2; // intermediate variable for computation
|
||||
|
||||
int InterData[64];
|
||||
int *ip = InterData;
|
||||
short *op = OutputData;
|
||||
|
||||
for (loop = 0; loop < 8; loop++) {
|
||||
// Pre calculate some common sums and differences.
|
||||
is07 = (InputData[0] + InputData[7]) << IN_SHIFT;
|
||||
is12 = (InputData[1] + InputData[2]) << IN_SHIFT;
|
||||
is34 = (InputData[3] + InputData[4]) << IN_SHIFT;
|
||||
is56 = (InputData[5] + InputData[6]) << IN_SHIFT;
|
||||
id07 = (InputData[0] - InputData[7]) << IN_SHIFT;
|
||||
id12 = (InputData[1] - InputData[2]) << IN_SHIFT;
|
||||
id34 = (InputData[3] - InputData[4]) << IN_SHIFT;
|
||||
id56 = (InputData[5] - InputData[6]) << IN_SHIFT;
|
||||
|
||||
is0734 = is07 + is34;
|
||||
is1256 = is12 + is56;
|
||||
|
||||
// Pre-Calculate some common product terms.
|
||||
icommon_product1 = xC4S4 * (is12 - is56);
|
||||
DOROUND(icommon_product1)
|
||||
icommon_product1 >>= SHIFT_BITS;
|
||||
|
||||
icommon_product2 = xC4S4 * (id12 + id56);
|
||||
DOROUND(icommon_product2)
|
||||
icommon_product2 >>= SHIFT_BITS;
|
||||
|
||||
|
||||
ip[0] = (xC4S4 * (is0734 + is1256));
|
||||
DOROUND(ip[0]);
|
||||
ip[0] >>= SHIFT_BITS;
|
||||
|
||||
ip[4] = (xC4S4 * (is0734 - is1256));
|
||||
DOROUND(ip[4]);
|
||||
ip[4] >>= SHIFT_BITS;
|
||||
|
||||
// Define inputs to rotation for outputs 2 and 6
|
||||
irot_input_x = id12 - id56;
|
||||
irot_input_y = is07 - is34;
|
||||
|
||||
// Apply rotation for outputs 2 and 6.
|
||||
temp1 = xC6S2 * irot_input_x;
|
||||
DOROUND(temp1);
|
||||
temp1 >>= SHIFT_BITS;
|
||||
temp2 = xC2S6 * irot_input_y;
|
||||
DOROUND(temp2);
|
||||
temp2 >>= SHIFT_BITS;
|
||||
ip[2] = temp1 + temp2;
|
||||
|
||||
temp1 = xC6S2 * irot_input_y;
|
||||
DOROUND(temp1);
|
||||
temp1 >>= SHIFT_BITS;
|
||||
temp2 = xC2S6 * irot_input_x;
|
||||
DOROUND(temp2);
|
||||
temp2 >>= SHIFT_BITS;
|
||||
ip[6] = temp1 - temp2;
|
||||
|
||||
// Define inputs to rotation for outputs 1 and 7
|
||||
irot_input_x = icommon_product1 + id07;
|
||||
irot_input_y = -(id34 + icommon_product2);
|
||||
|
||||
// Apply rotation for outputs 1 and 7.
|
||||
temp1 = xC1S7 * irot_input_x;
|
||||
DOROUND(temp1);
|
||||
temp1 >>= SHIFT_BITS;
|
||||
temp2 = xC7S1 * irot_input_y;
|
||||
DOROUND(temp2);
|
||||
temp2 >>= SHIFT_BITS;
|
||||
ip[1] = temp1 - temp2;
|
||||
|
||||
temp1 = xC7S1 * irot_input_x;
|
||||
DOROUND(temp1);
|
||||
temp1 >>= SHIFT_BITS;
|
||||
temp2 = xC1S7 * irot_input_y;
|
||||
DOROUND(temp2);
|
||||
temp2 >>= SHIFT_BITS;
|
||||
ip[7] = temp1 + temp2;
|
||||
|
||||
// Define inputs to rotation for outputs 3 and 5
|
||||
irot_input_x = id07 - icommon_product1;
|
||||
irot_input_y = id34 - icommon_product2;
|
||||
|
||||
// Apply rotation for outputs 3 and 5.
|
||||
temp1 = xC3S5 * irot_input_x;
|
||||
DOROUND(temp1);
|
||||
temp1 >>= SHIFT_BITS;
|
||||
temp2 = xC5S3 * irot_input_y;
|
||||
DOROUND(temp2);
|
||||
temp2 >>= SHIFT_BITS;
|
||||
ip[3] = temp1 - temp2;
|
||||
|
||||
|
||||
temp1 = xC5S3 * irot_input_x;
|
||||
DOROUND(temp1);
|
||||
temp1 >>= SHIFT_BITS;
|
||||
temp2 = xC3S5 * irot_input_y;
|
||||
DOROUND(temp2);
|
||||
temp2 >>= SHIFT_BITS;
|
||||
ip[5] = temp1 + temp2;
|
||||
|
||||
// Increment data pointer for next row
|
||||
InputData += short_pitch;
|
||||
ip += 8;
|
||||
}
|
||||
|
||||
// Performed DCT on rows, now transform the columns
|
||||
ip = InterData;
|
||||
for (loop = 0; loop < 8; loop++) {
|
||||
// Pre calculate some common sums and differences.
|
||||
is07 = ip[0 * 8] + ip[7 * 8];
|
||||
is12 = ip[1 * 8] + ip[2 * 8];
|
||||
is34 = ip[3 * 8] + ip[4 * 8];
|
||||
is56 = ip[5 * 8] + ip[6 * 8];
|
||||
|
||||
id07 = ip[0 * 8] - ip[7 * 8];
|
||||
id12 = ip[1 * 8] - ip[2 * 8];
|
||||
id34 = ip[3 * 8] - ip[4 * 8];
|
||||
id56 = ip[5 * 8] - ip[6 * 8];
|
||||
|
||||
is0734 = is07 + is34;
|
||||
is1256 = is12 + is56;
|
||||
|
||||
// Pre-Calculate some common product terms
|
||||
icommon_product1 = xC4S4 * (is12 - is56);
|
||||
icommon_product2 = xC4S4 * (id12 + id56);
|
||||
DOROUND(icommon_product1)
|
||||
DOROUND(icommon_product2)
|
||||
icommon_product1 >>= SHIFT_BITS;
|
||||
icommon_product2 >>= SHIFT_BITS;
|
||||
|
||||
|
||||
temp1 = xC4S4 * (is0734 + is1256);
|
||||
temp2 = xC4S4 * (is0734 - is1256);
|
||||
DOROUND(temp1);
|
||||
DOROUND(temp2);
|
||||
temp1 >>= SHIFT_BITS;
|
||||
|
||||
temp2 >>= SHIFT_BITS;
|
||||
op[0 * 8] = (temp1 + FINAL_ROUNDING) >> FINAL_SHIFT;
|
||||
op[4 * 8] = (temp2 + FINAL_ROUNDING) >> FINAL_SHIFT;
|
||||
|
||||
// Define inputs to rotation for outputs 2 and 6
|
||||
irot_input_x = id12 - id56;
|
||||
irot_input_y = is07 - is34;
|
||||
|
||||
// Apply rotation for outputs 2 and 6.
|
||||
temp1 = xC6S2 * irot_input_x;
|
||||
DOROUND(temp1);
|
||||
temp1 >>= SHIFT_BITS;
|
||||
temp2 = xC2S6 * irot_input_y;
|
||||
DOROUND(temp2);
|
||||
temp2 >>= SHIFT_BITS;
|
||||
op[2 * 8] = (temp1 + temp2 + FINAL_ROUNDING) >> FINAL_SHIFT;
|
||||
|
||||
temp1 = xC6S2 * irot_input_y;
|
||||
DOROUND(temp1);
|
||||
temp1 >>= SHIFT_BITS;
|
||||
temp2 = xC2S6 * irot_input_x;
|
||||
DOROUND(temp2);
|
||||
temp2 >>= SHIFT_BITS;
|
||||
op[6 * 8] = (temp1 - temp2 + FINAL_ROUNDING) >> FINAL_SHIFT;
|
||||
|
||||
// Define inputs to rotation for outputs 1 and 7
|
||||
irot_input_x = icommon_product1 + id07;
|
||||
irot_input_y = -(id34 + icommon_product2);
|
||||
|
||||
// Apply rotation for outputs 1 and 7.
|
||||
temp1 = xC1S7 * irot_input_x;
|
||||
DOROUND(temp1);
|
||||
temp1 >>= SHIFT_BITS;
|
||||
temp2 = xC7S1 * irot_input_y;
|
||||
DOROUND(temp2);
|
||||
temp2 >>= SHIFT_BITS;
|
||||
op[1 * 8] = (temp1 - temp2 + FINAL_ROUNDING) >> FINAL_SHIFT;
|
||||
|
||||
temp1 = xC7S1 * irot_input_x;
|
||||
DOROUND(temp1);
|
||||
temp1 >>= SHIFT_BITS;
|
||||
temp2 = xC1S7 * irot_input_y;
|
||||
DOROUND(temp2);
|
||||
temp2 >>= SHIFT_BITS;
|
||||
op[7 * 8] = (temp1 + temp2 + FINAL_ROUNDING) >> FINAL_SHIFT;
|
||||
|
||||
// Define inputs to rotation for outputs 3 and 5
|
||||
irot_input_x = id07 - icommon_product1;
|
||||
irot_input_y = id34 - icommon_product2;
|
||||
|
||||
// Apply rotation for outputs 3 and 5.
|
||||
temp1 = xC3S5 * irot_input_x;
|
||||
DOROUND(temp1);
|
||||
temp1 >>= SHIFT_BITS;
|
||||
temp2 = xC5S3 * irot_input_y;
|
||||
DOROUND(temp2);
|
||||
temp2 >>= SHIFT_BITS;
|
||||
op[3 * 8] = (temp1 - temp2 + FINAL_ROUNDING) >> FINAL_SHIFT;
|
||||
|
||||
|
||||
temp1 = xC5S3 * irot_input_x;
|
||||
DOROUND(temp1);
|
||||
temp1 >>= SHIFT_BITS;
|
||||
temp2 = xC3S5 * irot_input_y;
|
||||
DOROUND(temp2);
|
||||
temp2 >>= SHIFT_BITS;
|
||||
op[5 * 8] = (temp1 + temp2 + FINAL_ROUNDING) >> FINAL_SHIFT;
|
||||
|
||||
// Increment data pointer for next column.
|
||||
ip++;
|
||||
op++;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/* For test */
|
||||
#define TEST_INT 1
|
||||
#if TEST_INT
|
||||
|
@ -918,7 +677,6 @@ void vp9_short_fdct8x4_c(short *input, short *output, int pitch)
|
|||
vp9_short_fdct4x4_c(input + 4, output + 16, pitch);
|
||||
}
|
||||
|
||||
#if NEW_FDCT8x8
|
||||
static void fdct8_1d(int16_t *input, int16_t *output) {
|
||||
int16_t step[8];
|
||||
int temp1, temp2;
|
||||
|
@ -986,10 +744,9 @@ void vp9_short_fdct8x8_c(int16_t *input, int16_t *output, int pitch) {
|
|||
temp_in[j] = out[j + i * 8];
|
||||
fdct8_1d(temp_in, temp_out);
|
||||
for (j = 0; j < 8; ++j)
|
||||
output[j + i * 8] = temp_out[j] >> 1;
|
||||
output[j + i * 8] = temp_out[j] / 2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#if CONFIG_INTHT
|
||||
static void fadst8_1d(int16_t *input, int16_t *output) {
|
||||
|
|
Загрузка…
Ссылка в новой задаче