Do interpolation with lower-tap filter first

This causes a 0.003% BD-rate change on the lowres dataset.
Change-Id: Ie62a5ea07bdcfe0a62f37e8f981382df4cc59918
This commit is contained in:
Angie Chiang 2016-11-28 18:49:51 -08:00
Родитель d7ec47f0c2
Коммит b79424e973
2 изменённых файлов: 129 добавлений и 30 удалений

Просмотреть файл

@ -146,43 +146,79 @@ void av1_convolve(const uint8_t *src, int src_stride, uint8_t *dst,
av1_convolve_vert(src, src_stride, dst, dst_stride, w, h, filter_params,
subpel_y_q4, y_step_q4, ref_idx);
} else {
// temp's size is set to (maximum possible intermediate_height) *
// MAX_BLOCK_WIDTH
uint8_t temp[((((MAX_BLOCK_HEIGHT - 1) * MAX_STEP + 15) >> SUBPEL_BITS) +
// temp's size is set to (maximum possible intermediate height or width) *
// MAX_SB_SIZE
uint8_t temp[((((MAX_SB_SIZE - 1) * MAX_STEP + 15) >> SUBPEL_BITS) +
MAX_FILTER_TAP) *
MAX_BLOCK_WIDTH];
int temp_stride = MAX_BLOCK_WIDTH;
MAX_SB_SIZE];
int filter_size;
InterpFilterParams filter_params;
#if CONFIG_DUAL_FILTER
InterpFilterParams filter_params_x =
av1_get_interp_filter_params(interp_filter[1 + 2 * ref_idx]);
InterpFilterParams filter_params_y =
av1_get_interp_filter_params(interp_filter[0 + 2 * ref_idx]);
InterpFilterParams filter_params = filter_params_x;
// The filter size implies the required number of reference pixels for
// the second stage filtering. It is possible that the two directions
// require different filter sizes.
int filter_size = filter_params_y.taps;
#else
InterpFilterParams filter_params =
av1_get_interp_filter_params(interp_filter);
int filter_size = filter_params.taps;
#endif
int intermediate_height =
#if CONFIG_DUAL_FILTER
// we do filter with fewer taps first to reduce hardware implementation
// complexity
if (filter_params_y.taps < filter_params_x.taps) {
int intermediate_width;
int temp_stride;
#if CONFIG_DUAL_FILTER
filter_params = filter_params_y;
filter_size = filter_params_x.taps;
#else
filter_params = av1_get_interp_filter_params(interp_filter);
filter_size = filter_params.taps;
#endif
intermediate_width =
(((w - 1) * x_step_q4 + subpel_x_q4) >> SUBPEL_BITS) + filter_size;
temp_stride = intermediate_width;
assert(filter_params.taps <= MAX_FILTER_TAP);
av1_convolve_vert(src - (filter_size / 2 - 1), src_stride, temp,
temp_stride, intermediate_width, h, filter_params,
subpel_y_q4, y_step_q4, 0);
#if CONFIG_DUAL_FILTER
filter_params = filter_params_x;
#else
filter_params = av1_get_interp_filter_params(interp_filter);
#endif
assert(filter_params.taps <= MAX_FILTER_TAP);
av1_convolve_horiz(temp + (filter_size / 2 - 1), temp_stride, dst,
dst_stride, w, h, filter_params, subpel_x_q4,
x_step_q4, ref_idx);
} else
#endif
{
int intermediate_height;
int temp_stride = w;
#if CONFIG_DUAL_FILTER
filter_params = filter_params_x;
filter_size = filter_params_y.taps;
#else
filter_params = av1_get_interp_filter_params(interp_filter);
filter_size = filter_params.taps;
#endif
intermediate_height =
(((h - 1) * y_step_q4 + subpel_y_q4) >> SUBPEL_BITS) + filter_size;
assert(filter_params.taps <= MAX_FILTER_TAP);
av1_convolve_horiz(src - src_stride * (filter_size / 2 - 1), src_stride,
temp, temp_stride, w, intermediate_height, filter_params,
subpel_x_q4, x_step_q4, 0);
temp, temp_stride, w, intermediate_height,
filter_params, subpel_x_q4, x_step_q4, 0);
#if CONFIG_DUAL_FILTER
filter_params = filter_params_y;
#else
filter_params = av1_get_interp_filter_params(interp_filter);
#endif
filter_size = filter_params.taps;
assert(filter_params.taps <= MAX_FILTER_TAP);
av1_convolve_vert(temp + temp_stride * (filter_size / 2 - 1), temp_stride,
@ -190,6 +226,7 @@ void av1_convolve(const uint8_t *src, int src_stride, uint8_t *dst,
y_step_q4, ref_idx);
}
}
}
void av1_convolve_init_c(void) {
// A placeholder for SIMD initialization

Просмотреть файл

@ -137,6 +137,68 @@ TEST(AV1ConvolveTest, av1_convolve) {
}
}
#if CONFIG_EXT_INTERP && CONFIG_DUAL_FILTER
// Verifies av1_convolve() for a single output pixel (w = h = 1) when the two
// directions use different filters. The expected value is computed by
// filtering vertically first, clipping the intermediate result to pixel
// range, and then filtering horizontally.
// NOTE(review): the filter pair is presumably chosen so that the vertical
// filter has fewer taps than the horizontal one, which is the condition under
// which av1_convolve() filters vertically first -- confirm tap counts.
TEST(AV1ConvolveTest, av1_convolve_vert_first) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  InterpFilter interp_filter[4] = { EIGHTTAP_REGULAR, MULTITAP_SHARP,
                                    EIGHTTAP_REGULAR, MULTITAP_SHARP };
  // Index 1 supplies the horizontal (x) filter, index 0 the vertical (y) one.
  InterpFilterParams filter_params_x =
      av1_get_interp_filter_params(interp_filter[1]);
  InterpFilterParams filter_params_y =
      av1_get_interp_filter_params(interp_filter[0]);
  int filter_size_x = filter_params_x.taps;
  int filter_size_y = filter_params_y.taps;
  // Offset of the filtered sample inside the tap window.
  int filter_center_x = filter_size_x / 2 - 1;
  int filter_center_y = filter_size_y / 2 - 1;
  // Large enough for a 12x12 tap window; only filter_size_y rows of
  // filter_size_x columns are actually read.
  uint8_t src[12 * 12];
  int src_stride = filter_size_x;
  uint8_t dst[1] = { 0 };
  int dst_stride = 1;
  int x_step_q4 = 16;
  int y_step_q4 = 16;
  int avg = 0;
  int w = 1;
  int h = 1;
  int subpel_x_q4;
  int subpel_y_q4;
  // Number of sub-pel offsets per pixel step.
  // NOTE(review): assumes q4 offsets span 0..15 (matching the step of 16
  // used above) -- confirm against SUBPEL_BITS.
  const int subpel_positions = 16;

  ASSERT_LE(filter_size_x, 12);
  ASSERT_LE(filter_size_y, 12);

  setup_convolve();

  for (int i = 0; i < static_cast<int>(sizeof(src) / sizeof(src[0])); i++) {
    src[i] = rnd.Rand16() % (1 << 8);
  }

  // Cover every non-zero fractional offset in both directions instead of
  // only (1, 1). Offset 0 stays excluded, as in the original loop bounds.
  for (subpel_x_q4 = 1; subpel_x_q4 < subpel_positions; subpel_x_q4++) {
    for (subpel_y_q4 = 1; subpel_y_q4 < subpel_positions; subpel_y_q4++) {
      av1_convolve(src + src_stride * filter_center_y + filter_center_x,
                   src_stride, dst, dst_stride, w, h, interp_filter,
                   subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4, avg);

      const int16_t *x_filter =
          av1_get_interp_filter_subpel_kernel(filter_params_x, subpel_x_q4);
      const int16_t *y_filter =
          av1_get_interp_filter_subpel_kernel(filter_params_y, subpel_y_q4);

      // Reference: vertical pass per column, clipped to pixel range, then
      // accumulated into the horizontal pass.
      int temp[12];
      int dst_ref = 0;
      for (int c = 0; c < filter_size_x; c++) {
        temp[c] = 0;
        for (int r = 0; r < filter_size_y; r++) {
          temp[c] += y_filter[r] * src[r * src_stride + c];
        }
        temp[c] = clip_pixel(ROUND_POWER_OF_TWO(temp[c], FILTER_BITS));
        dst_ref += temp[c] * x_filter[c];
      }
      dst_ref = clip_pixel(ROUND_POWER_OF_TWO(dst_ref, FILTER_BITS));
      EXPECT_EQ(dst[0], dst_ref) << "subpel_x_q4=" << subpel_x_q4
                                 << " subpel_y_q4=" << subpel_y_q4;
    }
  }
}
#endif
TEST(AV1ConvolveTest, av1_convolve_avg) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
#if CONFIG_DUAL_FILTER