Do interpolation with lower-tap filter first
There is a 0.003% BD-rate change on the lowres dataset.
Change-Id: Ie62a5ea07bdcfe0a62f37e8f981382df4cc59918
This commit is contained in:
Родитель
d7ec47f0c2
Коммит
b79424e973
|
@ -146,43 +146,79 @@ void av1_convolve(const uint8_t *src, int src_stride, uint8_t *dst,
|
|||
av1_convolve_vert(src, src_stride, dst, dst_stride, w, h, filter_params,
|
||||
subpel_y_q4, y_step_q4, ref_idx);
|
||||
} else {
|
||||
// temp's size is set to (maximum possible intermediate_height) *
|
||||
// MAX_BLOCK_WIDTH
|
||||
uint8_t temp[((((MAX_BLOCK_HEIGHT - 1) * MAX_STEP + 15) >> SUBPEL_BITS) +
|
||||
// temp's size is set to (maximum possible intermediate height or width) *
|
||||
// MAX_SB_SIZE
|
||||
uint8_t temp[((((MAX_SB_SIZE - 1) * MAX_STEP + 15) >> SUBPEL_BITS) +
|
||||
MAX_FILTER_TAP) *
|
||||
MAX_BLOCK_WIDTH];
|
||||
int temp_stride = MAX_BLOCK_WIDTH;
|
||||
MAX_SB_SIZE];
|
||||
int filter_size;
|
||||
InterpFilterParams filter_params;
|
||||
#if CONFIG_DUAL_FILTER
|
||||
InterpFilterParams filter_params_x =
|
||||
av1_get_interp_filter_params(interp_filter[1 + 2 * ref_idx]);
|
||||
InterpFilterParams filter_params_y =
|
||||
av1_get_interp_filter_params(interp_filter[0 + 2 * ref_idx]);
|
||||
InterpFilterParams filter_params = filter_params_x;
|
||||
|
||||
// The filter size implies the required number of reference pixels for
|
||||
// the second stage filtering. It is possible that the two directions
|
||||
// require different filter sizes.
|
||||
int filter_size = filter_params_y.taps;
|
||||
#else
|
||||
InterpFilterParams filter_params =
|
||||
av1_get_interp_filter_params(interp_filter);
|
||||
int filter_size = filter_params.taps;
|
||||
#endif
|
||||
int intermediate_height =
|
||||
|
||||
#if CONFIG_DUAL_FILTER
|
||||
// we do filter with fewer taps first to reduce hardware implementation
|
||||
// complexity
|
||||
if (filter_params_y.taps < filter_params_x.taps) {
|
||||
int intermediate_width;
|
||||
int temp_stride;
|
||||
#if CONFIG_DUAL_FILTER
|
||||
filter_params = filter_params_y;
|
||||
filter_size = filter_params_x.taps;
|
||||
#else
|
||||
filter_params = av1_get_interp_filter_params(interp_filter);
|
||||
filter_size = filter_params.taps;
|
||||
#endif
|
||||
intermediate_width =
|
||||
(((w - 1) * x_step_q4 + subpel_x_q4) >> SUBPEL_BITS) + filter_size;
|
||||
temp_stride = intermediate_width;
|
||||
|
||||
assert(filter_params.taps <= MAX_FILTER_TAP);
|
||||
|
||||
av1_convolve_vert(src - (filter_size / 2 - 1), src_stride, temp,
|
||||
temp_stride, intermediate_width, h, filter_params,
|
||||
subpel_y_q4, y_step_q4, 0);
|
||||
|
||||
#if CONFIG_DUAL_FILTER
|
||||
filter_params = filter_params_x;
|
||||
#else
|
||||
filter_params = av1_get_interp_filter_params(interp_filter);
|
||||
#endif
|
||||
assert(filter_params.taps <= MAX_FILTER_TAP);
|
||||
|
||||
av1_convolve_horiz(temp + (filter_size / 2 - 1), temp_stride, dst,
|
||||
dst_stride, w, h, filter_params, subpel_x_q4,
|
||||
x_step_q4, ref_idx);
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
int intermediate_height;
|
||||
int temp_stride = w;
|
||||
#if CONFIG_DUAL_FILTER
|
||||
filter_params = filter_params_x;
|
||||
filter_size = filter_params_y.taps;
|
||||
#else
|
||||
filter_params = av1_get_interp_filter_params(interp_filter);
|
||||
filter_size = filter_params.taps;
|
||||
#endif
|
||||
intermediate_height =
|
||||
(((h - 1) * y_step_q4 + subpel_y_q4) >> SUBPEL_BITS) + filter_size;
|
||||
|
||||
assert(filter_params.taps <= MAX_FILTER_TAP);
|
||||
|
||||
av1_convolve_horiz(src - src_stride * (filter_size / 2 - 1), src_stride,
|
||||
temp, temp_stride, w, intermediate_height, filter_params,
|
||||
subpel_x_q4, x_step_q4, 0);
|
||||
temp, temp_stride, w, intermediate_height,
|
||||
filter_params, subpel_x_q4, x_step_q4, 0);
|
||||
|
||||
#if CONFIG_DUAL_FILTER
|
||||
filter_params = filter_params_y;
|
||||
#else
|
||||
filter_params = av1_get_interp_filter_params(interp_filter);
|
||||
#endif
|
||||
filter_size = filter_params.taps;
|
||||
assert(filter_params.taps <= MAX_FILTER_TAP);
|
||||
|
||||
av1_convolve_vert(temp + temp_stride * (filter_size / 2 - 1), temp_stride,
|
||||
|
@ -190,6 +226,7 @@ void av1_convolve(const uint8_t *src, int src_stride, uint8_t *dst,
|
|||
y_step_q4, ref_idx);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void av1_convolve_init_c(void) {
|
||||
// A placeholder for SIMD initialization
|
||||
|
|
|
@ -137,6 +137,68 @@ TEST(AV1ConvolveTest, av1_convolve) {
|
|||
}
|
||||
}
|
||||
|
||||
#if CONFIG_EXT_INTERP && CONFIG_DUAL_FILTER
// Compares av1_convolve() on a single 1x1 output pixel against a scalar
// reference that filters each source column vertically, clips that
// intermediate value to pixel range, and then filters the clipped values
// horizontally.
//
// NOTE: both sub-pixel loops run over [1, 2), so only sub-pixel position 1 is
// exercised in each direction.
TEST(AV1ConvolveTest, av1_convolve_vert_first) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());

  // Index 0 supplies the vertical kernel and index 1 the horizontal kernel
  // used by the reference computation below.
  InterpFilter interp_filter[4] = { EIGHTTAP_REGULAR, MULTITAP_SHARP,
                                    EIGHTTAP_REGULAR, MULTITAP_SHARP };
  const InterpFilterParams filter_params_y =
      av1_get_interp_filter_params(interp_filter[0]);
  const InterpFilterParams filter_params_x =
      av1_get_interp_filter_params(interp_filter[1]);

  const int filter_size_x = filter_params_x.taps;
  const int filter_size_y = filter_params_y.taps;

  // Source window: one row per vertical tap, one column per horizontal tap.
  uint8_t src[12 * 12];
  const int src_stride = filter_size_x;
  const int src_len = static_cast<int>(sizeof(src) / sizeof(src[0]));

  uint8_t dst[1] = { 0 };
  const int dst_stride = 1;

  // A step of 16 with 1x1 block dimensions produces exactly one output pixel.
  const int x_step_q4 = 16;
  const int y_step_q4 = 16;
  const int w = 1;
  const int h = 1;
  const int avg = 0;

  // The source buffer and the reference model assume at most 12 taps.
  ASSERT_LE(filter_size_x, 12);
  ASSERT_LE(filter_size_y, 12);
  setup_convolve();

  for (int idx = 0; idx < src_len; ++idx) {
    src[idx] = rnd.Rand16() % (1 << 8);
  }

  // Offset the source pointer by (taps / 2 - 1) in each direction so that the
  // filter window handed to av1_convolve() starts at src[0].
  const int filter_center_x = filter_size_x / 2 - 1;
  const int filter_center_y = filter_size_y / 2 - 1;
  const uint8_t *const src_origin =
      src + src_stride * filter_center_y + filter_center_x;

  for (int subpel_x_q4 = 1; subpel_x_q4 < 2; ++subpel_x_q4) {
    for (int subpel_y_q4 = 1; subpel_y_q4 < 2; ++subpel_y_q4) {
      av1_convolve(src_origin, src_stride, dst, dst_stride, w, h,
                   interp_filter, subpel_x_q4, x_step_q4, subpel_y_q4,
                   y_step_q4, avg);

      const int16_t *const x_filter =
          av1_get_interp_filter_subpel_kernel(filter_params_x, subpel_x_q4);
      const int16_t *const y_filter =
          av1_get_interp_filter_subpel_kernel(filter_params_y, subpel_y_q4);

      // Reference: vertical pass per column, clip, then accumulate the
      // horizontal pass over the clipped column results.
      int dst_ref = 0;
      for (int col = 0; col < filter_size_x; ++col) {
        int column_sum = 0;
        for (int row = 0; row < filter_size_y; ++row) {
          column_sum += y_filter[row] * src[row * src_stride + col];
        }
        const int vert_pixel =
            clip_pixel(ROUND_POWER_OF_TWO(column_sum, FILTER_BITS));
        dst_ref += vert_pixel * x_filter[col];
      }
      dst_ref = clip_pixel(ROUND_POWER_OF_TWO(dst_ref, FILTER_BITS));

      EXPECT_EQ(dst[0], dst_ref);
    }
  }
}
#endif
|
||||
|
||||
TEST(AV1ConvolveTest, av1_convolve_avg) {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
#if CONFIG_DUAL_FILTER
|
||||
|
|
Загрузка…
Ссылка в новой задаче