Do interpolation with lower-tap filter first
The BD-rate change on the lowres dataset is 0.003%. Change-Id: Ie62a5ea07bdcfe0a62f37e8f981382df4cc59918
This commit is contained in:
Родитель
d7ec47f0c2
Коммит
b79424e973
|
@@ -146,43 +146,79 @@ void av1_convolve(const uint8_t *src, int src_stride, uint8_t *dst,
|
||||||
av1_convolve_vert(src, src_stride, dst, dst_stride, w, h, filter_params,
|
av1_convolve_vert(src, src_stride, dst, dst_stride, w, h, filter_params,
|
||||||
subpel_y_q4, y_step_q4, ref_idx);
|
subpel_y_q4, y_step_q4, ref_idx);
|
||||||
} else {
|
} else {
|
||||||
// temp's size is set to (maximum possible intermediate_height) *
|
// temp's size is set to (maximum possible intermediate height or width) *
|
||||||
// MAX_BLOCK_WIDTH
|
// MAX_SB_SIZE
|
||||||
uint8_t temp[((((MAX_BLOCK_HEIGHT - 1) * MAX_STEP + 15) >> SUBPEL_BITS) +
|
uint8_t temp[((((MAX_SB_SIZE - 1) * MAX_STEP + 15) >> SUBPEL_BITS) +
|
||||||
MAX_FILTER_TAP) *
|
MAX_FILTER_TAP) *
|
||||||
MAX_BLOCK_WIDTH];
|
MAX_SB_SIZE];
|
||||||
int temp_stride = MAX_BLOCK_WIDTH;
|
int filter_size;
|
||||||
|
InterpFilterParams filter_params;
|
||||||
#if CONFIG_DUAL_FILTER
|
#if CONFIG_DUAL_FILTER
|
||||||
InterpFilterParams filter_params_x =
|
InterpFilterParams filter_params_x =
|
||||||
av1_get_interp_filter_params(interp_filter[1 + 2 * ref_idx]);
|
av1_get_interp_filter_params(interp_filter[1 + 2 * ref_idx]);
|
||||||
InterpFilterParams filter_params_y =
|
InterpFilterParams filter_params_y =
|
||||||
av1_get_interp_filter_params(interp_filter[0 + 2 * ref_idx]);
|
av1_get_interp_filter_params(interp_filter[0 + 2 * ref_idx]);
|
||||||
InterpFilterParams filter_params = filter_params_x;
|
|
||||||
|
|
||||||
// The filter size implies the required number of reference pixels for
|
|
||||||
// the second stage filtering. It is possible that the two directions
|
|
||||||
// require different filter sizes.
|
|
||||||
int filter_size = filter_params_y.taps;
|
|
||||||
#else
|
|
||||||
InterpFilterParams filter_params =
|
|
||||||
av1_get_interp_filter_params(interp_filter);
|
|
||||||
int filter_size = filter_params.taps;
|
|
||||||
#endif
|
#endif
|
||||||
int intermediate_height =
|
|
||||||
|
#if CONFIG_DUAL_FILTER
|
||||||
|
// Apply the filter with fewer taps first to reduce hardware implementation
|
||||||
|
// complexity
|
||||||
|
if (filter_params_y.taps < filter_params_x.taps) {
|
||||||
|
int intermediate_width;
|
||||||
|
int temp_stride;
|
||||||
|
#if CONFIG_DUAL_FILTER
|
||||||
|
filter_params = filter_params_y;
|
||||||
|
filter_size = filter_params_x.taps;
|
||||||
|
#else
|
||||||
|
filter_params = av1_get_interp_filter_params(interp_filter);
|
||||||
|
filter_size = filter_params.taps;
|
||||||
|
#endif
|
||||||
|
intermediate_width =
|
||||||
|
(((w - 1) * x_step_q4 + subpel_x_q4) >> SUBPEL_BITS) + filter_size;
|
||||||
|
temp_stride = intermediate_width;
|
||||||
|
|
||||||
|
assert(filter_params.taps <= MAX_FILTER_TAP);
|
||||||
|
|
||||||
|
av1_convolve_vert(src - (filter_size / 2 - 1), src_stride, temp,
|
||||||
|
temp_stride, intermediate_width, h, filter_params,
|
||||||
|
subpel_y_q4, y_step_q4, 0);
|
||||||
|
|
||||||
|
#if CONFIG_DUAL_FILTER
|
||||||
|
filter_params = filter_params_x;
|
||||||
|
#else
|
||||||
|
filter_params = av1_get_interp_filter_params(interp_filter);
|
||||||
|
#endif
|
||||||
|
assert(filter_params.taps <= MAX_FILTER_TAP);
|
||||||
|
|
||||||
|
av1_convolve_horiz(temp + (filter_size / 2 - 1), temp_stride, dst,
|
||||||
|
dst_stride, w, h, filter_params, subpel_x_q4,
|
||||||
|
x_step_q4, ref_idx);
|
||||||
|
} else
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
int intermediate_height;
|
||||||
|
int temp_stride = w;
|
||||||
|
#if CONFIG_DUAL_FILTER
|
||||||
|
filter_params = filter_params_x;
|
||||||
|
filter_size = filter_params_y.taps;
|
||||||
|
#else
|
||||||
|
filter_params = av1_get_interp_filter_params(interp_filter);
|
||||||
|
filter_size = filter_params.taps;
|
||||||
|
#endif
|
||||||
|
intermediate_height =
|
||||||
(((h - 1) * y_step_q4 + subpel_y_q4) >> SUBPEL_BITS) + filter_size;
|
(((h - 1) * y_step_q4 + subpel_y_q4) >> SUBPEL_BITS) + filter_size;
|
||||||
|
|
||||||
assert(filter_params.taps <= MAX_FILTER_TAP);
|
assert(filter_params.taps <= MAX_FILTER_TAP);
|
||||||
|
|
||||||
av1_convolve_horiz(src - src_stride * (filter_size / 2 - 1), src_stride,
|
av1_convolve_horiz(src - src_stride * (filter_size / 2 - 1), src_stride,
|
||||||
temp, temp_stride, w, intermediate_height, filter_params,
|
temp, temp_stride, w, intermediate_height,
|
||||||
subpel_x_q4, x_step_q4, 0);
|
filter_params, subpel_x_q4, x_step_q4, 0);
|
||||||
|
|
||||||
#if CONFIG_DUAL_FILTER
|
#if CONFIG_DUAL_FILTER
|
||||||
filter_params = filter_params_y;
|
filter_params = filter_params_y;
|
||||||
#else
|
#else
|
||||||
filter_params = av1_get_interp_filter_params(interp_filter);
|
filter_params = av1_get_interp_filter_params(interp_filter);
|
||||||
#endif
|
#endif
|
||||||
filter_size = filter_params.taps;
|
|
||||||
assert(filter_params.taps <= MAX_FILTER_TAP);
|
assert(filter_params.taps <= MAX_FILTER_TAP);
|
||||||
|
|
||||||
av1_convolve_vert(temp + temp_stride * (filter_size / 2 - 1), temp_stride,
|
av1_convolve_vert(temp + temp_stride * (filter_size / 2 - 1), temp_stride,
|
||||||
|
@@ -190,6 +226,7 @@ void av1_convolve(const uint8_t *src, int src_stride, uint8_t *dst,
|
||||||
y_step_q4, ref_idx);
|
y_step_q4, ref_idx);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void av1_convolve_init_c(void) {
|
void av1_convolve_init_c(void) {
|
||||||
// A placeholder for SIMD initialization
|
// A placeholder for SIMD initialization
|
||||||
|
|
|
@@ -137,6 +137,68 @@ TEST(AV1ConvolveTest, av1_convolve) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if CONFIG_EXT_INTERP && CONFIG_DUAL_FILTER
|
||||||
|
TEST(AV1ConvolveTest, av1_convolve_vert_first) {
|
||||||
|
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||||
|
InterpFilter interp_filter[4] = { EIGHTTAP_REGULAR, MULTITAP_SHARP,
|
||||||
|
EIGHTTAP_REGULAR, MULTITAP_SHARP };
|
||||||
|
InterpFilterParams filter_params_x =
|
||||||
|
av1_get_interp_filter_params(interp_filter[1]);
|
||||||
|
InterpFilterParams filter_params_y =
|
||||||
|
av1_get_interp_filter_params(interp_filter[0]);
|
||||||
|
int filter_size_x = filter_params_x.taps;
|
||||||
|
int filter_size_y = filter_params_y.taps;
|
||||||
|
int filter_center_x = filter_size_x / 2 - 1;
|
||||||
|
int filter_center_y = filter_size_y / 2 - 1;
|
||||||
|
uint8_t src[12 * 12];
|
||||||
|
int src_stride = filter_size_x;
|
||||||
|
uint8_t dst[1] = { 0 };
|
||||||
|
int dst_stride = 1;
|
||||||
|
int x_step_q4 = 16;
|
||||||
|
int y_step_q4 = 16;
|
||||||
|
int avg = 0;
|
||||||
|
int w = 1;
|
||||||
|
int h = 1;
|
||||||
|
|
||||||
|
int subpel_x_q4;
|
||||||
|
int subpel_y_q4;
|
||||||
|
|
||||||
|
ASSERT_LE(filter_size_x, 12);
|
||||||
|
ASSERT_LE(filter_size_y, 12);
|
||||||
|
setup_convolve();
|
||||||
|
|
||||||
|
for (int i = 0; i < static_cast<int>(sizeof(src) / sizeof(src[0])); i++) {
|
||||||
|
src[i] = rnd.Rand16() % (1 << 8);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (subpel_x_q4 = 1; subpel_x_q4 < 2; subpel_x_q4++) {
|
||||||
|
for (subpel_y_q4 = 1; subpel_y_q4 < 2; subpel_y_q4++) {
|
||||||
|
av1_convolve(src + src_stride * filter_center_y + filter_center_x,
|
||||||
|
src_stride, dst, dst_stride, w, h, interp_filter,
|
||||||
|
subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4, avg);
|
||||||
|
|
||||||
|
const int16_t *x_filter =
|
||||||
|
av1_get_interp_filter_subpel_kernel(filter_params_x, subpel_x_q4);
|
||||||
|
const int16_t *y_filter =
|
||||||
|
av1_get_interp_filter_subpel_kernel(filter_params_y, subpel_y_q4);
|
||||||
|
|
||||||
|
int temp[12];
|
||||||
|
int dst_ref = 0;
|
||||||
|
for (int c = 0; c < filter_size_x; c++) {
|
||||||
|
temp[c] = 0;
|
||||||
|
for (int r = 0; r < filter_size_y; r++) {
|
||||||
|
temp[c] += y_filter[r] * src[r * filter_size_x + c];
|
||||||
|
}
|
||||||
|
temp[c] = clip_pixel(ROUND_POWER_OF_TWO(temp[c], FILTER_BITS));
|
||||||
|
dst_ref += temp[c] * x_filter[c];
|
||||||
|
}
|
||||||
|
dst_ref = clip_pixel(ROUND_POWER_OF_TWO(dst_ref, FILTER_BITS));
|
||||||
|
EXPECT_EQ(dst[0], dst_ref);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
TEST(AV1ConvolveTest, av1_convolve_avg) {
|
TEST(AV1ConvolveTest, av1_convolve_avg) {
|
||||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||||
#if CONFIG_DUAL_FILTER
|
#if CONFIG_DUAL_FILTER
|
||||||
|
|
Загрузка…
Ссылка в новой задаче