From b79424e9733cfd9af7ace5a8f98fa28490e2c679 Mon Sep 17 00:00:00 2001 From: Angie Chiang Date: Mon, 28 Nov 2016 18:49:51 -0800 Subject: [PATCH] Do interpolation with lower-tap filter first There is 0.003% BDRate change on lowres dataset. Change-Id: Ie62a5ea07bdcfe0a62f37e8f981382df4cc59918 --- av1/common/convolve.c | 97 +++++++++++++++++++++++++++------------ test/av1_convolve_test.cc | 62 +++++++++++++++++++++++++ 2 files changed, 129 insertions(+), 30 deletions(-) diff --git a/av1/common/convolve.c b/av1/common/convolve.c index 975c759d8..bb0f4d3ea 100644 --- a/av1/common/convolve.c +++ b/av1/common/convolve.c @@ -146,48 +146,85 @@ void av1_convolve(const uint8_t *src, int src_stride, uint8_t *dst, av1_convolve_vert(src, src_stride, dst, dst_stride, w, h, filter_params, subpel_y_q4, y_step_q4, ref_idx); } else { - // temp's size is set to (maximum possible intermediate_height) * - // MAX_BLOCK_WIDTH - uint8_t temp[((((MAX_BLOCK_HEIGHT - 1) * MAX_STEP + 15) >> SUBPEL_BITS) + + // temp's size is set to (maximum possible intermediate height or width) * + // MAX_SB_SIZE + uint8_t temp[((((MAX_SB_SIZE - 1) * MAX_STEP + 15) >> SUBPEL_BITS) + MAX_FILTER_TAP) * - MAX_BLOCK_WIDTH]; - int temp_stride = MAX_BLOCK_WIDTH; + MAX_SB_SIZE]; + int filter_size; + InterpFilterParams filter_params; #if CONFIG_DUAL_FILTER InterpFilterParams filter_params_x = av1_get_interp_filter_params(interp_filter[1 + 2 * ref_idx]); InterpFilterParams filter_params_y = av1_get_interp_filter_params(interp_filter[0 + 2 * ref_idx]); - InterpFilterParams filter_params = filter_params_x; - - // The filter size implies the required number of reference pixels for - // the second stage filtering. It is possible that the two directions - // require different filter sizes. 
- int filter_size = filter_params_y.taps; -#else - InterpFilterParams filter_params = - av1_get_interp_filter_params(interp_filter); - int filter_size = filter_params.taps; #endif - int intermediate_height = - (((h - 1) * y_step_q4 + subpel_y_q4) >> SUBPEL_BITS) + filter_size; - - assert(filter_params.taps <= MAX_FILTER_TAP); - - av1_convolve_horiz(src - src_stride * (filter_size / 2 - 1), src_stride, - temp, temp_stride, w, intermediate_height, filter_params, - subpel_x_q4, x_step_q4, 0); #if CONFIG_DUAL_FILTER - filter_params = filter_params_y; + // we do filter with fewer taps first to reduce hardware implementation + // complexity + if (filter_params_y.taps < filter_params_x.taps) { + int intermediate_width; + int temp_stride; +#if CONFIG_DUAL_FILTER + filter_params = filter_params_y; + filter_size = filter_params_x.taps; #else - filter_params = av1_get_interp_filter_params(interp_filter); + filter_params = av1_get_interp_filter_params(interp_filter); + filter_size = filter_params.taps; #endif - filter_size = filter_params.taps; - assert(filter_params.taps <= MAX_FILTER_TAP); + intermediate_width = + (((w - 1) * x_step_q4 + subpel_x_q4) >> SUBPEL_BITS) + filter_size; + temp_stride = intermediate_width; - av1_convolve_vert(temp + temp_stride * (filter_size / 2 - 1), temp_stride, - dst, dst_stride, w, h, filter_params, subpel_y_q4, - y_step_q4, ref_idx); + assert(filter_params.taps <= MAX_FILTER_TAP); + + av1_convolve_vert(src - (filter_size / 2 - 1), src_stride, temp, + temp_stride, intermediate_width, h, filter_params, + subpel_y_q4, y_step_q4, 0); + +#if CONFIG_DUAL_FILTER + filter_params = filter_params_x; +#else + filter_params = av1_get_interp_filter_params(interp_filter); +#endif + assert(filter_params.taps <= MAX_FILTER_TAP); + + av1_convolve_horiz(temp + (filter_size / 2 - 1), temp_stride, dst, + dst_stride, w, h, filter_params, subpel_x_q4, + x_step_q4, ref_idx); + } else +#endif + { + int intermediate_height; + int temp_stride = w; +#if 
CONFIG_DUAL_FILTER + filter_params = filter_params_x; + filter_size = filter_params_y.taps; +#else + filter_params = av1_get_interp_filter_params(interp_filter); + filter_size = filter_params.taps; +#endif + intermediate_height = + (((h - 1) * y_step_q4 + subpel_y_q4) >> SUBPEL_BITS) + filter_size; + + assert(filter_params.taps <= MAX_FILTER_TAP); + + av1_convolve_horiz(src - src_stride * (filter_size / 2 - 1), src_stride, + temp, temp_stride, w, intermediate_height, + filter_params, subpel_x_q4, x_step_q4, 0); + +#if CONFIG_DUAL_FILTER + filter_params = filter_params_y; +#else + filter_params = av1_get_interp_filter_params(interp_filter); +#endif + assert(filter_params.taps <= MAX_FILTER_TAP); + + av1_convolve_vert(temp + temp_stride * (filter_size / 2 - 1), temp_stride, + dst, dst_stride, w, h, filter_params, subpel_y_q4, + y_step_q4, ref_idx); + } } } diff --git a/test/av1_convolve_test.cc b/test/av1_convolve_test.cc index fda5b8035..23501823d 100644 --- a/test/av1_convolve_test.cc +++ b/test/av1_convolve_test.cc @@ -137,6 +137,68 @@ TEST(AV1ConvolveTest, av1_convolve) { } } +#if CONFIG_EXT_INTERP && CONFIG_DUAL_FILTER +TEST(AV1ConvolveTest, av1_convolve_vert_first) { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + InterpFilter interp_filter[4] = { EIGHTTAP_REGULAR, MULTITAP_SHARP, + EIGHTTAP_REGULAR, MULTITAP_SHARP }; + InterpFilterParams filter_params_x = + av1_get_interp_filter_params(interp_filter[1]); + InterpFilterParams filter_params_y = + av1_get_interp_filter_params(interp_filter[0]); + int filter_size_x = filter_params_x.taps; + int filter_size_y = filter_params_y.taps; + int filter_center_x = filter_size_x / 2 - 1; + int filter_center_y = filter_size_y / 2 - 1; + uint8_t src[12 * 12]; + int src_stride = filter_size_x; + uint8_t dst[1] = { 0 }; + int dst_stride = 1; + int x_step_q4 = 16; + int y_step_q4 = 16; + int avg = 0; + int w = 1; + int h = 1; + + int subpel_x_q4; + int subpel_y_q4; + + ASSERT_LE(filter_size_x, 12); + ASSERT_LE(filter_size_y, 
12); + setup_convolve(); + + for (int i = 0; i < static_cast<int>(sizeof(src) / sizeof(src[0])); i++) { + src[i] = rnd.Rand16() % (1 << 8); + } + + for (subpel_x_q4 = 1; subpel_x_q4 < 2; subpel_x_q4++) { + for (subpel_y_q4 = 1; subpel_y_q4 < 2; subpel_y_q4++) { + av1_convolve(src + src_stride * filter_center_y + filter_center_x, + src_stride, dst, dst_stride, w, h, interp_filter, + subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4, avg); + + const int16_t *x_filter = + av1_get_interp_filter_subpel_kernel(filter_params_x, subpel_x_q4); + const int16_t *y_filter = + av1_get_interp_filter_subpel_kernel(filter_params_y, subpel_y_q4); + + int temp[12]; + int dst_ref = 0; + for (int c = 0; c < filter_size_x; c++) { + temp[c] = 0; + for (int r = 0; r < filter_size_y; r++) { + temp[c] += y_filter[r] * src[r * filter_size_x + c]; + } + temp[c] = clip_pixel(ROUND_POWER_OF_TWO(temp[c], FILTER_BITS)); + dst_ref += temp[c] * x_filter[c]; + } + dst_ref = clip_pixel(ROUND_POWER_OF_TWO(dst_ref, FILTER_BITS)); + EXPECT_EQ(dst[0], dst_ref); + } + } +} +#endif + TEST(AV1ConvolveTest, av1_convolve_avg) { ACMRandom rnd(ACMRandom::DeterministicSeed()); #if CONFIG_DUAL_FILTER