From 6fa47a5f16cb5631713ccc03d9b11f5345564abe Mon Sep 17 00:00:00 2001 From: Deb Mukherjee Date: Mon, 16 Jan 2012 11:21:12 -0800 Subject: [PATCH] Adds support for enhanced interpolation for subpel motion using an 8-tap filter. The results with 3 different 8-tap filters on the derf set are in: http://www.corp.google.com/~debargha/vp8_results/enhinterp.html The one that gives the most gain achieves an overall gain of about 0.6%. The results for a set of 12 hd (720p) videos are in: http://www.corp.google.com/~debargha/vp8_results/enhinterp_hd.html with max gain of 0.55% with the same filter. The best filter apparently achieves the best trade-off between pass band ripple and stop band attenuation. Change-Id: I919e28ae245c0493147fa0864f8c9d048a9dd530 --- configure | 1 + vp8/common/filter.c | 189 ++++++++++++++++++++++++++++++++++------- vp8/common/filter.h | 6 +- vp8/encoder/mbgraph.c | 8 +- vp8/encoder/onyx_if.c | 1 + vpx_scale/yv12config.h | 7 ++ 6 files changed, 175 insertions(+), 37 deletions(-) diff --git a/configure b/configure index a955d1466..c6bce0461 100755 --- a/configure +++ b/configure @@ -227,6 +227,7 @@ EXPERIMENT_LIST=" uvintra newnear newlpf + enhanced_interp " CONFIG_LIST=" external_build diff --git a/vp8/common/filter.c b/vp8/common/filter.c index 256ba4736..91022c1e7 100644 --- a/vp8/common/filter.c +++ b/vp8/common/filter.c @@ -25,6 +25,50 @@ DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]) = { 16, 112 } }; +#if CONFIG_ENHANCED_INTERP +#define FILTER_ALPHA 75 +DECLARE_ALIGNED(16, const short, vp8_sub_pel_filters[8][2*INTERP_EXTEND]) = +{ + /* Generated using MATLAB: + * alpha = 0.75; + * b=intfilt(8,4,alpha); + * bi=round(128*b); + * ba=flipud(reshape([bi 0], 8, 8)); + * disp(num2str(ba, '%d,')) + */ +#if FILTER_ALPHA == 75 + /* alpha = 0.75 */ + { 0, 0, 0, 128, 0, 0, 0, 0}, + {-1, 4, -11, 123, 18, -7, 3, -1}, + {-2, 7, -19, 113, 38, -14, 6, -2}, + {-3, 9, -22, 98, 59, -19, 8, -3}, + {-3, 9, -22, 80, 80, -22, 9, -3}, + {-3, 8, -19, 59, 98, -22, 9, -3}, + {-2, 6, -14, 38, 113, -19, 7, -2}, + {-1, 3, -7, 18, 123, -11, 4, -1} +#elif FILTER_ALPHA == 625 + /* alpha = 0.625 */ + { 0, 0, 0, 128, 0, 0, 0, 0}, + {-1, 3, -10, 123, 18, -6, 2, -1}, + {-1, 5, -17, 112, 38, -12, 4, -1}, + {-1, 6, -20, 97, 58, -17, 6, -1}, + {-2, 7, -20, 79, 79, -20, 7, -2}, + {-1, 6, -17, 58, 97, -20, 6, -1}, + {-1, 4, -12, 38, 112, -17, 5, -1}, + {-1, 2, -6, 18, 123, -10, 3, -1} +#elif FILTER_ALPHA == 50 + /* alpha = 0.5 */ + { 0, 0, 0, 128, 0, 0, 0, 0}, + { 0, 2, -10, 122, 18, -6, 2, 0}, + {-1, 4, -16, 112, 37, -11, 3, -1}, + {-1, 5, -18, 96, 58, -16, 4, -1}, + {-1, 5, -18, 78, 78, -18, 5, -1}, + {-1, 4, -16, 58, 96, -18, 5, -1}, + {-1, 3, -11, 37, 112, -16, 4, -1}, + { 0, 2, -6, 18, 122, -10, 2, 0} +#endif +}; +#else DECLARE_ALIGNED(16, const short, vp8_sub_pel_filters[8][6]) = { @@ -37,6 +81,7 @@ DECLARE_ALIGNED(16, const short, vp8_sub_pel_filters[8][6]) = { 1, -8, 36, 108, -11, 2 }, /* New 1/4 pel 6 tap filter */ { 0, -1, 12, 123, -6, 0 }, }; +#endif static void filter_block2d_first_pass ( @@ -56,13 +101,37 @@ static void filter_block2d_first_pass { for (j = 0; j < output_width; j++) { +#if INTERP_EXTEND == 3 Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) + ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) + - ((int)src_ptr[0] * vp8_filter[2]) + - ((int)src_ptr[pixel_step] * vp8_filter[3]) + - ((int)src_ptr[2*pixel_step] * vp8_filter[4]) + - ((int)src_ptr[3*pixel_step] * vp8_filter[5]) + + ((int)src_ptr[0] * vp8_filter[2]) + + ((int)src_ptr[pixel_step] * vp8_filter[3]) + + ((int)src_ptr[2*pixel_step] * vp8_filter[4]) + + ((int)src_ptr[3*pixel_step] * vp8_filter[5]) + (VP8_FILTER_WEIGHT >> 1); /* Rounding */ +#elif INTERP_EXTEND == 4 + Temp = ((int)src_ptr[-3 * (int)pixel_step] * vp8_filter[0]) + + ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[1]) + + ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[2]) + + ((int)src_ptr[0] * vp8_filter[3]) + + ((int)src_ptr[pixel_step] * vp8_filter[4]) + + ((int)src_ptr[2 * pixel_step] * vp8_filter[5]) + + ((int)src_ptr[3 * pixel_step] * vp8_filter[6]) + + ((int)src_ptr[4 * pixel_step] * vp8_filter[7]) + + (VP8_FILTER_WEIGHT >> 1); /* Rounding */ +#elif INTERP_EXTEND == 5 + Temp = ((int)src_ptr[-4 * (int)pixel_step] * vp8_filter[0]) + + ((int)src_ptr[-3 * (int)pixel_step] * vp8_filter[1]) + + ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[2]) + + ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[3]) + + ((int)src_ptr[0] * vp8_filter[4]) + + ((int)src_ptr[pixel_step] * vp8_filter[5]) + + ((int)src_ptr[2 * pixel_step] * vp8_filter[6]) + + ((int)src_ptr[3 * pixel_step] * vp8_filter[7]) + + ((int)src_ptr[4 * pixel_step] * vp8_filter[8]) + + ((int)src_ptr[5 * pixel_step] * vp8_filter[9]) + + (VP8_FILTER_WEIGHT >> 1); /* Rounding */ +#endif /* Normalize back to 0-255 */ Temp = Temp >> VP8_FILTER_SHIFT; @@ -102,13 +171,37 @@ static void filter_block2d_second_pass for (j = 0; j < output_width; j++) { /* Apply filter */ +#if INTERP_EXTEND == 3 Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) + ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) + - ((int)src_ptr[0] * vp8_filter[2]) + - ((int)src_ptr[pixel_step] * vp8_filter[3]) + - ((int)src_ptr[2*pixel_step] * vp8_filter[4]) + - ((int)src_ptr[3*pixel_step] * vp8_filter[5]) + + ((int)src_ptr[0] * vp8_filter[2]) + + ((int)src_ptr[pixel_step] * vp8_filter[3]) + + ((int)src_ptr[2*pixel_step] * vp8_filter[4]) + + ((int)src_ptr[3*pixel_step] * vp8_filter[5]) + (VP8_FILTER_WEIGHT >> 1); /* Rounding */ +#elif INTERP_EXTEND == 4 + Temp = ((int)src_ptr[-3 * (int)pixel_step] * vp8_filter[0]) + + ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[1]) + + ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[2]) + + ((int)src_ptr[0] * vp8_filter[3]) + + ((int)src_ptr[pixel_step] * vp8_filter[4]) + + ((int)src_ptr[2 * pixel_step] * vp8_filter[5]) + + ((int)src_ptr[3 * pixel_step] * vp8_filter[6]) + + ((int)src_ptr[4 * pixel_step] * vp8_filter[7]) + + (VP8_FILTER_WEIGHT >> 1); /* Rounding */ +#elif INTERP_EXTEND == 5 + Temp = ((int)src_ptr[-4 * (int)pixel_step] * vp8_filter[0]) + + ((int)src_ptr[-3 * (int)pixel_step] * vp8_filter[1]) + + ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[2]) + + ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[3]) + + ((int)src_ptr[0] * vp8_filter[4]) + + ((int)src_ptr[pixel_step] * vp8_filter[5]) + + ((int)src_ptr[2 * pixel_step] * vp8_filter[6]) + + ((int)src_ptr[3 * pixel_step] * vp8_filter[7]) + + ((int)src_ptr[4 * pixel_step] * vp8_filter[8]) + + ((int)src_ptr[5 * pixel_step] * vp8_filter[9]) + + (VP8_FILTER_WEIGHT >> 1); /* Rounding */ +#endif /* Normalize back to 0-255 */ Temp = Temp >> VP8_FILTER_SHIFT; @@ -157,13 +250,37 @@ static void filter_block2d_second_pass_avg for (j = 0; j < output_width; j++) { /* Apply filter */ +#if INTERP_EXTEND == 3 Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) + ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) + - ((int)src_ptr[0] * vp8_filter[2]) + - ((int)src_ptr[pixel_step] * vp8_filter[3]) + - ((int)src_ptr[2*pixel_step] * vp8_filter[4]) + - ((int)src_ptr[3*pixel_step] * vp8_filter[5]) + + ((int)src_ptr[0] * vp8_filter[2]) + + ((int)src_ptr[pixel_step] * vp8_filter[3]) + + ((int)src_ptr[2*pixel_step] * vp8_filter[4]) + + ((int)src_ptr[3*pixel_step] * vp8_filter[5]) + (VP8_FILTER_WEIGHT >> 1); /* Rounding */ +#elif INTERP_EXTEND == 4 + Temp = ((int)src_ptr[-3 * (int)pixel_step] * vp8_filter[0]) + + ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[1]) + + ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[2]) + + ((int)src_ptr[0] * vp8_filter[3]) + + ((int)src_ptr[pixel_step] * vp8_filter[4]) + + ((int)src_ptr[2 * pixel_step] * vp8_filter[5]) + + ((int)src_ptr[3 * pixel_step] * vp8_filter[6]) + + ((int)src_ptr[4 * pixel_step] * vp8_filter[7]) + + (VP8_FILTER_WEIGHT >> 1); /* Rounding */ +#elif INTERP_EXTEND == 5 + Temp = ((int)src_ptr[-4 * (int)pixel_step] * vp8_filter[0]) + + ((int)src_ptr[-3 * (int)pixel_step] * vp8_filter[1]) + + ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[2]) + + ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[3]) + + ((int)src_ptr[0] * vp8_filter[4]) + + ((int)src_ptr[pixel_step] * vp8_filter[5]) + + ((int)src_ptr[2 * pixel_step] * vp8_filter[6]) + + ((int)src_ptr[3 * pixel_step] * vp8_filter[7]) + + ((int)src_ptr[4 * pixel_step] * vp8_filter[8]) + + ((int)src_ptr[5 * pixel_step] * vp8_filter[9]) + + (VP8_FILTER_WEIGHT >> 1); /* Rounding */ +#endif /* Normalize back to 0-255 */ Temp = Temp >> VP8_FILTER_SHIFT; @@ -194,13 +311,14 @@ static void filter_block2d const short *VFilter ) { - int FData[9*4]; /* Temp data buffer used in filtering */ + int FData[(3+INTERP_EXTEND*2)*4]; /* Temp data buffer used in filtering */ /* First filter 1-D horizontally... */ - filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 4, HFilter); + filter_block2d_first_pass(src_ptr - ((INTERP_EXTEND-1) * src_pixels_per_line), FData, src_pixels_per_line, 1, + 3+INTERP_EXTEND*2, 4, HFilter); /* then filter verticaly... */ - filter_block2d_second_pass(FData + 8, output_ptr, output_pitch, 4, 4, 4, 4, VFilter); + filter_block2d_second_pass(FData + 4*(INTERP_EXTEND-1), output_ptr, output_pitch, 4, 4, 4, 4, VFilter); } @@ -234,17 +352,19 @@ void vp8_sixtap_predict8x8_c { const short *HFilter; const short *VFilter; - int FData[13*16]; /* Temp data buffer used in filtering */ + // int FData[(7+INTERP_EXTEND*2)*16]; /* Temp data buffer used in filtering */ + int FData[(7+INTERP_EXTEND*2)*8]; /* Temp data buffer used in filtering */ HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */ VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */ /* First filter 1-D horizontally... */ - filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 13, 8, HFilter); + filter_block2d_first_pass(src_ptr - ((INTERP_EXTEND-1) * src_pixels_per_line), FData, src_pixels_per_line, 1, + 7+INTERP_EXTEND*2, 8, HFilter); /* then filter verticaly... */ - filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 8, 8, VFilter); + filter_block2d_second_pass(FData + 8*(INTERP_EXTEND-1), dst_ptr, dst_pitch, 8, 8, 8, 8, VFilter); } @@ -261,16 +381,18 @@ void vp8_sixtap_predict_avg8x8_c { const short *HFilter; const short *VFilter; - int FData[13*16]; /* Temp data buffer used in filtering */ + // int FData[(7+INTERP_EXTEND*2)*16]; /* Temp data buffer used in filtering */ + int FData[(7+INTERP_EXTEND*2)*8]; /* Temp data buffer used in filtering */ HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */ VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */ /* First filter 1-D horizontally... */ - filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 13, 8, HFilter); + filter_block2d_first_pass(src_ptr - ((INTERP_EXTEND-1) * src_pixels_per_line), FData, src_pixels_per_line, 1, + 7+INTERP_EXTEND*2, 8, HFilter); /* then filter verticaly... */ - filter_block2d_second_pass_avg(FData + 16, dst_ptr, dst_pitch, 8, 8, 8, 8, VFilter); + filter_block2d_second_pass_avg(FData + 8*(INTERP_EXTEND-1), dst_ptr, dst_pitch, 8, 8, 8, 8, VFilter); } #endif /* CONFIG_DUALPRED */ @@ -286,17 +408,19 @@ void vp8_sixtap_predict8x4_c { const short *HFilter; const short *VFilter; - int FData[13*16]; /* Temp data buffer used in filtering */ + // int FData[(7+INTERP_EXTEND*2)*16]; /* Temp data buffer used in filtering */ + int FData[(3+INTERP_EXTEND*2)*8]; /* Temp data buffer used in filtering */ HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */ VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */ /* First filter 1-D horizontally... */ - filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 8, HFilter); + filter_block2d_first_pass(src_ptr - ((INTERP_EXTEND-1) * src_pixels_per_line), FData, src_pixels_per_line, 1, + 3+INTERP_EXTEND*2, 8, HFilter); /* then filter verticaly... */ - filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 4, 8, VFilter); + filter_block2d_second_pass(FData + 8*(INTERP_EXTEND-1), dst_ptr, dst_pitch, 8, 8, 4, 8, VFilter); } @@ -312,17 +436,19 @@ void vp8_sixtap_predict16x16_c { const short *HFilter; const short *VFilter; - int FData[21*24]; /* Temp data buffer used in filtering */ + // int FData[(15+INTERP_EXTEND*2)*24]; /* Temp data buffer used in filtering */ + int FData[(15+INTERP_EXTEND*2)*16]; /* Temp data buffer used in filtering */ HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */ VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */ /* First filter 1-D horizontally... */ - filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 21, 16, HFilter); + filter_block2d_first_pass(src_ptr - ((INTERP_EXTEND-1) * src_pixels_per_line), FData, src_pixels_per_line, 1, + 15+INTERP_EXTEND*2, 16, HFilter); /* then filter verticaly... */ - filter_block2d_second_pass(FData + 32, dst_ptr, dst_pitch, 16, 16, 16, 16, VFilter); + filter_block2d_second_pass(FData + 16*(INTERP_EXTEND-1), dst_ptr, dst_pitch, 16, 16, 16, 16, VFilter); } @@ -339,17 +465,18 @@ void vp8_sixtap_predict_avg16x16_c { const short *HFilter; const short *VFilter; - int FData[21*24]; /* Temp data buffer used in filtering */ + // int FData[(15+INTERP_EXTEND*2)*24]; /* Temp data buffer used in filtering */ + int FData[(15+INTERP_EXTEND*2)*16]; /* Temp data buffer used in filtering */ HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */ VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */ /* First filter 1-D horizontally... */ - filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, - src_pixels_per_line, 1, 21, 16, HFilter); + filter_block2d_first_pass(src_ptr - ((INTERP_EXTEND-1) * src_pixels_per_line), FData, + src_pixels_per_line, 1, 15+INTERP_EXTEND*2, 16, HFilter); /* then filter verticaly... */ - filter_block2d_second_pass_avg(FData + 32, dst_ptr, dst_pitch, + filter_block2d_second_pass_avg(FData + 16*(INTERP_EXTEND-1), dst_ptr, dst_pitch, 16, 16, 16, 16, VFilter); } #endif /* CONFIG_DUALPRED */ diff --git a/vp8/common/filter.h b/vp8/common/filter.h index 0f225c25a..1a4738975 100644 --- a/vp8/common/filter.h +++ b/vp8/common/filter.h @@ -8,15 +8,17 @@ * be found in the AUTHORS file in the root of the source tree. */ - #ifndef FILTER_H #define FILTER_H +#include "vpx_config.h" +#include "vpx_scale/yv12config.h" + #define BLOCK_HEIGHT_WIDTH 4 #define VP8_FILTER_WEIGHT 128 #define VP8_FILTER_SHIFT 7 extern const short vp8_bilinear_filters[8][2]; -extern const short vp8_sub_pel_filters[8][6]; +extern const short vp8_sub_pel_filters[8][INTERP_EXTEND*2]; #endif //FILTER_H diff --git a/vp8/encoder/mbgraph.c b/vp8/encoder/mbgraph.c index 79b7fd595..18c30a417 100644 --- a/vp8/encoder/mbgraph.c +++ b/vp8/encoder/mbgraph.c @@ -335,8 +335,8 @@ static void update_mbgraph_frame_stats // Set up limit values for motion vectors to prevent them extending outside the UMV borders arf_top_mv.as_int = 0; gld_top_mv.as_int = 0; - x->mv_row_min = -(VP8BORDERINPIXELS - 19); - x->mv_row_max = (cm->mb_rows - 1) * 16 + VP8BORDERINPIXELS - 19; + x->mv_row_min = -(VP8BORDERINPIXELS - 16 - INTERP_EXTEND); + x->mv_row_max = (cm->mb_rows - 1) * 16 + VP8BORDERINPIXELS - 16 - INTERP_EXTEND; xd->up_available = 0; xd->dst.y_stride = buf->y_stride; xd->pre.y_stride = buf->y_stride; @@ -353,8 +353,8 @@ static void update_mbgraph_frame_stats // Set up limit values for motion vectors to prevent them extending outside the UMV borders arf_left_mv.as_int = arf_top_mv.as_int; gld_left_mv.as_int = gld_top_mv.as_int; - x->mv_col_min = -(VP8BORDERINPIXELS - 19); - x->mv_col_max = (cm->mb_cols - 1) * 16 + VP8BORDERINPIXELS - 19; + x->mv_col_min = -(VP8BORDERINPIXELS - 16 - INTERP_EXTEND); + x->mv_col_max = (cm->mb_cols - 1) * 16 + VP8BORDERINPIXELS - 16 - INTERP_EXTEND; xd->left_available = 0; for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c index 6715ac2f6..639d53dbc 100644 --- a/vp8/encoder/onyx_if.c +++ b/vp8/encoder/onyx_if.c @@ -268,6 +268,7 @@ void vp8_initialize() { static int init_done = 0; + if (!init_done) { vp8_scale_machine_specific_config(); diff --git a/vpx_scale/yv12config.h b/vpx_scale/yv12config.h index 3cc4746f7..e78046720 100644 --- a/vpx_scale/yv12config.h +++ b/vpx_scale/yv12config.h @@ -17,7 +17,14 @@ extern "C" #endif #define VP7BORDERINPIXELS 48 + +#if CONFIG_ENHANCED_INTERP +#define VP8BORDERINPIXELS 64 +#define INTERP_EXTEND 4 +#else #define VP8BORDERINPIXELS 32 +#define INTERP_EXTEND 3 +#endif /************************************* For INT_YUV: