Adds support for enhanced interpolation for subpel motion using an 8-tap filter.

The results with 3 different 8-tap filters on the derf set are in:
http://www.corp.google.com/~debargha/vp8_results/enhinterp.html
The filter that gives the most gain achieves an overall gain of about
0.6%. The results for a set of 12 HD (720p) videos are in:
http://www.corp.google.com/~debargha/vp8_results/enhinterp_hd.html
with a maximum gain of 0.55% using the same filter. The best filter apparently
achieves the best trade-off between passband ripple and stopband
attenuation.

Change-Id: I919e28ae245c0493147fa0864f8c9d048a9dd530
This commit is contained in:
Deb Mukherjee 2012-01-16 11:21:12 -08:00
Родитель 5a5d24eed6
Коммит 6fa47a5f16
6 изменённых файлов: 175 добавлений и 37 удалений

1
configure поставляемый
Просмотреть файл

@ -227,6 +227,7 @@ EXPERIMENT_LIST="
uvintra
newnear
newlpf
enhanced_interp
"
CONFIG_LIST="
external_build

Просмотреть файл

@ -25,6 +25,50 @@ DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]) =
{ 16, 112 }
};
#if CONFIG_ENHANCED_INTERP
/*
 * Selects which candidate 8-tap kernel set below is compiled in.  The value
 * encodes the digits after the decimal point of alpha:
 *   75 -> alpha = 0.75, 625 -> alpha = 0.625, 50 -> alpha = 0.5.
 * Any other value is rejected at compile time (see the #error below).
 */
#define FILTER_ALPHA 75

/*
 * Sub-pixel interpolation kernels for the enhanced-interpolation experiment.
 *
 * The table is indexed by the x/y sub-pixel offset (0..7); each row holds the
 * tap weights for that offset.  Row 0 is the identity kernel (a single tap of
 * 128) and rows k and 8-k are mirror images of each other.  Weights are in
 * fixed point relative to VP8_FILTER_WEIGHT (128): the filter passes add
 * VP8_FILTER_WEIGHT >> 1 for rounding and shift right by VP8_FILTER_SHIFT.
 *
 * Every row below is written out with exactly 8 taps, so this definition
 * presumes 2*INTERP_EXTEND == 8 (INTERP_EXTEND == 4).
 *
 * NOTE(review): in the alpha = 0.75 and alpha = 0.5 sets, rows 2, 3, 5 and 6
 * sum to 127 rather than 128, i.e. those kernels have a DC gain slightly
 * below unity.  This looks like an artifact of rounding each tap
 * independently -- confirm whether it is intended.  The values are left
 * untouched here because changing them alters the prediction output.
 */
DECLARE_ALIGNED(16, const short, vp8_sub_pel_filters[8][2*INTERP_EXTEND]) =
{
    /* Generated using MATLAB:
     * alpha = 0.75;
     * b=intfilt(8,4,alpha);
     * bi=round(128*b);
     * ba=flipud(reshape([bi 0], 8, 8));
     * disp(num2str(ba, '%d,'))
     */
#if FILTER_ALPHA == 75
    /* alpha = 0.75 */
    { 0, 0, 0, 128, 0, 0, 0, 0},
    {-1, 4, -11, 123, 18, -7, 3, -1},
    {-2, 7, -19, 113, 38, -14, 6, -2},
    {-3, 9, -22, 98, 59, -19, 8, -3},
    {-3, 9, -22, 80, 80, -22, 9, -3},
    {-3, 8, -19, 59, 98, -22, 9, -3},
    {-2, 6, -14, 38, 113, -19, 7, -2},
    {-1, 3, -7, 18, 123, -11, 4, -1}
#elif FILTER_ALPHA == 625
    /* alpha = 0.625 */
    { 0, 0, 0, 128, 0, 0, 0, 0},
    {-1, 3, -10, 123, 18, -6, 2, -1},
    {-1, 5, -17, 112, 38, -12, 4, -1},
    {-1, 6, -20, 97, 58, -17, 6, -1},
    {-2, 7, -20, 79, 79, -20, 7, -2},
    {-1, 6, -17, 58, 97, -20, 6, -1},
    {-1, 4, -12, 38, 112, -17, 5, -1},
    {-1, 2, -6, 18, 123, -10, 3, -1}
#elif FILTER_ALPHA == 50
    /* alpha = 0.5 */
    { 0, 0, 0, 128, 0, 0, 0, 0},
    { 0, 2, -10, 122, 18, -6, 2, 0},
    {-1, 4, -16, 112, 37, -11, 3, -1},
    {-1, 5, -18, 96, 58, -16, 4, -1},
    {-1, 5, -18, 78, 78, -18, 5, -1},
    {-1, 4, -16, 58, 96, -18, 5, -1},
    {-1, 3, -11, 37, 112, -16, 4, -1},
    { 0, 2, -6, 18, 122, -10, 2, 0}
#else
    /* Without this, an unknown FILTER_ALPHA would leave the initializer
     * empty, which is not valid C and, where accepted as an extension,
     * yields an all-zero filter table. */
#error "Unsupported FILTER_ALPHA: expected 75, 625 or 50"
#endif
};
#else
DECLARE_ALIGNED(16, const short, vp8_sub_pel_filters[8][6]) =
{
@ -37,6 +81,7 @@ DECLARE_ALIGNED(16, const short, vp8_sub_pel_filters[8][6]) =
{ 1, -8, 36, 108, -11, 2 }, /* New 1/4 pel 6 tap filter */
{ 0, -1, 12, 123, -6, 0 },
};
#endif
static void filter_block2d_first_pass
(
@ -56,6 +101,7 @@ static void filter_block2d_first_pass
{
for (j = 0; j < output_width; j++)
{
#if INTERP_EXTEND == 3
Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) +
((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) +
((int)src_ptr[0] * vp8_filter[2]) +
@ -63,6 +109,29 @@ static void filter_block2d_first_pass
((int)src_ptr[2*pixel_step] * vp8_filter[4]) +
((int)src_ptr[3*pixel_step] * vp8_filter[5]) +
(VP8_FILTER_WEIGHT >> 1); /* Rounding */
#elif INTERP_EXTEND == 4
Temp = ((int)src_ptr[-3 * (int)pixel_step] * vp8_filter[0]) +
((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[1]) +
((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[2]) +
((int)src_ptr[0] * vp8_filter[3]) +
((int)src_ptr[pixel_step] * vp8_filter[4]) +
((int)src_ptr[2 * pixel_step] * vp8_filter[5]) +
((int)src_ptr[3 * pixel_step] * vp8_filter[6]) +
((int)src_ptr[4 * pixel_step] * vp8_filter[7]) +
(VP8_FILTER_WEIGHT >> 1); /* Rounding */
#elif INTERP_EXTEND == 5
Temp = ((int)src_ptr[-4 * (int)pixel_step] * vp8_filter[0]) +
((int)src_ptr[-3 * (int)pixel_step] * vp8_filter[1]) +
((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[2]) +
((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[3]) +
((int)src_ptr[0] * vp8_filter[4]) +
((int)src_ptr[pixel_step] * vp8_filter[5]) +
((int)src_ptr[2 * pixel_step] * vp8_filter[6]) +
((int)src_ptr[3 * pixel_step] * vp8_filter[7]) +
((int)src_ptr[4 * pixel_step] * vp8_filter[8]) +
((int)src_ptr[5 * pixel_step] * vp8_filter[9]) +
(VP8_FILTER_WEIGHT >> 1); /* Rounding */
#endif
/* Normalize back to 0-255 */
Temp = Temp >> VP8_FILTER_SHIFT;
@ -102,6 +171,7 @@ static void filter_block2d_second_pass
for (j = 0; j < output_width; j++)
{
/* Apply filter */
#if INTERP_EXTEND == 3
Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) +
((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) +
((int)src_ptr[0] * vp8_filter[2]) +
@ -109,6 +179,29 @@ static void filter_block2d_second_pass
((int)src_ptr[2*pixel_step] * vp8_filter[4]) +
((int)src_ptr[3*pixel_step] * vp8_filter[5]) +
(VP8_FILTER_WEIGHT >> 1); /* Rounding */
#elif INTERP_EXTEND == 4
Temp = ((int)src_ptr[-3 * (int)pixel_step] * vp8_filter[0]) +
((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[1]) +
((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[2]) +
((int)src_ptr[0] * vp8_filter[3]) +
((int)src_ptr[pixel_step] * vp8_filter[4]) +
((int)src_ptr[2 * pixel_step] * vp8_filter[5]) +
((int)src_ptr[3 * pixel_step] * vp8_filter[6]) +
((int)src_ptr[4 * pixel_step] * vp8_filter[7]) +
(VP8_FILTER_WEIGHT >> 1); /* Rounding */
#elif INTERP_EXTEND == 5
Temp = ((int)src_ptr[-4 * (int)pixel_step] * vp8_filter[0]) +
((int)src_ptr[-3 * (int)pixel_step] * vp8_filter[1]) +
((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[2]) +
((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[3]) +
((int)src_ptr[0] * vp8_filter[4]) +
((int)src_ptr[pixel_step] * vp8_filter[5]) +
((int)src_ptr[2 * pixel_step] * vp8_filter[6]) +
((int)src_ptr[3 * pixel_step] * vp8_filter[7]) +
((int)src_ptr[4 * pixel_step] * vp8_filter[8]) +
((int)src_ptr[5 * pixel_step] * vp8_filter[9]) +
(VP8_FILTER_WEIGHT >> 1); /* Rounding */
#endif
/* Normalize back to 0-255 */
Temp = Temp >> VP8_FILTER_SHIFT;
@ -157,6 +250,7 @@ static void filter_block2d_second_pass_avg
for (j = 0; j < output_width; j++)
{
/* Apply filter */
#if INTERP_EXTEND == 3
Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) +
((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) +
((int)src_ptr[0] * vp8_filter[2]) +
@ -164,6 +258,29 @@ static void filter_block2d_second_pass_avg
((int)src_ptr[2*pixel_step] * vp8_filter[4]) +
((int)src_ptr[3*pixel_step] * vp8_filter[5]) +
(VP8_FILTER_WEIGHT >> 1); /* Rounding */
#elif INTERP_EXTEND == 4
Temp = ((int)src_ptr[-3 * (int)pixel_step] * vp8_filter[0]) +
((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[1]) +
((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[2]) +
((int)src_ptr[0] * vp8_filter[3]) +
((int)src_ptr[pixel_step] * vp8_filter[4]) +
((int)src_ptr[2 * pixel_step] * vp8_filter[5]) +
((int)src_ptr[3 * pixel_step] * vp8_filter[6]) +
((int)src_ptr[4 * pixel_step] * vp8_filter[7]) +
(VP8_FILTER_WEIGHT >> 1); /* Rounding */
#elif INTERP_EXTEND == 5
Temp = ((int)src_ptr[-4 * (int)pixel_step] * vp8_filter[0]) +
((int)src_ptr[-3 * (int)pixel_step] * vp8_filter[1]) +
((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[2]) +
((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[3]) +
((int)src_ptr[0] * vp8_filter[4]) +
((int)src_ptr[pixel_step] * vp8_filter[5]) +
((int)src_ptr[2 * pixel_step] * vp8_filter[6]) +
((int)src_ptr[3 * pixel_step] * vp8_filter[7]) +
((int)src_ptr[4 * pixel_step] * vp8_filter[8]) +
((int)src_ptr[5 * pixel_step] * vp8_filter[9]) +
(VP8_FILTER_WEIGHT >> 1); /* Rounding */
#endif
/* Normalize back to 0-255 */
Temp = Temp >> VP8_FILTER_SHIFT;
@ -194,13 +311,14 @@ static void filter_block2d
const short *VFilter
)
{
int FData[9*4]; /* Temp data buffer used in filtering */
int FData[(3+INTERP_EXTEND*2)*4]; /* Temp data buffer used in filtering */
/* First filter 1-D horizontally... */
filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 4, HFilter);
filter_block2d_first_pass(src_ptr - ((INTERP_EXTEND-1) * src_pixels_per_line), FData, src_pixels_per_line, 1,
3+INTERP_EXTEND*2, 4, HFilter);
/* then filter verticaly... */
filter_block2d_second_pass(FData + 8, output_ptr, output_pitch, 4, 4, 4, 4, VFilter);
filter_block2d_second_pass(FData + 4*(INTERP_EXTEND-1), output_ptr, output_pitch, 4, 4, 4, 4, VFilter);
}
@ -234,17 +352,19 @@ void vp8_sixtap_predict8x8_c
{
const short *HFilter;
const short *VFilter;
int FData[13*16]; /* Temp data buffer used in filtering */
// int FData[(7+INTERP_EXTEND*2)*16]; /* Temp data buffer used in filtering */
int FData[(7+INTERP_EXTEND*2)*8]; /* Temp data buffer used in filtering */
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
/* First filter 1-D horizontally... */
filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 13, 8, HFilter);
filter_block2d_first_pass(src_ptr - ((INTERP_EXTEND-1) * src_pixels_per_line), FData, src_pixels_per_line, 1,
7+INTERP_EXTEND*2, 8, HFilter);
/* then filter verticaly... */
filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 8, 8, VFilter);
filter_block2d_second_pass(FData + 8*(INTERP_EXTEND-1), dst_ptr, dst_pitch, 8, 8, 8, 8, VFilter);
}
@ -261,16 +381,18 @@ void vp8_sixtap_predict_avg8x8_c
{
const short *HFilter;
const short *VFilter;
int FData[13*16]; /* Temp data buffer used in filtering */
// int FData[(7+INTERP_EXTEND*2)*16]; /* Temp data buffer used in filtering */
int FData[(7+INTERP_EXTEND*2)*8]; /* Temp data buffer used in filtering */
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
/* First filter 1-D horizontally... */
filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 13, 8, HFilter);
filter_block2d_first_pass(src_ptr - ((INTERP_EXTEND-1) * src_pixels_per_line), FData, src_pixels_per_line, 1,
7+INTERP_EXTEND*2, 8, HFilter);
/* then filter verticaly... */
filter_block2d_second_pass_avg(FData + 16, dst_ptr, dst_pitch, 8, 8, 8, 8, VFilter);
filter_block2d_second_pass_avg(FData + 8*(INTERP_EXTEND-1), dst_ptr, dst_pitch, 8, 8, 8, 8, VFilter);
}
#endif /* CONFIG_DUALPRED */
@ -286,17 +408,19 @@ void vp8_sixtap_predict8x4_c
{
const short *HFilter;
const short *VFilter;
int FData[13*16]; /* Temp data buffer used in filtering */
// int FData[(7+INTERP_EXTEND*2)*16]; /* Temp data buffer used in filtering */
int FData[(3+INTERP_EXTEND*2)*8]; /* Temp data buffer used in filtering */
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
/* First filter 1-D horizontally... */
filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 8, HFilter);
filter_block2d_first_pass(src_ptr - ((INTERP_EXTEND-1) * src_pixels_per_line), FData, src_pixels_per_line, 1,
3+INTERP_EXTEND*2, 8, HFilter);
/* then filter verticaly... */
filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 4, 8, VFilter);
filter_block2d_second_pass(FData + 8*(INTERP_EXTEND-1), dst_ptr, dst_pitch, 8, 8, 4, 8, VFilter);
}
@ -312,17 +436,19 @@ void vp8_sixtap_predict16x16_c
{
const short *HFilter;
const short *VFilter;
int FData[21*24]; /* Temp data buffer used in filtering */
// int FData[(15+INTERP_EXTEND*2)*24]; /* Temp data buffer used in filtering */
int FData[(15+INTERP_EXTEND*2)*16]; /* Temp data buffer used in filtering */
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
/* First filter 1-D horizontally... */
filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 21, 16, HFilter);
filter_block2d_first_pass(src_ptr - ((INTERP_EXTEND-1) * src_pixels_per_line), FData, src_pixels_per_line, 1,
15+INTERP_EXTEND*2, 16, HFilter);
/* then filter verticaly... */
filter_block2d_second_pass(FData + 32, dst_ptr, dst_pitch, 16, 16, 16, 16, VFilter);
filter_block2d_second_pass(FData + 16*(INTERP_EXTEND-1), dst_ptr, dst_pitch, 16, 16, 16, 16, VFilter);
}
@ -339,17 +465,18 @@ void vp8_sixtap_predict_avg16x16_c
{
const short *HFilter;
const short *VFilter;
int FData[21*24]; /* Temp data buffer used in filtering */
// int FData[(15+INTERP_EXTEND*2)*24]; /* Temp data buffer used in filtering */
int FData[(15+INTERP_EXTEND*2)*16]; /* Temp data buffer used in filtering */
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
/* First filter 1-D horizontally... */
filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData,
src_pixels_per_line, 1, 21, 16, HFilter);
filter_block2d_first_pass(src_ptr - ((INTERP_EXTEND-1) * src_pixels_per_line), FData,
src_pixels_per_line, 1, 15+INTERP_EXTEND*2, 16, HFilter);
/* then filter verticaly... */
filter_block2d_second_pass_avg(FData + 32, dst_ptr, dst_pitch,
filter_block2d_second_pass_avg(FData + 16*(INTERP_EXTEND-1), dst_ptr, dst_pitch,
16, 16, 16, 16, VFilter);
}
#endif /* CONFIG_DUALPRED */

Просмотреть файл

@ -8,15 +8,17 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef FILTER_H
#define FILTER_H
#include "vpx_config.h"
#include "vpx_scale/yv12config.h"
#define BLOCK_HEIGHT_WIDTH 4
#define VP8_FILTER_WEIGHT 128
#define VP8_FILTER_SHIFT 7
extern const short vp8_bilinear_filters[8][2];
extern const short vp8_sub_pel_filters[8][6];
extern const short vp8_sub_pel_filters[8][INTERP_EXTEND*2];
#endif //FILTER_H

Просмотреть файл

@ -335,8 +335,8 @@ static void update_mbgraph_frame_stats
// Set up limit values for motion vectors to prevent them extending outside the UMV borders
arf_top_mv.as_int = 0;
gld_top_mv.as_int = 0;
x->mv_row_min = -(VP8BORDERINPIXELS - 19);
x->mv_row_max = (cm->mb_rows - 1) * 16 + VP8BORDERINPIXELS - 19;
x->mv_row_min = -(VP8BORDERINPIXELS - 16 - INTERP_EXTEND);
x->mv_row_max = (cm->mb_rows - 1) * 16 + VP8BORDERINPIXELS - 16 - INTERP_EXTEND;
xd->up_available = 0;
xd->dst.y_stride = buf->y_stride;
xd->pre.y_stride = buf->y_stride;
@ -353,8 +353,8 @@ static void update_mbgraph_frame_stats
// Set up limit values for motion vectors to prevent them extending outside the UMV borders
arf_left_mv.as_int = arf_top_mv.as_int;
gld_left_mv.as_int = gld_top_mv.as_int;
x->mv_col_min = -(VP8BORDERINPIXELS - 19);
x->mv_col_max = (cm->mb_cols - 1) * 16 + VP8BORDERINPIXELS - 19;
x->mv_col_min = -(VP8BORDERINPIXELS - 16 - INTERP_EXTEND);
x->mv_col_max = (cm->mb_cols - 1) * 16 + VP8BORDERINPIXELS - 16 - INTERP_EXTEND;
xd->left_available = 0;
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)

Просмотреть файл

@ -268,6 +268,7 @@ void vp8_initialize()
{
static int init_done = 0;
if (!init_done)
{
vp8_scale_machine_specific_config();

Просмотреть файл

@ -17,7 +17,14 @@ extern "C"
#endif
#define VP7BORDERINPIXELS 48
/*
 * Frame border width and interpolation reach, switched by the
 * enhanced_interp experiment.
 *
 * INTERP_EXTEND is half the sub-pixel filter length: the sub-pel filter
 * table is declared with 2*INTERP_EXTEND taps per row (8 taps when the
 * experiment is enabled, 6 otherwise).  The encoder's motion-vector limits
 * are derived from both values as VP8BORDERINPIXELS - 16 - INTERP_EXTEND.
 */
#if CONFIG_ENHANCED_INTERP
#define VP8BORDERINPIXELS 64
#define INTERP_EXTEND 4
#else
#define VP8BORDERINPIXELS 32
#define INTERP_EXTEND 3
#endif
/*************************************
For INT_YUV: