diff --git a/vp8/common/arm/armv6/bilinearfilter_v6.asm b/vp8/common/arm/armv6/bilinearfilter_v6.asm index 09d7338d9..a86ed5d0a 100644 --- a/vp8/common/arm/armv6/bilinearfilter_v6.asm +++ b/vp8/common/arm/armv6/bilinearfilter_v6.asm @@ -15,19 +15,19 @@ AREA |.text|, CODE, READONLY ; name this block of code ;------------------------------------- -; r0 unsigned char *src_ptr, -; r1 unsigned short *output_ptr, -; r2 unsigned int src_pixels_per_line, -; r3 unsigned int output_height, -; stack unsigned int output_width, -; stack const short *vp8_filter +; r0 unsigned char *src_ptr, +; r1 unsigned short *dst_ptr, +; r2 unsigned int src_pitch, +; r3 unsigned int height, +; stack unsigned int width, +; stack const short *vp8_filter ;------------------------------------- ; The output is transposed stroed in output array to make it easy for second pass filtering. |vp8_filter_block2d_bil_first_pass_armv6| PROC stmdb sp!, {r4 - r11, lr} ldr r11, [sp, #40] ; vp8_filter address - ldr r4, [sp, #36] ; output width + ldr r4, [sp, #36] ; width mov r12, r3 ; outer-loop counter sub r2, r2, r4 ; src increment for height loop @@ -38,10 +38,10 @@ ldr r5, [r11] ; load up filter coefficients - mov r3, r3, lsl #1 ; output_height*2 + mov r3, r3, lsl #1 ; height*2 add r3, r3, #2 ; plus 2 to make output buffer 4-bit aligned since height is actually (height+1) - mov r11, r1 ; save output_ptr for each row + mov r11, r1 ; save dst_ptr for each row cmp r5, #128 ; if filter coef = 128, then skip the filter beq bil_null_1st_filter @@ -140,17 +140,17 @@ ;--------------------------------- ; r0 unsigned short *src_ptr, -; r1 unsigned char *output_ptr, -; r2 int output_pitch, -; r3 unsigned int output_height, -; stack unsigned int output_width, -; stack const short *vp8_filter +; r1 unsigned char *dst_ptr, +; r2 int dst_pitch, +; r3 unsigned int height, +; stack unsigned int width, +; stack const short *vp8_filter ;--------------------------------- |vp8_filter_block2d_bil_second_pass_armv6| PROC stmdb sp!, {r4 - r11, lr} ldr r11, [sp, #40] ; vp8_filter address - ldr r4, [sp, #36] ; output width + ldr r4, [sp, #36] ; width ldr r5, [r11] ; load up filter coefficients mov r12, r4 ; outer-loop counter = width, since we work on transposed data matrix diff --git a/vp8/common/arm/bilinearfilter_arm.c b/vp8/common/arm/bilinearfilter_arm.c index 65afb41a1..961d142c9 100644 --- a/vp8/common/arm/bilinearfilter_arm.c +++ b/vp8/common/arm/bilinearfilter_arm.c @@ -10,128 +10,48 @@ #include +#include "filter.h" #include "subpixel.h" -#define BLOCK_HEIGHT_WIDTH 4 -#define VP8_FILTER_WEIGHT 128 -#define VP8_FILTER_SHIFT 7 - -static const short bilinear_filters[8][2] = -{ - { 128, 0 }, - { 112, 16 }, - { 96, 32 }, - { 80, 48 }, - { 64, 64 }, - { 48, 80 }, - { 32, 96 }, - { 16, 112 } -}; - - extern void vp8_filter_block2d_bil_first_pass_armv6 ( - unsigned char *src_ptr, - unsigned short *output_ptr, - unsigned int src_pixels_per_line, - unsigned int output_height, - unsigned int output_width, - const short *vp8_filter + unsigned char *src_ptr, + unsigned short *dst_ptr, + unsigned int src_pitch, + unsigned int height, + unsigned int width, + const short *vp8_filter ); extern void vp8_filter_block2d_bil_second_pass_armv6 ( unsigned short *src_ptr, - unsigned char *output_ptr, - int output_pitch, - unsigned int output_height, - unsigned int output_width, - const short *vp8_filter + unsigned char *dst_ptr, + int dst_pitch, + unsigned int height, + unsigned int width, + const short *vp8_filter ); -#if 0 -void vp8_filter_block2d_bil_first_pass_6 -( - unsigned char *src_ptr, - unsigned short *output_ptr, - unsigned int src_pixels_per_line, - unsigned int output_height, - unsigned int output_width, - const short *vp8_filter -) -{ - unsigned int i, j; - - for ( i=0; i> VP8_FILTER_SHIFT; - src_ptr++; - } - - /* Next row... */ - src_ptr += src_pixels_per_line - output_width; - output_ptr += output_width; - } -} - -void vp8_filter_block2d_bil_second_pass_6 -( - unsigned short *src_ptr, - unsigned char *output_ptr, - int output_pitch, - unsigned int output_height, - unsigned int output_width, - const short *vp8_filter -) -{ - unsigned int i,j; - int Temp; - - for ( i=0; i> VP8_FILTER_SHIFT); - src_ptr++; - } - - /* Next row... */ - /*src_ptr += src_pixels_per_line - output_width;*/ - output_ptr += output_pitch; - } -} -#endif - void vp8_filter_block2d_bil_armv6 ( unsigned char *src_ptr, - unsigned char *output_ptr, - unsigned int src_pixels_per_line, + unsigned char *dst_ptr, + unsigned int src_pitch, unsigned int dst_pitch, - const short *HFilter, - const short *VFilter, + const short *HFilter, + const short *VFilter, int Width, int Height ) { - - unsigned short FData[36*16]; /* Temp data bufffer used in filtering */ + unsigned short FData[36*16]; /* Temp data buffer used in filtering */ /* First filter 1-D horizontally... */ - /* pixel_step = 1; */ - vp8_filter_block2d_bil_first_pass_armv6(src_ptr, FData, src_pixels_per_line, Height + 1, Width, HFilter); + vp8_filter_block2d_bil_first_pass_armv6(src_ptr, FData, src_pitch, Height + 1, Width, HFilter); /* then 1-D vertically... */ - vp8_filter_block2d_bil_second_pass_armv6(FData, output_ptr, dst_pitch, Height, Width, VFilter); + vp8_filter_block2d_bil_second_pass_armv6(FData, dst_ptr, dst_pitch, Height, Width, VFilter); } @@ -148,8 +68,8 @@ void vp8_bilinear_predict4x4_armv6 const short *HFilter; const short *VFilter; - HFilter = bilinear_filters[xoffset]; - VFilter = bilinear_filters[yoffset]; + HFilter = vp8_bilinear_filters[xoffset]; + VFilter = vp8_bilinear_filters[yoffset]; vp8_filter_block2d_bil_armv6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 4, 4); } @@ -167,8 +87,8 @@ void vp8_bilinear_predict8x8_armv6 const short *HFilter; const short *VFilter; - HFilter = bilinear_filters[xoffset]; - VFilter = bilinear_filters[yoffset]; + HFilter = vp8_bilinear_filters[xoffset]; + VFilter = vp8_bilinear_filters[yoffset]; vp8_filter_block2d_bil_armv6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 8); } @@ -186,8 +106,8 @@ void vp8_bilinear_predict8x4_armv6 const short *HFilter; const short *VFilter; - HFilter = bilinear_filters[xoffset]; - VFilter = bilinear_filters[yoffset]; + HFilter = vp8_bilinear_filters[xoffset]; + VFilter = vp8_bilinear_filters[yoffset]; vp8_filter_block2d_bil_armv6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 4); } @@ -205,8 +125,8 @@ void vp8_bilinear_predict16x16_armv6 const short *HFilter; const short *VFilter; - HFilter = bilinear_filters[xoffset]; - VFilter = bilinear_filters[yoffset]; + HFilter = vp8_bilinear_filters[xoffset]; + VFilter = vp8_bilinear_filters[yoffset]; vp8_filter_block2d_bil_armv6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 16, 16); } diff --git a/vp8/common/arm/filter_arm.c b/vp8/common/arm/filter_arm.c index b4f2fe6ca..2612fc18a 100644 --- a/vp8/common/arm/filter_arm.c +++ b/vp8/common/arm/filter_arm.c @@ -11,26 +11,10 @@ #include "vpx_ports/config.h" #include +#include "filter.h" #include "subpixel.h" #include "vpx_ports/mem.h" -#define BLOCK_HEIGHT_WIDTH 4 -#define VP8_FILTER_WEIGHT 128 -#define VP8_FILTER_SHIFT 7 - -DECLARE_ALIGNED(16, static const short, sub_pel_filters[8][6]) = -{ - { 0, 0, 128, 0, 0, 0 }, /* note that 1/8 pel positions are just as per alpha -0.5 bicubic */ - { 0, -6, 123, 12, -1, 0 }, - { 2, -11, 108, 36, -8, 1 }, /* New 1/4 pel 6 tap filter */ - { 0, -9, 93, 50, -6, 0 }, - { 3, -16, 77, 77, -16, 3 }, /* New 1/2 pel 6 tap filter */ - { 0, -6, 50, 93, -9, 0 }, - { 1, -8, 36, 108, -11, 2 }, /* New 1/4 pel 6 tap filter */ - { 0, -1, 12, 123, -6, 0 }, -}; - - extern void vp8_filter_block2d_first_pass_armv6 ( unsigned char *src_ptr, @@ -93,11 +77,11 @@ void vp8_sixtap_predict_armv6 { const short *HFilter; const short *VFilter; - DECLARE_ALIGNED_ARRAY(4, short, FData, 12*4); /* Temp data bufffer used in filtering */ + DECLARE_ALIGNED_ARRAY(4, short, FData, 12*4); /* Temp data buffer used in filtering */ - HFilter = sub_pel_filters[xoffset]; /* 6 tap */ - VFilter = sub_pel_filters[yoffset]; /* 6 tap */ + HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */ + VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */ /* Vfilter is null. First pass only */ if (xoffset && !yoffset) @@ -129,47 +113,6 @@ void vp8_sixtap_predict_armv6 } } -#if 0 -void vp8_sixtap_predict8x4_armv6 -( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch -) -{ - const short *HFilter; - const short *VFilter; - DECLARE_ALIGNED_ARRAY(4, short, FData, 16*8); /* Temp data bufffer used in filtering */ - - HFilter = sub_pel_filters[xoffset]; /* 6 tap */ - VFilter = sub_pel_filters[yoffset]; /* 6 tap */ - - - /*if (xoffset && !yoffset) - { - vp8_filter_block2d_first_pass_only_armv6 ( src_ptr, dst_ptr, src_pixels_per_line, 8, dst_pitch, HFilter ); - }*/ - /* Hfilter is null. Second pass only */ - /*else if (!xoffset && yoffset) - { - vp8_filter_block2d_second_pass_only_armv6 ( src_ptr, dst_ptr, src_pixels_per_line, 8, dst_pitch, VFilter ); - } - else - { - if (yoffset & 0x1) - vp8_filter_block2d_first_pass_armv6 ( src_ptr-src_pixels_per_line, FData+1, src_pixels_per_line, 8, 7, HFilter ); - else*/ - - vp8_filter_block2d_first_pass_armv6 ( src_ptr-(2*src_pixels_per_line), FData, src_pixels_per_line, 8, 9, HFilter ); - - vp8_filter_block2d_second_pass_armv6 ( FData+2, dst_ptr, dst_pitch, 4, 8, VFilter ); - /*}*/ -} -#endif - void vp8_sixtap_predict8x8_armv6 ( unsigned char *src_ptr, @@ -182,10 +125,10 @@ void vp8_sixtap_predict8x8_armv6 { const short *HFilter; const short *VFilter; - DECLARE_ALIGNED_ARRAY(4, short, FData, 16*8); /* Temp data bufffer used in filtering */ + DECLARE_ALIGNED_ARRAY(4, short, FData, 16*8); /* Temp data buffer used in filtering */ - HFilter = sub_pel_filters[xoffset]; /* 6 tap */ - VFilter = sub_pel_filters[yoffset]; /* 6 tap */ + HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */ + VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */ if (xoffset && !yoffset) { @@ -224,10 +167,10 @@ void vp8_sixtap_predict16x16_armv6 { const short *HFilter; const short *VFilter; - DECLARE_ALIGNED_ARRAY(4, short, FData, 24*16); /* Temp data bufffer used in filtering */ + DECLARE_ALIGNED_ARRAY(4, short, FData, 24*16); /* Temp data buffer used in filtering */ - HFilter = sub_pel_filters[xoffset]; /* 6 tap */ - VFilter = sub_pel_filters[yoffset]; /* 6 tap */ + HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */ + VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */ if (xoffset && !yoffset) { diff --git a/vp8/common/filter_c.c b/vp8/common/filter.c similarity index 66% rename from vp8/common/filter_c.c rename to vp8/common/filter.c index 399a847d5..6e364a94d 100644 --- a/vp8/common/filter_c.c +++ b/vp8/common/filter.c @@ -10,13 +10,10 @@ #include +#include "filter.h" +#include "vpx_ports/mem.h" -#define BLOCK_HEIGHT_WIDTH 4 -#define VP8_FILTER_WEIGHT 128 -#define VP8_FILTER_SHIFT 7 - - -static const int bilinear_filters[8][2] = +DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]) = { { 128, 0 }, { 112, 16 }, @@ -28,8 +25,7 @@ static const int bilinear_filters[8][2] = { 16, 112 } }; - -static const short sub_pel_filters[8][6] = +DECLARE_ALIGNED(16, const short, vp8_sub_pel_filters[8][6]) = { { 0, 0, 128, 0, 0, 0 }, /* note that 1/8 pel positions are just as per alpha -0.5 bicubic */ @@ -40,9 +36,6 @@ static const short sub_pel_filters[8][6] = { 0, -6, 50, 93, -9, 0 }, { 1, -8, 36, 108, -11, 2 }, /* New 1/4 pel 6 tap filter */ { 0, -1, 12, 123, -6, 0 }, - - - }; void vp8_filter_block2d_first_pass @@ -146,7 +139,7 @@ void vp8_filter_block2d const short *VFilter ) { - int FData[9*4]; /* Temp data bufffer used in filtering */ + int FData[9*4]; /* Temp data buffer used in filtering */ /* First filter 1-D horizontally... */ vp8_filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 4, HFilter); @@ -195,8 +188,8 @@ void vp8_sixtap_predict_c const short *HFilter; const short *VFilter; - HFilter = sub_pel_filters[xoffset]; /* 6 tap */ - VFilter = sub_pel_filters[yoffset]; /* 6 tap */ + HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */ + VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */ vp8_filter_block2d(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter); } @@ -212,10 +205,10 @@ void vp8_sixtap_predict8x8_c { const short *HFilter; const short *VFilter; - int FData[13*16]; /* Temp data bufffer used in filtering */ + int FData[13*16]; /* Temp data buffer used in filtering */ - HFilter = sub_pel_filters[xoffset]; /* 6 tap */ - VFilter = sub_pel_filters[yoffset]; /* 6 tap */ + HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */ + VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */ /* First filter 1-D horizontally... */ vp8_filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 13, 8, HFilter); @@ -238,10 +231,10 @@ void vp8_sixtap_predict8x4_c { const short *HFilter; const short *VFilter; - int FData[13*16]; /* Temp data bufffer used in filtering */ + int FData[13*16]; /* Temp data buffer used in filtering */ - HFilter = sub_pel_filters[xoffset]; /* 6 tap */ - VFilter = sub_pel_filters[yoffset]; /* 6 tap */ + HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */ + VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */ /* First filter 1-D horizontally... */ vp8_filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 8, HFilter); @@ -264,11 +257,11 @@ void vp8_sixtap_predict16x16_c { const short *HFilter; const short *VFilter; - int FData[21*24]; /* Temp data bufffer used in filtering */ + int FData[21*24]; /* Temp data buffer used in filtering */ - HFilter = sub_pel_filters[xoffset]; /* 6 tap */ - VFilter = sub_pel_filters[yoffset]; /* 6 tap */ + HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */ + VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */ /* First filter 1-D horizontally... */ vp8_filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 21, 16, HFilter); @@ -283,57 +276,50 @@ void vp8_sixtap_predict16x16_c * * ROUTINE : filter_block2d_bil_first_pass * - * INPUTS : UINT8 *src_ptr : Pointer to source block. - * UINT32 src_pixels_per_line : Stride of input block. - * UINT32 pixel_step : Offset between filter input samples (see notes). - * UINT32 output_height : Input block height. - * UINT32 output_width : Input block width. - * INT32 *vp8_filter : Array of 2 bi-linear filter taps. + * INPUTS : UINT8 *src_ptr : Pointer to source block. + * UINT32 src_stride : Stride of source block. + * UINT32 height : Block height. + * UINT32 width : Block width. + * INT32 *vp8_filter : Array of 2 bi-linear filter taps. * - * OUTPUTS : INT32 *output_ptr : Pointer to filtered block. + * OUTPUTS : INT32 *dst_ptr : Pointer to filtered block. * * RETURNS : void * - * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block in - * either horizontal or vertical direction to produce the - * filtered output block. Used to implement first-pass - * of 2-D separable filter. + * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block + * in the horizontal direction to produce the filtered output + * block. Used to implement first-pass of 2-D separable filter. * * SPECIAL NOTES : Produces INT32 output to retain precision for next pass. * Two filter taps should sum to VP8_FILTER_WEIGHT. - * pixel_step defines whether the filter is applied - * horizontally (pixel_step=1) or vertically (pixel_step=stride). - * It defines the offset required to move from one input - * to the next. * ****************************************************************************/ void vp8_filter_block2d_bil_first_pass ( - unsigned char *src_ptr, - unsigned short *output_ptr, - unsigned int src_pixels_per_line, - int pixel_step, - unsigned int output_height, - unsigned int output_width, - const int *vp8_filter + unsigned char *src_ptr, + unsigned short *dst_ptr, + unsigned int src_stride, + unsigned int height, + unsigned int width, + const short *vp8_filter ) { unsigned int i, j; - for (i = 0; i < output_height; i++) + for (i = 0; i < height; i++) { - for (j = 0; j < output_width; j++) + for (j = 0; j < width; j++) { /* Apply bilinear filter */ - output_ptr[j] = (((int)src_ptr[0] * vp8_filter[0]) + - ((int)src_ptr[pixel_step] * vp8_filter[1]) + - (VP8_FILTER_WEIGHT / 2)) >> VP8_FILTER_SHIFT; + dst_ptr[j] = (((int)src_ptr[0] * vp8_filter[0]) + + ((int)src_ptr[1] * vp8_filter[1]) + + (VP8_FILTER_WEIGHT / 2)) >> VP8_FILTER_SHIFT; src_ptr++; } /* Next row... */ - src_ptr += src_pixels_per_line - output_width; - output_ptr += output_width; + src_ptr += src_stride - width; + dst_ptr += width; } } @@ -341,60 +327,51 @@ void vp8_filter_block2d_bil_first_pass * * ROUTINE : filter_block2d_bil_second_pass * - * INPUTS : INT32 *src_ptr : Pointer to source block. - * UINT32 src_pixels_per_line : Stride of input block. - * UINT32 pixel_step : Offset between filter input samples (see notes). - * UINT32 output_height : Input block height. - * UINT32 output_width : Input block width. - * INT32 *vp8_filter : Array of 2 bi-linear filter taps. + * INPUTS : INT32 *src_ptr : Pointer to source block. + * UINT32 dst_pitch : Destination block pitch. + * UINT32 height : Block height. + * UINT32 width : Block width. + * INT32 *vp8_filter : Array of 2 bi-linear filter taps. * - * OUTPUTS : UINT16 *output_ptr : Pointer to filtered block. + * OUTPUTS : UINT16 *dst_ptr : Pointer to filtered block. * * RETURNS : void * - * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block in - * either horizontal or vertical direction to produce the - * filtered output block. Used to implement second-pass - * of 2-D separable filter. + * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block + * in the vertical direction to produce the filtered output + * block. Used to implement second-pass of 2-D separable filter. * * SPECIAL NOTES : Requires 32-bit input as produced by filter_block2d_bil_first_pass. * Two filter taps should sum to VP8_FILTER_WEIGHT. - * pixel_step defines whether the filter is applied - * horizontally (pixel_step=1) or vertically (pixel_step=stride). - * It defines the offset required to move from one input - * to the next. * ****************************************************************************/ void vp8_filter_block2d_bil_second_pass ( unsigned short *src_ptr, - unsigned char *output_ptr, - int output_pitch, - unsigned int src_pixels_per_line, - unsigned int pixel_step, - unsigned int output_height, - unsigned int output_width, - const int *vp8_filter + unsigned char *dst_ptr, + int dst_pitch, + unsigned int height, + unsigned int width, + const short *vp8_filter ) { unsigned int i, j; int Temp; - for (i = 0; i < output_height; i++) + for (i = 0; i < height; i++) { - for (j = 0; j < output_width; j++) + for (j = 0; j < width; j++) { /* Apply filter */ - Temp = ((int)src_ptr[0] * vp8_filter[0]) + - ((int)src_ptr[pixel_step] * vp8_filter[1]) + + Temp = ((int)src_ptr[0] * vp8_filter[0]) + + ((int)src_ptr[width] * vp8_filter[1]) + (VP8_FILTER_WEIGHT / 2); - output_ptr[j] = (unsigned int)(Temp >> VP8_FILTER_SHIFT); + dst_ptr[j] = (unsigned int)(Temp >> VP8_FILTER_SHIFT); src_ptr++; } /* Next row... */ - src_ptr += src_pixels_per_line - output_width; - output_ptr += output_pitch; + dst_ptr += dst_pitch; } } @@ -404,11 +381,14 @@ void vp8_filter_block2d_bil_second_pass * ROUTINE : filter_block2d_bil * * INPUTS : UINT8 *src_ptr : Pointer to source block. - * UINT32 src_pixels_per_line : Stride of input block. - * INT32 *HFilter : Array of 2 horizontal filter taps. - * INT32 *VFilter : Array of 2 vertical filter taps. + * UINT32 src_pitch : Stride of source block. + * UINT32 dst_pitch : Stride of destination block. + * INT32 *HFilter : Array of 2 horizontal filter taps. + * INT32 *VFilter : Array of 2 vertical filter taps. + * INT32 Width : Block width + * INT32 Height : Block height * - * OUTPUTS : UINT16 *output_ptr : Pointer to filtered block. + * OUTPUTS : UINT16 *dst_ptr : Pointer to filtered block. * * RETURNS : void * @@ -422,23 +402,23 @@ void vp8_filter_block2d_bil_second_pass void vp8_filter_block2d_bil ( unsigned char *src_ptr, - unsigned char *output_ptr, - unsigned int src_pixels_per_line, + unsigned char *dst_ptr, + unsigned int src_pitch, unsigned int dst_pitch, - const int *HFilter, - const int *VFilter, + const short *HFilter, + const short *VFilter, int Width, int Height ) { - unsigned short FData[17*16]; /* Temp data bufffer used in filtering */ + unsigned short FData[17*16]; /* Temp data buffer used in filtering */ /* First filter 1-D horizontally... */ - vp8_filter_block2d_bil_first_pass(src_ptr, FData, src_pixels_per_line, 1, Height + 1, Width, HFilter); + vp8_filter_block2d_bil_first_pass(src_ptr, FData, src_pitch, Height + 1, Width, HFilter); /* then 1-D vertically... */ - vp8_filter_block2d_bil_second_pass(FData, output_ptr, dst_pitch, Width, Width, Height, Width, VFilter); + vp8_filter_block2d_bil_second_pass(FData, dst_ptr, dst_pitch, Height, Width, VFilter); } @@ -452,11 +432,11 @@ void vp8_bilinear_predict4x4_c int dst_pitch ) { - const int *HFilter; - const int *VFilter; + const short *HFilter; + const short *VFilter; - HFilter = bilinear_filters[xoffset]; - VFilter = bilinear_filters[yoffset]; + HFilter = vp8_bilinear_filters[xoffset]; + VFilter = vp8_bilinear_filters[yoffset]; #if 0 { int i; @@ -490,11 +470,11 @@ void vp8_bilinear_predict8x8_c int dst_pitch ) { - const int *HFilter; - const int *VFilter; + const short *HFilter; + const short *VFilter; - HFilter = bilinear_filters[xoffset]; - VFilter = bilinear_filters[yoffset]; + HFilter = vp8_bilinear_filters[xoffset]; + VFilter = vp8_bilinear_filters[yoffset]; vp8_filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 8); @@ -510,11 +490,11 @@ void vp8_bilinear_predict8x4_c int dst_pitch ) { - const int *HFilter; - const int *VFilter; + const short *HFilter; + const short *VFilter; - HFilter = bilinear_filters[xoffset]; - VFilter = bilinear_filters[yoffset]; + HFilter = vp8_bilinear_filters[xoffset]; + VFilter = vp8_bilinear_filters[yoffset]; vp8_filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 4); @@ -530,11 +510,11 @@ void vp8_bilinear_predict16x16_c int dst_pitch ) { - const int *HFilter; - const int *VFilter; + const short *HFilter; + const short *VFilter; - HFilter = bilinear_filters[xoffset]; - VFilter = bilinear_filters[yoffset]; + HFilter = vp8_bilinear_filters[xoffset]; + VFilter = vp8_bilinear_filters[yoffset]; vp8_filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 16, 16); } diff --git a/vp8/common/filter.h b/vp8/common/filter.h new file mode 100644 index 000000000..0f225c25a --- /dev/null +++ b/vp8/common/filter.h @@ -0,0 +1,22 @@ +/* + * Copyright (c) 2011 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#ifndef FILTER_H +#define FILTER_H + +#define BLOCK_HEIGHT_WIDTH 4 +#define VP8_FILTER_WEIGHT 128 +#define VP8_FILTER_SHIFT 7 + +extern const short vp8_bilinear_filters[8][2]; +extern const short vp8_sub_pel_filters[8][6]; + +#endif //FILTER_H diff --git a/vp8/common/threading.h b/vp8/common/threading.h index 0dd7bbc4c..bfd4916c6 100644 --- a/vp8/common/threading.h +++ b/vp8/common/threading.h @@ -14,7 +14,7 @@ #define VPXINFINITE 10000 /* 10second. */ -#if CONFIG_OS_SUPPORT +#if CONFIG_OS_SUPPORT && CONFIG_MULTITHREAD /* Thread management macros */ #ifdef _WIN32 @@ -90,8 +90,6 @@ #define x86_pause_hint() #endif -#else /* CONFIG_OS_SUPPORT = 0 */ -#define THREAD_FUNCTION void * -#endif /* CONFIG_OS_SUPPORT */ +#endif /* CONFIG_OS_SUPPORT && CONFIG_MULTITHREAD */ #endif diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c index 9bd6bc514..c97fa7f5f 100644 --- a/vp8/decoder/decodframe.c +++ b/vp8/decoder/decodframe.c @@ -484,9 +484,11 @@ static void setup_token_decoder(VP8D_COMP *pbi, bool_decoder++; } +#if CONFIG_MULTITHREAD /* Clamp number of decoder threads */ if (pbi->decoding_thread_count > num_part - 1) pbi->decoding_thread_count = num_part - 1; +#endif } @@ -849,7 +851,9 @@ int vp8_decode_frame(VP8D_COMP *pbi) #endif /* set up frame new frame for intra coded blocks */ +#if CONFIG_MULTITHREAD if (!(pbi->b_multithreaded_rd) || pc->multi_token_partition == ONE_PARTITION || !(pc->filter_level)) +#endif vp8_setup_intra_recon(&pc->yv12_fb[pc->new_fb_idx]); vp8_setup_block_dptrs(xd); @@ -869,6 +873,7 @@ int vp8_decode_frame(VP8D_COMP *pbi) vpx_memcpy(&xd->block[0].bmi, &xd->mode_info_context->bmi[0], sizeof(B_MODE_INFO)); +#if CONFIG_MULTITHREAD if (pbi->b_multithreaded_rd && pc->multi_token_partition != ONE_PARTITION) { vp8mt_decode_mb_rows(pbi, xd); @@ -883,6 +888,7 @@ int vp8_decode_frame(VP8D_COMP *pbi) vp8_yv12_extend_frame_borders_ptr(&pc->yv12_fb[pc->new_fb_idx]); /*cm->frame_to_show);*/ } else +#endif { int ibc = 0; int num_part = 1 << pc->multi_token_partition; diff --git a/vp8/decoder/onyxd_if.c b/vp8/decoder/onyxd_if.c index ad0bf376e..f0ee8e157 100644 --- a/vp8/decoder/onyxd_if.c +++ b/vp8/decoder/onyxd_if.c @@ -113,8 +113,10 @@ VP8D_PTR vp8dx_create_decompressor(VP8D_CONFIG *oxcf) pbi->ready_for_new_data = 1; pbi->CPUFreq = 0; /*vp8_get_processor_freq();*/ +#if CONFIG_MULTITHREAD pbi->max_threads = oxcf->max_threads; vp8_decoder_create_threads(pbi); +#endif /* vp8cx_init_de_quantizer() is first called here. Add check in frame_init_dequantizer() to avoid * unnecessary calling of vp8cx_init_de_quantizer() for every frame. @@ -152,8 +154,8 @@ void vp8dx_remove_decompressor(VP8D_PTR ptr) #if CONFIG_MULTITHREAD if (pbi->b_multithreaded_rd) vp8mt_de_alloc_temp_buffers(pbi, pbi->common.mb_rows); -#endif vp8_decoder_remove_threads(pbi); +#endif vp8_remove_common(&pbi->common); vpx_free(pbi); } @@ -410,6 +412,7 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign return retcode; } +#if CONFIG_MULTITHREAD if (pbi->b_multithreaded_rd && cm->multi_token_partition != ONE_PARTITION) { if (swap_frame_buffers (cm)) @@ -427,6 +430,7 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign return -1; } } else +#endif { if (swap_frame_buffers (cm)) { diff --git a/vp8/decoder/onyxd_int.h b/vp8/decoder/onyxd_int.h index f83c528c4..47b4fe0a5 100644 --- a/vp8/decoder/onyxd_int.h +++ b/vp8/decoder/onyxd_int.h @@ -87,13 +87,14 @@ typedef struct VP8Decompressor unsigned int time_decoding; unsigned int time_loop_filtering; +#if CONFIG_MULTITHREAD + /* variable for threading */ + volatile int b_multithreaded_rd; int max_threads; int current_mb_col_main; int decoding_thread_count; int allocated_decoding_thread_count; - /* variable for threading */ -#if CONFIG_MULTITHREAD int mt_baseline_filter_level[MAX_MB_SEGMENTS]; int sync_range; int *mt_current_mb_col; /* Each row remembers its already decoded column. */ diff --git a/vp8/decoder/reconintra_mt.c b/vp8/decoder/reconintra_mt.c index ad4324b27..854aba35a 100644 --- a/vp8/decoder/reconintra_mt.c +++ b/vp8/decoder/reconintra_mt.c @@ -21,7 +21,6 @@ void vp8mt_build_intra_predictors_mby(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col) { -#if CONFIG_MULTITHREAD unsigned char *yabove_row; /* = x->dst.y_buffer - x->dst.y_stride; */ unsigned char *yleft_col; unsigned char yleft_buf[16]; @@ -146,17 +145,10 @@ void vp8mt_build_intra_predictors_mby(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row case MB_MODE_COUNT: break; } -#else - (void) pbi; - (void) x; - (void) mb_row; - (void) mb_col; -#endif } void vp8mt_build_intra_predictors_mby_s(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col) { -#if CONFIG_MULTITHREAD unsigned char *yabove_row; /* = x->dst.y_buffer - x->dst.y_stride; */ unsigned char *yleft_col; unsigned char yleft_buf[16]; @@ -289,17 +281,10 @@ void vp8mt_build_intra_predictors_mby_s(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_r case MB_MODE_COUNT: break; } -#else - (void) pbi; - (void) x; - (void) mb_row; - (void) mb_col; -#endif } void vp8mt_build_intra_predictors_mbuv(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col) { -#if CONFIG_MULTITHREAD unsigned char *uabove_row; /* = x->dst.u_buffer - x->dst.uv_stride; */ unsigned char *uleft_col; /*[16];*/ unsigned char uleft_buf[8]; @@ -452,17 +437,10 @@ void vp8mt_build_intra_predictors_mbuv(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_ro case MB_MODE_COUNT: break; } -#else - (void) pbi; - (void) x; - (void) mb_row; - (void) mb_col; -#endif } void vp8mt_build_intra_predictors_mbuv_s(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col) { -#if CONFIG_MULTITHREAD unsigned char *uabove_row; /* = x->dst.u_buffer - x->dst.uv_stride; */ unsigned char *uleft_col; /*[16];*/ unsigned char uleft_buf[8]; @@ -621,12 +599,6 @@ void vp8mt_build_intra_predictors_mbuv_s(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_ case MB_MODE_COUNT: break; } -#else - (void) pbi; - (void) x; - (void) mb_row; - (void) mb_col; -#endif } @@ -638,7 +610,6 @@ void vp8mt_predict_intra4x4(VP8D_COMP *pbi, int mb_col, int num) { -#if CONFIG_MULTITHREAD int i, r, c; unsigned char *Above; /* = *(x->base_dst) + x->dst - x->dst_stride; */ @@ -935,15 +906,6 @@ void vp8mt_predict_intra4x4(VP8D_COMP *pbi, } -#else - (void) pbi; - (void) xd; - (void) b_mode; - (void) predictor; - (void) mb_row; - (void) mb_col; - (void) num; -#endif } /* copy 4 bytes from the above right down so that the 4x4 prediction modes using pixels above and @@ -951,7 +913,6 @@ void vp8mt_predict_intra4x4(VP8D_COMP *pbi, */ void vp8mt_intra_prediction_down_copy(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col) { -#if CONFIG_MULTITHREAD unsigned char *above_right; /* = *(x->block[0].base_dst) + x->block[0].dst - x->block[0].dst_stride + 16; */ unsigned int *src_ptr; unsigned int *dst_ptr0; @@ -973,10 +934,4 @@ void vp8mt_intra_prediction_down_copy(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row *dst_ptr0 = *src_ptr; *dst_ptr1 = *src_ptr; *dst_ptr2 = *src_ptr; -#else - (void) pbi; - (void) x; - (void) mb_row; - (void) mb_col; -#endif } diff --git a/vp8/decoder/threading.c b/vp8/decoder/threading.c index ec2cb2b07..271249a8d 100644 --- a/vp8/decoder/threading.c +++ b/vp8/decoder/threading.c @@ -38,7 +38,6 @@ extern void vp8_build_uvmvs(MACROBLOCKD *x, int fullpixel); void vp8_setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC *mbrd, int count) { -#if CONFIG_MULTITHREAD VP8_COMMON *const pc = & pbi->common; int i, j; @@ -88,18 +87,11 @@ void vp8_setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC for (i=0; i< pc->mb_rows; i++) pbi->mt_current_mb_col[i]=-1; -#else - (void) pbi; - (void) xd; - (void) mbrd; - (void) count; -#endif } void vp8mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int mb_col) { -#if CONFIG_MULTITHREAD int eobtotal = 0; int i, do_clamp = xd->mode_info_context->mbmi.need_to_clamp_mvs; VP8_COMMON *pc = &pbi->common; @@ -222,18 +214,11 @@ void vp8mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int mb (xd->qcoeff+16*16, xd->block[16].dequant, xd->predictor+16*16, xd->dst.u_buffer, xd->dst.v_buffer, xd->dst.uv_stride, xd->eobs+16); -#else - (void) pbi; - (void) xd; - (void) mb_row; - (void) mb_col; -#endif } THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data) { -#if CONFIG_MULTITHREAD int ithread = ((DECODETHREAD_DATA *)p_data)->ithread; VP8D_COMP *pbi = (VP8D_COMP *)(((DECODETHREAD_DATA *)p_data)->ptr1); MB_ROW_DEC *mbrd = (MB_ROW_DEC *)(((DECODETHREAD_DATA *)p_data)->ptr2); @@ -438,9 +423,6 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data) sem_post(&pbi->h_event_end_decoding); } } -#else - (void) p_data; -#endif return 0 ; } @@ -448,7 +430,6 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data) void vp8_decoder_create_threads(VP8D_COMP *pbi) { -#if CONFIG_MULTITHREAD int core_count = 0; int ithread; @@ -482,16 +463,11 @@ void vp8_decoder_create_threads(VP8D_COMP *pbi) pbi->allocated_decoding_thread_count = pbi->decoding_thread_count; } - -#else - (void) pbi; -#endif } void vp8mt_de_alloc_temp_buffers(VP8D_COMP *pbi, int mb_rows) { -#if CONFIG_MULTITHREAD VP8_COMMON *const pc = & pbi->common; int i; @@ -589,15 +565,11 @@ void vp8mt_de_alloc_temp_buffers(VP8D_COMP *pbi, int mb_rows) pbi->mt_vleft_col = NULL ; } } -#else - (void) pbi; -#endif } void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows) { -#if CONFIG_MULTITHREAD VP8_COMMON *const pc = & pbi->common; int i; int uv_width; @@ -646,17 +618,11 @@ void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows) for (i=0; i< pc->mb_rows; i++) CHECK_MEM_ERROR(pbi->mt_vleft_col[i], vpx_calloc(sizeof(unsigned char) * 8, 1)); } -#else - (void) pbi; - (void) width; -#endif } void vp8_decoder_remove_threads(VP8D_COMP *pbi) { -#if CONFIG_MULTITHREAD - /* shutdown MB Decoding thread; */ if (pbi->b_multithreaded_rd) { @@ -702,15 +668,11 @@ void vp8_decoder_remove_threads(VP8D_COMP *pbi) pbi->de_thread_data = NULL; } } -#else - (void) pbi; -#endif } void vp8mt_lpf_init( VP8D_COMP *pbi, int default_filt_lvl) { -#if CONFIG_MULTITHREAD VP8_COMMON *cm = &pbi->common; MACROBLOCKD *mbd = &pbi->mb; /*YV12_BUFFER_CONFIG *post = &cm->new_frame;*/ /*frame_to_show;*/ @@ -752,16 +714,11 @@ void vp8mt_lpf_init( VP8D_COMP *pbi, int default_filt_lvl) vp8_init_loop_filter(cm); else if (frame_type != cm->last_frame_type) vp8_frame_init_loop_filter(lfi, frame_type); -#else - (void) pbi; - (void) default_filt_lvl; -#endif } void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd) { -#if CONFIG_MULTITHREAD int mb_row; VP8_COMMON *pc = &pbi->common; @@ -981,8 +938,4 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd) } sem_wait(&pbi->h_event_end_decoding); /* add back for each frame */ -#else - (void) pbi; - (void) xd; -#endif } diff --git a/vp8/encoder/bitstream.c b/vp8/encoder/bitstream.c index c129ee01b..419646c30 100644 --- a/vp8/encoder/bitstream.c +++ b/vp8/encoder/bitstream.c @@ -1736,10 +1736,12 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned long *size) { vp8_start_encode(&cpi->bc2, cx_data + bc->pos); - if (!cpi->b_multi_threaded) - pack_tokens(&cpi->bc2, cpi->tok, cpi->tok_count); - else +#if CONFIG_MULTITHREAD + if (cpi->b_multi_threaded) pack_mb_row_tokens(cpi, &cpi->bc2); + else +#endif + pack_tokens(&cpi->bc2, cpi->tok, cpi->tok_count); vp8_stop_encode(&cpi->bc2); oh.first_partition_length_in_bytes = cpi->bc.pos ; diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c index 88e8d02b8..2950bb45f 100644 --- a/vp8/encoder/encodeframe.c +++ b/vp8/encoder/encodeframe.c @@ -853,28 +853,9 @@ void vp8_encode_frame(VP8_COMP *cpi) struct vpx_usec_timer emr_timer; vpx_usec_timer_start(&emr_timer); - if (!cpi->b_multi_threaded) - { - // for each macroblock row in image - for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) - { - - vp8_zero(cm->left_context) - - encode_mb_row(cpi, cm, mb_row, x, xd, &tp, segment_counts, &totalrate); - - // adjust to the next row of mbs - x->src.y_buffer += 16 * x->src.y_stride - 16 * cm->mb_cols; - x->src.u_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols; - x->src.v_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols; - } - - cpi->tok_count = tp - cpi->tok; - - } - else - { #if CONFIG_MULTITHREAD + if (cpi->b_multi_threaded) + { int i; vp8cx_init_mbrthread_data(cpi, x, cpi->mb_row_ei, 1, cpi->encoding_thread_count); @@ -939,7 +920,25 @@ void vp8_encode_frame(VP8_COMP *cpi) x->activity_sum += cpi->mb_row_ei[i].mb.activity_sum; } + } + else #endif + { + // for each macroblock row in image + for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) + { + + vp8_zero(cm->left_context) + + encode_mb_row(cpi, cm, mb_row, x, xd, &tp, segment_counts, &totalrate); + + // adjust to the next row of mbs + x->src.y_buffer += 16 * x->src.y_stride - 16 * cm->mb_cols; + x->src.u_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols; + x->src.v_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols; + } + + cpi->tok_count = tp - cpi->tok; } @@ -1304,7 +1303,7 @@ int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t) Error16x16 = vp8_rd_pick_intra16x16mby_mode(cpi, x, &rate16x16, &rate16x16_tokenonly, &dist16x16); - Error4x4 = vp8_rd_pick_intra4x4mby_modes(cpi, x, &rate4x4, &rate4x4_tokenonly, &dist4x4); + Error4x4 = vp8_rd_pick_intra4x4mby_modes(cpi, x, &rate4x4, &rate4x4_tokenonly, &dist4x4, Error16x16); rate += (Error4x4 < Error16x16) ? rate4x4 : rate16x16; } diff --git a/vp8/encoder/encodemv.c b/vp8/encoder/encodemv.c index cce753013..4cb4c6e55 100644 --- a/vp8/encoder/encodemv.c +++ b/vp8/encoder/encodemv.c @@ -128,7 +128,7 @@ static unsigned int cost_mvcomponent(const int v, const struct mv_context *mvc) while (--i > 3); - if (x & 240) + if (x & 0xFFF0) cost += vp8_cost_bit(p [MVPbits + 3], (x >> 3) & 1); } diff --git a/vp8/encoder/ethreading.c b/vp8/encoder/ethreading.c index d3f3bc4ca..967d2a6a9 100644 --- a/vp8/encoder/ethreading.c +++ b/vp8/encoder/ethreading.c @@ -13,6 +13,8 @@ #include "common.h" #include "extend.h" +#if CONFIG_MULTITHREAD + extern int vp8cx_encode_inter_macroblock(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t, int recon_yoffset, int recon_uvoffset); @@ -25,7 +27,6 @@ extern void vp8_setup_block_ptrs(MACROBLOCK *x); static THREAD_FUNCTION thread_encoding_proc(void *p_data) { -#if CONFIG_MULTITHREAD int ithread = ((ENCODETHREAD_DATA *)p_data)->ithread; VP8_COMP *cpi = (VP8_COMP *)(((ENCODETHREAD_DATA *)p_data)->ptr1); MB_ROW_COMP *mbri = (MB_ROW_COMP *)(((ENCODETHREAD_DATA *)p_data)->ptr2); @@ -247,10 +248,6 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) } } -#else - (void) p_data; -#endif - //printf("exit thread %d\n", ithread); return 0; } @@ -436,10 +433,6 @@ void vp8cx_create_encoder_threads(VP8_COMP *cpi) cpi->processor_core_count = 32; //vp8_get_proc_core_count(); - CHECK_MEM_ERROR(cpi->tplist, vpx_malloc(sizeof(TOKENLIST) * cpi->common.mb_rows)); - -#if CONFIG_MULTITHREAD - if (cpi->processor_core_count > 1 && cpi->oxcf.multi_threaded > 1) { int ithread; @@ -488,13 +481,10 @@ void vp8cx_create_encoder_threads(VP8_COMP *cpi) } -#endif } void vp8cx_remove_encoder_threads(VP8_COMP *cpi) { -#if CONFIG_MULTITHREAD - if (cpi->b_multi_threaded) { //shutdown other threads @@ -521,7 +511,5 @@ void vp8cx_remove_encoder_threads(VP8_COMP *cpi) vpx_free(cpi->en_thread_data); vpx_free(cpi->mt_current_mb_col); } - -#endif - vpx_free(cpi->tplist); } +#endif diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c index 1821f6c53..60ccc4e6e 100644 --- a/vp8/encoder/onyx_if.c +++ b/vp8/encoder/onyx_if.c @@ -331,6 +331,9 @@ static void setup_features(VP8_COMP *cpi) void vp8_dealloc_compressor_data(VP8_COMP *cpi) { + vpx_free(cpi->tplist); + cpi->tplist = NULL; + // Delete last frame MV storage buffers if (cpi->lfmv != 0) vpx_free(cpi->lfmv); @@ -1542,6 +1545,8 @@ void vp8_alloc_compressor_data(VP8_COMP *cpi) else cpi->mt_sync_range = 16; #endif + + CHECK_MEM_ERROR(cpi->tplist, vpx_malloc(sizeof(TOKENLIST) * cpi->common.mb_rows)); } @@ -2492,7 +2497,9 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf) init_mv_ref_counts(); #endif +#if CONFIG_MULTITHREAD vp8cx_create_encoder_threads(cpi); +#endif cpi->fn_ptr[BLOCK_16X16].sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16); cpi->fn_ptr[BLOCK_16X16].vf = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x16); @@ -2767,7 +2774,9 @@ void vp8_remove_compressor(VP8_PTR *ptr) } +#if CONFIG_MULTITHREAD vp8cx_remove_encoder_threads(cpi); +#endif vp8_dealloc_compressor_data(cpi); vpx_free(cpi->mb.ss); diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h index d0e48eee2..39d7e95c5 100644 --- a/vp8/encoder/onyx_int.h +++ b/vp8/encoder/onyx_int.h @@ -593,6 +593,7 @@ typedef struct int cyclic_refresh_q; signed char *cyclic_refresh_map; +#if CONFIG_MULTITHREAD // multithread data int * mt_current_mb_col; int mt_sync_range; @@ -600,13 +601,10 @@ typedef struct int b_multi_threaded; int encoding_thread_count; -#if CONFIG_MULTITHREAD pthread_t *h_encoding_thread; -#endif MB_ROW_COMP *mb_row_ei; ENCODETHREAD_DATA *en_thread_data; -#if CONFIG_MULTITHREAD //events sem_t *h_event_start_encoding; sem_t h_event_end_encoding; diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c index 39d694dff..80af8fa74 100644 --- a/vp8/encoder/rdopt.c +++ b/vp8/encoder/rdopt.c @@ -645,7 +645,7 @@ static void macro_block_yrd( MACROBLOCK *mb, *Rate = vp8_rdcost_mby(mb); } -static void rd_pick_intra4x4block( +static int rd_pick_intra4x4block( VP8_COMP *cpi, MACROBLOCK *x, BLOCK *be, @@ -711,16 +711,20 @@ static void rd_pick_intra4x4block( b->bmi.mode = (B_PREDICTION_MODE)(*best_mode); vp8_encode_intra4x4block_rd(IF_RTCD(&cpi->rtcd), x, be, b, b->bmi.mode); + return best_rd; + } -int vp8_rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rate, int *rate_y, int *Distortion) +int vp8_rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rate, + int *rate_y, int *Distortion, int best_rd) { MACROBLOCKD *const xd = &mb->e_mbd; int i; int cost = mb->mbmode_cost [xd->frame_type] [B_PRED]; int distortion = 0; int tot_rate_y = 0; + int total_rd = 0; ENTROPY_CONTEXT_PLANES t_above, t_left; ENTROPY_CONTEXT *ta; ENTROPY_CONTEXT *tl; @@ -742,7 +746,7 @@ int vp8_rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rate, int B_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_mode); int UNINITIALIZED_IS_SAFE(r), UNINITIALIZED_IS_SAFE(ry), UNINITIALIZED_IS_SAFE(d); - rd_pick_intra4x4block( + total_rd += rd_pick_intra4x4block( cpi, mb, mb->block + i, xd->block + i, &best_mode, A, L, ta + vp8_block2above[i], tl + vp8_block2left[i], &r, &ry, &d); @@ -751,8 +755,14 @@ int vp8_rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rate, int distortion += d; tot_rate_y += ry; mic->bmi[i].mode = xd->block[i].bmi.mode = best_mode; + + if(total_rd >= best_rd) + break; } + if(total_rd >= best_rd) + return INT_MAX; + *Rate = cost; *rate_y += tot_rate_y; *Distortion = distortion; @@ -2025,15 +2035,28 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int switch (this_mode) { case B_PRED: + { + int tmp_rd; + // Note the rate value returned here includes the cost of coding the BPRED mode : x->mbmode_cost[x->e_mbd.frame_type][BPRED]; - vp8_rd_pick_intra4x4mby_modes(cpi, x, &rate, &rate_y, &distortion); + tmp_rd = vp8_rd_pick_intra4x4mby_modes(cpi, x, &rate, &rate_y, &distortion, best_yrd); rate2 += rate; distortion2 += distortion; - rate2 += uv_intra_rate; - rate_uv = uv_intra_rate_tokenonly; - distortion2 += uv_intra_distortion; - distortion_uv = uv_intra_distortion; - break; + + if(tmp_rd < best_yrd) + { + rate2 += uv_intra_rate; + rate_uv = uv_intra_rate_tokenonly; + distortion2 += uv_intra_distortion; + distortion_uv = uv_intra_distortion; + } + else + { + this_rd = INT_MAX; + disable_skip = 1; + } + } + break; case SPLITMV: { diff --git a/vp8/encoder/rdopt.h b/vp8/encoder/rdopt.h index d87440998..72ba9a0b5 100644 --- a/vp8/encoder/rdopt.h +++ b/vp8/encoder/rdopt.h @@ -12,7 +12,7 @@ #ifndef __INC_RDOPT_H #define __INC_RDOPT_H void vp8_initialize_rd_consts(VP8_COMP *cpi, int Qvalue); -int vp8_rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *rate, int *rate_to, int *distortion); +int vp8_rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *rate, int *rate_to, int *distortion, int best_rd); int vp8_rd_pick_intra16x16mby_mode(VP8_COMP *cpi, MACROBLOCK *x, int *returnrate, int *rate_to, int *returndistortion); int vp8_rd_pick_intra_mbuv_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate, int *rate_to, int *distortion); extern int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int recon_uvoffset, int *returnrate, int *returndistortion, int *returnintra); diff --git a/vp8/encoder/temporal_filter.c b/vp8/encoder/temporal_filter.c index f28daaff9..3c6d1a4d9 100644 --- a/vp8/encoder/temporal_filter.c +++ b/vp8/encoder/temporal_filter.c @@ -70,7 +70,7 @@ static void vp8_temporal_filter_predictors_mb_c // U & V mv_row >>= 1; mv_col >>= 1; - stride >>= 1; + stride = (stride + 1) >> 1; offset = (mv_row >> 3) * stride + (mv_col >> 3); uptr = u_mb_ptr + offset; vptr = v_mb_ptr + offset; diff --git a/vp8/vp8_common.mk b/vp8/vp8_common.mk index 270006ed8..bf9fb513c 100644 --- a/vp8/vp8_common.mk +++ b/vp8/vp8_common.mk @@ -35,7 +35,7 @@ VP8_COMMON_SRCS-yes += common/entropy.c VP8_COMMON_SRCS-yes += common/entropymode.c VP8_COMMON_SRCS-yes += common/entropymv.c VP8_COMMON_SRCS-yes += common/extend.c -VP8_COMMON_SRCS-yes += common/filter_c.c +VP8_COMMON_SRCS-yes += common/filter.c VP8_COMMON_SRCS-yes += common/findnearmv.c VP8_COMMON_SRCS-yes += common/generic/systemdependent.c VP8_COMMON_SRCS-yes += common/idctllm.c diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c index 5b1fc581e..7e8211ae4 100644 --- a/vp8/vp8_cx_iface.c +++ b/vp8/vp8_cx_iface.c @@ -934,8 +934,8 @@ static vpx_image_t *vp8e_get_preview(vpx_codec_alg_priv_t *ctx) ctx->preview_img.x_chroma_shift = 1; ctx->preview_img.y_chroma_shift = 1; - ctx->preview_img.d_w = ctx->cfg.g_w; - ctx->preview_img.d_h = ctx->cfg.g_h; + ctx->preview_img.d_w = sd.y_width; + ctx->preview_img.d_h = sd.y_height; ctx->preview_img.stride[VPX_PLANE_Y] = sd.y_stride; ctx->preview_img.stride[VPX_PLANE_U] = sd.uv_stride; ctx->preview_img.stride[VPX_PLANE_V] = sd.uv_stride; diff --git a/vp8/vp8cx.mk b/vp8/vp8cx.mk index 932f145e6..1ec26e69e 100644 --- a/vp8/vp8cx.mk +++ b/vp8/vp8cx.mk @@ -42,7 +42,7 @@ VP8_CX_SRCS-yes += encoder/encodeframe.c VP8_CX_SRCS-yes += encoder/encodeintra.c VP8_CX_SRCS-yes += encoder/encodemb.c VP8_CX_SRCS-yes += encoder/encodemv.c -VP8_CX_SRCS-yes += encoder/ethreading.c +VP8_CX_SRCS-$(CONFIG_MULTITHREAD) += encoder/ethreading.c VP8_CX_SRCS-yes += encoder/firstpass.c VP8_CX_SRCS-yes += encoder/generic/csystemdependent.c VP8_CX_SRCS-yes += encoder/block.h diff --git a/vp8/vp8dx.mk b/vp8/vp8dx.mk index 1acd67453..62f6211f6 100644 --- a/vp8/vp8dx.mk +++ b/vp8/vp8dx.mk @@ -65,7 +65,7 @@ VP8_DX_SRCS-yes += decoder/detokenize.h VP8_DX_SRCS-yes += decoder/onyxd_int.h VP8_DX_SRCS-yes += decoder/treereader.h VP8_DX_SRCS-yes += decoder/onyxd_if.c -VP8_DX_SRCS-yes += decoder/threading.c +VP8_DX_SRCS-$(CONFIG_MULTITHREAD) += decoder/threading.c VP8_DX_SRCS-yes += decoder/idct_blk.c VP8_DX_SRCS-$(CONFIG_MULTITHREAD) += decoder/reconintra_mt.h VP8_DX_SRCS-$(CONFIG_MULTITHREAD) += decoder/reconintra_mt.c