pjs/media/libvpx/I1bad27ea.patch

625 строки
19 KiB
Diff

# HG changeset patch
# Parent 5a1a0398f8503451582602525c3e7b35def5d0b9
# User Timothy B. Terriberry <tterribe@vt.edu>
Fix variance overflow
Upstream Change-Id: I1bad27ea0720067def6d71a6da5f789508cec265
diff --git a/media/libvpx/vp8/encoder/arm/armv6/vp8_variance16x16_armv6.asm b/media/libvpx/vp8/encoder/arm/armv6/vp8_variance16x16_armv6.asm
--- a/media/libvpx/vp8/encoder/arm/armv6/vp8_variance16x16_armv6.asm
+++ b/media/libvpx/vp8/encoder/arm/armv6/vp8_variance16x16_armv6.asm
@@ -139,16 +139,16 @@ loop
subs r12, r12, #1
bne loop
; return stuff
ldr r6, [sp, #40] ; get address of sse
mul r0, r8, r8 ; sum * sum
str r11, [r6] ; store sse
- sub r0, r11, r0, asr #8 ; return (sse - ((sum * sum) >> 8))
+ sub r0, r11, r0, lsr #8 ; return (sse - ((sum * sum) >> 8))
ldmfd sp!, {r4-r12, pc}
ENDP
END
diff --git a/media/libvpx/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm b/media/libvpx/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm
--- a/media/libvpx/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm
+++ b/media/libvpx/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm
@@ -164,17 +164,17 @@ loop
subs r12, r12, #1
bne loop
; return stuff
ldr r6, [sp, #40] ; get address of sse
mul r0, r8, r8 ; sum * sum
str r11, [r6] ; store sse
- sub r0, r11, r0, asr #8 ; return (sse - ((sum * sum) >> 8))
+ sub r0, r11, r0, lsr #8 ; return (sse - ((sum * sum) >> 8))
ldmfd sp!, {r4-r12, pc}
ENDP
c80808080
DCD 0x80808080
diff --git a/media/libvpx/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm b/media/libvpx/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm
--- a/media/libvpx/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm
+++ b/media/libvpx/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm
@@ -205,17 +205,17 @@ loop
smlad r11, r7, r7, r11 ; dual signed multiply, add and accumulate (2)
bne loop
; return stuff
ldr r6, [sp, #40] ; get address of sse
mul r0, r8, r8 ; sum * sum
str r11, [r6] ; store sse
- sub r0, r11, r0, asr #8 ; return (sse - ((sum * sum) >> 8))
+ sub r0, r11, r0, lsr #8 ; return (sse - ((sum * sum) >> 8))
ldmfd sp!, {r4-r12, pc}
ENDP
c80808080
DCD 0x80808080
diff --git a/media/libvpx/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm b/media/libvpx/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm
--- a/media/libvpx/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm
+++ b/media/libvpx/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm
@@ -166,17 +166,17 @@ loop
subs r12, r12, #1
bne loop
; return stuff
ldr r6, [sp, #40] ; get address of sse
mul r0, r8, r8 ; sum * sum
str r11, [r6] ; store sse
- sub r0, r11, r0, asr #8 ; return (sse - ((sum * sum) >> 8))
+ sub r0, r11, r0, lsr #8 ; return (sse - ((sum * sum) >> 8))
ldmfd sp!, {r4-r12, pc}
ENDP
c80808080
DCD 0x80808080
diff --git a/media/libvpx/vp8/encoder/arm/neon/variance_neon.asm b/media/libvpx/vp8/encoder/arm/neon/variance_neon.asm
--- a/media/libvpx/vp8/encoder/arm/neon/variance_neon.asm
+++ b/media/libvpx/vp8/encoder/arm/neon/variance_neon.asm
@@ -72,24 +72,24 @@ variance16x16_neon_loop
vpaddl.u32 q1, q10
vadd.s64 d0, d0, d1
vadd.u64 d1, d2, d3
;vmov.32 r0, d0[0] ;this instruction costs a lot
;vmov.32 r1, d1[0]
;mul r0, r0, r0
;str r1, [r12]
- ;sub r0, r1, r0, asr #8
+ ;sub r0, r1, r0, lsr #8
- ;sum is in [-255x256, 255x256]. sumxsum is 32-bit. Shift to right should
- ;have sign-bit exension, which is vshr.s. Have to use s32 to make it right.
+ ; while sum is signed, sum * sum is always positive and must be treated as
+ ; unsigned to avoid propagating the sign bit.
vmull.s32 q5, d0, d0
vst1.32 {d1[0]}, [r12] ;store sse
- vshr.s32 d10, d10, #8
- vsub.s32 d0, d1, d10
+ vshr.u32 d10, d10, #8
+ vsub.u32 d0, d1, d10
vmov.32 r0, d0[0] ;return
bx lr
ENDP
;================================
;unsigned int vp8_variance16x8_c(
@@ -140,18 +140,18 @@ variance16x8_neon_loop
ldr r12, [sp] ;load *sse from stack
vpaddl.u32 q1, q10
vadd.s64 d0, d0, d1
vadd.u64 d1, d2, d3
vmull.s32 q5, d0, d0
vst1.32 {d1[0]}, [r12] ;store sse
- vshr.s32 d10, d10, #7
- vsub.s32 d0, d1, d10
+ vshr.u32 d10, d10, #7
+ vsub.u32 d0, d1, d10
vmov.32 r0, d0[0] ;return
bx lr
ENDP
;=================================
;unsigned int vp8_variance8x16_c(
@@ -195,18 +195,18 @@ variance8x16_neon_loop
ldr r12, [sp] ;load *sse from stack
vpaddl.u32 q1, q10
vadd.s64 d0, d0, d1
vadd.u64 d1, d2, d3
vmull.s32 q5, d0, d0
vst1.32 {d1[0]}, [r12] ;store sse
- vshr.s32 d10, d10, #7
- vsub.s32 d0, d1, d10
+ vshr.u32 d10, d10, #7
+ vsub.u32 d0, d1, d10
vmov.32 r0, d0[0] ;return
bx lr
ENDP
;==================================
; r0 unsigned char *src_ptr
@@ -260,17 +260,17 @@ variance8x8_neon_loop
ldr r12, [sp] ;load *sse from stack
vpaddl.u32 q1, q10
vadd.s64 d0, d0, d1
vadd.u64 d1, d2, d3
vmull.s32 q5, d0, d0
vst1.32 {d1[0]}, [r12] ;store sse
- vshr.s32 d10, d10, #6
- vsub.s32 d0, d1, d10
+ vshr.u32 d10, d10, #6
+ vsub.u32 d0, d1, d10
vmov.32 r0, d0[0] ;return
bx lr
ENDP
END
diff --git a/media/libvpx/vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm b/media/libvpx/vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm
--- a/media/libvpx/vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm
+++ b/media/libvpx/vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm
@@ -400,18 +400,18 @@ sub_pixel_variance16x16_neon_loop
vpaddl.s32 q0, q8 ;accumulate sum
vpaddl.u32 q1, q10
vadd.s64 d0, d0, d1
vadd.u64 d1, d2, d3
vmull.s32 q5, d0, d0
vst1.32 {d1[0]}, [r6] ;store sse
- vshr.s32 d10, d10, #8
- vsub.s32 d0, d1, d10
+ vshr.u32 d10, d10, #8
+ vsub.u32 d0, d1, d10
add sp, sp, #528
vmov.32 r0, d0[0] ;return
pop {r4-r6,pc}
ENDP
diff --git a/media/libvpx/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm b/media/libvpx/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm
--- a/media/libvpx/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm
+++ b/media/libvpx/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm
@@ -107,18 +107,18 @@ vp8_filt_fpo16x16s_4_0_loop_neon
vpaddl.s32 q0, q8 ;accumulate sum
vpaddl.u32 q1, q10
vadd.s64 d0, d0, d1
vadd.u64 d1, d2, d3
vmull.s32 q5, d0, d0
vst1.32 {d1[0]}, [lr] ;store sse
- vshr.s32 d10, d10, #8
- vsub.s32 d0, d1, d10
+ vshr.u32 d10, d10, #8
+ vsub.u32 d0, d1, d10
vmov.32 r0, d0[0] ;return
pop {pc}
ENDP
;================================================
;unsigned int vp8_variance_halfpixvar16x16_v_neon
;(
@@ -203,18 +203,18 @@ vp8_filt_spo16x16s_0_4_loop_neon
vpaddl.s32 q0, q8 ;accumulate sum
vpaddl.u32 q1, q10
vadd.s64 d0, d0, d1
vadd.u64 d1, d2, d3
vmull.s32 q5, d0, d0
vst1.32 {d1[0]}, [lr] ;store sse
- vshr.s32 d10, d10, #8
- vsub.s32 d0, d1, d10
+ vshr.u32 d10, d10, #8
+ vsub.u32 d0, d1, d10
vmov.32 r0, d0[0] ;return
pop {pc}
ENDP
;================================================
;unsigned int vp8_variance_halfpixvar16x16_hv_neon
;(
@@ -322,18 +322,18 @@ vp8_filt16x16s_4_4_loop_neon
vpaddl.s32 q0, q13 ;accumulate sum
vpaddl.u32 q1, q15
vadd.s64 d0, d0, d1
vadd.u64 d1, d2, d3
vmull.s32 q5, d0, d0
vst1.32 {d1[0]}, [lr] ;store sse
- vshr.s32 d10, d10, #8
- vsub.s32 d0, d1, d10
+ vshr.u32 d10, d10, #8
+ vsub.u32 d0, d1, d10
vmov.32 r0, d0[0] ;return
pop {pc}
ENDP
;==============================
; r0 unsigned char *src_ptr,
; r1 int src_pixels_per_line,
@@ -555,18 +555,18 @@ sub_pixel_variance16x16s_neon_loop
vpaddl.s32 q0, q8 ;accumulate sum
vpaddl.u32 q1, q10
vadd.s64 d0, d0, d1
vadd.u64 d1, d2, d3
vmull.s32 q5, d0, d0
vst1.32 {d1[0]}, [lr] ;store sse
- vshr.s32 d10, d10, #8
- vsub.s32 d0, d1, d10
+ vshr.u32 d10, d10, #8
+ vsub.u32 d0, d1, d10
add sp, sp, #256
vmov.32 r0, d0[0] ;return
pop {r4, pc}
ENDP
END
diff --git a/media/libvpx/vp8/encoder/arm/neon/vp8_subpixelvariance8x8_neon.asm b/media/libvpx/vp8/encoder/arm/neon/vp8_subpixelvariance8x8_neon.asm
--- a/media/libvpx/vp8/encoder/arm/neon/vp8_subpixelvariance8x8_neon.asm
+++ b/media/libvpx/vp8/encoder/arm/neon/vp8_subpixelvariance8x8_neon.asm
@@ -201,18 +201,18 @@ sub_pixel_variance8x8_neon_loop
vpaddl.s32 q0, q8 ;accumulate sum
vpaddl.u32 q1, q10
vadd.s64 d0, d0, d1
vadd.u64 d1, d2, d3
vmull.s32 q5, d0, d0
vst1.32 {d1[0]}, [lr] ;store sse
- vshr.s32 d10, d10, #6
- vsub.s32 d0, d1, d10
+ vshr.u32 d10, d10, #6
+ vsub.u32 d0, d1, d10
vmov.32 r0, d0[0] ;return
pop {r4-r5, pc}
ENDP
;-----------------
diff --git a/media/libvpx/vp8/encoder/variance_c.c b/media/libvpx/vp8/encoder/variance_c.c
--- a/media/libvpx/vp8/encoder/variance_c.c
+++ b/media/libvpx/vp8/encoder/variance_c.c
@@ -70,82 +70,82 @@ unsigned int vp8_variance16x16_c(
unsigned int *sse)
{
unsigned int var;
int avg;
variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &var, &avg);
*sse = var;
- return (var - ((avg * avg) >> 8));
+ return (var - ((unsigned int)(avg * avg) >> 8));
}
unsigned int vp8_variance8x16_c(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse)
{
unsigned int var;
int avg;
variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16, &var, &avg);
*sse = var;
- return (var - ((avg * avg) >> 7));
+ return (var - ((unsigned int)(avg * avg) >> 7));
}
unsigned int vp8_variance16x8_c(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse)
{
unsigned int var;
int avg;
variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8, &var, &avg);
*sse = var;
- return (var - ((avg * avg) >> 7));
+ return (var - ((unsigned int)(avg * avg) >> 7));
}
unsigned int vp8_variance8x8_c(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse)
{
unsigned int var;
int avg;
variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, &var, &avg);
*sse = var;
- return (var - ((avg * avg) >> 6));
+ return (var - ((unsigned int)(avg * avg) >> 6));
}
unsigned int vp8_variance4x4_c(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse)
{
unsigned int var;
int avg;
variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 4, &var, &avg);
*sse = var;
- return (var - ((avg * avg) >> 4));
+ return (var - ((unsigned int)(avg * avg) >> 4));
}
unsigned int vp8_mse16x16_c(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
diff --git a/media/libvpx/vp8/encoder/x86/variance_mmx.c b/media/libvpx/vp8/encoder/x86/variance_mmx.c
--- a/media/libvpx/vp8/encoder/x86/variance_mmx.c
+++ b/media/libvpx/vp8/encoder/x86/variance_mmx.c
@@ -86,34 +86,34 @@ unsigned int vp8_variance4x4_mmx(
int recon_stride,
unsigned int *sse)
{
unsigned int var;
int avg;
vp8_get4x4var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ;
*sse = var;
- return (var - ((avg * avg) >> 4));
+ return (var - ((unsigned int)(avg * avg) >> 4));
}
unsigned int vp8_variance8x8_mmx(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse)
{
unsigned int var;
int avg;
vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ;
*sse = var;
- return (var - ((avg * avg) >> 6));
+ return (var - ((unsigned int)(avg * avg) >> 6));
}
unsigned int vp8_mse16x16_mmx(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
@@ -148,17 +148,17 @@ unsigned int vp8_variance16x16_mmx(
vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse2, &sum2) ;
vp8_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride, ref_ptr + 8 * recon_stride + 8, recon_stride, &sse3, &sum3);
var = sse0 + sse1 + sse2 + sse3;
avg = sum0 + sum1 + sum2 + sum3;
*sse = var;
- return (var - ((avg * avg) >> 8));
+ return (var - ((unsigned int)(avg * avg) >> 8));
}
unsigned int vp8_variance16x8_mmx(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse)
@@ -167,17 +167,17 @@ unsigned int vp8_variance16x8_mmx(
int sum0, sum1, avg;
vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
var = sse0 + sse1;
avg = sum0 + sum1;
*sse = var;
- return (var - ((avg * avg) >> 7));
+ return (var - ((unsigned int)(avg * avg) >> 7));
}
unsigned int vp8_variance8x16_mmx(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
@@ -189,17 +189,17 @@ unsigned int vp8_variance8x16_mmx(
vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse1, &sum1) ;
var = sse0 + sse1;
avg = sum0 + sum1;
*sse = var;
- return (var - ((avg * avg) >> 7));
+ return (var - ((unsigned int)(avg * avg) >> 7));
}
unsigned int vp8_sub_pixel_variance4x4_mmx
(
const unsigned char *src_ptr,
int src_pixels_per_line,
diff --git a/media/libvpx/vp8/encoder/x86/variance_sse2.c b/media/libvpx/vp8/encoder/x86/variance_sse2.c
--- a/media/libvpx/vp8/encoder/x86/variance_sse2.c
+++ b/media/libvpx/vp8/encoder/x86/variance_sse2.c
@@ -143,34 +143,34 @@ unsigned int vp8_variance4x4_wmt(
int recon_stride,
unsigned int *sse)
{
unsigned int var;
int avg;
vp8_get4x4var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ;
*sse = var;
- return (var - ((avg * avg) >> 4));
+ return (var - ((unsigned int)(avg * avg) >> 4));
}
unsigned int vp8_variance8x8_wmt
(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse)
{
unsigned int var;
int avg;
vp8_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ;
*sse = var;
- return (var - ((avg * avg) >> 6));
+ return (var - ((unsigned int)(avg * avg) >> 6));
}
unsigned int vp8_variance16x16_wmt
(
const unsigned char *src_ptr,
int source_stride,
@@ -215,17 +215,17 @@ unsigned int vp8_variance16x8_wmt
int sum0, sum1, avg;
vp8_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
vp8_get8x8var_sse2(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
var = sse0 + sse1;
avg = sum0 + sum1;
*sse = var;
- return (var - ((avg * avg) >> 7));
+ return (var - ((unsigned int)(avg * avg) >> 7));
}
unsigned int vp8_variance8x16_wmt
(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
@@ -236,17 +236,17 @@ unsigned int vp8_variance8x16_wmt
int sum0, sum1, avg;
vp8_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
vp8_get8x8var_sse2(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse1, &sum1) ;
var = sse0 + sse1;
avg = sum0 + sum1;
*sse = var;
- return (var - ((avg * avg) >> 7));
+ return (var - ((unsigned int)(avg * avg) >> 7));
}
unsigned int vp8_sub_pixel_variance4x4_wmt
(
const unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
diff --git a/media/libvpx/vp8/encoder/x86/variance_ssse3.c b/media/libvpx/vp8/encoder/x86/variance_ssse3.c
--- a/media/libvpx/vp8/encoder/x86/variance_ssse3.c
+++ b/media/libvpx/vp8/encoder/x86/variance_ssse3.c
@@ -107,17 +107,17 @@ unsigned int vp8_sub_pixel_variance16x16
vp8_filter_block2d_bil_var_ssse3(
src_ptr, src_pixels_per_line,
dst_ptr, dst_pixels_per_line, 16,
xoffset, yoffset,
&xsum0, &xxsum0);
}
*sse = xxsum0;
- return (xxsum0 - ((xsum0 * xsum0) >> 8));
+ return (xxsum0 - ((unsigned int)(xsum0 * xsum0) >> 8));
}
unsigned int vp8_sub_pixel_variance16x8_ssse3
(
const unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
@@ -156,10 +156,10 @@ unsigned int vp8_sub_pixel_variance16x8_
vp8_filter_block2d_bil_var_ssse3(
src_ptr, src_pixels_per_line,
dst_ptr, dst_pixels_per_line, 8,
xoffset, yoffset,
&xsum0, &xxsum0);
}
*sse = xxsum0;
- return (xxsum0 - ((xsum0 * xsum0) >> 7));
+ return (xxsum0 - ((unsigned int)(xsum0 * xsum0) >> 7));
}