зеркало из https://github.com/mozilla/gecko-dev.git
625 строки
19 KiB
Diff
625 строки
19 KiB
Diff
|
# HG changeset patch
|
||
|
# Parent 5a1a0398f8503451582602525c3e7b35def5d0b9
|
||
|
# User Timothy B. Terriberry <tterribe@vt.edu>
|
||
|
Fix variance overflow: shift sum * sum right as an unsigned value so a set top bit is not sign-extended
|
||
|
|
||
|
Upstream Change-Id: I1bad27ea0720067def6d71a6da5f789508cec265
|
||
|
|
||
|
diff --git a/media/libvpx/vp8/encoder/arm/armv6/vp8_variance16x16_armv6.asm b/media/libvpx/vp8/encoder/arm/armv6/vp8_variance16x16_armv6.asm
|
||
|
--- a/media/libvpx/vp8/encoder/arm/armv6/vp8_variance16x16_armv6.asm
|
||
|
+++ b/media/libvpx/vp8/encoder/arm/armv6/vp8_variance16x16_armv6.asm
|
||
|
@@ -139,16 +139,16 @@ loop
|
||
|
subs r12, r12, #1
|
||
|
|
||
|
bne loop
|
||
|
|
||
|
; return stuff
|
||
|
ldr r6, [sp, #40] ; get address of sse
|
||
|
mul r0, r8, r8 ; sum * sum
|
||
|
str r11, [r6] ; store sse
|
||
|
- sub r0, r11, r0, asr #8 ; return (sse - ((sum * sum) >> 8))
|
||
|
+ sub r0, r11, r0, lsr #8 ; return (sse - ((sum * sum) >> 8))
|
||
|
|
||
|
ldmfd sp!, {r4-r12, pc}
|
||
|
|
||
|
ENDP
|
||
|
|
||
|
END
|
||
|
|
||
|
diff --git a/media/libvpx/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm b/media/libvpx/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm
|
||
|
--- a/media/libvpx/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm
|
||
|
+++ b/media/libvpx/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm
|
||
|
@@ -164,17 +164,17 @@ loop
|
||
|
subs r12, r12, #1
|
||
|
|
||
|
bne loop
|
||
|
|
||
|
; return stuff
|
||
|
ldr r6, [sp, #40] ; get address of sse
|
||
|
mul r0, r8, r8 ; sum * sum
|
||
|
str r11, [r6] ; store sse
|
||
|
- sub r0, r11, r0, asr #8 ; return (sse - ((sum * sum) >> 8))
|
||
|
+ sub r0, r11, r0, lsr #8 ; return (sse - ((sum * sum) >> 8))
|
||
|
|
||
|
ldmfd sp!, {r4-r12, pc}
|
||
|
|
||
|
ENDP
|
||
|
|
||
|
c80808080
|
||
|
DCD 0x80808080
|
||
|
|
||
|
diff --git a/media/libvpx/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm b/media/libvpx/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm
|
||
|
--- a/media/libvpx/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm
|
||
|
+++ b/media/libvpx/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm
|
||
|
@@ -205,17 +205,17 @@ loop
|
||
|
smlad r11, r7, r7, r11 ; dual signed multiply, add and accumulate (2)
|
||
|
|
||
|
bne loop
|
||
|
|
||
|
; return stuff
|
||
|
ldr r6, [sp, #40] ; get address of sse
|
||
|
mul r0, r8, r8 ; sum * sum
|
||
|
str r11, [r6] ; store sse
|
||
|
- sub r0, r11, r0, asr #8 ; return (sse - ((sum * sum) >> 8))
|
||
|
+ sub r0, r11, r0, lsr #8 ; return (sse - ((sum * sum) >> 8))
|
||
|
|
||
|
ldmfd sp!, {r4-r12, pc}
|
||
|
|
||
|
ENDP
|
||
|
|
||
|
c80808080
|
||
|
DCD 0x80808080
|
||
|
|
||
|
diff --git a/media/libvpx/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm b/media/libvpx/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm
|
||
|
--- a/media/libvpx/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm
|
||
|
+++ b/media/libvpx/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm
|
||
|
@@ -166,17 +166,17 @@ loop
|
||
|
subs r12, r12, #1
|
||
|
|
||
|
bne loop
|
||
|
|
||
|
; return stuff
|
||
|
ldr r6, [sp, #40] ; get address of sse
|
||
|
mul r0, r8, r8 ; sum * sum
|
||
|
str r11, [r6] ; store sse
|
||
|
- sub r0, r11, r0, asr #8 ; return (sse - ((sum * sum) >> 8))
|
||
|
+ sub r0, r11, r0, lsr #8 ; return (sse - ((sum * sum) >> 8))
|
||
|
|
||
|
ldmfd sp!, {r4-r12, pc}
|
||
|
|
||
|
ENDP
|
||
|
|
||
|
c80808080
|
||
|
DCD 0x80808080
|
||
|
|
||
|
diff --git a/media/libvpx/vp8/encoder/arm/neon/variance_neon.asm b/media/libvpx/vp8/encoder/arm/neon/variance_neon.asm
|
||
|
--- a/media/libvpx/vp8/encoder/arm/neon/variance_neon.asm
|
||
|
+++ b/media/libvpx/vp8/encoder/arm/neon/variance_neon.asm
|
||
|
@@ -72,24 +72,24 @@ variance16x16_neon_loop
|
||
|
vpaddl.u32 q1, q10
|
||
|
vadd.s64 d0, d0, d1
|
||
|
vadd.u64 d1, d2, d3
|
||
|
|
||
|
;vmov.32 r0, d0[0] ;this instruction costs a lot
|
||
|
;vmov.32 r1, d1[0]
|
||
|
;mul r0, r0, r0
|
||
|
;str r1, [r12]
|
||
|
- ;sub r0, r1, r0, asr #8
|
||
|
+ ;sub r0, r1, r0, lsr #8
|
||
|
|
||
|
- ;sum is in [-255x256, 255x256]. sumxsum is 32-bit. Shift to right should
|
||
|
- ;have sign-bit exension, which is vshr.s. Have to use s32 to make it right.
|
||
|
+ ; while sum is signed, sum * sum is never negative, so its 32-bit result
|
||
|
+ ; must be shifted as unsigned to avoid propagating the top bit as a sign bit.
|
||
|
vmull.s32 q5, d0, d0
|
||
|
vst1.32 {d1[0]}, [r12] ;store sse
|
||
|
- vshr.s32 d10, d10, #8
|
||
|
- vsub.s32 d0, d1, d10
|
||
|
+ vshr.u32 d10, d10, #8
|
||
|
+ vsub.u32 d0, d1, d10
|
||
|
|
||
|
vmov.32 r0, d0[0] ;return
|
||
|
bx lr
|
||
|
|
||
|
ENDP
|
||
|
|
||
|
;================================
|
||
|
;unsigned int vp8_variance16x8_c(
|
||
|
@@ -140,18 +140,18 @@ variance16x8_neon_loop
|
||
|
ldr r12, [sp] ;load *sse from stack
|
||
|
|
||
|
vpaddl.u32 q1, q10
|
||
|
vadd.s64 d0, d0, d1
|
||
|
vadd.u64 d1, d2, d3
|
||
|
|
||
|
vmull.s32 q5, d0, d0
|
||
|
vst1.32 {d1[0]}, [r12] ;store sse
|
||
|
- vshr.s32 d10, d10, #7
|
||
|
- vsub.s32 d0, d1, d10
|
||
|
+ vshr.u32 d10, d10, #7
|
||
|
+ vsub.u32 d0, d1, d10
|
||
|
|
||
|
vmov.32 r0, d0[0] ;return
|
||
|
bx lr
|
||
|
|
||
|
ENDP
|
||
|
|
||
|
;=================================
|
||
|
;unsigned int vp8_variance8x16_c(
|
||
|
@@ -195,18 +195,18 @@ variance8x16_neon_loop
|
||
|
ldr r12, [sp] ;load *sse from stack
|
||
|
|
||
|
vpaddl.u32 q1, q10
|
||
|
vadd.s64 d0, d0, d1
|
||
|
vadd.u64 d1, d2, d3
|
||
|
|
||
|
vmull.s32 q5, d0, d0
|
||
|
vst1.32 {d1[0]}, [r12] ;store sse
|
||
|
- vshr.s32 d10, d10, #7
|
||
|
- vsub.s32 d0, d1, d10
|
||
|
+ vshr.u32 d10, d10, #7
|
||
|
+ vsub.u32 d0, d1, d10
|
||
|
|
||
|
vmov.32 r0, d0[0] ;return
|
||
|
bx lr
|
||
|
|
||
|
ENDP
|
||
|
|
||
|
;==================================
|
||
|
; r0 unsigned char *src_ptr
|
||
|
@@ -260,17 +260,17 @@ variance8x8_neon_loop
|
||
|
ldr r12, [sp] ;load *sse from stack
|
||
|
|
||
|
vpaddl.u32 q1, q10
|
||
|
vadd.s64 d0, d0, d1
|
||
|
vadd.u64 d1, d2, d3
|
||
|
|
||
|
vmull.s32 q5, d0, d0
|
||
|
vst1.32 {d1[0]}, [r12] ;store sse
|
||
|
- vshr.s32 d10, d10, #6
|
||
|
- vsub.s32 d0, d1, d10
|
||
|
+ vshr.u32 d10, d10, #6
|
||
|
+ vsub.u32 d0, d1, d10
|
||
|
|
||
|
vmov.32 r0, d0[0] ;return
|
||
|
bx lr
|
||
|
|
||
|
ENDP
|
||
|
|
||
|
END
|
||
|
diff --git a/media/libvpx/vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm b/media/libvpx/vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm
|
||
|
--- a/media/libvpx/vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm
|
||
|
+++ b/media/libvpx/vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm
|
||
|
@@ -400,18 +400,18 @@ sub_pixel_variance16x16_neon_loop
|
||
|
vpaddl.s32 q0, q8 ;accumulate sum
|
||
|
|
||
|
vpaddl.u32 q1, q10
|
||
|
vadd.s64 d0, d0, d1
|
||
|
vadd.u64 d1, d2, d3
|
||
|
|
||
|
vmull.s32 q5, d0, d0
|
||
|
vst1.32 {d1[0]}, [r6] ;store sse
|
||
|
- vshr.s32 d10, d10, #8
|
||
|
- vsub.s32 d0, d1, d10
|
||
|
+ vshr.u32 d10, d10, #8
|
||
|
+ vsub.u32 d0, d1, d10
|
||
|
|
||
|
add sp, sp, #528
|
||
|
vmov.32 r0, d0[0] ;return
|
||
|
|
||
|
pop {r4-r6,pc}
|
||
|
|
||
|
ENDP
|
||
|
|
||
|
diff --git a/media/libvpx/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm b/media/libvpx/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm
|
||
|
--- a/media/libvpx/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm
|
||
|
+++ b/media/libvpx/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm
|
||
|
@@ -107,18 +107,18 @@ vp8_filt_fpo16x16s_4_0_loop_neon
|
||
|
vpaddl.s32 q0, q8 ;accumulate sum
|
||
|
|
||
|
vpaddl.u32 q1, q10
|
||
|
vadd.s64 d0, d0, d1
|
||
|
vadd.u64 d1, d2, d3
|
||
|
|
||
|
vmull.s32 q5, d0, d0
|
||
|
vst1.32 {d1[0]}, [lr] ;store sse
|
||
|
- vshr.s32 d10, d10, #8
|
||
|
- vsub.s32 d0, d1, d10
|
||
|
+ vshr.u32 d10, d10, #8
|
||
|
+ vsub.u32 d0, d1, d10
|
||
|
|
||
|
vmov.32 r0, d0[0] ;return
|
||
|
pop {pc}
|
||
|
ENDP
|
||
|
|
||
|
;================================================
|
||
|
;unsigned int vp8_variance_halfpixvar16x16_v_neon
|
||
|
;(
|
||
|
@@ -203,18 +203,18 @@ vp8_filt_spo16x16s_0_4_loop_neon
|
||
|
vpaddl.s32 q0, q8 ;accumulate sum
|
||
|
|
||
|
vpaddl.u32 q1, q10
|
||
|
vadd.s64 d0, d0, d1
|
||
|
vadd.u64 d1, d2, d3
|
||
|
|
||
|
vmull.s32 q5, d0, d0
|
||
|
vst1.32 {d1[0]}, [lr] ;store sse
|
||
|
- vshr.s32 d10, d10, #8
|
||
|
- vsub.s32 d0, d1, d10
|
||
|
+ vshr.u32 d10, d10, #8
|
||
|
+ vsub.u32 d0, d1, d10
|
||
|
|
||
|
vmov.32 r0, d0[0] ;return
|
||
|
pop {pc}
|
||
|
ENDP
|
||
|
|
||
|
;================================================
|
||
|
;unsigned int vp8_variance_halfpixvar16x16_hv_neon
|
||
|
;(
|
||
|
@@ -322,18 +322,18 @@ vp8_filt16x16s_4_4_loop_neon
|
||
|
vpaddl.s32 q0, q13 ;accumulate sum
|
||
|
|
||
|
vpaddl.u32 q1, q15
|
||
|
vadd.s64 d0, d0, d1
|
||
|
vadd.u64 d1, d2, d3
|
||
|
|
||
|
vmull.s32 q5, d0, d0
|
||
|
vst1.32 {d1[0]}, [lr] ;store sse
|
||
|
- vshr.s32 d10, d10, #8
|
||
|
- vsub.s32 d0, d1, d10
|
||
|
+ vshr.u32 d10, d10, #8
|
||
|
+ vsub.u32 d0, d1, d10
|
||
|
|
||
|
vmov.32 r0, d0[0] ;return
|
||
|
pop {pc}
|
||
|
ENDP
|
||
|
|
||
|
;==============================
|
||
|
; r0 unsigned char *src_ptr,
|
||
|
; r1 int src_pixels_per_line,
|
||
|
@@ -555,18 +555,18 @@ sub_pixel_variance16x16s_neon_loop
|
||
|
vpaddl.s32 q0, q8 ;accumulate sum
|
||
|
|
||
|
vpaddl.u32 q1, q10
|
||
|
vadd.s64 d0, d0, d1
|
||
|
vadd.u64 d1, d2, d3
|
||
|
|
||
|
vmull.s32 q5, d0, d0
|
||
|
vst1.32 {d1[0]}, [lr] ;store sse
|
||
|
- vshr.s32 d10, d10, #8
|
||
|
- vsub.s32 d0, d1, d10
|
||
|
+ vshr.u32 d10, d10, #8
|
||
|
+ vsub.u32 d0, d1, d10
|
||
|
|
||
|
add sp, sp, #256
|
||
|
vmov.32 r0, d0[0] ;return
|
||
|
|
||
|
pop {r4, pc}
|
||
|
ENDP
|
||
|
|
||
|
END
|
||
|
diff --git a/media/libvpx/vp8/encoder/arm/neon/vp8_subpixelvariance8x8_neon.asm b/media/libvpx/vp8/encoder/arm/neon/vp8_subpixelvariance8x8_neon.asm
|
||
|
--- a/media/libvpx/vp8/encoder/arm/neon/vp8_subpixelvariance8x8_neon.asm
|
||
|
+++ b/media/libvpx/vp8/encoder/arm/neon/vp8_subpixelvariance8x8_neon.asm
|
||
|
@@ -201,18 +201,18 @@ sub_pixel_variance8x8_neon_loop
|
||
|
vpaddl.s32 q0, q8 ;accumulate sum
|
||
|
|
||
|
vpaddl.u32 q1, q10
|
||
|
vadd.s64 d0, d0, d1
|
||
|
vadd.u64 d1, d2, d3
|
||
|
|
||
|
vmull.s32 q5, d0, d0
|
||
|
vst1.32 {d1[0]}, [lr] ;store sse
|
||
|
- vshr.s32 d10, d10, #6
|
||
|
- vsub.s32 d0, d1, d10
|
||
|
+ vshr.u32 d10, d10, #6
|
||
|
+ vsub.u32 d0, d1, d10
|
||
|
|
||
|
vmov.32 r0, d0[0] ;return
|
||
|
pop {r4-r5, pc}
|
||
|
|
||
|
ENDP
|
||
|
|
||
|
;-----------------
|
||
|
|
||
|
diff --git a/media/libvpx/vp8/encoder/variance_c.c b/media/libvpx/vp8/encoder/variance_c.c
|
||
|
--- a/media/libvpx/vp8/encoder/variance_c.c
|
||
|
+++ b/media/libvpx/vp8/encoder/variance_c.c
|
||
|
@@ -70,82 +70,82 @@ unsigned int vp8_variance16x16_c(
|
||
|
unsigned int *sse)
|
||
|
{
|
||
|
unsigned int var;
|
||
|
int avg;
|
||
|
|
||
|
|
||
|
variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &var, &avg);
|
||
|
*sse = var;
|
||
|
- return (var - ((avg * avg) >> 8));
|
||
|
+ return (var - ((unsigned int)(avg * avg) >> 8));
|
||
|
}
|
||
|
|
||
|
unsigned int vp8_variance8x16_c(
|
||
|
const unsigned char *src_ptr,
|
||
|
int source_stride,
|
||
|
const unsigned char *ref_ptr,
|
||
|
int recon_stride,
|
||
|
unsigned int *sse)
|
||
|
{
|
||
|
unsigned int var;
|
||
|
int avg;
|
||
|
|
||
|
|
||
|
variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16, &var, &avg);
|
||
|
*sse = var;
|
||
|
- return (var - ((avg * avg) >> 7));
|
||
|
+ return (var - ((unsigned int)(avg * avg) >> 7));
|
||
|
}
|
||
|
|
||
|
unsigned int vp8_variance16x8_c(
|
||
|
const unsigned char *src_ptr,
|
||
|
int source_stride,
|
||
|
const unsigned char *ref_ptr,
|
||
|
int recon_stride,
|
||
|
unsigned int *sse)
|
||
|
{
|
||
|
unsigned int var;
|
||
|
int avg;
|
||
|
|
||
|
|
||
|
variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8, &var, &avg);
|
||
|
*sse = var;
|
||
|
- return (var - ((avg * avg) >> 7));
|
||
|
+ return (var - ((unsigned int)(avg * avg) >> 7));
|
||
|
}
|
||
|
|
||
|
|
||
|
unsigned int vp8_variance8x8_c(
|
||
|
const unsigned char *src_ptr,
|
||
|
int source_stride,
|
||
|
const unsigned char *ref_ptr,
|
||
|
int recon_stride,
|
||
|
unsigned int *sse)
|
||
|
{
|
||
|
unsigned int var;
|
||
|
int avg;
|
||
|
|
||
|
|
||
|
variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, &var, &avg);
|
||
|
*sse = var;
|
||
|
- return (var - ((avg * avg) >> 6));
|
||
|
+ return (var - ((unsigned int)(avg * avg) >> 6));
|
||
|
}
|
||
|
|
||
|
unsigned int vp8_variance4x4_c(
|
||
|
const unsigned char *src_ptr,
|
||
|
int source_stride,
|
||
|
const unsigned char *ref_ptr,
|
||
|
int recon_stride,
|
||
|
unsigned int *sse)
|
||
|
{
|
||
|
unsigned int var;
|
||
|
int avg;
|
||
|
|
||
|
|
||
|
variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 4, &var, &avg);
|
||
|
*sse = var;
|
||
|
- return (var - ((avg * avg) >> 4));
|
||
|
+ return (var - ((unsigned int)(avg * avg) >> 4));
|
||
|
}
|
||
|
|
||
|
|
||
|
unsigned int vp8_mse16x16_c(
|
||
|
const unsigned char *src_ptr,
|
||
|
int source_stride,
|
||
|
const unsigned char *ref_ptr,
|
||
|
int recon_stride,
|
||
|
diff --git a/media/libvpx/vp8/encoder/x86/variance_mmx.c b/media/libvpx/vp8/encoder/x86/variance_mmx.c
|
||
|
--- a/media/libvpx/vp8/encoder/x86/variance_mmx.c
|
||
|
+++ b/media/libvpx/vp8/encoder/x86/variance_mmx.c
|
||
|
@@ -86,34 +86,34 @@ unsigned int vp8_variance4x4_mmx(
|
||
|
int recon_stride,
|
||
|
unsigned int *sse)
|
||
|
{
|
||
|
unsigned int var;
|
||
|
int avg;
|
||
|
|
||
|
vp8_get4x4var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ;
|
||
|
*sse = var;
|
||
|
- return (var - ((avg * avg) >> 4));
|
||
|
+ return (var - ((unsigned int)(avg * avg) >> 4));
|
||
|
|
||
|
}
|
||
|
|
||
|
unsigned int vp8_variance8x8_mmx(
|
||
|
const unsigned char *src_ptr,
|
||
|
int source_stride,
|
||
|
const unsigned char *ref_ptr,
|
||
|
int recon_stride,
|
||
|
unsigned int *sse)
|
||
|
{
|
||
|
unsigned int var;
|
||
|
int avg;
|
||
|
|
||
|
vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ;
|
||
|
*sse = var;
|
||
|
|
||
|
- return (var - ((avg * avg) >> 6));
|
||
|
+ return (var - ((unsigned int)(avg * avg) >> 6));
|
||
|
|
||
|
}
|
||
|
|
||
|
unsigned int vp8_mse16x16_mmx(
|
||
|
const unsigned char *src_ptr,
|
||
|
int source_stride,
|
||
|
const unsigned char *ref_ptr,
|
||
|
int recon_stride,
|
||
|
@@ -148,17 +148,17 @@ unsigned int vp8_variance16x16_mmx(
|
||
|
vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
|
||
|
vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
|
||
|
vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse2, &sum2) ;
|
||
|
vp8_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride, ref_ptr + 8 * recon_stride + 8, recon_stride, &sse3, &sum3);
|
||
|
|
||
|
var = sse0 + sse1 + sse2 + sse3;
|
||
|
avg = sum0 + sum1 + sum2 + sum3;
|
||
|
*sse = var;
|
||
|
- return (var - ((avg * avg) >> 8));
|
||
|
+ return (var - ((unsigned int)(avg * avg) >> 8));
|
||
|
}
|
||
|
|
||
|
unsigned int vp8_variance16x8_mmx(
|
||
|
const unsigned char *src_ptr,
|
||
|
int source_stride,
|
||
|
const unsigned char *ref_ptr,
|
||
|
int recon_stride,
|
||
|
unsigned int *sse)
|
||
|
@@ -167,17 +167,17 @@ unsigned int vp8_variance16x8_mmx(
|
||
|
int sum0, sum1, avg;
|
||
|
|
||
|
vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
|
||
|
vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
|
||
|
|
||
|
var = sse0 + sse1;
|
||
|
avg = sum0 + sum1;
|
||
|
*sse = var;
|
||
|
- return (var - ((avg * avg) >> 7));
|
||
|
+ return (var - ((unsigned int)(avg * avg) >> 7));
|
||
|
|
||
|
}
|
||
|
|
||
|
|
||
|
unsigned int vp8_variance8x16_mmx(
|
||
|
const unsigned char *src_ptr,
|
||
|
int source_stride,
|
||
|
const unsigned char *ref_ptr,
|
||
|
@@ -189,17 +189,17 @@ unsigned int vp8_variance8x16_mmx(
|
||
|
|
||
|
vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
|
||
|
vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse1, &sum1) ;
|
||
|
|
||
|
var = sse0 + sse1;
|
||
|
avg = sum0 + sum1;
|
||
|
*sse = var;
|
||
|
|
||
|
- return (var - ((avg * avg) >> 7));
|
||
|
+ return (var - ((unsigned int)(avg * avg) >> 7));
|
||
|
|
||
|
}
|
||
|
|
||
|
|
||
|
unsigned int vp8_sub_pixel_variance4x4_mmx
|
||
|
(
|
||
|
const unsigned char *src_ptr,
|
||
|
int src_pixels_per_line,
|
||
|
diff --git a/media/libvpx/vp8/encoder/x86/variance_sse2.c b/media/libvpx/vp8/encoder/x86/variance_sse2.c
|
||
|
--- a/media/libvpx/vp8/encoder/x86/variance_sse2.c
|
||
|
+++ b/media/libvpx/vp8/encoder/x86/variance_sse2.c
|
||
|
@@ -143,34 +143,34 @@ unsigned int vp8_variance4x4_wmt(
|
||
|
int recon_stride,
|
||
|
unsigned int *sse)
|
||
|
{
|
||
|
unsigned int var;
|
||
|
int avg;
|
||
|
|
||
|
vp8_get4x4var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ;
|
||
|
*sse = var;
|
||
|
- return (var - ((avg * avg) >> 4));
|
||
|
+ return (var - ((unsigned int)(avg * avg) >> 4));
|
||
|
|
||
|
}
|
||
|
|
||
|
unsigned int vp8_variance8x8_wmt
|
||
|
(
|
||
|
const unsigned char *src_ptr,
|
||
|
int source_stride,
|
||
|
const unsigned char *ref_ptr,
|
||
|
int recon_stride,
|
||
|
unsigned int *sse)
|
||
|
{
|
||
|
unsigned int var;
|
||
|
int avg;
|
||
|
|
||
|
vp8_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ;
|
||
|
*sse = var;
|
||
|
- return (var - ((avg * avg) >> 6));
|
||
|
+ return (var - ((unsigned int)(avg * avg) >> 6));
|
||
|
|
||
|
}
|
||
|
|
||
|
|
||
|
unsigned int vp8_variance16x16_wmt
|
||
|
(
|
||
|
const unsigned char *src_ptr,
|
||
|
int source_stride,
|
||
|
@@ -215,17 +215,17 @@ unsigned int vp8_variance16x8_wmt
|
||
|
int sum0, sum1, avg;
|
||
|
|
||
|
vp8_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
|
||
|
vp8_get8x8var_sse2(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
|
||
|
|
||
|
var = sse0 + sse1;
|
||
|
avg = sum0 + sum1;
|
||
|
*sse = var;
|
||
|
- return (var - ((avg * avg) >> 7));
|
||
|
+ return (var - ((unsigned int)(avg * avg) >> 7));
|
||
|
|
||
|
}
|
||
|
|
||
|
unsigned int vp8_variance8x16_wmt
|
||
|
(
|
||
|
const unsigned char *src_ptr,
|
||
|
int source_stride,
|
||
|
const unsigned char *ref_ptr,
|
||
|
@@ -236,17 +236,17 @@ unsigned int vp8_variance8x16_wmt
|
||
|
int sum0, sum1, avg;
|
||
|
|
||
|
vp8_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
|
||
|
vp8_get8x8var_sse2(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse1, &sum1) ;
|
||
|
|
||
|
var = sse0 + sse1;
|
||
|
avg = sum0 + sum1;
|
||
|
*sse = var;
|
||
|
- return (var - ((avg * avg) >> 7));
|
||
|
+ return (var - ((unsigned int)(avg * avg) >> 7));
|
||
|
|
||
|
}
|
||
|
|
||
|
unsigned int vp8_sub_pixel_variance4x4_wmt
|
||
|
(
|
||
|
const unsigned char *src_ptr,
|
||
|
int src_pixels_per_line,
|
||
|
int xoffset,
|
||
|
diff --git a/media/libvpx/vp8/encoder/x86/variance_ssse3.c b/media/libvpx/vp8/encoder/x86/variance_ssse3.c
|
||
|
--- a/media/libvpx/vp8/encoder/x86/variance_ssse3.c
|
||
|
+++ b/media/libvpx/vp8/encoder/x86/variance_ssse3.c
|
||
|
@@ -107,17 +107,17 @@ unsigned int vp8_sub_pixel_variance16x16
|
||
|
vp8_filter_block2d_bil_var_ssse3(
|
||
|
src_ptr, src_pixels_per_line,
|
||
|
dst_ptr, dst_pixels_per_line, 16,
|
||
|
xoffset, yoffset,
|
||
|
&xsum0, &xxsum0);
|
||
|
}
|
||
|
|
||
|
*sse = xxsum0;
|
||
|
- return (xxsum0 - ((xsum0 * xsum0) >> 8));
|
||
|
+ return (xxsum0 - ((unsigned int)(xsum0 * xsum0) >> 8));
|
||
|
}
|
||
|
|
||
|
unsigned int vp8_sub_pixel_variance16x8_ssse3
|
||
|
(
|
||
|
const unsigned char *src_ptr,
|
||
|
int src_pixels_per_line,
|
||
|
int xoffset,
|
||
|
int yoffset,
|
||
|
@@ -156,10 +156,10 @@ unsigned int vp8_sub_pixel_variance16x8_
|
||
|
vp8_filter_block2d_bil_var_ssse3(
|
||
|
src_ptr, src_pixels_per_line,
|
||
|
dst_ptr, dst_pixels_per_line, 8,
|
||
|
xoffset, yoffset,
|
||
|
&xsum0, &xxsum0);
|
||
|
}
|
||
|
|
||
|
*sse = xxsum0;
|
||
|
- return (xxsum0 - ((xsum0 * xsum0) >> 7));
|
||
|
+ return (xxsum0 - ((unsigned int)(xsum0 * xsum0) >> 7));
|
||
|
}
|