# HG changeset patch
# Parent f7a8c8a419870421138438970a0514e79353ae34
# User Timothy B. Terriberry <tterribe@vt.edu>
Bug 730903 - Fix text relocations in libvpx variance functions.
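
A text relocation arises when the .text section contains an absolute
address that the dynamic loader must patch at load time, which prevents
the library's code pages from being shared. The old code materialized
each table pointer through a literal word (a DCD holding the label's
absolute address) and fetched it with a PC-relative ldr; the fix computes
the address directly with adr, which assembles to a PC-relative add/sub
and needs no relocation. The one constraint is that adr's offset encoding
is short-range, which is why vp8_subpixelvariance16x16_neon.asm also
moves the coefficient table next to the code. A minimal sketch of the
pattern (label names here are illustrative, not taken from the
changeset):

    ; before: the literal word stores an absolute address, so the loader
    ; must patch .text at load time (a text relocation)
        ldr     r12, table_ptr      ; PC-relative load of the literal word
    table_ptr
        DCD     table               ; absolute address resolved at load time

    ; after: compute the address relative to pc; position independent,
    ; but "table" must be within adr's limited offset range
        adr     r12, table          ; assembles to an add/sub on pc
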
diff --git a/media/libvpx/vp8/common/arm/neon/vp8_subpixelvariance16x16_neon.asm b/media/libvpx/vp8/common/arm/neon/vp8_subpixelvariance16x16_neon.asm
--- a/media/libvpx/vp8/common/arm/neon/vp8_subpixelvariance16x16_neon.asm
+++ b/media/libvpx/vp8/common/arm/neon/vp8_subpixelvariance16x16_neon.asm
@@ -4,16 +4,21 @@
 ; Use of this source code is governed by a BSD-style license
 ; that can be found in the LICENSE file in the root of the source
 ; tree. An additional intellectual property rights grant can be found
 ; in the file PATENTS. All contributing project authors may
 ; be found in the AUTHORS file in the root of the source tree.
 ;


+bilinear_taps_coeff
+    DCD     128, 0, 112, 16, 96, 32, 80, 48, 64, 64, 48, 80, 32, 96, 16, 112
+
+;-----------------
+
     EXPORT  |vp8_sub_pixel_variance16x16_neon_func|
     ARM
     REQUIRE8
     PRESERVE8

     AREA ||.text||, CODE, READONLY, ALIGN=2
 ; r0 unsigned char *src_ptr,
 ; r1 int src_pixels_per_line,
@@ -22,17 +27,17 @@
 ; stack(r4) unsigned char *dst_ptr,
 ; stack(r5) int dst_pixels_per_line,
 ; stack(r6) unsigned int *sse
 ;note: most of the code is copied from bilinear_predict16x16_neon and vp8_variance16x16_neon.

 |vp8_sub_pixel_variance16x16_neon_func| PROC
     push            {r4-r6, lr}

-    ldr             r12, _BilinearTaps_coeff_
+    adr             r12, bilinear_taps_coeff
     ldr             r4, [sp, #16]           ;load *dst_ptr from stack
     ldr             r5, [sp, #20]           ;load dst_pixels_per_line from stack
     ldr             r6, [sp, #24]           ;load *sse from stack

     cmp             r2, #0                  ;skip first_pass filter if xoffset=0
     beq             secondpass_bfilter16x16_only

     add             r2, r12, r2, lsl #3     ;calculate filter location
@@ -410,16 +415,9 @@ sub_pixel_variance16x16_neon_loop

     add             sp, sp, #528
     vmov.32         r0, d0[0]               ;return

     pop             {r4-r6,pc}

     ENDP

-;-----------------
-
-_BilinearTaps_coeff_
-    DCD     bilinear_taps_coeff
-bilinear_taps_coeff
-    DCD     128, 0, 112, 16, 96, 32, 80, 48, 64, 64, 48, 80, 32, 96, 16, 112
-
     END
diff --git a/media/libvpx/vp8/common/arm/neon/vp8_subpixelvariance8x8_neon.asm b/media/libvpx/vp8/common/arm/neon/vp8_subpixelvariance8x8_neon.asm
--- a/media/libvpx/vp8/common/arm/neon/vp8_subpixelvariance8x8_neon.asm
+++ b/media/libvpx/vp8/common/arm/neon/vp8_subpixelvariance8x8_neon.asm
@@ -22,17 +22,17 @@
 ; stack(r4) unsigned char *dst_ptr,
 ; stack(r5) int dst_pixels_per_line,
 ; stack(r6) unsigned int *sse
 ;note: most of the code is copied from bilinear_predict8x8_neon and vp8_variance8x8_neon.

 |vp8_sub_pixel_variance8x8_neon| PROC
     push            {r4-r5, lr}

-    ldr             r12, _BilinearTaps_coeff_
+    adr             r12, bilinear_taps_coeff
     ldr             r4, [sp, #12]           ;load *dst_ptr from stack
     ldr             r5, [sp, #16]           ;load dst_pixels_per_line from stack
     ldr             lr, [sp, #20]           ;load *sse from stack

     cmp             r2, #0                  ;skip first_pass filter if xoffset=0
     beq             skip_firstpass_filter

 ;First pass: output_height lines x output_width columns (9x8)
@@ -211,14 +211,12 @@ sub_pixel_variance8x8_neon_loop

     vmov.32         r0, d0[0]               ;return
     pop             {r4-r5, pc}

     ENDP

 ;-----------------

-_BilinearTaps_coeff_
-    DCD     bilinear_taps_coeff
 bilinear_taps_coeff
     DCD     128, 0, 112, 16, 96, 32, 80, 48, 64, 64, 48, 80, 32, 96, 16, 112

     END
diff --git a/media/libvpx/vp8/encoder/arm/neon/fastquantizeb_neon.asm b/media/libvpx/vp8/encoder/arm/neon/fastquantizeb_neon.asm
--- a/media/libvpx/vp8/encoder/arm/neon/fastquantizeb_neon.asm
+++ b/media/libvpx/vp8/encoder/arm/neon/fastquantizeb_neon.asm
@@ -93,17 +93,17 @@
     vsub.s16        q10, q12                ; x2=(y^sz)-sz = (y^sz)-(-1) (2's complement)
     vsub.s16        q11, q13

     ldr             r6, [r3, #vp8_blockd_qcoeff]

     vmul.s16        q2, q6, q4              ; x * Dequant
     vmul.s16        q3, q7, q5

-    ldr             r0, _inv_zig_zag_       ; load ptr of inverse zigzag table
+    adr             r0, inv_zig_zag         ; load ptr of inverse zigzag table

     vceq.s16        q8, q8                  ; set q8 to all 1

     vst1.s16        {q10, q11}, [r6]        ; store: qcoeff = x2

     vmul.s16        q12, q6, q10            ; x2 * Dequant
     vmul.s16        q13, q7, q11

@@ -176,17 +176,17 @@
     vshr.s16        q3, q1, #15

     vld1.s16        {q14, q15}, [r5@128]    ; load round_ptr [0-15]
     vld1.s16        {q8, q9}, [r4@128]      ; load quant_ptr [0-15]

     vadd.s16        q12, q14                ; x + Round
     vadd.s16        q13, q15

-    ldr             r0, _inv_zig_zag_       ; load ptr of inverse zigzag table
+    adr             r0, inv_zig_zag         ; load ptr of inverse zigzag table

     vqdmulh.s16     q12, q8                 ; y = ((Round+abs(z)) * Quant) >> 16
     vqdmulh.s16     q13, q9

     vld1.16         {q10, q11}, [r0@128]    ; load inverse scan order

     vceq.s16        q8, q8                  ; set q8 to all 1

@@ -242,19 +242,16 @@ zero_output
     vst1.s16        {q0, q1}, [r7@128]      ; dqcoeff = 0

     ldmfd           sp!, {r4-r7}
     bx              lr

     ENDP

 ; default inverse zigzag table is defined in vp8/common/entropy.c
-_inv_zig_zag_
-    DCD     inv_zig_zag
-
     ALIGN 16                                ; enable use of @128 bit aligned loads
 inv_zig_zag
     DCW     0x0001, 0x0002, 0x0006, 0x0007
     DCW     0x0003, 0x0005, 0x0008, 0x000d
     DCW     0x0004, 0x0009, 0x000c, 0x000e
     DCW     0x000a, 0x000b, 0x000f, 0x0010

     END