nasm: address labels with 'rel label' instead of 'label wrt rip'
nasm does not support `label wrt rip`; it requires `rel label`. The `rel label` form is still fully compatible with yasm, so using it provides nasm compatibility without breaking yasm builds. This patch causes no binary change with yasm on {x86_64,i686}-fedora13-linux-gnu. With nasm on {x86_64,i686}-fedora13-linux-gnu a few opcodes are encoded in a longer form; these have been checked and are safe. Change-Id: I488773a4e930a56e43b0cc72d867ee5291215f50
This commit is contained in:
Родитель
e114f699f6
Коммит
5cdc3a4c29
|
@ -58,11 +58,11 @@ sym(vp8_short_idct4x4llm_mmx):
|
|||
movq mm5, mm1
|
||||
paddw mm2, mm0 ; a1 =0+2
|
||||
|
||||
pmulhw mm5, [x_s1sqr2 GLOBAL] ;
|
||||
pmulhw mm5, [GLOBAL(x_s1sqr2)] ;
|
||||
paddw mm5, mm1 ; ip1 * sin(pi/8) * sqrt(2)
|
||||
|
||||
movq mm7, mm3 ;
|
||||
pmulhw mm7, [x_c1sqr2less1 GLOBAL] ;
|
||||
pmulhw mm7, [GLOBAL(x_c1sqr2less1)] ;
|
||||
|
||||
paddw mm7, mm3 ; ip3 * cos(pi/8) * sqrt(2)
|
||||
psubw mm7, mm5 ; c1
|
||||
|
@ -70,10 +70,10 @@ sym(vp8_short_idct4x4llm_mmx):
|
|||
movq mm5, mm1
|
||||
movq mm4, mm3
|
||||
|
||||
pmulhw mm5, [x_c1sqr2less1 GLOBAL]
|
||||
pmulhw mm5, [GLOBAL(x_c1sqr2less1)]
|
||||
paddw mm5, mm1
|
||||
|
||||
pmulhw mm3, [x_s1sqr2 GLOBAL]
|
||||
pmulhw mm3, [GLOBAL(x_s1sqr2)]
|
||||
paddw mm3, mm4
|
||||
|
||||
paddw mm3, mm5 ; d1
|
||||
|
@ -113,11 +113,11 @@ sym(vp8_short_idct4x4llm_mmx):
|
|||
movq mm5, mm1
|
||||
paddw mm2, mm0 ; a1 =0+2
|
||||
|
||||
pmulhw mm5, [x_s1sqr2 GLOBAL] ;
|
||||
pmulhw mm5, [GLOBAL(x_s1sqr2)] ;
|
||||
paddw mm5, mm1 ; ip1 * sin(pi/8) * sqrt(2)
|
||||
|
||||
movq mm7, mm3 ;
|
||||
pmulhw mm7, [x_c1sqr2less1 GLOBAL] ;
|
||||
pmulhw mm7, [GLOBAL(x_c1sqr2less1)] ;
|
||||
|
||||
paddw mm7, mm3 ; ip3 * cos(pi/8) * sqrt(2)
|
||||
psubw mm7, mm5 ; c1
|
||||
|
@ -125,16 +125,16 @@ sym(vp8_short_idct4x4llm_mmx):
|
|||
movq mm5, mm1
|
||||
movq mm4, mm3
|
||||
|
||||
pmulhw mm5, [x_c1sqr2less1 GLOBAL]
|
||||
pmulhw mm5, [GLOBAL(x_c1sqr2less1)]
|
||||
paddw mm5, mm1
|
||||
|
||||
pmulhw mm3, [x_s1sqr2 GLOBAL]
|
||||
pmulhw mm3, [GLOBAL(x_s1sqr2)]
|
||||
paddw mm3, mm4
|
||||
|
||||
paddw mm3, mm5 ; d1
|
||||
paddw mm0, [fours GLOBAL]
|
||||
paddw mm0, [GLOBAL(fours)]
|
||||
|
||||
paddw mm2, [fours GLOBAL]
|
||||
paddw mm2, [GLOBAL(fours)]
|
||||
movq mm6, mm2 ; a1
|
||||
|
||||
movq mm4, mm0 ; b1
|
||||
|
@ -196,7 +196,7 @@ sym(vp8_short_idct4x4llm_1_mmx):
|
|||
mov rax, arg(0) ;input
|
||||
movd mm0, [rax]
|
||||
|
||||
paddw mm0, [fours GLOBAL]
|
||||
paddw mm0, [GLOBAL(fours)]
|
||||
mov rdx, arg(1) ;output
|
||||
|
||||
psraw mm0, 3
|
||||
|
@ -239,7 +239,7 @@ sym(vp8_dc_only_idct_add_mmx):
|
|||
|
||||
movd mm5, arg(0) ;input_dc
|
||||
|
||||
paddw mm5, [fours GLOBAL]
|
||||
paddw mm5, [GLOBAL(fours)]
|
||||
|
||||
psraw mm5, 3
|
||||
|
||||
|
|
|
@ -51,7 +51,7 @@ sym(idct_dequant_0_2x_sse2):
|
|||
pshufhw xmm4, xmm4, 00000000b
|
||||
|
||||
mov rax, arg(2) ; pre
|
||||
paddw xmm4, [fours GLOBAL]
|
||||
paddw xmm4, [GLOBAL(fours)]
|
||||
|
||||
movsxd rcx, dword ptr arg(5) ; blk_stride
|
||||
psraw xmm4, 3
|
||||
|
@ -160,11 +160,11 @@ sym(idct_dequant_full_2x_sse2):
|
|||
movdqa xmm5, xmm1
|
||||
paddw xmm2, xmm0 ; a1 = 0+2
|
||||
|
||||
pmulhw xmm5, [x_s1sqr2 GLOBAL]
|
||||
pmulhw xmm5, [GLOBAL(x_s1sqr2)]
|
||||
paddw xmm5, xmm1 ; ip1 * sin(pi/8) * sqrt(2)
|
||||
|
||||
movdqa xmm7, xmm3
|
||||
pmulhw xmm7, [x_c1sqr2less1 GLOBAL]
|
||||
pmulhw xmm7, [GLOBAL(x_c1sqr2less1)]
|
||||
|
||||
paddw xmm7, xmm3 ; ip3 * cos(pi/8) * sqrt(2)
|
||||
psubw xmm7, xmm5 ; c1
|
||||
|
@ -172,10 +172,10 @@ sym(idct_dequant_full_2x_sse2):
|
|||
movdqa xmm5, xmm1
|
||||
movdqa xmm4, xmm3
|
||||
|
||||
pmulhw xmm5, [x_c1sqr2less1 GLOBAL]
|
||||
pmulhw xmm5, [GLOBAL(x_c1sqr2less1)]
|
||||
paddw xmm5, xmm1
|
||||
|
||||
pmulhw xmm3, [x_s1sqr2 GLOBAL]
|
||||
pmulhw xmm3, [GLOBAL(x_s1sqr2)]
|
||||
paddw xmm3, xmm4
|
||||
|
||||
paddw xmm3, xmm5 ; d1
|
||||
|
@ -229,11 +229,11 @@ sym(idct_dequant_full_2x_sse2):
|
|||
movdqa xmm5, xmm1
|
||||
paddw xmm2, xmm0 ; a1 = 0+2
|
||||
|
||||
pmulhw xmm5, [x_s1sqr2 GLOBAL]
|
||||
pmulhw xmm5, [GLOBAL(x_s1sqr2)]
|
||||
paddw xmm5, xmm1 ; ip1 * sin(pi/8) * sqrt(2)
|
||||
|
||||
movdqa xmm7, xmm3
|
||||
pmulhw xmm7, [x_c1sqr2less1 GLOBAL]
|
||||
pmulhw xmm7, [GLOBAL(x_c1sqr2less1)]
|
||||
|
||||
paddw xmm7, xmm3 ; ip3 * cos(pi/8) * sqrt(2)
|
||||
psubw xmm7, xmm5 ; c1
|
||||
|
@ -241,16 +241,16 @@ sym(idct_dequant_full_2x_sse2):
|
|||
movdqa xmm5, xmm1
|
||||
movdqa xmm4, xmm3
|
||||
|
||||
pmulhw xmm5, [x_c1sqr2less1 GLOBAL]
|
||||
pmulhw xmm5, [GLOBAL(x_c1sqr2less1)]
|
||||
paddw xmm5, xmm1
|
||||
|
||||
pmulhw xmm3, [x_s1sqr2 GLOBAL]
|
||||
pmulhw xmm3, [GLOBAL(x_s1sqr2)]
|
||||
paddw xmm3, xmm4
|
||||
|
||||
paddw xmm3, xmm5 ; d1
|
||||
paddw xmm0, [fours GLOBAL]
|
||||
paddw xmm0, [GLOBAL(fours)]
|
||||
|
||||
paddw xmm2, [fours GLOBAL]
|
||||
paddw xmm2, [GLOBAL(fours)]
|
||||
movdqa xmm6, xmm2 ; a1
|
||||
|
||||
movdqa xmm4, xmm0 ; b1
|
||||
|
@ -394,7 +394,7 @@ sym(idct_dequant_dc_0_2x_sse2):
|
|||
punpckldq xmm4, xmm4
|
||||
|
||||
; Rounding to dequant and downshift
|
||||
paddw xmm4, [fours GLOBAL]
|
||||
paddw xmm4, [GLOBAL(fours)]
|
||||
psraw xmm4, 3
|
||||
|
||||
; Predict buffer needs to be expanded from bytes to words
|
||||
|
@ -505,11 +505,11 @@ sym(idct_dequant_dc_full_2x_sse2):
|
|||
movdqa xmm5, xmm1
|
||||
paddw xmm2, xmm0 ; a1 = 0+2
|
||||
|
||||
pmulhw xmm5, [x_s1sqr2 GLOBAL]
|
||||
pmulhw xmm5, [GLOBAL(x_s1sqr2)]
|
||||
paddw xmm5, xmm1 ; ip1 * sin(pi/8) * sqrt(2)
|
||||
|
||||
movdqa xmm7, xmm3
|
||||
pmulhw xmm7, [x_c1sqr2less1 GLOBAL]
|
||||
pmulhw xmm7, [GLOBAL(x_c1sqr2less1)]
|
||||
|
||||
paddw xmm7, xmm3 ; ip3 * cos(pi/8) * sqrt(2)
|
||||
psubw xmm7, xmm5 ; c1
|
||||
|
@ -517,10 +517,10 @@ sym(idct_dequant_dc_full_2x_sse2):
|
|||
movdqa xmm5, xmm1
|
||||
movdqa xmm4, xmm3
|
||||
|
||||
pmulhw xmm5, [x_c1sqr2less1 GLOBAL]
|
||||
pmulhw xmm5, [GLOBAL(x_c1sqr2less1)]
|
||||
paddw xmm5, xmm1
|
||||
|
||||
pmulhw xmm3, [x_s1sqr2 GLOBAL]
|
||||
pmulhw xmm3, [GLOBAL(x_s1sqr2)]
|
||||
paddw xmm3, xmm4
|
||||
|
||||
paddw xmm3, xmm5 ; d1
|
||||
|
@ -574,11 +574,11 @@ sym(idct_dequant_dc_full_2x_sse2):
|
|||
movdqa xmm5, xmm1
|
||||
paddw xmm2, xmm0 ; a1 = 0+2
|
||||
|
||||
pmulhw xmm5, [x_s1sqr2 GLOBAL]
|
||||
pmulhw xmm5, [GLOBAL(x_s1sqr2)]
|
||||
paddw xmm5, xmm1 ; ip1 * sin(pi/8) * sqrt(2)
|
||||
|
||||
movdqa xmm7, xmm3
|
||||
pmulhw xmm7, [x_c1sqr2less1 GLOBAL]
|
||||
pmulhw xmm7, [GLOBAL(x_c1sqr2less1)]
|
||||
|
||||
paddw xmm7, xmm3 ; ip3 * cos(pi/8) * sqrt(2)
|
||||
psubw xmm7, xmm5 ; c1
|
||||
|
@ -586,16 +586,16 @@ sym(idct_dequant_dc_full_2x_sse2):
|
|||
movdqa xmm5, xmm1
|
||||
movdqa xmm4, xmm3
|
||||
|
||||
pmulhw xmm5, [x_c1sqr2less1 GLOBAL]
|
||||
pmulhw xmm5, [GLOBAL(x_c1sqr2less1)]
|
||||
paddw xmm5, xmm1
|
||||
|
||||
pmulhw xmm3, [x_s1sqr2 GLOBAL]
|
||||
pmulhw xmm3, [GLOBAL(x_s1sqr2)]
|
||||
paddw xmm3, xmm4
|
||||
|
||||
paddw xmm3, xmm5 ; d1
|
||||
paddw xmm0, [fours GLOBAL]
|
||||
paddw xmm0, [GLOBAL(fours)]
|
||||
|
||||
paddw xmm2, [fours GLOBAL]
|
||||
paddw xmm2, [GLOBAL(fours)]
|
||||
movdqa xmm6, xmm2 ; a1
|
||||
|
||||
movdqa xmm4, xmm0 ; b1
|
||||
|
|
|
@ -111,7 +111,7 @@ next8_h:
|
|||
psubusb mm3, mm2 ; q1-=p1
|
||||
psubusb mm2, mm4 ; p1-=q1
|
||||
por mm2, mm3 ; abs(p1-q1)
|
||||
pand mm2, [tfe GLOBAL] ; set lsb of each byte to zero
|
||||
pand mm2, [GLOBAL(tfe)] ; set lsb of each byte to zero
|
||||
psrlw mm2, 1 ; abs(p1-q1)/2
|
||||
|
||||
movq mm6, mm5 ; p0
|
||||
|
@ -150,12 +150,12 @@ next8_h:
|
|||
; start work on filters
|
||||
movq mm2, [rsi+2*rax] ; p1
|
||||
movq mm7, [rdi] ; q1
|
||||
pxor mm2, [t80 GLOBAL] ; p1 offset to convert to signed values
|
||||
pxor mm7, [t80 GLOBAL] ; q1 offset to convert to signed values
|
||||
pxor mm2, [GLOBAL(t80)] ; p1 offset to convert to signed values
|
||||
pxor mm7, [GLOBAL(t80)] ; q1 offset to convert to signed values
|
||||
psubsb mm2, mm7 ; p1 - q1
|
||||
pand mm2, mm4 ; high var mask (hvm)(p1 - q1)
|
||||
pxor mm6, [t80 GLOBAL] ; offset to convert to signed values
|
||||
pxor mm0, [t80 GLOBAL] ; offset to convert to signed values
|
||||
pxor mm6, [GLOBAL(t80)] ; offset to convert to signed values
|
||||
pxor mm0, [GLOBAL(t80)] ; offset to convert to signed values
|
||||
movq mm3, mm0 ; q0
|
||||
psubsb mm0, mm6 ; q0 - p0
|
||||
paddsb mm2, mm0 ; 1 * (q0 - p0) + hvm(p1 - q1)
|
||||
|
@ -163,8 +163,8 @@ next8_h:
|
|||
paddsb mm2, mm0 ; 3 * (q0 - p0) + hvm(p1 - q1)
|
||||
pand mm1, mm2 ; mask filter values we don't care about
|
||||
movq mm2, mm1
|
||||
paddsb mm1, [t4 GLOBAL] ; 3* (q0 - p0) + hvm(p1 - q1) + 4
|
||||
paddsb mm2, [t3 GLOBAL] ; 3* (q0 - p0) + hvm(p1 - q1) + 3
|
||||
paddsb mm1, [GLOBAL(t4)] ; 3* (q0 - p0) + hvm(p1 - q1) + 4
|
||||
paddsb mm2, [GLOBAL(t3)] ; 3* (q0 - p0) + hvm(p1 - q1) + 3
|
||||
|
||||
pxor mm0, mm0 ;
|
||||
pxor mm5, mm5
|
||||
|
@ -185,29 +185,29 @@ next8_h:
|
|||
movq mm5, mm0 ; save results
|
||||
|
||||
packsswb mm0, mm1 ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>3
|
||||
paddsw mm5, [ones GLOBAL]
|
||||
paddsw mm1, [ones GLOBAL]
|
||||
paddsw mm5, [GLOBAL(ones)]
|
||||
paddsw mm1, [GLOBAL(ones)]
|
||||
psraw mm5, 1 ; partial shifted one more time for 2nd tap
|
||||
psraw mm1, 1 ; partial shifted one more time for 2nd tap
|
||||
packsswb mm5, mm1 ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>4
|
||||
pandn mm4, mm5 ; high edge variance additive
|
||||
|
||||
paddsb mm6, mm2 ; p0+= p0 add
|
||||
pxor mm6, [t80 GLOBAL] ; unoffset
|
||||
pxor mm6, [GLOBAL(t80)] ; unoffset
|
||||
movq [rsi+rax], mm6 ; write back
|
||||
|
||||
movq mm6, [rsi+2*rax] ; p1
|
||||
pxor mm6, [t80 GLOBAL] ; reoffset
|
||||
pxor mm6, [GLOBAL(t80)] ; reoffset
|
||||
paddsb mm6, mm4 ; p1+= p1 add
|
||||
pxor mm6, [t80 GLOBAL] ; unoffset
|
||||
pxor mm6, [GLOBAL(t80)] ; unoffset
|
||||
movq [rsi+2*rax], mm6 ; write back
|
||||
|
||||
psubsb mm3, mm0 ; q0-= q0 add
|
||||
pxor mm3, [t80 GLOBAL] ; unoffset
|
||||
pxor mm3, [GLOBAL(t80)] ; unoffset
|
||||
movq [rsi], mm3 ; write back
|
||||
|
||||
psubsb mm7, mm4 ; q1-= q1 add
|
||||
pxor mm7, [t80 GLOBAL] ; unoffset
|
||||
pxor mm7, [GLOBAL(t80)] ; unoffset
|
||||
movq [rdi], mm7 ; write back
|
||||
|
||||
add rsi,8
|
||||
|
@ -403,7 +403,7 @@ next8_v:
|
|||
psubusb mm5, mm1 ; q1-=p1
|
||||
psubusb mm1, mm2 ; p1-=q1
|
||||
por mm5, mm1 ; abs(p1-q1)
|
||||
pand mm5, [tfe GLOBAL] ; set lsb of each byte to zero
|
||||
pand mm5, [GLOBAL(tfe)] ; set lsb of each byte to zero
|
||||
psrlw mm5, 1 ; abs(p1-q1)/2
|
||||
|
||||
mov rdx, arg(2) ;flimit ;
|
||||
|
@ -455,14 +455,14 @@ next8_v:
|
|||
movq mm6, [rdx+8] ; p0
|
||||
movq mm0, [rdx+16] ; q0
|
||||
|
||||
pxor mm2, [t80 GLOBAL] ; p1 offset to convert to signed values
|
||||
pxor mm7, [t80 GLOBAL] ; q1 offset to convert to signed values
|
||||
pxor mm2, [GLOBAL(t80)] ; p1 offset to convert to signed values
|
||||
pxor mm7, [GLOBAL(t80)] ; q1 offset to convert to signed values
|
||||
|
||||
psubsb mm2, mm7 ; p1 - q1
|
||||
pand mm2, mm4 ; high var mask (hvm)(p1 - q1)
|
||||
|
||||
pxor mm6, [t80 GLOBAL] ; offset to convert to signed values
|
||||
pxor mm0, [t80 GLOBAL] ; offset to convert to signed values
|
||||
pxor mm6, [GLOBAL(t80)] ; offset to convert to signed values
|
||||
pxor mm0, [GLOBAL(t80)] ; offset to convert to signed values
|
||||
|
||||
movq mm3, mm0 ; q0
|
||||
psubsb mm0, mm6 ; q0 - p0
|
||||
|
@ -474,9 +474,9 @@ next8_v:
|
|||
pand mm1, mm2 ; mask filter values we don't care about
|
||||
|
||||
movq mm2, mm1
|
||||
paddsb mm1, [t4 GLOBAL] ; 3* (q0 - p0) + hvm(p1 - q1) + 4
|
||||
paddsb mm1, [GLOBAL(t4)] ; 3* (q0 - p0) + hvm(p1 - q1) + 4
|
||||
|
||||
paddsb mm2, [t3 GLOBAL] ; 3* (q0 - p0) + hvm(p1 - q1) + 3
|
||||
paddsb mm2, [GLOBAL(t3)] ; 3* (q0 - p0) + hvm(p1 - q1) + 3
|
||||
pxor mm0, mm0 ;
|
||||
|
||||
pxor mm5, mm5
|
||||
|
@ -503,9 +503,9 @@ next8_v:
|
|||
movq mm5, mm0 ; save results
|
||||
|
||||
packsswb mm0, mm1 ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>3
|
||||
paddsw mm5, [ones GLOBAL]
|
||||
paddsw mm5, [GLOBAL(ones)]
|
||||
|
||||
paddsw mm1, [ones GLOBAL]
|
||||
paddsw mm1, [GLOBAL(ones)]
|
||||
psraw mm5, 1 ; partial shifted one more time for 2nd tap
|
||||
|
||||
psraw mm1, 1 ; partial shifted one more time for 2nd tap
|
||||
|
@ -514,22 +514,22 @@ next8_v:
|
|||
pandn mm4, mm5 ; high edge variance additive
|
||||
|
||||
paddsb mm6, mm2 ; p0+= p0 add
|
||||
pxor mm6, [t80 GLOBAL] ; unoffset
|
||||
pxor mm6, [GLOBAL(t80)] ; unoffset
|
||||
|
||||
; mm6=p0 ;
|
||||
movq mm1, [rdx] ; p1
|
||||
pxor mm1, [t80 GLOBAL] ; reoffset
|
||||
pxor mm1, [GLOBAL(t80)] ; reoffset
|
||||
|
||||
paddsb mm1, mm4 ; p1+= p1 add
|
||||
pxor mm1, [t80 GLOBAL] ; unoffset
|
||||
pxor mm1, [GLOBAL(t80)] ; unoffset
|
||||
; mm6 = p0 mm1 = p1
|
||||
|
||||
psubsb mm3, mm0 ; q0-= q0 add
|
||||
pxor mm3, [t80 GLOBAL] ; unoffset
|
||||
pxor mm3, [GLOBAL(t80)] ; unoffset
|
||||
|
||||
; mm3 = q0
|
||||
psubsb mm7, mm4 ; q1-= q1 add
|
||||
pxor mm7, [t80 GLOBAL] ; unoffset
|
||||
pxor mm7, [GLOBAL(t80)] ; unoffset
|
||||
; mm7 = q1
|
||||
|
||||
; tranpose and write back
|
||||
|
@ -708,7 +708,7 @@ next8_mbh:
|
|||
psubusb mm3, mm2 ; q1-=p1
|
||||
psubusb mm2, mm4 ; p1-=q1
|
||||
por mm2, mm3 ; abs(p1-q1)
|
||||
pand mm2, [tfe GLOBAL] ; set lsb of each byte to zero
|
||||
pand mm2, [GLOBAL(tfe)] ; set lsb of each byte to zero
|
||||
psrlw mm2, 1 ; abs(p1-q1)/2
|
||||
|
||||
movq mm6, mm5 ; p0
|
||||
|
@ -753,12 +753,12 @@ next8_mbh:
|
|||
; start work on filters
|
||||
movq mm2, [rsi+2*rax] ; p1
|
||||
movq mm7, [rdi] ; q1
|
||||
pxor mm2, [t80 GLOBAL] ; p1 offset to convert to signed values
|
||||
pxor mm7, [t80 GLOBAL] ; q1 offset to convert to signed values
|
||||
pxor mm2, [GLOBAL(t80)] ; p1 offset to convert to signed values
|
||||
pxor mm7, [GLOBAL(t80)] ; q1 offset to convert to signed values
|
||||
psubsb mm2, mm7 ; p1 - q1
|
||||
|
||||
pxor mm6, [t80 GLOBAL] ; offset to convert to signed values
|
||||
pxor mm0, [t80 GLOBAL] ; offset to convert to signed values
|
||||
pxor mm6, [GLOBAL(t80)] ; offset to convert to signed values
|
||||
pxor mm0, [GLOBAL(t80)] ; offset to convert to signed values
|
||||
movq mm3, mm0 ; q0
|
||||
psubsb mm0, mm6 ; q0 - p0
|
||||
paddsb mm2, mm0 ; 1 * (q0 - p0) + (p1 - q1)
|
||||
|
@ -772,7 +772,7 @@ next8_mbh:
|
|||
pand mm2, mm4; ; Filter2 = vp8_filter & hev
|
||||
|
||||
movq mm5, mm2 ;
|
||||
paddsb mm5, [t3 GLOBAL];
|
||||
paddsb mm5, [GLOBAL(t3)];
|
||||
|
||||
pxor mm0, mm0 ; 0
|
||||
pxor mm7, mm7 ; 0
|
||||
|
@ -785,7 +785,7 @@ next8_mbh:
|
|||
|
||||
movq mm5, mm0 ; Filter2
|
||||
|
||||
paddsb mm2, [t4 GLOBAL] ; vp8_signed_char_clamp(Filter2 + 4)
|
||||
paddsb mm2, [GLOBAL(t4)] ; vp8_signed_char_clamp(Filter2 + 4)
|
||||
pxor mm0, mm0 ; 0
|
||||
pxor mm7, mm7 ; 0
|
||||
|
||||
|
@ -818,10 +818,10 @@ next8_mbh:
|
|||
pxor mm2, mm2
|
||||
punpcklbw mm1, mm4
|
||||
punpckhbw mm2, mm4
|
||||
pmulhw mm1, [s27 GLOBAL]
|
||||
pmulhw mm2, [s27 GLOBAL]
|
||||
paddw mm1, [s63 GLOBAL]
|
||||
paddw mm2, [s63 GLOBAL]
|
||||
pmulhw mm1, [GLOBAL(s27)]
|
||||
pmulhw mm2, [GLOBAL(s27)]
|
||||
paddw mm1, [GLOBAL(s63)]
|
||||
paddw mm2, [GLOBAL(s63)]
|
||||
psraw mm1, 7
|
||||
psraw mm2, 7
|
||||
packsswb mm1, mm2
|
||||
|
@ -829,8 +829,8 @@ next8_mbh:
|
|||
psubsb mm3, mm1
|
||||
paddsb mm6, mm1
|
||||
|
||||
pxor mm3, [t80 GLOBAL]
|
||||
pxor mm6, [t80 GLOBAL]
|
||||
pxor mm3, [GLOBAL(t80)]
|
||||
pxor mm6, [GLOBAL(t80)]
|
||||
movq [rsi+rax], mm6
|
||||
movq [rsi], mm3
|
||||
|
||||
|
@ -844,10 +844,10 @@ next8_mbh:
|
|||
pxor mm2, mm2
|
||||
punpcklbw mm1, mm4
|
||||
punpckhbw mm2, mm4
|
||||
pmulhw mm1, [s18 GLOBAL]
|
||||
pmulhw mm2, [s18 GLOBAL]
|
||||
paddw mm1, [s63 GLOBAL]
|
||||
paddw mm2, [s63 GLOBAL]
|
||||
pmulhw mm1, [GLOBAL(s18)]
|
||||
pmulhw mm2, [GLOBAL(s18)]
|
||||
paddw mm1, [GLOBAL(s63)]
|
||||
paddw mm2, [GLOBAL(s63)]
|
||||
psraw mm1, 7
|
||||
psraw mm2, 7
|
||||
packsswb mm1, mm2
|
||||
|
@ -855,14 +855,14 @@ next8_mbh:
|
|||
movq mm3, [rdi]
|
||||
movq mm6, [rsi+rax*2] ; p1
|
||||
|
||||
pxor mm3, [t80 GLOBAL]
|
||||
pxor mm6, [t80 GLOBAL]
|
||||
pxor mm3, [GLOBAL(t80)]
|
||||
pxor mm6, [GLOBAL(t80)]
|
||||
|
||||
paddsb mm6, mm1
|
||||
psubsb mm3, mm1
|
||||
|
||||
pxor mm6, [t80 GLOBAL]
|
||||
pxor mm3, [t80 GLOBAL]
|
||||
pxor mm6, [GLOBAL(t80)]
|
||||
pxor mm3, [GLOBAL(t80)]
|
||||
movq [rdi], mm3
|
||||
movq [rsi+rax*2], mm6
|
||||
|
||||
|
@ -876,10 +876,10 @@ next8_mbh:
|
|||
pxor mm2, mm2
|
||||
punpcklbw mm1, mm4
|
||||
punpckhbw mm2, mm4
|
||||
pmulhw mm1, [s9 GLOBAL]
|
||||
pmulhw mm2, [s9 GLOBAL]
|
||||
paddw mm1, [s63 GLOBAL]
|
||||
paddw mm2, [s63 GLOBAL]
|
||||
pmulhw mm1, [GLOBAL(s9)]
|
||||
pmulhw mm2, [GLOBAL(s9)]
|
||||
paddw mm1, [GLOBAL(s63)]
|
||||
paddw mm2, [GLOBAL(s63)]
|
||||
psraw mm1, 7
|
||||
psraw mm2, 7
|
||||
packsswb mm1, mm2
|
||||
|
@ -889,14 +889,14 @@ next8_mbh:
|
|||
neg rax
|
||||
movq mm3, [rdi+rax ]
|
||||
|
||||
pxor mm6, [t80 GLOBAL]
|
||||
pxor mm3, [t80 GLOBAL]
|
||||
pxor mm6, [GLOBAL(t80)]
|
||||
pxor mm3, [GLOBAL(t80)]
|
||||
|
||||
paddsb mm6, mm1
|
||||
psubsb mm3, mm1
|
||||
|
||||
pxor mm6, [t80 GLOBAL]
|
||||
pxor mm3, [t80 GLOBAL]
|
||||
pxor mm6, [GLOBAL(t80)]
|
||||
pxor mm3, [GLOBAL(t80)]
|
||||
movq [rdi+rax ], mm3
|
||||
neg rax
|
||||
movq [rdi+rax*4], mm6
|
||||
|
@ -1105,7 +1105,7 @@ next8_mbv:
|
|||
psubusb mm5, mm1 ; q1-=p1
|
||||
psubusb mm1, mm2 ; p1-=q1
|
||||
por mm5, mm1 ; abs(p1-q1)
|
||||
pand mm5, [tfe GLOBAL] ; set lsb of each byte to zero
|
||||
pand mm5, [GLOBAL(tfe)] ; set lsb of each byte to zero
|
||||
psrlw mm5, 1 ; abs(p1-q1)/2
|
||||
|
||||
mov rdx, arg(2) ;flimit ;
|
||||
|
@ -1155,14 +1155,14 @@ next8_mbv:
|
|||
; start work on filters
|
||||
movq mm2, [rdx+16] ; p1
|
||||
movq mm7, [rdx+40] ; q1
|
||||
pxor mm2, [t80 GLOBAL] ; p1 offset to convert to signed values
|
||||
pxor mm7, [t80 GLOBAL] ; q1 offset to convert to signed values
|
||||
pxor mm2, [GLOBAL(t80)] ; p1 offset to convert to signed values
|
||||
pxor mm7, [GLOBAL(t80)] ; q1 offset to convert to signed values
|
||||
psubsb mm2, mm7 ; p1 - q1
|
||||
|
||||
movq mm6, [rdx+24] ; p0
|
||||
movq mm0, [rdx+32] ; q0
|
||||
pxor mm6, [t80 GLOBAL] ; offset to convert to signed values
|
||||
pxor mm0, [t80 GLOBAL] ; offset to convert to signed values
|
||||
pxor mm6, [GLOBAL(t80)] ; offset to convert to signed values
|
||||
pxor mm0, [GLOBAL(t80)] ; offset to convert to signed values
|
||||
|
||||
movq mm3, mm0 ; q0
|
||||
psubsb mm0, mm6 ; q0 - p0
|
||||
|
@ -1176,7 +1176,7 @@ next8_mbv:
|
|||
pand mm2, mm4; ; Filter2 = vp8_filter & hev
|
||||
|
||||
movq mm5, mm2 ;
|
||||
paddsb mm5, [t3 GLOBAL];
|
||||
paddsb mm5, [GLOBAL(t3)];
|
||||
|
||||
pxor mm0, mm0 ; 0
|
||||
pxor mm7, mm7 ; 0
|
||||
|
@ -1189,7 +1189,7 @@ next8_mbv:
|
|||
|
||||
movq mm5, mm0 ; Filter2
|
||||
|
||||
paddsb mm2, [t4 GLOBAL] ; vp8_signed_char_clamp(Filter2 + 4)
|
||||
paddsb mm2, [GLOBAL(t4)] ; vp8_signed_char_clamp(Filter2 + 4)
|
||||
pxor mm0, mm0 ; 0
|
||||
pxor mm7, mm7 ; 0
|
||||
|
||||
|
@ -1222,10 +1222,10 @@ next8_mbv:
|
|||
pxor mm2, mm2
|
||||
punpcklbw mm1, mm4
|
||||
punpckhbw mm2, mm4
|
||||
pmulhw mm1, [s27 GLOBAL]
|
||||
pmulhw mm2, [s27 GLOBAL]
|
||||
paddw mm1, [s63 GLOBAL]
|
||||
paddw mm2, [s63 GLOBAL]
|
||||
pmulhw mm1, [GLOBAL(s27)]
|
||||
pmulhw mm2, [GLOBAL(s27)]
|
||||
paddw mm1, [GLOBAL(s63)]
|
||||
paddw mm2, [GLOBAL(s63)]
|
||||
psraw mm1, 7
|
||||
psraw mm2, 7
|
||||
packsswb mm1, mm2
|
||||
|
@ -1233,8 +1233,8 @@ next8_mbv:
|
|||
psubsb mm3, mm1
|
||||
paddsb mm6, mm1
|
||||
|
||||
pxor mm3, [t80 GLOBAL]
|
||||
pxor mm6, [t80 GLOBAL]
|
||||
pxor mm3, [GLOBAL(t80)]
|
||||
pxor mm6, [GLOBAL(t80)]
|
||||
movq [rdx+24], mm6
|
||||
movq [rdx+32], mm3
|
||||
|
||||
|
@ -1248,24 +1248,24 @@ next8_mbv:
|
|||
pxor mm2, mm2
|
||||
punpcklbw mm1, mm4
|
||||
punpckhbw mm2, mm4
|
||||
pmulhw mm1, [s18 GLOBAL]
|
||||
pmulhw mm2, [s18 GLOBAL]
|
||||
paddw mm1, [s63 GLOBAL]
|
||||
paddw mm2, [s63 GLOBAL]
|
||||
pmulhw mm1, [GLOBAL(s18)]
|
||||
pmulhw mm2, [GLOBAL(s18)]
|
||||
paddw mm1, [GLOBAL(s63)]
|
||||
paddw mm2, [GLOBAL(s63)]
|
||||
psraw mm1, 7
|
||||
psraw mm2, 7
|
||||
packsswb mm1, mm2
|
||||
|
||||
movq mm3, [rdx + 40]
|
||||
movq mm6, [rdx + 16] ; p1
|
||||
pxor mm3, [t80 GLOBAL]
|
||||
pxor mm6, [t80 GLOBAL]
|
||||
pxor mm3, [GLOBAL(t80)]
|
||||
pxor mm6, [GLOBAL(t80)]
|
||||
|
||||
paddsb mm6, mm1
|
||||
psubsb mm3, mm1
|
||||
|
||||
pxor mm6, [t80 GLOBAL]
|
||||
pxor mm3, [t80 GLOBAL]
|
||||
pxor mm6, [GLOBAL(t80)]
|
||||
pxor mm3, [GLOBAL(t80)]
|
||||
movq [rdx + 40], mm3
|
||||
movq [rdx + 16], mm6
|
||||
|
||||
|
@ -1279,10 +1279,10 @@ next8_mbv:
|
|||
pxor mm2, mm2
|
||||
punpcklbw mm1, mm4
|
||||
punpckhbw mm2, mm4
|
||||
pmulhw mm1, [s9 GLOBAL]
|
||||
pmulhw mm2, [s9 GLOBAL]
|
||||
paddw mm1, [s63 GLOBAL]
|
||||
paddw mm2, [s63 GLOBAL]
|
||||
pmulhw mm1, [GLOBAL(s9)]
|
||||
pmulhw mm2, [GLOBAL(s9)]
|
||||
paddw mm1, [GLOBAL(s63)]
|
||||
paddw mm2, [GLOBAL(s63)]
|
||||
psraw mm1, 7
|
||||
psraw mm2, 7
|
||||
packsswb mm1, mm2
|
||||
|
@ -1290,14 +1290,14 @@ next8_mbv:
|
|||
movq mm6, [rdx+ 8]
|
||||
movq mm3, [rdx+48]
|
||||
|
||||
pxor mm6, [t80 GLOBAL]
|
||||
pxor mm3, [t80 GLOBAL]
|
||||
pxor mm6, [GLOBAL(t80)]
|
||||
pxor mm3, [GLOBAL(t80)]
|
||||
|
||||
paddsb mm6, mm1
|
||||
psubsb mm3, mm1
|
||||
|
||||
pxor mm6, [t80 GLOBAL] ; mm6 = 71 61 51 41 31 21 11 01
|
||||
pxor mm3, [t80 GLOBAL] ; mm3 = 76 66 56 46 36 26 15 06
|
||||
pxor mm6, [GLOBAL(t80)] ; mm6 = 71 61 51 41 31 21 11 01
|
||||
pxor mm3, [GLOBAL(t80)] ; mm3 = 76 66 56 46 36 26 15 06
|
||||
|
||||
; tranpose and write back
|
||||
movq mm0, [rdx] ; mm0 = 70 60 50 40 30 20 10 00
|
||||
|
@ -1432,7 +1432,7 @@ nexts8_h:
|
|||
psubusb mm0, mm1 ; q1-=p1
|
||||
psubusb mm1, mm4 ; p1-=q1
|
||||
por mm1, mm0 ; abs(p1-q1)
|
||||
pand mm1, [tfe GLOBAL] ; set lsb of each byte to zero
|
||||
pand mm1, [GLOBAL(tfe)] ; set lsb of each byte to zero
|
||||
psrlw mm1, 1 ; abs(p1-q1)/2
|
||||
|
||||
movq mm5, [rsi+rax] ; p0
|
||||
|
@ -1450,12 +1450,12 @@ nexts8_h:
|
|||
pcmpeqb mm5, mm3
|
||||
|
||||
; start work on filters
|
||||
pxor mm2, [t80 GLOBAL] ; p1 offset to convert to signed values
|
||||
pxor mm7, [t80 GLOBAL] ; q1 offset to convert to signed values
|
||||
pxor mm2, [GLOBAL(t80)] ; p1 offset to convert to signed values
|
||||
pxor mm7, [GLOBAL(t80)] ; q1 offset to convert to signed values
|
||||
psubsb mm2, mm7 ; p1 - q1
|
||||
|
||||
pxor mm6, [t80 GLOBAL] ; offset to convert to signed values
|
||||
pxor mm0, [t80 GLOBAL] ; offset to convert to signed values
|
||||
pxor mm6, [GLOBAL(t80)] ; offset to convert to signed values
|
||||
pxor mm0, [GLOBAL(t80)] ; offset to convert to signed values
|
||||
movq mm3, mm0 ; q0
|
||||
psubsb mm0, mm6 ; q0 - p0
|
||||
paddsb mm2, mm0 ; p1 - q1 + 1 * (q0 - p0)
|
||||
|
@ -1464,7 +1464,7 @@ nexts8_h:
|
|||
pand mm5, mm2 ; mask filter values we don't care about
|
||||
|
||||
; do + 4 side
|
||||
paddsb mm5, [t4 GLOBAL] ; 3* (q0 - p0) + (p1 - q1) + 4
|
||||
paddsb mm5, [GLOBAL(t4)] ; 3* (q0 - p0) + (p1 - q1) + 4
|
||||
|
||||
movq mm0, mm5 ; get a copy of filters
|
||||
psllw mm0, 8 ; shift left 8
|
||||
|
@ -1477,12 +1477,12 @@ nexts8_h:
|
|||
por mm0, mm1 ; put the two together to get result
|
||||
|
||||
psubsb mm3, mm0 ; q0-= q0 add
|
||||
pxor mm3, [t80 GLOBAL] ; unoffset
|
||||
pxor mm3, [GLOBAL(t80)] ; unoffset
|
||||
movq [rsi], mm3 ; write back
|
||||
|
||||
|
||||
; now do +3 side
|
||||
psubsb mm5, [t1s GLOBAL] ; +3 instead of +4
|
||||
psubsb mm5, [GLOBAL(t1s)] ; +3 instead of +4
|
||||
|
||||
movq mm0, mm5 ; get a copy of filters
|
||||
psllw mm0, 8 ; shift left 8
|
||||
|
@ -1494,7 +1494,7 @@ nexts8_h:
|
|||
|
||||
|
||||
paddsb mm6, mm0 ; p0+= p0 add
|
||||
pxor mm6, [t80 GLOBAL] ; unoffset
|
||||
pxor mm6, [GLOBAL(t80)] ; unoffset
|
||||
movq [rsi+rax], mm6 ; write back
|
||||
|
||||
add rsi,8
|
||||
|
@ -1589,7 +1589,7 @@ nexts8_v:
|
|||
psubusb mm7, mm6 ; q1-=p1
|
||||
psubusb mm6, mm3 ; p1-=q1
|
||||
por mm6, mm7 ; abs(p1-q1)
|
||||
pand mm6, [tfe GLOBAL] ; set lsb of each byte to zero
|
||||
pand mm6, [GLOBAL(tfe)] ; set lsb of each byte to zero
|
||||
psrlw mm6, 1 ; abs(p1-q1)/2
|
||||
|
||||
movq mm5, mm1 ; p0
|
||||
|
@ -1617,16 +1617,16 @@ nexts8_v:
|
|||
movq t0, mm0
|
||||
movq t1, mm3
|
||||
|
||||
pxor mm0, [t80 GLOBAL] ; p1 offset to convert to signed values
|
||||
pxor mm3, [t80 GLOBAL] ; q1 offset to convert to signed values
|
||||
pxor mm0, [GLOBAL(t80)] ; p1 offset to convert to signed values
|
||||
pxor mm3, [GLOBAL(t80)] ; q1 offset to convert to signed values
|
||||
|
||||
psubsb mm0, mm3 ; p1 - q1
|
||||
movq mm6, mm1 ; p0
|
||||
|
||||
movq mm7, mm2 ; q0
|
||||
pxor mm6, [t80 GLOBAL] ; offset to convert to signed values
|
||||
pxor mm6, [GLOBAL(t80)] ; offset to convert to signed values
|
||||
|
||||
pxor mm7, [t80 GLOBAL] ; offset to convert to signed values
|
||||
pxor mm7, [GLOBAL(t80)] ; offset to convert to signed values
|
||||
movq mm3, mm7 ; offseted ; q0
|
||||
|
||||
psubsb mm7, mm6 ; q0 - p0
|
||||
|
@ -1637,7 +1637,7 @@ nexts8_v:
|
|||
|
||||
pand mm5, mm0 ; mask filter values we don't care about
|
||||
|
||||
paddsb mm5, [t4 GLOBAL] ; 3* (q0 - p0) + (p1 - q1) + 4
|
||||
paddsb mm5, [GLOBAL(t4)] ; 3* (q0 - p0) + (p1 - q1) + 4
|
||||
|
||||
movq mm0, mm5 ; get a copy of filters
|
||||
psllw mm0, 8 ; shift left 8
|
||||
|
@ -1651,10 +1651,10 @@ nexts8_v:
|
|||
por mm0, mm7 ; put the two together to get result
|
||||
|
||||
psubsb mm3, mm0 ; q0-= q0sz add
|
||||
pxor mm3, [t80 GLOBAL] ; unoffset
|
||||
pxor mm3, [GLOBAL(t80)] ; unoffset
|
||||
|
||||
; now do +3 side
|
||||
psubsb mm5, [t1s GLOBAL] ; +3 instead of +4
|
||||
psubsb mm5, [GLOBAL(t1s)] ; +3 instead of +4
|
||||
|
||||
movq mm0, mm5 ; get a copy of filters
|
||||
psllw mm0, 8 ; shift left 8
|
||||
|
@ -1666,7 +1666,7 @@ nexts8_v:
|
|||
por mm0, mm5 ; put the two together to get result
|
||||
|
||||
paddsb mm6, mm0 ; p0+= p0 add
|
||||
pxor mm6, [t80 GLOBAL] ; unoffset
|
||||
pxor mm6, [GLOBAL(t80)] ; unoffset
|
||||
|
||||
|
||||
movq mm0, t0
|
||||
|
|
|
@ -126,7 +126,7 @@
|
|||
movdqa xmm4, XMMWORD PTR [rdx] ; flimit
|
||||
|
||||
movdqa xmm3, xmm0 ; q0
|
||||
pand xmm2, [tfe GLOBAL] ; set lsb of each byte to zero
|
||||
pand xmm2, [GLOBAL(tfe)] ; set lsb of each byte to zero
|
||||
|
||||
mov rdx, arg(4) ; hev get thresh
|
||||
|
||||
|
@ -182,14 +182,14 @@
|
|||
movdqa xmm0, [rdx+32] ; q0
|
||||
%endif
|
||||
|
||||
pxor xmm2, [t80 GLOBAL] ; p1 offset to convert to signed values
|
||||
pxor xmm7, [t80 GLOBAL] ; q1 offset to convert to signed values
|
||||
pxor xmm2, [GLOBAL(t80)] ; p1 offset to convert to signed values
|
||||
pxor xmm7, [GLOBAL(t80)] ; q1 offset to convert to signed values
|
||||
|
||||
psubsb xmm2, xmm7 ; p1 - q1
|
||||
pxor xmm6, [t80 GLOBAL] ; offset to convert to signed values
|
||||
pxor xmm6, [GLOBAL(t80)] ; offset to convert to signed values
|
||||
|
||||
pand xmm2, xmm4 ; high var mask (hvm)(p1 - q1)
|
||||
pxor xmm0, [t80 GLOBAL] ; offset to convert to signed values
|
||||
pxor xmm0, [GLOBAL(t80)] ; offset to convert to signed values
|
||||
|
||||
movdqa xmm3, xmm0 ; q0
|
||||
psubsb xmm0, xmm6 ; q0 - p0
|
||||
|
@ -204,8 +204,8 @@
|
|||
|
||||
movdqa xmm2, xmm1
|
||||
|
||||
paddsb xmm1, [t4 GLOBAL] ; 3* (q0 - p0) + hvm(p1 - q1) + 4
|
||||
paddsb xmm2, [t3 GLOBAL] ; 3* (q0 - p0) + hvm(p1 - q1) + 3
|
||||
paddsb xmm1, [GLOBAL(t4)] ; 3* (q0 - p0) + hvm(p1 - q1) + 4
|
||||
paddsb xmm2, [GLOBAL(t3)] ; 3* (q0 - p0) + hvm(p1 - q1) + 3
|
||||
|
||||
punpckhbw xmm5, xmm2 ; axbxcxdx
|
||||
punpcklbw xmm2, xmm2 ; exfxgxhx
|
||||
|
@ -223,9 +223,9 @@
|
|||
movdqa xmm5, xmm0 ; save results
|
||||
|
||||
packsswb xmm0, xmm1 ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>3
|
||||
paddsw xmm5, [ones GLOBAL]
|
||||
paddsw xmm5, [GLOBAL(ones)]
|
||||
|
||||
paddsw xmm1, [ones GLOBAL]
|
||||
paddsw xmm1, [GLOBAL(ones)]
|
||||
psraw xmm5, 1 ; partial shifted one more time for 2nd tap
|
||||
|
||||
psraw xmm1, 1 ; partial shifted one more time for 2nd tap
|
||||
|
@ -241,18 +241,18 @@
|
|||
movdqa xmm1, [rdx] ; p1
|
||||
%endif
|
||||
pandn xmm4, xmm5 ; high edge variance additive
|
||||
pxor xmm6, [t80 GLOBAL] ; unoffset
|
||||
pxor xmm6, [GLOBAL(t80)] ; unoffset
|
||||
|
||||
pxor xmm1, [t80 GLOBAL] ; reoffset
|
||||
pxor xmm1, [GLOBAL(t80)] ; reoffset
|
||||
psubsb xmm3, xmm0 ; q0-= q0 add
|
||||
|
||||
paddsb xmm1, xmm4 ; p1+= p1 add
|
||||
pxor xmm3, [t80 GLOBAL] ; unoffset
|
||||
pxor xmm3, [GLOBAL(t80)] ; unoffset
|
||||
|
||||
pxor xmm1, [t80 GLOBAL] ; unoffset
|
||||
pxor xmm1, [GLOBAL(t80)] ; unoffset
|
||||
psubsb xmm7, xmm4 ; q1-= q1 add
|
||||
|
||||
pxor xmm7, [t80 GLOBAL] ; unoffset
|
||||
pxor xmm7, [GLOBAL(t80)] ; unoffset
|
||||
%if %1 == 0
|
||||
lea rsi, [rsi + rcx*2]
|
||||
lea rdi, [rdi + rcx*2]
|
||||
|
@ -401,10 +401,10 @@ sym(vp8_loop_filter_horizontal_edge_uv_sse2):
|
|||
movdqa xmm0, [rdx+64] ; q0
|
||||
%endif
|
||||
|
||||
pxor xmm2, [t80 GLOBAL] ; p1 offset to convert to signed values
|
||||
pxor xmm7, [t80 GLOBAL] ; q1 offset to convert to signed values
|
||||
pxor xmm6, [t80 GLOBAL] ; offset to convert to signed values
|
||||
pxor xmm0, [t80 GLOBAL] ; offset to convert to signed values
|
||||
pxor xmm2, [GLOBAL(t80)] ; p1 offset to convert to signed values
|
||||
pxor xmm7, [GLOBAL(t80)] ; q1 offset to convert to signed values
|
||||
pxor xmm6, [GLOBAL(t80)] ; offset to convert to signed values
|
||||
pxor xmm0, [GLOBAL(t80)] ; offset to convert to signed values
|
||||
|
||||
psubsb xmm2, xmm7 ; p1 - q1
|
||||
movdqa xmm3, xmm0 ; q0
|
||||
|
@ -431,14 +431,14 @@ sym(vp8_loop_filter_horizontal_edge_uv_sse2):
|
|||
movdqa xmm5, xmm2
|
||||
|
||||
punpckhbw xmm1, xmm4 ; Filter 2 (lo)
|
||||
paddsb xmm5, [t3 GLOBAL] ; vp8_signed_char_clamp(Filter2 + 3)
|
||||
paddsb xmm5, [GLOBAL(t3)] ; vp8_signed_char_clamp(Filter2 + 3)
|
||||
|
||||
pmulhw xmm1, [s9 GLOBAL] ; Filter 2 (lo) * 9
|
||||
pmulhw xmm1, [GLOBAL(s9)] ; Filter 2 (lo) * 9
|
||||
|
||||
pmulhw xmm0, [s9 GLOBAL] ; Filter 2 (hi) * 9
|
||||
pmulhw xmm0, [GLOBAL(s9)] ; Filter 2 (hi) * 9
|
||||
|
||||
punpckhbw xmm7, xmm5 ; axbxcxdx
|
||||
paddsb xmm2, [t4 GLOBAL] ; vp8_signed_char_clamp(Filter2 + 4)
|
||||
paddsb xmm2, [GLOBAL(t4)] ; vp8_signed_char_clamp(Filter2 + 4)
|
||||
|
||||
punpcklbw xmm5, xmm5 ; exfxgxhx
|
||||
psraw xmm7, 11 ; sign extended shift right by 3
|
||||
|
@ -462,9 +462,9 @@ sym(vp8_loop_filter_horizontal_edge_uv_sse2):
|
|||
movdqa xmm5, xmm0
|
||||
|
||||
movdqa xmm2, xmm5
|
||||
paddw xmm0, [s63 GLOBAL] ; Filter 2 (hi) * 9 + 63
|
||||
paddw xmm0, [GLOBAL(s63)] ; Filter 2 (hi) * 9 + 63
|
||||
|
||||
paddw xmm1, [s63 GLOBAL] ; Filter 2 (lo) * 9 + 63
|
||||
paddw xmm1, [GLOBAL(s63)] ; Filter 2 (lo) * 9 + 63
|
||||
paddw xmm5, xmm5 ; Filter 2 (hi) * 18
|
||||
|
||||
paddw xmm7, xmm7 ; Filter 2 (lo) * 18
|
||||
|
@ -510,26 +510,26 @@ sym(vp8_loop_filter_horizontal_edge_uv_sse2):
|
|||
movdqa xmm7, XMMWORD PTR [rdx+16] ; p2
|
||||
%endif
|
||||
|
||||
pxor xmm3, [t80 GLOBAL] ; *oq0 = sq^0x80
|
||||
pxor xmm6, [t80 GLOBAL] ; *oq0 = sp^0x80
|
||||
pxor xmm3, [GLOBAL(t80)] ; *oq0 = sq^0x80
|
||||
pxor xmm6, [GLOBAL(t80)] ; *oq0 = sp^0x80
|
||||
|
||||
pxor xmm1, [t80 GLOBAL]
|
||||
pxor xmm4, [t80 GLOBAL]
|
||||
pxor xmm1, [GLOBAL(t80)]
|
||||
pxor xmm4, [GLOBAL(t80)]
|
||||
|
||||
psubsb xmm1, xmm2 ; sq = vp8_signed_char_clamp(qs1 - u2)
|
||||
paddsb xmm4, xmm2 ; sp = vp8_signed_char_clamp(ps1 - u2)
|
||||
|
||||
pxor xmm1, [t80 GLOBAL] ; *oq1 = sq^0x80;
|
||||
pxor xmm4, [t80 GLOBAL] ; *op1 = sp^0x80;
|
||||
pxor xmm1, [GLOBAL(t80)] ; *oq1 = sq^0x80;
|
||||
pxor xmm4, [GLOBAL(t80)] ; *op1 = sp^0x80;
|
||||
|
||||
pxor xmm7, [t80 GLOBAL]
|
||||
pxor xmm5, [t80 GLOBAL]
|
||||
pxor xmm7, [GLOBAL(t80)]
|
||||
pxor xmm5, [GLOBAL(t80)]
|
||||
|
||||
paddsb xmm7, xmm0 ; sp = vp8_signed_char_clamp(ps2 - u)
|
||||
psubsb xmm5, xmm0 ; sq = vp8_signed_char_clamp(qs2 - u)
|
||||
|
||||
pxor xmm7, [t80 GLOBAL] ; *op2 = sp^0x80;
|
||||
pxor xmm5, [t80 GLOBAL] ; *oq2 = sq^0x80;
|
||||
pxor xmm7, [GLOBAL(t80)] ; *op2 = sp^0x80;
|
||||
pxor xmm5, [GLOBAL(t80)] ; *oq2 = sq^0x80;
|
||||
|
||||
%if %1 == 0
|
||||
lea rsi, [rsi+rcx*2]
|
||||
|
@ -915,7 +915,7 @@ sym(vp8_mbloop_filter_horizontal_edge_uv_sse2):
|
|||
por xmm5, xmm1 ; abs(p1-q1)
|
||||
movdqa xmm1, xmm3 ; p0
|
||||
|
||||
pand xmm5, [tfe GLOBAL] ; set lsb of each byte to zero
|
||||
pand xmm5, [GLOBAL(tfe)] ; set lsb of each byte to zero
|
||||
psubusb xmm1, xmm6 ; p0-q0
|
||||
|
||||
psrlw xmm5, 1 ; abs(p1-q1)/2
|
||||
|
@ -1415,7 +1415,7 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2):
|
|||
psubusb xmm0, xmm1 ; q1-=p1
|
||||
psubusb xmm1, xmm4 ; p1-=q1
|
||||
por xmm1, xmm0 ; abs(p1-q1)
|
||||
pand xmm1, [tfe GLOBAL] ; set lsb of each byte to zero
|
||||
pand xmm1, [GLOBAL(tfe)] ; set lsb of each byte to zero
|
||||
psrlw xmm1, 1 ; abs(p1-q1)/2
|
||||
|
||||
movdqu xmm5, [rsi+rax] ; p0
|
||||
|
@ -1433,12 +1433,12 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2):
|
|||
pcmpeqb xmm5, xmm3
|
||||
|
||||
; start work on filters
|
||||
pxor xmm2, [t80 GLOBAL] ; p1 offset to convert to signed values
|
||||
pxor xmm7, [t80 GLOBAL] ; q1 offset to convert to signed values
|
||||
pxor xmm2, [GLOBAL(t80)] ; p1 offset to convert to signed values
|
||||
pxor xmm7, [GLOBAL(t80)] ; q1 offset to convert to signed values
|
||||
psubsb xmm2, xmm7 ; p1 - q1
|
||||
|
||||
pxor xmm6, [t80 GLOBAL] ; offset to convert to signed values
|
||||
pxor xmm0, [t80 GLOBAL] ; offset to convert to signed values
|
||||
pxor xmm6, [GLOBAL(t80)] ; offset to convert to signed values
|
||||
pxor xmm0, [GLOBAL(t80)] ; offset to convert to signed values
|
||||
movdqa xmm3, xmm0 ; q0
|
||||
psubsb xmm0, xmm6 ; q0 - p0
|
||||
paddsb xmm2, xmm0 ; p1 - q1 + 1 * (q0 - p0)
|
||||
|
@ -1447,7 +1447,7 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2):
|
|||
pand xmm5, xmm2 ; mask filter values we don't care about
|
||||
|
||||
; do + 4 side
|
||||
paddsb xmm5, [t4 GLOBAL] ; 3* (q0 - p0) + (p1 - q1) + 4
|
||||
paddsb xmm5, [GLOBAL(t4)] ; 3* (q0 - p0) + (p1 - q1) + 4
|
||||
|
||||
movdqa xmm0, xmm5 ; get a copy of filters
|
||||
psllw xmm0, 8 ; shift left 8
|
||||
|
@ -1460,11 +1460,11 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2):
|
|||
por xmm0, xmm1 ; put the two together to get result
|
||||
|
||||
psubsb xmm3, xmm0 ; q0-= q0 add
|
||||
pxor xmm3, [t80 GLOBAL] ; unoffset
|
||||
pxor xmm3, [GLOBAL(t80)] ; unoffset
|
||||
movdqu [rsi], xmm3 ; write back
|
||||
|
||||
; now do +3 side
|
||||
psubsb xmm5, [t1s GLOBAL] ; +3 instead of +4
|
||||
psubsb xmm5, [GLOBAL(t1s)] ; +3 instead of +4
|
||||
|
||||
movdqa xmm0, xmm5 ; get a copy of filters
|
||||
psllw xmm0, 8 ; shift left 8
|
||||
|
@ -1476,7 +1476,7 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2):
|
|||
|
||||
|
||||
paddsb xmm6, xmm0 ; p0+= p0 add
|
||||
pxor xmm6, [t80 GLOBAL] ; unoffset
|
||||
pxor xmm6, [GLOBAL(t80)] ; unoffset
|
||||
movdqu [rsi+rax], xmm6 ; write back
|
||||
|
||||
; begin epilog
|
||||
|
@ -1596,7 +1596,7 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2):
|
|||
psubusb xmm7, xmm0 ; q1-=p1
|
||||
psubusb xmm6, xmm3 ; p1-=q1
|
||||
por xmm6, xmm7 ; abs(p1-q1)
|
||||
pand xmm6, [tfe GLOBAL] ; set lsb of each byte to zero
|
||||
pand xmm6, [GLOBAL(tfe)] ; set lsb of each byte to zero
|
||||
psrlw xmm6, 1 ; abs(p1-q1)/2
|
||||
|
||||
movdqa xmm5, xmm1 ; p0
|
||||
|
@ -1622,16 +1622,16 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2):
|
|||
movdqa t0, xmm0
|
||||
movdqa t1, xmm3
|
||||
|
||||
pxor xmm0, [t80 GLOBAL] ; p1 offset to convert to signed values
|
||||
pxor xmm3, [t80 GLOBAL] ; q1 offset to convert to signed values
|
||||
pxor xmm0, [GLOBAL(t80)] ; p1 offset to convert to signed values
|
||||
pxor xmm3, [GLOBAL(t80)] ; q1 offset to convert to signed values
|
||||
|
||||
psubsb xmm0, xmm3 ; p1 - q1
|
||||
movdqa xmm6, xmm1 ; p0
|
||||
|
||||
movdqa xmm7, xmm2 ; q0
|
||||
pxor xmm6, [t80 GLOBAL] ; offset to convert to signed values
|
||||
pxor xmm6, [GLOBAL(t80)] ; offset to convert to signed values
|
||||
|
||||
pxor xmm7, [t80 GLOBAL] ; offset to convert to signed values
|
||||
pxor xmm7, [GLOBAL(t80)] ; offset to convert to signed values
|
||||
movdqa xmm3, xmm7 ; offseted ; q0
|
||||
|
||||
psubsb xmm7, xmm6 ; q0 - p0
|
||||
|
@ -1643,7 +1643,7 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2):
|
|||
pand xmm5, xmm0 ; mask filter values we don't care about
|
||||
|
||||
|
||||
paddsb xmm5, [t4 GLOBAL] ; 3* (q0 - p0) + (p1 - q1) + 4
|
||||
paddsb xmm5, [GLOBAL(t4)] ; 3* (q0 - p0) + (p1 - q1) + 4
|
||||
|
||||
movdqa xmm0, xmm5 ; get a copy of filters
|
||||
psllw xmm0, 8 ; shift left 8
|
||||
|
@ -1658,10 +1658,10 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2):
|
|||
por xmm0, xmm7 ; put the two together to get result
|
||||
|
||||
psubsb xmm3, xmm0 ; q0-= q0sz add
|
||||
pxor xmm3, [t80 GLOBAL] ; unoffset q0
|
||||
pxor xmm3, [GLOBAL(t80)] ; unoffset q0
|
||||
|
||||
; now do +3 side
|
||||
psubsb xmm5, [t1s GLOBAL] ; +3 instead of +4
|
||||
psubsb xmm5, [GLOBAL(t1s)] ; +3 instead of +4
|
||||
movdqa xmm0, xmm5 ; get a copy of filters
|
||||
|
||||
psllw xmm0, 8 ; shift left 8
|
||||
|
@ -1674,7 +1674,7 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2):
|
|||
por xmm0, xmm5 ; put the two together to get result
|
||||
|
||||
paddsb xmm6, xmm0 ; p0+= p0 add
|
||||
pxor xmm6, [t80 GLOBAL] ; unoffset p0
|
||||
pxor xmm6, [GLOBAL(t80)] ; unoffset p0
|
||||
|
||||
movdqa xmm0, t0 ; p1
|
||||
movdqa xmm4, t1 ; q1
|
||||
|
|
|
@ -37,16 +37,16 @@ sym(vp8_post_proc_down_and_across_mmx):
|
|||
%if ABI_IS_32BIT=1 && CONFIG_PIC=1
|
||||
; move the global rd onto the stack, since we don't have enough registers
|
||||
; to do PIC addressing
|
||||
movq mm0, [rd GLOBAL]
|
||||
movq mm0, [GLOBAL(rd)]
|
||||
sub rsp, 8
|
||||
movq [rsp], mm0
|
||||
%define RD [rsp]
|
||||
%else
|
||||
%define RD [rd GLOBAL]
|
||||
%define RD [GLOBAL(rd)]
|
||||
%endif
|
||||
|
||||
push rbx
|
||||
lea rbx, [Blur GLOBAL]
|
||||
lea rbx, [GLOBAL(Blur)]
|
||||
movd mm2, dword ptr arg(6) ;flimit
|
||||
punpcklwd mm2, mm2
|
||||
punpckldq mm2, mm2
|
||||
|
@ -286,7 +286,7 @@ sym(vp8_mbpost_proc_down_mmx):
|
|||
%define flimit2 [rsp+128]
|
||||
|
||||
%if ABI_IS_32BIT=0
|
||||
lea r8, [sym(vp8_rv) GLOBAL]
|
||||
lea r8, [GLOBAL(sym(vp8_rv))]
|
||||
%endif
|
||||
|
||||
;rows +=8;
|
||||
|
@ -404,7 +404,7 @@ loop_row:
|
|||
and rcx, 127
|
||||
%if ABI_IS_32BIT=1 && CONFIG_PIC=1
|
||||
push rax
|
||||
lea rax, [sym(vp8_rv) GLOBAL]
|
||||
lea rax, [GLOBAL(sym(vp8_rv))]
|
||||
movq mm4, [rax + rcx*2] ;vp8_rv[rcx*2]
|
||||
pop rax
|
||||
%elif ABI_IS_32BIT=0
|
||||
|
|
|
@ -36,12 +36,12 @@ sym(vp8_post_proc_down_and_across_xmm):
|
|||
ALIGN_STACK 16, rax
|
||||
; move the global rd onto the stack, since we don't have enough registers
|
||||
; to do PIC addressing
|
||||
movdqa xmm0, [rd42 GLOBAL]
|
||||
movdqa xmm0, [GLOBAL(rd42)]
|
||||
sub rsp, 16
|
||||
movdqa [rsp], xmm0
|
||||
%define RD42 [rsp]
|
||||
%else
|
||||
%define RD42 [rd42 GLOBAL]
|
||||
%define RD42 [GLOBAL(rd42)]
|
||||
%endif
|
||||
|
||||
|
||||
|
@ -275,7 +275,7 @@ sym(vp8_mbpost_proc_down_xmm):
|
|||
%define flimit4 [rsp+128]
|
||||
|
||||
%if ABI_IS_32BIT=0
|
||||
lea r8, [sym(vp8_rv) GLOBAL]
|
||||
lea r8, [GLOBAL(sym(vp8_rv))]
|
||||
%endif
|
||||
|
||||
;rows +=8;
|
||||
|
@ -393,7 +393,7 @@ loop_row:
|
|||
and rcx, 127
|
||||
%if ABI_IS_32BIT=1 && CONFIG_PIC=1
|
||||
push rax
|
||||
lea rax, [sym(vp8_rv) GLOBAL]
|
||||
lea rax, [GLOBAL(sym(vp8_rv))]
|
||||
movdqu xmm4, [rax + rcx*2] ;vp8_rv[rcx*2]
|
||||
pop rax
|
||||
%elif ABI_IS_32BIT=0
|
||||
|
@ -579,7 +579,7 @@ nextcol4:
|
|||
punpcklwd xmm1, xmm0
|
||||
|
||||
paddd xmm1, xmm6
|
||||
paddd xmm1, [four8s GLOBAL]
|
||||
paddd xmm1, [GLOBAL(four8s)]
|
||||
|
||||
psrad xmm1, 4
|
||||
packssdw xmm1, xmm0
|
||||
|
|
|
@ -84,7 +84,7 @@ nextrow:
|
|||
pmullw mm5, [rdx] ; mm5 *= kernel 5 modifiers
|
||||
paddsw mm3, mm5 ; mm3 += mm5
|
||||
|
||||
paddsw mm3, [rd GLOBAL] ; mm3 += round value
|
||||
paddsw mm3, [GLOBAL(rd)] ; mm3 += round value
|
||||
psraw mm3, VP8_FILTER_SHIFT ; mm3 /= 128
|
||||
packuswb mm3, mm0 ; pack and unpack to saturate
|
||||
punpcklbw mm3, mm0 ;
|
||||
|
@ -136,7 +136,7 @@ sym(vp8_filter_block1d_v6_mmx):
|
|||
push rdi
|
||||
; end prolog
|
||||
|
||||
movq mm5, [rd GLOBAL]
|
||||
movq mm5, [GLOBAL(rd)]
|
||||
push rbx
|
||||
mov rbx, arg(6) ;vp8_filter
|
||||
movq mm1, [rbx + 16] ; do both the negative taps first!!!
|
||||
|
@ -225,7 +225,7 @@ sym(vp8_filter_block1dc_v6_mmx):
|
|||
push rdi
|
||||
; end prolog
|
||||
|
||||
movq mm5, [rd GLOBAL]
|
||||
movq mm5, [GLOBAL(rd)]
|
||||
push rbx
|
||||
mov rbx, arg(7) ;vp8_filter
|
||||
movq mm1, [rbx + 16] ; do both the negative taps first!!!
|
||||
|
@ -320,7 +320,7 @@ sym(vp8_bilinear_predict8x8_mmx):
|
|||
mov rdi, arg(4) ;dst_ptr ;
|
||||
|
||||
shl rax, 5 ; offset * 32
|
||||
lea rcx, [sym(vp8_bilinear_filters_mmx) GLOBAL]
|
||||
lea rcx, [GLOBAL(sym(vp8_bilinear_filters_mmx))]
|
||||
|
||||
add rax, rcx ; HFilter
|
||||
mov rsi, arg(0) ;src_ptr ;
|
||||
|
@ -363,10 +363,10 @@ sym(vp8_bilinear_predict8x8_mmx):
|
|||
paddw mm3, mm5 ;
|
||||
paddw mm4, mm6 ;
|
||||
|
||||
paddw mm3, [rd GLOBAL] ; xmm3 += round value
|
||||
paddw mm3, [GLOBAL(rd)] ; xmm3 += round value
|
||||
psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128
|
||||
|
||||
paddw mm4, [rd GLOBAL] ;
|
||||
paddw mm4, [GLOBAL(rd)] ;
|
||||
psraw mm4, VP8_FILTER_SHIFT ;
|
||||
|
||||
movq mm7, mm3 ;
|
||||
|
@ -404,10 +404,10 @@ next_row_8x8:
|
|||
pmullw mm5, [rax] ;
|
||||
pmullw mm6, [rax] ;
|
||||
|
||||
paddw mm3, [rd GLOBAL] ; xmm3 += round value
|
||||
paddw mm3, [GLOBAL(rd)] ; xmm3 += round value
|
||||
psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128
|
||||
|
||||
paddw mm4, [rd GLOBAL] ;
|
||||
paddw mm4, [GLOBAL(rd)] ;
|
||||
psraw mm4, VP8_FILTER_SHIFT ;
|
||||
|
||||
movq mm7, mm3 ;
|
||||
|
@ -421,10 +421,10 @@ next_row_8x8:
|
|||
paddw mm4, mm6 ;
|
||||
|
||||
|
||||
paddw mm3, [rd GLOBAL] ; xmm3 += round value
|
||||
paddw mm3, [GLOBAL(rd)] ; xmm3 += round value
|
||||
psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128
|
||||
|
||||
paddw mm4, [rd GLOBAL] ;
|
||||
paddw mm4, [GLOBAL(rd)] ;
|
||||
psraw mm4, VP8_FILTER_SHIFT ;
|
||||
|
||||
packuswb mm3, mm4
|
||||
|
@ -476,7 +476,7 @@ sym(vp8_bilinear_predict8x4_mmx):
|
|||
movsxd rax, dword ptr arg(2) ;xoffset
|
||||
mov rdi, arg(4) ;dst_ptr ;
|
||||
|
||||
lea rcx, [sym(vp8_bilinear_filters_mmx) GLOBAL]
|
||||
lea rcx, [GLOBAL(sym(vp8_bilinear_filters_mmx))]
|
||||
shl rax, 5
|
||||
|
||||
mov rsi, arg(0) ;src_ptr ;
|
||||
|
@ -518,10 +518,10 @@ sym(vp8_bilinear_predict8x4_mmx):
|
|||
paddw mm3, mm5 ;
|
||||
paddw mm4, mm6 ;
|
||||
|
||||
paddw mm3, [rd GLOBAL] ; xmm3 += round value
|
||||
paddw mm3, [GLOBAL(rd)] ; xmm3 += round value
|
||||
psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128
|
||||
|
||||
paddw mm4, [rd GLOBAL] ;
|
||||
paddw mm4, [GLOBAL(rd)] ;
|
||||
psraw mm4, VP8_FILTER_SHIFT ;
|
||||
|
||||
movq mm7, mm3 ;
|
||||
|
@ -559,10 +559,10 @@ next_row_8x4:
|
|||
pmullw mm5, [rax] ;
|
||||
pmullw mm6, [rax] ;
|
||||
|
||||
paddw mm3, [rd GLOBAL] ; xmm3 += round value
|
||||
paddw mm3, [GLOBAL(rd)] ; xmm3 += round value
|
||||
psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128
|
||||
|
||||
paddw mm4, [rd GLOBAL] ;
|
||||
paddw mm4, [GLOBAL(rd)] ;
|
||||
psraw mm4, VP8_FILTER_SHIFT ;
|
||||
|
||||
movq mm7, mm3 ;
|
||||
|
@ -576,10 +576,10 @@ next_row_8x4:
|
|||
paddw mm4, mm6 ;
|
||||
|
||||
|
||||
paddw mm3, [rd GLOBAL] ; xmm3 += round value
|
||||
paddw mm3, [GLOBAL(rd)] ; xmm3 += round value
|
||||
psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128
|
||||
|
||||
paddw mm4, [rd GLOBAL] ;
|
||||
paddw mm4, [GLOBAL(rd)] ;
|
||||
psraw mm4, VP8_FILTER_SHIFT ;
|
||||
|
||||
packuswb mm3, mm4
|
||||
|
@ -631,7 +631,7 @@ sym(vp8_bilinear_predict4x4_mmx):
|
|||
movsxd rax, dword ptr arg(2) ;xoffset
|
||||
mov rdi, arg(4) ;dst_ptr ;
|
||||
|
||||
lea rcx, [sym(vp8_bilinear_filters_mmx) GLOBAL]
|
||||
lea rcx, [GLOBAL(sym(vp8_bilinear_filters_mmx))]
|
||||
shl rax, 5
|
||||
|
||||
add rax, rcx ; HFilter
|
||||
|
@ -662,7 +662,7 @@ sym(vp8_bilinear_predict4x4_mmx):
|
|||
pmullw mm5, mm2 ;
|
||||
|
||||
paddw mm3, mm5 ;
|
||||
paddw mm3, [rd GLOBAL] ; xmm3 += round value
|
||||
paddw mm3, [GLOBAL(rd)] ; xmm3 += round value
|
||||
|
||||
psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128
|
||||
|
||||
|
@ -686,7 +686,7 @@ next_row_4x4:
|
|||
punpcklbw mm5, mm0 ;
|
||||
|
||||
pmullw mm5, [rax] ;
|
||||
paddw mm3, [rd GLOBAL] ; xmm3 += round value
|
||||
paddw mm3, [GLOBAL(rd)] ; xmm3 += round value
|
||||
|
||||
psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128
|
||||
movq mm7, mm3 ;
|
||||
|
@ -697,7 +697,7 @@ next_row_4x4:
|
|||
paddw mm3, mm5 ;
|
||||
|
||||
|
||||
paddw mm3, [rd GLOBAL] ; xmm3 += round value
|
||||
paddw mm3, [GLOBAL(rd)] ; xmm3 += round value
|
||||
psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128
|
||||
|
||||
packuswb mm3, mm0
|
||||
|
|
|
@ -107,7 +107,7 @@ filter_block1d8_h6_rowloop:
|
|||
paddsw xmm4, xmm6
|
||||
|
||||
paddsw xmm4, xmm1
|
||||
paddsw xmm4, [rd GLOBAL]
|
||||
paddsw xmm4, [GLOBAL(rd)]
|
||||
|
||||
psraw xmm4, 7
|
||||
|
||||
|
@ -231,7 +231,7 @@ filter_block1d16_h6_sse2_rowloop:
|
|||
paddsw xmm4, xmm6
|
||||
|
||||
paddsw xmm4, xmm1
|
||||
paddsw xmm4, [rd GLOBAL]
|
||||
paddsw xmm4, [GLOBAL(rd)]
|
||||
|
||||
psraw xmm4, 7
|
||||
|
||||
|
@ -284,7 +284,7 @@ filter_block1d16_h6_sse2_rowloop:
|
|||
paddsw xmm4, xmm6
|
||||
|
||||
paddsw xmm4, xmm2
|
||||
paddsw xmm4, [rd GLOBAL]
|
||||
paddsw xmm4, [GLOBAL(rd)]
|
||||
|
||||
psraw xmm4, 7
|
||||
|
||||
|
@ -351,7 +351,7 @@ sym(vp8_filter_block1d8_v6_sse2):
|
|||
movsxd rcx, DWORD PTR arg(5) ;[output_height]
|
||||
pxor xmm0, xmm0 ; clear xmm0
|
||||
|
||||
movdqa xmm7, XMMWORD PTR [rd GLOBAL]
|
||||
movdqa xmm7, XMMWORD PTR [GLOBAL(rd)]
|
||||
%if ABI_IS_32BIT=0
|
||||
movsxd r8, dword ptr arg(2) ; dst_pitch
|
||||
%endif
|
||||
|
@ -489,7 +489,7 @@ vp8_filter_block1d16_v6_sse2_loop:
|
|||
pmullw xmm5, [rax + 80]
|
||||
pmullw xmm6, [rax + 80]
|
||||
|
||||
movdqa xmm7, XMMWORD PTR [rd GLOBAL]
|
||||
movdqa xmm7, XMMWORD PTR [GLOBAL(rd)]
|
||||
pxor xmm0, xmm0 ; clear xmm0
|
||||
|
||||
paddsw xmm1, xmm3
|
||||
|
@ -608,7 +608,7 @@ filter_block1d8_h6_only_rowloop:
|
|||
paddsw xmm4, xmm6
|
||||
|
||||
paddsw xmm4, xmm1
|
||||
paddsw xmm4, [rd GLOBAL]
|
||||
paddsw xmm4, [GLOBAL(rd)]
|
||||
|
||||
psraw xmm4, 7
|
||||
|
||||
|
@ -723,7 +723,7 @@ filter_block1d16_h6_only_sse2_rowloop:
|
|||
paddsw xmm4, xmm6
|
||||
|
||||
paddsw xmm4, xmm1
|
||||
paddsw xmm4, [rd GLOBAL]
|
||||
paddsw xmm4, [GLOBAL(rd)]
|
||||
|
||||
psraw xmm4, 7
|
||||
|
||||
|
@ -773,7 +773,7 @@ filter_block1d16_h6_only_sse2_rowloop:
|
|||
paddsw xmm4, xmm6
|
||||
|
||||
paddsw xmm4, xmm2
|
||||
paddsw xmm4, [rd GLOBAL]
|
||||
paddsw xmm4, [GLOBAL(rd)]
|
||||
|
||||
psraw xmm4, 7
|
||||
|
||||
|
@ -832,7 +832,7 @@ sym(vp8_filter_block1d8_v6_only_sse2):
|
|||
|
||||
pxor xmm0, xmm0 ; clear xmm0
|
||||
|
||||
movdqa xmm7, XMMWORD PTR [rd GLOBAL]
|
||||
movdqa xmm7, XMMWORD PTR [GLOBAL(rd)]
|
||||
%if ABI_IS_32BIT=0
|
||||
movsxd r8, dword ptr arg(3) ; dst_pitch
|
||||
%endif
|
||||
|
@ -978,7 +978,7 @@ sym(vp8_bilinear_predict16x16_sse2):
|
|||
;const short *HFilter = bilinear_filters_mmx[xoffset]
|
||||
;const short *VFilter = bilinear_filters_mmx[yoffset]
|
||||
|
||||
lea rcx, [sym(vp8_bilinear_filters_mmx) GLOBAL]
|
||||
lea rcx, [GLOBAL(sym(vp8_bilinear_filters_mmx))]
|
||||
movsxd rax, dword ptr arg(2) ;xoffset
|
||||
|
||||
cmp rax, 0 ;skip first_pass filter if xoffset=0
|
||||
|
@ -1033,10 +1033,10 @@ sym(vp8_bilinear_predict16x16_sse2):
|
|||
paddw xmm3, xmm5
|
||||
paddw xmm4, xmm6
|
||||
|
||||
paddw xmm3, [rd GLOBAL] ; xmm3 += round value
|
||||
paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value
|
||||
psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128
|
||||
|
||||
paddw xmm4, [rd GLOBAL]
|
||||
paddw xmm4, [GLOBAL(rd)]
|
||||
psraw xmm4, VP8_FILTER_SHIFT
|
||||
|
||||
movdqa xmm7, xmm3
|
||||
|
@ -1074,10 +1074,10 @@ next_row:
|
|||
pmullw xmm5, [rax]
|
||||
pmullw xmm6, [rax]
|
||||
|
||||
paddw xmm3, [rd GLOBAL] ; xmm3 += round value
|
||||
paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value
|
||||
psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128
|
||||
|
||||
paddw xmm4, [rd GLOBAL]
|
||||
paddw xmm4, [GLOBAL(rd)]
|
||||
psraw xmm4, VP8_FILTER_SHIFT
|
||||
|
||||
movdqa xmm7, xmm3
|
||||
|
@ -1089,10 +1089,10 @@ next_row:
|
|||
paddw xmm3, xmm5
|
||||
paddw xmm4, xmm6
|
||||
|
||||
paddw xmm3, [rd GLOBAL] ; xmm3 += round value
|
||||
paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value
|
||||
psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128
|
||||
|
||||
paddw xmm4, [rd GLOBAL]
|
||||
paddw xmm4, [GLOBAL(rd)]
|
||||
psraw xmm4, VP8_FILTER_SHIFT
|
||||
|
||||
packuswb xmm3, xmm4
|
||||
|
@ -1154,10 +1154,10 @@ next_row_spo:
|
|||
paddw xmm3, xmm5
|
||||
paddw xmm4, xmm6
|
||||
|
||||
paddw xmm3, [rd GLOBAL] ; xmm3 += round value
|
||||
paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value
|
||||
psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128
|
||||
|
||||
paddw xmm4, [rd GLOBAL]
|
||||
paddw xmm4, [GLOBAL(rd)]
|
||||
psraw xmm4, VP8_FILTER_SHIFT
|
||||
|
||||
packuswb xmm3, xmm4
|
||||
|
@ -1198,10 +1198,10 @@ next_row_fpo:
|
|||
paddw xmm3, xmm5
|
||||
paddw xmm4, xmm6
|
||||
|
||||
paddw xmm3, [rd GLOBAL] ; xmm3 += round value
|
||||
paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value
|
||||
psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128
|
||||
|
||||
paddw xmm4, [rd GLOBAL]
|
||||
paddw xmm4, [GLOBAL(rd)]
|
||||
psraw xmm4, VP8_FILTER_SHIFT
|
||||
|
||||
packuswb xmm3, xmm4
|
||||
|
@ -1249,7 +1249,7 @@ sym(vp8_bilinear_predict8x8_sse2):
|
|||
|
||||
;const short *HFilter = bilinear_filters_mmx[xoffset]
|
||||
;const short *VFilter = bilinear_filters_mmx[yoffset]
|
||||
lea rcx, [sym(vp8_bilinear_filters_mmx) GLOBAL]
|
||||
lea rcx, [GLOBAL(sym(vp8_bilinear_filters_mmx))]
|
||||
|
||||
mov rsi, arg(0) ;src_ptr
|
||||
movsxd rdx, dword ptr arg(1) ;src_pixels_per_line
|
||||
|
@ -1315,7 +1315,7 @@ sym(vp8_bilinear_predict8x8_sse2):
|
|||
|
||||
paddw xmm3, xmm4
|
||||
|
||||
paddw xmm3, [rd GLOBAL] ; xmm3 += round value
|
||||
paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value
|
||||
psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128
|
||||
|
||||
movdqa xmm7, xmm3
|
||||
|
@ -1334,7 +1334,7 @@ next_row8x8:
|
|||
paddw xmm3, xmm4
|
||||
pmullw xmm7, xmm5
|
||||
|
||||
paddw xmm3, [rd GLOBAL] ; xmm3 += round value
|
||||
paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value
|
||||
psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128
|
||||
|
||||
movdqa xmm4, xmm3
|
||||
|
@ -1344,7 +1344,7 @@ next_row8x8:
|
|||
|
||||
movdqa xmm7, xmm4
|
||||
|
||||
paddw xmm3, [rd GLOBAL] ; xmm3 += round value
|
||||
paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value
|
||||
psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128
|
||||
|
||||
packuswb xmm3, xmm0
|
||||
|
|
|
@ -48,9 +48,9 @@ sym(vp8_filter_block1d8_h6_ssse3):
|
|||
xor rsi, rsi
|
||||
shl rdx, 4
|
||||
|
||||
movdqa xmm7, [rd GLOBAL]
|
||||
movdqa xmm7, [GLOBAL(rd)]
|
||||
|
||||
lea rax, [k0_k5 GLOBAL]
|
||||
lea rax, [GLOBAL(k0_k5)]
|
||||
add rax, rdx
|
||||
mov rdi, arg(2) ;output_ptr
|
||||
|
||||
|
@ -80,9 +80,9 @@ filter_block1d8_h6_rowloop_ssse3:
|
|||
pmaddubsw xmm0, xmm4
|
||||
|
||||
movdqa xmm2, xmm1
|
||||
pshufb xmm1, [shuf2bfrom1 GLOBAL]
|
||||
pshufb xmm1, [GLOBAL(shuf2bfrom1)]
|
||||
|
||||
pshufb xmm2, [shuf3bfrom1 GLOBAL]
|
||||
pshufb xmm2, [GLOBAL(shuf3bfrom1)]
|
||||
pmaddubsw xmm1, xmm5
|
||||
|
||||
lea rdi, [rdi + rdx]
|
||||
|
@ -115,8 +115,8 @@ vp8_filter_block1d8_h4_ssse3:
|
|||
movdqa xmm5, XMMWORD PTR [rax+256] ;k2_k4
|
||||
movdqa xmm6, XMMWORD PTR [rax+128] ;k1_k3
|
||||
|
||||
movdqa xmm3, XMMWORD PTR [shuf2bfrom1 GLOBAL]
|
||||
movdqa xmm4, XMMWORD PTR [shuf3bfrom1 GLOBAL]
|
||||
movdqa xmm3, XMMWORD PTR [GLOBAL(shuf2bfrom1)]
|
||||
movdqa xmm4, XMMWORD PTR [GLOBAL(shuf3bfrom1)]
|
||||
|
||||
mov rsi, arg(0) ;src_ptr
|
||||
|
||||
|
@ -189,7 +189,7 @@ sym(vp8_filter_block1d16_h6_ssse3):
|
|||
xor rsi, rsi
|
||||
shl rdx, 4 ;
|
||||
|
||||
lea rax, [k0_k5 GLOBAL]
|
||||
lea rax, [GLOBAL(k0_k5)]
|
||||
add rax, rdx
|
||||
|
||||
mov rdi, arg(2) ;output_ptr
|
||||
|
@ -219,9 +219,9 @@ filter_block1d16_h6_rowloop_ssse3:
|
|||
pmaddubsw xmm0, xmm4
|
||||
|
||||
movdqa xmm2, xmm1
|
||||
pshufb xmm1, [shuf2bfrom1 GLOBAL]
|
||||
pshufb xmm1, [GLOBAL(shuf2bfrom1)]
|
||||
|
||||
pshufb xmm2, [shuf3bfrom1 GLOBAL]
|
||||
pshufb xmm2, [GLOBAL(shuf3bfrom1)]
|
||||
movq xmm3, MMWORD PTR [rsi + 6]
|
||||
|
||||
pmaddubsw xmm1, xmm5
|
||||
|
@ -237,10 +237,10 @@ filter_block1d16_h6_rowloop_ssse3:
|
|||
paddsw xmm0, xmm2
|
||||
|
||||
movdqa xmm2, xmm1
|
||||
paddsw xmm0, [rd GLOBAL]
|
||||
paddsw xmm0, [GLOBAL(rd)]
|
||||
|
||||
pshufb xmm1, [shuf2bfrom1 GLOBAL]
|
||||
pshufb xmm2, [shuf3bfrom1 GLOBAL]
|
||||
pshufb xmm1, [GLOBAL(shuf2bfrom1)]
|
||||
pshufb xmm2, [GLOBAL(shuf3bfrom1)]
|
||||
|
||||
psraw xmm0, 7
|
||||
pmaddubsw xmm1, xmm5
|
||||
|
@ -253,7 +253,7 @@ filter_block1d16_h6_rowloop_ssse3:
|
|||
|
||||
paddsw xmm3, xmm2
|
||||
|
||||
paddsw xmm3, [rd GLOBAL]
|
||||
paddsw xmm3, [GLOBAL(rd)]
|
||||
|
||||
psraw xmm3, 7
|
||||
|
||||
|
@ -288,18 +288,18 @@ filter_block1d16_h4_rowloop_ssse3:
|
|||
movdqu xmm1, XMMWORD PTR [rsi - 2]
|
||||
|
||||
movdqa xmm2, xmm1
|
||||
pshufb xmm1, [shuf2b GLOBAL]
|
||||
pshufb xmm2, [shuf3b GLOBAL]
|
||||
pshufb xmm1, [GLOBAL(shuf2b)]
|
||||
pshufb xmm2, [GLOBAL(shuf3b)]
|
||||
pmaddubsw xmm1, xmm5
|
||||
|
||||
movdqu xmm3, XMMWORD PTR [rsi + 6]
|
||||
|
||||
pmaddubsw xmm2, xmm6
|
||||
movdqa xmm0, xmm3
|
||||
pshufb xmm3, [shuf3b GLOBAL]
|
||||
pshufb xmm0, [shuf2b GLOBAL]
|
||||
pshufb xmm3, [GLOBAL(shuf3b)]
|
||||
pshufb xmm0, [GLOBAL(shuf2b)]
|
||||
|
||||
paddsw xmm1, [rd GLOBAL]
|
||||
paddsw xmm1, [GLOBAL(rd)]
|
||||
paddsw xmm1, xmm2
|
||||
|
||||
pmaddubsw xmm0, xmm5
|
||||
|
@ -309,7 +309,7 @@ filter_block1d16_h4_rowloop_ssse3:
|
|||
packuswb xmm1, xmm1
|
||||
lea rsi, [rsi + rax]
|
||||
paddsw xmm3, xmm0
|
||||
paddsw xmm3, [rd GLOBAL]
|
||||
paddsw xmm3, [GLOBAL(rd)]
|
||||
psraw xmm3, 7
|
||||
packuswb xmm3, xmm3
|
||||
|
||||
|
@ -353,9 +353,9 @@ sym(vp8_filter_block1d4_h6_ssse3):
|
|||
xor rsi, rsi
|
||||
shl rdx, 4 ;
|
||||
|
||||
lea rax, [k0_k5 GLOBAL]
|
||||
lea rax, [GLOBAL(k0_k5)]
|
||||
add rax, rdx
|
||||
movdqa xmm7, [rd GLOBAL]
|
||||
movdqa xmm7, [GLOBAL(rd)]
|
||||
|
||||
cmp esi, DWORD PTR [rax]
|
||||
je vp8_filter_block1d4_h4_ssse3
|
||||
|
@ -376,12 +376,12 @@ filter_block1d4_h6_rowloop_ssse3:
|
|||
movdqu xmm0, XMMWORD PTR [rsi - 2]
|
||||
|
||||
movdqa xmm1, xmm0
|
||||
pshufb xmm0, [shuf1b GLOBAL]
|
||||
pshufb xmm0, [GLOBAL(shuf1b)]
|
||||
|
||||
movdqa xmm2, xmm1
|
||||
pshufb xmm1, [shuf2b GLOBAL]
|
||||
pshufb xmm1, [GLOBAL(shuf2b)]
|
||||
pmaddubsw xmm0, xmm4
|
||||
pshufb xmm2, [shuf3b GLOBAL]
|
||||
pshufb xmm2, [GLOBAL(shuf3b)]
|
||||
pmaddubsw xmm1, xmm5
|
||||
|
||||
;--
|
||||
|
@ -413,8 +413,8 @@ filter_block1d4_h6_rowloop_ssse3:
|
|||
vp8_filter_block1d4_h4_ssse3:
|
||||
movdqa xmm5, XMMWORD PTR [rax+256] ;k2_k4
|
||||
movdqa xmm6, XMMWORD PTR [rax+128] ;k1_k3
|
||||
movdqa xmm0, XMMWORD PTR [shuf2b GLOBAL]
|
||||
movdqa xmm3, XMMWORD PTR [shuf3b GLOBAL]
|
||||
movdqa xmm0, XMMWORD PTR [GLOBAL(shuf2b)]
|
||||
movdqa xmm3, XMMWORD PTR [GLOBAL(shuf3b)]
|
||||
|
||||
mov rsi, arg(0) ;src_ptr
|
||||
mov rdi, arg(2) ;output_ptr
|
||||
|
@ -427,8 +427,8 @@ filter_block1d4_h4_rowloop_ssse3:
|
|||
movdqu xmm1, XMMWORD PTR [rsi - 2]
|
||||
|
||||
movdqa xmm2, xmm1
|
||||
pshufb xmm1, xmm0 ;;[shuf2b GLOBAL]
|
||||
pshufb xmm2, xmm3 ;;[shuf3b GLOBAL]
|
||||
pshufb xmm1, xmm0 ;;[GLOBAL(shuf2b)]
|
||||
pshufb xmm2, xmm3 ;;[GLOBAL(shuf3b)]
|
||||
pmaddubsw xmm1, xmm5
|
||||
|
||||
;--
|
||||
|
@ -480,7 +480,7 @@ sym(vp8_filter_block1d16_v6_ssse3):
|
|||
xor rsi, rsi
|
||||
shl rdx, 4 ;
|
||||
|
||||
lea rax, [k0_k5 GLOBAL]
|
||||
lea rax, [GLOBAL(k0_k5)]
|
||||
add rax, rdx
|
||||
|
||||
cmp esi, DWORD PTR [rax]
|
||||
|
@ -521,7 +521,7 @@ vp8_filter_block1d16_v6_ssse3_loop:
|
|||
|
||||
paddsw xmm2, xmm3
|
||||
paddsw xmm2, xmm1
|
||||
paddsw xmm2, [rd GLOBAL]
|
||||
paddsw xmm2, [GLOBAL(rd)]
|
||||
psraw xmm2, 7
|
||||
packuswb xmm2, xmm2
|
||||
|
||||
|
@ -548,7 +548,7 @@ vp8_filter_block1d16_v6_ssse3_loop:
|
|||
;--
|
||||
paddsw xmm2, xmm3
|
||||
paddsw xmm2, xmm1
|
||||
paddsw xmm2, [rd GLOBAL]
|
||||
paddsw xmm2, [GLOBAL(rd)]
|
||||
psraw xmm2, 7
|
||||
packuswb xmm2, xmm2
|
||||
|
||||
|
@ -601,7 +601,7 @@ vp8_filter_block1d16_v4_ssse3_loop:
|
|||
movq xmm4, MMWORD PTR [rax + rdx * 2 + 8] ;D
|
||||
movq xmm0, MMWORD PTR [rsi + rdx * 4 + 8] ;E
|
||||
|
||||
paddsw xmm2, [rd GLOBAL]
|
||||
paddsw xmm2, [GLOBAL(rd)]
|
||||
paddsw xmm2, xmm3
|
||||
psraw xmm2, 7
|
||||
packuswb xmm2, xmm2
|
||||
|
@ -612,7 +612,7 @@ vp8_filter_block1d16_v4_ssse3_loop:
|
|||
pmaddubsw xmm1, xmm6
|
||||
pmaddubsw xmm5, xmm7
|
||||
|
||||
movdqa xmm4, [rd GLOBAL]
|
||||
movdqa xmm4, [GLOBAL(rd)]
|
||||
add rsi, rdx
|
||||
add rax, rdx
|
||||
;--
|
||||
|
@ -665,7 +665,7 @@ sym(vp8_filter_block1d8_v6_ssse3):
|
|||
xor rsi, rsi
|
||||
shl rdx, 4 ;
|
||||
|
||||
lea rax, [k0_k5 GLOBAL]
|
||||
lea rax, [GLOBAL(k0_k5)]
|
||||
add rax, rdx
|
||||
|
||||
movsxd rdx, DWORD PTR arg(1) ;pixels_per_line
|
||||
|
@ -698,7 +698,7 @@ vp8_filter_block1d8_v6_ssse3_loop:
|
|||
punpcklbw xmm3, xmm0 ;C E
|
||||
|
||||
movq xmm0, MMWORD PTR [rax + rdx * 4] ;F
|
||||
movdqa xmm4, [rd GLOBAL]
|
||||
movdqa xmm4, [GLOBAL(rd)]
|
||||
|
||||
pmaddubsw xmm3, xmm6
|
||||
punpcklbw xmm1, xmm0 ;A F
|
||||
|
@ -735,7 +735,7 @@ vp8_filter_block1d8_v6_ssse3_loop:
|
|||
vp8_filter_block1d8_v4_ssse3:
|
||||
movdqa xmm6, XMMWORD PTR [rax+256] ;k2_k4
|
||||
movdqa xmm7, XMMWORD PTR [rax+128] ;k1_k3
|
||||
movdqa xmm5, [rd GLOBAL]
|
||||
movdqa xmm5, [GLOBAL(rd)]
|
||||
|
||||
mov rsi, arg(0) ;src_ptr
|
||||
|
||||
|
@ -802,7 +802,7 @@ sym(vp8_filter_block1d4_v6_ssse3):
|
|||
xor rsi, rsi
|
||||
shl rdx, 4 ;
|
||||
|
||||
lea rax, [k0_k5 GLOBAL]
|
||||
lea rax, [GLOBAL(k0_k5)]
|
||||
add rax, rdx
|
||||
|
||||
movsxd rdx, DWORD PTR arg(1) ;pixels_per_line
|
||||
|
@ -836,7 +836,7 @@ vp8_filter_block1d4_v6_ssse3_loop:
|
|||
|
||||
movd mm0, DWORD PTR [rax + rdx * 4] ;F
|
||||
|
||||
movq mm4, [rd GLOBAL]
|
||||
movq mm4, [GLOBAL(rd)]
|
||||
|
||||
pmaddubsw mm3, mm6
|
||||
punpcklbw mm1, mm0 ;A F
|
||||
|
@ -873,7 +873,7 @@ vp8_filter_block1d4_v6_ssse3_loop:
|
|||
vp8_filter_block1d4_v4_ssse3:
|
||||
movq mm6, MMWORD PTR [rax+256] ;k2_k4
|
||||
movq mm7, MMWORD PTR [rax+128] ;k1_k3
|
||||
movq mm5, MMWORD PTR [rd GLOBAL]
|
||||
movq mm5, MMWORD PTR [GLOBAL(rd)]
|
||||
|
||||
mov rsi, arg(0) ;src_ptr
|
||||
|
||||
|
@ -938,7 +938,7 @@ sym(vp8_bilinear_predict16x16_ssse3):
|
|||
push rdi
|
||||
; end prolog
|
||||
|
||||
lea rcx, [vp8_bilinear_filters_ssse3 GLOBAL]
|
||||
lea rcx, [GLOBAL(vp8_bilinear_filters_ssse3)]
|
||||
movsxd rax, dword ptr arg(2) ; xoffset
|
||||
|
||||
cmp rax, 0 ; skip first_pass filter if xoffset=0
|
||||
|
@ -985,10 +985,10 @@ sym(vp8_bilinear_predict16x16_ssse3):
|
|||
punpcklbw xmm4, xmm5 ; 08 09 09 10 10 11 11 12 12 13 13 14 14 15 15 16
|
||||
pmaddubsw xmm4, xmm1 ; 01 03 05 07 09 11 13 15
|
||||
|
||||
paddw xmm3, [rd GLOBAL] ; xmm3 += round value
|
||||
paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value
|
||||
psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128
|
||||
|
||||
paddw xmm4, [rd GLOBAL] ; xmm4 += round value
|
||||
paddw xmm4, [GLOBAL(rd)] ; xmm4 += round value
|
||||
psraw xmm4, VP8_FILTER_SHIFT ; xmm4 /= 128
|
||||
|
||||
movdqa xmm7, xmm3
|
||||
|
@ -1009,10 +1009,10 @@ sym(vp8_bilinear_predict16x16_ssse3):
|
|||
punpcklbw xmm4, xmm5
|
||||
pmaddubsw xmm4, xmm1
|
||||
|
||||
paddw xmm6, [rd GLOBAL] ; xmm6 += round value
|
||||
paddw xmm6, [GLOBAL(rd)] ; xmm6 += round value
|
||||
psraw xmm6, VP8_FILTER_SHIFT ; xmm6 /= 128
|
||||
|
||||
paddw xmm4, [rd GLOBAL] ; xmm4 += round value
|
||||
paddw xmm4, [GLOBAL(rd)] ; xmm4 += round value
|
||||
psraw xmm4, VP8_FILTER_SHIFT ; xmm4 /= 128
|
||||
|
||||
packuswb xmm6, xmm4
|
||||
|
@ -1024,10 +1024,10 @@ sym(vp8_bilinear_predict16x16_ssse3):
|
|||
punpckhbw xmm7, xmm6
|
||||
pmaddubsw xmm7, xmm2
|
||||
|
||||
paddw xmm5, [rd GLOBAL] ; xmm5 += round value
|
||||
paddw xmm5, [GLOBAL(rd)] ; xmm5 += round value
|
||||
psraw xmm5, VP8_FILTER_SHIFT ; xmm5 /= 128
|
||||
|
||||
paddw xmm7, [rd GLOBAL] ; xmm7 += round value
|
||||
paddw xmm7, [GLOBAL(rd)] ; xmm7 += round value
|
||||
psraw xmm7, VP8_FILTER_SHIFT ; xmm7 /= 128
|
||||
|
||||
packuswb xmm5, xmm7
|
||||
|
@ -1082,19 +1082,19 @@ b16x16_sp_only:
|
|||
punpcklbw xmm5, xmm6
|
||||
|
||||
pmaddubsw xmm3, xmm1
|
||||
paddw xmm4, [rd GLOBAL]
|
||||
paddw xmm4, [GLOBAL(rd)]
|
||||
|
||||
pmaddubsw xmm5, xmm1
|
||||
paddw xmm2, [rd GLOBAL]
|
||||
paddw xmm2, [GLOBAL(rd)]
|
||||
|
||||
psraw xmm4, VP8_FILTER_SHIFT
|
||||
psraw xmm2, VP8_FILTER_SHIFT
|
||||
|
||||
packuswb xmm4, xmm2
|
||||
paddw xmm3, [rd GLOBAL]
|
||||
paddw xmm3, [GLOBAL(rd)]
|
||||
|
||||
movdqa [rdi], xmm4 ; store row 0
|
||||
paddw xmm5, [rd GLOBAL]
|
||||
paddw xmm5, [GLOBAL(rd)]
|
||||
|
||||
psraw xmm3, VP8_FILTER_SHIFT
|
||||
psraw xmm5, VP8_FILTER_SHIFT
|
||||
|
@ -1134,7 +1134,7 @@ b16x16_fp_only:
|
|||
pmaddubsw xmm3, xmm1
|
||||
movq xmm5, [rsi]
|
||||
|
||||
paddw xmm2, [rd GLOBAL]
|
||||
paddw xmm2, [GLOBAL(rd)]
|
||||
movq xmm7, [rsi+1]
|
||||
|
||||
movq xmm6, [rsi+8]
|
||||
|
@ -1143,7 +1143,7 @@ b16x16_fp_only:
|
|||
punpcklbw xmm5, xmm7
|
||||
movq xmm7, [rsi+9]
|
||||
|
||||
paddw xmm3, [rd GLOBAL]
|
||||
paddw xmm3, [GLOBAL(rd)]
|
||||
pmaddubsw xmm5, xmm1
|
||||
|
||||
psraw xmm3, VP8_FILTER_SHIFT
|
||||
|
@ -1153,12 +1153,12 @@ b16x16_fp_only:
|
|||
pmaddubsw xmm6, xmm1
|
||||
|
||||
movdqa [rdi], xmm2 ; store the results in the destination
|
||||
paddw xmm5, [rd GLOBAL]
|
||||
paddw xmm5, [GLOBAL(rd)]
|
||||
|
||||
lea rdi, [rdi + rdx] ; dst_pitch
|
||||
psraw xmm5, VP8_FILTER_SHIFT
|
||||
|
||||
paddw xmm6, [rd GLOBAL]
|
||||
paddw xmm6, [GLOBAL(rd)]
|
||||
psraw xmm6, VP8_FILTER_SHIFT
|
||||
|
||||
packuswb xmm5, xmm6
|
||||
|
@ -1204,7 +1204,7 @@ sym(vp8_bilinear_predict8x8_ssse3):
|
|||
ALIGN_STACK 16, rax
|
||||
sub rsp, 144 ; reserve 144 bytes
|
||||
|
||||
lea rcx, [vp8_bilinear_filters_ssse3 GLOBAL]
|
||||
lea rcx, [GLOBAL(vp8_bilinear_filters_ssse3)]
|
||||
|
||||
mov rsi, arg(0) ;src_ptr
|
||||
movsxd rdx, dword ptr arg(1) ;src_pixels_per_line
|
||||
|
@ -1269,7 +1269,7 @@ sym(vp8_bilinear_predict8x8_ssse3):
|
|||
punpcklbw xmm3, xmm5 ; 00 01 01 02 02 03 03 04 04 05 05 06 06 07 07 08
|
||||
pmaddubsw xmm3, xmm0 ; 00 02 04 06 08 10 12 14
|
||||
|
||||
paddw xmm3, [rd GLOBAL] ; xmm3 += round value
|
||||
paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value
|
||||
psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128
|
||||
|
||||
movdqa xmm7, xmm3
|
||||
|
@ -1286,7 +1286,7 @@ sym(vp8_bilinear_predict8x8_ssse3):
|
|||
punpcklbw xmm6, xmm5
|
||||
pmaddubsw xmm6, xmm0
|
||||
|
||||
paddw xmm6, [rd GLOBAL] ; xmm6 += round value
|
||||
paddw xmm6, [GLOBAL(rd)] ; xmm6 += round value
|
||||
psraw xmm6, VP8_FILTER_SHIFT ; xmm6 /= 128
|
||||
|
||||
packuswb xmm6, xmm6
|
||||
|
@ -1294,7 +1294,7 @@ sym(vp8_bilinear_predict8x8_ssse3):
|
|||
punpcklbw xmm7, xmm6
|
||||
pmaddubsw xmm7, xmm1
|
||||
|
||||
paddw xmm7, [rd GLOBAL] ; xmm7 += round value
|
||||
paddw xmm7, [GLOBAL(rd)] ; xmm7 += round value
|
||||
psraw xmm7, VP8_FILTER_SHIFT ; xmm7 /= 128
|
||||
|
||||
packuswb xmm7, xmm7
|
||||
|
@ -1347,21 +1347,21 @@ b8x8_sp_only:
|
|||
punpcklbw xmm6, xmm7
|
||||
|
||||
pmaddubsw xmm6, xmm0
|
||||
paddw xmm1, [rd GLOBAL]
|
||||
paddw xmm1, [GLOBAL(rd)]
|
||||
|
||||
paddw xmm2, [rd GLOBAL]
|
||||
paddw xmm2, [GLOBAL(rd)]
|
||||
psraw xmm1, VP8_FILTER_SHIFT
|
||||
|
||||
paddw xmm3, [rd GLOBAL]
|
||||
paddw xmm3, [GLOBAL(rd)]
|
||||
psraw xmm2, VP8_FILTER_SHIFT
|
||||
|
||||
paddw xmm4, [rd GLOBAL]
|
||||
paddw xmm4, [GLOBAL(rd)]
|
||||
psraw xmm3, VP8_FILTER_SHIFT
|
||||
|
||||
paddw xmm5, [rd GLOBAL]
|
||||
paddw xmm5, [GLOBAL(rd)]
|
||||
psraw xmm4, VP8_FILTER_SHIFT
|
||||
|
||||
paddw xmm6, [rd GLOBAL]
|
||||
paddw xmm6, [GLOBAL(rd)]
|
||||
psraw xmm5, VP8_FILTER_SHIFT
|
||||
|
||||
psraw xmm6, VP8_FILTER_SHIFT
|
||||
|
@ -1395,10 +1395,10 @@ b8x8_sp_only:
|
|||
punpcklbw xmm1, xmm2
|
||||
|
||||
pmaddubsw xmm1, xmm0
|
||||
paddw xmm7, [rd GLOBAL]
|
||||
paddw xmm7, [GLOBAL(rd)]
|
||||
|
||||
psraw xmm7, VP8_FILTER_SHIFT
|
||||
paddw xmm1, [rd GLOBAL]
|
||||
paddw xmm1, [GLOBAL(rd)]
|
||||
|
||||
psraw xmm1, VP8_FILTER_SHIFT
|
||||
packuswb xmm7, xmm7
|
||||
|
@ -1447,16 +1447,16 @@ b8x8_fp_only:
|
|||
punpcklbw xmm7, xmm2
|
||||
pmaddubsw xmm7, xmm0
|
||||
|
||||
paddw xmm1, [rd GLOBAL]
|
||||
paddw xmm1, [GLOBAL(rd)]
|
||||
psraw xmm1, VP8_FILTER_SHIFT
|
||||
|
||||
paddw xmm3, [rd GLOBAL]
|
||||
paddw xmm3, [GLOBAL(rd)]
|
||||
psraw xmm3, VP8_FILTER_SHIFT
|
||||
|
||||
paddw xmm5, [rd GLOBAL]
|
||||
paddw xmm5, [GLOBAL(rd)]
|
||||
psraw xmm5, VP8_FILTER_SHIFT
|
||||
|
||||
paddw xmm7, [rd GLOBAL]
|
||||
paddw xmm7, [GLOBAL(rd)]
|
||||
psraw xmm7, VP8_FILTER_SHIFT
|
||||
|
||||
packuswb xmm1, xmm1
|
||||
|
|
|
@ -98,11 +98,11 @@ sym(vp8_dequant_idct_add_mmx):
|
|||
movq mm5, mm1
|
||||
paddw mm2, mm0 ; a1 =0+2
|
||||
|
||||
pmulhw mm5, [x_s1sqr2 GLOBAL];
|
||||
pmulhw mm5, [GLOBAL(x_s1sqr2)];
|
||||
paddw mm5, mm1 ; ip1 * sin(pi/8) * sqrt(2)
|
||||
|
||||
movq mm7, mm3 ;
|
||||
pmulhw mm7, [x_c1sqr2less1 GLOBAL];
|
||||
pmulhw mm7, [GLOBAL(x_c1sqr2less1)];
|
||||
|
||||
paddw mm7, mm3 ; ip3 * cos(pi/8) * sqrt(2)
|
||||
psubw mm7, mm5 ; c1
|
||||
|
@ -110,10 +110,10 @@ sym(vp8_dequant_idct_add_mmx):
|
|||
movq mm5, mm1
|
||||
movq mm4, mm3
|
||||
|
||||
pmulhw mm5, [x_c1sqr2less1 GLOBAL]
|
||||
pmulhw mm5, [GLOBAL(x_c1sqr2less1)]
|
||||
paddw mm5, mm1
|
||||
|
||||
pmulhw mm3, [x_s1sqr2 GLOBAL]
|
||||
pmulhw mm3, [GLOBAL(x_s1sqr2)]
|
||||
paddw mm3, mm4
|
||||
|
||||
paddw mm3, mm5 ; d1
|
||||
|
@ -153,11 +153,11 @@ sym(vp8_dequant_idct_add_mmx):
|
|||
movq mm5, mm1
|
||||
paddw mm2, mm0 ; a1 =0+2
|
||||
|
||||
pmulhw mm5, [x_s1sqr2 GLOBAL];
|
||||
pmulhw mm5, [GLOBAL(x_s1sqr2)];
|
||||
paddw mm5, mm1 ; ip1 * sin(pi/8) * sqrt(2)
|
||||
|
||||
movq mm7, mm3 ;
|
||||
pmulhw mm7, [x_c1sqr2less1 GLOBAL];
|
||||
pmulhw mm7, [GLOBAL(x_c1sqr2less1)];
|
||||
|
||||
paddw mm7, mm3 ; ip3 * cos(pi/8) * sqrt(2)
|
||||
psubw mm7, mm5 ; c1
|
||||
|
@ -165,16 +165,16 @@ sym(vp8_dequant_idct_add_mmx):
|
|||
movq mm5, mm1
|
||||
movq mm4, mm3
|
||||
|
||||
pmulhw mm5, [x_c1sqr2less1 GLOBAL]
|
||||
pmulhw mm5, [GLOBAL(x_c1sqr2less1)]
|
||||
paddw mm5, mm1
|
||||
|
||||
pmulhw mm3, [x_s1sqr2 GLOBAL]
|
||||
pmulhw mm3, [GLOBAL(x_s1sqr2)]
|
||||
paddw mm3, mm4
|
||||
|
||||
paddw mm3, mm5 ; d1
|
||||
paddw mm0, [fours GLOBAL]
|
||||
paddw mm0, [GLOBAL(fours)]
|
||||
|
||||
paddw mm2, [fours GLOBAL]
|
||||
paddw mm2, [GLOBAL(fours)]
|
||||
movq mm6, mm2 ; a1
|
||||
|
||||
movq mm4, mm0 ; b1
|
||||
|
@ -300,11 +300,11 @@ sym(vp8_dequant_dc_idct_add_mmx):
|
|||
movq mm5, mm1
|
||||
paddw mm2, mm0 ; a1 =0+2
|
||||
|
||||
pmulhw mm5, [x_s1sqr2 GLOBAL];
|
||||
pmulhw mm5, [GLOBAL(x_s1sqr2)];
|
||||
paddw mm5, mm1 ; ip1 * sin(pi/8) * sqrt(2)
|
||||
|
||||
movq mm7, mm3 ;
|
||||
pmulhw mm7, [x_c1sqr2less1 GLOBAL];
|
||||
pmulhw mm7, [GLOBAL(x_c1sqr2less1)];
|
||||
|
||||
paddw mm7, mm3 ; ip3 * cos(pi/8) * sqrt(2)
|
||||
psubw mm7, mm5 ; c1
|
||||
|
@ -312,10 +312,10 @@ sym(vp8_dequant_dc_idct_add_mmx):
|
|||
movq mm5, mm1
|
||||
movq mm4, mm3
|
||||
|
||||
pmulhw mm5, [x_c1sqr2less1 GLOBAL]
|
||||
pmulhw mm5, [GLOBAL(x_c1sqr2less1)]
|
||||
paddw mm5, mm1
|
||||
|
||||
pmulhw mm3, [x_s1sqr2 GLOBAL]
|
||||
pmulhw mm3, [GLOBAL(x_s1sqr2)]
|
||||
paddw mm3, mm4
|
||||
|
||||
paddw mm3, mm5 ; d1
|
||||
|
@ -355,11 +355,11 @@ sym(vp8_dequant_dc_idct_add_mmx):
|
|||
movq mm5, mm1
|
||||
paddw mm2, mm0 ; a1 =0+2
|
||||
|
||||
pmulhw mm5, [x_s1sqr2 GLOBAL];
|
||||
pmulhw mm5, [GLOBAL(x_s1sqr2)];
|
||||
paddw mm5, mm1 ; ip1 * sin(pi/8) * sqrt(2)
|
||||
|
||||
movq mm7, mm3 ;
|
||||
pmulhw mm7, [x_c1sqr2less1 GLOBAL];
|
||||
pmulhw mm7, [GLOBAL(x_c1sqr2less1)];
|
||||
|
||||
paddw mm7, mm3 ; ip3 * cos(pi/8) * sqrt(2)
|
||||
psubw mm7, mm5 ; c1
|
||||
|
@ -367,16 +367,16 @@ sym(vp8_dequant_dc_idct_add_mmx):
|
|||
movq mm5, mm1
|
||||
movq mm4, mm3
|
||||
|
||||
pmulhw mm5, [x_c1sqr2less1 GLOBAL]
|
||||
pmulhw mm5, [GLOBAL(x_c1sqr2less1)]
|
||||
paddw mm5, mm1
|
||||
|
||||
pmulhw mm3, [x_s1sqr2 GLOBAL]
|
||||
pmulhw mm3, [GLOBAL(x_s1sqr2)]
|
||||
paddw mm3, mm4
|
||||
|
||||
paddw mm3, mm5 ; d1
|
||||
paddw mm0, [fours GLOBAL]
|
||||
paddw mm0, [GLOBAL(fours)]
|
||||
|
||||
paddw mm2, [fours GLOBAL]
|
||||
paddw mm2, [GLOBAL(fours)]
|
||||
movq mm6, mm2 ; a1
|
||||
|
||||
movq mm4, mm0 ; b1
|
||||
|
|
|
@ -35,7 +35,7 @@ sym(vp8_short_fdct4x4_mmx):
|
|||
mov rsi, arg(0) ;input
|
||||
mov rdi, arg(1) ;output
|
||||
|
||||
lea rdx, [dct_const_mmx GLOBAL]
|
||||
lea rdx, [GLOBAL(dct_const_mmx)]
|
||||
movsxd rax, dword ptr arg(2) ;pitch
|
||||
|
||||
lea rcx, [rsi + rax*2]
|
||||
|
@ -243,7 +243,7 @@ sym(vp8_short_fdct8x4_wmt):
|
|||
mov rsi, arg(0) ;input
|
||||
mov rdi, arg(1) ;output
|
||||
|
||||
lea rdx, [dct_const_xmm GLOBAL]
|
||||
lea rdx, [GLOBAL(dct_const_xmm)]
|
||||
movsxd rax, dword ptr arg(2) ;pitch
|
||||
|
||||
lea rcx, [rsi + rax*2]
|
||||
|
|
|
@ -52,14 +52,14 @@ sym(vp8_short_fdct4x4_sse2):
|
|||
psllw xmm0, 3 ;b1 <<= 3 a1 <<= 3
|
||||
psllw xmm3, 3 ;c1 <<= 3 d1 <<= 3
|
||||
movdqa xmm1, xmm0
|
||||
pmaddwd xmm0, XMMWORD PTR[_mult_add GLOBAL] ;a1 + b1
|
||||
pmaddwd xmm1, XMMWORD PTR[_mult_sub GLOBAL] ;a1 - b1
|
||||
pmaddwd xmm0, XMMWORD PTR[GLOBAL(_mult_add)] ;a1 + b1
|
||||
pmaddwd xmm1, XMMWORD PTR[GLOBAL(_mult_sub)] ;a1 - b1
|
||||
movdqa xmm4, xmm3
|
||||
pmaddwd xmm3, XMMWORD PTR[_5352_2217 GLOBAL] ;c1*2217 + d1*5352
|
||||
pmaddwd xmm4, XMMWORD PTR[_2217_neg5352 GLOBAL] ;d1*2217 - c1*5352
|
||||
pmaddwd xmm3, XMMWORD PTR[GLOBAL(_5352_2217)] ;c1*2217 + d1*5352
|
||||
pmaddwd xmm4, XMMWORD PTR[GLOBAL(_2217_neg5352)];d1*2217 - c1*5352
|
||||
|
||||
paddd xmm3, XMMWORD PTR[_14500 GLOBAL]
|
||||
paddd xmm4, XMMWORD PTR[_7500 GLOBAL]
|
||||
paddd xmm3, XMMWORD PTR[GLOBAL(_14500)]
|
||||
paddd xmm4, XMMWORD PTR[GLOBAL(_7500)]
|
||||
psrad xmm3, 12 ;(c1 * 2217 + d1 * 5352 + 14500)>>12
|
||||
psrad xmm4, 12 ;(d1 * 2217 - c1 * 5352 + 7500)>>12
|
||||
|
||||
|
@ -80,7 +80,7 @@ sym(vp8_short_fdct4x4_sse2):
|
|||
punpcklwd xmm0, xmm3 ;13 12 11 10 03 02 01 00
|
||||
punpckhwd xmm2, xmm3 ;33 32 31 30 23 22 21 20
|
||||
|
||||
movdqa xmm5, XMMWORD PTR[_7 GLOBAL]
|
||||
movdqa xmm5, XMMWORD PTR[GLOBAL(_7)]
|
||||
pshufd xmm2, xmm2, 04eh
|
||||
movdqa xmm3, xmm0
|
||||
paddw xmm0, xmm2 ;b1 b1 b1 b1 a1 a1 a1 a1
|
||||
|
@ -94,8 +94,8 @@ sym(vp8_short_fdct4x4_sse2):
|
|||
pshufhw xmm0, xmm0, 0d8h ;b1 a1 b1 a1 b1 a1 b1 a1
|
||||
pshufhw xmm3, xmm3, 0d8h ;c1 d1 c1 d1 c1 d1 c1 d1
|
||||
movdqa xmm1, xmm0
|
||||
pmaddwd xmm0, XMMWORD PTR[_mult_add GLOBAL] ;a1 + b1
|
||||
pmaddwd xmm1, XMMWORD PTR[_mult_sub GLOBAL] ;a1 - b1
|
||||
pmaddwd xmm0, XMMWORD PTR[GLOBAL(_mult_add)] ;a1 + b1
|
||||
pmaddwd xmm1, XMMWORD PTR[GLOBAL(_mult_sub)] ;a1 - b1
|
||||
|
||||
pxor xmm4, xmm4 ;zero out for compare
|
||||
paddd xmm0, xmm5
|
||||
|
@ -103,14 +103,14 @@ sym(vp8_short_fdct4x4_sse2):
|
|||
pcmpeqw xmm2, xmm4
|
||||
psrad xmm0, 4 ;(a1 + b1 + 7)>>4
|
||||
psrad xmm1, 4 ;(a1 - b1 + 7)>>4
|
||||
pandn xmm2, XMMWORD PTR[_cmp_mask GLOBAL] ;clear upper,
|
||||
;and keep bit 0 of lower
|
||||
pandn xmm2, XMMWORD PTR[GLOBAL(_cmp_mask)] ;clear upper,
|
||||
;and keep bit 0 of lower
|
||||
|
||||
movdqa xmm4, xmm3
|
||||
pmaddwd xmm3, XMMWORD PTR[_5352_2217 GLOBAL] ;c1*2217 + d1*5352
|
||||
pmaddwd xmm4, XMMWORD PTR[_2217_neg5352 GLOBAL] ;d1*2217 - c1*5352
|
||||
paddd xmm3, XMMWORD PTR[_12000 GLOBAL]
|
||||
paddd xmm4, XMMWORD PTR[_51000 GLOBAL]
|
||||
pmaddwd xmm3, XMMWORD PTR[GLOBAL(_5352_2217)] ;c1*2217 + d1*5352
|
||||
pmaddwd xmm4, XMMWORD PTR[GLOBAL(_2217_neg5352)] ;d1*2217 - c1*5352
|
||||
paddd xmm3, XMMWORD PTR[GLOBAL(_12000)]
|
||||
paddd xmm4, XMMWORD PTR[GLOBAL(_51000)]
|
||||
packssdw xmm0, xmm1 ;op[8] op[0]
|
||||
psrad xmm3, 16 ;(c1 * 2217 + d1 * 5352 + 12000)>>16
|
||||
psrad xmm4, 16 ;(d1 * 2217 - c1 * 5352 + 51000)>>16
|
||||
|
|
|
@ -556,7 +556,7 @@ sym(vp8_filter_block2d_bil4x4_var_mmx):
|
|||
pmullw mm3, [rax+8] ;
|
||||
|
||||
paddw mm1, mm3 ;
|
||||
paddw mm1, [mmx_bi_rd GLOBAL] ;
|
||||
paddw mm1, [GLOBAL(mmx_bi_rd)] ;
|
||||
|
||||
psraw mm1, mmx_filter_shift ;
|
||||
movq mm5, mm1
|
||||
|
@ -580,7 +580,7 @@ filter_block2d_bil4x4_var_mmx_loop:
|
|||
pmullw mm3, [rax+8] ;
|
||||
|
||||
paddw mm1, mm3 ;
|
||||
paddw mm1, [mmx_bi_rd GLOBAL] ;
|
||||
paddw mm1, [GLOBAL(mmx_bi_rd)] ;
|
||||
|
||||
psraw mm1, mmx_filter_shift ;
|
||||
movq mm3, mm5 ;
|
||||
|
@ -592,7 +592,7 @@ filter_block2d_bil4x4_var_mmx_loop:
|
|||
paddw mm1, mm3 ;
|
||||
|
||||
|
||||
paddw mm1, [mmx_bi_rd GLOBAL] ;
|
||||
paddw mm1, [GLOBAL(mmx_bi_rd)] ;
|
||||
psraw mm1, mmx_filter_shift ;
|
||||
|
||||
movd mm3, [rdi] ;
|
||||
|
@ -710,10 +710,10 @@ sym(vp8_filter_block2d_bil_var_mmx):
|
|||
paddw mm1, mm3 ;
|
||||
|
||||
paddw mm2, mm4 ;
|
||||
paddw mm1, [mmx_bi_rd GLOBAL] ;
|
||||
paddw mm1, [GLOBAL(mmx_bi_rd)] ;
|
||||
|
||||
psraw mm1, mmx_filter_shift ;
|
||||
paddw mm2, [mmx_bi_rd GLOBAL] ;
|
||||
paddw mm2, [GLOBAL(mmx_bi_rd)] ;
|
||||
|
||||
psraw mm2, mmx_filter_shift ;
|
||||
movq mm5, mm1
|
||||
|
@ -749,10 +749,10 @@ filter_block2d_bil_var_mmx_loop:
|
|||
paddw mm1, mm3 ;
|
||||
paddw mm2, mm4 ;
|
||||
|
||||
paddw mm1, [mmx_bi_rd GLOBAL] ;
|
||||
paddw mm1, [GLOBAL(mmx_bi_rd)] ;
|
||||
psraw mm1, mmx_filter_shift ;
|
||||
|
||||
paddw mm2, [mmx_bi_rd GLOBAL] ;
|
||||
paddw mm2, [GLOBAL(mmx_bi_rd)] ;
|
||||
psraw mm2, mmx_filter_shift ;
|
||||
|
||||
movq mm3, mm5 ;
|
||||
|
@ -773,8 +773,8 @@ filter_block2d_bil_var_mmx_loop:
|
|||
paddw mm1, mm3 ;
|
||||
paddw mm2, mm4 ;
|
||||
|
||||
paddw mm1, [mmx_bi_rd GLOBAL] ;
|
||||
paddw mm2, [mmx_bi_rd GLOBAL] ;
|
||||
paddw mm1, [GLOBAL(mmx_bi_rd)] ;
|
||||
paddw mm2, [GLOBAL(mmx_bi_rd)] ;
|
||||
|
||||
psraw mm1, mmx_filter_shift ;
|
||||
psraw mm2, mmx_filter_shift ;
|
||||
|
|
|
@ -532,7 +532,7 @@ sym(vp8_filter_block2d_bil_var_sse2):
|
|||
pmullw xmm3, [rax+16] ;
|
||||
paddw xmm1, xmm3 ;
|
||||
|
||||
paddw xmm1, [xmm_bi_rd GLOBAL] ;
|
||||
paddw xmm1, [GLOBAL(xmm_bi_rd)] ;
|
||||
psraw xmm1, xmm_filter_shift ;
|
||||
|
||||
movdqa xmm5, xmm1
|
||||
|
@ -554,7 +554,7 @@ filter_block2d_bil_var_sse2_loop:
|
|||
pmullw xmm3, [rax+16] ;
|
||||
|
||||
paddw xmm1, xmm3 ;
|
||||
paddw xmm1, [xmm_bi_rd GLOBAL] ;
|
||||
paddw xmm1, [GLOBAL(xmm_bi_rd)] ;
|
||||
|
||||
psraw xmm1, xmm_filter_shift ;
|
||||
movdqa xmm3, xmm5 ;
|
||||
|
@ -565,7 +565,7 @@ filter_block2d_bil_var_sse2_loop:
|
|||
pmullw xmm1, [rdx+16] ;
|
||||
paddw xmm1, xmm3 ;
|
||||
|
||||
paddw xmm1, [xmm_bi_rd GLOBAL] ;
|
||||
paddw xmm1, [GLOBAL(xmm_bi_rd)] ;
|
||||
psraw xmm1, xmm_filter_shift ;
|
||||
|
||||
movq xmm3, QWORD PTR [rdi] ;
|
||||
|
|
|
@ -160,7 +160,7 @@
|
|||
ret
|
||||
%%exitGG:
|
||||
%undef GLOBAL
|
||||
%define GLOBAL + %1 wrt ..gotoff
|
||||
%define GLOBAL(x) x + %1 wrt ..gotoff
|
||||
%undef RESTORE_GOT
|
||||
%define RESTORE_GOT pop %1
|
||||
%endmacro
|
||||
|
@ -176,7 +176,7 @@
|
|||
ret
|
||||
%%exitGG:
|
||||
%undef GLOBAL
|
||||
%define GLOBAL + %1 - fake_got
|
||||
%define GLOBAL(x) x + %1 - fake_got
|
||||
%undef RESTORE_GOT
|
||||
%define RESTORE_GOT pop %1
|
||||
%endmacro
|
||||
|
@ -186,7 +186,7 @@
|
|||
%else
|
||||
%macro GET_GOT 1
|
||||
%endmacro
|
||||
%define GLOBAL wrt rip
|
||||
%define GLOBAL(x) rel x
|
||||
%ifidn __OUTPUT_FORMAT__,elf64
|
||||
%define WRT_PLT wrt ..plt
|
||||
%define HIDDEN_DATA(x) x:data hidden
|
||||
|
@ -197,7 +197,7 @@
|
|||
%ifnmacro GET_GOT
|
||||
%macro GET_GOT 1
|
||||
%endmacro
|
||||
%define GLOBAL
|
||||
%define GLOBAL(x) x
|
||||
%endif
|
||||
%ifndef RESTORE_GOT
|
||||
%define RESTORE_GOT
|
||||
|
|
Загрузка…
Ссылка в новой задаче