nasm: match instruction length (movd/movq) to parameters
nasm requires the instruction length (movd/movq) to match its parameters. I find it clearer to actually use 64-bit instructions when we use 64-bit registers in the assembly. This provides nasm compatibility. This patch causes no binary change with yasm on {x86_64,i686}-fedora13-linux-gnu. A few longer opcodes produced by nasm on {x86_64,i686}-fedora13-linux-gnu have been checked as safe. Change-Id: Id9b1a5cdfb1bc05697e523c317a296df43d42a91
This commit is contained in:
Родитель
2d4ef37507
Коммит
e114f699f6
|
@ -69,7 +69,7 @@ sym(vp8_short_inv_walsh4x4_mmx):
|
|||
movq mm2, [rsi + 16] ;ip[8]
|
||||
movq mm3, [rsi + 24] ;ip[12]
|
||||
|
||||
movd mm7, rax
|
||||
movq mm7, rax
|
||||
movq mm4, mm0
|
||||
|
||||
punpcklwd mm7, mm7 ;0003000300030003h
|
||||
|
|
|
@ -288,7 +288,7 @@ sym(vp8_dequant_dc_idct_add_mmx):
|
|||
psrlq mm0, 16
|
||||
movzx rcx, word ptr arg(6) ;Dc
|
||||
psllq mm0, 16
|
||||
movd mm7, rcx
|
||||
movq mm7, rcx
|
||||
por mm0, mm7
|
||||
|
||||
movsxd rax, dword ptr arg(4) ;pitch
|
||||
|
|
|
@ -50,7 +50,7 @@ sym(vp8_block_error_xmm):
|
|||
psrldq xmm0, 8
|
||||
paddd xmm0, xmm3
|
||||
|
||||
movd rax, xmm0
|
||||
movq rax, xmm0
|
||||
|
||||
pop rdi
|
||||
pop rsi
|
||||
|
@ -115,7 +115,7 @@ sym(vp8_block_error_mmx):
|
|||
psrlq mm1, 32
|
||||
paddd mm0, mm1
|
||||
|
||||
movd rax, mm0
|
||||
movq rax, mm0
|
||||
|
||||
pop rdi
|
||||
pop rsi
|
||||
|
@ -192,7 +192,7 @@ mberror_loop_mmx:
|
|||
psrlq mm2, 32
|
||||
|
||||
paddd mm0, mm2
|
||||
movd rax, mm0
|
||||
movq rax, mm0
|
||||
|
||||
pop rdi
|
||||
pop rsi
|
||||
|
@ -260,7 +260,7 @@ mberror_loop:
|
|||
psrldq xmm0, 8
|
||||
|
||||
paddd xmm0, xmm1
|
||||
movd rax, xmm0
|
||||
movq rax, xmm0
|
||||
|
||||
pop rdi
|
||||
pop rsi
|
||||
|
@ -317,7 +317,7 @@ mbuverror_loop_mmx:
|
|||
psrlq mm7, 32
|
||||
|
||||
paddd mm0, mm7
|
||||
movd rax, mm0
|
||||
movq rax, mm0
|
||||
|
||||
pop rdi
|
||||
pop rsi
|
||||
|
@ -374,7 +374,7 @@ mbuverror_loop:
|
|||
psrldq xmm1, 8
|
||||
paddd xmm1, xmm2
|
||||
|
||||
movd rax, xmm1
|
||||
movq rax, xmm1
|
||||
|
||||
pop rdi
|
||||
pop rsi
|
||||
|
|
|
@ -249,7 +249,7 @@ sym(vp8_fast_quantize_b_impl_mmx):
|
|||
paddd mm0, mm5
|
||||
|
||||
; eob adjustment begins here
|
||||
movd rcx, mm0
|
||||
movq rcx, mm0
|
||||
and rcx, 0xffff
|
||||
|
||||
xor rdx, rdx
|
||||
|
@ -262,7 +262,7 @@ sym(vp8_fast_quantize_b_impl_mmx):
|
|||
and rax, rdx
|
||||
; Substitute the sse assembly for the old mmx mixed assembly/C. The
|
||||
; following is kept as reference
|
||||
; movd rcx, mm0
|
||||
; movq rcx, mm0
|
||||
; bsr rax, rcx
|
||||
;
|
||||
; mov eob, rax
|
||||
|
@ -418,7 +418,7 @@ sym(vp8_fast_quantize_b_impl_sse):
|
|||
psrldq xmm0, 4
|
||||
paddd xmm1, xmm0
|
||||
|
||||
movd rcx, xmm1
|
||||
movq rcx, xmm1
|
||||
and rcx, 0xffff
|
||||
|
||||
xor rdx, rdx
|
||||
|
|
|
@ -100,7 +100,7 @@ x16x16sad_mmx_loop:
|
|||
psrlq mm0, 32
|
||||
paddw mm7, mm0
|
||||
|
||||
movd rax, mm7
|
||||
movq rax, mm7
|
||||
|
||||
pop rdi
|
||||
pop rsi
|
||||
|
@ -172,7 +172,7 @@ x8x16sad_mmx_loop:
|
|||
psrlq mm0, 32
|
||||
|
||||
paddw mm7, mm0
|
||||
movd rax, mm7
|
||||
movq rax, mm7
|
||||
|
||||
pop rdi
|
||||
pop rsi
|
||||
|
@ -242,7 +242,7 @@ x8x8sad_mmx_loop:
|
|||
psrlq mm0, 32
|
||||
|
||||
paddw mm7, mm0
|
||||
movd rax, mm7
|
||||
movq rax, mm7
|
||||
|
||||
pop rdi
|
||||
pop rsi
|
||||
|
@ -331,7 +331,7 @@ sym(vp8_sad4x4_mmx):
|
|||
psrlq mm0, 32
|
||||
paddw mm0, mm1
|
||||
|
||||
movd rax, mm0
|
||||
movq rax, mm0
|
||||
|
||||
pop rdi
|
||||
pop rsi
|
||||
|
@ -418,7 +418,7 @@ x16x8sad_mmx_loop:
|
|||
psrlq mm0, 32
|
||||
|
||||
paddw mm7, mm0
|
||||
movd rax, mm7
|
||||
movq rax, mm7
|
||||
|
||||
pop rdi
|
||||
pop rsi
|
||||
|
|
|
@ -75,7 +75,7 @@ x16x16sad_wmt_loop:
|
|||
psrldq xmm7, 8
|
||||
|
||||
paddw xmm0, xmm7
|
||||
movd rax, xmm0
|
||||
movq rax, xmm0
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
|
@ -113,7 +113,7 @@ sym(vp8_sad8x16_wmt):
|
|||
|
||||
x8x16sad_wmt_loop:
|
||||
|
||||
movd rax, mm7
|
||||
movq rax, mm7
|
||||
cmp rax, arg(4)
|
||||
jg x8x16sad_wmt_early_exit
|
||||
|
||||
|
@ -135,7 +135,7 @@ x8x16sad_wmt_loop:
|
|||
cmp rsi, rcx
|
||||
jne x8x16sad_wmt_loop
|
||||
|
||||
movd rax, mm7
|
||||
movq rax, mm7
|
||||
|
||||
x8x16sad_wmt_early_exit:
|
||||
|
||||
|
@ -174,7 +174,7 @@ sym(vp8_sad8x8_wmt):
|
|||
|
||||
x8x8sad_wmt_loop:
|
||||
|
||||
movd rax, mm7
|
||||
movq rax, mm7
|
||||
cmp rax, arg(4)
|
||||
jg x8x8sad_wmt_early_exit
|
||||
|
||||
|
@ -190,7 +190,7 @@ x8x8sad_wmt_loop:
|
|||
cmp rsi, rcx
|
||||
jne x8x8sad_wmt_loop
|
||||
|
||||
movd rax, mm7
|
||||
movq rax, mm7
|
||||
x8x8sad_wmt_early_exit:
|
||||
|
||||
; begin epilog
|
||||
|
@ -246,7 +246,7 @@ sym(vp8_sad4x4_wmt):
|
|||
psadbw mm4, mm5
|
||||
|
||||
paddw mm0, mm4
|
||||
movd rax, mm0
|
||||
movq rax, mm0
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
|
@ -283,7 +283,7 @@ sym(vp8_sad16x8_wmt):
|
|||
|
||||
x16x8sad_wmt_loop:
|
||||
|
||||
movd rax, mm7
|
||||
movq rax, mm7
|
||||
cmp rax, arg(4)
|
||||
jg x16x8sad_wmt_early_exit
|
||||
|
||||
|
@ -317,7 +317,7 @@ x16x8sad_wmt_loop:
|
|||
cmp rsi, rcx
|
||||
jne x16x8sad_wmt_loop
|
||||
|
||||
movd rax, mm7
|
||||
movq rax, mm7
|
||||
|
||||
x16x8sad_wmt_early_exit:
|
||||
|
||||
|
|
|
@ -530,7 +530,7 @@ sym(vp8_sad16x16_sse3):
|
|||
|
||||
vp8_sad16x16_sse3_loop:
|
||||
|
||||
movd rax, mm7
|
||||
movq rax, mm7
|
||||
cmp rax, arg(4)
|
||||
jg vp8_sad16x16_early_exit
|
||||
|
||||
|
@ -564,7 +564,7 @@ vp8_sad16x16_sse3_loop:
|
|||
cmp rsi, rcx
|
||||
jne vp8_sad16x16_sse3_loop
|
||||
|
||||
movd rax, mm7
|
||||
movq rax, mm7
|
||||
|
||||
vp8_sad16x16_early_exit:
|
||||
|
||||
|
|
|
@ -498,7 +498,7 @@ sym(vp8_get4x4sse_cs_mmx):
|
|||
psrlq mm7, 32
|
||||
|
||||
paddd mm0, mm7
|
||||
movd rax, mm0
|
||||
movq rax, mm0
|
||||
|
||||
|
||||
; begin epilog
|
||||
|
|
|
@ -58,7 +58,7 @@ NEXTROW:
|
|||
movdqa xmm3,xmm4
|
||||
psrldq xmm4,4
|
||||
paddd xmm4,xmm3
|
||||
movd rax,xmm4
|
||||
movq rax,xmm4
|
||||
|
||||
|
||||
; begin epilog
|
||||
|
@ -471,7 +471,7 @@ sym(vp8_get8x8var_sse2):
|
|||
mov rax, arg(5) ;[Sum]
|
||||
mov rdi, arg(4) ;[SSE]
|
||||
|
||||
movd rdx, xmm7
|
||||
movq rdx, xmm7
|
||||
movsx rcx, dx
|
||||
|
||||
mov dword ptr [rax], ecx
|
||||
|
|
|
@ -36,6 +36,43 @@
|
|||
%define rsp esp
|
||||
%define rbp ebp
|
||||
%define movsxd mov
|
||||
; movq dst, src -- two-operand wrapper macro that shadows the movq mnemonic.
;
; If either operand is textually identical to one of the 32-bit general-purpose
; register names (eax, ebx, ecx, edx, esi, edi, esp, ebp), the macro emits
; "movd dst, src"; for any other operand pair it emits "movq dst, src" unchanged.
; %ifidn compares its arguments after single-line macro expansion and is
; case-sensitive, so an operand written with an aliased name (e.g. rsp or rbp,
; which the preceding %define lines map to esp/ebp) matches, while an
; upper-case spelling such as EAX would not.
;
; The "movq %1,%2" in the %else branch assembles as the real instruction:
; a macro defined with %macro (as opposed to %rmacro) is not expanded again
; inside its own body.
;
; NOTE(review): this definition appears to live inside the 32-bit compatibility
; section of the file; the opening of the enclosing conditional is not visible
; in this view -- confirm before relying on it.
%macro movq 2
|
||||
%ifidn %1,eax
|
||||
movd %1,%2
|
||||
%elifidn %2,eax
|
||||
movd %1,%2
|
||||
%elifidn %1,ebx
|
||||
movd %1,%2
|
||||
%elifidn %2,ebx
|
||||
movd %1,%2
|
||||
%elifidn %1,ecx
|
||||
movd %1,%2
|
||||
%elifidn %2,ecx
|
||||
movd %1,%2
|
||||
%elifidn %1,edx
|
||||
movd %1,%2
|
||||
%elifidn %2,edx
|
||||
movd %1,%2
|
||||
%elifidn %1,esi
|
||||
movd %1,%2
|
||||
%elifidn %2,esi
|
||||
movd %1,%2
|
||||
%elifidn %1,edi
|
||||
movd %1,%2
|
||||
%elifidn %2,edi
|
||||
movd %1,%2
|
||||
%elifidn %1,esp
|
||||
movd %1,%2
|
||||
%elifidn %2,esp
|
||||
movd %1,%2
|
||||
%elifidn %1,ebp
|
||||
movd %1,%2
|
||||
%elifidn %2,ebp
|
||||
movd %1,%2
|
||||
%else
|
||||
movq %1,%2
|
||||
%endif
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче