nasm: match instruction length (movd/movq) to parameters

nasm requires the instruction length (movd/movq) to match the size of its
operands. I find it clearer to actually use 64-bit instructions when
we use 64-bit registers in the assembly.

Provide nasm compatibility. This patch causes no binary change with yasm on
{x86_64,i686}-fedora13-linux-gnu. The few longer opcodes produced by nasm on
{x86_64,i686}-fedora13-linux-gnu have been checked as safe.

Change-Id: Id9b1a5cdfb1bc05697e523c317a296df43d42a91
This commit is contained in:
Jan Kratochvil 2010-10-04 23:19:33 +02:00
Родитель 2d4ef37507
Коммит e114f699f6
10 изменённых файлов: 66 добавлений и 29 удалений

Просмотреть файл

@ -69,7 +69,7 @@ sym(vp8_short_inv_walsh4x4_mmx):
movq mm2, [rsi + 16] ;ip[8]
movq mm3, [rsi + 24] ;ip[12]
movd mm7, rax
movq mm7, rax
movq mm4, mm0
punpcklwd mm7, mm7 ;0003000300030003h

Просмотреть файл

@ -288,7 +288,7 @@ sym(vp8_dequant_dc_idct_add_mmx):
psrlq mm0, 16
movzx rcx, word ptr arg(6) ;Dc
psllq mm0, 16
movd mm7, rcx
movq mm7, rcx
por mm0, mm7
movsxd rax, dword ptr arg(4) ;pitch

Просмотреть файл

@ -50,7 +50,7 @@ sym(vp8_block_error_xmm):
psrldq xmm0, 8
paddd xmm0, xmm3
movd rax, xmm0
movq rax, xmm0
pop rdi
pop rsi
@ -115,7 +115,7 @@ sym(vp8_block_error_mmx):
psrlq mm1, 32
paddd mm0, mm1
movd rax, mm0
movq rax, mm0
pop rdi
pop rsi
@ -192,7 +192,7 @@ mberror_loop_mmx:
psrlq mm2, 32
paddd mm0, mm2
movd rax, mm0
movq rax, mm0
pop rdi
pop rsi
@ -260,7 +260,7 @@ mberror_loop:
psrldq xmm0, 8
paddd xmm0, xmm1
movd rax, xmm0
movq rax, xmm0
pop rdi
pop rsi
@ -317,7 +317,7 @@ mbuverror_loop_mmx:
psrlq mm7, 32
paddd mm0, mm7
movd rax, mm0
movq rax, mm0
pop rdi
pop rsi
@ -374,7 +374,7 @@ mbuverror_loop:
psrldq xmm1, 8
paddd xmm1, xmm2
movd rax, xmm1
movq rax, xmm1
pop rdi
pop rsi

Просмотреть файл

@ -249,7 +249,7 @@ sym(vp8_fast_quantize_b_impl_mmx):
paddd mm0, mm5
; eob adjustment begins here
movd rcx, mm0
movq rcx, mm0
and rcx, 0xffff
xor rdx, rdx
@ -262,7 +262,7 @@ sym(vp8_fast_quantize_b_impl_mmx):
and rax, rdx
; Substitute the sse assembly for the old mmx mixed assembly/C. The
; following is kept as reference
; movd rcx, mm0
; movq rcx, mm0
; bsr rax, rcx
;
; mov eob, rax
@ -418,7 +418,7 @@ sym(vp8_fast_quantize_b_impl_sse):
psrldq xmm0, 4
paddd xmm1, xmm0
movd rcx, xmm1
movq rcx, xmm1
and rcx, 0xffff
xor rdx, rdx

Просмотреть файл

@ -100,7 +100,7 @@ x16x16sad_mmx_loop:
psrlq mm0, 32
paddw mm7, mm0
movd rax, mm7
movq rax, mm7
pop rdi
pop rsi
@ -172,7 +172,7 @@ x8x16sad_mmx_loop:
psrlq mm0, 32
paddw mm7, mm0
movd rax, mm7
movq rax, mm7
pop rdi
pop rsi
@ -242,7 +242,7 @@ x8x8sad_mmx_loop:
psrlq mm0, 32
paddw mm7, mm0
movd rax, mm7
movq rax, mm7
pop rdi
pop rsi
@ -331,7 +331,7 @@ sym(vp8_sad4x4_mmx):
psrlq mm0, 32
paddw mm0, mm1
movd rax, mm0
movq rax, mm0
pop rdi
pop rsi
@ -418,7 +418,7 @@ x16x8sad_mmx_loop:
psrlq mm0, 32
paddw mm7, mm0
movd rax, mm7
movq rax, mm7
pop rdi
pop rsi

Просмотреть файл

@ -75,7 +75,7 @@ x16x16sad_wmt_loop:
psrldq xmm7, 8
paddw xmm0, xmm7
movd rax, xmm0
movq rax, xmm0
; begin epilog
pop rdi
@ -113,7 +113,7 @@ sym(vp8_sad8x16_wmt):
x8x16sad_wmt_loop:
movd rax, mm7
movq rax, mm7
cmp rax, arg(4)
jg x8x16sad_wmt_early_exit
@ -135,7 +135,7 @@ x8x16sad_wmt_loop:
cmp rsi, rcx
jne x8x16sad_wmt_loop
movd rax, mm7
movq rax, mm7
x8x16sad_wmt_early_exit:
@ -174,7 +174,7 @@ sym(vp8_sad8x8_wmt):
x8x8sad_wmt_loop:
movd rax, mm7
movq rax, mm7
cmp rax, arg(4)
jg x8x8sad_wmt_early_exit
@ -190,7 +190,7 @@ x8x8sad_wmt_loop:
cmp rsi, rcx
jne x8x8sad_wmt_loop
movd rax, mm7
movq rax, mm7
x8x8sad_wmt_early_exit:
; begin epilog
@ -246,7 +246,7 @@ sym(vp8_sad4x4_wmt):
psadbw mm4, mm5
paddw mm0, mm4
movd rax, mm0
movq rax, mm0
; begin epilog
pop rdi
@ -283,7 +283,7 @@ sym(vp8_sad16x8_wmt):
x16x8sad_wmt_loop:
movd rax, mm7
movq rax, mm7
cmp rax, arg(4)
jg x16x8sad_wmt_early_exit
@ -317,7 +317,7 @@ x16x8sad_wmt_loop:
cmp rsi, rcx
jne x16x8sad_wmt_loop
movd rax, mm7
movq rax, mm7
x16x8sad_wmt_early_exit:

Просмотреть файл

@ -530,7 +530,7 @@ sym(vp8_sad16x16_sse3):
vp8_sad16x16_sse3_loop:
movd rax, mm7
movq rax, mm7
cmp rax, arg(4)
jg vp8_sad16x16_early_exit
@ -564,7 +564,7 @@ vp8_sad16x16_sse3_loop:
cmp rsi, rcx
jne vp8_sad16x16_sse3_loop
movd rax, mm7
movq rax, mm7
vp8_sad16x16_early_exit:

Просмотреть файл

@ -498,7 +498,7 @@ sym(vp8_get4x4sse_cs_mmx):
psrlq mm7, 32
paddd mm0, mm7
movd rax, mm0
movq rax, mm0
; begin epilog

Просмотреть файл

@ -58,7 +58,7 @@ NEXTROW:
movdqa xmm3,xmm4
psrldq xmm4,4
paddd xmm4,xmm3
movd rax,xmm4
movq rax,xmm4
; begin epilog
@ -471,7 +471,7 @@ sym(vp8_get8x8var_sse2):
mov rax, arg(5) ;[Sum]
mov rdi, arg(4) ;[SSE]
movd rdx, xmm7
movq rdx, xmm7
movsx rcx, dx
mov dword ptr [rax], ecx

Просмотреть файл

@ -36,6 +36,43 @@
%define rsp esp
%define rbp ebp
%define movsxd mov
; movq wrapper: choose the move width from the operands.
;   %1 = destination operand, %2 = source operand (passed through verbatim).
; If either operand is textually one of the eight 32-bit general-purpose
; registers, movd is emitted; every other operand pair is forwarded to the
; real movq unchanged.
; NOTE(review): the bare "movq" in the final branch relies on a %macro body
; not re-invoking the macro being defined -- confirm for the assemblers in use.
%macro movq 2
  ; destination is a 32-bit general-purpose register
  %ifidn %1,eax
    movd %1,%2
  %elifidn %1,ebx
    movd %1,%2
  %elifidn %1,ecx
    movd %1,%2
  %elifidn %1,edx
    movd %1,%2
  %elifidn %1,esi
    movd %1,%2
  %elifidn %1,edi
    movd %1,%2
  %elifidn %1,esp
    movd %1,%2
  %elifidn %1,ebp
    movd %1,%2
  ; source is a 32-bit general-purpose register
  %elifidn %2,eax
    movd %1,%2
  %elifidn %2,ebx
    movd %1,%2
  %elifidn %2,ecx
    movd %1,%2
  %elifidn %2,edx
    movd %1,%2
  %elifidn %2,esi
    movd %1,%2
  %elifidn %2,edi
    movd %1,%2
  %elifidn %2,esp
    movd %1,%2
  %elifidn %2,ebp
    movd %1,%2
  ; neither operand is a 32-bit GPR: keep the 64-bit move
  %else
    movq %1,%2
  %endif
%endmacro
%endif