Improve SSSE3 fast quantizer function

Simplified the EOB calculation in the function.

Change-Id: I7422f18be40ae270358f5cb0811d66e64436b56f
This commit is contained in:
Yunqing Wang 2011-12-29 12:05:50 -05:00
Родитель b510863f8f
Коммит 2b2c0c9bda
1 изменённый файл: 20 добавлений и 22 удаления

Просмотреть файл

@@ -80,6 +80,9 @@ sym(vp8_fast_quantize_b_ssse3):
mov rdi, [rsi + vp8_blockd_dequant] mov rdi, [rsi + vp8_blockd_dequant]
mov rcx, [rsi + vp8_blockd_dqcoeff] mov rcx, [rsi + vp8_blockd_dqcoeff]
movdqa xmm2, xmm1 ;store y for getting eob
movdqa xmm3, xmm5
pxor xmm1, xmm0 pxor xmm1, xmm0
pxor xmm5, xmm4 pxor xmm5, xmm4
psubw xmm1, xmm0 psubw xmm1, xmm0
@@ -88,35 +91,30 @@ sym(vp8_fast_quantize_b_ssse3):
movdqa [rax], xmm1 movdqa [rax], xmm1
movdqa [rax + 16], xmm5 movdqa [rax + 16], xmm5
movdqa xmm2, [rdi] movdqa xmm0, [rdi]
movdqa xmm3, [rdi + 16] movdqa xmm4, [rdi + 16]
pxor xmm4, xmm4 pmullw xmm0, xmm1
pmullw xmm2, xmm1 pmullw xmm4, xmm5
pmullw xmm3, xmm5 pxor xmm1, xmm1
pcmpeqw xmm1, xmm4 ;non zero mask pcmpgtw xmm2, xmm1 ;calculate eob
pcmpeqw xmm5, xmm4 ;non zero mask pcmpgtw xmm3, xmm1
packsswb xmm1, xmm5 packsswb xmm2, xmm3
pshufb xmm1, [GLOBAL(zz_shuf)] pshufb xmm2, [GLOBAL(zz_shuf)]
pmovmskb edx, xmm1 pmovmskb edx, xmm2
xor rdi, rdi
mov eax, -1
xor dx, ax ;flip the bits for bsr
bsr eax, edx
movdqa [rcx], xmm2 ;store dqcoeff
movdqa [rcx + 16], xmm3 ;store dqcoeff
movdqa [rcx], xmm0 ;store dqcoeff
movdqa [rcx + 16], xmm4 ;store dqcoeff
mov rcx, [rsi + vp8_blockd_eob] mov rcx, [rsi + vp8_blockd_eob]
sub edi, edx ;check for all zeros in bit mask bsr eax, edx ;count 0
sar edi, 31 ;0 or -1
add eax, 1 add eax, 1
and eax, edi ;if the bit mask was all zero,
;then eob = 0 cmp edx, 0 ;if all 0, eob=0
cmove eax, edx
mov BYTE PTR [rcx], al ;store eob mov BYTE PTR [rcx], al ;store eob
; begin epilog ; begin epilog