Merge "Fix dr memory VP8 encode/decode errors"

This commit is contained in:
Yunqing Wang 2014-04-22 16:09:43 -07:00 коммит произвёл Gerrit Code Review
Родитель cfd16a6890 1893122e34
Коммит 7f83a3bcdc
1 изменённых файлов: 56 добавлений и 23 удалений

Просмотреть файл

@ -365,6 +365,7 @@ sym(vp8_intra_pred_uv_tm_%1):
GET_GOT rbx
push rsi
push rdi
push rbx
; end prolog
; read top row
@ -395,8 +396,11 @@ sym(vp8_intra_pred_uv_tm_%1):
movsxd rcx, dword ptr arg(1) ;dst_stride
.vp8_intra_pred_uv_tm_%1_loop:
movd xmm3, [rsi]
movd xmm5, [rsi+rax]
mov bl, [rsi]
movd xmm3, ebx
mov bl, [rsi+rax]
movd xmm5, ebx
%ifidn %1, sse2
punpcklbw xmm3, xmm0
punpcklbw xmm5, xmm0
@ -419,6 +423,7 @@ sym(vp8_intra_pred_uv_tm_%1):
jnz .vp8_intra_pred_uv_tm_%1_loop
; begin epilog
pop rbx
pop rdi
pop rsi
RESTORE_GOT
@ -486,10 +491,8 @@ sym(vp8_intra_pred_uv_ho_%1):
SHADOW_ARGS_TO_STACK 5
push rsi
push rdi
%ifidn %1, ssse3
%ifndef GET_GOT_SAVE_ARG
push rbx
%endif
%ifidn %1, ssse3
GET_GOT rbx
%endif
; end prolog
@ -507,13 +510,16 @@ sym(vp8_intra_pred_uv_ho_%1):
%ifidn %1, ssse3
lea rdx, [rcx*3]
movdqa xmm2, [GLOBAL(dc_00001111)]
lea rbx, [rax*3]
%endif
%ifidn %1, mmx2
.vp8_intra_pred_uv_ho_%1_loop:
movd mm0, [rsi]
movd mm1, [rsi+rax]
mov bl, [rsi]
movd mm0, ebx
mov bl, [rsi+rax]
movd mm1, ebx
punpcklbw mm0, mm0
punpcklbw mm1, mm1
pshufw mm0, mm0, 0x0
@ -525,10 +531,19 @@ sym(vp8_intra_pred_uv_ho_%1):
dec edx
jnz .vp8_intra_pred_uv_ho_%1_loop
%else
movd xmm0, [rsi]
movd xmm3, [rsi+rax]
movd xmm1, [rsi+rax*2]
movd xmm4, [rsi+rbx]
mov bl, [rsi]
movd xmm0, ebx
mov bl, [rsi+rax]
movd xmm3, ebx
mov bl, [rsi+rax*2]
movd xmm1, ebx
lea rbx, [rax*3]
mov bl, [rsi+rbx]
movd xmm4, ebx
punpcklbw xmm0, xmm3
punpcklbw xmm1, xmm4
pshufb xmm0, xmm2
@ -539,10 +554,20 @@ sym(vp8_intra_pred_uv_ho_%1):
movhps [rdi+rdx], xmm1
lea rsi, [rsi+rax*4]
lea rdi, [rdi+rcx*4]
movd xmm0, [rsi]
movd xmm3, [rsi+rax]
movd xmm1, [rsi+rax*2]
movd xmm4, [rsi+rbx]
mov bl, [rsi]
movd xmm0, ebx
mov bl, [rsi+rax]
movd xmm3, ebx
mov bl, [rsi+rax*2]
movd xmm1, ebx
lea rbx, [rax*3]
mov bl, [rsi+rbx]
movd xmm4, ebx
punpcklbw xmm0, xmm3
punpcklbw xmm1, xmm4
pshufb xmm0, xmm2
@ -556,10 +581,8 @@ sym(vp8_intra_pred_uv_ho_%1):
; begin epilog
%ifidn %1, ssse3
RESTORE_GOT
%ifndef GET_GOT_SAVE_ARG
%endif
pop rbx
%endif
%endif
pop rdi
pop rsi
UNSHADOW_ARGS
@ -893,6 +916,7 @@ sym(vp8_intra_pred_y_tm_%1):
SAVE_XMM 7
push rsi
push rdi
push rbx
GET_GOT rbx
; end prolog
@ -926,8 +950,11 @@ sym(vp8_intra_pred_y_tm_%1):
mov rdi, arg(0) ;dst;
movsxd rcx, dword ptr arg(1) ;dst_stride
vp8_intra_pred_y_tm_%1_loop:
movd xmm4, [rsi]
movd xmm5, [rsi+rax]
mov bl, [rsi]
movd xmm4, ebx
mov bl, [rsi+rax]
movd xmm5, ebx
%ifidn %1, sse2
punpcklbw xmm4, xmm0
punpcklbw xmm5, xmm0
@ -956,6 +983,7 @@ vp8_intra_pred_y_tm_%1_loop:
; begin epilog
RESTORE_GOT
pop rbx
pop rdi
pop rsi
RESTORE_XMM
@ -1029,6 +1057,7 @@ sym(vp8_intra_pred_y_ho_sse2):
SHADOW_ARGS_TO_STACK 5
push rsi
push rdi
push rbx
; end prolog
;arg(2) not used
@ -1041,8 +1070,11 @@ sym(vp8_intra_pred_y_ho_sse2):
movsxd rcx, dword ptr arg(1) ;dst_stride
vp8_intra_pred_y_ho_sse2_loop:
movd xmm0, [rsi]
movd xmm1, [rsi+rax]
mov bl, [rsi]
movd xmm0, ebx
mov bl, [rsi+rax]
movd xmm1, ebx
; FIXME use pshufb for ssse3 version
punpcklbw xmm0, xmm0
punpcklbw xmm1, xmm1
@ -1058,6 +1090,7 @@ vp8_intra_pred_y_ho_sse2_loop:
jnz vp8_intra_pred_y_ho_sse2_loop
; begin epilog
pop rbx
pop rdi
pop rsi
UNSHADOW_ARGS