Merge "SSSE3 Optimization for Atom processors using new instruction selection and ordering"

This commit is contained in:
Yunqing Wang 2014-12-08 13:34:53 -08:00 committed by Gerrit Code Review
Parents c38d0490b3 8f9d94ec17
Commit cddbdeabd0
1 changed file: 30 additions and 20 deletions

View file

@@ -765,40 +765,50 @@ sym(vp9_filter_block1d16_v8_avg_ssse3):
movq xmm0, [rsi - 3] ;load src data
movq xmm4, [rsi + 5]
movq xmm7, [rsi + 13]
movq xmm6, [rsi + 13]
punpcklqdq xmm0, xmm4
punpcklqdq xmm4, xmm7
punpcklqdq xmm4, xmm6
movdqa xmm7, xmm0
punpcklbw xmm7, xmm7
punpckhbw xmm0, xmm0
movdqa xmm1, xmm0
movdqa xmm2, xmm0
movdqa xmm3, xmm0
palignr xmm0, xmm7, 1
palignr xmm1, xmm7, 5
pmaddubsw xmm0, k0k1
palignr xmm2, xmm7, 9
pmaddubsw xmm1, k2k3
palignr xmm3, xmm7, 13
pmaddubsw xmm2, k4k5
pmaddubsw xmm3, k6k7
paddsw xmm0, xmm3
movdqa xmm3, xmm4
punpcklbw xmm3, xmm3
punpckhbw xmm4, xmm4
movdqa xmm5, xmm4
movdqa xmm6, xmm4
movdqa xmm7, xmm4
pshufb xmm0, [GLOBAL(shuf_t0t1)]
pshufb xmm1, [GLOBAL(shuf_t2t3)]
pshufb xmm2, [GLOBAL(shuf_t4t5)]
pshufb xmm3, [GLOBAL(shuf_t6t7)]
pshufb xmm4, [GLOBAL(shuf_t0t1)]
pshufb xmm5, [GLOBAL(shuf_t2t3)]
pshufb xmm6, [GLOBAL(shuf_t4t5)]
pshufb xmm7, [GLOBAL(shuf_t6t7)]
palignr xmm4, xmm3, 1
palignr xmm5, xmm3, 5
palignr xmm6, xmm3, 9
palignr xmm7, xmm3, 13
pmaddubsw xmm0, k0k1
pmaddubsw xmm1, k2k3
pmaddubsw xmm2, k4k5
pmaddubsw xmm3, k6k7
pmaddubsw xmm4, k0k1
pmaddubsw xmm5, k2k3
pmaddubsw xmm6, k4k5
pmaddubsw xmm7, k6k7
paddsw xmm0, xmm3
movdqa xmm3, xmm1
pmaddubsw xmm4, k0k1
pmaxsw xmm1, xmm2
pmaddubsw xmm5, k2k3
pminsw xmm2, xmm3
pmaddubsw xmm6, k4k5
paddsw xmm0, xmm2
pmaddubsw xmm7, k6k7
paddsw xmm0, xmm1
paddsw xmm4, xmm7