2015-04-22 01:36:00 +03:00
|
|
|
/*
|
|
|
|
============================================================================
|
|
|
|
Name : MMIHelpers.h
|
|
|
|
Author : Heiher <r@hev.cc>
|
|
|
|
Version : 0.0.1
|
|
|
|
Copyright : Copyright (c) 2015 everyone.
|
|
|
|
Description : The helpers for x86 SSE to Loongson MMI.
|
|
|
|
============================================================================
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef __MMI_HELPERS_H__
|
|
|
|
#define __MMI_HELPERS_H__
|
|
|
|
|
|
|
|
#define __mm_packxxxx(_f, _D, _d, _s, _t) \
|
|
|
|
# _f " %[" # _t "], %[" # _d "h], %[" # _s "h] \n\t" # _f " %[" # _D \
|
|
|
|
"l], %[" # _d "l], %[" # _s \
|
|
|
|
"l] \n\t" \
|
|
|
|
"punpckhwd %[" # _D "h], %[" # _D "l], %[" # _t \
|
|
|
|
"] \n\t" \
|
|
|
|
"punpcklwd %[" # _D "l], %[" # _D "l], %[" # _t "] \n\t"
|
2018-11-30 13:46:48 +03:00
|
|
|
|
2015-04-22 01:36:00 +03:00
|
|
|
#define _mm_or(_D, _d, _s) \
|
|
|
|
"or %[" #_D "h], %[" #_d "h], %[" #_s \
|
|
|
|
"h] \n\t" \
|
|
|
|
"or %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
|
2018-11-30 13:46:48 +03:00
|
|
|
|
2015-04-22 01:36:00 +03:00
|
|
|
#define _mm_xor(_D, _d, _s) \
|
|
|
|
"xor %[" #_D "h], %[" #_d "h], %[" #_s \
|
|
|
|
"h] \n\t" \
|
|
|
|
"xor %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
|
2018-11-30 13:46:48 +03:00
|
|
|
|
2015-04-22 01:36:00 +03:00
|
|
|
#define _mm_and(_D, _d, _s) \
|
|
|
|
"and %[" #_D "h], %[" #_d "h], %[" #_s \
|
|
|
|
"h] \n\t" \
|
|
|
|
"and %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
|
|
|
|
|
|
|
|
/* SSE: pandn */
|
|
|
|
#define _mm_pandn(_D, _d, _s) \
|
|
|
|
"pandn %[" #_D "h], %[" #_d "h], %[" #_s \
|
|
|
|
"h] \n\t" \
|
|
|
|
"pandn %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
|
|
|
|
|
|
|
|
/* SSE: pshuflw */
|
|
|
|
#define _mm_pshuflh(_D, _d, _s) \
|
|
|
|
"mov.d %[" #_D "h], %[" #_d \
|
|
|
|
"h] \n\t" \
|
|
|
|
"pshufh %[" #_D "l], %[" #_d "l], %[" #_s "] \n\t"
|
|
|
|
|
|
|
|
/* SSE: psllw (bits) */
|
|
|
|
#define _mm_psllh(_D, _d, _s) \
|
|
|
|
"psllh %[" #_D "h], %[" #_d "h], %[" #_s \
|
|
|
|
"] \n\t" \
|
|
|
|
"psllh %[" #_D "l], %[" #_d "l], %[" #_s "] \n\t"
|
|
|
|
|
|
|
|
/* SSE: pslld (bits) */
|
|
|
|
#define _mm_psllw(_D, _d, _s) \
|
|
|
|
"psllw %[" #_D "h], %[" #_d "h], %[" #_s \
|
|
|
|
"] \n\t" \
|
|
|
|
"psllw %[" #_D "l], %[" #_d "l], %[" #_s "] \n\t"
|
|
|
|
|
|
|
|
/* SSE: psllq (bits) */
|
|
|
|
#define _mm_pslld(_D, _d, _s) \
|
|
|
|
"dsll %[" #_D "h], %[" #_d "h], %[" #_s \
|
|
|
|
"] \n\t" \
|
|
|
|
"dsll %[" #_D "l], %[" #_d "l], %[" #_s "] \n\t"
|
|
|
|
|
|
|
|
/* SSE: pslldq (bytes) */
|
|
|
|
#define _mm_psllq(_D, _d, _s, _s64, _tf) \
|
|
|
|
"subu %[" #_tf "], %[" #_s64 "], %[" #_s \
|
|
|
|
"] \n\t" \
|
|
|
|
"dsrl %[" #_tf "], %[" #_d "l], %[" #_tf \
|
|
|
|
"] \n\t" \
|
|
|
|
"dsll %[" #_D "h], %[" #_d "h], %[" #_s \
|
|
|
|
"] \n\t" \
|
|
|
|
"dsll %[" #_D "l], %[" #_d "l], %[" #_s \
|
|
|
|
"] \n\t" \
|
|
|
|
"or %[" #_D "h], %[" #_D "h], %[" #_tf "] \n\t"
|
|
|
|
|
|
|
|
/* SSE: psrlw (bits) */
|
|
|
|
#define _mm_psrlh(_D, _d, _s) \
|
|
|
|
"psrlh %[" #_D "h], %[" #_d "h], %[" #_s \
|
|
|
|
"] \n\t" \
|
|
|
|
"psrlh %[" #_D "l], %[" #_d "l], %[" #_s "] \n\t"
|
|
|
|
|
|
|
|
/* SSE: psrld (bits) */
|
|
|
|
#define _mm_psrlw(_D, _d, _s) \
|
|
|
|
"psrlw %[" #_D "h], %[" #_d "h], %[" #_s \
|
|
|
|
"] \n\t" \
|
|
|
|
"psrlw %[" #_D "l], %[" #_d "l], %[" #_s "] \n\t"
|
|
|
|
|
|
|
|
/* SSE: psrlq (bits) */
|
|
|
|
#define _mm_psrld(_D, _d, _s) \
|
|
|
|
"dsrl %[" #_D "h], %[" #_d "h], %[" #_s \
|
|
|
|
"] \n\t" \
|
|
|
|
"dsrl %[" #_D "l], %[" #_d "l], %[" #_s "] \n\t"
|
|
|
|
|
|
|
|
/* SSE: psrldq (bytes) */
|
|
|
|
#define _mm_psrlq(_D, _d, _s, _s64, _tf) \
|
|
|
|
"subu %[" #_tf "], %[" #_s64 "], %[" #_s \
|
|
|
|
"] \n\t" \
|
|
|
|
"dsll %[" #_tf "], %[" #_d "h], %[" #_tf \
|
|
|
|
"] \n\t" \
|
|
|
|
"dsrl %[" #_D "h], %[" #_d "h], %[" #_s \
|
|
|
|
"] \n\t" \
|
|
|
|
"dsrl %[" #_D "l], %[" #_d "l], %[" #_s \
|
|
|
|
"] \n\t" \
|
|
|
|
"or %[" #_D "l], %[" #_D "l], %[" #_tf "] \n\t"
|
|
|
|
|
|
|
|
/* SSE: psrad */
|
|
|
|
#define _mm_psraw(_D, _d, _s) \
|
|
|
|
"psraw %[" #_D "h], %[" #_d "h], %[" #_s \
|
|
|
|
"] \n\t" \
|
|
|
|
"psraw %[" #_D "l], %[" #_d "l], %[" #_s "] \n\t"
|
|
|
|
|
|
|
|
/* SSE: paddb */
|
|
|
|
#define _mm_paddb(_D, _d, _s) \
|
|
|
|
"paddb %[" #_D "h], %[" #_d "h], %[" #_s \
|
|
|
|
"h] \n\t" \
|
|
|
|
"paddb %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
|
|
|
|
|
|
|
|
/* SSE: paddw */
|
|
|
|
#define _mm_paddh(_D, _d, _s) \
|
|
|
|
"paddh %[" #_D "h], %[" #_d "h], %[" #_s \
|
|
|
|
"h] \n\t" \
|
|
|
|
"paddh %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
|
|
|
|
|
|
|
|
/* SSE: paddd */
|
|
|
|
#define _mm_paddw(_D, _d, _s) \
|
|
|
|
"paddw %[" #_D "h], %[" #_d "h], %[" #_s \
|
|
|
|
"h] \n\t" \
|
|
|
|
"paddw %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
|
|
|
|
|
|
|
|
/* SSE: paddq */
|
|
|
|
#define _mm_paddd(_D, _d, _s) \
|
|
|
|
"dadd %[" #_D "h], %[" #_d "h], %[" #_s \
|
|
|
|
"h] \n\t" \
|
|
|
|
"dadd %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
|
|
|
|
|
|
|
|
/* SSE: psubw */
|
|
|
|
#define _mm_psubh(_D, _d, _s) \
|
|
|
|
"psubh %[" #_D "h], %[" #_d "h], %[" #_s \
|
|
|
|
"h] \n\t" \
|
|
|
|
"psubh %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
|
|
|
|
|
|
|
|
/* SSE: psubd */
|
|
|
|
#define _mm_psubw(_D, _d, _s) \
|
|
|
|
"psubw %[" #_D "h], %[" #_d "h], %[" #_s \
|
|
|
|
"h] \n\t" \
|
|
|
|
"psubw %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
|
|
|
|
|
|
|
|
/* SSE: pmaxub */
|
|
|
|
#define _mm_pmaxub(_D, _d, _s) \
|
|
|
|
"pmaxub %[" #_D "h], %[" #_d "h], %[" #_s \
|
|
|
|
"h] \n\t" \
|
|
|
|
"pmaxub %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
|
|
|
|
|
|
|
|
/* SSE: pmullw */
|
|
|
|
#define _mm_pmullh(_D, _d, _s) \
|
|
|
|
"pmullh %[" #_D "h], %[" #_d "h], %[" #_s \
|
|
|
|
"h] \n\t" \
|
|
|
|
"pmullh %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
|
|
|
|
|
|
|
|
/* SSE: pmulhw */
|
|
|
|
#define _mm_pmulhh(_D, _d, _s) \
|
|
|
|
"pmulhh %[" #_D "h], %[" #_d "h], %[" #_s \
|
|
|
|
"h] \n\t" \
|
|
|
|
"pmulhh %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
|
|
|
|
|
|
|
|
/* SSE: pmuludq */
|
|
|
|
#define _mm_pmuluw(_D, _d, _s) \
|
|
|
|
"pmuluw %[" #_D "h], %[" #_d "h], %[" #_s \
|
|
|
|
"h] \n\t" \
|
|
|
|
"pmuluw %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
|
|
|
|
|
|
|
|
/* SSE: packsswb */
|
|
|
|
#define _mm_packsshb(_D, _d, _s, _t) __mm_packxxxx(packsshb, _D, _d, _s, _t)
|
|
|
|
|
|
|
|
/* SSE: packssdw */
|
|
|
|
#define _mm_packsswh(_D, _d, _s, _t) __mm_packxxxx(packsswh, _D, _d, _s, _t)
|
|
|
|
|
|
|
|
/* SSE: packuswb */
|
|
|
|
#define _mm_packushb(_D, _d, _s, _t) __mm_packxxxx(packushb, _D, _d, _s, _t)
|
|
|
|
|
|
|
|
/* SSE: punpcklbw */
|
|
|
|
#define _mm_punpcklbh(_D, _d, _s) \
|
|
|
|
"punpckhbh %[" #_D "h], %[" #_d "l], %[" #_s \
|
|
|
|
"l] \n\t" \
|
|
|
|
"punpcklbh %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
|
|
|
|
|
|
|
|
/* SSE: punpcklwd */
|
|
|
|
#define _mm_punpcklhw(_D, _d, _s) \
|
|
|
|
"punpckhhw %[" #_D "h], %[" #_d "l], %[" #_s \
|
|
|
|
"l] \n\t" \
|
|
|
|
"punpcklhw %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
|
|
|
|
|
|
|
|
/* SSE: punpckldq */
|
|
|
|
#define _mm_punpcklwd(_D, _d, _s) \
|
|
|
|
"punpckhwd %[" #_D "h], %[" #_d "l], %[" #_s \
|
|
|
|
"l] \n\t" \
|
|
|
|
"punpcklwd %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
|
|
|
|
|
|
|
|
/* SSE: punpcklqdq */
|
|
|
|
#define _mm_punpckldq(_D, _d, _s) \
|
|
|
|
"mov.d %[" #_D "h], %[" #_s \
|
|
|
|
"l] \n\t" \
|
|
|
|
"mov.d %[" #_D "l], %[" #_d "l] \n\t"
|
|
|
|
|
|
|
|
/* SSE: punpckhbw */
|
|
|
|
#define _mm_punpckhbh(_D, _d, _s) \
|
|
|
|
"punpcklbh %[" #_D "l], %[" #_d "h], %[" #_s \
|
|
|
|
"h] \n\t" \
|
|
|
|
"punpckhbh %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t"
|
|
|
|
|
|
|
|
/* SSE: punpckhwd */
|
|
|
|
#define _mm_punpckhhw(_D, _d, _s) \
|
|
|
|
"punpcklhw %[" #_D "l], %[" #_d "h], %[" #_s \
|
|
|
|
"h] \n\t" \
|
|
|
|
"punpckhhw %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t"
|
|
|
|
|
|
|
|
/* SSE: punpckhdq */
|
|
|
|
#define _mm_punpckhwd(_D, _d, _s) \
|
|
|
|
"punpcklwd %[" #_D "l], %[" #_d "h], %[" #_s \
|
|
|
|
"h] \n\t" \
|
|
|
|
"punpckhwd %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t"
|
|
|
|
|
|
|
|
/* SSE: punpckhqdq */
|
|
|
|
#define _mm_punpckhdq(_D, _d, _s) \
|
|
|
|
"mov.d %[" #_D "l], %[" #_d \
|
|
|
|
"h] \n\t" \
|
|
|
|
"mov.d %[" #_D "h], %[" #_s "h] \n\t"
|
|
|
|
|
|
|
|
#endif /* __MMI_HELPERS_H__ */
|