63 lines
1.4 KiB
C
63 lines
1.4 KiB
C
#ifndef _SSE_MATH_FUN_
|
|
#define _SSE_MATH_FUN_
|
|
|
|
#include <xmmintrin.h>
|
|
|
|
|
|
|
|
/* yes I know, the top of this file is quite ugly */
|
|
|
|
/*
 * ALIGN16_BEG / ALIGN16_END: portable request for 16-byte alignment.
 *
 * Visual C++ spells the alignment request as a prefix (__declspec), gcc/icc
 * as a suffix (__attribute__), so a pair of macros is provided and the one
 * that does not apply to the current compiler expands to nothing.
 * Intended usage (both macros around the declarator):
 *
 *     ALIGN16_BEG float buf[4] ALIGN16_END = { 0 };
 */
#ifdef _MSC_VER /* visual c++ */
# define ALIGN16_BEG __declspec(align(16))
# define ALIGN16_END
#else /* gcc or icc */
# define ALIGN16_BEG
# define ALIGN16_END __attribute__((aligned(16)))
#endif
|
|
|
|
/* __m128 is ugly to write */
|
|
/*
 * Short aliases for the intrinsic vector types used by this header.
 * v4sf is always available; the integer vector alias depends on USE_SSE2:
 * with USE_SSE2 defined, <emmintrin.h> is pulled in and v4si is provided,
 * otherwise the MMX-based v2si is provided instead.
 */
typedef __m128 v4sf; // vector of 4 float (sse1)

#ifdef USE_SSE2
# include <emmintrin.h>
typedef __m128i v4si; // vector of 4 int (sse2)
#else
typedef __m64 v2si; // vector of 2 int (mmx)
#endif
|
|
|
|
/* natural logarithm computed for 4 simultaneous float
|
|
return NaN for x <= 0
|
|
*/
|
|
v4sf log_ps(v4sf x);
|
|
|
|
#ifndef USE_SSE2
|
|
/*
 * Overlay of one 128-bit SSE value with two 64-bit MMX values.
 * Writing one member and reading the other reinterprets the same 16 bytes:
 * mm[0] aliases the first 8 bytes of xmm and mm[1] the last 8 bytes.
 * Used by COPY_XMM_TO_MM / COPY_MM_TO_XMM.
 */
typedef union xmm_mm_union {
  __m128 xmm;   /* the full 16-byte SSE view */
  __m64 mm[2];  /* the same storage seen as two 8-byte MMX halves */
} xmm_mm_union;
|
|
|
|
/*
 * COPY_XMM_TO_MM(xmm_, mm0_, mm1_)
 *
 * Splits the 128-bit value xmm_ into two 64-bit MMX values through an
 * xmm_mm_union: mm0_ receives the first 8 bytes, mm1_ the last 8 bytes.
 * mm0_ and mm1_ must be assignable lvalues of type __m64.
 *
 * The expansion is a single statement (do { } while (0)), so it is safe in
 * an unbraced if/else; terminate the invocation with a semicolon. The
 * temporary has a macro-private name so that an argument called `u` (or any
 * other ordinary identifier) is not captured by the macro's own local.
 */
#define COPY_XMM_TO_MM(xmm_, mm0_, mm1_) do {   \
    xmm_mm_union xmm_mm_copy_tmp_;              \
    xmm_mm_copy_tmp_.xmm = (xmm_);              \
    (mm0_) = xmm_mm_copy_tmp_.mm[0];            \
    (mm1_) = xmm_mm_copy_tmp_.mm[1];            \
  } while (0)
|
|
|
|
/*
 * COPY_MM_TO_XMM(mm0_, mm1_, xmm_)
 *
 * Joins two 64-bit MMX values into one 128-bit value through an
 * xmm_mm_union: mm0_ supplies the first 8 bytes of xmm_, mm1_ the last
 * 8 bytes. xmm_ must be an assignable lvalue of type __m128.
 *
 * The expansion is a single statement (do { } while (0)), so it is safe in
 * an unbraced if/else; terminate the invocation with a semicolon. The
 * temporary has a macro-private name so that an argument called `u` (or any
 * other ordinary identifier) is not captured by the macro's own local.
 */
#define COPY_MM_TO_XMM(mm0_, mm1_, xmm_) do {   \
    xmm_mm_union xmm_mm_copy_tmp_;              \
    xmm_mm_copy_tmp_.mm[0] = (mm0_);            \
    xmm_mm_copy_tmp_.mm[1] = (mm1_);            \
    (xmm_) = xmm_mm_copy_tmp_.xmm;              \
  } while (0)
|
|
|
|
#endif // USE_SSE2
|
|
|
|
|
|
/* Takes and returns a v4sf (vector of 4 float). Declaration only; no
   definition is visible in this header.
   NOTE(review): by analogy with log_ps this is presumably the exponential of
   each of the 4 floats in x -- confirm against the definition. */
v4sf exp_ps(v4sf x);
|
|
|
|
/* Takes and returns a v4sf (vector of 4 float). Declaration only; no
   definition is visible in this header.
   NOTE(review): presumably the sine of each of the 4 floats in x; the unit of
   x is not stated here -- confirm against the definition. */
v4sf sin_ps(v4sf x);
|
|
|
|
/* almost the same as sin_ps */
|
|
v4sf cos_ps(v4sf x);
|
|
|
|
/* since sin_ps and cos_ps are almost identical, sincos_ps could replace both of them..
|
|
it is almost as fast, and gives you a free cosine with your sine */
|
|
void sincos_ps(v4sf x, v4sf *s, v4sf *c);
|
|
|
|
#endif |