summaryrefslogtreecommitdiffstats
path: root/clang/lib/Headers/avx512fintrin.h
diff options
context:
space:
mode:
Diffstat (limited to 'clang/lib/Headers/avx512fintrin.h')
-rw-r--r--clang/lib/Headers/avx512fintrin.h1239
1 files changed, 665 insertions, 574 deletions
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index fc030d87a07..5bfe39ec792 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -2577,819 +2577,910 @@ _mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) {
(__mmask8)-1, (int)(R)); })
#define _mm512_fmadd_round_pd(A, B, C, R) __extension__ ({ \
- (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)(__m512d)(C), (__mmask8)-1, \
- (int)(R)); })
+ (__m512d)__builtin_ia32_vfmaddpd512((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(C), (int)(R)); })
#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) __extension__ ({ \
- (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)(__m512d)(C), \
- (__mmask8)(U), (int)(R)); })
+ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ __builtin_ia32_vfmaddpd512((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(C), \
+ (int)(R)), \
+ (__v8df)(__m512d)(A)); })
#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) __extension__ ({ \
- (__m512d)__builtin_ia32_vfmaddpd512_mask3((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)(__m512d)(C), \
- (__mmask8)(U), (int)(R)); })
+ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ __builtin_ia32_vfmaddpd512((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(C), \
+ (int)(R)), \
+ (__v8df)(__m512d)(C)); })
#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) __extension__ ({ \
- (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)(__m512d)(C), \
- (__mmask8)(U), (int)(R)); })
+ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ __builtin_ia32_vfmaddpd512((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(C), \
+ (int)(R)), \
+ (__v8df)_mm512_setzero_pd()); })
#define _mm512_fmsub_round_pd(A, B, C, R) __extension__ ({ \
- (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- -(__v8df)(__m512d)(C), \
- (__mmask8)-1, (int)(R)); })
+ (__m512d)__builtin_ia32_vfmaddpd512((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ -(__v8df)(__m512d)(C), \
+ (int)(R)); })
#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) __extension__ ({ \
- (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- -(__v8df)(__m512d)(C), \
- (__mmask8)(U), (int)(R)); })
+ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ __builtin_ia32_vfmaddpd512((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ -(__v8df)(__m512d)(C), \
+ (int)(R)), \
+ (__v8df)(__m512d)(A)); })
#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) __extension__ ({ \
- (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- -(__v8df)(__m512d)(C), \
- (__mmask8)(U), (int)(R)); })
+ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ __builtin_ia32_vfmaddpd512((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ -(__v8df)(__m512d)(C), \
+ (int)(R)), \
+ (__v8df)_mm512_setzero_pd()); })
#define _mm512_fnmadd_round_pd(A, B, C, R) __extension__ ({ \
- (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)(__m512d)(C), (__mmask8)-1, \
- (int)(R)); })
+ (__m512d)__builtin_ia32_vfmaddpd512(-(__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(C), (int)(R)); })
#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) __extension__ ({ \
- (__m512d)__builtin_ia32_vfmaddpd512_mask3(-(__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)(__m512d)(C), \
- (__mmask8)(U), (int)(R)); })
+ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ __builtin_ia32_vfmaddpd512(-(__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(C), \
+ (int)(R)), \
+ (__v8df)(__m512d)(C)); })
#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) __extension__ ({ \
- (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)(__m512d)(C), \
- (__mmask8)(U), (int)(R)); })
+ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ __builtin_ia32_vfmaddpd512(-(__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(C), \
+ (int)(R)), \
+ (__v8df)_mm512_setzero_pd()); })
#define _mm512_fnmsub_round_pd(A, B, C, R) __extension__ ({ \
- (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- -(__v8df)(__m512d)(C), \
- (__mmask8)-1, (int)(R)); })
+ (__m512d)__builtin_ia32_vfmaddpd512(-(__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ -(__v8df)(__m512d)(C), \
+ (int)(R)); })
#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) __extension__ ({ \
- (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- -(__v8df)(__m512d)(C), \
- (__mmask8)(U), (int)(R)); })
+ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ __builtin_ia32_vfmaddpd512(-(__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ -(__v8df)(__m512d)(C), \
+ (int)(R)), \
+ (__v8df)_mm512_setzero_pd()); })
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
- (__v8df) __B,
- (__v8df) __C,
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d) __builtin_ia32_vfmaddpd512 ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
- (__v8df) __B,
- (__v8df) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U,
+ __builtin_ia32_vfmaddpd512 ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ _MM_FROUND_CUR_DIRECTION),
+ (__v8df) __A);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
- return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
- (__v8df) __B,
- (__v8df) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U,
+ __builtin_ia32_vfmaddpd512 ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ _MM_FROUND_CUR_DIRECTION),
+ (__v8df) __C);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
- (__v8df) __B,
- (__v8df) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U,
+ __builtin_ia32_vfmaddpd512 ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ _MM_FROUND_CUR_DIRECTION),
+ (__v8df) _mm512_setzero_pd());
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
- (__v8df) __B,
- -(__v8df) __C,
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d) __builtin_ia32_vfmaddpd512 ((__v8df) __A,
+ (__v8df) __B,
+ -(__v8df) __C,
+ _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
- (__v8df) __B,
- -(__v8df) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U,
+ __builtin_ia32_vfmaddpd512 ((__v8df) __A,
+ (__v8df) __B,
+ -(__v8df) __C,
+ _MM_FROUND_CUR_DIRECTION),
+ (__v8df) __A);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
- (__v8df) __B,
- -(__v8df) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U,
+ __builtin_ia32_vfmaddpd512 ((__v8df) __A,
+ (__v8df) __B,
+ -(__v8df) __C,
+ _MM_FROUND_CUR_DIRECTION),
+ (__v8df) _mm512_setzero_pd());
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
- (__v8df) __B,
- (__v8df) __C,
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d) __builtin_ia32_vfmaddpd512 (-(__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
- return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
- (__v8df) __B,
- (__v8df) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U,
+ __builtin_ia32_vfmaddpd512 (-(__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ _MM_FROUND_CUR_DIRECTION),
+ (__v8df) __C);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
- (__v8df) __B,
- (__v8df) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U,
+ __builtin_ia32_vfmaddpd512 (-(__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ _MM_FROUND_CUR_DIRECTION),
+ (__v8df) _mm512_setzero_pd());
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
- (__v8df) __B,
- -(__v8df) __C,
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d) __builtin_ia32_vfmaddpd512 (-(__v8df) __A,
+ (__v8df) __B,
+ -(__v8df) __C,
+ _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
- (__v8df) __B,
- -(__v8df) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U,
+ __builtin_ia32_vfmaddpd512 (-(__v8df) __A,
+ (__v8df) __B,
+ -(__v8df) __C,
+ _MM_FROUND_CUR_DIRECTION),
+ (__v8df) _mm512_setzero_pd());
}
#define _mm512_fmadd_round_ps(A, B, C, R) __extension__ ({ \
- (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)(__m512)(C), (__mmask16)-1, \
- (int)(R)); })
+ (__m512)__builtin_ia32_vfmaddps512((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(C), (int)(R)); })
#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) __extension__ ({ \
- (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)(__m512)(C), \
- (__mmask16)(U), (int)(R)); })
+ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ __builtin_ia32_vfmaddps512((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(C), \
+ (int)(R)), \
+ (__v16sf)(__m512)(A)); })
#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) __extension__ ({ \
- (__m512)__builtin_ia32_vfmaddps512_mask3((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)(__m512)(C), \
- (__mmask16)(U), (int)(R)); })
+ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ __builtin_ia32_vfmaddps512((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(C), \
+ (int)(R)), \
+ (__v16sf)(__m512)(C)); })
#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) __extension__ ({ \
- (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)(__m512)(C), \
- (__mmask16)(U), (int)(R)); })
+ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ __builtin_ia32_vfmaddps512((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(C), \
+ (int)(R)), \
+ (__v16sf)_mm512_setzero_ps()); })
#define _mm512_fmsub_round_ps(A, B, C, R) __extension__ ({ \
- (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- -(__v16sf)(__m512)(C), \
- (__mmask16)-1, (int)(R)); })
+ (__m512)__builtin_ia32_vfmaddps512((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ -(__v16sf)(__m512)(C), \
+ (int)(R)); })
#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) __extension__ ({ \
- (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- -(__v16sf)(__m512)(C), \
- (__mmask16)(U), (int)(R)); })
+ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ __builtin_ia32_vfmaddps512((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ -(__v16sf)(__m512)(C), \
+ (int)(R)), \
+ (__v16sf)(__m512)(A)); })
#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) __extension__ ({ \
- (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- -(__v16sf)(__m512)(C), \
- (__mmask16)(U), (int)(R)); })
+ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ __builtin_ia32_vfmaddps512((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ -(__v16sf)(__m512)(C), \
+ (int)(R)), \
+ (__v16sf)_mm512_setzero_ps()); })
#define _mm512_fnmadd_round_ps(A, B, C, R) __extension__ ({ \
- (__m512)__builtin_ia32_vfmaddps512_mask(-(__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)(__m512)(C), (__mmask16)-1, \
- (int)(R)); })
+ (__m512)__builtin_ia32_vfmaddps512(-(__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(C), (int)(R)); })
#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) __extension__ ({ \
- (__m512)__builtin_ia32_vfmaddps512_mask3(-(__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)(__m512)(C), \
- (__mmask16)(U), (int)(R)); })
+ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ __builtin_ia32_vfmaddps512(-(__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(C), \
+ (int)(R)), \
+ (__v16sf)(__m512)(C)); })
#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) __extension__ ({ \
- (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)(__m512)(C), \
- (__mmask16)(U), (int)(R)); })
+ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ __builtin_ia32_vfmaddps512(-(__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(C), \
+ (int)(R)), \
+ (__v16sf)_mm512_setzero_ps()); })
#define _mm512_fnmsub_round_ps(A, B, C, R) __extension__ ({ \
- (__m512)__builtin_ia32_vfmaddps512_mask(-(__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- -(__v16sf)(__m512)(C), \
- (__mmask16)-1, (int)(R)); })
+ (__m512)__builtin_ia32_vfmaddps512(-(__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ -(__v16sf)(__m512)(C), \
+ (int)(R)); })
#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) __extension__ ({ \
- (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- -(__v16sf)(__m512)(C), \
- (__mmask16)(U), (int)(R)); })
+ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ __builtin_ia32_vfmaddps512(-(__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ -(__v16sf)(__m512)(C), \
+ (int)(R)), \
+ (__v16sf)_mm512_setzero_ps()); })
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
- (__v16sf) __B,
- (__v16sf) __C,
- (__mmask16) -1,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_vfmaddps512 ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
- (__v16sf) __B,
- (__v16sf) __C,
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
+ __builtin_ia32_vfmaddps512 ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ _MM_FROUND_CUR_DIRECTION), \
+ (__v16sf) __A);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
- return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
- (__v16sf) __B,
- (__v16sf) __C,
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
+ __builtin_ia32_vfmaddps512 ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ _MM_FROUND_CUR_DIRECTION), \
+ (__v16sf) __C);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
- (__v16sf) __B,
- (__v16sf) __C,
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
+ __builtin_ia32_vfmaddps512 ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ _MM_FROUND_CUR_DIRECTION),
+ (__v16sf) _mm512_setzero_ps());
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
- (__v16sf) __B,
- -(__v16sf) __C,
- (__mmask16) -1,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_vfmaddps512 ((__v16sf) __A,
+ (__v16sf) __B,
+ -(__v16sf) __C,
+ _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
- (__v16sf) __B,
- -(__v16sf) __C,
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
+ __builtin_ia32_vfmaddps512 ((__v16sf) __A,
+ (__v16sf) __B,
+ -(__v16sf) __C,
+ _MM_FROUND_CUR_DIRECTION),
+ (__v16sf) __A);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
- (__v16sf) __B,
- -(__v16sf) __C,
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
+ __builtin_ia32_vfmaddps512 ((__v16sf) __A,
+ (__v16sf) __B,
+ -(__v16sf) __C,
+ _MM_FROUND_CUR_DIRECTION),
+ (__v16sf) _mm512_setzero_ps());
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
- (__v16sf) __B,
- (__v16sf) __C,
- (__mmask16) -1,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_vfmaddps512 (-(__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
- return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
- (__v16sf) __B,
- (__v16sf) __C,
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
+ __builtin_ia32_vfmaddps512 (-(__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ _MM_FROUND_CUR_DIRECTION),
+ (__v16sf) __C);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
- (__v16sf) __B,
- (__v16sf) __C,
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
+ __builtin_ia32_vfmaddps512 (-(__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ _MM_FROUND_CUR_DIRECTION),
+ (__v16sf) _mm512_setzero_ps());
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
- (__v16sf) __B,
- -(__v16sf) __C,
- (__mmask16) -1,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_vfmaddps512 (-(__v16sf) __A,
+ (__v16sf) __B,
+ -(__v16sf) __C,
+ _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
- (__v16sf) __B,
- -(__v16sf) __C,
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
+ __builtin_ia32_vfmaddps512 (-(__v16sf) __A,
+ (__v16sf) __B,
+ -(__v16sf) __C,
+ _MM_FROUND_CUR_DIRECTION),
+ (__v16sf) _mm512_setzero_ps());
}
#define _mm512_fmaddsub_round_pd(A, B, C, R) __extension__ ({ \
- (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)(__m512d)(C), \
- (__mmask8)-1, (int)(R)); })
+ (__m512d)__builtin_ia32_vfmaddsubpd512((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(C), \
+ (int)(R)); })
#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) __extension__ ({ \
- (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)(__m512d)(C), \
- (__mmask8)(U), (int)(R)); })
+ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ __builtin_ia32_vfmaddsubpd512((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(C), \
+ (int)(R)), \
+ (__v8df)(__m512d)(A)); })
#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) __extension__ ({ \
- (__m512d)__builtin_ia32_vfmaddsubpd512_mask3((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)(__m512d)(C), \
- (__mmask8)(U), (int)(R)); })
+ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ __builtin_ia32_vfmaddsubpd512((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(C), \
+ (int)(R)), \
+ (__v8df)(__m512d)(C)); })
#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) __extension__ ({ \
- (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)(__m512d)(C), \
- (__mmask8)(U), (int)(R)); })
+ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ __builtin_ia32_vfmaddsubpd512((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(C), \
+ (int)(R)), \
+ (__v8df)_mm512_setzero_pd()); })
#define _mm512_fmsubadd_round_pd(A, B, C, R) __extension__ ({ \
- (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- -(__v8df)(__m512d)(C), \
- (__mmask8)-1, (int)(R)); })
+ (__m512d)__builtin_ia32_vfmaddsubpd512((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ -(__v8df)(__m512d)(C), \
+ (int)(R)); })
#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) __extension__ ({ \
- (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- -(__v8df)(__m512d)(C), \
- (__mmask8)(U), (int)(R)); })
+ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ __builtin_ia32_vfmaddsubpd512((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ -(__v8df)(__m512d)(C), \
+ (int)(R)), \
+ (__v8df)(__m512d)(A)); })
#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) __extension__ ({ \
- (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- -(__v8df)(__m512d)(C), \
- (__mmask8)(U), (int)(R)); })
+ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ __builtin_ia32_vfmaddsubpd512((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ -(__v8df)(__m512d)(C), \
+ (int)(R)), \
+ (__v8df)_mm512_setzero_pd()); })
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
- (__v8df) __B,
- (__v8df) __C,
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d) __builtin_ia32_vfmaddsubpd512 ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
- (__v8df) __B,
- (__v8df) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U,
+ __builtin_ia32_vfmaddsubpd512 ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ _MM_FROUND_CUR_DIRECTION),
+ (__v8df) __A);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
- return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
- (__v8df) __B,
- (__v8df) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U,
+ __builtin_ia32_vfmaddsubpd512 ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ _MM_FROUND_CUR_DIRECTION),
+ (__v8df) __C);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
- (__v8df) __B,
- (__v8df) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U,
+ __builtin_ia32_vfmaddsubpd512 ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ _MM_FROUND_CUR_DIRECTION),
+ (__v8df) _mm512_setzero_pd());
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
- (__v8df) __B,
- -(__v8df) __C,
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d) __builtin_ia32_vfmaddsubpd512 ((__v8df) __A,
+ (__v8df) __B,
+ -(__v8df) __C,
+ _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
- (__v8df) __B,
- -(__v8df) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U,
+ __builtin_ia32_vfmaddsubpd512 ((__v8df) __A,
+ (__v8df) __B,
+ -(__v8df) __C,
+ _MM_FROUND_CUR_DIRECTION),
+ (__v8df) __A);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
- (__v8df) __B,
- -(__v8df) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U,
+ __builtin_ia32_vfmaddsubpd512 ((__v8df) __A,
+ (__v8df) __B,
+ -(__v8df) __C,
+ _MM_FROUND_CUR_DIRECTION),
+ (__v8df) _mm512_setzero_pd());
}
#define _mm512_fmaddsub_round_ps(A, B, C, R) __extension__ ({ \
- (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)(__m512)(C), \
- (__mmask16)-1, (int)(R)); })
+ (__m512)__builtin_ia32_vfmaddsubps512((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(C), \
+ (int)(R)); })
#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) __extension__ ({ \
- (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)(__m512)(C), \
- (__mmask16)(U), (int)(R)); })
+ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ __builtin_ia32_vfmaddsubps512((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(C), \
+ (int)(R)), \
+ (__v16sf)(__m512)(A)); })
#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) __extension__ ({ \
- (__m512)__builtin_ia32_vfmaddsubps512_mask3((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)(__m512)(C), \
- (__mmask16)(U), (int)(R)); })
+ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ __builtin_ia32_vfmaddsubps512((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(C), \
+ (int)(R)), \
+ (__v16sf)(__m512)(C)); })
#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) __extension__ ({ \
- (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)(__m512)(C), \
- (__mmask16)(U), (int)(R)); })
+ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ __builtin_ia32_vfmaddsubps512((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(C), \
+ (int)(R)), \
+ (__v16sf)_mm512_setzero_ps()); })
#define _mm512_fmsubadd_round_ps(A, B, C, R) __extension__ ({ \
- (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- -(__v16sf)(__m512)(C), \
- (__mmask16)-1, (int)(R)); })
+ (__m512)__builtin_ia32_vfmaddsubps512((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ -(__v16sf)(__m512)(C), \
+ (int)(R)); })
#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) __extension__ ({ \
- (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- -(__v16sf)(__m512)(C), \
- (__mmask16)(U), (int)(R)); })
+ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ __builtin_ia32_vfmaddsubps512((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ -(__v16sf)(__m512)(C), \
+ (int)(R)), \
+ (__v16sf)(__m512)(A)); })
#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) __extension__ ({ \
- (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- -(__v16sf)(__m512)(C), \
- (__mmask16)(U), (int)(R)); })
+ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ __builtin_ia32_vfmaddsubps512((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ -(__v16sf)(__m512)(C), \
+ (int)(R)), \
+ (__v16sf)_mm512_setzero_ps()); })
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
- (__v16sf) __B,
- (__v16sf) __C,
- (__mmask16) -1,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_vfmaddsubps512 ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
- (__v16sf) __B,
- (__v16sf) __C,
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
+ __builtin_ia32_vfmaddsubps512 ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ _MM_FROUND_CUR_DIRECTION), \
+ (__v16sf) __A);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
- return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
- (__v16sf) __B,
- (__v16sf) __C,
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
+ __builtin_ia32_vfmaddsubps512 ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ _MM_FROUND_CUR_DIRECTION), \
+ (__v16sf) __C);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
- (__v16sf) __B,
- (__v16sf) __C,
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
+ __builtin_ia32_vfmaddsubps512 ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ _MM_FROUND_CUR_DIRECTION),
+ (__v16sf) _mm512_setzero_ps());
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
- (__v16sf) __B,
- -(__v16sf) __C,
- (__mmask16) -1,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_vfmaddsubps512 ((__v16sf) __A,
+ (__v16sf) __B,
+ -(__v16sf) __C,
+ _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
- (__v16sf) __B,
- -(__v16sf) __C,
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
+ __builtin_ia32_vfmaddsubps512 ((__v16sf) __A,
+ (__v16sf) __B,
+ -(__v16sf) __C,
+ _MM_FROUND_CUR_DIRECTION),
+ (__v16sf) __A);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
- (__v16sf) __B,
- -(__v16sf) __C,
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
+ __builtin_ia32_vfmaddsubps512 ((__v16sf) __A,
+ (__v16sf) __B,
+ -(__v16sf) __C,
+ _MM_FROUND_CUR_DIRECTION),
+ (__v16sf) _mm512_setzero_ps());
}
#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) __extension__ ({ \
- (__m512d)__builtin_ia32_vfmsubpd512_mask3((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)(__m512d)(C), \
- (__mmask8)(U), (int)(R)); })
+ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ (__m512d)__builtin_ia32_vfmaddpd512((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ -(__v8df)(__m512d)(C), \
+ (int)(R)), \
+ (__v8df)(__m512d)(C)); })
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
- return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
- (__v8df) __B,
- (__v8df) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U,
+ (__m512d)__builtin_ia32_vfmaddpd512 ((__v8df) __A,
+ (__v8df) __B,
+ -(__v8df) __C,
+ _MM_FROUND_CUR_DIRECTION),
+ (__v8df) __C);
}
#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) __extension__ ({ \
- (__m512)__builtin_ia32_vfmsubps512_mask3((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)(__m512)(C), \
- (__mmask16)(U), (int)(R)); })
-
+ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ (__m512)__builtin_ia32_vfmaddps512((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ -(__v16sf)(__m512)(C), \
+ (int)(R)), \
+ (__v16sf)(__m512)(C)); })
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
- return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
- (__v16sf) __B,
- (__v16sf) __C,
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
+ (__m512)__builtin_ia32_vfmaddps512 ((__v16sf) __A,
+ (__v16sf) __B,
+ -(__v16sf) __C,
+ _MM_FROUND_CUR_DIRECTION),
+ (__v16sf) __C);
}
#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) __extension__ ({ \
- (__m512d)__builtin_ia32_vfmsubaddpd512_mask3((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)(__m512d)(C), \
- (__mmask8)(U), (int)(R)); })
+ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ (__m512d)__builtin_ia32_vfmaddsubpd512((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ -(__v8df)(__m512d)(C), \
+ (int)(R)), \
+ (__v8df)(__m512d)(C)); })
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
- return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
- (__v8df) __B,
- (__v8df) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U,
+ (__m512d)__builtin_ia32_vfmaddsubpd512 ((__v8df) __A,
+ (__v8df) __B,
+ -(__v8df) __C,
+ _MM_FROUND_CUR_DIRECTION),
+ (__v8df) __C);
}
#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) __extension__ ({ \
- (__m512)__builtin_ia32_vfmsubaddps512_mask3((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)(__m512)(C), \
- (__mmask16)(U), (int)(R)); })
+ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ (__m512)__builtin_ia32_vfmaddsubps512((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ -(__v16sf)(__m512)(C), \
+ (int)(R)), \
+ (__v16sf)(__m512)(C)); })
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
- return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
- (__v16sf) __B,
- (__v16sf) __C,
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
+ (__m512)__builtin_ia32_vfmaddsubps512 ((__v16sf) __A,
+ (__v16sf) __B,
+ -(__v16sf) __C,
+ _MM_FROUND_CUR_DIRECTION),
+ (__v16sf) __C);
}
#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) __extension__ ({ \
- (__m512d)__builtin_ia32_vfnmaddpd512_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)(__m512d)(C), \
- (__mmask8)(U), (int)(R)); })
+ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ __builtin_ia32_vfmaddpd512(-(__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(C), \
+ (int)(R)), \
+ (__v8df)(__m512d)(A)); })
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
- (__v8df) __B,
- (__v8df) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U,
+ __builtin_ia32_vfmaddpd512 (-(__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ _MM_FROUND_CUR_DIRECTION),
+ (__v8df) __A);
}
#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) __extension__ ({ \
- (__m512)__builtin_ia32_vfnmaddps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)(__m512)(C), \
- (__mmask16)(U), (int)(R)); })
+ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ __builtin_ia32_vfmaddps512(-(__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(C), \
+ (int)(R)), \
+ (__v16sf)(__m512)(A)); })
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
- (__v16sf) __B,
- (__v16sf) __C,
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
+ __builtin_ia32_vfmaddps512 (-(__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ _MM_FROUND_CUR_DIRECTION),
+ (__v16sf) __A);
}
#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) __extension__ ({ \
- (__m512d)__builtin_ia32_vfnmsubpd512_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)(__m512d)(C), \
- (__mmask8)(U), (int)(R)); })
+ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ (__m512d)__builtin_ia32_vfmaddpd512((__v8df)(__m512d)(A), \
+ -(__v8df)(__m512d)(B), \
+ -(__v8df)(__m512d)(C), \
+ (int)(R)), \
+ (__v8df)(__m512d)(A)); })
#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) __extension__ ({ \
- (__m512d)__builtin_ia32_vfnmsubpd512_mask3((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)(__m512d)(C), \
- (__mmask8)(U), (int)(R)); })
+ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ (__m512d)__builtin_ia32_vfmaddpd512((__v8df)(__m512d)(A), \
+ -(__v8df)(__m512d)(B), \
+ -(__v8df)(__m512d)(C), \
+ (int)(R)), \
+ (__v8df)(__m512d)(C)); })
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
- (__v8df) __B,
- (__v8df) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d) __builtin_ia32_selectpd_512((__mmask16) __U,
+ (__m512d) __builtin_ia32_vfmaddpd512 ((__v8df) __A,
+ -(__v8df) __B,
+ -(__v8df) __C,
+ _MM_FROUND_CUR_DIRECTION),
+ (__v8df) __A);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
- return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
- (__v8df) __B,
- (__v8df) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d) __builtin_ia32_selectpd_512((__mmask16) __U,
+ (__m512d) __builtin_ia32_vfmaddpd512 ((__v8df) __A,
+ -(__v8df) __B,
+ -(__v8df) __C,
+ _MM_FROUND_CUR_DIRECTION),
+ (__v8df) __C);
}
#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) __extension__ ({ \
- (__m512)__builtin_ia32_vfnmsubps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)(__m512)(C), \
- (__mmask16)(U), (int)(R)); })
+ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ (__m512)__builtin_ia32_vfmaddps512 ((__v16sf)(__m512)(A), \
+ -(__v16sf)(__m512)(B), \
+ -(__v16sf)(__m512)(C), \
+ (int)(R)), \
+ (__v16sf)(__m512)(A)); })
#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) __extension__ ({ \
- (__m512)__builtin_ia32_vfnmsubps512_mask3((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)(__m512)(C), \
- (__mmask16)(U), (int)(R)); })
+ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ (__m512)__builtin_ia32_vfmaddps512 ((__v16sf)(__m512)(A), \
+ -(__v16sf)(__m512)(B), \
+ -(__v16sf)(__m512)(C), \
+ (int)(R)), \
+ (__v16sf)(__m512)(C)); })
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
- (__v16sf) __B,
- (__v16sf) __C,
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
+ (__m512) __builtin_ia32_vfmaddps512 ((__v16sf) __A,
+ -(__v16sf) __B,
+ -(__v16sf) __C,
+ _MM_FROUND_CUR_DIRECTION),
+ (__v16sf) __A);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
- return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
- (__v16sf) __B,
- (__v16sf) __C,
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
+ (__m512) __builtin_ia32_vfmaddps512 ((__v16sf) __A,
+ -(__v16sf) __B,
+ -(__v16sf) __C,
+ _MM_FROUND_CUR_DIRECTION),
+ (__v16sf) __C);
}
@@ -8112,27 +8203,27 @@ _mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
- return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
- (__v4sf) __A,
- (__v4sf) __B,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __W,
+ (__v4sf) __A,
+ (__v4sf) __B);
+ __W[0] = (__U & 1) ? __Z[0] : __W[0];
+ return __W;
}
#define _mm_mask_fmadd_round_ss(W, U, A, B, R) __extension__({\
- (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
- (__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), (__mmask8)(U), \
- (int)(R)); })
+ (__m128d)__builtin_ia32_vfmaddss3_mask((__v2df)(__m128d)(W), \
+ (__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), (__mmask8)(U), \
+ (int)(R)); })
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_fmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
{
- return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __A,
- (__v4sf) __B,
- (__v4sf) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __C);
+ __A[0] = (__U & 1) ? __Z[0] : 0;
+ return __A;
}
#define _mm_maskz_fmadd_round_ss(U, A, B, C, R) __extension__ ({\
@@ -8144,11 +8235,11 @@ _mm_maskz_fmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask3_fmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
{
- return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
- (__v4sf) __X,
- (__v4sf) __Y,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __W,
+ (__v4sf) __X,
+ (__v4sf) __Y);
+ __Y[0] = (__U & 1) ? __Z[0] : __Y[0];
+ return __Y;
}
#define _mm_mask3_fmadd_round_ss(W, X, Y, U, R) __extension__ ({\
@@ -8160,27 +8251,27 @@ _mm_mask3_fmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
- return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
- (__v4sf) __A,
- -(__v4sf) __B,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __W,
+ (__v4sf) __A,
+ -(__v4sf) __B);
+ __W[0] = (__U & 1) ? __Z[0] : __W[0];
+ return __W;
}
#define _mm_mask_fmsub_round_ss(W, U, A, B, R) __extension__ ({\
(__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
(__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), (__mmask8)(U), \
+ -(__v4sf)(__m128)(B), (__mmask8)(U), \
(int)(R)); })
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_fmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
{
- return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __A,
- (__v4sf) __B,
- -(__v4sf) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __A,
+ (__v4sf) __B,
+ -(__v4sf) __C);
+ __A[0] = (__U & 1) ? __Z[0] : 0;
+ return __A;
}
#define _mm_maskz_fmsub_round_ss(U, A, B, C, R) __extension__ ({\
@@ -8192,11 +8283,11 @@ _mm_maskz_fmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask3_fmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
{
- return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W,
- (__v4sf) __X,
- (__v4sf) __Y,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __W,
+ (__v4sf) __X,
+ -(__v4sf) __Y);
+ __Y[0] = (__U & 1) ? __Z[0] : __Y[0];
+ return __Y;
}
#define _mm_mask3_fmsub_round_ss(W, X, Y, U, R) __extension__ ({\
@@ -8208,11 +8299,11 @@ _mm_mask3_fmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
- return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
- -(__v4sf) __A,
- (__v4sf) __B,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __W,
+ -(__v4sf) __A,
+ (__v4sf) __B);
+ __W[0] = (__U & 1) ? __Z[0] : __W[0];
+ return __W;
}
#define _mm_mask_fnmadd_round_ss(W, U, A, B, R) __extension__ ({\
@@ -8224,43 +8315,43 @@ _mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
{
- return (__m128) __builtin_ia32_vfmaddss3_maskz (-(__v4sf) __A,
- (__v4sf) __B,
- (__v4sf) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __A,
+ -(__v4sf) __B,
+ (__v4sf) __C);
+ __A[0] = (__U & 1) ? __Z[0] : 0;
+ return __A;
}
#define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) __extension__ ({\
- (__m128)__builtin_ia32_vfmaddss3_maskz(-(__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
+ (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
+ -(__v4sf)(__m128)(B), \
(__v4sf)(__m128)(C), (__mmask8)(U), \
(int)(R)); })
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask3_fnmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
{
- return (__m128) __builtin_ia32_vfmaddss3_mask3 (-(__v4sf) __W,
- (__v4sf) __X,
- (__v4sf) __Y,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __W,
+ -(__v4sf) __X,
+ (__v4sf) __Y);
+ __Y[0] = (__U & 1) ? __Z[0] : __Y[0];
+ return __Y;
}
#define _mm_mask3_fnmadd_round_ss(W, X, Y, U, R) __extension__({\
- (__m128)__builtin_ia32_vfmaddss3_mask3(-(__v4sf)(__m128)(W), \
- (__v4sf)(__m128)(X), \
+ (__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \
+ -(__v4sf)(__m128)(X), \
(__v4sf)(__m128)(Y), (__mmask8)(U), \
(int)(R)); })
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
- return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
- -(__v4sf) __A,
- -(__v4sf) __B,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __W,
+ -(__v4sf) __A,
+ -(__v4sf) __B);
+ __W[0] = (__U & 1) ? __Z[0] : __W[0];
+ return __W;
}
#define _mm_mask_fnmsub_round_ss(W, U, A, B, R) __extension__ ({\
@@ -8272,43 +8363,43 @@ _mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
{
- return (__m128) __builtin_ia32_vfmaddss3_maskz (-(__v4sf) __A,
- (__v4sf) __B,
- -(__v4sf) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __A,
+ -(__v4sf) __B,
+ -(__v4sf) __C);
+ __A[0] = (__U & 1) ? __Z[0] : 0;
+ return __A;
}
#define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) __extension__ ({\
- (__m128)__builtin_ia32_vfmaddss3_maskz(-(__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
+ (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
+ -(__v4sf)(__m128)(B), \
-(__v4sf)(__m128)(C), (__mmask8)(U), \
_MM_FROUND_CUR_DIRECTION); })
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask3_fnmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
{
- return (__m128) __builtin_ia32_vfnmsubss3_mask3 ((__v4sf) __W,
- (__v4sf) __X,
- (__v4sf) __Y,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __W,
+ -(__v4sf) __X,
+ -(__v4sf) __Y);
+ __Y[0] = (__U & 1) ? __Z[0] : __Y[0];
+ return __Y;
}
#define _mm_mask3_fnmsub_round_ss(W, X, Y, U, R) __extension__({\
- (__m128)__builtin_ia32_vfnmsubss3_mask3((__v4sf)(__m128)(W), \
- (__v4sf)(__m128)(X), \
+ (__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \
+ -(__v4sf)(__m128)(X), \
(__v4sf)(__m128)(Y), (__mmask8)(U), \
(int)(R)); })
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
- return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W,
- (__v2df) __A,
- (__v2df) __B,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __W,
+ (__v2df) __A,
+ (__v2df) __B);
+ __W[0] = (__U & 1) ? __Z[0] : __W[0];
+ return __W;
}
#define _mm_mask_fmadd_round_sd(W, U, A, B, R) __extension__({\
@@ -8320,11 +8411,11 @@ _mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_fmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
{
- return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( (__v2df) __A,
- (__v2df) __B,
- (__v2df) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __C);
+ __A[0] = (__U & 1) ? __Z[0] : 0;
+ return __A;
}
#define _mm_maskz_fmadd_round_sd(U, A, B, C, R) __extension__ ({\
@@ -8336,11 +8427,11 @@ _mm_maskz_fmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask3_fmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
{
- return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
- (__v2df) __X,
- (__v2df) __Y,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __W,
+ (__v2df) __X,
+ (__v2df) __Y);
+ __Y[0] = (__U & 1) ? __Z[0] : __Y[0];
+ return __Y;
}
#define _mm_mask3_fmadd_round_sd(W, X, Y, U, R) __extension__ ({\
@@ -8352,11 +8443,11 @@ _mm_mask3_fmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
- return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W,
- (__v2df) __A,
- -(__v2df) __B,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __W,
+ (__v2df) __A,
+ -(__v2df) __B);
+ __W[0] = (__U & 1) ? __Z[0] : __W[0];
+ return __W;
}
#define _mm_mask_fmsub_round_sd(W, U, A, B, R) __extension__ ({\
@@ -8368,11 +8459,11 @@ _mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_fmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
{
- return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( (__v2df) __A,
- (__v2df) __B,
- -(__v2df) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __A,
+ (__v2df) __B,
+ -(__v2df) __C);
+ __A[0] = (__U & 1) ? __Z[0] : 0;
+ return __A;
}
#define _mm_maskz_fmsub_round_sd(U, A, B, C, R) __extension__ ({\
@@ -8384,11 +8475,11 @@ _mm_maskz_fmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask3_fmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
{
- return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W,
- (__v2df) __X,
- (__v2df) __Y,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __W,
+ (__v2df) __X,
+ -(__v2df) __Y);
+ __Y[0] = (__U & 1) ? __Z[0] : __Y[0];
+ return __Y;
}
#define _mm_mask3_fmsub_round_sd(W, X, Y, U, R) __extension__ ({\
@@ -8400,11 +8491,11 @@ _mm_mask3_fmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
- return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W,
- -(__v2df) __A,
- (__v2df) __B,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __W,
+ -(__v2df) __A,
+ (__v2df) __B);
+ __W[0] = (__U & 1) ? __Z[0] : __W[0];
+ return __W;
}
#define _mm_mask_fnmadd_round_sd(W, U, A, B, R) __extension__ ({\
@@ -8416,43 +8507,43 @@ _mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
{
- return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( -(__v2df) __A,
- (__v2df) __B,
- (__v2df) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __A,
+ -(__v2df) __B,
+ (__v2df) __C);
+ __A[0] = (__U & 1) ? __Z[0] : 0;
+ return __A;
}
#define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) __extension__ ({\
- (__m128d)__builtin_ia32_vfmaddsd3_maskz(-(__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
+ (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
+ -(__v2df)(__m128d)(B), \
(__v2df)(__m128d)(C), (__mmask8)(U), \
(int)(R)); })
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask3_fnmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
{
- return (__m128d) __builtin_ia32_vfmaddsd3_mask3 (-(__v2df) __W,
- (__v2df) __X,
- (__v2df) __Y,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __W,
+ -(__v2df) __X,
+ (__v2df) __Y);
+ __Y[0] = (__U & 1) ? __Z[0] : __Y[0];
+ return __Y;
}
#define _mm_mask3_fnmadd_round_sd(W, X, Y, U, R) __extension__({\
- (__m128d)__builtin_ia32_vfmaddsd3_mask3(-(__v2df)(__m128d)(W), \
- (__v2df)(__m128d)(X), \
+ (__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \
+ -(__v2df)(__m128d)(X), \
(__v2df)(__m128d)(Y), (__mmask8)(U), \
(int)(R)); })
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
- return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W,
- -(__v2df) __A,
- -(__v2df) __B,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __W,
+ -(__v2df) __A,
+ -(__v2df) __B);
+ __W[0] = (__U & 1) ? __Z[0] : __W[0];
+ return __W;
}
#define _mm_mask_fnmsub_round_sd(W, U, A, B, R) __extension__ ({\
@@ -8464,16 +8555,16 @@ _mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
{
- return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( -(__v2df) __A,
- (__v2df) __B,
- -(__v2df) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __A,
+ -(__v2df) __B,
+ -(__v2df) __C);
+ __A[0] = (__U & 1) ? __Z[0] : 0;
+ return __A;
}
#define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) __extension__ ({\
- (__m128d)__builtin_ia32_vfmaddsd3_maskz(-(__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
+ (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
+ -(__v2df)(__m128d)(B), \
-(__v2df)(__m128d)(C), \
(__mmask8)(U), \
_MM_FROUND_CUR_DIRECTION); })
@@ -8481,16 +8572,16 @@ _mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
{
- return (__m128d) __builtin_ia32_vfnmsubsd3_mask3 ((__v2df) (__W),
- (__v2df) __X,
- (__v2df) (__Y),
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __W,
+ -(__v2df) __X,
+ -(__v2df) __Y);
+ __Y[0] = (__U & 1) ? __Z[0] : __Y[0];
+ return __Y;
}
#define _mm_mask3_fnmsub_round_sd(W, X, Y, U, R) __extension__({\
- (__m128d)__builtin_ia32_vfnmsubsd3_mask3((__v2df)(__m128d)(W), \
- (__v2df)(__m128d)(X), \
+ (__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \
+ -(__v2df)(__m128d)(X), \
(__v2df)(__m128d)(Y), \
(__mmask8)(U), (int)(R)); })
OpenPOWER on IntegriCloud