summaryrefslogtreecommitdiffstats
path: root/clang/lib
diff options
context:
space:
mode:
authorCraig Topper <craig.topper@intel.com>2017-11-25 19:32:12 +0000
committerCraig Topper <craig.topper@intel.com>2017-11-25 19:32:12 +0000
commit9e032ed55aae8cfd0e72b3b73dbf2f0402a27435 (patch)
treeb318c3ab030a8f02bafd04921720234292180550 /clang/lib
parente485631cd148e18701254634a3003e6bb5797eb2 (diff)
downloadbcm5719-llvm-9e032ed55aae8cfd0e72b3b73dbf2f0402a27435.tar.gz
bcm5719-llvm-9e032ed55aae8cfd0e72b3b73dbf2f0402a27435.zip
[X86] Use separate builtins for fma4 scalar intrinsics. Use negations to remove some of the scalar fma3 builtins.
fma4 instructions zero the upper bits of the xmm register. fma3 instructions leave the bits unmodified. This requires separate builtins for the different semantics. While we're cleaning up the scalar builtins this also removes the fma3 fmsub/fnmadd/fnmsub builtins by using negates in the header file. llvm-svn: 318985
Diffstat (limited to 'clang/lib')
-rw-r--r--clang/lib/Headers/fma4intrin.h16
-rw-r--r--clang/lib/Headers/fmaintrin.h12
2 files changed, 14 insertions, 14 deletions
diff --git a/clang/lib/Headers/fma4intrin.h b/clang/lib/Headers/fma4intrin.h
index cda20e7d810..962b1a60a25 100644
--- a/clang/lib/Headers/fma4intrin.h
+++ b/clang/lib/Headers/fma4intrin.h
@@ -48,13 +48,13 @@ _mm_macc_pd(__m128d __A, __m128d __B, __m128d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_macc_ss(__m128 __A, __m128 __B, __m128 __C)
{
- return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+ return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_macc_sd(__m128d __A, __m128d __B, __m128d __C)
{
- return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, (__v2df)__C);
+ return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, (__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
@@ -72,13 +72,13 @@ _mm_msub_pd(__m128d __A, __m128d __B, __m128d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_msub_ss(__m128 __A, __m128 __B, __m128 __C)
{
- return (__m128)__builtin_ia32_vfmsubss3((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+ return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_msub_sd(__m128d __A, __m128d __B, __m128d __C)
{
- return (__m128d)__builtin_ia32_vfmsubsd3((__v2df)__A, (__v2df)__B, (__v2df)__C);
+ return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
@@ -96,13 +96,13 @@ _mm_nmacc_pd(__m128d __A, __m128d __B, __m128d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_nmacc_ss(__m128 __A, __m128 __B, __m128 __C)
{
- return (__m128)__builtin_ia32_vfnmaddss3((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+ return (__m128)__builtin_ia32_vfmaddss(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_nmacc_sd(__m128d __A, __m128d __B, __m128d __C)
{
- return (__m128d)__builtin_ia32_vfnmaddsd3((__v2df)__A, (__v2df)__B, (__v2df)__C);
+ return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B, (__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
@@ -120,13 +120,13 @@ _mm_nmsub_pd(__m128d __A, __m128d __B, __m128d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_nmsub_ss(__m128 __A, __m128 __B, __m128 __C)
{
- return (__m128)__builtin_ia32_vfnmsubss3((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+ return (__m128)__builtin_ia32_vfmaddss(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_nmsub_sd(__m128d __A, __m128d __B, __m128d __C)
{
- return (__m128d)__builtin_ia32_vfnmsubsd3((__v2df)__A, (__v2df)__B, (__v2df)__C);
+ return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
diff --git a/clang/lib/Headers/fmaintrin.h b/clang/lib/Headers/fmaintrin.h
index 86a1198879b..478a0ac81cf 100644
--- a/clang/lib/Headers/fmaintrin.h
+++ b/clang/lib/Headers/fmaintrin.h
@@ -70,13 +70,13 @@ _mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C)
{
- return (__m128)__builtin_ia32_vfmsubss3((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+ return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C)
{
- return (__m128d)__builtin_ia32_vfmsubsd3((__v2df)__A, (__v2df)__B, (__v2df)__C);
+ return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, -(__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
@@ -94,13 +94,13 @@ _mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C)
{
- return (__m128)__builtin_ia32_vfnmaddss3((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+ return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, (__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C)
{
- return (__m128d)__builtin_ia32_vfnmaddsd3((__v2df)__A, (__v2df)__B, (__v2df)__C);
+ return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, (__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
@@ -118,13 +118,13 @@ _mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C)
{
- return (__m128)__builtin_ia32_vfnmsubss3((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+ return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, -(__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C)
{
- return (__m128d)__builtin_ia32_vfnmsubsd3((__v2df)__A, (__v2df)__B, (__v2df)__C);
+ return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, -(__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
OpenPOWER on IntegriCloud