summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--clang/include/clang/Basic/BuiltinsX86.def12
-rw-r--r--clang/lib/Headers/fma4intrin.h16
-rw-r--r--clang/lib/Headers/fmaintrin.h12
-rw-r--r--clang/test/CodeGen/fma-builtins.c20
-rw-r--r--clang/test/CodeGen/fma4-builtins.c24
5 files changed, 48 insertions, 36 deletions
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index 8962e7a41c8..b77033a0b38 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -682,14 +682,10 @@ TARGET_BUILTIN(__builtin_ia32_sha256msg2, "V4iV4iV4i", "", "sha")
// FMA
TARGET_BUILTIN(__builtin_ia32_vfmaddps, "V4fV4fV4fV4f", "", "fma|fma4")
TARGET_BUILTIN(__builtin_ia32_vfmaddpd, "V2dV2dV2dV2d", "", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfmaddss3, "V4fV4fV4fV4f", "", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfmaddsd3, "V2dV2dV2dV2d", "", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfmsubss3, "V4fV4fV4fV4f", "", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfmsubsd3, "V2dV2dV2dV2d", "", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfnmaddss3, "V4fV4fV4fV4f", "", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfnmaddsd3, "V2dV2dV2dV2d", "", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfnmsubss3, "V4fV4fV4fV4f", "", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfnmsubsd3, "V2dV2dV2dV2d", "", "fma|fma4")
+TARGET_BUILTIN(__builtin_ia32_vfmaddss3, "V4fV4fV4fV4f", "", "fma")
+TARGET_BUILTIN(__builtin_ia32_vfmaddsd3, "V2dV2dV2dV2d", "", "fma")
+TARGET_BUILTIN(__builtin_ia32_vfmaddss, "V4fV4fV4fV4f", "", "fma4")
+TARGET_BUILTIN(__builtin_ia32_vfmaddsd, "V2dV2dV2dV2d", "", "fma4")
TARGET_BUILTIN(__builtin_ia32_vfmaddsubps, "V4fV4fV4fV4f", "", "fma|fma4")
TARGET_BUILTIN(__builtin_ia32_vfmaddsubpd, "V2dV2dV2dV2d", "", "fma|fma4")
TARGET_BUILTIN(__builtin_ia32_vfmaddps256, "V8fV8fV8fV8f", "", "fma|fma4")
diff --git a/clang/lib/Headers/fma4intrin.h b/clang/lib/Headers/fma4intrin.h
index cda20e7d810..962b1a60a25 100644
--- a/clang/lib/Headers/fma4intrin.h
+++ b/clang/lib/Headers/fma4intrin.h
@@ -48,13 +48,13 @@ _mm_macc_pd(__m128d __A, __m128d __B, __m128d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_macc_ss(__m128 __A, __m128 __B, __m128 __C)
{
- return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+ return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_macc_sd(__m128d __A, __m128d __B, __m128d __C)
{
- return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, (__v2df)__C);
+ return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, (__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
@@ -72,13 +72,13 @@ _mm_msub_pd(__m128d __A, __m128d __B, __m128d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_msub_ss(__m128 __A, __m128 __B, __m128 __C)
{
- return (__m128)__builtin_ia32_vfmsubss3((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+ return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_msub_sd(__m128d __A, __m128d __B, __m128d __C)
{
- return (__m128d)__builtin_ia32_vfmsubsd3((__v2df)__A, (__v2df)__B, (__v2df)__C);
+ return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
@@ -96,13 +96,13 @@ _mm_nmacc_pd(__m128d __A, __m128d __B, __m128d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_nmacc_ss(__m128 __A, __m128 __B, __m128 __C)
{
- return (__m128)__builtin_ia32_vfnmaddss3((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+ return (__m128)__builtin_ia32_vfmaddss(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_nmacc_sd(__m128d __A, __m128d __B, __m128d __C)
{
- return (__m128d)__builtin_ia32_vfnmaddsd3((__v2df)__A, (__v2df)__B, (__v2df)__C);
+ return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B, (__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
@@ -120,13 +120,13 @@ _mm_nmsub_pd(__m128d __A, __m128d __B, __m128d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_nmsub_ss(__m128 __A, __m128 __B, __m128 __C)
{
- return (__m128)__builtin_ia32_vfnmsubss3((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+ return (__m128)__builtin_ia32_vfmaddss(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_nmsub_sd(__m128d __A, __m128d __B, __m128d __C)
{
- return (__m128d)__builtin_ia32_vfnmsubsd3((__v2df)__A, (__v2df)__B, (__v2df)__C);
+ return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
diff --git a/clang/lib/Headers/fmaintrin.h b/clang/lib/Headers/fmaintrin.h
index 86a1198879b..478a0ac81cf 100644
--- a/clang/lib/Headers/fmaintrin.h
+++ b/clang/lib/Headers/fmaintrin.h
@@ -70,13 +70,13 @@ _mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C)
{
- return (__m128)__builtin_ia32_vfmsubss3((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+ return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C)
{
- return (__m128d)__builtin_ia32_vfmsubsd3((__v2df)__A, (__v2df)__B, (__v2df)__C);
+ return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, -(__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
@@ -94,13 +94,13 @@ _mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C)
{
- return (__m128)__builtin_ia32_vfnmaddss3((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+ return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, (__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C)
{
- return (__m128d)__builtin_ia32_vfnmaddsd3((__v2df)__A, (__v2df)__B, (__v2df)__C);
+ return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, (__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
@@ -118,13 +118,13 @@ _mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C)
{
- return (__m128)__builtin_ia32_vfnmsubss3((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+ return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, -(__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C)
{
- return (__m128d)__builtin_ia32_vfnmsubsd3((__v2df)__A, (__v2df)__B, (__v2df)__C);
+ return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, -(__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
diff --git a/clang/test/CodeGen/fma-builtins.c b/clang/test/CodeGen/fma-builtins.c
index d5905d33a97..6f792a78d84 100644
--- a/clang/test/CodeGen/fma-builtins.c
+++ b/clang/test/CodeGen/fma-builtins.c
@@ -43,13 +43,15 @@ __m128d test_mm_fmsub_pd(__m128d a, __m128d b, __m128d c) {
__m128 test_mm_fmsub_ss(__m128 a, __m128 b, __m128 c) {
// CHECK-LABEL: test_mm_fmsub_ss
- // CHECK: @llvm.x86.fma.vfmsub.ss
+ // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}}
+ // CHECK: @llvm.x86.fma.vfmadd.ss(<4 x float> %{{.+}}, <4 x float> %{{.+}}, <4 x float> [[NEG]])
return _mm_fmsub_ss(a, b, c);
}
__m128d test_mm_fmsub_sd(__m128d a, __m128d b, __m128d c) {
// CHECK-LABEL: test_mm_fmsub_sd
- // CHECK: @llvm.x86.fma.vfmsub.sd
+ // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+ // CHECK: @llvm.x86.fma.vfmadd.sd(<2 x double> %{{.+}}, <2 x double> %{{.+}}, <2 x double> [[NEG]])
return _mm_fmsub_sd(a, b, c);
}
@@ -69,13 +71,15 @@ __m128d test_mm_fnmadd_pd(__m128d a, __m128d b, __m128d c) {
__m128 test_mm_fnmadd_ss(__m128 a, __m128 b, __m128 c) {
// CHECK-LABEL: test_mm_fnmadd_ss
- // CHECK: @llvm.x86.fma.vfnmadd.ss
+ // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}}
+ // CHECK: @llvm.x86.fma.vfmadd.ss(<4 x float> %{{.+}}, <4 x float> [[NEG]], <4 x float> %{{.+}})
return _mm_fnmadd_ss(a, b, c);
}
__m128d test_mm_fnmadd_sd(__m128d a, __m128d b, __m128d c) {
// CHECK-LABEL: test_mm_fnmadd_sd
- // CHECK: @llvm.x86.fma.vfnmadd.sd
+ // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+ // CHECK: @llvm.x86.fma.vfmadd.sd(<2 x double> %{{.+}}, <2 x double> [[NEG]], <2 x double> %{{.+}})
return _mm_fnmadd_sd(a, b, c);
}
@@ -97,13 +101,17 @@ __m128d test_mm_fnmsub_pd(__m128d a, __m128d b, __m128d c) {
__m128 test_mm_fnmsub_ss(__m128 a, __m128 b, __m128 c) {
// CHECK-LABEL: test_mm_fnmsub_ss
- // CHECK: @llvm.x86.fma.vfnmsub.ss
+ // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}}
+ // CHECK: [[NEG2:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}}
+ // CHECK: @llvm.x86.fma.vfmadd.ss(<4 x float> %{{.+}}, <4 x float> [[NEG]], <4 x float> [[NEG2]])
return _mm_fnmsub_ss(a, b, c);
}
__m128d test_mm_fnmsub_sd(__m128d a, __m128d b, __m128d c) {
// CHECK-LABEL: test_mm_fnmsub_sd
- // CHECK: @llvm.x86.fma.vfnmsub.sd
+ // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+ // CHECK: [[NEG2:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+ // CHECK: @llvm.x86.fma.vfmadd.sd(<2 x double> %{{.+}}, <2 x double> [[NEG]], <2 x double> [[NEG2]])
return _mm_fnmsub_sd(a, b, c);
}
diff --git a/clang/test/CodeGen/fma4-builtins.c b/clang/test/CodeGen/fma4-builtins.c
index 68a2ec6e3a9..c848d4a751d 100644
--- a/clang/test/CodeGen/fma4-builtins.c
+++ b/clang/test/CodeGen/fma4-builtins.c
@@ -17,13 +17,13 @@ __m128d test_mm_macc_pd(__m128d a, __m128d b, __m128d c) {
__m128 test_mm_macc_ss(__m128 a, __m128 b, __m128 c) {
// CHECK-LABEL: test_mm_macc_ss
- // CHECK: @llvm.x86.fma.vfmadd.ss
+ // CHECK: @llvm.x86.fma4.vfmadd.ss
return _mm_macc_ss(a, b, c);
}
__m128d test_mm_macc_sd(__m128d a, __m128d b, __m128d c) {
// CHECK-LABEL: test_mm_macc_sd
- // CHECK: @llvm.x86.fma.vfmadd.sd
+ // CHECK: @llvm.x86.fma4.vfmadd.sd
return _mm_macc_sd(a, b, c);
}
@@ -43,13 +43,15 @@ __m128d test_mm_msub_pd(__m128d a, __m128d b, __m128d c) {
__m128 test_mm_msub_ss(__m128 a, __m128 b, __m128 c) {
// CHECK-LABEL: test_mm_msub_ss
- // CHECK: @llvm.x86.fma.vfmsub.ss
+ // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}}
+ // CHECK: @llvm.x86.fma4.vfmadd.ss(<4 x float> %{{.+}}, <4 x float> %{{.+}}, <4 x float> [[NEG]])
return _mm_msub_ss(a, b, c);
}
__m128d test_mm_msub_sd(__m128d a, __m128d b, __m128d c) {
// CHECK-LABEL: test_mm_msub_sd
- // CHECK: @llvm.x86.fma.vfmsub.sd
+ // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+ // CHECK: @llvm.x86.fma4.vfmadd.sd(<2 x double> %{{.+}}, <2 x double> %{{.+}}, <2 x double> [[NEG]])
return _mm_msub_sd(a, b, c);
}
@@ -69,13 +71,15 @@ __m128d test_mm_nmacc_pd(__m128d a, __m128d b, __m128d c) {
__m128 test_mm_nmacc_ss(__m128 a, __m128 b, __m128 c) {
// CHECK-LABEL: test_mm_nmacc_ss
- // CHECK: @llvm.x86.fma.vfnmadd.ss
+ // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}}
+ // CHECK: @llvm.x86.fma4.vfmadd.ss(<4 x float> [[NEG]], <4 x float> %{{.+}}, <4 x float> %{{.+}})
return _mm_nmacc_ss(a, b, c);
}
__m128d test_mm_nmacc_sd(__m128d a, __m128d b, __m128d c) {
// CHECK-LABEL: test_mm_nmacc_sd
- // CHECK: @llvm.x86.fma.vfnmadd.sd
+ // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+ // CHECK: @llvm.x86.fma4.vfmadd.sd(<2 x double> [[NEG]], <2 x double> %{{.+}}, <2 x double> %{{.+}})
return _mm_nmacc_sd(a, b, c);
}
@@ -97,13 +101,17 @@ __m128d test_mm_nmsub_pd(__m128d a, __m128d b, __m128d c) {
__m128 test_mm_nmsub_ss(__m128 a, __m128 b, __m128 c) {
// CHECK-LABEL: test_mm_nmsub_ss
- // CHECK: @llvm.x86.fma.vfnmsub.ss
+ // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}}
+ // CHECK: [[NEG2:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}}
+ // CHECK: @llvm.x86.fma4.vfmadd.ss(<4 x float> [[NEG]], <4 x float> %{{.+}}, <4 x float> [[NEG2]])
return _mm_nmsub_ss(a, b, c);
}
__m128d test_mm_nmsub_sd(__m128d a, __m128d b, __m128d c) {
// CHECK-LABEL: test_mm_nmsub_sd
- // CHECK: @llvm.x86.fma.vfnmsub.sd
+ // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+ // CHECK: [[NEG2:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+ // CHECK: @llvm.x86.fma4.vfmadd.sd(<2 x double> [[NEG]], <2 x double> %{{.+}}, <2 x double> [[NEG2]])
return _mm_nmsub_sd(a, b, c);
}
OpenPOWER on IntegriCloud