summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--clang/include/clang/Basic/BuiltinsX86.def64
-rw-r--r--clang/lib/CodeGen/CGBuiltin.cpp13
-rw-r--r--clang/lib/Headers/avx512vlintrin.h38
-rw-r--r--clang/test/CodeGen/target-builtin-noerror.c13
4 files changed, 72 insertions, 56 deletions
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index 42948469b70..c77b2ba4d75 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -718,38 +718,38 @@ TARGET_BUILTIN(__builtin_ia32_sha256msg1, "V4iV4iV4i", "", "sha")
TARGET_BUILTIN(__builtin_ia32_sha256msg2, "V4iV4iV4i", "", "sha")
// FMA
-TARGET_BUILTIN(__builtin_ia32_vfmaddps, "V4fV4fV4fV4f", "", "fma,fma4")
-TARGET_BUILTIN(__builtin_ia32_vfmaddpd, "V2dV2dV2dV2d", "", "fma,fma4")
-TARGET_BUILTIN(__builtin_ia32_vfmaddss, "V4fV4fV4fV4f", "", "fma,fma4")
-TARGET_BUILTIN(__builtin_ia32_vfmaddsd, "V2dV2dV2dV2d", "", "fma,fma4")
-TARGET_BUILTIN(__builtin_ia32_vfmsubps, "V4fV4fV4fV4f", "", "fma,fma4")
-TARGET_BUILTIN(__builtin_ia32_vfmsubpd, "V2dV2dV2dV2d", "", "fma,fma4")
-TARGET_BUILTIN(__builtin_ia32_vfmsubss, "V4fV4fV4fV4f", "", "fma,fma4")
-TARGET_BUILTIN(__builtin_ia32_vfmsubsd, "V2dV2dV2dV2d", "", "fma,fma4")
-TARGET_BUILTIN(__builtin_ia32_vfnmaddps, "V4fV4fV4fV4f", "", "fma,fma4")
-TARGET_BUILTIN(__builtin_ia32_vfnmaddpd, "V2dV2dV2dV2d", "", "fma,fma4")
-TARGET_BUILTIN(__builtin_ia32_vfnmaddss, "V4fV4fV4fV4f", "", "fma,fma4")
-TARGET_BUILTIN(__builtin_ia32_vfnmaddsd, "V2dV2dV2dV2d", "", "fma,fma4")
-TARGET_BUILTIN(__builtin_ia32_vfnmsubps, "V4fV4fV4fV4f", "", "fma,fma4")
-TARGET_BUILTIN(__builtin_ia32_vfnmsubpd, "V2dV2dV2dV2d", "", "fma,fma4")
-TARGET_BUILTIN(__builtin_ia32_vfnmsubss, "V4fV4fV4fV4f", "", "fma,fma4")
-TARGET_BUILTIN(__builtin_ia32_vfnmsubsd, "V2dV2dV2dV2d", "", "fma,fma4")
-TARGET_BUILTIN(__builtin_ia32_vfmaddsubps, "V4fV4fV4fV4f", "", "fma,fma4")
-TARGET_BUILTIN(__builtin_ia32_vfmaddsubpd, "V2dV2dV2dV2d", "", "fma,fma4")
-TARGET_BUILTIN(__builtin_ia32_vfmsubaddps, "V4fV4fV4fV4f", "", "fma,fma4")
-TARGET_BUILTIN(__builtin_ia32_vfmsubaddpd, "V2dV2dV2dV2d", "", "fma,fma4")
-TARGET_BUILTIN(__builtin_ia32_vfmaddps256, "V8fV8fV8fV8f", "", "fma,fma4")
-TARGET_BUILTIN(__builtin_ia32_vfmaddpd256, "V4dV4dV4dV4d", "", "fma,fma4")
-TARGET_BUILTIN(__builtin_ia32_vfmsubps256, "V8fV8fV8fV8f", "", "fma,fma4")
-TARGET_BUILTIN(__builtin_ia32_vfmsubpd256, "V4dV4dV4dV4d", "", "fma,fma4")
-TARGET_BUILTIN(__builtin_ia32_vfnmaddps256, "V8fV8fV8fV8f", "", "fma,fma4")
-TARGET_BUILTIN(__builtin_ia32_vfnmaddpd256, "V4dV4dV4dV4d", "", "fma,fma4")
-TARGET_BUILTIN(__builtin_ia32_vfnmsubps256, "V8fV8fV8fV8f", "", "fma,fma4")
-TARGET_BUILTIN(__builtin_ia32_vfnmsubpd256, "V4dV4dV4dV4d", "", "fma,fma4")
-TARGET_BUILTIN(__builtin_ia32_vfmaddsubps256, "V8fV8fV8fV8f", "", "fma,fma4")
-TARGET_BUILTIN(__builtin_ia32_vfmaddsubpd256, "V4dV4dV4dV4d", "", "fma,fma4")
-TARGET_BUILTIN(__builtin_ia32_vfmsubaddps256, "V8fV8fV8fV8f", "", "fma,fma4")
-TARGET_BUILTIN(__builtin_ia32_vfmsubaddpd256, "V4dV4dV4dV4d", "", "fma,fma4")
+TARGET_BUILTIN(__builtin_ia32_vfmaddps, "V4fV4fV4fV4f", "", "fma|fma4")
+TARGET_BUILTIN(__builtin_ia32_vfmaddpd, "V2dV2dV2dV2d", "", "fma|fma4")
+TARGET_BUILTIN(__builtin_ia32_vfmaddss, "V4fV4fV4fV4f", "", "fma|fma4")
+TARGET_BUILTIN(__builtin_ia32_vfmaddsd, "V2dV2dV2dV2d", "", "fma|fma4")
+TARGET_BUILTIN(__builtin_ia32_vfmsubps, "V4fV4fV4fV4f", "", "fma|fma4")
+TARGET_BUILTIN(__builtin_ia32_vfmsubpd, "V2dV2dV2dV2d", "", "fma|fma4")
+TARGET_BUILTIN(__builtin_ia32_vfmsubss, "V4fV4fV4fV4f", "", "fma|fma4")
+TARGET_BUILTIN(__builtin_ia32_vfmsubsd, "V2dV2dV2dV2d", "", "fma|fma4")
+TARGET_BUILTIN(__builtin_ia32_vfnmaddps, "V4fV4fV4fV4f", "", "fma|fma4")
+TARGET_BUILTIN(__builtin_ia32_vfnmaddpd, "V2dV2dV2dV2d", "", "fma|fma4")
+TARGET_BUILTIN(__builtin_ia32_vfnmaddss, "V4fV4fV4fV4f", "", "fma|fma4")
+TARGET_BUILTIN(__builtin_ia32_vfnmaddsd, "V2dV2dV2dV2d", "", "fma|fma4")
+TARGET_BUILTIN(__builtin_ia32_vfnmsubps, "V4fV4fV4fV4f", "", "fma|fma4")
+TARGET_BUILTIN(__builtin_ia32_vfnmsubpd, "V2dV2dV2dV2d", "", "fma|fma4")
+TARGET_BUILTIN(__builtin_ia32_vfnmsubss, "V4fV4fV4fV4f", "", "fma|fma4")
+TARGET_BUILTIN(__builtin_ia32_vfnmsubsd, "V2dV2dV2dV2d", "", "fma|fma4")
+TARGET_BUILTIN(__builtin_ia32_vfmaddsubps, "V4fV4fV4fV4f", "", "fma|fma4")
+TARGET_BUILTIN(__builtin_ia32_vfmaddsubpd, "V2dV2dV2dV2d", "", "fma|fma4")
+TARGET_BUILTIN(__builtin_ia32_vfmsubaddps, "V4fV4fV4fV4f", "", "fma|fma4")
+TARGET_BUILTIN(__builtin_ia32_vfmsubaddpd, "V2dV2dV2dV2d", "", "fma|fma4")
+TARGET_BUILTIN(__builtin_ia32_vfmaddps256, "V8fV8fV8fV8f", "", "fma|fma4")
+TARGET_BUILTIN(__builtin_ia32_vfmaddpd256, "V4dV4dV4dV4d", "", "fma|fma4")
+TARGET_BUILTIN(__builtin_ia32_vfmsubps256, "V8fV8fV8fV8f", "", "fma|fma4")
+TARGET_BUILTIN(__builtin_ia32_vfmsubpd256, "V4dV4dV4dV4d", "", "fma|fma4")
+TARGET_BUILTIN(__builtin_ia32_vfnmaddps256, "V8fV8fV8fV8f", "", "fma|fma4")
+TARGET_BUILTIN(__builtin_ia32_vfnmaddpd256, "V4dV4dV4dV4d", "", "fma|fma4")
+TARGET_BUILTIN(__builtin_ia32_vfnmsubps256, "V8fV8fV8fV8f", "", "fma|fma4")
+TARGET_BUILTIN(__builtin_ia32_vfnmsubpd256, "V4dV4dV4dV4d", "", "fma|fma4")
+TARGET_BUILTIN(__builtin_ia32_vfmaddsubps256, "V8fV8fV8fV8f", "", "fma|fma4")
+TARGET_BUILTIN(__builtin_ia32_vfmaddsubpd256, "V4dV4dV4dV4d", "", "fma|fma4")
+TARGET_BUILTIN(__builtin_ia32_vfmsubaddps256, "V8fV8fV8fV8f", "", "fma|fma4")
+TARGET_BUILTIN(__builtin_ia32_vfmsubaddpd256, "V4dV4dV4dV4d", "", "fma|fma4")
TARGET_BUILTIN(__builtin_ia32_vfmaddpd128_mask, "V2dV2dV2dV2dUc", "", "avx512vl")
TARGET_BUILTIN(__builtin_ia32_vfmaddpd128_mask3, "V2dV2dV2dV2dUc", "", "avx512vl")
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index c3fd54e7b22..5274b53ccc1 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -341,10 +341,15 @@ bool CodeGenFunction::checkBuiltinTargetFeatures(
// true, otherwise return false.
SmallVector<StringRef, 1> AttrFeatures;
StringRef(FeatureList).split(AttrFeatures, ",");
- for (const auto &Feature : AttrFeatures)
- if (FeatureMap[Feature])
- return true;
- return false;
+ return std::all_of(AttrFeatures.begin(), AttrFeatures.end(),
+ [&](StringRef &Feature) {
+ SmallVector<StringRef, 1> OrFeatures;
+ Feature.split(OrFeatures, "|");
+ return std::any_of(OrFeatures.begin(), OrFeatures.end(),
+ [&](StringRef &Feature) {
+ return FeatureMap[Feature];
+ });
+ });
}
RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h
index 622385ae77f..8f13536fbb0 100644
--- a/clang/lib/Headers/avx512vlintrin.h
+++ b/clang/lib/Headers/avx512vlintrin.h
@@ -28,18 +28,18 @@
#ifndef __AVX512VLINTRIN_H
#define __AVX512VLINTRIN_H
-/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl")))
+#define __DEFAULT_FN_ATTRS_BOTH __attribute__((__always_inline__, __nodebug__, __target__("avx512vl, avx512bw")))
/* Integer compare */
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS_BOTH
_mm_cmpeq_epi32_mask(__m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_pcmpeqd128_mask((__v4si)__a, (__v4si)__b,
(__mmask8)-1);
}
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS_BOTH
_mm_mask_cmpeq_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_pcmpeqd128_mask((__v4si)__a, (__v4si)__b,
__u);
@@ -57,13 +57,13 @@ _mm_mask_cmpeq_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
__u);
}
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS_BOTH
_mm256_cmpeq_epi32_mask(__m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_pcmpeqd256_mask((__v8si)__a, (__v8si)__b,
(__mmask8)-1);
}
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS_BOTH
_mm256_mask_cmpeq_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_pcmpeqd256_mask((__v8si)__a, (__v8si)__b,
__u);
@@ -81,13 +81,13 @@ _mm256_mask_cmpeq_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
__u);
}
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS_BOTH
_mm_cmpeq_epi64_mask(__m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_pcmpeqq128_mask((__v2di)__a, (__v2di)__b,
(__mmask8)-1);
}
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS_BOTH
_mm_mask_cmpeq_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_pcmpeqq128_mask((__v2di)__a, (__v2di)__b,
__u);
@@ -105,13 +105,13 @@ _mm_mask_cmpeq_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
__u);
}
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS_BOTH
_mm256_cmpeq_epi64_mask(__m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_pcmpeqq256_mask((__v4di)__a, (__v4di)__b,
(__mmask8)-1);
}
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS_BOTH
_mm256_mask_cmpeq_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_pcmpeqq256_mask((__v4di)__a, (__v4di)__b,
__u);
@@ -226,16 +226,13 @@ _mm256_mask_cmpge_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
__u);
}
-
-
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS_BOTH
_mm_cmpgt_epi32_mask(__m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_pcmpgtd128_mask((__v4si)__a, (__v4si)__b,
(__mmask8)-1);
}
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS_BOTH
_mm_mask_cmpgt_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_pcmpgtd128_mask((__v4si)__a, (__v4si)__b,
__u);
@@ -253,13 +250,13 @@ _mm_mask_cmpgt_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
__u);
}
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS_BOTH
_mm256_cmpgt_epi32_mask(__m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_pcmpgtd256_mask((__v8si)__a, (__v8si)__b,
(__mmask8)-1);
}
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS_BOTH
_mm256_mask_cmpgt_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_pcmpgtd256_mask((__v8si)__a, (__v8si)__b,
__u);
@@ -277,13 +274,13 @@ _mm256_mask_cmpgt_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
__u);
}
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS_BOTH
_mm_cmpgt_epi64_mask(__m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_pcmpgtq128_mask((__v2di)__a, (__v2di)__b,
(__mmask8)-1);
}
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS_BOTH
_mm_mask_cmpgt_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_pcmpgtq128_mask((__v2di)__a, (__v2di)__b,
__u);
@@ -301,13 +298,13 @@ _mm_mask_cmpgt_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
__u);
}
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS_BOTH
_mm256_cmpgt_epi64_mask(__m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_pcmpgtq256_mask((__v4di)__a, (__v4di)__b,
(__mmask8)-1);
}
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS_BOTH
_mm256_mask_cmpgt_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_pcmpgtq256_mask((__v4di)__a, (__v4di)__b,
__u);
@@ -4604,5 +4601,6 @@ _mm256_maskz_permutex2var_epi64 (__mmask8 __U, __m256i __A,
}
#undef __DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS_BOTH
#endif /* __AVX512VLINTRIN_H */
diff --git a/clang/test/CodeGen/target-builtin-noerror.c b/clang/test/CodeGen/target-builtin-noerror.c
index 569fb0a60ac..7d86b968462 100644
--- a/clang/test/CodeGen/target-builtin-noerror.c
+++ b/clang/test/CodeGen/target-builtin-noerror.c
@@ -29,3 +29,16 @@ int qq() {
else
return qq_noavx();
}
+
+// Test that fma and fma4 are both separately and combined valid for an fma intrinsic.
+__m128 __attribute__((target("fma"))) fma_1(__m128 a, __m128 b, __m128 c) {
+ return __builtin_ia32_vfmaddps(a, b, c);
+}
+
+__m128 __attribute__((target("fma4"))) fma_2(__m128 a, __m128 b, __m128 c) {
+ return __builtin_ia32_vfmaddps(a, b, c);
+}
+
+__m128 __attribute__((target("fma,fma4"))) fma_3(__m128 a, __m128 b, __m128 c) {
+ return __builtin_ia32_vfmaddps(a, b, c);
+}
OpenPOWER on IntegriCloud