summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--clang/include/clang/Basic/BuiltinsX86.def8
-rw-r--r--clang/lib/CodeGen/CGBuiltin.cpp44
-rw-r--r--clang/lib/Headers/avx512bwintrin.h36
-rw-r--r--clang/lib/Headers/avx512dqintrin.h36
-rw-r--r--clang/test/CodeGen/avx512bw-builtins.c68
-rw-r--r--clang/test/CodeGen/avx512dq-builtins.c68
6 files changed, 260 insertions, 0 deletions
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index 11c330518ee..db81f58e082 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -1761,6 +1761,14 @@ TARGET_BUILTIN(__builtin_ia32_kortestcsi, "iUiUi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_kortestzsi, "iUiUi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_kortestcdi, "iULLiULLi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_kortestzdi, "iULLiULLi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_ktestcqi, "iUcUc", "nc", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_ktestzqi, "iUcUc", "nc", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_ktestchi, "iUsUs", "nc", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_ktestzhi, "iUsUs", "nc", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_ktestcsi, "iUiUi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_ktestzsi, "iUiUi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_ktestcdi, "iULLiULLi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_ktestzdi, "iULLiULLi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_kunpckhi, "UsUsUs", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_kxnorqi, "UcUcUc", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_kxnorhi, "UsUsUs", "nc", "avx512f")
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 58f8b7d33e2..789f01cc822 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -10075,6 +10075,50 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
}
+ case X86::BI__builtin_ia32_ktestcqi:
+ case X86::BI__builtin_ia32_ktestzqi:
+ case X86::BI__builtin_ia32_ktestchi:
+ case X86::BI__builtin_ia32_ktestzhi:
+ case X86::BI__builtin_ia32_ktestcsi:
+ case X86::BI__builtin_ia32_ktestzsi:
+ case X86::BI__builtin_ia32_ktestcdi:
+ case X86::BI__builtin_ia32_ktestzdi: {
+ Intrinsic::ID IID;
+ switch (BuiltinID) {
+ default: llvm_unreachable("Unsupported intrinsic!");
+ case X86::BI__builtin_ia32_ktestcqi:
+ IID = Intrinsic::x86_avx512_ktestc_b;
+ break;
+ case X86::BI__builtin_ia32_ktestzqi:
+ IID = Intrinsic::x86_avx512_ktestz_b;
+ break;
+ case X86::BI__builtin_ia32_ktestchi:
+ IID = Intrinsic::x86_avx512_ktestc_w;
+ break;
+ case X86::BI__builtin_ia32_ktestzhi:
+ IID = Intrinsic::x86_avx512_ktestz_w;
+ break;
+ case X86::BI__builtin_ia32_ktestcsi:
+ IID = Intrinsic::x86_avx512_ktestc_d;
+ break;
+ case X86::BI__builtin_ia32_ktestzsi:
+ IID = Intrinsic::x86_avx512_ktestz_d;
+ break;
+ case X86::BI__builtin_ia32_ktestcdi:
+ IID = Intrinsic::x86_avx512_ktestc_q;
+ break;
+ case X86::BI__builtin_ia32_ktestzdi:
+ IID = Intrinsic::x86_avx512_ktestz_q;
+ break;
+ }
+
+ unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
+ Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
+ Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
+ Function *Intr = CGM.getIntrinsic(IID);
+ return Builder.CreateCall(Intr, {LHS, RHS});
+ }
+
case X86::BI__builtin_ia32_kaddqi:
case X86::BI__builtin_ia32_kaddhi:
case X86::BI__builtin_ia32_kaddsi:
diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h
index 740f5cd05ff..bff5b975c13 100644
--- a/clang/lib/Headers/avx512bwintrin.h
+++ b/clang/lib/Headers/avx512bwintrin.h
@@ -143,6 +143,42 @@ _kortest_mask64_u8(__mmask64 __A, __mmask64 __B, unsigned char *__C) {
return (unsigned char)__builtin_ia32_kortestzdi(__A, __B);
}
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
+_ktestc_mask32_u8(__mmask32 __A, __mmask32 __B)
+{
+ return (unsigned char)__builtin_ia32_ktestcsi(__A, __B);
+}
+
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
+_ktestz_mask32_u8(__mmask32 __A, __mmask32 __B)
+{
+ return (unsigned char)__builtin_ia32_ktestzsi(__A, __B);
+}
+
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
+_ktest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) {
+ *__C = (unsigned char)__builtin_ia32_ktestcsi(__A, __B);
+ return (unsigned char)__builtin_ia32_ktestzsi(__A, __B);
+}
+
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
+_ktestc_mask64_u8(__mmask64 __A, __mmask64 __B)
+{
+ return (unsigned char)__builtin_ia32_ktestcdi(__A, __B);
+}
+
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
+_ktestz_mask64_u8(__mmask64 __A, __mmask64 __B)
+{
+ return (unsigned char)__builtin_ia32_ktestzdi(__A, __B);
+}
+
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
+_ktest_mask64_u8(__mmask64 __A, __mmask64 __B, unsigned char *__C) {
+ *__C = (unsigned char)__builtin_ia32_ktestcdi(__A, __B);
+ return (unsigned char)__builtin_ia32_ktestzdi(__A, __B);
+}
+
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
_kadd_mask32(__mmask32 __A, __mmask32 __B)
{
diff --git a/clang/lib/Headers/avx512dqintrin.h b/clang/lib/Headers/avx512dqintrin.h
index 7e025e953e2..6e6c293af22 100644
--- a/clang/lib/Headers/avx512dqintrin.h
+++ b/clang/lib/Headers/avx512dqintrin.h
@@ -86,6 +86,42 @@ _kortest_mask8_u8(__mmask8 __A, __mmask8 __B, unsigned char *__C) {
return (unsigned char)__builtin_ia32_kortestzqi(__A, __B);
}
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
+_ktestc_mask8_u8(__mmask8 __A, __mmask8 __B)
+{
+ return (unsigned char)__builtin_ia32_ktestcqi(__A, __B);
+}
+
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
+_ktestz_mask8_u8(__mmask8 __A, __mmask8 __B)
+{
+ return (unsigned char)__builtin_ia32_ktestzqi(__A, __B);
+}
+
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
+_ktest_mask8_u8(__mmask8 __A, __mmask8 __B, unsigned char *__C) {
+ *__C = (unsigned char)__builtin_ia32_ktestcqi(__A, __B);
+ return (unsigned char)__builtin_ia32_ktestzqi(__A, __B);
+}
+
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
+_ktestc_mask16_u8(__mmask16 __A, __mmask16 __B)
+{
+ return (unsigned char)__builtin_ia32_ktestchi(__A, __B);
+}
+
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
+_ktestz_mask16_u8(__mmask16 __A, __mmask16 __B)
+{
+ return (unsigned char)__builtin_ia32_ktestzhi(__A, __B);
+}
+
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
+_ktest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C) {
+ *__C = (unsigned char)__builtin_ia32_ktestchi(__A, __B);
+ return (unsigned char)__builtin_ia32_ktestzhi(__A, __B);
+}
+
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_kadd_mask8(__mmask8 __A, __mmask8 __B)
{
diff --git a/clang/test/CodeGen/avx512bw-builtins.c b/clang/test/CodeGen/avx512bw-builtins.c
index 22bc9747cb8..01476c9e89c 100644
--- a/clang/test/CodeGen/avx512bw-builtins.c
+++ b/clang/test/CodeGen/avx512bw-builtins.c
@@ -226,6 +226,74 @@ unsigned char test_kortest_mask64_u8(__m512i __A, __m512i __B, __m512i __C, __m5
_mm512_cmpneq_epu8_mask(__C, __D), CF);
}
+unsigned char test_ktestz_mask32_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
+ // CHECK-LABEL: @test_ktestz_mask32_u8
+ // CHECK: [[LHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
+ // CHECK: [[RHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
+ // CHECK: [[RES:%.*]] = call i32 @llvm.x86.avx512.ktestz.d(<32 x i1> [[LHS]], <32 x i1> [[RHS]])
+ // CHECK: trunc i32 [[RES]] to i8
+ return _ktestz_mask32_u8(_mm512_cmpneq_epu16_mask(__A, __B),
+ _mm512_cmpneq_epu16_mask(__C, __D));
+}
+
+unsigned char test_ktestc_mask32_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
+ // CHECK-LABEL: @test_ktestc_mask32_u8
+ // CHECK: [[LHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
+ // CHECK: [[RHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
+ // CHECK: [[RES:%.*]] = call i32 @llvm.x86.avx512.ktestc.d(<32 x i1> [[LHS]], <32 x i1> [[RHS]])
+ // CHECK: trunc i32 [[RES]] to i8
+ return _ktestc_mask32_u8(_mm512_cmpneq_epu16_mask(__A, __B),
+ _mm512_cmpneq_epu16_mask(__C, __D));
+}
+
+unsigned char test_ktest_mask32_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, unsigned char *CF) {
+ // CHECK-LABEL: @test_ktest_mask32_u8
+ // CHECK: [[LHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
+ // CHECK: [[RHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
+ // CHECK: [[RES:%.*]] = call i32 @llvm.x86.avx512.ktestc.d(<32 x i1> [[LHS]], <32 x i1> [[RHS]])
+ // CHECK: trunc i32 [[RES]] to i8
+ // CHECK: [[LHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
+ // CHECK: [[RHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
+ // CHECK: [[RES:%.*]] = call i32 @llvm.x86.avx512.ktestz.d(<32 x i1> [[LHS]], <32 x i1> [[RHS]])
+ // CHECK: trunc i32 [[RES]] to i8
+ return _ktest_mask32_u8(_mm512_cmpneq_epu16_mask(__A, __B),
+ _mm512_cmpneq_epu16_mask(__C, __D), CF);
+}
+
+unsigned char test_ktestz_mask64_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
+ // CHECK-LABEL: @test_ktestz_mask64_u8
+ // CHECK: [[LHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
+ // CHECK: [[RHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
+ // CHECK: [[RES:%.*]] = call i32 @llvm.x86.avx512.ktestz.q(<64 x i1> [[LHS]], <64 x i1> [[RHS]])
+ // CHECK: trunc i32 [[RES]] to i8
+ return _ktestz_mask64_u8(_mm512_cmpneq_epu8_mask(__A, __B),
+ _mm512_cmpneq_epu8_mask(__C, __D));
+}
+
+unsigned char test_ktestc_mask64_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
+ // CHECK-LABEL: @test_ktestc_mask64_u8
+ // CHECK: [[LHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
+ // CHECK: [[RHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
+ // CHECK: [[RES:%.*]] = call i32 @llvm.x86.avx512.ktestc.q(<64 x i1> [[LHS]], <64 x i1> [[RHS]])
+ // CHECK: trunc i32 [[RES]] to i8
+ return _ktestc_mask64_u8(_mm512_cmpneq_epu8_mask(__A, __B),
+ _mm512_cmpneq_epu8_mask(__C, __D));
+}
+
+unsigned char test_ktest_mask64_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, unsigned char *CF) {
+ // CHECK-LABEL: @test_ktest_mask64_u8
+ // CHECK: [[LHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
+ // CHECK: [[RHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
+ // CHECK: [[RES:%.*]] = call i32 @llvm.x86.avx512.ktestc.q(<64 x i1> [[LHS]], <64 x i1> [[RHS]])
+ // CHECK: trunc i32 [[RES]] to i8
+ // CHECK: [[LHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
+ // CHECK: [[RHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
+ // CHECK: [[RES:%.*]] = call i32 @llvm.x86.avx512.ktestz.q(<64 x i1> [[LHS]], <64 x i1> [[RHS]])
+ // CHECK: trunc i32 [[RES]] to i8
+ return _ktest_mask64_u8(_mm512_cmpneq_epu8_mask(__A, __B),
+ _mm512_cmpneq_epu8_mask(__C, __D), CF);
+}
+
__mmask32 test_kadd_mask32(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
// CHECK-LABEL: @test_kadd_mask32
// CHECK: [[LHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
diff --git a/clang/test/CodeGen/avx512dq-builtins.c b/clang/test/CodeGen/avx512dq-builtins.c
index 77f583391c1..6227a83b55d 100644
--- a/clang/test/CodeGen/avx512dq-builtins.c
+++ b/clang/test/CodeGen/avx512dq-builtins.c
@@ -114,6 +114,74 @@ unsigned char test_kortest_mask8_u8(__m512i __A, __m512i __B, __m512i __C, __m51
_mm512_cmpneq_epu64_mask(__C, __D), CF);
}
+unsigned char test_ktestz_mask8_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
+ // CHECK-LABEL: @test_ktestz_mask8_u8
+ // CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK: [[RHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK: [[RES:%.*]] = call i32 @llvm.x86.avx512.ktestz.b(<8 x i1> [[LHS]], <8 x i1> [[RHS]])
+ // CHECK: trunc i32 [[RES]] to i8
+ return _ktestz_mask8_u8(_mm512_cmpneq_epu64_mask(__A, __B),
+ _mm512_cmpneq_epu64_mask(__C, __D));
+}
+
+unsigned char test_ktestc_mask8_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
+ // CHECK-LABEL: @test_ktestc_mask8_u8
+ // CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK: [[RHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK: [[RES:%.*]] = call i32 @llvm.x86.avx512.ktestc.b(<8 x i1> [[LHS]], <8 x i1> [[RHS]])
+ // CHECK: trunc i32 [[RES]] to i8
+ return _ktestc_mask8_u8(_mm512_cmpneq_epu64_mask(__A, __B),
+ _mm512_cmpneq_epu64_mask(__C, __D));
+}
+
+unsigned char test_ktest_mask8_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, unsigned char *CF) {
+ // CHECK-LABEL: @test_ktest_mask8_u8
+ // CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK: [[RHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK: [[RES:%.*]] = call i32 @llvm.x86.avx512.ktestc.b(<8 x i1> [[LHS]], <8 x i1> [[RHS]])
+ // CHECK: trunc i32 [[RES]] to i8
+ // CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK: [[RHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK: [[RES:%.*]] = call i32 @llvm.x86.avx512.ktestz.b(<8 x i1> [[LHS]], <8 x i1> [[RHS]])
+ // CHECK: trunc i32 [[RES]] to i8
+ return _ktest_mask8_u8(_mm512_cmpneq_epu64_mask(__A, __B),
+ _mm512_cmpneq_epu64_mask(__C, __D), CF);
+}
+
+unsigned char test_ktestz_mask16_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
+ // CHECK-LABEL: @test_ktestz_mask16_u8
+ // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
+ // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
+ // CHECK: [[RES:%.*]] = call i32 @llvm.x86.avx512.ktestz.w(<16 x i1> [[LHS]], <16 x i1> [[RHS]])
+ // CHECK: trunc i32 [[RES]] to i8
+ return _ktestz_mask16_u8(_mm512_cmpneq_epu32_mask(__A, __B),
+ _mm512_cmpneq_epu32_mask(__C, __D));
+}
+
+unsigned char test_ktestc_mask16_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
+ // CHECK-LABEL: @test_ktestc_mask16_u8
+ // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
+ // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
+ // CHECK: [[RES:%.*]] = call i32 @llvm.x86.avx512.ktestc.w(<16 x i1> [[LHS]], <16 x i1> [[RHS]])
+ // CHECK: trunc i32 [[RES]] to i8
+ return _ktestc_mask16_u8(_mm512_cmpneq_epu32_mask(__A, __B),
+ _mm512_cmpneq_epu32_mask(__C, __D));
+}
+
+unsigned char test_ktest_mask16_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, unsigned char *CF) {
+ // CHECK-LABEL: @test_ktest_mask16_u8
+ // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
+ // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
+ // CHECK: [[RES:%.*]] = call i32 @llvm.x86.avx512.ktestc.w(<16 x i1> [[LHS]], <16 x i1> [[RHS]])
+ // CHECK: trunc i32 [[RES]] to i8
+ // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
+ // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
+ // CHECK: [[RES:%.*]] = call i32 @llvm.x86.avx512.ktestz.w(<16 x i1> [[LHS]], <16 x i1> [[RHS]])
+ // CHECK: trunc i32 [[RES]] to i8
+ return _ktest_mask16_u8(_mm512_cmpneq_epu32_mask(__A, __B),
+ _mm512_cmpneq_epu32_mask(__C, __D), CF);
+}
+
__mmask8 test_kadd_mask8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
// CHECK-LABEL: @test_kadd_mask8
// CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
OpenPOWER on IntegriCloud