diff options
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 9 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 5 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512-mask-op.ll | 51 |
3 files changed, 62 insertions, 3 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 4563354f1f3..557664c09dc 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -1395,6 +1395,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, // Custom lower several nodes. for (MVT VT : MVT::vector_valuetypes()) { unsigned EltSize = VT.getVectorElementType().getSizeInBits(); + if (EltSize == 1) { + setOperationAction(ISD::AND, VT, Legal); + setOperationAction(ISD::OR, VT, Legal); + setOperationAction(ISD::XOR, VT, Legal); + } if (EltSize >= 32 && VT.getSizeInBits() <= 512) { setOperationAction(ISD::MGATHER, VT, Custom); setOperationAction(ISD::MSCATTER, VT, Custom); @@ -18166,6 +18171,10 @@ X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M, if (!VT.isSimple()) return false; + // Not for i1 vectors + if (VT.getScalarType() == MVT::i1) + return false; + // Very little shuffling can be done for 64-bit vectors right now. if (VT.getSizeInBits() == 64) return false; diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index bc9ee59ae49..bc8ab83fe31 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -1864,7 +1864,7 @@ let Predicates = [HasBWI] in { // With AVX-512 only, 8-bit mask is promoted to 16-bit mask. 
-let Predicates = [HasAVX512] in { +let Predicates = [HasAVX512, NoDQI] in { // GR from/to 8-bit mask without native support def : Pat<(v8i1 (bitconvert (i8 GR8:$src))), (COPY_TO_REGCLASS @@ -1874,7 +1874,8 @@ let Predicates = [HasAVX512] in { (EXTRACT_SUBREG (KMOVWrk (COPY_TO_REGCLASS VK8:$src, VK16)), sub_8bit)>; - +} +let Predicates = [HasAVX512] in { def : Pat<(i1 (X86Vextract VK16:$src, (iPTR 0))), (COPY_TO_REGCLASS VK16:$src, VK1)>; def : Pat<(i1 (X86Vextract VK8:$src, (iPTR 0))), diff --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll index 677524a9565..5f3588d68d0 100644 --- a/llvm/test/CodeGen/X86/avx512-mask-op.ll +++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll @@ -157,4 +157,53 @@ define <2 x i64> @test5(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1, <2 x i64> %y1 %res = icmp slt <2 x i1>%x_gt_y, %x1_gt_y1 %resse = sext <2 x i1>%res to <2 x i64> ret <2 x i64> %resse -}
\ No newline at end of file +} + +; KNL-LABEL: test6 +; KNL: vpmovsxbd +; KNL: vpandd +; KNL: kmovw %eax, %k1 +; KNL: vptestmd {{.*}}, %k0 {%k1} + +; SKX-LABEL: test6 +; SKX: vpmovb2m +; SKX: kmovw %eax, %k1 +; SKX: kandw +define void @test6(<16 x i1> %mask) { +allocas: + %a= and <16 x i1> %mask, <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false> + %b = bitcast <16 x i1> %a to i16 + %c = icmp eq i16 %b, 0 + br i1 %c, label %true, label %false + +true: + ret void + +false: + ret void +} + +; KNL-LABEL: test7 +; KNL: vpmovsxwq +; KNL: vpandq +; KNL: vptestmq {{.*}}, %k0 +; KNL: korw + +; SKX-LABEL: test7 +; SKX: vpmovw2m +; SKX: kmovw %eax, %k1 +; SKX: korb + +define void @test7(<8 x i1> %mask) { +allocas: + %a= or <8 x i1> %mask, <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false> + %b = bitcast <8 x i1> %a to i8 + %c = icmp eq i8 %b, 0 + br i1 %c, label %true, label %false + +true: + ret void + +false: + ret void +} |

