[InstCombine][AVX-512] When turning intrinsics with masking into native IR, don't emit a select if the mask is known to be all ones.

This saves InstCombine the burden of having to optimize the select later.

llvm-svn: 290774
author: Craig Topper <craig.topper@gmail.com> 2016-12-30 23:06:28 +0000
committer: Craig Topper <craig.topper@gmail.com> 2016-12-30 23:06:28 +0000
commit: 991636312b34d50506284dde1616deb8c7e6e242 (patch)
tree: 72c5890c1bb0e838a9e46731cb4e13db4d0f4d28
parent: 6905d22dc28255f035508461ae423790cdd74e9b (diff)
download: bcm5719-llvm-991636312b34d50506284dde1616deb8c7e6e242.tar.gz
bcm5719-llvm-991636312b34d50506284dde1616deb8c7e6e242.zip
1 files changed, 20 insertions, 9 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index a258ac56568..25f692c6fb9 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1038,13 +1038,20 @@ static Value *simplifyX86vpcom(const IntrinsicInst &II,
 // masked intrinsics.
 static Value *emitX86MaskSelect(Value *Mask, Value *Op0, Value *Op1,
                                 InstCombiner::BuilderTy &Builder) {
+  unsigned VWidth = Op0->getType()->getVectorNumElements();
+
+  // If the mask is all ones we don't need the select. But we need to check
+  // only the bits that will be used in case VWidth is less than 8.
+  if (auto *C = dyn_cast<ConstantInt>(Mask))
+    if (C->getValue().zextOrTrunc(VWidth).isAllOnesValue())
+      return Op0;
+
   auto *MaskTy = VectorType::get(Builder.getInt1Ty(),
                          cast<IntegerType>(Mask->getType())->getBitWidth());
   Mask = Builder.CreateBitCast(Mask, MaskTy);
 
   // If we have less than 8 elements, then the starting mask was an i8 and
   // we need to extract down to the right number of elements.
-  unsigned VWidth = Op0->getType()->getVectorNumElements();
   if (VWidth < 8) {
     uint32_t Indices[4];
     for (unsigned i = 0; i != VWidth; ++i)
@@ -1873,16 +1880,20 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
         }
 
         // Handle the masking aspect of the intrinsic.
-        // Cast the mask to an i1 vector and then extract the lowest element.
         Value *Mask = II->getArgOperand(3);
-        auto *MaskTy = VectorType::get(Builder->getInt1Ty(),
+        auto *C = dyn_cast<ConstantInt>(Mask);
+        // We don't need a select if we know the mask bit is a 1.
+        if (!C || !C->getValue()[0]) {
+          // Cast the mask to an i1 vector and then extract the lowest element.
+          auto *MaskTy = VectorType::get(Builder->getInt1Ty(),
                              cast<IntegerType>(Mask->getType())->getBitWidth());
-        Mask = Builder->CreateBitCast(Mask, MaskTy);
-        Mask = Builder->CreateExtractElement(Mask, (uint64_t)0);
-        // Extract the lowest element from the passthru operand.
-        Value *Passthru = Builder->CreateExtractElement(II->getArgOperand(2),
-                                                        (uint64_t)0);
-        V = Builder->CreateSelect(Mask, V, Passthru);
+          Mask = Builder->CreateBitCast(Mask, MaskTy);
+          Mask = Builder->CreateExtractElement(Mask, (uint64_t)0);
+          // Extract the lowest element from the passthru operand.
+          Value *Passthru = Builder->CreateExtractElement(II->getArgOperand(2),
+                                                          (uint64_t)0);
+          V = Builder->CreateSelect(Mask, V, Passthru);
+        }
 
         // Insert the result back into the original argument 0.
         V = Builder->CreateInsertElement(Arg0, V, (uint64_t)0);
author	Craig Topper <craig.topper@gmail.com>	2016-12-30 23:06:28 +0000
committer	Craig Topper <craig.topper@gmail.com>	2016-12-30 23:06:28 +0000
commit	991636312b34d50506284dde1616deb8c7e6e242 (patch)
tree	72c5890c1bb0e838a9e46731cb4e13db4d0f4d28
parent	6905d22dc28255f035508461ae423790cdd74e9b (diff)
download	bcm5719-llvm-991636312b34d50506284dde1616deb8c7e6e242.tar.gz bcm5719-llvm-991636312b34d50506284dde1616deb8c7e6e242.zip