diff options
| author | Craig Topper <craig.topper@gmail.com> | 2016-12-30 23:06:28 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@gmail.com> | 2016-12-30 23:06:28 +0000 |
| commit | 991636312b34d50506284dde1616deb8c7e6e242 (patch) | |
| tree | 72c5890c1bb0e838a9e46731cb4e13db4d0f4d28 | |
| parent | 6905d22dc28255f035508461ae423790cdd74e9b (diff) | |
| download | bcm5719-llvm-991636312b34d50506284dde1616deb8c7e6e242.tar.gz bcm5719-llvm-991636312b34d50506284dde1616deb8c7e6e242.zip | |
[InstCombine][AVX-512] When turning intrinsics with masking into native IR, don't emit a select if the mask is known to be all ones.
This saves InstCombine the burden of having to optimize the select later.
llvm-svn: 290774
| -rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 29 |
1 files changed, 20 insertions, 9 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index a258ac56568..25f692c6fb9 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1038,13 +1038,20 @@ static Value *simplifyX86vpcom(const IntrinsicInst &II, // masked intrinsics. static Value *emitX86MaskSelect(Value *Mask, Value *Op0, Value *Op1, InstCombiner::BuilderTy &Builder) { + unsigned VWidth = Op0->getType()->getVectorNumElements(); + + // If the mask is all ones we don't need the select. But we need to check + // only the bit thats will be used in case VWidth is less than 8. + if (auto *C = dyn_cast<ConstantInt>(Mask)) + if (C->getValue().zextOrTrunc(VWidth).isAllOnesValue()) + return Op0; + auto *MaskTy = VectorType::get(Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth()); Mask = Builder.CreateBitCast(Mask, MaskTy); // If we have less than 8 elements, then the starting mask was an i8 and // we need to extract down to the right number of elements. - unsigned VWidth = Op0->getType()->getVectorNumElements(); if (VWidth < 8) { uint32_t Indices[4]; for (unsigned i = 0; i != VWidth; ++i) @@ -1873,16 +1880,20 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { } // Handle the masking aspect of the intrinsic. - // Cast the mask to an i1 vector and then extract the lowest element. Value *Mask = II->getArgOperand(3); - auto *MaskTy = VectorType::get(Builder->getInt1Ty(), + auto *C = dyn_cast<ConstantInt>(Mask); + // We don't need a select if we know the mask bit is a 1. + if (!C || !C->getValue()[0]) { + // Cast the mask to an i1 vector and then extract the lowest element. + auto *MaskTy = VectorType::get(Builder->getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth()); - Mask = Builder->CreateBitCast(Mask, MaskTy); - Mask = Builder->CreateExtractElement(Mask, (uint64_t)0); - // Extract the lowest element from the passthru operand. - Value *Passthru = Builder->CreateExtractElement(II->getArgOperand(2), - (uint64_t)0); - V = Builder->CreateSelect(Mask, V, Passthru); + Mask = Builder->CreateBitCast(Mask, MaskTy); + Mask = Builder->CreateExtractElement(Mask, (uint64_t)0); + // Extract the lowest element from the passthru operand. + Value *Passthru = Builder->CreateExtractElement(II->getArgOperand(2), + (uint64_t)0); + V = Builder->CreateSelect(Mask, V, Passthru); + } // Insert the result back into the original argument 0. V = Builder->CreateInsertElement(Arg0, V, (uint64_t)0); |

