summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
diff options
context:
space:
mode:
author: Sanjay Patel <spatel@rotateright.com> 2019-07-04 16:45:34 +0000
committer: Sanjay Patel <spatel@rotateright.com> 2019-07-04 16:45:34 +0000
commit: 75b5edf6a1df09d466a9c4f5669121add1632067 (patch)
tree: bee2abd1e38ebc6a66c44827282cfb5806626221 /llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
parent: 0cd50b2a95d32924a164e8a79fff313f037e002d (diff)
download: bcm5719-llvm-75b5edf6a1df09d466a9c4f5669121add1632067.tar.gz
bcm5719-llvm-75b5edf6a1df09d466a9c4f5669121add1632067.zip
[InstCombine] allow undef elements when forming splat from chain of insertelements
We allow forming a splat (broadcast) shuffle, but we were conservatively limiting that to cases where all elements of the vector are specified. It should be safe from a codegen perspective to allow undefined lanes of the vector because the expansion of a splat shuffle would become the chain of inserts again.

Forming splat shuffles can reduce IR and help enable further IR transforms.

Motivating bugs:
https://bugs.llvm.org/show_bug.cgi?id=42174
https://bugs.llvm.org/show_bug.cgi?id=16739

Differential Revision: https://reviews.llvm.org/D63848

llvm-svn: 365147
Diffstat (limited to 'llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp')
-rw-r--r-- llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp | 21
1 file changed, 17 insertions, 4 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 693fe5d3881..22d1e48cd96 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -704,10 +704,18 @@ static Instruction *foldInsSequenceIntoSplat(InsertElementInst &InsElt) {
CurrIE = NextIE;
}
- // Make sure we've seen an insert into every element.
- if (llvm::any_of(ElementPresent, [](bool Present) { return !Present; }))
+ // If this is just a single insertelement (not a sequence), we are done.
+ if (FirstIE == &InsElt)
return nullptr;
+ // If we are not inserting into an undef vector, make sure we've seen an
+ // insert into every element.
+ // TODO: If the base vector is not undef, it might be better to create a splat
+ // and then a select-shuffle (blend) with the base vector.
+ if (!isa<UndefValue>(FirstIE->getOperand(0)))
+ if (any_of(ElementPresent, [](bool Present) { return !Present; }))
+ return nullptr;
+
// Create the insert + shuffle.
Type *Int32Ty = Type::getInt32Ty(InsElt.getContext());
UndefValue *UndefVec = UndefValue::get(VecTy);
@@ -715,8 +723,13 @@ static Instruction *foldInsSequenceIntoSplat(InsertElementInst &InsElt) {
if (!cast<ConstantInt>(FirstIE->getOperand(2))->isZero())
FirstIE = InsertElementInst::Create(UndefVec, SplatVal, Zero, "", &InsElt);
- Constant *ZeroMask = ConstantVector::getSplat(NumElements, Zero);
- return new ShuffleVectorInst(FirstIE, UndefVec, ZeroMask);
+ // Splat from element 0, but replace absent elements with undef in the mask.
+ SmallVector<Constant *, 16> Mask(NumElements, Zero);
+ for (unsigned i = 0; i != NumElements; ++i)
+ if (!ElementPresent[i])
+ Mask[i] = UndefValue::get(Int32Ty);
+
+ return new ShuffleVectorInst(FirstIE, UndefVec, ConstantVector::get(Mask));
}
/// If we have an insertelement instruction feeding into another insertelement
OpenPOWER on IntegriCloud