From 75b5edf6a1df09d466a9c4f5669121add1632067 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Thu, 4 Jul 2019 16:45:34 +0000 Subject: [InstCombine] allow undef elements when forming splat from chain of insertelements We allow forming a splat (broadcast) shuffle, but we were conservatively limiting that to cases where all elements of the vector are specified. It should be safe from a codegen perspective to allow undefined lanes of the vector because the expansion of a splat shuffle would become the chain of inserts again. Forming splat shuffles can reduce IR and help enable further IR transforms. Motivating bugs: https://bugs.llvm.org/show_bug.cgi?id=42174 https://bugs.llvm.org/show_bug.cgi?id=16739 Differential Revision: https://reviews.llvm.org/D63848 llvm-svn: 365147 --- .../Transforms/InstCombine/InstCombineVectorOps.cpp | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) (limited to 'llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp') diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 693fe5d3881..22d1e48cd96 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -704,10 +704,18 @@ static Instruction *foldInsSequenceIntoSplat(InsertElementInst &InsElt) { CurrIE = NextIE; } - // Make sure we've seen an insert into every element. - if (llvm::any_of(ElementPresent, [](bool Present) { return !Present; })) + // If this is just a single insertelement (not a sequence), we are done. + if (FirstIE == &InsElt) return nullptr; + // If we are not inserting into an undef vector, make sure we've seen an + // insert into every element. + // TODO: If the base vector is not undef, it might be better to create a splat + // and then a select-shuffle (blend) with the base vector. 
+ if (!isa<UndefValue>(FirstIE->getOperand(0))) + if (any_of(ElementPresent, [](bool Present) { return !Present; })) + return nullptr; + // Create the insert + shuffle. Type *Int32Ty = Type::getInt32Ty(InsElt.getContext()); UndefValue *UndefVec = UndefValue::get(VecTy); @@ -715,8 +723,13 @@ static Instruction *foldInsSequenceIntoSplat(InsertElementInst &InsElt) { if (!cast<ConstantInt>(FirstIE->getOperand(2))->isZero()) FirstIE = InsertElementInst::Create(UndefVec, SplatVal, Zero, "", &InsElt); - Constant *ZeroMask = ConstantVector::getSplat(NumElements, Zero); - return new ShuffleVectorInst(FirstIE, UndefVec, ZeroMask); + // Splat from element 0, but replace absent elements with undef in the mask. + SmallVector<Constant *, 16> Mask(NumElements, Zero); + for (unsigned i = 0; i != NumElements; ++i) + if (!ElementPresent[i]) + Mask[i] = UndefValue::get(Int32Ty); + + return new ShuffleVectorInst(FirstIE, UndefVec, ConstantVector::get(Mask)); } /// If we have an insertelement instruction feeding into another insertelement -- cgit v1.2.3