diff options
Diffstat (limited to 'llvm/lib/Transforms')
| -rw-r--r-- | llvm/lib/Transforms/Utils/LoopUtils.cpp | 202 | ||||
| -rw-r--r-- | llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 37 | ||||
| -rw-r--r-- | llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 48 |
3 files changed, 222 insertions, 65 deletions
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index 175d013a011..d41fe6267a0 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -18,6 +18,7 @@ #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" @@ -1112,3 +1113,204 @@ Optional<unsigned> llvm::getLoopEstimatedTripCount(Loop *L) { else return (FalseVal + (TrueVal / 2)) / TrueVal; } + +/// \brief Adds a 'fast' flag to floating point operations. +static Value *addFastMathFlag(Value *V) { + if (isa<FPMathOperator>(V)) { + FastMathFlags Flags; + Flags.setUnsafeAlgebra(); + cast<Instruction>(V)->setFastMathFlags(Flags); + } + return V; +} + +// Helper to generate a log2 shuffle reduction. +static Value * +getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op, + RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind = + RecurrenceDescriptor::MRK_Invalid, + ArrayRef<Value *> RedOps = ArrayRef<Value *>()) { + unsigned VF = Src->getType()->getVectorNumElements(); + // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles + // and vector ops, reducing the set of values being computed by half each + // round. + assert(isPowerOf2_32(VF) && + "Reduction emission only supported for pow2 vectors!"); + Value *TmpVec = Src; + SmallVector<Constant *, 32> ShuffleMask(VF, nullptr); + for (unsigned i = VF; i != 1; i >>= 1) { + // Move the upper half of the vector to the lower half. + for (unsigned j = 0; j != i / 2; ++j) + ShuffleMask[j] = Builder.getInt32(i / 2 + j); + + // Fill the rest of the mask with undef. + std::fill(&ShuffleMask[i / 2], ShuffleMask.end(), + UndefValue::get(Builder.getInt32Ty())); + + Value *Shuf = Builder.CreateShuffleVector( + TmpVec, UndefValue::get(TmpVec->getType()), + ConstantVector::get(ShuffleMask), "rdx.shuf"); + + if (Op != Instruction::ICmp && Op != Instruction::FCmp) { + // Floating point operations had to be 'fast' to enable the reduction. + TmpVec = addFastMathFlag(Builder.CreateBinOp((Instruction::BinaryOps)Op, + TmpVec, Shuf, "bin.rdx")); + } else { + assert(MinMaxKind != RecurrenceDescriptor::MRK_Invalid && + "Invalid min/max"); + TmpVec = RecurrenceDescriptor::createMinMaxOp(Builder, MinMaxKind, TmpVec, + Shuf); + } + if (!RedOps.empty()) + propagateIRFlags(TmpVec, RedOps); + } + // The result is in the first element of the vector. + return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0)); +} + +/// Create a simple vector reduction specified by an opcode and some +/// flags (if generating min/max reductions). +Value *llvm::createSimpleTargetReduction( + IRBuilder<> &Builder, const TargetTransformInfo *TTI, unsigned Opcode, + Value *Src, TargetTransformInfo::ReductionFlags Flags, + ArrayRef<Value *> RedOps) { + assert(isa<VectorType>(Src->getType()) && "Type must be a vector"); + + Value *ScalarUdf = UndefValue::get(Src->getType()->getVectorElementType()); + std::function<Value*()> BuildFunc; + using RD = RecurrenceDescriptor; + RD::MinMaxRecurrenceKind MinMaxKind = RD::MRK_Invalid; + // TODO: Support creating ordered reductions. + FastMathFlags FMFUnsafe; + FMFUnsafe.setUnsafeAlgebra(); + + switch (Opcode) { + case Instruction::Add: + BuildFunc = [&]() { return Builder.CreateAddReduce(Src); }; + break; + case Instruction::Mul: + BuildFunc = [&]() { return Builder.CreateMulReduce(Src); }; + break; + case Instruction::And: + BuildFunc = [&]() { return Builder.CreateAndReduce(Src); }; + break; + case Instruction::Or: + BuildFunc = [&]() { return Builder.CreateOrReduce(Src); }; + break; + case Instruction::Xor: + BuildFunc = [&]() { return Builder.CreateXorReduce(Src); }; + break; + case Instruction::FAdd: + BuildFunc = [&]() { + auto Rdx = Builder.CreateFAddReduce(ScalarUdf, Src); + cast<CallInst>(Rdx)->setFastMathFlags(FMFUnsafe); + return Rdx; + }; + break; + case Instruction::FMul: + BuildFunc = [&]() { + auto Rdx = Builder.CreateFMulReduce(ScalarUdf, Src); + cast<CallInst>(Rdx)->setFastMathFlags(FMFUnsafe); + return Rdx; + }; + break; + case Instruction::ICmp: + if (Flags.IsMaxOp) { + MinMaxKind = Flags.IsSigned ? RD::MRK_SIntMax : RD::MRK_UIntMax; + BuildFunc = [&]() { + return Builder.CreateIntMaxReduce(Src, Flags.IsSigned); + }; + } else { + MinMaxKind = Flags.IsSigned ? RD::MRK_SIntMin : RD::MRK_UIntMin; + BuildFunc = [&]() { + return Builder.CreateIntMinReduce(Src, Flags.IsSigned); + }; + } + break; + case Instruction::FCmp: + if (Flags.IsMaxOp) { + MinMaxKind = RD::MRK_FloatMax; + BuildFunc = [&]() { return Builder.CreateFPMaxReduce(Src, Flags.NoNaN); }; + } else { + MinMaxKind = RD::MRK_FloatMin; + BuildFunc = [&]() { return Builder.CreateFPMinReduce(Src, Flags.NoNaN); }; + } + break; + default: + llvm_unreachable("Unhandled opcode"); + break; + } + if (TTI->useReductionIntrinsic(Opcode, Src->getType(), Flags)) + return BuildFunc(); + return getShuffleReduction(Builder, Src, Opcode, MinMaxKind, RedOps); +} + +/// Create a vector reduction using a given recurrence descriptor. +Value *llvm::createTargetReduction(IRBuilder<> &Builder, + const TargetTransformInfo *TTI, + RecurrenceDescriptor &Desc, Value *Src, + bool NoNaN) { + // TODO: Support in-order reductions based on the recurrence descriptor. + RecurrenceDescriptor::RecurrenceKind RecKind = Desc.getRecurrenceKind(); + TargetTransformInfo::ReductionFlags Flags; + Flags.NoNaN = NoNaN; + auto getSimpleRdx = [&](unsigned Opc) { + return createSimpleTargetReduction(Builder, TTI, Opc, Src, Flags); + }; + switch (RecKind) { + case RecurrenceDescriptor::RK_FloatAdd: + return getSimpleRdx(Instruction::FAdd); + case RecurrenceDescriptor::RK_FloatMult: + return getSimpleRdx(Instruction::FMul); + case RecurrenceDescriptor::RK_IntegerAdd: + return getSimpleRdx(Instruction::Add); + case RecurrenceDescriptor::RK_IntegerMult: + return getSimpleRdx(Instruction::Mul); + case RecurrenceDescriptor::RK_IntegerAnd: + return getSimpleRdx(Instruction::And); + case RecurrenceDescriptor::RK_IntegerOr: + return getSimpleRdx(Instruction::Or); + case RecurrenceDescriptor::RK_IntegerXor: + return getSimpleRdx(Instruction::Xor); + case RecurrenceDescriptor::RK_IntegerMinMax: { + switch (Desc.getMinMaxRecurrenceKind()) { + case RecurrenceDescriptor::MRK_SIntMax: + Flags.IsSigned = true; + Flags.IsMaxOp = true; + break; + case RecurrenceDescriptor::MRK_UIntMax: + Flags.IsMaxOp = true; + break; + case RecurrenceDescriptor::MRK_SIntMin: + Flags.IsSigned = true; + break; + case RecurrenceDescriptor::MRK_UIntMin: + break; + default: + llvm_unreachable("Unhandled MRK"); + } + return getSimpleRdx(Instruction::ICmp); + } + case RecurrenceDescriptor::RK_FloatMinMax: { + Flags.IsMaxOp = + Desc.getMinMaxRecurrenceKind() == RecurrenceDescriptor::MRK_FloatMax; + return getSimpleRdx(Instruction::FCmp); + } + default: + llvm_unreachable("Unhandled RecKind"); + } +} + +void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL) { + if (auto *VecOp = dyn_cast<Instruction>(I)) { + if (auto *I0 = dyn_cast<Instruction>(VL[0])) { + // VecOVp is initialized to the 0th scalar, so start counting from index + // '1'. + VecOp->copyIRFlags(I0); + for (int i = 1, e = VL.size(); i < e; ++i) { + if (auto *Scalar = dyn_cast<Instruction>(VL[i])) + VecOp->andIRFlags(Scalar); + } + } + } +} diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 3fde0a45396..a9aa48395af 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1700,6 +1700,9 @@ public: /// access that can be widened. bool memoryInstructionCanBeWidened(Instruction *I, unsigned VF = 1); + // Returns true if the NoNaN attribute is set on the function. + bool hasFunNoNaNAttr() const { return HasFunNoNaNAttr; } + private: /// Check if a single basic block loop is vectorizable. /// At this point we know that this is a loop with a constant trip count @@ -4258,39 +4261,9 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) { } if (VF > 1) { - // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles - // and vector ops, reducing the set of values being computed by half each - // round. - assert(isPowerOf2_32(VF) && - "Reduction emission only supported for pow2 vectors!"); - Value *TmpVec = ReducedPartRdx; - SmallVector<Constant *, 32> ShuffleMask(VF, nullptr); - for (unsigned i = VF; i != 1; i >>= 1) { - // Move the upper half of the vector to the lower half. - for (unsigned j = 0; j != i / 2; ++j) - ShuffleMask[j] = Builder.getInt32(i / 2 + j); - - // Fill the rest of the mask with undef. - std::fill(&ShuffleMask[i / 2], ShuffleMask.end(), - UndefValue::get(Builder.getInt32Ty())); - - Value *Shuf = Builder.CreateShuffleVector( - TmpVec, UndefValue::get(TmpVec->getType()), - ConstantVector::get(ShuffleMask), "rdx.shuf"); - - if (Op != Instruction::ICmp && Op != Instruction::FCmp) - // Floating point operations had to be 'fast' to enable the reduction. - TmpVec = addFastMathFlag(Builder.CreateBinOp( - (Instruction::BinaryOps)Op, TmpVec, Shuf, "bin.rdx")); - else - TmpVec = RecurrenceDescriptor::createMinMaxOp(Builder, MinMaxKind, - TmpVec, Shuf); - } - - // The result is in the first element of the vector. + bool NoNaN = Legal->hasFunNoNaNAttr(); ReducedPartRdx = - Builder.CreateExtractElement(TmpVec, Builder.getInt32(0)); - + createTargetReduction(Builder, TTI, RdxDesc, ReducedPartRdx, NoNaN); // If the reduction can be performed in a smaller type, we need to extend // the reduction to the wider type before we branch to the original loop. if (Phi->getType() != RdxDesc.getRecurrenceType()) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index f112c555205..99084444bdd 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -41,6 +41,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/GraphWriter.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Vectorize.h" #include <algorithm> #include <memory> @@ -212,23 +213,6 @@ static unsigned getSameOpcode(ArrayRef<Value *> VL) { return Opcode; } -/// Get the intersection (logical and) of all of the potential IR flags -/// of each scalar operation (VL) that will be converted into a vector (I). -/// Flag set: NSW, NUW, exact, and all of fast-math. -static void propagateIRFlags(Value *I, ArrayRef<Value *> VL) { - if (auto *VecOp = dyn_cast<Instruction>(I)) { - if (auto *I0 = dyn_cast<Instruction>(VL[0])) { - // VecOVp is initialized to the 0th scalar, so start counting from index - // '1'. - VecOp->copyIRFlags(I0); - for (int i = 1, e = VL.size(); i < e; ++i) { - if (auto *Scalar = dyn_cast<Instruction>(VL[i])) - VecOp->andIRFlags(Scalar); - } - } - } -} - /// \returns true if all of the values in \p VL have the same type or false /// otherwise. static bool allSameType(ArrayRef<Value *> VL) { @@ -4513,7 +4497,7 @@ public: // Emit a reduction. Value *ReducedSubTree = - emitReduction(VectorizedRoot, Builder, ReduxWidth, ReductionOps); + emitReduction(VectorizedRoot, Builder, ReduxWidth, ReductionOps, TTI); if (VectorizedTree) { Builder.SetCurrentDebugLocation(Loc); VectorizedTree = Builder.CreateBinOp(ReductionOpcode, VectorizedTree, @@ -4583,33 +4567,31 @@ private: /// \brief Emit a horizontal reduction of the vectorized value. Value *emitReduction(Value *VectorizedValue, IRBuilder<> &Builder, - unsigned ReduxWidth, ArrayRef<Value *> RedOps) { + unsigned ReduxWidth, ArrayRef<Value *> RedOps, + const TargetTransformInfo *TTI) { assert(VectorizedValue && "Need to have a vectorized tree node"); assert(isPowerOf2_32(ReduxWidth) && "We only handle power-of-two reductions for now"); + if (!IsPairwiseReduction) + return createSimpleTargetReduction( + Builder, TTI, ReductionOpcode, VectorizedValue, + TargetTransformInfo::ReductionFlags(), RedOps); + Value *TmpVec = VectorizedValue; for (unsigned i = ReduxWidth / 2; i != 0; i >>= 1) { - if (IsPairwiseReduction) { - Value *LeftMask = + Value *LeftMask = createRdxShuffleMask(ReduxWidth, i, true, true, Builder); - Value *RightMask = + Value *RightMask = createRdxShuffleMask(ReduxWidth, i, true, false, Builder); - Value *LeftShuf = Builder.CreateShuffleVector( + Value *LeftShuf = Builder.CreateShuffleVector( TmpVec, UndefValue::get(TmpVec->getType()), LeftMask, "rdx.shuf.l"); - Value *RightShuf = Builder.CreateShuffleVector( + Value *RightShuf = Builder.CreateShuffleVector( TmpVec, UndefValue::get(TmpVec->getType()), (RightMask), "rdx.shuf.r"); - TmpVec = Builder.CreateBinOp(ReductionOpcode, LeftShuf, RightShuf, - "bin.rdx"); - } else { - Value *UpperHalf = - createRdxShuffleMask(ReduxWidth, i, false, false, Builder); - Value *Shuf = Builder.CreateShuffleVector( - TmpVec, UndefValue::get(TmpVec->getType()), UpperHalf, "rdx.shuf"); - TmpVec = Builder.CreateBinOp(ReductionOpcode, TmpVec, Shuf, "bin.rdx"); - } + TmpVec = + Builder.CreateBinOp(ReductionOpcode, LeftShuf, RightShuf, "bin.rdx"); propagateIRFlags(TmpVec, RedOps); } |

