diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/CodeGen/ExpandReductions.cpp | 15 | ||||
-rw-r--r-- | llvm/lib/Transforms/Utils/LoopUtils.cpp | 32 |
2 files changed, 41 insertions, 6 deletions
diff --git a/llvm/lib/CodeGen/ExpandReductions.cpp b/llvm/lib/CodeGen/ExpandReductions.cpp
index abf487a4f19..7552ba8cd85 100644
--- a/llvm/lib/CodeGen/ExpandReductions.cpp
+++ b/llvm/lib/CodeGen/ExpandReductions.cpp
@@ -78,13 +78,15 @@ RecurrenceDescriptor::MinMaxRecurrenceKind getMRK(Intrinsic::ID ID) {
 
 bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
   bool Changed = false;
-  SmallVector<IntrinsicInst*, 4> Worklist;
+  SmallVector<IntrinsicInst *, 4> Worklist;
   for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
     if (auto II = dyn_cast<IntrinsicInst>(&*I))
       Worklist.push_back(II);
 
   for (auto *II : Worklist) {
     IRBuilder<> Builder(II);
+    bool IsOrdered = false;
+    Value *Acc = nullptr;
     Value *Vec = nullptr;
     auto ID = II->getIntrinsicID();
     auto MRK = RecurrenceDescriptor::MRK_Invalid;
@@ -92,11 +94,10 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
     case Intrinsic::experimental_vector_reduce_fadd:
     case Intrinsic::experimental_vector_reduce_fmul:
       // FMFs must be attached to the call, otherwise it's an ordered reduction
-      // and it can't be handled by generating this shuffle sequence.
-      // TODO: Implement scalarization of ordered reductions here for targets
-      // without native support.
+      // and it can't be handled by generating a shuffle sequence.
       if (!II->getFastMathFlags().isFast())
-        continue;
+        IsOrdered = true;
+      Acc = II->getArgOperand(0);
       Vec = II->getArgOperand(1);
       break;
     case Intrinsic::experimental_vector_reduce_add:
@@ -118,7 +119,9 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
     }
     if (!TTI->shouldExpandReduction(II))
       continue;
-    auto Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);
+    Value *Rdx =
+        IsOrdered ? getOrderedReduction(Builder, Acc, Vec, getOpcode(ID), MRK)
+                  : getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);
     II->replaceAllUsesWith(Rdx);
     II->eraseFromParent();
     Changed = true;
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index 67e209583b7..805a003f18f 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -1526,6 +1526,38 @@ static Value *addFastMathFlag(Value *V) {
   return V;
 }
 
+// Helper to generate an ordered reduction.
+Value *
+llvm::getOrderedReduction(IRBuilder<> &Builder, Value *Acc, Value *Src,
+                          unsigned Op,
+                          RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind,
+                          ArrayRef<Value *> RedOps) {
+  unsigned VF = Src->getType()->getVectorNumElements();
+
+  // Extract and apply reduction ops in ascending order:
+  // e.g. ((((Acc + Scl[0]) + Scl[1]) + Scl[2]) + ) ... + Scl[VF-1]
+  Value *Result = Acc;
+  for (unsigned ExtractIdx = 0; ExtractIdx != VF; ++ExtractIdx) {
+    Value *Ext =
+        Builder.CreateExtractElement(Src, Builder.getInt32(ExtractIdx));
+
+    if (Op != Instruction::ICmp && Op != Instruction::FCmp) {
+      Result = Builder.CreateBinOp((Instruction::BinaryOps)Op, Result, Ext,
+                                   "bin.rdx");
+    } else {
+      assert(MinMaxKind != RecurrenceDescriptor::MRK_Invalid &&
+             "Invalid min/max");
+      Result = RecurrenceDescriptor::createMinMaxOp(Builder, MinMaxKind, Result,
+                                                    Ext);
+    }
+
+    if (!RedOps.empty())
+      propagateIRFlags(Result, RedOps);
+  }
+
+  return Result;
+}
+
 // Helper to generate a log2 shuffle reduction.
 Value *
 llvm::getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op,