diff options
author | Sander de Smalen <sander.desmalen@arm.com> | 2019-06-11 08:22:10 +0000 |
---|---|---|
committer | Sander de Smalen <sander.desmalen@arm.com> | 2019-06-11 08:22:10 +0000 |
commit | cbeb563cfb1752044fb8771586ae9bbd89d2a07b (patch) | |
tree | dd9dec7d2ce2d7f949c97d9624df5ea1bbbf551d /llvm/lib/CodeGen/ExpandReductions.cpp | |
parent | e2acbeb94cf28cf6a8c82e09073df79aa1e846be (diff) | |
download | bcm5719-llvm-cbeb563cfb1752044fb8771586ae9bbd89d2a07b.tar.gz bcm5719-llvm-cbeb563cfb1752044fb8771586ae9bbd89d2a07b.zip |
Change semantics of fadd/fmul vector reductions.
This patch changes how LLVM handles the accumulator/start value
in the reduction: the value is now never ignored, regardless of whether
fast-math flags are present on the call site. This change introduces the
following new intrinsics to replace the existing ones:
llvm.experimental.vector.reduce.fadd -> llvm.experimental.vector.reduce.v2.fadd
llvm.experimental.vector.reduce.fmul -> llvm.experimental.vector.reduce.v2.fmul
and adds functionality to auto-upgrade existing LLVM IR and bitcode.
Reviewers: RKSimon, greened, dmgreen, nikic, simoll, aemerson
Reviewed By: nikic
Differential Revision: https://reviews.llvm.org/D60261
llvm-svn: 363035
Diffstat (limited to 'llvm/lib/CodeGen/ExpandReductions.cpp')
-rw-r--r-- | llvm/lib/CodeGen/ExpandReductions.cpp | 57 |
1 file changed, 29 insertions, 28 deletions
diff --git a/llvm/lib/CodeGen/ExpandReductions.cpp b/llvm/lib/CodeGen/ExpandReductions.cpp index 340ee19c339..1069a2423b8 100644 --- a/llvm/lib/CodeGen/ExpandReductions.cpp +++ b/llvm/lib/CodeGen/ExpandReductions.cpp @@ -29,9 +29,9 @@ namespace { unsigned getOpcode(Intrinsic::ID ID) { switch (ID) { - case Intrinsic::experimental_vector_reduce_fadd: + case Intrinsic::experimental_vector_reduce_v2_fadd: return Instruction::FAdd; - case Intrinsic::experimental_vector_reduce_fmul: + case Intrinsic::experimental_vector_reduce_v2_fmul: return Instruction::FMul; case Intrinsic::experimental_vector_reduce_add: return Instruction::Add; @@ -83,22 +83,33 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) { Worklist.push_back(II); for (auto *II : Worklist) { + if (!TTI->shouldExpandReduction(II)) + continue; + + FastMathFlags FMF = + isa<FPMathOperator>(II) ? II->getFastMathFlags() : FastMathFlags{}; + Intrinsic::ID ID = II->getIntrinsicID(); + RecurrenceDescriptor::MinMaxRecurrenceKind MRK = getMRK(ID); + + Value *Rdx = nullptr; IRBuilder<> Builder(II); - bool IsOrdered = false; - Value *Acc = nullptr; - Value *Vec = nullptr; - auto ID = II->getIntrinsicID(); - auto MRK = RecurrenceDescriptor::MRK_Invalid; + IRBuilder<>::FastMathFlagGuard FMFGuard(Builder); + Builder.setFastMathFlags(FMF); switch (ID) { - case Intrinsic::experimental_vector_reduce_fadd: - case Intrinsic::experimental_vector_reduce_fmul: + case Intrinsic::experimental_vector_reduce_v2_fadd: + case Intrinsic::experimental_vector_reduce_v2_fmul: { // FMFs must be attached to the call, otherwise it's an ordered reduction // and it can't be handled by generating a shuffle sequence. 
- if (!II->getFastMathFlags().isFast()) - IsOrdered = true; - Acc = II->getArgOperand(0); - Vec = II->getArgOperand(1); - break; + Value *Acc = II->getArgOperand(0); + Value *Vec = II->getArgOperand(1); + if (!FMF.allowReassoc()) + Rdx = getOrderedReduction(Builder, Acc, Vec, getOpcode(ID), MRK); + else { + Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK); + Rdx = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(ID), + Acc, Rdx, "bin.rdx"); + } + } break; case Intrinsic::experimental_vector_reduce_add: case Intrinsic::experimental_vector_reduce_mul: case Intrinsic::experimental_vector_reduce_and: @@ -109,23 +120,13 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) { case Intrinsic::experimental_vector_reduce_umax: case Intrinsic::experimental_vector_reduce_umin: case Intrinsic::experimental_vector_reduce_fmax: - case Intrinsic::experimental_vector_reduce_fmin: - Vec = II->getArgOperand(0); - MRK = getMRK(ID); - break; + case Intrinsic::experimental_vector_reduce_fmin: { + Value *Vec = II->getArgOperand(0); + Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK); + } break; default: continue; } - if (!TTI->shouldExpandReduction(II)) - continue; - // Propagate FMF using the builder. - FastMathFlags FMF = - isa<FPMathOperator>(II) ? II->getFastMathFlags() : FastMathFlags{}; - IRBuilder<>::FastMathFlagGuard FMFGuard(Builder); - Builder.setFastMathFlags(FMF); - Value *Rdx = - IsOrdered ? getOrderedReduction(Builder, Acc, Vec, getOpcode(ID), MRK) - : getShuffleReduction(Builder, Vec, getOpcode(ID), MRK); II->replaceAllUsesWith(Rdx); II->eraseFromParent(); Changed = true; |