summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/CodeGen/ExpandReductions.cpp
diff options
context:
space:
mode:
author: Sander de Smalen <sander.desmalen@arm.com> 2019-06-11 08:22:10 +0000
committer: Sander de Smalen <sander.desmalen@arm.com> 2019-06-11 08:22:10 +0000
commit: cbeb563cfb1752044fb8771586ae9bbd89d2a07b (patch)
tree: dd9dec7d2ce2d7f949c97d9624df5ea1bbbf551d /llvm/lib/CodeGen/ExpandReductions.cpp
parent: e2acbeb94cf28cf6a8c82e09073df79aa1e846be (diff)
download: bcm5719-llvm-cbeb563cfb1752044fb8771586ae9bbd89d2a07b.tar.gz
download: bcm5719-llvm-cbeb563cfb1752044fb8771586ae9bbd89d2a07b.zip
Change semantics of fadd/fmul vector reductions.
This patch changes how LLVM handles the accumulator/start value in the reduction, by never ignoring it regardless of the presence of fast-math flags on callsites.

This change introduces the following new intrinsics to replace the existing ones:
  llvm.experimental.vector.reduce.fadd -> llvm.experimental.vector.reduce.v2.fadd
  llvm.experimental.vector.reduce.fmul -> llvm.experimental.vector.reduce.v2.fmul
and adds functionality to auto-upgrade existing LLVM IR and bitcode.

Reviewers: RKSimon, greened, dmgreen, nikic, simoll, aemerson
Reviewed By: nikic
Differential Revision: https://reviews.llvm.org/D60261
llvm-svn: 363035
Diffstat (limited to 'llvm/lib/CodeGen/ExpandReductions.cpp')
-rw-r--r-- llvm/lib/CodeGen/ExpandReductions.cpp | 57
1 files changed, 29 insertions, 28 deletions
diff --git a/llvm/lib/CodeGen/ExpandReductions.cpp b/llvm/lib/CodeGen/ExpandReductions.cpp
index 340ee19c339..1069a2423b8 100644
--- a/llvm/lib/CodeGen/ExpandReductions.cpp
+++ b/llvm/lib/CodeGen/ExpandReductions.cpp
@@ -29,9 +29,9 @@ namespace {
unsigned getOpcode(Intrinsic::ID ID) {
switch (ID) {
- case Intrinsic::experimental_vector_reduce_fadd:
+ case Intrinsic::experimental_vector_reduce_v2_fadd:
return Instruction::FAdd;
- case Intrinsic::experimental_vector_reduce_fmul:
+ case Intrinsic::experimental_vector_reduce_v2_fmul:
return Instruction::FMul;
case Intrinsic::experimental_vector_reduce_add:
return Instruction::Add;
@@ -83,22 +83,33 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
Worklist.push_back(II);
for (auto *II : Worklist) {
+ if (!TTI->shouldExpandReduction(II))
+ continue;
+
+ FastMathFlags FMF =
+ isa<FPMathOperator>(II) ? II->getFastMathFlags() : FastMathFlags{};
+ Intrinsic::ID ID = II->getIntrinsicID();
+ RecurrenceDescriptor::MinMaxRecurrenceKind MRK = getMRK(ID);
+
+ Value *Rdx = nullptr;
IRBuilder<> Builder(II);
- bool IsOrdered = false;
- Value *Acc = nullptr;
- Value *Vec = nullptr;
- auto ID = II->getIntrinsicID();
- auto MRK = RecurrenceDescriptor::MRK_Invalid;
+ IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
+ Builder.setFastMathFlags(FMF);
switch (ID) {
- case Intrinsic::experimental_vector_reduce_fadd:
- case Intrinsic::experimental_vector_reduce_fmul:
+ case Intrinsic::experimental_vector_reduce_v2_fadd:
+ case Intrinsic::experimental_vector_reduce_v2_fmul: {
// FMFs must be attached to the call, otherwise it's an ordered reduction
// and it can't be handled by generating a shuffle sequence.
- if (!II->getFastMathFlags().isFast())
- IsOrdered = true;
- Acc = II->getArgOperand(0);
- Vec = II->getArgOperand(1);
- break;
+ Value *Acc = II->getArgOperand(0);
+ Value *Vec = II->getArgOperand(1);
+ if (!FMF.allowReassoc())
+ Rdx = getOrderedReduction(Builder, Acc, Vec, getOpcode(ID), MRK);
+ else {
+ Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);
+ Rdx = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(ID),
+ Acc, Rdx, "bin.rdx");
+ }
+ } break;
case Intrinsic::experimental_vector_reduce_add:
case Intrinsic::experimental_vector_reduce_mul:
case Intrinsic::experimental_vector_reduce_and:
@@ -109,23 +120,13 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
case Intrinsic::experimental_vector_reduce_umax:
case Intrinsic::experimental_vector_reduce_umin:
case Intrinsic::experimental_vector_reduce_fmax:
- case Intrinsic::experimental_vector_reduce_fmin:
- Vec = II->getArgOperand(0);
- MRK = getMRK(ID);
- break;
+ case Intrinsic::experimental_vector_reduce_fmin: {
+ Value *Vec = II->getArgOperand(0);
+ Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);
+ } break;
default:
continue;
}
- if (!TTI->shouldExpandReduction(II))
- continue;
- // Propagate FMF using the builder.
- FastMathFlags FMF =
- isa<FPMathOperator>(II) ? II->getFastMathFlags() : FastMathFlags{};
- IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
- Builder.setFastMathFlags(FMF);
- Value *Rdx =
- IsOrdered ? getOrderedReduction(Builder, Acc, Vec, getOpcode(ID), MRK)
- : getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);
II->replaceAllUsesWith(Rdx);
II->eraseFromParent();
Changed = true;
OpenPOWER on IntegriCloud