summaryrefslogtreecommitdiffstats
path: root/llvm/lib/CodeGen/ExpandReductions.cpp
diff options
context:
space:
mode:
authorCraig Topper <craig.topper@intel.com>2019-11-14 10:02:51 -0800
committerCraig Topper <craig.topper@intel.com>2019-11-14 10:26:53 -0800
commit17bb2d7c803db4cc52ebfb95e627b92eeccf302a (patch)
tree7097bee5a7d5e37a5a61c4edc6b5a8a0363e7592 /llvm/lib/CodeGen/ExpandReductions.cpp
parent4ee70e00b509fe26bac4196df76dc7c6153f1206 (diff)
downloadbcm5719-llvm-17bb2d7c803db4cc52ebfb95e627b92eeccf302a.tar.gz
bcm5719-llvm-17bb2d7c803db4cc52ebfb95e627b92eeccf302a.zip
[ExpandReductions] Don't push all intrinsics to the worklist. Just push reductions.
We were previously pushing all intrinsics used in a function to the worklist. This is wasteful for memory in a function with a lot of intrinsics. We also ask TTI if we should expand every intrinsic, but we only have expansion support for the reduction intrinsics. This just wastes time for the non-reduction intrinsics. This patch only pushes reduction intrinsics into the worklist and skips other intrinsics. Differential Revision: https://reviews.llvm.org/D69470
Diffstat (limited to 'llvm/lib/CodeGen/ExpandReductions.cpp')
-rw-r--r--llvm/lib/CodeGen/ExpandReductions.cpp39
1 files changed, 29 insertions, 10 deletions
diff --git a/llvm/lib/CodeGen/ExpandReductions.cpp b/llvm/lib/CodeGen/ExpandReductions.cpp
index 2bd510398e7..4ccf1d2c8c5 100644
--- a/llvm/lib/CodeGen/ExpandReductions.cpp
+++ b/llvm/lib/CodeGen/ExpandReductions.cpp
@@ -79,14 +79,32 @@ RecurrenceDescriptor::MinMaxRecurrenceKind getMRK(Intrinsic::ID ID) {
bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
bool Changed = false;
SmallVector<IntrinsicInst *, 4> Worklist;
- for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
- if (auto II = dyn_cast<IntrinsicInst>(&*I))
- Worklist.push_back(II);
+ for (auto &I : instructions(F)) {
+ if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::experimental_vector_reduce_v2_fadd:
+ case Intrinsic::experimental_vector_reduce_v2_fmul:
+ case Intrinsic::experimental_vector_reduce_add:
+ case Intrinsic::experimental_vector_reduce_mul:
+ case Intrinsic::experimental_vector_reduce_and:
+ case Intrinsic::experimental_vector_reduce_or:
+ case Intrinsic::experimental_vector_reduce_xor:
+ case Intrinsic::experimental_vector_reduce_smax:
+ case Intrinsic::experimental_vector_reduce_smin:
+ case Intrinsic::experimental_vector_reduce_umax:
+ case Intrinsic::experimental_vector_reduce_umin:
+ case Intrinsic::experimental_vector_reduce_fmax:
+ case Intrinsic::experimental_vector_reduce_fmin:
+ if (TTI->shouldExpandReduction(II))
+ Worklist.push_back(II);
+
+ break;
+ }
+ }
+ }
for (auto *II : Worklist) {
- if (!TTI->shouldExpandReduction(II))
- continue;
-
FastMathFlags FMF =
isa<FPMathOperator>(II) ? II->getFastMathFlags() : FastMathFlags{};
Intrinsic::ID ID = II->getIntrinsicID();
@@ -97,6 +115,7 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
Builder.setFastMathFlags(FMF);
switch (ID) {
+ default: llvm_unreachable("Unexpected intrinsic!");
case Intrinsic::experimental_vector_reduce_v2_fadd:
case Intrinsic::experimental_vector_reduce_v2_fmul: {
// FMFs must be attached to the call, otherwise it's an ordered reduction
@@ -113,7 +132,8 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
Rdx = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(ID),
Acc, Rdx, "bin.rdx");
}
- } break;
+ break;
+ }
case Intrinsic::experimental_vector_reduce_add:
case Intrinsic::experimental_vector_reduce_mul:
case Intrinsic::experimental_vector_reduce_and:
@@ -130,9 +150,8 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
continue;
Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);
- } break;
- default:
- continue;
+ break;
+ }
}
II->replaceAllUsesWith(Rdx);
II->eraseFromParent();
OpenPOWER on IntegriCloud