diff options
author | Craig Topper <craig.topper@intel.com> | 2019-11-14 10:02:51 -0800 |
---|---|---|
committer | Craig Topper <craig.topper@intel.com> | 2019-11-14 10:26:53 -0800 |
commit | 17bb2d7c803db4cc52ebfb95e627b92eeccf302a (patch) | |
tree | 7097bee5a7d5e37a5a61c4edc6b5a8a0363e7592 /llvm/lib/CodeGen/ExpandReductions.cpp | |
parent | 4ee70e00b509fe26bac4196df76dc7c6153f1206 (diff) | |
download | bcm5719-llvm-17bb2d7c803db4cc52ebfb95e627b92eeccf302a.tar.gz bcm5719-llvm-17bb2d7c803db4cc52ebfb95e627b92eeccf302a.zip |
[ExpandReductions] Don't push all intrinsics to the worklist. Just push reductions.
We were previously pushing all intrinsics used in a function to the
worklist. This is wasteful for memory in a function with a lot of
intrinsics.
We also ask TTI if we should expand every intrinsic, but we only
have expansion support for the reduction intrinsics. This just
wastes time for the non-reduction intrinsics.
This patch only pushes reduction intrinsics into the worklist and
skips other intrinsics.
Differential Revision: https://reviews.llvm.org/D69470
Diffstat (limited to 'llvm/lib/CodeGen/ExpandReductions.cpp')
-rw-r--r-- | llvm/lib/CodeGen/ExpandReductions.cpp | 39 |
1 file changed, 29 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/CodeGen/ExpandReductions.cpp b/llvm/lib/CodeGen/ExpandReductions.cpp index 2bd510398e7..4ccf1d2c8c5 100644 --- a/llvm/lib/CodeGen/ExpandReductions.cpp +++ b/llvm/lib/CodeGen/ExpandReductions.cpp @@ -79,14 +79,32 @@ RecurrenceDescriptor::MinMaxRecurrenceKind getMRK(Intrinsic::ID ID) { bool expandReductions(Function &F, const TargetTransformInfo *TTI) { bool Changed = false; SmallVector<IntrinsicInst *, 4> Worklist; - for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) - if (auto II = dyn_cast<IntrinsicInst>(&*I)) - Worklist.push_back(II); + for (auto &I : instructions(F)) { + if (auto *II = dyn_cast<IntrinsicInst>(&I)) { + switch (II->getIntrinsicID()) { + default: break; + case Intrinsic::experimental_vector_reduce_v2_fadd: + case Intrinsic::experimental_vector_reduce_v2_fmul: + case Intrinsic::experimental_vector_reduce_add: + case Intrinsic::experimental_vector_reduce_mul: + case Intrinsic::experimental_vector_reduce_and: + case Intrinsic::experimental_vector_reduce_or: + case Intrinsic::experimental_vector_reduce_xor: + case Intrinsic::experimental_vector_reduce_smax: + case Intrinsic::experimental_vector_reduce_smin: + case Intrinsic::experimental_vector_reduce_umax: + case Intrinsic::experimental_vector_reduce_umin: + case Intrinsic::experimental_vector_reduce_fmax: + case Intrinsic::experimental_vector_reduce_fmin: + if (TTI->shouldExpandReduction(II)) + Worklist.push_back(II); + + break; + } + } + } for (auto *II : Worklist) { - if (!TTI->shouldExpandReduction(II)) - continue; - FastMathFlags FMF = isa<FPMathOperator>(II) ? 
II->getFastMathFlags() : FastMathFlags{}; Intrinsic::ID ID = II->getIntrinsicID(); @@ -97,6 +115,7 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) { IRBuilder<>::FastMathFlagGuard FMFGuard(Builder); Builder.setFastMathFlags(FMF); switch (ID) { + default: llvm_unreachable("Unexpected intrinsic!"); case Intrinsic::experimental_vector_reduce_v2_fadd: case Intrinsic::experimental_vector_reduce_v2_fmul: { // FMFs must be attached to the call, otherwise it's an ordered reduction @@ -113,7 +132,8 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) { Rdx = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(ID), Acc, Rdx, "bin.rdx"); } - } break; + break; + } case Intrinsic::experimental_vector_reduce_add: case Intrinsic::experimental_vector_reduce_mul: case Intrinsic::experimental_vector_reduce_and: @@ -130,9 +150,8 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) { continue; Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK); - } break; - default: - continue; + break; + } } II->replaceAllUsesWith(Rdx); II->eraseFromParent(); |