diff options
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Analysis/TargetTransformInfo.cpp | 3 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/CMakeLists.txt | 1 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/ExpandReductions.cpp | 167 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/TargetPassConfig.cpp | 3 | ||||
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h | 4 | ||||
| -rw-r--r-- | llvm/lib/Transforms/Utils/LoopUtils.cpp | 9 |
6 files changed, 182 insertions, 5 deletions
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index a73fe65e965..805b645eaca 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -505,6 +505,9 @@ bool TargetTransformInfo::useReductionIntrinsic(unsigned Opcode, return TTIImpl->useReductionIntrinsic(Opcode, Ty, Flags); } +bool TargetTransformInfo::shouldExpandReduction(const IntrinsicInst *II) const { + return TTIImpl->shouldExpandReduction(II); +} TargetTransformInfo::Concept::~Concept() {} diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt index 26da748fa24..1cdfd773a32 100644 --- a/llvm/lib/CodeGen/CMakeLists.txt +++ b/llvm/lib/CodeGen/CMakeLists.txt @@ -23,6 +23,7 @@ add_llvm_library(LLVMCodeGen ExecutionDepsFix.cpp ExpandISelPseudos.cpp ExpandPostRAPseudos.cpp + ExpandReductions.cpp FaultMaps.cpp FEntryInserter.cpp FuncletLayout.cpp diff --git a/llvm/lib/CodeGen/ExpandReductions.cpp b/llvm/lib/CodeGen/ExpandReductions.cpp new file mode 100644 index 00000000000..a40ea28056d --- /dev/null +++ b/llvm/lib/CodeGen/ExpandReductions.cpp @@ -0,0 +1,167 @@ +//===--- ExpandReductions.cpp - Expand experimental reduction intrinsics --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass implements IR expansion for reduction intrinsics, allowing targets +// to enable the experimental intrinsics until just before codegen. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/ExpandReductions.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/Transforms/Utils/LoopUtils.h" +#include "llvm/Pass.h" + +using namespace llvm; + +namespace { + +unsigned getOpcode(Intrinsic::ID ID) { + switch (ID) { + case Intrinsic::experimental_vector_reduce_fadd: + return Instruction::FAdd; + case Intrinsic::experimental_vector_reduce_fmul: + return Instruction::FMul; + case Intrinsic::experimental_vector_reduce_add: + return Instruction::Add; + case Intrinsic::experimental_vector_reduce_mul: + return Instruction::Mul; + case Intrinsic::experimental_vector_reduce_and: + return Instruction::And; + case Intrinsic::experimental_vector_reduce_or: + return Instruction::Or; + case Intrinsic::experimental_vector_reduce_xor: + return Instruction::Xor; + case Intrinsic::experimental_vector_reduce_smax: + case Intrinsic::experimental_vector_reduce_smin: + case Intrinsic::experimental_vector_reduce_umax: + case Intrinsic::experimental_vector_reduce_umin: + return Instruction::ICmp; + case Intrinsic::experimental_vector_reduce_fmax: + case Intrinsic::experimental_vector_reduce_fmin: + return Instruction::FCmp; + default: + llvm_unreachable("Unexpected ID"); + } +} + +RecurrenceDescriptor::MinMaxRecurrenceKind getMRK(Intrinsic::ID ID) { + switch (ID) { + case Intrinsic::experimental_vector_reduce_smax: + return RecurrenceDescriptor::MRK_SIntMax; + case Intrinsic::experimental_vector_reduce_smin: + return RecurrenceDescriptor::MRK_SIntMin; + case Intrinsic::experimental_vector_reduce_umax: + return RecurrenceDescriptor::MRK_UIntMax; + case Intrinsic::experimental_vector_reduce_umin: + return RecurrenceDescriptor::MRK_UIntMin; + case Intrinsic::experimental_vector_reduce_fmax: + return RecurrenceDescriptor::MRK_FloatMax; + case Intrinsic::experimental_vector_reduce_fmin: + return RecurrenceDescriptor::MRK_FloatMin; + default: + return RecurrenceDescriptor::MRK_Invalid; + } +} + +bool expandReductions(Function &F, const TargetTransformInfo *TTI) { + bool Changed = false; + SmallVector<IntrinsicInst*, 4> Worklist; + for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) + if (auto II = dyn_cast<IntrinsicInst>(&*I)) + Worklist.push_back(II); + + for (auto *II : Worklist) { + IRBuilder<> Builder(II); + Value *Vec = nullptr; + auto ID = II->getIntrinsicID(); + auto MRK = RecurrenceDescriptor::MRK_Invalid; + switch (ID) { + case Intrinsic::experimental_vector_reduce_fadd: + case Intrinsic::experimental_vector_reduce_fmul: + // FMFs must be attached to the call, otherwise it's an ordered reduction + // and it can't be handled by generating this shuffle sequence. + // TODO: Implement scalarization of ordered reductions here for targets + // without native support. + if (!II->getFastMathFlags().unsafeAlgebra()) + continue; + Vec = II->getArgOperand(1); + break; + case Intrinsic::experimental_vector_reduce_add: + case Intrinsic::experimental_vector_reduce_mul: + case Intrinsic::experimental_vector_reduce_and: + case Intrinsic::experimental_vector_reduce_or: + case Intrinsic::experimental_vector_reduce_xor: + case Intrinsic::experimental_vector_reduce_smax: + case Intrinsic::experimental_vector_reduce_smin: + case Intrinsic::experimental_vector_reduce_umax: + case Intrinsic::experimental_vector_reduce_umin: + case Intrinsic::experimental_vector_reduce_fmax: + case Intrinsic::experimental_vector_reduce_fmin: + Vec = II->getArgOperand(0); + MRK = getMRK(ID); + break; + default: + continue; + } + if (!TTI->shouldExpandReduction(II)) + continue; + auto Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK); + II->replaceAllUsesWith(Rdx); + II->eraseFromParent(); + Changed = true; + } + return Changed; +} + +class ExpandReductions : public FunctionPass { +public: + static char ID; + ExpandReductions() : FunctionPass(ID) { + initializeExpandReductionsPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override { + const auto *TTI =&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); + return expandReductions(F, TTI); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<TargetTransformInfoWrapperPass>(); + AU.setPreservesCFG(); + } +}; +} + +char ExpandReductions::ID; +INITIALIZE_PASS_BEGIN(ExpandReductions, "expand-reductions", + "Expand reduction intrinsics", false, false) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_END(ExpandReductions, "expand-reductions", + "Expand reduction intrinsics", false, false) + +FunctionPass *llvm::createExpandReductionsPass() { + return new ExpandReductions(); +} + +PreservedAnalyses ExpandReductionsPass::run(Function &F, + FunctionAnalysisManager &AM) { + const auto &TTI = AM.getResult<TargetIRAnalysis>(F); + if (!expandReductions(F, &TTI)) + return PreservedAnalyses::all(); + PreservedAnalyses PA; + PA.preserveSet<CFGAnalyses>(); + return PA; +} diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp index 150195f5f85..cbe37c40834 100644 --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -487,6 +487,9 @@ void TargetPassConfig::addIRPasses() { // Insert calls to mcount-like functions. addPass(createCountingFunctionInserterPass()); + + // Expand reduction intrinsics into shuffle sequences if the target wants to. + addPass(createExpandReductionsPass()); } /// Turn exception handling constructs into something the code generators can diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h index a8cf2ccf106..39258115dcb 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -137,6 +137,10 @@ public: unsigned getMinPrefetchStride(); unsigned getMaxPrefetchIterationsAhead(); + + bool shouldExpandReduction(const IntrinsicInst *II) const { + return false; + } /// @} }; diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index d41fe6267a0..81f033e7d51 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -1125,11 +1125,10 @@ static Value *addFastMathFlag(Value *V) { } // Helper to generate a log2 shuffle reduction. -static Value * -getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op, - RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind = - RecurrenceDescriptor::MRK_Invalid, - ArrayRef<Value *> RedOps = ArrayRef<Value *>()) { +Value * +llvm::getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op, + RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind, + ArrayRef<Value *> RedOps) { unsigned VF = Src->getType()->getVectorNumElements(); // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles // and vector ops, reducing the set of values being computed by half each |

