diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/CodeGen/ExpandReductions.cpp | 57 | ||||
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 18 | ||||
-rw-r--r-- | llvm/lib/IR/AutoUpgrade.cpp | 43 | ||||
-rw-r--r-- | llvm/lib/IR/IRBuilder.cpp | 4 | ||||
-rw-r--r-- | llvm/lib/Transforms/Utils/LoopUtils.cpp | 12 |
5 files changed, 87 insertions, 47 deletions
diff --git a/llvm/lib/CodeGen/ExpandReductions.cpp b/llvm/lib/CodeGen/ExpandReductions.cpp index 340ee19c339..1069a2423b8 100644 --- a/llvm/lib/CodeGen/ExpandReductions.cpp +++ b/llvm/lib/CodeGen/ExpandReductions.cpp @@ -29,9 +29,9 @@ namespace { unsigned getOpcode(Intrinsic::ID ID) { switch (ID) { - case Intrinsic::experimental_vector_reduce_fadd: + case Intrinsic::experimental_vector_reduce_v2_fadd: return Instruction::FAdd; - case Intrinsic::experimental_vector_reduce_fmul: + case Intrinsic::experimental_vector_reduce_v2_fmul: return Instruction::FMul; case Intrinsic::experimental_vector_reduce_add: return Instruction::Add; @@ -83,22 +83,33 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) { Worklist.push_back(II); for (auto *II : Worklist) { + if (!TTI->shouldExpandReduction(II)) + continue; + + FastMathFlags FMF = + isa<FPMathOperator>(II) ? II->getFastMathFlags() : FastMathFlags{}; + Intrinsic::ID ID = II->getIntrinsicID(); + RecurrenceDescriptor::MinMaxRecurrenceKind MRK = getMRK(ID); + + Value *Rdx = nullptr; IRBuilder<> Builder(II); - bool IsOrdered = false; - Value *Acc = nullptr; - Value *Vec = nullptr; - auto ID = II->getIntrinsicID(); - auto MRK = RecurrenceDescriptor::MRK_Invalid; + IRBuilder<>::FastMathFlagGuard FMFGuard(Builder); + Builder.setFastMathFlags(FMF); switch (ID) { - case Intrinsic::experimental_vector_reduce_fadd: - case Intrinsic::experimental_vector_reduce_fmul: + case Intrinsic::experimental_vector_reduce_v2_fadd: + case Intrinsic::experimental_vector_reduce_v2_fmul: { // FMFs must be attached to the call, otherwise it's an ordered reduction // and it can't be handled by generating a shuffle sequence. - if (!II->getFastMathFlags().isFast()) - IsOrdered = true; - Acc = II->getArgOperand(0); - Vec = II->getArgOperand(1); - break; + Value *Acc = II->getArgOperand(0); + Value *Vec = II->getArgOperand(1); + if (!FMF.allowReassoc()) + Rdx = getOrderedReduction(Builder, Acc, Vec, getOpcode(ID), MRK); + else { + Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK); + Rdx = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(ID), + Acc, Rdx, "bin.rdx"); + } + } break; case Intrinsic::experimental_vector_reduce_add: case Intrinsic::experimental_vector_reduce_mul: case Intrinsic::experimental_vector_reduce_and: @@ -109,23 +120,13 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) { case Intrinsic::experimental_vector_reduce_umax: case Intrinsic::experimental_vector_reduce_umin: case Intrinsic::experimental_vector_reduce_fmax: - case Intrinsic::experimental_vector_reduce_fmin: - Vec = II->getArgOperand(0); - MRK = getMRK(ID); - break; + case Intrinsic::experimental_vector_reduce_fmin: { + Value *Vec = II->getArgOperand(0); + Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK); + } break; default: continue; } - if (!TTI->shouldExpandReduction(II)) - continue; - // Propagate FMF using the builder. - FastMathFlags FMF = - isa<FPMathOperator>(II) ? II->getFastMathFlags() : FastMathFlags{}; - IRBuilder<>::FastMathFlagGuard FMFGuard(Builder); - Builder.setFastMathFlags(FMF); - Value *Rdx = - IsOrdered ? getOrderedReduction(Builder, Acc, Vec, getOpcode(ID), MRK) - : getShuffleReduction(Builder, Vec, getOpcode(ID), MRK); II->replaceAllUsesWith(Rdx); II->eraseFromParent(); Changed = true; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 95b429b915c..0ad5bf70e31 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -6736,8 +6736,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, LowerDeoptimizeCall(&I); return; - case Intrinsic::experimental_vector_reduce_fadd: - case Intrinsic::experimental_vector_reduce_fmul: + case Intrinsic::experimental_vector_reduce_v2_fadd: + case Intrinsic::experimental_vector_reduce_v2_fmul: case Intrinsic::experimental_vector_reduce_add: case Intrinsic::experimental_vector_reduce_mul: case Intrinsic::experimental_vector_reduce_and: @@ -8795,15 +8795,17 @@ void SelectionDAGBuilder::visitVectorReduce(const CallInst &I, FMF = I.getFastMathFlags(); switch (Intrinsic) { - case Intrinsic::experimental_vector_reduce_fadd: - if (FMF.isFast()) - Res = DAG.getNode(ISD::VECREDUCE_FADD, dl, VT, Op2); + case Intrinsic::experimental_vector_reduce_v2_fadd: + if (FMF.allowReassoc()) + Res = DAG.getNode(ISD::FADD, dl, VT, Op1, + DAG.getNode(ISD::VECREDUCE_FADD, dl, VT, Op2)); else Res = DAG.getNode(ISD::VECREDUCE_STRICT_FADD, dl, VT, Op1, Op2); break; - case Intrinsic::experimental_vector_reduce_fmul: - if (FMF.isFast()) - Res = DAG.getNode(ISD::VECREDUCE_FMUL, dl, VT, Op2); + case Intrinsic::experimental_vector_reduce_v2_fmul: + if (FMF.allowReassoc()) + Res = DAG.getNode(ISD::FMUL, dl, VT, Op1, + DAG.getNode(ISD::VECREDUCE_FMUL, dl, VT, Op2)); else Res = DAG.getNode(ISD::VECREDUCE_STRICT_FMUL, dl, VT, Op1, Op2); break; diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index e6a096a8855..e8ecee858d7 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -602,6 +602,26 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { } break; } + case 'e': { + SmallVector<StringRef, 2> Groups; + Regex R("^experimental.vector.reduce.([a-z]+)\\.[fi][0-9]+"); + if (R.match(Name, &Groups)) { + Intrinsic::ID ID = Intrinsic::not_intrinsic; + if (Groups[1] == "fadd") + ID = Intrinsic::experimental_vector_reduce_v2_fadd; + if (Groups[1] == "fmul") + ID = Intrinsic::experimental_vector_reduce_v2_fmul; + + if (ID != Intrinsic::not_intrinsic) { + rename(F); + auto Args = F->getFunctionType()->params(); + Type *Tys[] = {F->getFunctionType()->getReturnType(), Args[1]}; + NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Tys); + return true; + } + } + break; + } case 'i': case 'l': { bool IsLifetimeStart = Name.startswith("lifetime.start"); @@ -3467,7 +3487,28 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { DefaultCase(); return; } - + case Intrinsic::experimental_vector_reduce_v2_fmul: { + SmallVector<Value *, 2> Args; + if (CI->isFast()) + Args.push_back(ConstantFP::get(CI->getOperand(0)->getType(), 1.0)); + else + Args.push_back(CI->getOperand(0)); + Args.push_back(CI->getOperand(1)); + NewCall = Builder.CreateCall(NewFn, Args); + cast<Instruction>(NewCall)->copyFastMathFlags(CI); + break; + } + case Intrinsic::experimental_vector_reduce_v2_fadd: { + SmallVector<Value *, 2> Args; + if (CI->isFast()) + Args.push_back(Constant::getNullValue(CI->getOperand(0)->getType())); + else + Args.push_back(CI->getOperand(0)); + Args.push_back(CI->getOperand(1)); + NewCall = Builder.CreateCall(NewFn, Args); + cast<Instruction>(NewCall)->copyFastMathFlags(CI); + break; + } case Intrinsic::arm_neon_vld1: case Intrinsic::arm_neon_vld2: case Intrinsic::arm_neon_vld3: diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp index 68aa18e3f5b..36c823e7a10 100644 --- a/llvm/lib/IR/IRBuilder.cpp +++ b/llvm/lib/IR/IRBuilder.cpp @@ -323,7 +323,7 @@ CallInst *IRBuilderBase::CreateFAddReduce(Value *Acc, Value *Src) { Value *Ops[] = {Acc, Src}; Type *Tys[] = {Acc->getType(), Src->getType()}; auto Decl = Intrinsic::getDeclaration( - M, Intrinsic::experimental_vector_reduce_fadd, Tys); + M, Intrinsic::experimental_vector_reduce_v2_fadd, Tys); return createCallHelper(Decl, Ops, this); } @@ -332,7 +332,7 @@ CallInst *IRBuilderBase::CreateFMulReduce(Value *Acc, Value *Src) { Value *Ops[] = {Acc, Src}; Type *Tys[] = {Acc->getType(), Src->getType()}; auto Decl = Intrinsic::getDeclaration( - M, Intrinsic::experimental_vector_reduce_fmul, Tys); + M, Intrinsic::experimental_vector_reduce_v2_fmul, Tys); return createCallHelper(Decl, Ops, this); } diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index 03d84c39b66..29ae77c385d 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -801,13 +801,9 @@ Value *llvm::createSimpleTargetReduction( ArrayRef<Value *> RedOps) { assert(isa<VectorType>(Src->getType()) && "Type must be a vector"); - Value *ScalarUdf = UndefValue::get(Src->getType()->getVectorElementType()); std::function<Value *()> BuildFunc; using RD = RecurrenceDescriptor; RD::MinMaxRecurrenceKind MinMaxKind = RD::MRK_Invalid; - // TODO: Support creating ordered reductions. - FastMathFlags FMFFast; - FMFFast.setFast(); switch (Opcode) { case Instruction::Add: @@ -827,15 +823,15 @@ Value *llvm::createSimpleTargetReduction( break; case Instruction::FAdd: BuildFunc = [&]() { - auto Rdx = Builder.CreateFAddReduce(ScalarUdf, Src); - cast<CallInst>(Rdx)->setFastMathFlags(FMFFast); + auto Rdx = Builder.CreateFAddReduce( + Constant::getNullValue(Src->getType()->getVectorElementType()), Src); return Rdx; }; break; case Instruction::FMul: BuildFunc = [&]() { - auto Rdx = Builder.CreateFMulReduce(ScalarUdf, Src); - cast<CallInst>(Rdx)->setFastMathFlags(FMFFast); + Type *Ty = Src->getType()->getVectorElementType(); + auto Rdx = Builder.CreateFMulReduce(ConstantFP::get(Ty, 1.0), Src); return Rdx; }; break; |