summary refs log tree commit diff stats
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/CodeGen/ExpandReductions.cpp 57
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp 18
-rw-r--r-- llvm/lib/IR/AutoUpgrade.cpp 43
-rw-r--r-- llvm/lib/IR/IRBuilder.cpp 4
-rw-r--r-- llvm/lib/Transforms/Utils/LoopUtils.cpp 12
5 files changed, 87 insertions, 47 deletions
diff --git a/llvm/lib/CodeGen/ExpandReductions.cpp b/llvm/lib/CodeGen/ExpandReductions.cpp
index 340ee19c339..1069a2423b8 100644
--- a/llvm/lib/CodeGen/ExpandReductions.cpp
+++ b/llvm/lib/CodeGen/ExpandReductions.cpp
@@ -29,9 +29,9 @@ namespace {
unsigned getOpcode(Intrinsic::ID ID) {
switch (ID) {
- case Intrinsic::experimental_vector_reduce_fadd:
+ case Intrinsic::experimental_vector_reduce_v2_fadd:
return Instruction::FAdd;
- case Intrinsic::experimental_vector_reduce_fmul:
+ case Intrinsic::experimental_vector_reduce_v2_fmul:
return Instruction::FMul;
case Intrinsic::experimental_vector_reduce_add:
return Instruction::Add;
@@ -83,22 +83,33 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
Worklist.push_back(II);
for (auto *II : Worklist) {
+ if (!TTI->shouldExpandReduction(II))
+ continue;
+
+ FastMathFlags FMF =
+ isa<FPMathOperator>(II) ? II->getFastMathFlags() : FastMathFlags{};
+ Intrinsic::ID ID = II->getIntrinsicID();
+ RecurrenceDescriptor::MinMaxRecurrenceKind MRK = getMRK(ID);
+
+ Value *Rdx = nullptr;
IRBuilder<> Builder(II);
- bool IsOrdered = false;
- Value *Acc = nullptr;
- Value *Vec = nullptr;
- auto ID = II->getIntrinsicID();
- auto MRK = RecurrenceDescriptor::MRK_Invalid;
+ IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
+ Builder.setFastMathFlags(FMF);
switch (ID) {
- case Intrinsic::experimental_vector_reduce_fadd:
- case Intrinsic::experimental_vector_reduce_fmul:
+ case Intrinsic::experimental_vector_reduce_v2_fadd:
+ case Intrinsic::experimental_vector_reduce_v2_fmul: {
// FMFs must be attached to the call, otherwise it's an ordered reduction
// and it can't be handled by generating a shuffle sequence.
- if (!II->getFastMathFlags().isFast())
- IsOrdered = true;
- Acc = II->getArgOperand(0);
- Vec = II->getArgOperand(1);
- break;
+ Value *Acc = II->getArgOperand(0);
+ Value *Vec = II->getArgOperand(1);
+ if (!FMF.allowReassoc())
+ Rdx = getOrderedReduction(Builder, Acc, Vec, getOpcode(ID), MRK);
+ else {
+ Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);
+ Rdx = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(ID),
+ Acc, Rdx, "bin.rdx");
+ }
+ } break;
case Intrinsic::experimental_vector_reduce_add:
case Intrinsic::experimental_vector_reduce_mul:
case Intrinsic::experimental_vector_reduce_and:
@@ -109,23 +120,13 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
case Intrinsic::experimental_vector_reduce_umax:
case Intrinsic::experimental_vector_reduce_umin:
case Intrinsic::experimental_vector_reduce_fmax:
- case Intrinsic::experimental_vector_reduce_fmin:
- Vec = II->getArgOperand(0);
- MRK = getMRK(ID);
- break;
+ case Intrinsic::experimental_vector_reduce_fmin: {
+ Value *Vec = II->getArgOperand(0);
+ Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);
+ } break;
default:
continue;
}
- if (!TTI->shouldExpandReduction(II))
- continue;
- // Propagate FMF using the builder.
- FastMathFlags FMF =
- isa<FPMathOperator>(II) ? II->getFastMathFlags() : FastMathFlags{};
- IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
- Builder.setFastMathFlags(FMF);
- Value *Rdx =
- IsOrdered ? getOrderedReduction(Builder, Acc, Vec, getOpcode(ID), MRK)
- : getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);
II->replaceAllUsesWith(Rdx);
II->eraseFromParent();
Changed = true;
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 95b429b915c..0ad5bf70e31 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -6736,8 +6736,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
LowerDeoptimizeCall(&I);
return;
- case Intrinsic::experimental_vector_reduce_fadd:
- case Intrinsic::experimental_vector_reduce_fmul:
+ case Intrinsic::experimental_vector_reduce_v2_fadd:
+ case Intrinsic::experimental_vector_reduce_v2_fmul:
case Intrinsic::experimental_vector_reduce_add:
case Intrinsic::experimental_vector_reduce_mul:
case Intrinsic::experimental_vector_reduce_and:
@@ -8795,15 +8795,17 @@ void SelectionDAGBuilder::visitVectorReduce(const CallInst &I,
FMF = I.getFastMathFlags();
switch (Intrinsic) {
- case Intrinsic::experimental_vector_reduce_fadd:
- if (FMF.isFast())
- Res = DAG.getNode(ISD::VECREDUCE_FADD, dl, VT, Op2);
+ case Intrinsic::experimental_vector_reduce_v2_fadd:
+ if (FMF.allowReassoc())
+ Res = DAG.getNode(ISD::FADD, dl, VT, Op1,
+ DAG.getNode(ISD::VECREDUCE_FADD, dl, VT, Op2));
else
Res = DAG.getNode(ISD::VECREDUCE_STRICT_FADD, dl, VT, Op1, Op2);
break;
- case Intrinsic::experimental_vector_reduce_fmul:
- if (FMF.isFast())
- Res = DAG.getNode(ISD::VECREDUCE_FMUL, dl, VT, Op2);
+ case Intrinsic::experimental_vector_reduce_v2_fmul:
+ if (FMF.allowReassoc())
+ Res = DAG.getNode(ISD::FMUL, dl, VT, Op1,
+ DAG.getNode(ISD::VECREDUCE_FMUL, dl, VT, Op2));
else
Res = DAG.getNode(ISD::VECREDUCE_STRICT_FMUL, dl, VT, Op1, Op2);
break;
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index e6a096a8855..e8ecee858d7 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -602,6 +602,26 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
}
break;
}
+ case 'e': {
+ SmallVector<StringRef, 2> Groups;
+ Regex R("^experimental.vector.reduce.([a-z]+)\\.[fi][0-9]+");
+ if (R.match(Name, &Groups)) {
+ Intrinsic::ID ID = Intrinsic::not_intrinsic;
+ if (Groups[1] == "fadd")
+ ID = Intrinsic::experimental_vector_reduce_v2_fadd;
+ if (Groups[1] == "fmul")
+ ID = Intrinsic::experimental_vector_reduce_v2_fmul;
+
+ if (ID != Intrinsic::not_intrinsic) {
+ rename(F);
+ auto Args = F->getFunctionType()->params();
+ Type *Tys[] = {F->getFunctionType()->getReturnType(), Args[1]};
+ NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
+ return true;
+ }
+ }
+ break;
+ }
case 'i':
case 'l': {
bool IsLifetimeStart = Name.startswith("lifetime.start");
@@ -3467,7 +3487,28 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
DefaultCase();
return;
}
-
+ case Intrinsic::experimental_vector_reduce_v2_fmul: {
+ SmallVector<Value *, 2> Args;
+ if (CI->isFast())
+ Args.push_back(ConstantFP::get(CI->getOperand(0)->getType(), 1.0));
+ else
+ Args.push_back(CI->getOperand(0));
+ Args.push_back(CI->getOperand(1));
+ NewCall = Builder.CreateCall(NewFn, Args);
+ cast<Instruction>(NewCall)->copyFastMathFlags(CI);
+ break;
+ }
+ case Intrinsic::experimental_vector_reduce_v2_fadd: {
+ SmallVector<Value *, 2> Args;
+ if (CI->isFast())
+ Args.push_back(Constant::getNullValue(CI->getOperand(0)->getType()));
+ else
+ Args.push_back(CI->getOperand(0));
+ Args.push_back(CI->getOperand(1));
+ NewCall = Builder.CreateCall(NewFn, Args);
+ cast<Instruction>(NewCall)->copyFastMathFlags(CI);
+ break;
+ }
case Intrinsic::arm_neon_vld1:
case Intrinsic::arm_neon_vld2:
case Intrinsic::arm_neon_vld3:
diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp
index 68aa18e3f5b..36c823e7a10 100644
--- a/llvm/lib/IR/IRBuilder.cpp
+++ b/llvm/lib/IR/IRBuilder.cpp
@@ -323,7 +323,7 @@ CallInst *IRBuilderBase::CreateFAddReduce(Value *Acc, Value *Src) {
Value *Ops[] = {Acc, Src};
Type *Tys[] = {Acc->getType(), Src->getType()};
auto Decl = Intrinsic::getDeclaration(
- M, Intrinsic::experimental_vector_reduce_fadd, Tys);
+ M, Intrinsic::experimental_vector_reduce_v2_fadd, Tys);
return createCallHelper(Decl, Ops, this);
}
@@ -332,7 +332,7 @@ CallInst *IRBuilderBase::CreateFMulReduce(Value *Acc, Value *Src) {
Value *Ops[] = {Acc, Src};
Type *Tys[] = {Acc->getType(), Src->getType()};
auto Decl = Intrinsic::getDeclaration(
- M, Intrinsic::experimental_vector_reduce_fmul, Tys);
+ M, Intrinsic::experimental_vector_reduce_v2_fmul, Tys);
return createCallHelper(Decl, Ops, this);
}
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index 03d84c39b66..29ae77c385d 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -801,13 +801,9 @@ Value *llvm::createSimpleTargetReduction(
ArrayRef<Value *> RedOps) {
assert(isa<VectorType>(Src->getType()) && "Type must be a vector");
- Value *ScalarUdf = UndefValue::get(Src->getType()->getVectorElementType());
std::function<Value *()> BuildFunc;
using RD = RecurrenceDescriptor;
RD::MinMaxRecurrenceKind MinMaxKind = RD::MRK_Invalid;
- // TODO: Support creating ordered reductions.
- FastMathFlags FMFFast;
- FMFFast.setFast();
switch (Opcode) {
case Instruction::Add:
@@ -827,15 +823,15 @@ Value *llvm::createSimpleTargetReduction(
break;
case Instruction::FAdd:
BuildFunc = [&]() {
- auto Rdx = Builder.CreateFAddReduce(ScalarUdf, Src);
- cast<CallInst>(Rdx)->setFastMathFlags(FMFFast);
+ auto Rdx = Builder.CreateFAddReduce(
+ Constant::getNullValue(Src->getType()->getVectorElementType()), Src);
return Rdx;
};
break;
case Instruction::FMul:
BuildFunc = [&]() {
- auto Rdx = Builder.CreateFMulReduce(ScalarUdf, Src);
- cast<CallInst>(Rdx)->setFastMathFlags(FMFFast);
+ Type *Ty = Src->getType()->getVectorElementType();
+ auto Rdx = Builder.CreateFMulReduce(ConstantFP::get(Ty, 1.0), Src);
return Rdx;
};
break;
OpenPOWER on IntegriCloud