5 files changed, 87 insertions, 47 deletions
diff --git a/llvm/lib/CodeGen/ExpandReductions.cpp b/llvm/lib/CodeGen/ExpandReductions.cpp
index 340ee19c339..1069a2423b8 100644
--- a/llvm/lib/CodeGen/ExpandReductions.cpp
+++ b/llvm/lib/CodeGen/ExpandReductions.cpp
@@ -29,9 +29,9 @@ namespace {
 
 unsigned getOpcode(Intrinsic::ID ID) {
   switch (ID) {
-  case Intrinsic::experimental_vector_reduce_fadd:
+  case Intrinsic::experimental_vector_reduce_v2_fadd:
     return Instruction::FAdd;
-  case Intrinsic::experimental_vector_reduce_fmul:
+  case Intrinsic::experimental_vector_reduce_v2_fmul:
     return Instruction::FMul;
   case Intrinsic::experimental_vector_reduce_add:
     return Instruction::Add;
@@ -83,22 +83,33 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
       Worklist.push_back(II);
 
   for (auto *II : Worklist) {
+    if (!TTI->shouldExpandReduction(II))
+      continue;
+
+    FastMathFlags FMF =
+        isa<FPMathOperator>(II) ? II->getFastMathFlags() : FastMathFlags{};
+    Intrinsic::ID ID = II->getIntrinsicID();
+    RecurrenceDescriptor::MinMaxRecurrenceKind MRK = getMRK(ID);
+
+    Value *Rdx = nullptr;
     IRBuilder<> Builder(II);
-    bool IsOrdered = false;
-    Value *Acc = nullptr;
-    Value *Vec = nullptr;
-    auto ID = II->getIntrinsicID();
-    auto MRK = RecurrenceDescriptor::MRK_Invalid;
+    IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
+    Builder.setFastMathFlags(FMF);
     switch (ID) {
-    case Intrinsic::experimental_vector_reduce_fadd:
-    case Intrinsic::experimental_vector_reduce_fmul:
+    case Intrinsic::experimental_vector_reduce_v2_fadd:
+    case Intrinsic::experimental_vector_reduce_v2_fmul: {
       // FMFs must be attached to the call, otherwise it's an ordered reduction
       // and it can't be handled by generating a shuffle sequence.
-      if (!II->getFastMathFlags().isFast())
-        IsOrdered = true;
-      Acc = II->getArgOperand(0);
-      Vec = II->getArgOperand(1);
-      break;
+      Value *Acc = II->getArgOperand(0);
+      Value *Vec = II->getArgOperand(1);
+      if (!FMF.allowReassoc())
+        Rdx = getOrderedReduction(Builder, Acc, Vec, getOpcode(ID), MRK);
+      else {
+        Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);
+        Rdx = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(ID),
+                                  Acc, Rdx, "bin.rdx");
+      }
+    } break;
     case Intrinsic::experimental_vector_reduce_add:
     case Intrinsic::experimental_vector_reduce_mul:
     case Intrinsic::experimental_vector_reduce_and:
@@ -109,23 +120,13 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
     case Intrinsic::experimental_vector_reduce_umax:
     case Intrinsic::experimental_vector_reduce_umin:
     case Intrinsic::experimental_vector_reduce_fmax:
-    case Intrinsic::experimental_vector_reduce_fmin:
-      Vec = II->getArgOperand(0);
-      MRK = getMRK(ID);
-      break;
+    case Intrinsic::experimental_vector_reduce_fmin: {
+      Value *Vec = II->getArgOperand(0);
+      Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);
+    } break;
     default:
       continue;
     }
-    if (!TTI->shouldExpandReduction(II))
-      continue;
-    // Propagate FMF using the builder.
-    FastMathFlags FMF =
-        isa<FPMathOperator>(II) ? II->getFastMathFlags() : FastMathFlags{};
-    IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
-    Builder.setFastMathFlags(FMF);
-    Value *Rdx =
-        IsOrdered ? getOrderedReduction(Builder, Acc, Vec, getOpcode(ID), MRK)
-                  : getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);
     II->replaceAllUsesWith(Rdx);
     II->eraseFromParent();
     Changed = true;
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 95b429b915c..0ad5bf70e31 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -6736,8 +6736,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
     LowerDeoptimizeCall(&I);
     return;
 
-  case Intrinsic::experimental_vector_reduce_fadd:
-  case Intrinsic::experimental_vector_reduce_fmul:
+  case Intrinsic::experimental_vector_reduce_v2_fadd:
+  case Intrinsic::experimental_vector_reduce_v2_fmul:
   case Intrinsic::experimental_vector_reduce_add:
   case Intrinsic::experimental_vector_reduce_mul:
   case Intrinsic::experimental_vector_reduce_and:
@@ -8795,15 +8795,17 @@ void SelectionDAGBuilder::visitVectorReduce(const CallInst &I,
     FMF = I.getFastMathFlags();
 
   switch (Intrinsic) {
-  case Intrinsic::experimental_vector_reduce_fadd:
-    if (FMF.isFast())
-      Res = DAG.getNode(ISD::VECREDUCE_FADD, dl, VT, Op2);
+  case Intrinsic::experimental_vector_reduce_v2_fadd:
+    if (FMF.allowReassoc())
+      Res = DAG.getNode(ISD::FADD, dl, VT, Op1,
+                        DAG.getNode(ISD::VECREDUCE_FADD, dl, VT, Op2));
     else
       Res = DAG.getNode(ISD::VECREDUCE_STRICT_FADD, dl, VT, Op1, Op2);
     break;
-  case Intrinsic::experimental_vector_reduce_fmul:
-    if (FMF.isFast())
-      Res = DAG.getNode(ISD::VECREDUCE_FMUL, dl, VT, Op2);
+  case Intrinsic::experimental_vector_reduce_v2_fmul:
+    if (FMF.allowReassoc())
+      Res = DAG.getNode(ISD::FMUL, dl, VT, Op1,
+                        DAG.getNode(ISD::VECREDUCE_FMUL, dl, VT, Op2));
     else
       Res = DAG.getNode(ISD::VECREDUCE_STRICT_FMUL, dl, VT, Op1, Op2);
     break;
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index e6a096a8855..e8ecee858d7 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -602,6 +602,26 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
     }
     break;
   }
+  case 'e': {
+    SmallVector<StringRef, 2> Groups;
+    Regex R("^experimental.vector.reduce.([a-z]+)\\.[fi][0-9]+");
+    if (R.match(Name, &Groups)) {
+      Intrinsic::ID ID = Intrinsic::not_intrinsic;
+      if (Groups[1] == "fadd")
+        ID = Intrinsic::experimental_vector_reduce_v2_fadd;
+      if (Groups[1] == "fmul")
+        ID = Intrinsic::experimental_vector_reduce_v2_fmul;
+
+      if (ID != Intrinsic::not_intrinsic) {
+        rename(F);
+        auto Args = F->getFunctionType()->params();
+        Type *Tys[] = {F->getFunctionType()->getReturnType(), Args[1]};
+        NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
+        return true;
+      }
+    }
+    break;
+  }
   case 'i':
   case 'l': {
     bool IsLifetimeStart = Name.startswith("lifetime.start");
@@ -3467,7 +3487,28 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
     DefaultCase();
     return;
   }
-
+  case Intrinsic::experimental_vector_reduce_v2_fmul: {
+    SmallVector<Value *, 2> Args;
+    if (CI->isFast())
+      Args.push_back(ConstantFP::get(CI->getOperand(0)->getType(), 1.0));
+    else
+      Args.push_back(CI->getOperand(0));
+    Args.push_back(CI->getOperand(1));
+    NewCall = Builder.CreateCall(NewFn, Args);
+    cast<Instruction>(NewCall)->copyFastMathFlags(CI);
+    break;
+  }
+  case Intrinsic::experimental_vector_reduce_v2_fadd: {
+    SmallVector<Value *, 2> Args;
+    if (CI->isFast())
+      Args.push_back(Constant::getNullValue(CI->getOperand(0)->getType()));
+    else
+      Args.push_back(CI->getOperand(0));
+    Args.push_back(CI->getOperand(1));
+    NewCall = Builder.CreateCall(NewFn, Args);
+    cast<Instruction>(NewCall)->copyFastMathFlags(CI);
+    break;
+  }
   case Intrinsic::arm_neon_vld1:
   case Intrinsic::arm_neon_vld2:
   case Intrinsic::arm_neon_vld3:
diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp
index 68aa18e3f5b..36c823e7a10 100644
--- a/llvm/lib/IR/IRBuilder.cpp
+++ b/llvm/lib/IR/IRBuilder.cpp
@@ -323,7 +323,7 @@ CallInst *IRBuilderBase::CreateFAddReduce(Value *Acc, Value *Src) {
   Value *Ops[] = {Acc, Src};
   Type *Tys[] = {Acc->getType(), Src->getType()};
   auto Decl = Intrinsic::getDeclaration(
-      M, Intrinsic::experimental_vector_reduce_fadd, Tys);
+      M, Intrinsic::experimental_vector_reduce_v2_fadd, Tys);
   return createCallHelper(Decl, Ops, this);
 }
 
@@ -332,7 +332,7 @@ CallInst *IRBuilderBase::CreateFMulReduce(Value *Acc, Value *Src) {
   Value *Ops[] = {Acc, Src};
   Type *Tys[] = {Acc->getType(), Src->getType()};
   auto Decl = Intrinsic::getDeclaration(
-      M, Intrinsic::experimental_vector_reduce_fmul, Tys);
+      M, Intrinsic::experimental_vector_reduce_v2_fmul, Tys);
   return createCallHelper(Decl, Ops, this);
 }
 
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index 03d84c39b66..29ae77c385d 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -801,13 +801,9 @@ Value *llvm::createSimpleTargetReduction(
     ArrayRef<Value *> RedOps) {
   assert(isa<VectorType>(Src->getType()) && "Type must be a vector");
 
-  Value *ScalarUdf = UndefValue::get(Src->getType()->getVectorElementType());
   std::function<Value *()> BuildFunc;
   using RD = RecurrenceDescriptor;
   RD::MinMaxRecurrenceKind MinMaxKind = RD::MRK_Invalid;
-  // TODO: Support creating ordered reductions.
-  FastMathFlags FMFFast;
-  FMFFast.setFast();
 
   switch (Opcode) {
   case Instruction::Add:
@@ -827,15 +823,15 @@ Value *llvm::createSimpleTargetReduction(
     break;
   case Instruction::FAdd:
     BuildFunc = [&]() {
-      auto Rdx = Builder.CreateFAddReduce(ScalarUdf, Src);
-      cast<CallInst>(Rdx)->setFastMathFlags(FMFFast);
+      auto Rdx = Builder.CreateFAddReduce(
+          Constant::getNullValue(Src->getType()->getVectorElementType()), Src);
       return Rdx;
     };
     break;
   case Instruction::FMul:
     BuildFunc = [&]() {
-      auto Rdx = Builder.CreateFMulReduce(ScalarUdf, Src);
-      cast<CallInst>(Rdx)->setFastMathFlags(FMFFast);
+      Type *Ty = Src->getType()->getVectorElementType();
+      auto Rdx = Builder.CreateFMulReduce(ConstantFP::get(Ty, 1.0), Src);
       return Rdx;
     };
     break;