Change semantics of fadd/fmul vector reductions.

This patch changes how LLVM handles the accumulator/start value in the reduction, by never ignoring it regardless of the presence of fast-math flags on callsites. This change introduces the following new intrinsics to replace the existing ones: llvm.experimental.vector.reduce.fadd -> llvm.experimental.vector.reduce.v2.fadd llvm.experimental.vector.reduce.fmul -> llvm.experimental.vector.reduce.v2.fmul and adds functionality to auto-upgrade existing LLVM IR and bitcode. Reviewers: RKSimon, greened, dmgreen, nikic, simoll, aemerson Reviewed By: nikic Differential Revision: https://reviews.llvm.org/D60261 llvm-svn: 363035
author: Sander de Smalen <sander.desmalen@arm.com> 2019-06-11 08:22:10 +0000
committer: Sander de Smalen <sander.desmalen@arm.com> 2019-06-11 08:22:10 +0000
commit: cbeb563cfb1752044fb8771586ae9bbd89d2a07b (patch)
tree: dd9dec7d2ce2d7f949c97d9624df5ea1bbbf551d /llvm/lib/CodeGen/ExpandReductions.cpp
parent: e2acbeb94cf28cf6a8c82e09073df79aa1e846be (diff)
download: bcm5719-llvm-cbeb563cfb1752044fb8771586ae9bbd89d2a07b.tar.gz
bcm5719-llvm-cbeb563cfb1752044fb8771586ae9bbd89d2a07b.zip
1 files changed, 29 insertions, 28 deletions
diff --git a/llvm/lib/CodeGen/ExpandReductions.cpp b/llvm/lib/CodeGen/ExpandReductions.cpp
index 340ee19c339..1069a2423b8 100644
--- a/llvm/lib/CodeGen/ExpandReductions.cpp
+++ b/llvm/lib/CodeGen/ExpandReductions.cpp
@@ -29,9 +29,9 @@ namespace {
 
 unsigned getOpcode(Intrinsic::ID ID) {
   switch (ID) {
-  case Intrinsic::experimental_vector_reduce_fadd:
+  case Intrinsic::experimental_vector_reduce_v2_fadd:
     return Instruction::FAdd;
-  case Intrinsic::experimental_vector_reduce_fmul:
+  case Intrinsic::experimental_vector_reduce_v2_fmul:
     return Instruction::FMul;
   case Intrinsic::experimental_vector_reduce_add:
     return Instruction::Add;
@@ -83,22 +83,33 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
       Worklist.push_back(II);
 
   for (auto *II : Worklist) {
+    if (!TTI->shouldExpandReduction(II))
+      continue;
+
+    FastMathFlags FMF =
+        isa<FPMathOperator>(II) ? II->getFastMathFlags() : FastMathFlags{};
+    Intrinsic::ID ID = II->getIntrinsicID();
+    RecurrenceDescriptor::MinMaxRecurrenceKind MRK = getMRK(ID);
+
+    Value *Rdx = nullptr;
     IRBuilder<> Builder(II);
-    bool IsOrdered = false;
-    Value *Acc = nullptr;
-    Value *Vec = nullptr;
-    auto ID = II->getIntrinsicID();
-    auto MRK = RecurrenceDescriptor::MRK_Invalid;
+    IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
+    Builder.setFastMathFlags(FMF);
     switch (ID) {
-    case Intrinsic::experimental_vector_reduce_fadd:
-    case Intrinsic::experimental_vector_reduce_fmul:
+    case Intrinsic::experimental_vector_reduce_v2_fadd:
+    case Intrinsic::experimental_vector_reduce_v2_fmul: {
       // FMFs must be attached to the call, otherwise it's an ordered reduction
       // and it can't be handled by generating a shuffle sequence.
-      if (!II->getFastMathFlags().isFast())
-        IsOrdered = true;
-      Acc = II->getArgOperand(0);
-      Vec = II->getArgOperand(1);
-      break;
+      Value *Acc = II->getArgOperand(0);
+      Value *Vec = II->getArgOperand(1);
+      if (!FMF.allowReassoc())
+        Rdx = getOrderedReduction(Builder, Acc, Vec, getOpcode(ID), MRK);
+      else {
+        Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);
+        Rdx = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(ID),
+                                  Acc, Rdx, "bin.rdx");
+      }
+    } break;
     case Intrinsic::experimental_vector_reduce_add:
     case Intrinsic::experimental_vector_reduce_mul:
     case Intrinsic::experimental_vector_reduce_and:
@@ -109,23 +120,13 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
     case Intrinsic::experimental_vector_reduce_umax:
     case Intrinsic::experimental_vector_reduce_umin:
     case Intrinsic::experimental_vector_reduce_fmax:
-    case Intrinsic::experimental_vector_reduce_fmin:
-      Vec = II->getArgOperand(0);
-      MRK = getMRK(ID);
-      break;
+    case Intrinsic::experimental_vector_reduce_fmin: {
+      Value *Vec = II->getArgOperand(0);
+      Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);
+    } break;
     default:
       continue;
     }
-    if (!TTI->shouldExpandReduction(II))
-      continue;
-    // Propagate FMF using the builder.
-    FastMathFlags FMF =
-        isa<FPMathOperator>(II) ? II->getFastMathFlags() : FastMathFlags{};
-    IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
-    Builder.setFastMathFlags(FMF);
-    Value *Rdx =
-        IsOrdered ? getOrderedReduction(Builder, Acc, Vec, getOpcode(ID), MRK)
-                  : getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);
     II->replaceAllUsesWith(Rdx);
     II->eraseFromParent();
     Changed = true;
author	Sander de Smalen <sander.desmalen@arm.com>	2019-06-11 08:22:10 +0000
committer	Sander de Smalen <sander.desmalen@arm.com>	2019-06-11 08:22:10 +0000
commit	cbeb563cfb1752044fb8771586ae9bbd89d2a07b (patch)
tree	dd9dec7d2ce2d7f949c97d9624df5ea1bbbf551d /llvm/lib/CodeGen/ExpandReductions.cpp
parent	e2acbeb94cf28cf6a8c82e09073df79aa1e846be (diff)
download	bcm5719-llvm-cbeb563cfb1752044fb8771586ae9bbd89d2a07b.tar.gz bcm5719-llvm-cbeb563cfb1752044fb8771586ae9bbd89d2a07b.zip