Improve ISel across lane float min/max reduction

In vectorized float min/max reduction code, the final "reduce" step is sub-optimal. In AArch64, this change will combine:

  svn0 = vector_shuffle t0, undef<2,3,u,u>
  fmin = fminnum t0, svn0
  svn1 = vector_shuffle fmin, undef<1,u,u,u>
  cc = setcc fmin, svn1, ole
  n0 = extract_vector_elt cc, #0
  n1 = extract_vector_elt fmin, #0
  n2 = extract_vector_elt fmin, #1
  result = select n0, n1, n2

into:

  result = llvm.aarch64.neon.fminnmv t0

This change extends r247575.

llvm-svn: 249834
author: Jun Bum Lim <junbuml@codeaurora.org> 2015-10-09 14:11:25 +0000
committer: Jun Bum Lim <junbuml@codeaurora.org> 2015-10-09 14:11:25 +0000
commit: 0aace13d18417fcb22fa69e76008b1423abb4e5d (patch)
tree: 8a2f8c1f84a461a531dc2503bc94b5f97c2dfcbe /llvm/lib
parent: 6269236a8ee11c973a9b14b299677c0509002c27 (diff)
download: bcm5719-llvm-0aace13d18417fcb22fa69e76008b1423abb4e5d.tar.gz
bcm5719-llvm-0aace13d18417fcb22fa69e76008b1423abb4e5d.zip
1 files changed, 47 insertions, 12 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index fa8cad82795..a599997f87a 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -8750,8 +8750,13 @@ static SDValue tryMatchAcrossLaneShuffleForReduction(SDNode *N, SDValue OpV,
     return SDValue();
 
   int NumVecElts = VTy.getVectorNumElements();
-  if (NumVecElts != 4 && NumVecElts != 8 && NumVecElts != 16)
-    return SDValue();
+  if (Op == ISD::FMAXNUM || Op == ISD::FMINNUM) {
+    if (NumVecElts != 4)
+      return SDValue();
+  } else {
+    if (NumVecElts != 4 && NumVecElts != 8 && NumVecElts != 16)
+      return SDValue();
+  }
 
   int NumExpectedSteps = APInt(8, NumVecElts).logBase2();
   SDValue PreOp = OpV;
@@ -8802,6 +8807,8 @@ static SDValue tryMatchAcrossLaneShuffleForReduction(SDNode *N, SDValue OpV,
     PreOp = CurOp;
   }
   unsigned Opcode;
+  bool IsIntrinsic = false;
+
   switch (Op) {
   default:
     llvm_unreachable("Unexpected operator for across vector reduction");
@@ -8820,11 +8827,24 @@ static SDValue tryMatchAcrossLaneShuffleForReduction(SDNode *N, SDValue OpV,
   case ISD::UMIN:
     Opcode = AArch64ISD::UMINV;
     break;
+  case ISD::FMAXNUM:
+    Opcode = Intrinsic::aarch64_neon_fmaxnmv;
+    IsIntrinsic = true;
+    break;
+  case ISD::FMINNUM:
+    Opcode = Intrinsic::aarch64_neon_fminnmv;
+    IsIntrinsic = true;
+    break;
   }
   SDLoc DL(N);
-  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0),
-                     DAG.getNode(Opcode, DL, PreOp.getSimpleValueType(), PreOp),
-                     DAG.getConstant(0, DL, MVT::i64));
+
+  return IsIntrinsic
+             ? DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, N->getValueType(0),
+                           DAG.getConstant(Opcode, DL, MVT::i32), PreOp)
+             : DAG.getNode(
+                   ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0),
+                   DAG.getNode(Opcode, DL, PreOp.getSimpleValueType(), PreOp),
+                   DAG.getConstant(0, DL, MVT::i64));
 }
 
 /// Target-specific DAG combine for the across vector min/max reductions.
@@ -8848,9 +8868,6 @@ static SDValue tryMatchAcrossLaneShuffleForReduction(SDNode *N, SDValue OpV,
 ///     becomes :
 ///   %1 = smaxv %0
 ///   %result = extract_vector_elt %1, 0
-/// FIXME: Currently this function matches only SMAXV, UMAXV, SMINV, and UMINV.
-/// We could also support other types of across lane reduction available
-/// in AArch64, including FMAXNMV, FMAXV, FMINNMV, and FMINV.
 static SDValue
 performAcrossLaneMinMaxReductionCombine(SDNode *N, SelectionDAG &DAG,
                                         const AArch64Subtarget *Subtarget) {
@@ -8878,17 +8895,26 @@ performAcrossLaneMinMaxReductionCombine(SDNode *N, SelectionDAG &DAG,
   SDValue VectorOp = SetCC.getOperand(0);
   unsigned Op = VectorOp->getOpcode();
   // Check if the input vector is fed by the operator we want to handle.
-  if (Op != ISD::SMAX && Op != ISD::UMAX && Op != ISD::SMIN && Op != ISD::UMIN)
+  if (Op != ISD::SMAX && Op != ISD::UMAX && Op != ISD::SMIN &&
+      Op != ISD::UMIN && Op != ISD::FMAXNUM && Op != ISD::FMINNUM)
     return SDValue();
 
   EVT VTy = VectorOp.getValueType();
   if (!VTy.isVector())
     return SDValue();
 
-  EVT EltTy = VTy.getVectorElementType();
-  if (EltTy != MVT::i32 && EltTy != MVT::i16 && EltTy != MVT::i8)
+  if (VTy.getSizeInBits() < 64)
     return SDValue();
 
+  EVT EltTy = VTy.getVectorElementType();
+  if (Op == ISD::FMAXNUM || Op == ISD::FMINNUM) {
+    if (EltTy != MVT::f32)
+      return SDValue();
+  } else {
+    if (EltTy != MVT::i32 && EltTy != MVT::i16 && EltTy != MVT::i8)
+      return SDValue();
+  }
+
   // Check if extracting from the same vector.
   // For example,
   //   %sc = setcc %vector, %svn1, gt
@@ -8904,7 +8930,13 @@ performAcrossLaneMinMaxReductionCombine(SDNode *N, SelectionDAG &DAG,
   if ((Op == ISD::SMAX && CC != ISD::SETGT && CC != ISD::SETGE) ||
       (Op == ISD::UMAX && CC != ISD::SETUGT && CC != ISD::SETUGE) ||
       (Op == ISD::SMIN && CC != ISD::SETLT && CC != ISD::SETLE) ||
-      (Op == ISD::UMIN && CC != ISD::SETULT && CC != ISD::SETULE))
+      (Op == ISD::UMIN && CC != ISD::SETULT && CC != ISD::SETULE) ||
+      (Op == ISD::FMAXNUM && CC != ISD::SETOGT && CC != ISD::SETOGE &&
+       CC != ISD::SETUGT && CC != ISD::SETUGE && CC != ISD::SETGT &&
+       CC != ISD::SETGE) ||
+      (Op == ISD::FMINNUM && CC != ISD::SETOLT && CC != ISD::SETOLE &&
+       CC != ISD::SETULT && CC != ISD::SETULE && CC != ISD::SETLT &&
+       CC != ISD::SETLE))
     return SDValue();
 
   // Expect to check only lane 0 from the vector SETCC.
@@ -8963,6 +8995,9 @@ performAcrossLaneAddReductionCombine(SDNode *N, SelectionDAG &DAG,
   if (EltTy != MVT::i32 && EltTy != MVT::i16 && EltTy != MVT::i8)
     return SDValue();
 
+  if (VTy.getSizeInBits() < 64)
+    return SDValue();
+
   return tryMatchAcrossLaneShuffleForReduction(N, N0, ISD::ADD, DAG);
 }
author	Jun Bum Lim <junbuml@codeaurora.org>	2015-10-09 14:11:25 +0000
committer	Jun Bum Lim <junbuml@codeaurora.org>	2015-10-09 14:11:25 +0000
commit	0aace13d18417fcb22fa69e76008b1423abb4e5d (patch)
tree	8a2f8c1f84a461a531dc2503bc94b5f97c2dfcbe /llvm/lib
parent	6269236a8ee11c973a9b14b299677c0509002c27 (diff)
download	bcm5719-llvm-0aace13d18417fcb22fa69e76008b1423abb4e5d.tar.gz bcm5719-llvm-0aace13d18417fcb22fa69e76008b1423abb4e5d.zip