[PowerPC] Emit scalar fp min/max instructions

VSX provides floating point minimum and maximum instructions that conform to IEEE semantics. This legalizes the respective nodes and emits VSX code for them. Furthermore, on Power9 cores we have xsmaxcdp and xsmincdp instructions that conform to language semantics for the conditional operator even in the presence of NaNs. Differential revision: https://reviews.llvm.org/D62993
author: Nemanja Ivanovic <nemanjai@ca.ibm.com> 2019-10-28 18:33:31 -0500
committer: Nemanja Ivanovic <nemanjai@ca.ibm.com> 2019-10-28 19:13:33 -0500
commit: 25a41ad242000520629a274e83db1ea884d1c1e7 (patch)
tree: f3b81de2918112876b709a1d13c34851d14eabf1 /llvm/lib/Target/PowerPC/PPCISelLowering.cpp
parent: 9ecd3225d134541bdfde18a6648edb8b9e048035 (diff)
download: bcm5719-llvm-25a41ad242000520629a274e83db1ea884d1c1e7.tar.gz
bcm5719-llvm-25a41ad242000520629a274e83db1ea884d1c1e7.zip
1 files changed, 35 insertions, 7 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 075e3c8e94d..a2b45be72c4 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -548,6 +548,13 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
     setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
   }
 
+  if (Subtarget.hasVSX()) {
+    setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
+    setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
+    setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
+    setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
+  }
+
   if (Subtarget.hasAltivec()) {
     // First set operation action for all vector types to expand. Then we
     // will selectively turn on ones that can be effectively codegen'd.
@@ -1294,6 +1301,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
   switch ((PPCISD::NodeType)Opcode) {
   case PPCISD::FIRST_NUMBER:    break;
   case PPCISD::FSEL:            return "PPCISD::FSEL";
+  case PPCISD::XSMAXCDP:        return "PPCISD::XSMAXCDP";
+  case PPCISD::XSMINCDP:        return "PPCISD::XSMINCDP";
   case PPCISD::FCFID:           return "PPCISD::FCFID";
   case PPCISD::FCFIDU:          return "PPCISD::FCFIDU";
   case PPCISD::FCFIDS:          return "PPCISD::FCFIDS";
@@ -7214,17 +7223,15 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
       !Op.getOperand(2).getValueType().isFloatingPoint())
     return Op;
 
+  bool HasNoInfs = DAG.getTarget().Options.NoInfsFPMath;
+  bool HasNoNaNs = DAG.getTarget().Options.NoNaNsFPMath;
   // We might be able to do better than this under some circumstances, but in
   // general, fsel-based lowering of select is a finite-math-only optimization.
   // For more information, see section F.3 of the 2.06 ISA specification.
-  if (!DAG.getTarget().Options.NoInfsFPMath ||
-      !DAG.getTarget().Options.NoNaNsFPMath)
+  // With ISA 3.0, we have xsmaxcdp/xsmincdp which are OK to emit even in the
+  // presence of infinities.
+  if (!Subtarget.hasP9Vector() && (!HasNoInfs || !HasNoNaNs))
     return Op;
-  // TODO: Propagate flags from the select rather than global settings.
-  SDNodeFlags Flags;
-  Flags.setNoInfs(true);
-  Flags.setNoNaNs(true);
-
   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
 
   EVT ResVT = Op.getValueType();
@@ -7233,6 +7240,27 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
   SDValue TV  = Op.getOperand(2), FV  = Op.getOperand(3);
   SDLoc dl(Op);
 
+  if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
+    switch (CC) {
+    default:
+      // Not a min/max but with finite math, we may still be able to use fsel.
+      if (HasNoInfs && HasNoNaNs)
+        break;
+      return Op;
+    case ISD::SETOGT:
+    case ISD::SETGT:
+      return DAG.getNode(PPCISD::XSMAXCDP, dl, Op.getValueType(), LHS, RHS);
+    case ISD::SETOLT:
+    case ISD::SETLT:
+      return DAG.getNode(PPCISD::XSMINCDP, dl, Op.getValueType(), LHS, RHS);
+    }
+  }
+
+  // TODO: Propagate flags from the select rather than global settings.
+  SDNodeFlags Flags;
+  Flags.setNoInfs(true);
+  Flags.setNoNaNs(true);
+
   // If the RHS of the comparison is a 0.0, we don't need to do the
   // subtraction at all.
   SDValue Sel1;
author	Nemanja Ivanovic <nemanjai@ca.ibm.com>	2019-10-28 18:33:31 -0500
committer	Nemanja Ivanovic <nemanjai@ca.ibm.com>	2019-10-28 19:13:33 -0500
commit	25a41ad242000520629a274e83db1ea884d1c1e7 (patch)
tree	f3b81de2918112876b709a1d13c34851d14eabf1 /llvm/lib/Target/PowerPC/PPCISelLowering.cpp
parent	9ecd3225d134541bdfde18a6648edb8b9e048035 (diff)
download	bcm5719-llvm-25a41ad242000520629a274e83db1ea884d1c1e7.tar.gz bcm5719-llvm-25a41ad242000520629a274e83db1ea884d1c1e7.zip