diff options
| author | Nemanja Ivanovic <nemanjai@ca.ibm.com> | 2019-10-28 18:33:31 -0500 |
|---|---|---|
| committer | Nemanja Ivanovic <nemanjai@ca.ibm.com> | 2019-10-28 19:13:33 -0500 |
| commit | 25a41ad242000520629a274e83db1ea884d1c1e7 (patch) | |
| tree | f3b81de2918112876b709a1d13c34851d14eabf1 /llvm/lib/Target/PowerPC/PPCISelLowering.cpp | |
| parent | 9ecd3225d134541bdfde18a6648edb8b9e048035 (diff) | |
| download | bcm5719-llvm-25a41ad242000520629a274e83db1ea884d1c1e7.tar.gz bcm5719-llvm-25a41ad242000520629a274e83db1ea884d1c1e7.zip | |
[PowerPC] Emit scalar fp min/max instructions
VSX provides floating-point minimum and maximum instructions that conform
to IEEE semantics. This patch legalizes the corresponding nodes
(FMAXNUM_IEEE and FMINNUM_IEEE for f32 and f64) and emits VSX code for them.
Furthermore, Power9 cores provide the xsmaxcdp and xsmincdp instructions,
which conform to language semantics for the conditional operator
even in the presence of NaNs.
Differential revision: https://reviews.llvm.org/D62993
Diffstat (limited to 'llvm/lib/Target/PowerPC/PPCISelLowering.cpp')
| -rw-r--r-- | llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 42 |
1 file changed, 35 insertions, 7 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 075e3c8e94d..a2b45be72c4 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -548,6 +548,13 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); } + if (Subtarget.hasVSX()) { + setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal); + setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal); + setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal); + setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal); + } + if (Subtarget.hasAltivec()) { // First set operation action for all vector types to expand. Then we // will selectively turn on ones that can be effectively codegen'd. @@ -1294,6 +1301,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { switch ((PPCISD::NodeType)Opcode) { case PPCISD::FIRST_NUMBER: break; case PPCISD::FSEL: return "PPCISD::FSEL"; + case PPCISD::XSMAXCDP: return "PPCISD::XSMAXCDP"; + case PPCISD::XSMINCDP: return "PPCISD::XSMINCDP"; case PPCISD::FCFID: return "PPCISD::FCFID"; case PPCISD::FCFIDU: return "PPCISD::FCFIDU"; case PPCISD::FCFIDS: return "PPCISD::FCFIDS"; @@ -7214,17 +7223,15 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { !Op.getOperand(2).getValueType().isFloatingPoint()) return Op; + bool HasNoInfs = DAG.getTarget().Options.NoInfsFPMath; + bool HasNoNaNs = DAG.getTarget().Options.NoNaNsFPMath; // We might be able to do better than this under some circumstances, but in // general, fsel-based lowering of select is a finite-math-only optimization. // For more information, see section F.3 of the 2.06 ISA specification. - if (!DAG.getTarget().Options.NoInfsFPMath || - !DAG.getTarget().Options.NoNaNsFPMath) + // With ISA 3.0, we have xsmaxcdp/xsmincdp which are OK to emit even in the + // presence of infinities. 
+ if (!Subtarget.hasP9Vector() && (!HasNoInfs || !HasNoNaNs)) return Op; - // TODO: Propagate flags from the select rather than global settings. - SDNodeFlags Flags; - Flags.setNoInfs(true); - Flags.setNoNaNs(true); - ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); EVT ResVT = Op.getValueType(); @@ -7233,6 +7240,27 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { SDValue TV = Op.getOperand(2), FV = Op.getOperand(3); SDLoc dl(Op); + if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) { + switch (CC) { + default: + // Not a min/max but with finite math, we may still be able to use fsel. + if (HasNoInfs && HasNoNaNs) + break; + return Op; + case ISD::SETOGT: + case ISD::SETGT: + return DAG.getNode(PPCISD::XSMAXCDP, dl, Op.getValueType(), LHS, RHS); + case ISD::SETOLT: + case ISD::SETLT: + return DAG.getNode(PPCISD::XSMINCDP, dl, Op.getValueType(), LHS, RHS); + } + } + + // TODO: Propagate flags from the select rather than global settings. + SDNodeFlags Flags; + Flags.setNoInfs(true); + Flags.setNoNaNs(true); + // If the RHS of the comparison is a 0.0, we don't need to do the // subtraction at all. SDValue Sel1; |

