diff options
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 20 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUInstructions.td | 2 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 3 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.td | 3 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstructions.td | 14 |
5 files changed, 42 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 774810e2e01..e02ced04f08 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -80,6 +80,7 @@ public: private: SDValue foldFrameIndex(SDValue N) const; + bool isNoNanSrc(SDValue N) const; bool isInlineImmediate(const SDNode *N) const; bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs, const R600InstrInfo *TII); @@ -143,6 +144,8 @@ private: bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const; bool SelectSMRDBufferSgpr(SDValue Addr, SDValue &Offset) const; bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const; + + bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const; bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const; bool SelectVOP3NoMods(SDValue In, SDValue &Src, SDValue &SrcMods) const; bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods, @@ -188,6 +191,17 @@ bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { return SelectionDAGISel::runOnMachineFunction(MF); } +bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const { + if (TM.Options.NoNaNsFPMath) + return true; + + // TODO: Move into isKnownNeverNaN + if (const auto *BO = dyn_cast<BinaryWithFlagsSDNode>(N)) + return BO->Flags.hasNoNaNs(); + + return CurDAG->isKnownNeverNaN(N); +} + bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const { const SIInstrInfo *TII = static_cast<const SISubtarget *>(Subtarget)->getInstrInfo(); @@ -1569,6 +1583,12 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src, return true; } +bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, + SDValue &SrcMods) const { + SelectVOP3Mods(In, Src, SrcMods); + return isNoNanSrc(Src); +} + bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src, SDValue &SrcMods) const { bool Res = SelectVOP3Mods(In, Src, SrcMods); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td index 606b6cea2e4..004b10f5d20 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -635,6 +635,8 @@ def smax_oneuse : HasOneUseBinOp<smax>; def smin_oneuse : HasOneUseBinOp<smin>; def umax_oneuse : HasOneUseBinOp<umax>; def umin_oneuse : HasOneUseBinOp<umin>; +def fminnum_oneuse : HasOneUseBinOp<fminnum>; +def fmaxnum_oneuse : HasOneUseBinOp<fmaxnum>; } // Properties = [SDNPCommutative, SDNPAssociative] def sub_oneuse : HasOneUseBinOp<sub>; diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 89d3a72fae4..7e49fc28703 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -2795,6 +2795,9 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getNode(AMDGPUISD::SETCC, DL, VT, Op.getOperand(1), Op.getOperand(2), DAG.getCondCode(CCOpcode)); } + case Intrinsic::amdgcn_fmed3: + return DAG.getNode(AMDGPUISD::FMED3, DL, VT, + Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); case Intrinsic::amdgcn_fmul_legacy: return DAG.getNode(AMDGPUISD::FMUL_LEGACY, DL, VT, Op.getOperand(1), Op.getOperand(2)); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 6f4746b7559..cf9b9c5a145 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -603,6 +603,9 @@ def VOP3Mods0Clamp0OMod : ComplexPattern<untyped, 4, "SelectVOP3Mods0Clamp0OMod" def VOP3Mods : ComplexPattern<untyped, 2, "SelectVOP3Mods">; def VOP3NoMods : ComplexPattern<untyped, 2, "SelectVOP3NoMods">; +// VOP3Mods, but the input source is known to never be NaN. +def VOP3Mods_nnan : ComplexPattern<fAny, 2, "SelectVOP3Mods_NNaN">; + //===----------------------------------------------------------------------===// // SI assembler operands //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 06516b24f32..2a4b0951ac4 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -1125,6 +1125,20 @@ def : SHA256MaPattern <V_BFI_B32, V_XOR_B32_e64>; def : IntMed3Pat<V_MED3_I32, smax, smax_oneuse, smin_oneuse>; def : IntMed3Pat<V_MED3_U32, umax, umax_oneuse, umin_oneuse>; +// This matches 16 permutations of +// max(min(x, y), min(max(x, y), z)) +class FPMed3Pat<ValueType vt, + Instruction med3Inst> : Pat< + (fmaxnum (fminnum_oneuse (VOP3Mods_nnan vt:$src0, i32:$src0_mods), + (VOP3Mods_nnan vt:$src1, i32:$src1_mods)), + (fminnum_oneuse (fmaxnum_oneuse (VOP3Mods_nnan vt:$src0, i32:$src0_mods), + (VOP3Mods_nnan vt:$src1, i32:$src1_mods)), + (vt (VOP3Mods_nnan vt:$src2, i32:$src2_mods)))), + (med3Inst $src0_mods, $src0, $src1_mods, $src1, $src2_mods, $src2, DSTCLAMP.NONE, DSTOMOD.NONE) +>; + +def : FPMed3Pat<f32, V_MED3_F32>; + // Undo sub x, c -> add x, -c canonicalization since c is more likely // an inline immediate than -c. |