diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 3 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 13 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstructions.td | 2 |
3 files changed, 11 insertions, 7 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index f223f57bfa3..e98182562d8 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -694,7 +694,8 @@ const MachineOperand *SIFoldOperands::isClamp(const MachineInstr &MI) const { unsigned Op = MI.getOpcode(); switch (Op) { case AMDGPU::V_MAX_F32_e64: - case AMDGPU::V_MAX_F16_e64: { + case AMDGPU::V_MAX_F16_e64: + case AMDGPU::V_MAX_F64: { if (!TII->getNamedOperand(MI, AMDGPU::OpName::clamp)->getImm()) return nullptr; diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index ca42c13a909..bf4d3fb9b4f 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -4055,7 +4055,8 @@ SDValue SITargetLowering::performFPMed3ImmCombine(SelectionDAG &DAG, } // No med3 for f16, but clamp is possible. - if (VT == MVT::f16) + // TODO: gfx9 has med3 f16 + if (VT == MVT::f16 || VT == MVT::f64) return SDValue(); // This isn't safe with signaling NaNs because in IEEE mode, min/max on a @@ -4073,6 +4074,7 @@ SDValue SITargetLowering::performMinMaxCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; + EVT VT = N->getValueType(0); unsigned Opc = N->getOpcode(); SDValue Op0 = N->getOperand(0); SDValue Op1 = N->getOperand(1); @@ -4080,7 +4082,9 @@ SDValue SITargetLowering::performMinMaxCombine(SDNode *N, // Only do this if the inner op has one use since this will just increases // register pressure for no benefit. - if (Opc != AMDGPUISD::FMIN_LEGACY && Opc != AMDGPUISD::FMAX_LEGACY) { + + if (Opc != AMDGPUISD::FMIN_LEGACY && Opc != AMDGPUISD::FMAX_LEGACY && + VT != MVT::f64) { // max(max(a, b), c) -> max3(a, b, c) // min(min(a, b), c) -> min3(a, b, c) if (Op0.getOpcode() == Opc && Op0.hasOneUse()) { @@ -4122,8 +4126,8 @@ SDValue SITargetLowering::performMinMaxCombine(SDNode *N, if (((Opc == ISD::FMINNUM && Op0.getOpcode() == ISD::FMAXNUM) || (Opc == AMDGPUISD::FMIN_LEGACY && Op0.getOpcode() == AMDGPUISD::FMAX_LEGACY)) && - (N->getValueType(0) == MVT::f32 || - (N->getValueType(0) == MVT::f16 && Subtarget->has16BitInsts())) && + (VT == MVT::f32 || VT == MVT::f64 || + (VT == MVT::f16 && Subtarget->has16BitInsts())) && Op0.hasOneUse()) { if (SDValue Res = performFPMed3ImmCombine(DAG, SDLoc(N), Op0, Op1)) return Res; @@ -4404,7 +4408,6 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N, case AMDGPUISD::FMIN_LEGACY: case AMDGPUISD::FMAX_LEGACY: { if (DCI.getDAGCombineLevel() >= AfterLegalizeDAG && - N->getValueType(0) != MVT::f64 && getTargetMachine().getOptLevel() > CodeGenOpt::None) return performMinMaxCombine(N, DCI); break; diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 5d147b24ec9..1a40bd72ad3 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -657,8 +657,8 @@ class ClampPat<Instruction inst, ValueType vt> : Pat < i32:$src0_modifiers, vt:$src0, DSTCLAMP.ENABLE, $omod) >; -// TODO: Does f64 support clamp? def : ClampPat<V_MAX_F32_e64, f32>; +def : ClampPat<V_MAX_F64, f64>; def : ClampPat<V_MAX_F16_e64, f16>; /********** ================================ **********/ |