diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUInstructions.td | 1 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 43 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.h | 1 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstructions.td | 10 |
4 files changed, 55 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index 6e56b0b0502..2874a55bef3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -423,6 +423,7 @@ int TWO_PI_INV = 0x3e22f983;
 int FP_UINT_MAX_PLUS_1 = 0x4f800000; // 1 << 32 in floating point encoding
 int FP32_NEG_ONE = 0xbf800000;
 int FP32_ONE = 0x3f800000;
+int FP64_ONE = 0x3ff0000000000000;
 }
 
 def CONST : Constants;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 64975cf1809..9aaa31c29fe 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -281,6 +281,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM,
   setTargetDAGCombine(ISD::AND);
   setTargetDAGCombine(ISD::OR);
   setTargetDAGCombine(ISD::UINT_TO_FP);
+  setTargetDAGCombine(ISD::FCANONICALIZE);
 
   // All memory operations. Some folding on the pointer operand is done to help
   // matching the constant offsets in the addressing modes.
@@ -2400,6 +2401,46 @@ SDValue SITargetLowering::performClassCombine(SDNode *N,
   return SDValue();
 }
 
+// Constant fold fcanonicalize of a floating-point constant. Returns an empty
+// SDValue when the operand is not a ConstantFPSDNode.
+SDValue SITargetLowering::performFCanonicalizeCombine(
+  SDNode *N,
+  DAGCombinerInfo &DCI) const {
+  ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0));
+  if (!CFP)
+    return SDValue();
+
+  SelectionDAG &DAG = DCI.DAG;
+  const APFloat &C = CFP->getValueAPF();
+  EVT VT = N->getValueType(0);
+
+  // Flush denormals to 0 if not enabled. Keep the sign of the input so a
+  // negative denormal folds to -0.0 rather than +0.0.
+  if (C.isDenormal()) {
+    if ((VT == MVT::f32 && !Subtarget->hasFP32Denormals()) ||
+        (VT == MVT::f64 && !Subtarget->hasFP64Denormals())) {
+      APFloat Zero = APFloat::getZero(C.getSemantics(), C.isNegative());
+      return DAG.getConstantFP(Zero, SDLoc(N), VT);
+    }
+  }
+
+  if (C.isNaN()) {
+    APFloat CanonicalQNaN = APFloat::getQNaN(C.getSemantics());
+
+    // Quiet a signaling NaN, and make sure a quiet NaN has the canonical
+    // bitpattern.
+    //
+    // TODO: Can we use -1 as the canonical NaN value since it's an inline
+    // immediate?
+    if (C.isSignaling() ||
+        C.bitcastToAPInt() != CanonicalQNaN.bitcastToAPInt())
+      return DAG.getConstantFP(CanonicalQNaN, SDLoc(N), VT);
+  }
+
+  // Nothing needed changing: fold to the constant operand itself.
+  return SDValue(CFP, 0);
+}
+
 static unsigned minMaxOpcToMin3Max3Opc(unsigned Opc) {
   switch (Opc) {
   case ISD::FMAXNUM:
@@ -2747,6 +2788,8 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
     return performOrCombine(N, DCI);
   case AMDGPUISD::FP_CLASS:
     return performClassCombine(N, DCI);
+  case ISD::FCANONICALIZE:
+    return performFCanonicalizeCombine(N, DCI);
   }
   return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
 }
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 96c2503f400..4e7131a0b5e 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -55,6 +55,7 @@ class SITargetLowering final : public AMDGPUTargetLowering {
   SDValue performAndCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue performOrCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue performClassCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+  SDValue performFCanonicalizeCombine(SDNode *N, DAGCombinerInfo &DCI) const;
 
   SDValue performMinMaxCombine(SDNode *N, DAGCombinerInfo &DCI) const;
 
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 3363fcca9ab..4620ec05752 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -3515,6 +3515,16 @@ def : Pat <
 >;
 }
 
+def : Pat<
+  (fcanonicalize f32:$src),
+  (V_MUL_F32_e64 0, CONST.FP32_ONE, 0, $src, 0, 0)
+>;
+
+def : Pat<
+  (fcanonicalize f64:$src),
+  (V_MUL_F64 0, CONST.FP64_ONE, 0, $src, 0, 0)
+>;
+
 //===----------------------------------------------------------------------===//
 // Fract Patterns
 //===----------------------------------------------------------------------===//