diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2017-02-21 23:35:48 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2017-02-21 23:35:48 +0000 |
commit | 2fdf2a1a189ce2d0462eb1902a88f5210b963063 (patch) | |
tree | cf28f9b484ce67fa9098bb72532749abe3f54aa0 /llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | |
parent | e6e206d4b48146b194b04f43a3dec4d32d568019 (diff) | |
download | bcm5719-llvm-2fdf2a1a189ce2d0462eb1902a88f5210b963063.tar.gz bcm5719-llvm-2fdf2a1a189ce2d0462eb1902a88f5210b963063.zip |
AMDGPU: Redefine clamp node as clamp 0.0-1.0
Change implementation to use max instead of add.
min/max/med3 do not flush denormals regardless of the mode,
so it is OK to use them whether or not denormals are enabled.
Also allow using clamp with f16, and use knowledge
of dx10_clamp when constant folding the clamp node (a NaN
input folds to 0.0 when dx10_clamp is enabled).
llvm-svn: 295788
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 59 |
1 file changed, 45 insertions, 14 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 12c17c3e9eb..9e3ac8b871e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -1012,22 +1012,29 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, EVT VT = Op.getValueType(); switch (IntrinsicID) { - default: return Op; - case AMDGPUIntrinsic::AMDGPU_clamp: // Legacy name. - return DAG.getNode(AMDGPUISD::CLAMP, DL, VT, - Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); + default: return Op; + case AMDGPUIntrinsic::AMDGPU_clamp: { + // Deprecated in favor of emitting min/max combo or fmed3. + ConstantFPSDNode *CSrc1 = dyn_cast<ConstantFPSDNode>(Op.getOperand(2)); + ConstantFPSDNode *CSrc2 = dyn_cast<ConstantFPSDNode>(Op.getOperand(3)); + if (CSrc1 && CSrc2 && CSrc1->isZero() && CSrc2->isExactlyValue(1.0)) + return DAG.getNode(AMDGPUISD::CLAMP, DL, VT, Op.getOperand(1)); - case AMDGPUIntrinsic::AMDGPU_bfe_i32: - return DAG.getNode(AMDGPUISD::BFE_I32, DL, VT, - Op.getOperand(1), - Op.getOperand(2), - Op.getOperand(3)); + SDValue Max = DAG.getNode(ISD::FMAXNUM, DL, VT, Op.getOperand(1), + Op.getOperand(2)); + return DAG.getNode(ISD::FMINNUM, DL, VT, Max, Op.getOperand(3)); + } + case AMDGPUIntrinsic::AMDGPU_bfe_i32: + return DAG.getNode(AMDGPUISD::BFE_I32, DL, VT, + Op.getOperand(1), + Op.getOperand(2), + Op.getOperand(3)); - case AMDGPUIntrinsic::AMDGPU_bfe_u32: - return DAG.getNode(AMDGPUISD::BFE_U32, DL, VT, - Op.getOperand(1), - Op.getOperand(2), - Op.getOperand(3)); + case AMDGPUIntrinsic::AMDGPU_bfe_u32: + return DAG.getNode(AMDGPUISD::BFE_U32, DL, VT, + Op.getOperand(1), + Op.getOperand(2), + Op.getOperand(3)); } } @@ -2445,6 +2452,28 @@ SDValue AMDGPUTargetLowering::performStoreCombine(SDNode *N, SN->getBasePtr(), SN->getMemOperand()); } +SDValue AMDGPUTargetLowering::performClampCombine(SDNode *N, + DAGCombinerInfo &DCI) const { + ConstantFPSDNode *CSrc = 
dyn_cast<ConstantFPSDNode>(N->getOperand(0)); + if (!CSrc) + return SDValue(); + + const APFloat &F = CSrc->getValueAPF(); + APFloat Zero = APFloat::getZero(F.getSemantics()); + APFloat::cmpResult Cmp0 = F.compare(Zero); + if (Cmp0 == APFloat::cmpLessThan || + (Cmp0 == APFloat::cmpUnordered && Subtarget->enableDX10Clamp())) { + return DCI.DAG.getConstantFP(Zero, SDLoc(N), N->getValueType(0)); + } + + APFloat One(F.getSemantics(), "1.0"); + APFloat::cmpResult Cmp1 = F.compare(One); + if (Cmp1 == APFloat::cmpGreaterThan) + return DCI.DAG.getConstantFP(One, SDLoc(N), N->getValueType(0)); + + return SDValue(CSrc, 0); +} + /// Split the 64-bit value \p LHS into two 32-bit components, and perform the /// binary operation \p Opc to it with the corresponding constant operands. SDValue AMDGPUTargetLowering::splitBinaryBitConstantOpImpl( @@ -3323,6 +3352,8 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N, return performLoadCombine(N, DCI); case ISD::STORE: return performStoreCombine(N, DCI); + case AMDGPUISD::CLAMP: + return performClampCombine(N, DCI); } return SDValue(); } |