summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
diff options
context:
space:
mode:
authorMatt Arsenault <Matthew.Arsenault@amd.com>2017-02-21 23:35:48 +0000
committerMatt Arsenault <Matthew.Arsenault@amd.com>2017-02-21 23:35:48 +0000
commit2fdf2a1a189ce2d0462eb1902a88f5210b963063 (patch)
treecf28f9b484ce67fa9098bb72532749abe3f54aa0 /llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
parente6e206d4b48146b194b04f43a3dec4d32d568019 (diff)
downloadbcm5719-llvm-2fdf2a1a189ce2d0462eb1902a88f5210b963063.tar.gz
bcm5719-llvm-2fdf2a1a189ce2d0462eb1902a88f5210b963063.zip
AMDGPU: Redefine clamp node as clamp 0.0-1.0
Change implementation to use max instead of add. min/max/med3 do not flush denormals regardless of the mode, so it is OK to use it whether or not they are enabled. Also allow using clamp with f16, and use knowledge of dx10_clamp. llvm-svn: 295788
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp59
1 files changed, 45 insertions, 14 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 12c17c3e9eb..9e3ac8b871e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -1012,22 +1012,29 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
EVT VT = Op.getValueType();
switch (IntrinsicID) {
- default: return Op;
- case AMDGPUIntrinsic::AMDGPU_clamp: // Legacy name.
- return DAG.getNode(AMDGPUISD::CLAMP, DL, VT,
- Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+ default: return Op;
+ case AMDGPUIntrinsic::AMDGPU_clamp: {
+ // Deprecated in favor of emitting min/max combo or fmed3.
+ ConstantFPSDNode *CSrc1 = dyn_cast<ConstantFPSDNode>(Op.getOperand(2));
+ ConstantFPSDNode *CSrc2 = dyn_cast<ConstantFPSDNode>(Op.getOperand(3));
+ if (CSrc1 && CSrc2 && CSrc1->isZero() && CSrc2->isExactlyValue(1.0))
+ return DAG.getNode(AMDGPUISD::CLAMP, DL, VT, Op.getOperand(1));
- case AMDGPUIntrinsic::AMDGPU_bfe_i32:
- return DAG.getNode(AMDGPUISD::BFE_I32, DL, VT,
- Op.getOperand(1),
- Op.getOperand(2),
- Op.getOperand(3));
+ SDValue Max = DAG.getNode(ISD::FMAXNUM, DL, VT, Op.getOperand(1),
+ Op.getOperand(2));
+ return DAG.getNode(ISD::FMINNUM, DL, VT, Max, Op.getOperand(3));
+ }
+ case AMDGPUIntrinsic::AMDGPU_bfe_i32:
+ return DAG.getNode(AMDGPUISD::BFE_I32, DL, VT,
+ Op.getOperand(1),
+ Op.getOperand(2),
+ Op.getOperand(3));
- case AMDGPUIntrinsic::AMDGPU_bfe_u32:
- return DAG.getNode(AMDGPUISD::BFE_U32, DL, VT,
- Op.getOperand(1),
- Op.getOperand(2),
- Op.getOperand(3));
+ case AMDGPUIntrinsic::AMDGPU_bfe_u32:
+ return DAG.getNode(AMDGPUISD::BFE_U32, DL, VT,
+ Op.getOperand(1),
+ Op.getOperand(2),
+ Op.getOperand(3));
}
}
@@ -2445,6 +2452,28 @@ SDValue AMDGPUTargetLowering::performStoreCombine(SDNode *N,
SN->getBasePtr(), SN->getMemOperand());
}
+SDValue AMDGPUTargetLowering::performClampCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ ConstantFPSDNode *CSrc = dyn_cast<ConstantFPSDNode>(N->getOperand(0));
+ if (!CSrc)
+ return SDValue();
+
+ const APFloat &F = CSrc->getValueAPF();
+ APFloat Zero = APFloat::getZero(F.getSemantics());
+ APFloat::cmpResult Cmp0 = F.compare(Zero);
+ if (Cmp0 == APFloat::cmpLessThan ||
+ (Cmp0 == APFloat::cmpUnordered && Subtarget->enableDX10Clamp())) {
+ return DCI.DAG.getConstantFP(Zero, SDLoc(N), N->getValueType(0));
+ }
+
+ APFloat One(F.getSemantics(), "1.0");
+ APFloat::cmpResult Cmp1 = F.compare(One);
+ if (Cmp1 == APFloat::cmpGreaterThan)
+ return DCI.DAG.getConstantFP(One, SDLoc(N), N->getValueType(0));
+
+ return SDValue(CSrc, 0);
+}
+
/// Split the 64-bit value \p LHS into two 32-bit components, and perform the
/// binary operation \p Opc to it with the corresponding constant operands.
SDValue AMDGPUTargetLowering::splitBinaryBitConstantOpImpl(
@@ -3323,6 +3352,8 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
return performLoadCombine(N, DCI);
case ISD::STORE:
return performStoreCombine(N, DCI);
+ case AMDGPUISD::CLAMP:
+ return performClampCombine(N, DCI);
}
return SDValue();
}
OpenPOWER on IntegriCloud