summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
diff options
context:
space:
mode:
authorMatt Arsenault <Matthew.Arsenault@amd.com>2019-03-29 19:14:54 +0000
committerMatt Arsenault <Matthew.Arsenault@amd.com>2019-03-29 19:14:54 +0000
commit055e4dce45c3f2194c0610db1cee6dedfa6040ab (patch)
tree5ab91d97a6f8a6965f38f403c5db59f446ed81bb /llvm/lib/Target/AMDGPU/SIISelLowering.cpp
parentd395bc1cc23e1c77ede517611c29c1037a4d8a4b (diff)
downloadbcm5719-llvm-055e4dce45c3f2194c0610db1cee6dedfa6040ab.tar.gz
bcm5719-llvm-055e4dce45c3f2194c0610db1cee6dedfa6040ab.zip
AMDGPU: Remove dx10-clamp from subtarget features
Since this can be set with s_setreg*, it should not be a subtarget property. Set a default based on the calling convention, and Introduce a new amdgpu-dx10-clamp attribute to override this if desired. Also introduce a new amdgpu-ieee attribute to match. The values need to match to allow inlining. I think it is OK for the caller's dx10-clamp attribute to override the callee, but there doesn't appear to be the infrastructure to do this currently without definining the attribute in the generic Attributes.td. Eventually the calling convention lowering will need to insert a mode switch somewhere for these. llvm-svn: 357302
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIISelLowering.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.cpp23
1 files changed, 18 insertions, 5 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 8437e4bb34e..c3995c2291f 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -4145,7 +4145,9 @@ SDValue SITargetLowering::lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
SDValue SITargetLowering::lowerFMINNUM_FMAXNUM(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
- bool IsIEEEMode = Subtarget->enableIEEEBit(DAG.getMachineFunction());
+ const MachineFunction &MF = DAG.getMachineFunction();
+ const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+ bool IsIEEEMode = Info->getMode().IEEE;
// FIXME: Assert during eslection that this is only selected for
// ieee_mode. Currently a combine can produce the ieee version for non-ieee
@@ -8300,9 +8302,12 @@ SDValue SITargetLowering::performFPMed3ImmCombine(SelectionDAG &DAG,
if (Cmp == APFloat::cmpGreaterThan)
return SDValue();
+ const MachineFunction &MF = DAG.getMachineFunction();
+ const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+
// TODO: Check IEEE bit enabled?
EVT VT = Op0.getValueType();
- if (Subtarget->enableDX10Clamp()) {
+ if (Info->getMode().DX10Clamp) {
// If dx10_clamp is enabled, NaNs clamp to 0.0. This is the same as the
// hardware fmed3 behavior converting to a min.
// FIXME: Should this be allowing -0.0?
@@ -8436,9 +8441,12 @@ SDValue SITargetLowering::performFMed3Combine(SDNode *N,
return DAG.getNode(AMDGPUISD::CLAMP, SL, VT, Src2);
}
+ const MachineFunction &MF = DAG.getMachineFunction();
+ const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+
// FIXME: dx10_clamp behavior assumed in instcombine. Should we really bother
// handling no dx10-clamp?
- if (Subtarget->enableDX10Clamp()) {
+ if (Info->getMode().DX10Clamp) {
// If NaNs is clamped to 0, we are free to reorder the inputs.
if (isa<ConstantFPSDNode>(Src0) && !isa<ConstantFPSDNode>(Src1))
@@ -9128,11 +9136,13 @@ SDValue SITargetLowering::performClampCombine(SDNode *N,
if (!CSrc)
return SDValue();
+ const MachineFunction &MF = DCI.DAG.getMachineFunction();
const APFloat &F = CSrc->getValueAPF();
APFloat Zero = APFloat::getZero(F.getSemantics());
APFloat::cmpResult Cmp0 = F.compare(Zero);
if (Cmp0 == APFloat::cmpLessThan ||
- (Cmp0 == APFloat::cmpUnordered && Subtarget->enableDX10Clamp())) {
+ (Cmp0 == APFloat::cmpUnordered &&
+ MF.getInfo<SIMachineFunctionInfo>()->getMode().DX10Clamp)) {
return DCI.DAG.getConstantFP(Zero, SDLoc(N), N->getValueType(0));
}
@@ -9967,7 +9977,10 @@ bool SITargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
bool SNaN,
unsigned Depth) const {
if (Op.getOpcode() == AMDGPUISD::CLAMP) {
- if (Subtarget->enableDX10Clamp())
+ const MachineFunction &MF = DAG.getMachineFunction();
+ const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+
+ if (Info->getMode().DX10Clamp)
return true; // Clamped to 0.
return DAG.isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
}
OpenPOWER on IntegriCloud