summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/R600/SIISelLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/R600/SIISelLowering.cpp')
-rw-r--r--llvm/lib/Target/R600/SIISelLowering.cpp49
1 files changed, 20 insertions, 29 deletions
diff --git a/llvm/lib/Target/R600/SIISelLowering.cpp b/llvm/lib/Target/R600/SIISelLowering.cpp
index 2821a0ce475..74287144cc2 100644
--- a/llvm/lib/Target/R600/SIISelLowering.cpp
+++ b/llvm/lib/Target/R600/SIISelLowering.cpp
@@ -680,8 +680,9 @@ bool SITargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
case MVT::f32:
// This is as fast on some subtargets. However, we always have full rate f32
// mad available which returns the same result as the separate operations
- // which we should prefer over fma.
- return false;
+ // which we should prefer over fma. We can't use this if we want to support
+ // denormals, so only report this in these cases.
+ return Subtarget->hasFP32Denormals() && Subtarget->hasFastFMAF32();
case MVT::f64:
return true;
default:
@@ -1642,6 +1643,11 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
if (VT != MVT::f32)
break;
+ // Only do this if we are not trying to support denormals. v_mad_f32 does
+ // not support denormals ever.
+ if (Subtarget->hasFP32Denormals())
+ break;
+
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
@@ -1653,7 +1659,7 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
SDValue A = LHS.getOperand(0);
if (A == LHS.getOperand(1)) {
const SDValue Two = DAG.getConstantFP(2.0, MVT::f32);
- return DAG.getNode(AMDGPUISD::MAD, DL, VT, Two, A, RHS);
+ return DAG.getNode(ISD::FMAD, DL, VT, Two, A, RHS);
}
}
@@ -1662,11 +1668,11 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
SDValue A = RHS.getOperand(0);
if (A == RHS.getOperand(1)) {
const SDValue Two = DAG.getConstantFP(2.0, MVT::f32);
- return DAG.getNode(AMDGPUISD::MAD, DL, VT, Two, A, LHS);
+ return DAG.getNode(ISD::FMAD, DL, VT, Two, A, LHS);
}
}
- break;
+ return SDValue();
}
case ISD::FSUB: {
if (DCI.getDAGCombineLevel() < AfterLegalizeDAG)
@@ -1676,30 +1682,13 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
// Try to get the fneg to fold into the source modifier. This undoes generic
// DAG combines and folds them into the mad.
- if (VT == MVT::f32) {
+ //
+ // Only do this if we are not trying to support denormals. v_mad_f32 does
+ // not support denormals ever.
+ if (VT == MVT::f32 &&
+ !Subtarget->hasFP32Denormals()) {
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
-
- if (LHS.getOpcode() == ISD::FMUL) {
- // (fsub (fmul a, b), c) -> mad a, b, (fneg c)
-
- SDValue A = LHS.getOperand(0);
- SDValue B = LHS.getOperand(1);
- SDValue C = DAG.getNode(ISD::FNEG, DL, VT, RHS);
-
- return DAG.getNode(AMDGPUISD::MAD, DL, VT, A, B, C);
- }
-
- if (RHS.getOpcode() == ISD::FMUL) {
- // (fsub c, (fmul a, b)) -> mad (fneg a), b, c
-
- SDValue A = DAG.getNode(ISD::FNEG, DL, VT, RHS.getOperand(0));
- SDValue B = RHS.getOperand(1);
- SDValue C = LHS;
-
- return DAG.getNode(AMDGPUISD::MAD, DL, VT, A, B, C);
- }
-
if (LHS.getOpcode() == ISD::FADD) {
// (fsub (fadd a, a), c) -> mad 2.0, a, (fneg c)
@@ -1708,7 +1697,7 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
const SDValue Two = DAG.getConstantFP(2.0, MVT::f32);
SDValue NegRHS = DAG.getNode(ISD::FNEG, DL, VT, RHS);
- return DAG.getNode(AMDGPUISD::MAD, DL, VT, Two, A, NegRHS);
+ return DAG.getNode(ISD::FMAD, DL, VT, Two, A, NegRHS);
}
}
@@ -1718,9 +1707,11 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
SDValue A = RHS.getOperand(0);
if (A == RHS.getOperand(1)) {
const SDValue NegTwo = DAG.getConstantFP(-2.0, MVT::f32);
- return DAG.getNode(AMDGPUISD::MAD, DL, VT, NegTwo, A, LHS);
+ return DAG.getNode(ISD::FMAD, DL, VT, NegTwo, A, LHS);
}
}
+
+ return SDValue();
}
break;
OpenPOWER on IntegriCloud