diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-07-09 07:48:11 +0000 |
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-07-09 07:48:11 +0000 |
| commit | dfec5ce0325228a2f748261a22f6a9673a87e313 (patch) | |
| tree | 51e4be658370ea6b674d6286a9c1c12b60f8e307 /llvm/lib | |
| parent | 95b61b0544a024661c86215506ce035b1d966ebc (diff) | |
| download | bcm5719-llvm-dfec5ce0325228a2f748261a22f6a9673a87e313.tar.gz bcm5719-llvm-dfec5ce0325228a2f748261a22f6a9673a87e313.zip | |
AMDGPU: Fix fdiv lowering when f32 denormals supported
Also fix test not actually using function labels.
llvm-svn: 274969
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 8 |
1 file changed, 3 insertions, 5 deletions
```diff
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index f224e197ea4..29c40851ea9 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -2073,17 +2073,13 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
   if (SDValue FastLowered = LowerFastFDIV(Op, DAG))
     return FastLowered;
 
-  // This uses v_rcp_f32 which does not handle denormals. Let this hit a
-  // selection error for now rather than do something incorrect.
-  if (Subtarget->hasFP32Denormals())
-    return SDValue();
-
   SDLoc SL(Op);
   SDValue LHS = Op.getOperand(0);
   SDValue RHS = Op.getOperand(1);
 
   // faster 2.5 ulp fdiv when using -amdgpu-fast-fdiv flag
   if (EnableAMDGPUFastFDIV) {
+    // This does not support denormals.
     SDValue r1 = DAG.getNode(ISD::FABS, SL, MVT::f32, RHS);
 
     const APFloat K0Val(BitsToFloat(0x6f800000));
@@ -2105,6 +2101,7 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
 
     r1 = DAG.getNode(ISD::FMUL, SL, MVT::f32, RHS, r3);
 
+    // rcp does not support denormals.
     SDValue r0 = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f32, r1);
 
     SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f32, LHS, r0);
@@ -2120,6 +2117,7 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
   SDValue DenominatorScaled = DAG.getNode(AMDGPUISD::DIV_SCALE, SL, ScaleVT, RHS, RHS, LHS);
   SDValue NumeratorScaled = DAG.getNode(AMDGPUISD::DIV_SCALE, SL, ScaleVT, LHS, RHS, LHS);
 
+  // Denominator is scaled to not be denormal, so using rcp is ok.
   SDValue ApproxRcp = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f32, DenominatorScaled);
 
   SDValue NegDivScale0 = DAG.getNode(ISD::FNEG, SL, MVT::f32, DenominatorScaled);
```

