diff options
| author | Wei Ding <wei.ding2@amd.com> | 2017-02-24 23:00:29 +0000 |
|---|---|---|
| committer | Wei Ding <wei.ding2@amd.com> | 2017-02-24 23:00:29 +0000 |
| commit | 4d3d4ca1b37a5f236185dca599bfb32562958dd3 (patch) | |
| tree | 118022d3dce242e9ef03457eff32e4912957f992 | |
| parent | 9ea0817c5ac5fa59bb56dc78eb335f074e33a2c8 (diff) | |
| download | bcm5719-llvm-4d3d4ca1b37a5f236185dca599bfb32562958dd3.tar.gz bcm5719-llvm-4d3d4ca1b37a5f236185dca599bfb32562958dd3.zip | |
AMDGPU: Replace FMAD with FMA when denormals are enabled.
Differential Revision: http://reviews.llvm.org/D29958
llvm-svn: 296186
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 6 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h | 3 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td | 2 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstructions.td | 10 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/udiv.ll | 20 |
5 files changed, 39 insertions, 2 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 9fa690bf8a0..f28afa89bd2 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -1290,7 +1290,10 @@ SDValue AMDGPUTargetLowering::LowerDIVREM24(SDValue Op, SelectionDAG &DAG, SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FltVT, fq); // float fr = mad(fqneg, fb, fa); - SDValue fr = DAG.getNode(ISD::FMAD, DL, FltVT, fqneg, fb, fa); + unsigned OpCode = Subtarget->hasFP32Denormals() ? + (unsigned)AMDGPUISD::FMAD_FTZ : + (unsigned)ISD::FMAD; + SDValue fr = DAG.getNode(OpCode, DL, FltVT, fqneg, fb, fa); // int iq = (int)fq; SDValue iq = DAG.getNode(ToInt, DL, IntVT, fq); @@ -3416,6 +3419,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(DIV_SCALE) NODE_NAME_CASE(DIV_FMAS) NODE_NAME_CASE(DIV_FIXUP) + NODE_NAME_CASE(FMAD_FTZ) NODE_NAME_CASE(TRIG_PREOP) NODE_NAME_CASE(RCP) NODE_NAME_CASE(RSQ) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h index 0ad9b1ce988..1964fc223cf 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -271,6 +271,9 @@ enum NodeType : unsigned { DIV_SCALE, DIV_FMAS, DIV_FIXUP, + // For emitting ISD::FMAD when f32 denormals are enabled because mac/mad is + // treated as an illegal operation. + FMAD_FTZ, TRIG_PREOP, // 1 ULP max error for f64 // RCP, RSQ - For f32, 1 ULP max error, no denormal handling. diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td index 63053f369c4..54be4aec951 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td @@ -190,6 +190,8 @@ def AMDGPUdiv_fmas : SDNode<"AMDGPUISD::DIV_FMAS", AMDGPUFmasOp>; // Denominator, src2 = Numerator). 
def AMDGPUdiv_fixup : SDNode<"AMDGPUISD::DIV_FIXUP", SDTFPTernaryOp>; +def AMDGPUfmad_ftz : SDNode<"AMDGPUISD::FMAD_FTZ", SDTFPTernaryOp>; + // Look Up 2.0 / pi src0 with segment select src1[4:0] def AMDGPUtrig_preop : SDNode<"AMDGPUISD::TRIG_PREOP", AMDGPUTrigPreOp>; diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 77a61b0287f..bd7c9ddf728 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -508,6 +508,16 @@ multiclass FMADPat <ValueType vt, Instruction inst> { defm : FMADPat <f16, V_MAC_F16_e64>; defm : FMADPat <f32, V_MAC_F32_e64>; +class FMADModsPat<Instruction inst, SDPatternOperator mad_opr> : Pat< + (f32 (mad_opr (VOP3Mods f32:$src0, i32:$src0_mod), + (VOP3Mods f32:$src1, i32:$src1_mod), + (VOP3Mods f32:$src2, i32:$src2_mod))), + (inst $src0_mod, $src0, $src1_mod, $src1, + $src2_mod, $src2, DSTCLAMP.NONE, DSTOMOD.NONE) +>; + +def : FMADModsPat<V_MAD_F32, AMDGPUfmad_ftz>; + multiclass SelectPat <ValueType vt, Instruction inst> { def : Pat < (vt (select i1:$src0, vt:$src1, vt:$src2)), diff --git a/llvm/test/CodeGen/AMDGPU/udiv.ll b/llvm/test/CodeGen/AMDGPU/udiv.ll index d081d5ef1ae..ed791bc3a5c 100644 --- a/llvm/test/CodeGen/AMDGPU/udiv.ll +++ b/llvm/test/CodeGen/AMDGPU/udiv.ll @@ -1,5 +1,8 @@ ; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs -mattr=-fp32-denormals < %s | FileCheck -check-prefix=SI -check-prefix=FUNC -check-prefix=VI %s + +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=+fp32-denormals < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s + ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; FUNC-LABEL: 
{{^}}udiv_i32: @@ -180,3 +183,18 @@ define void @test_udiv_3_mulhu(i32 %p) { store volatile i32 %i, i32 addrspace(1)* undef ret void } + +; GCN-LABEL: {{^}}fdiv_test_denormals +; VI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} +define amdgpu_kernel void @fdiv_test_denormals(i8 addrspace(1)* nocapture readonly %arg) { +bb: + %tmp = load i8, i8 addrspace(1)* null, align 1 + %tmp1 = sext i8 %tmp to i32 + %tmp2 = getelementptr inbounds i8, i8 addrspace(1)* %arg, i64 undef + %tmp3 = load i8, i8 addrspace(1)* %tmp2, align 1 + %tmp4 = sext i8 %tmp3 to i32 + %tmp5 = sdiv i32 %tmp1, %tmp4 + %tmp6 = trunc i32 %tmp5 to i8 + store i8 %tmp6, i8 addrspace(1)* null, align 1 + ret void +} |

