summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorWei Ding <wei.ding2@amd.com>2017-02-24 23:00:29 +0000
committerWei Ding <wei.ding2@amd.com>2017-02-24 23:00:29 +0000
commit4d3d4ca1b37a5f236185dca599bfb32562958dd3 (patch)
tree118022d3dce242e9ef03457eff32e4912957f992
parent9ea0817c5ac5fa59bb56dc78eb335f074e33a2c8 (diff)
downloadbcm5719-llvm-4d3d4ca1b37a5f236185dca599bfb32562958dd3.tar.gz
bcm5719-llvm-4d3d4ca1b37a5f236185dca599bfb32562958dd3.zip
AMDGPU : Replace FMAD with FMA when denormals are enabled.
Differential Revision: http://reviews.llvm.org/D29958 llvm-svn: 296186
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp6
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h3
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td2
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstructions.td10
-rw-r--r--llvm/test/CodeGen/AMDGPU/udiv.ll20
5 files changed, 39 insertions, 2 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 9fa690bf8a0..f28afa89bd2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -1290,7 +1290,10 @@ SDValue AMDGPUTargetLowering::LowerDIVREM24(SDValue Op, SelectionDAG &DAG,
SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FltVT, fq);
// float fr = mad(fqneg, fb, fa);
- SDValue fr = DAG.getNode(ISD::FMAD, DL, FltVT, fqneg, fb, fa);
+ unsigned OpCode = Subtarget->hasFP32Denormals() ?
+ (unsigned)AMDGPUISD::FMAD_FTZ :
+ (unsigned)ISD::FMAD;
+ SDValue fr = DAG.getNode(OpCode, DL, FltVT, fqneg, fb, fa);
// int iq = (int)fq;
SDValue iq = DAG.getNode(ToInt, DL, IntVT, fq);
@@ -3416,6 +3419,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(DIV_SCALE)
NODE_NAME_CASE(DIV_FMAS)
NODE_NAME_CASE(DIV_FIXUP)
+ NODE_NAME_CASE(FMAD_FTZ)
NODE_NAME_CASE(TRIG_PREOP)
NODE_NAME_CASE(RCP)
NODE_NAME_CASE(RSQ)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 0ad9b1ce988..1964fc223cf 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -271,6 +271,9 @@ enum NodeType : unsigned {
DIV_SCALE,
DIV_FMAS,
DIV_FIXUP,
+ // For emitting ISD::FMAD when f32 denormals are enabled because mac/mad is
+ // treated as an illegal operation.
+ FMAD_FTZ,
TRIG_PREOP, // 1 ULP max error for f64
// RCP, RSQ - For f32, 1 ULP max error, no denormal handling.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
index 63053f369c4..54be4aec951 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -190,6 +190,8 @@ def AMDGPUdiv_fmas : SDNode<"AMDGPUISD::DIV_FMAS", AMDGPUFmasOp>;
// Denominator, src2 = Numerator).
def AMDGPUdiv_fixup : SDNode<"AMDGPUISD::DIV_FIXUP", SDTFPTernaryOp>;
+def AMDGPUfmad_ftz : SDNode<"AMDGPUISD::FMAD_FTZ", SDTFPTernaryOp>;
+
// Look Up 2.0 / pi src0 with segment select src1[4:0]
def AMDGPUtrig_preop : SDNode<"AMDGPUISD::TRIG_PREOP", AMDGPUTrigPreOp>;
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 77a61b0287f..bd7c9ddf728 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -508,6 +508,16 @@ multiclass FMADPat <ValueType vt, Instruction inst> {
defm : FMADPat <f16, V_MAC_F16_e64>;
defm : FMADPat <f32, V_MAC_F32_e64>;
+class FMADModsPat<Instruction inst, SDPatternOperator mad_opr> : Pat<
+ (f32 (mad_opr (VOP3Mods f32:$src0, i32:$src0_mod),
+ (VOP3Mods f32:$src1, i32:$src1_mod),
+ (VOP3Mods f32:$src2, i32:$src2_mod))),
+ (inst $src0_mod, $src0, $src1_mod, $src1,
+ $src2_mod, $src2, DSTCLAMP.NONE, DSTOMOD.NONE)
+>;
+
+def : FMADModsPat<V_MAD_F32, AMDGPUfmad_ftz>;
+
multiclass SelectPat <ValueType vt, Instruction inst> {
def : Pat <
(vt (select i1:$src0, vt:$src1, vt:$src2)),
diff --git a/llvm/test/CodeGen/AMDGPU/udiv.ll b/llvm/test/CodeGen/AMDGPU/udiv.ll
index d081d5ef1ae..ed791bc3a5c 100644
--- a/llvm/test/CodeGen/AMDGPU/udiv.ll
+++ b/llvm/test/CodeGen/AMDGPU/udiv.ll
@@ -1,5 +1,8 @@
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs -mattr=-fp32-denormals < %s | FileCheck -check-prefix=SI -check-prefix=FUNC -check-prefix=VI %s
+
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=+fp32-denormals < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
+
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}udiv_i32:
@@ -180,3 +183,18 @@ define void @test_udiv_3_mulhu(i32 %p) {
store volatile i32 %i, i32 addrspace(1)* undef
ret void
}
+
+; GCN-LABEL: {{^}}fdiv_test_denormals
+; VI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+define amdgpu_kernel void @fdiv_test_denormals(i8 addrspace(1)* nocapture readonly %arg) {
+bb:
+ %tmp = load i8, i8 addrspace(1)* null, align 1
+ %tmp1 = sext i8 %tmp to i32
+ %tmp2 = getelementptr inbounds i8, i8 addrspace(1)* %arg, i64 undef
+ %tmp3 = load i8, i8 addrspace(1)* %tmp2, align 1
+ %tmp4 = sext i8 %tmp3 to i32
+ %tmp5 = sdiv i32 %tmp1, %tmp4
+ %tmp6 = trunc i32 %tmp5 to i8
+ store i8 %tmp6, i8 addrspace(1)* null, align 1
+ ret void
+}
OpenPOWER on IntegriCloud