diff options
author | Jan Vesely <jan.vesely@rutgers.edu> | 2020-02-04 19:27:19 -0500 |
---|---|---|
committer | Hans Wennborg <hans@chromium.org> | 2020-02-10 14:23:15 +0100 |
commit | b73942dbc144c11dc94fd32a7d8025a22e7e1d6b (patch) | |
tree | 6cb8a9d4ae0eb1b39319fd14187d9ae4d0cd370d /llvm/lib | |
parent | 84cda4cceabdfec4f130bfafe7bbd050aa65b2ec (diff) | |
download | bcm5719-llvm-b73942dbc144c11dc94fd32a7d8025a22e7e1d6b.tar.gz bcm5719-llvm-b73942dbc144c11dc94fd32a7d8025a22e7e1d6b.zip |
AMDGPU/EG,CM: Implement fsqrt using recip(rsqrt(x)) instead of x * rsqrt(x)
The old version (x * rsqrt(x)) might be faster on EG (RECIP_IEEE is Trans only),
but it would need extra corner-case checks.
The new version gives correct corner-case behaviour and saves a register.
Fixes OCL CTS sqrt test (1-thread, scalar) on Turks.
Reviewer: arsenm
Differential Revision: https://reviews.llvm.org/D74017
(cherry picked from commit e6686adf8a743564f0c455c34f04752ab08cf642)
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/AMDGPU/CaymanInstructions.td | 4 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/EvergreenInstructions.td | 3 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/R600Instructions.td | 7 |
3 files changed, 10 insertions, 4 deletions
diff --git a/llvm/lib/Target/AMDGPU/CaymanInstructions.td b/llvm/lib/Target/AMDGPU/CaymanInstructions.td index 1a526675164..e2978624811 100644 --- a/llvm/lib/Target/AMDGPU/CaymanInstructions.td +++ b/llvm/lib/Target/AMDGPU/CaymanInstructions.td @@ -50,6 +50,8 @@ def COS_cm : COS_Common<0x8E>; def : RsqPat<RECIPSQRT_IEEE_cm, f32>; +def : SqrtPat<RECIPSQRT_IEEE_cm, RECIP_IEEE_cm>; + def : POW_Common <LOG_IEEE_cm, EXP_IEEE_cm, MUL>; defm DIV_cm : DIV_Common<RECIP_IEEE_cm>; @@ -70,8 +72,6 @@ def CF_END_CM : CF_CLAUSE_EG<32, (ins), "CF_END"> { -def : R600Pat<(fsqrt f32:$src), (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm $src))>; - class RAT_STORE_DWORD <RegisterClass rc, ValueType vt, bits<4> mask> : CF_MEM_RAT_CACHELESS <0x14, 0, mask, (ins rc:$rw_gpr, R600_TReg32_X:$index_gpr), diff --git a/llvm/lib/Target/AMDGPU/EvergreenInstructions.td b/llvm/lib/Target/AMDGPU/EvergreenInstructions.td index 792e26d21f9..88e554ae0bc 100644 --- a/llvm/lib/Target/AMDGPU/EvergreenInstructions.td +++ b/llvm/lib/Target/AMDGPU/EvergreenInstructions.td @@ -118,11 +118,12 @@ def LOG_IEEE_eg : LOG_IEEE_Common<0x83>; def RECIP_CLAMPED_eg : RECIP_CLAMPED_Common<0x84>; def RECIPSQRT_IEEE_eg : RECIPSQRT_IEEE_Common<0x89>; def : RsqPat<RECIPSQRT_IEEE_eg, f32>; +def : SqrtPat<RECIPSQRT_IEEE_eg, RECIP_IEEE_eg>; + def SIN_eg : SIN_Common<0x8D>; def COS_eg : COS_Common<0x8E>; def : POW_Common <LOG_IEEE_eg, EXP_IEEE_eg, MUL>; -def : EGPat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_eg $src))>; } // End SubtargetPredicate = isEG //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/R600Instructions.td b/llvm/lib/Target/AMDGPU/R600Instructions.td index cbdf0de44f8..869c183e224 100644 --- a/llvm/lib/Target/AMDGPU/R600Instructions.td +++ b/llvm/lib/Target/AMDGPU/R600Instructions.td @@ -1233,6 +1233,11 @@ def : R600Pat< def : RcpPat<recip_ieee, f32>; } +class SqrtPat<Instruction RsqInst, Instruction RecipInst> : R600Pat < + (fsqrt f32:$src), 
+ (RecipInst (RsqInst $src)) +>; + //===----------------------------------------------------------------------===// // R600 / R700 Instructions //===----------------------------------------------------------------------===// @@ -1272,8 +1277,8 @@ let Predicates = [isR600] in { defm DIV_r600 : DIV_Common<RECIP_IEEE_r600>; def : POW_Common <LOG_IEEE_r600, EXP_IEEE_r600, MUL>; - def : R600Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_r600 $src))>; def : RsqPat<RECIPSQRT_IEEE_r600, f32>; + def : SqrtPat<RECIPSQRT_IEEE_r600, RECIP_IEEE_r600>; def R600_ExportSwz : ExportSwzInst { let Word1{20-17} = 0; // BURST_COUNT |