summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorJan Vesely <jan.vesely@rutgers.edu>2020-02-04 19:27:19 -0500
committerHans Wennborg <hans@chromium.org>2020-02-10 14:23:15 +0100
commitb73942dbc144c11dc94fd32a7d8025a22e7e1d6b (patch)
tree6cb8a9d4ae0eb1b39319fd14187d9ae4d0cd370d /llvm/lib
parent84cda4cceabdfec4f130bfafe7bbd050aa65b2ec (diff)
downloadbcm5719-llvm-b73942dbc144c11dc94fd32a7d8025a22e7e1d6b.tar.gz
bcm5719-llvm-b73942dbc144c11dc94fd32a7d8025a22e7e1d6b.zip
AMDGPU/EG,CM: Implement fsqrt using recip(rsqrt(x)) instead of x * rsqrt(x)
The old version might be faster on EG (RECIP_IEEE is Trans only), but it'd need extra corner case checks. This gives correct corner case behaviour and saves a register. Fixes OCL CTS sqrt test (1-thread, scalar) on Turks. Reviewer: arsenm Differential Revision: https://reviews.llvm.org/D74017 (cherry picked from commit e6686adf8a743564f0c455c34f04752ab08cf642)
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/AMDGPU/CaymanInstructions.td4
-rw-r--r--llvm/lib/Target/AMDGPU/EvergreenInstructions.td3
-rw-r--r--llvm/lib/Target/AMDGPU/R600Instructions.td7
3 files changed, 10 insertions, 4 deletions
diff --git a/llvm/lib/Target/AMDGPU/CaymanInstructions.td b/llvm/lib/Target/AMDGPU/CaymanInstructions.td
index 1a526675164..e2978624811 100644
--- a/llvm/lib/Target/AMDGPU/CaymanInstructions.td
+++ b/llvm/lib/Target/AMDGPU/CaymanInstructions.td
@@ -50,6 +50,8 @@ def COS_cm : COS_Common<0x8E>;
def : RsqPat<RECIPSQRT_IEEE_cm, f32>;
+def : SqrtPat<RECIPSQRT_IEEE_cm, RECIP_IEEE_cm>;
+
def : POW_Common <LOG_IEEE_cm, EXP_IEEE_cm, MUL>;
defm DIV_cm : DIV_Common<RECIP_IEEE_cm>;
@@ -70,8 +72,6 @@ def CF_END_CM : CF_CLAUSE_EG<32, (ins), "CF_END"> {
-def : R600Pat<(fsqrt f32:$src), (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm $src))>;
-
class RAT_STORE_DWORD <RegisterClass rc, ValueType vt, bits<4> mask> :
CF_MEM_RAT_CACHELESS <0x14, 0, mask,
(ins rc:$rw_gpr, R600_TReg32_X:$index_gpr),
diff --git a/llvm/lib/Target/AMDGPU/EvergreenInstructions.td b/llvm/lib/Target/AMDGPU/EvergreenInstructions.td
index 792e26d21f9..88e554ae0bc 100644
--- a/llvm/lib/Target/AMDGPU/EvergreenInstructions.td
+++ b/llvm/lib/Target/AMDGPU/EvergreenInstructions.td
@@ -118,11 +118,12 @@ def LOG_IEEE_eg : LOG_IEEE_Common<0x83>;
def RECIP_CLAMPED_eg : RECIP_CLAMPED_Common<0x84>;
def RECIPSQRT_IEEE_eg : RECIPSQRT_IEEE_Common<0x89>;
def : RsqPat<RECIPSQRT_IEEE_eg, f32>;
+def : SqrtPat<RECIPSQRT_IEEE_eg, RECIP_IEEE_eg>;
+
def SIN_eg : SIN_Common<0x8D>;
def COS_eg : COS_Common<0x8E>;
def : POW_Common <LOG_IEEE_eg, EXP_IEEE_eg, MUL>;
-def : EGPat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_eg $src))>;
} // End SubtargetPredicate = isEG
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/R600Instructions.td b/llvm/lib/Target/AMDGPU/R600Instructions.td
index cbdf0de44f8..869c183e224 100644
--- a/llvm/lib/Target/AMDGPU/R600Instructions.td
+++ b/llvm/lib/Target/AMDGPU/R600Instructions.td
@@ -1233,6 +1233,11 @@ def : R600Pat<
def : RcpPat<recip_ieee, f32>;
}
+class SqrtPat<Instruction RsqInst, Instruction RecipInst> : R600Pat <
+ (fsqrt f32:$src),
+ (RecipInst (RsqInst $src))
+>;
+
//===----------------------------------------------------------------------===//
// R600 / R700 Instructions
//===----------------------------------------------------------------------===//
@@ -1272,8 +1277,8 @@ let Predicates = [isR600] in {
defm DIV_r600 : DIV_Common<RECIP_IEEE_r600>;
def : POW_Common <LOG_IEEE_r600, EXP_IEEE_r600, MUL>;
- def : R600Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_r600 $src))>;
def : RsqPat<RECIPSQRT_IEEE_r600, f32>;
+ def : SqrtPat<RECIPSQRT_IEEE_r600, RECIP_IEEE_r600>;
def R600_ExportSwz : ExportSwzInst {
let Word1{20-17} = 0; // BURST_COUNT
OpenPOWER on IntegriCloud