summary | refs | log | tree | commit | diff | stats
path: root/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
diff options
context:
space:
mode:
author: Tim Renouf <tpr.llvm@botech.co.uk> 2018-08-02 22:53:57 +0000
committer: Tim Renouf <tpr.llvm@botech.co.uk> 2018-08-02 22:53:57 +0000
commit: f1c7b92a6a714da858f678f2ec7edd17f177c552 (patch)
tree: 1d40c036151e7301e84203de145f618dc2a4aa43 /llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
parent: 666de23fbf99430d4ab24658efbf4228e9215a32 (diff)
download: bcm5719-llvm-f1c7b92a6a714da858f678f2ec7edd17f177c552.tar.gz
          bcm5719-llvm-f1c7b92a6a714da858f678f2ec7edd17f177c552.zip
[AMDGPU] Avoid using divergent value in mubuf addr64 descriptor
Summary:
This fixes a problem where a load from global+idx generated incorrect code
on <=gfx7 when the index is divergent.

Subscribers: arsenm, kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, t-tye, llvm-commits

Differential Revision: https://reviews.llvm.org/D47383

Change-Id: Ib4d177d6254b1dd3f8ec0203fdddec94bd8bc5ed
llvm-svn: 338779
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll')
-rw-r--r-- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll 40
1 files changed, 18 insertions, 22 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
index 6a9191e7dcb..5df1d55b904 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
@@ -59,10 +59,9 @@ define amdgpu_kernel void @opencl_kernel_implicitarg_ptr([112 x i8]) #1 {
; GCN-LABEL: {{^}}func_implicitarg_ptr:
; GCN: s_waitcnt
-; MESA: s_mov_b64 s[8:9], s[6:7]
-; MESA: s_mov_b32 s11, 0xf000
-; MESA: s_mov_b32 s10, -1
-; MESA: buffer_load_dword v0, off, s[8:11], 0
+; MESA: v_mov_b32_e32 v0, s6
+; MESA: v_mov_b32_e32 v1, s7
+; MESA: buffer_load_dword v0, v[0:1], s[8:11], 0 addr64
; HSA: v_mov_b32_e32 v0, s6
; HSA: v_mov_b32_e32 v1, s7
; HSA: flat_load_dword v0, v[0:1]
@@ -77,10 +76,9 @@ define void @func_implicitarg_ptr() #0 {
; GCN-LABEL: {{^}}opencl_func_implicitarg_ptr:
; GCN: s_waitcnt
-; MESA: s_mov_b64 s[8:9], s[6:7]
-; MESA: s_mov_b32 s11, 0xf000
-; MESA: s_mov_b32 s10, -1
-; MESA: buffer_load_dword v0, off, s[8:11], 0
+; MESA: v_mov_b32_e32 v0, s6
+; MESA: v_mov_b32_e32 v1, s7
+; MESA: buffer_load_dword v0, v[0:1], s[8:11], 0 addr64
; HSA: v_mov_b32_e32 v0, s6
; HSA: v_mov_b32_e32 v1, s7
; HSA: flat_load_dword v0, v[0:1]
@@ -164,16 +162,15 @@ define void @opencl_func_call_implicitarg_ptr_func() #0 {
; GCN-LABEL: {{^}}func_kernarg_implicitarg_ptr:
; GCN: s_waitcnt
-; MESA: s_mov_b64 s[12:13], s[6:7]
-; MESA: s_mov_b32 s15, 0xf000
-; MESA: s_mov_b32 s14, -1
-; MESA: buffer_load_dword v0, off, s[12:15], 0
+; MESA: v_mov_b32_e32 v0, s6
+; MESA: v_mov_b32_e32 v1, s7
+; MESA: v_mov_b32_e32 v2, s8
+; MESA: v_mov_b32_e32 v3, s9
+; MESA: buffer_load_dword v0, v[0:1], s[8:11], 0 addr64
; HSA: v_mov_b32_e32 v0, s6
; HSA: v_mov_b32_e32 v1, s7
; HSA: flat_load_dword v0, v[0:1]
-; MESA: s_mov_b32 s10, s14
-; MESA: s_mov_b32 s11, s15
-; MESA: buffer_load_dword v0, off, s[8:11], 0
+; MESA: buffer_load_dword v0, v[2:3], s[8:11], 0 addr64
; HSA: v_mov_b32_e32 v0, s8
; HSA: v_mov_b32_e32 v1, s9
; HSA: flat_load_dword v0, v[0:1]
@@ -191,16 +188,15 @@ define void @func_kernarg_implicitarg_ptr() #0 {
; GCN-LABEL: {{^}}opencl_func_kernarg_implicitarg_ptr:
; GCN: s_waitcnt
-; MESA: s_mov_b64 s[12:13], s[6:7]
-; MESA: s_mov_b32 s15, 0xf000
-; MESA: s_mov_b32 s14, -1
-; MESA: buffer_load_dword v0, off, s[12:15], 0
+; MESA: v_mov_b32_e32 v0, s6
+; MESA: v_mov_b32_e32 v1, s7
+; MESA: v_mov_b32_e32 v2, s8
+; MESA: v_mov_b32_e32 v3, s9
+; MESA: buffer_load_dword v0, v[0:1], s[8:11], 0 addr64
; HSA: v_mov_b32_e32 v0, s6
; HSA: v_mov_b32_e32 v1, s7
; HSA: flat_load_dword v0, v[0:1]
-; MESA: s_mov_b32 s10, s14
-; MESA: s_mov_b32 s11, s15
-; MESA: buffer_load_dword v0, off, s[8:11], 0
+; MESA: buffer_load_dword v0, v[2:3], s[8:11], 0 addr64
; HSA: v_mov_b32_e32 v0, s8
; HSA: v_mov_b32_e32 v1, s9
; HSA: flat_load_dword v0, v[0:1]
OpenPOWER on IntegriCloud