diff options
author | Tim Renouf <tpr.llvm@botech.co.uk> | 2018-08-02 22:53:57 +0000 |
---|---|---|
committer | Tim Renouf <tpr.llvm@botech.co.uk> | 2018-08-02 22:53:57 +0000 |
commit | f1c7b92a6a714da858f678f2ec7edd17f177c552 (patch) | |
tree | 1d40c036151e7301e84203de145f618dc2a4aa43 /llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll | |
parent | 666de23fbf99430d4ab24658efbf4228e9215a32 (diff) | |
download | bcm5719-llvm-f1c7b92a6a714da858f678f2ec7edd17f177c552.tar.gz bcm5719-llvm-f1c7b92a6a714da858f678f2ec7edd17f177c552.zip |
[AMDGPU] Avoid using divergent value in mubuf addr64 descriptor
Summary:
This fixes a problem where a load from global+idx generated incorrect
code on <=gfx7 when the index is divergent.
Subscribers: arsenm, kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, t-tye, llvm-commits
Differential Revision: https://reviews.llvm.org/D47383
Change-Id: Ib4d177d6254b1dd3f8ec0203fdddec94bd8bc5ed
llvm-svn: 338779
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll | 40 |
1 files changed, 18 insertions, 22 deletions
```diff
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
index 6a9191e7dcb..5df1d55b904 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
@@ -59,10 +59,9 @@ define amdgpu_kernel void @opencl_kernel_implicitarg_ptr([112 x i8]) #1 {
 
 ; GCN-LABEL: {{^}}func_implicitarg_ptr:
 ; GCN: s_waitcnt
-; MESA: s_mov_b64 s[8:9], s[6:7]
-; MESA: s_mov_b32 s11, 0xf000
-; MESA: s_mov_b32 s10, -1
-; MESA: buffer_load_dword v0, off, s[8:11], 0
+; MESA: v_mov_b32_e32 v0, s6
+; MESA: v_mov_b32_e32 v1, s7
+; MESA: buffer_load_dword v0, v[0:1], s[8:11], 0 addr64
 ; HSA: v_mov_b32_e32 v0, s6
 ; HSA: v_mov_b32_e32 v1, s7
 ; HSA: flat_load_dword v0, v[0:1]
@@ -77,10 +76,9 @@ define void @func_implicitarg_ptr() #0 {
 
 ; GCN-LABEL: {{^}}opencl_func_implicitarg_ptr:
 ; GCN: s_waitcnt
-; MESA: s_mov_b64 s[8:9], s[6:7]
-; MESA: s_mov_b32 s11, 0xf000
-; MESA: s_mov_b32 s10, -1
-; MESA: buffer_load_dword v0, off, s[8:11], 0
+; MESA: v_mov_b32_e32 v0, s6
+; MESA: v_mov_b32_e32 v1, s7
+; MESA: buffer_load_dword v0, v[0:1], s[8:11], 0 addr64
 ; HSA: v_mov_b32_e32 v0, s6
 ; HSA: v_mov_b32_e32 v1, s7
 ; HSA: flat_load_dword v0, v[0:1]
@@ -164,16 +162,15 @@ define void @opencl_func_call_implicitarg_ptr_func() #0 {
 
 ; GCN-LABEL: {{^}}func_kernarg_implicitarg_ptr:
 ; GCN: s_waitcnt
-; MESA: s_mov_b64 s[12:13], s[6:7]
-; MESA: s_mov_b32 s15, 0xf000
-; MESA: s_mov_b32 s14, -1
-; MESA: buffer_load_dword v0, off, s[12:15], 0
+; MESA: v_mov_b32_e32 v0, s6
+; MESA: v_mov_b32_e32 v1, s7
+; MESA: v_mov_b32_e32 v2, s8
+; MESA: v_mov_b32_e32 v3, s9
+; MESA: buffer_load_dword v0, v[0:1], s[8:11], 0 addr64
 ; HSA: v_mov_b32_e32 v0, s6
 ; HSA: v_mov_b32_e32 v1, s7
 ; HSA: flat_load_dword v0, v[0:1]
-; MESA: s_mov_b32 s10, s14
-; MESA: s_mov_b32 s11, s15
-; MESA: buffer_load_dword v0, off, s[8:11], 0
+; MESA: buffer_load_dword v0, v[2:3], s[8:11], 0 addr64
 ; HSA: v_mov_b32_e32 v0, s8
 ; HSA: v_mov_b32_e32 v1, s9
 ; HSA: flat_load_dword v0, v[0:1]
@@ -191,16 +188,15 @@ define void @func_kernarg_implicitarg_ptr() #0 {
 
 ; GCN-LABEL: {{^}}opencl_func_kernarg_implicitarg_ptr:
 ; GCN: s_waitcnt
-; MESA: s_mov_b64 s[12:13], s[6:7]
-; MESA: s_mov_b32 s15, 0xf000
-; MESA: s_mov_b32 s14, -1
-; MESA: buffer_load_dword v0, off, s[12:15], 0
+; MESA: v_mov_b32_e32 v0, s6
+; MESA: v_mov_b32_e32 v1, s7
+; MESA: v_mov_b32_e32 v2, s8
+; MESA: v_mov_b32_e32 v3, s9
+; MESA: buffer_load_dword v0, v[0:1], s[8:11], 0 addr64
 ; HSA: v_mov_b32_e32 v0, s6
 ; HSA: v_mov_b32_e32 v1, s7
 ; HSA: flat_load_dword v0, v[0:1]
-; MESA: s_mov_b32 s10, s14
-; MESA: s_mov_b32 s11, s15
-; MESA: buffer_load_dword v0, off, s[8:11], 0
+; MESA: buffer_load_dword v0, v[2:3], s[8:11], 0 addr64
 ; HSA: v_mov_b32_e32 v0, s8
 ; HSA: v_mov_b32_e32 v1, s9
 ; HSA: flat_load_dword v0, v[0:1]
```