From f1c7b92a6a714da858f678f2ec7edd17f177c552 Mon Sep 17 00:00:00 2001 From: Tim Renouf Date: Thu, 2 Aug 2018 22:53:57 +0000 Subject: [AMDGPU] Avoid using divergent value in mubuf addr64 descriptor Summary: This fixes a problem where a load from global+idx generated incorrect code on <=gfx7 when the index is divergent. Subscribers: arsenm, kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, t-tye, llvm-commits Differential Revision: https://reviews.llvm.org/D47383 Change-Id: Ib4d177d6254b1dd3f8ec0203fdddec94bd8bc5ed llvm-svn: 338779 --- .../CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll | 40 ++++++++++------------ 1 file changed, 18 insertions(+), 22 deletions(-) (limited to 'llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll') diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll index 6a9191e7dcb..5df1d55b904 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll @@ -59,10 +59,9 @@ define amdgpu_kernel void @opencl_kernel_implicitarg_ptr([112 x i8]) #1 { ; GCN-LABEL: {{^}}func_implicitarg_ptr: ; GCN: s_waitcnt -; MESA: s_mov_b64 s[8:9], s[6:7] -; MESA: s_mov_b32 s11, 0xf000 -; MESA: s_mov_b32 s10, -1 -; MESA: buffer_load_dword v0, off, s[8:11], 0 +; MESA: v_mov_b32_e32 v0, s6 +; MESA: v_mov_b32_e32 v1, s7 +; MESA: buffer_load_dword v0, v[0:1], s[8:11], 0 addr64 ; HSA: v_mov_b32_e32 v0, s6 ; HSA: v_mov_b32_e32 v1, s7 ; HSA: flat_load_dword v0, v[0:1] @@ -77,10 +76,9 @@ define void @func_implicitarg_ptr() #0 { ; GCN-LABEL: {{^}}opencl_func_implicitarg_ptr: ; GCN: s_waitcnt -; MESA: s_mov_b64 s[8:9], s[6:7] -; MESA: s_mov_b32 s11, 0xf000 -; MESA: s_mov_b32 s10, -1 -; MESA: buffer_load_dword v0, off, s[8:11], 0 +; MESA: v_mov_b32_e32 v0, s6 +; MESA: v_mov_b32_e32 v1, s7 +; MESA: buffer_load_dword v0, v[0:1], s[8:11], 0 addr64 ; HSA: v_mov_b32_e32 v0, s6 ; HSA: v_mov_b32_e32 v1, s7 ; HSA: flat_load_dword v0, v[0:1] @@ -164,16 +162,15 @@ define void @opencl_func_call_implicitarg_ptr_func() #0 { ; GCN-LABEL: {{^}}func_kernarg_implicitarg_ptr: ; GCN: s_waitcnt -; MESA: s_mov_b64 s[12:13], s[6:7] -; MESA: s_mov_b32 s15, 0xf000 -; MESA: s_mov_b32 s14, -1 -; MESA: buffer_load_dword v0, off, s[12:15], 0 +; MESA: v_mov_b32_e32 v0, s6 +; MESA: v_mov_b32_e32 v1, s7 +; MESA: v_mov_b32_e32 v2, s8 +; MESA: v_mov_b32_e32 v3, s9 +; MESA: buffer_load_dword v0, v[0:1], s[8:11], 0 addr64 ; HSA: v_mov_b32_e32 v0, s6 ; HSA: v_mov_b32_e32 v1, s7 ; HSA: flat_load_dword v0, v[0:1] -; MESA: s_mov_b32 s10, s14 -; MESA: s_mov_b32 s11, s15 -; MESA: buffer_load_dword v0, off, s[8:11], 0 +; MESA: buffer_load_dword v0, v[2:3], s[8:11], 0 addr64 ; HSA: v_mov_b32_e32 v0, s8 ; HSA: v_mov_b32_e32 v1, s9 ; HSA: flat_load_dword v0, v[0:1] @@ -191,16 +188,15 @@ define void @func_kernarg_implicitarg_ptr() #0 { ; GCN-LABEL: {{^}}opencl_func_kernarg_implicitarg_ptr: ; GCN: s_waitcnt -; MESA: s_mov_b64 s[12:13], s[6:7] -; MESA: s_mov_b32 s15, 0xf000 -; MESA: s_mov_b32 s14, -1 -; MESA: buffer_load_dword v0, off, s[12:15], 0 +; MESA: v_mov_b32_e32 v0, s6 +; MESA: v_mov_b32_e32 v1, s7 +; MESA: v_mov_b32_e32 v2, s8 +; MESA: v_mov_b32_e32 v3, s9 +; MESA: buffer_load_dword v0, v[0:1], s[8:11], 0 addr64 ; HSA: v_mov_b32_e32 v0, s6 ; HSA: v_mov_b32_e32 v1, s7 ; HSA: flat_load_dword v0, v[0:1] -; MESA: s_mov_b32 s10, s14 -; MESA: s_mov_b32 s11, s15 -; MESA: buffer_load_dword v0, off, s[8:11], 0 +; MESA: buffer_load_dword v0, v[2:3], s[8:11], 0 addr64 ; HSA: v_mov_b32_e32 v0, s8 ; HSA: v_mov_b32_e32 v1, s9 ; HSA: flat_load_dword v0, v[0:1] -- cgit v1.2.3