AMDGPU: Fix emitting multiple stack loads for stack-passed workitem IDs

The same stack slot is loaded for each workitem ID, and again for each use. Nothing prevents you from creating multiple fixed stack objects with the same offset, so this was creating a load for each unique frame index, even though they all refer to the same offset. Re-use the same frame index so the loads are CSEable. llvm-svn: 371148
author: Matt Arsenault <Matthew.Arsenault@amd.com> 2019-09-05 23:40:14 +0000
committer: Matt Arsenault <Matthew.Arsenault@amd.com> 2019-09-05 23:40:14 +0000
commit: 59ff77ee38c6902ffa4b4bc71e80ab01032babec (patch)
tree: 0fb3b583d9e5cfa82d24ecde9bda472db42e787c /llvm/test/CodeGen/AMDGPU
parent: 9dd453ce8d6b0d0970ed11a93d5180f74f620078 (diff)
download: bcm5719-llvm-59ff77ee38c6902ffa4b4bc71e80ab01032babec.tar.gz
bcm5719-llvm-59ff77ee38c6902ffa4b4bc71e80ab01032babec.zip
1 files changed, 17 insertions, 11 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll
index 2df3ad9cdef..d5211ceede3 100644
--- a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll
+++ b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll
@@ -543,19 +543,25 @@ define void @func_call_too_many_args_use_workitem_id_x_byval() #1 {
   ret void
 }
 
+; Only one stack load should be emitted for all 3 values.
 ; GCN-LABEL: {{^}}too_many_args_use_workitem_id_xyz:
-; GCN: buffer_store_dword v32, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
-; GCN: buffer_load_dword v32, off, s[0:3], s32{{$}}
-; GCN: v_and_b32_e32 v32, 0x3ff, v32
-; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+]}}, v32
-; GCN: buffer_load_dword v32, off, s[0:3], s32{{$}}
-; GCN: v_bfe_u32 v32, v32, 10, 10
-; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+]}}, v32
+; GCN: buffer_store_dword v32, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+; GCN: buffer_store_dword v33, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; GCN-NOT: buffer_load_dword v{{[0-9]+}}, off, s[0:3], s32{{$}}
 ; GCN: buffer_load_dword v32, off, s[0:3], s32{{$}}
-; GCN: v_bfe_u32 v32, v32, 20, 10
-; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+]}}, v32
-
-; GCN: buffer_load_dword v32, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
+; GCN-NOT: buffer_load_dword
+
+; GCN: v_and_b32_e32 [[AND_X:v[0-9]+]], 0x3ff, v32
+; GCN-NOT: buffer_load_dword
+; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+]}}, [[AND_X]]
+; GCN-NOT: buffer_load_dword
+; GCN: v_bfe_u32 [[BFE_Y:v[0-9]+]], v32, 10, 10
+; GCN-NEXT: v_bfe_u32 [[BFE_Z:v[0-9]+]], v32, 20, 10
+; GCN-NEXT: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+]}}, [[BFE_Y]]
+; GCN-NEXT: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+]}}, [[BFE_Z]]
+
+; GCN: buffer_load_dword v33, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
+; GCN: buffer_load_dword v32, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
 ; GCN-NEXT: s_waitcnt
 ; GCN-NEXT: s_setpc_b64
 define void @too_many_args_use_workitem_id_xyz(
author	Matt Arsenault <Matthew.Arsenault@amd.com>	2019-09-05 23:40:14 +0000
committer	Matt Arsenault <Matthew.Arsenault@amd.com>	2019-09-05 23:40:14 +0000
commit	59ff77ee38c6902ffa4b4bc71e80ab01032babec (patch)
tree	0fb3b583d9e5cfa82d24ecde9bda472db42e787c /llvm/test/CodeGen/AMDGPU
parent	9dd453ce8d6b0d0970ed11a93d5180f74f620078 (diff)
download	bcm5719-llvm-59ff77ee38c6902ffa4b4bc71e80ab01032babec.tar.gz bcm5719-llvm-59ff77ee38c6902ffa4b4bc71e80ab01032babec.zip