From ab1d3a9d505ab01658d0a1e7adf06fc7415fc64c Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Tue, 12 Apr 2016 18:40:43 +0000 Subject: AMDGPU/SI: Insert wait states required after v_readfirstlane on SI Summary: We will be able to handle this case much better once the hazard recognizer is finished, but this conservative implementation fixes a hang with the piglit test: spec/arb_arrays_of_arrays/execution/sampler/fs-nested-struct-arrays-nonconst-nested-array Reviewers: arsenm, nhaehnle Subscribers: arsenm, llvm-commits Differential Revision: http://reviews.llvm.org/D18988 llvm-svn: 266105 --- llvm/test/CodeGen/AMDGPU/missing-store.ll | 1 + llvm/test/CodeGen/AMDGPU/salu-to-valu.ll | 1 + 2 files changed, 2 insertions(+) (limited to 'llvm/test/CodeGen') diff --git a/llvm/test/CodeGen/AMDGPU/missing-store.ll b/llvm/test/CodeGen/AMDGPU/missing-store.ll index c919b3b5819..658666304f6 100644 --- a/llvm/test/CodeGen/AMDGPU/missing-store.ll +++ b/llvm/test/CodeGen/AMDGPU/missing-store.ll @@ -10,6 +10,7 @@ ; SI: buffer_store_dword ; SI: v_readfirstlane_b32 s[[PTR_LO:[0-9]+]], v{{[0-9]+}} ; SI: v_readfirstlane_b32 s[[PTR_HI:[0-9]+]], v{{[0-9]+}} +; SI-NEXT: s_nop ; SI: s_load_dword s{{[0-9]+}}, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}} ; SI: buffer_store_dword ; SI: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/salu-to-valu.ll b/llvm/test/CodeGen/AMDGPU/salu-to-valu.ll index abd6b7a2c21..457695e5a0e 100644 --- a/llvm/test/CodeGen/AMDGPU/salu-to-valu.ll +++ b/llvm/test/CodeGen/AMDGPU/salu-to-valu.ll @@ -56,6 +56,7 @@ done: ; preds = %loop ; SI: s_movk_i32 [[OFFSET:s[0-9]+]], 0x2ee0 ; GCN: v_readfirstlane_b32 s[[PTR_LO:[0-9]+]], v{{[0-9]+}} ; GCN: v_readfirstlane_b32 s[[PTR_HI:[0-9]+]], v{{[0-9]+}} +; SI-NEXT: s_nop ; SI: s_load_dword [[OUT:s[0-9]+]], s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, [[OFFSET]] ; CI: s_load_dword [[OUT:s[0-9]+]], s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0xbb8 ; GCN: v_mov_b32_e32 [[V_OUT:v[0-9]+]], [[OUT]] -- cgit v1.2.3