diff options
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInsertWaits.cpp | 6 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/missing-store.ll | 1 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/salu-to-valu.ll | 1 |
3 files changed, 8 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp
index f250782de58..bf0d6a74336 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp
@@ -601,6 +601,12 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
 insertDPPWaitStates(I);
 }

+ // Insert required wait states for SMRD reading an SGPR written by a VALU
+ // instruction.
+ if (ST.getGeneration() <= AMDGPUSubtarget::SOUTHERN_ISLANDS &&
+ I->getOpcode() == AMDGPU::V_READFIRSTLANE_B32)
+ TII->insertWaitStates(MBB, std::next(I), 4);
+
 // Wait for everything before a barrier.
 if (I->getOpcode() == AMDGPU::S_BARRIER)
 Changes |= insertWait(MBB, I, LastIssued);
diff --git a/llvm/test/CodeGen/AMDGPU/missing-store.ll b/llvm/test/CodeGen/AMDGPU/missing-store.ll
index c919b3b5819..658666304f6 100644
--- a/llvm/test/CodeGen/AMDGPU/missing-store.ll
+++ b/llvm/test/CodeGen/AMDGPU/missing-store.ll
@@ -10,6 +10,7 @@
 ; SI: buffer_store_dword
 ; SI: v_readfirstlane_b32 s[[PTR_LO:[0-9]+]], v{{[0-9]+}}
 ; SI: v_readfirstlane_b32 s[[PTR_HI:[0-9]+]], v{{[0-9]+}}
+; SI-NEXT: s_nop
 ; SI: s_load_dword s{{[0-9]+}}, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}
 ; SI: buffer_store_dword
 ; SI: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/salu-to-valu.ll b/llvm/test/CodeGen/AMDGPU/salu-to-valu.ll
index abd6b7a2c21..457695e5a0e 100644
--- a/llvm/test/CodeGen/AMDGPU/salu-to-valu.ll
+++ b/llvm/test/CodeGen/AMDGPU/salu-to-valu.ll
@@ -56,6 +56,7 @@ done: ; preds = %loop
 ; SI: s_movk_i32 [[OFFSET:s[0-9]+]], 0x2ee0
 ; GCN: v_readfirstlane_b32 s[[PTR_LO:[0-9]+]], v{{[0-9]+}}
 ; GCN: v_readfirstlane_b32 s[[PTR_HI:[0-9]+]], v{{[0-9]+}}
+; SI-NEXT: s_nop
 ; SI: s_load_dword [[OUT:s[0-9]+]], s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, [[OFFSET]]
 ; CI: s_load_dword [[OUT:s[0-9]+]], s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0xbb8
 ; GCN: v_mov_b32_e32 [[V_OUT:v[0-9]+]], [[OUT]]