diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-07-19 19:47:30 +0000 |
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-07-19 19:47:30 +0000 |
| commit | 85f38901266a6e5ec9771a82efdcc16dcd364022 (patch) | |
| tree | a064f4d2b3a4b2bded0901fcd86cf382b214263c /llvm/lib/Target/AMDGPU | |
| parent | 4e8c8aa959aab36913416143bb777b40644a4ba6 (diff) | |
| download | bcm5719-llvm-85f38901266a6e5ec9771a82efdcc16dcd364022.tar.gz bcm5719-llvm-85f38901266a6e5ec9771a82efdcc16dcd364022.zip | |
AMDGPU: Force s_waitcnt after GWS instructions
An s_waitcnt is apparently required to be the instruction immediately
following a GWS instruction, so force the GWS instruction into a bundle with a waitcnt.
llvm-svn: 366607
Diffstat (limited to 'llvm/lib/Target/AMDGPU')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/DSInstructions.td | 5 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 23 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.h | 1 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 2 |
4 files changed, 26 insertions, 5 deletions
diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td index c52eaaa3fdc..0cc21a6aa16 100644 --- a/llvm/lib/Target/AMDGPU/DSInstructions.td +++ b/llvm/lib/Target/AMDGPU/DSInstructions.td @@ -317,13 +317,16 @@ class DS_GWS <string opName, dag ins, string asmOps> class DS_GWS_0D <string opName> : DS_GWS<opName, - (ins offset:$offset, gds:$gds), "$offset gds">; + (ins offset:$offset, gds:$gds), "$offset gds"> { + let hasSideEffects = 1; +} class DS_GWS_1D <string opName> : DS_GWS<opName, (ins VGPR_32:$data0, offset:$offset, gds:$gds), "$data0$offset gds"> { let has_gws_data0 = 1; + let hasSideEffects = 1; } class DS_VOID <string opName> : DS_Pseudo<opName, diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 40b93f4ae3b..e1c38456de0 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -3069,6 +3069,20 @@ splitBlockForLoop(MachineInstr &MI, MachineBasicBlock &MBB, bool InstInLoop) { return std::make_pair(LoopBB, RemainderBB); } +/// Insert \p MI into a BUNDLE with an S_WAITCNT 0 immediately following it. 
+void SITargetLowering::bundleInstWithWaitcnt(MachineInstr &MI) const { + MachineBasicBlock *MBB = MI.getParent(); + const SIInstrInfo *TII = getSubtarget()->getInstrInfo(); + auto I = MI.getIterator(); + auto E = std::next(I); + + BuildMI(*MBB, E, MI.getDebugLoc(), TII->get(AMDGPU::S_WAITCNT)) + .addImm(0); + + MIBundleBuilder Bundler(*MBB, I, E); + finalizeBundle(*MBB, Bundler.begin()); +} + MachineBasicBlock * SITargetLowering::emitGWSMemViolTestLoop(MachineInstr &MI, MachineBasicBlock *BB) const { @@ -3108,8 +3122,7 @@ SITargetLowering::emitGWSMemViolTestLoop(MachineInstr &MI, MRI.setSimpleHint(Data0, Src->getReg()); } - BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::S_WAITCNT)) - .addImm(0); + bundleInstWithWaitcnt(MI); unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); @@ -3828,8 +3841,12 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter( case AMDGPU::DS_GWS_SEMA_P: case AMDGPU::DS_GWS_SEMA_RELEASE_ALL: case AMDGPU::DS_GWS_BARRIER: - if (getSubtarget()->hasGWSAutoReplay()) + // A s_waitcnt 0 is required to be the instruction immediately following. 
+ if (getSubtarget()->hasGWSAutoReplay()) { + bundleInstWithWaitcnt(MI); return BB; + } + return emitGWSMemViolTestLoop(MI, BB); default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB); diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h index 1f8cf4ffe1a..27c6445d60a 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.h +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -315,6 +315,7 @@ public: MachineBasicBlock *splitKillBlock(MachineInstr &MI, MachineBasicBlock *BB) const; + void bundleInstWithWaitcnt(MachineInstr &MI) const; MachineBasicBlock *emitGWSMemViolTestLoop(MachineInstr &MI, MachineBasicBlock *BB) const; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index ba8ed6993a5..1fce2dbe774 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -1531,7 +1531,7 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { break; } case TargetOpcode::BUNDLE: { - if (!MI.mayLoad()) + if (!MI.mayLoad() || MI.hasUnmodeledSideEffects()) return false; // If it is a load it must be a memory clause |

