summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/Target/AMDGPU
diff options
context:
space:
mode:
author: Matt Arsenault <Matthew.Arsenault@amd.com> 2019-07-19 19:47:30 +0000
committer: Matt Arsenault <Matthew.Arsenault@amd.com> 2019-07-19 19:47:30 +0000
commit: 85f38901266a6e5ec9771a82efdcc16dcd364022 (patch)
tree: a064f4d2b3a4b2bded0901fcd86cf382b214263c /llvm/lib/Target/AMDGPU
parent: 4e8c8aa959aab36913416143bb777b40644a4ba6 (diff)
download: bcm5719-llvm-85f38901266a6e5ec9771a82efdcc16dcd364022.tar.gz
download: bcm5719-llvm-85f38901266a6e5ec9771a82efdcc16dcd364022.zip
AMDGPU: Force s_waitcnt after GWS instructions
The s_waitcnt is apparently required to be the instruction immediately following a GWS instruction, so force the GWS instruction into a bundle with an s_waitcnt.
llvm-svn: 366607
Diffstat (limited to 'llvm/lib/Target/AMDGPU')
-rw-r--r-- llvm/lib/Target/AMDGPU/DSInstructions.td | 5
-rw-r--r-- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 23
-rw-r--r-- llvm/lib/Target/AMDGPU/SIISelLowering.h | 1
-rw-r--r-- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 2
4 files changed, 26 insertions, 5 deletions
diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td
index c52eaaa3fdc..0cc21a6aa16 100644
--- a/llvm/lib/Target/AMDGPU/DSInstructions.td
+++ b/llvm/lib/Target/AMDGPU/DSInstructions.td
@@ -317,13 +317,16 @@ class DS_GWS <string opName, dag ins, string asmOps>
class DS_GWS_0D <string opName>
: DS_GWS<opName,
- (ins offset:$offset, gds:$gds), "$offset gds">;
+ (ins offset:$offset, gds:$gds), "$offset gds"> {
+ let hasSideEffects = 1;
+}
class DS_GWS_1D <string opName>
: DS_GWS<opName,
(ins VGPR_32:$data0, offset:$offset, gds:$gds), "$data0$offset gds"> {
let has_gws_data0 = 1;
+ let hasSideEffects = 1;
}
class DS_VOID <string opName> : DS_Pseudo<opName,
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 40b93f4ae3b..e1c38456de0 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -3069,6 +3069,20 @@ splitBlockForLoop(MachineInstr &MI, MachineBasicBlock &MBB, bool InstInLoop) {
return std::make_pair(LoopBB, RemainderBB);
}
+/// Insert \p MI into a BUNDLE with an S_WAITCNT 0 immediately following it.
+void SITargetLowering::bundleInstWithWaitcnt(MachineInstr &MI) const {
+ MachineBasicBlock *MBB = MI.getParent();
+ const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
+ auto I = MI.getIterator();
+ auto E = std::next(I);
+
+ BuildMI(*MBB, E, MI.getDebugLoc(), TII->get(AMDGPU::S_WAITCNT))
+ .addImm(0);
+
+ MIBundleBuilder Bundler(*MBB, I, E);
+ finalizeBundle(*MBB, Bundler.begin());
+}
+
MachineBasicBlock *
SITargetLowering::emitGWSMemViolTestLoop(MachineInstr &MI,
MachineBasicBlock *BB) const {
@@ -3108,8 +3122,7 @@ SITargetLowering::emitGWSMemViolTestLoop(MachineInstr &MI,
MRI.setSimpleHint(Data0, Src->getReg());
}
- BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::S_WAITCNT))
- .addImm(0);
+ bundleInstWithWaitcnt(MI);
unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
@@ -3828,8 +3841,12 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
case AMDGPU::DS_GWS_SEMA_P:
case AMDGPU::DS_GWS_SEMA_RELEASE_ALL:
case AMDGPU::DS_GWS_BARRIER:
- if (getSubtarget()->hasGWSAutoReplay())
+ // A s_waitcnt 0 is required to be the instruction immediately following.
+ if (getSubtarget()->hasGWSAutoReplay()) {
+ bundleInstWithWaitcnt(MI);
return BB;
+ }
+
return emitGWSMemViolTestLoop(MI, BB);
default:
return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 1f8cf4ffe1a..27c6445d60a 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -315,6 +315,7 @@ public:
MachineBasicBlock *splitKillBlock(MachineInstr &MI,
MachineBasicBlock *BB) const;
+ void bundleInstWithWaitcnt(MachineInstr &MI) const;
MachineBasicBlock *emitGWSMemViolTestLoop(MachineInstr &MI,
MachineBasicBlock *BB) const;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index ba8ed6993a5..1fce2dbe774 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -1531,7 +1531,7 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
break;
}
case TargetOpcode::BUNDLE: {
- if (!MI.mayLoad())
+ if (!MI.mayLoad() || MI.hasUnmodeledSideEffects())
return false;
// If it is a load it must be a memory clause
OpenPOWER on IntegriCloud