summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp
diff options
context:
space:
mode:
authorTom Stellard <thomas.stellard@amd.com>2016-02-08 19:49:20 +0000
committerTom Stellard <thomas.stellard@amd.com>2016-02-08 19:49:20 +0000
commit309617645d44fa7d12fbcc1563891edcf098edc7 (patch)
tree500e6072d9871441eb43b5d94848dd6d8822ee0a /llvm/lib/Target/AMDGPU/SIInsertWaits.cpp
parent303d3dd1105aa14237816a78a2f5da458070bff5 (diff)
downloadbcm5719-llvm-309617645d44fa7d12fbcc1563891edcf098edc7.tar.gz
bcm5719-llvm-309617645d44fa7d12fbcc1563891edcf098edc7.zip
AMDGPU/SI: Implement a work-around for smrd corrupting vccz bit
Summary: We will hit this once we have enabled uniform branches. The smrd-vccz-bug.ll test will be added with the uniform branch commit. Reviewers: mareko, arsenm Subscribers: arsenm, llvm-commits Differential Revision: http://reviews.llvm.org/D16725 llvm-svn: 260137
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIInsertWaits.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/SIInsertWaits.cpp56
1 files changed, 55 insertions, 1 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp
index 322a3e50607..522233f88c7 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp
@@ -88,6 +88,9 @@ private:
/// \brief Whether the machine function returns void
bool ReturnsVoid;
+ /// Whether the VCCZ bit is possibly corrupt
+ bool VCCZCorrupt;
+
/// \brief Get increment/decrement amount for this instruction.
Counters getHwCounts(MachineInstr &MI);
@@ -116,6 +119,10 @@ private:
/// \brief Insert S_NOP between an instruction writing M0 and S_SENDMSG.
void handleSendMsg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I);
+ /// Return true if there are LGKM instructions that haven't been waited on
+ /// yet.
+ bool hasOutstandingLGKM() const;
+
public:
static char ID;
@@ -123,7 +130,8 @@ public:
MachineFunctionPass(ID),
TII(nullptr),
TRI(nullptr),
- ExpInstrTypesSeen(0) { }
+ ExpInstrTypesSeen(0),
+ VCCZCorrupt(false) { }
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -155,6 +163,13 @@ FunctionPass *llvm::createSIInsertWaitsPass() {
const Counters SIInsertWaits::WaitCounts = { { 15, 7, 15 } };
const Counters SIInsertWaits::ZeroCounts = { { 0, 0, 0 } };
+/// Return true if \p Opcode is a conditional branch that tests the vccz bit.
+///
+/// Both the branch-on-vccz and branch-on-vccnz forms must be matched so that
+/// the vccz work-around is applied before either of them.
+static bool readsVCCZ(unsigned Opcode) {
+ return Opcode == AMDGPU::S_CBRANCH_VCCZ || Opcode == AMDGPU::S_CBRANCH_VCCNZ;
+}
+
+/// Report whether the LGKM counter has pending work, i.e. whether the
+/// waited-on LGKM count differs from the last-issued LGKM count.
+bool SIInsertWaits::hasOutstandingLGKM() const {
+ const bool AllWaitedOn = LastIssued.Named.LGKM == WaitedOn.Named.LGKM;
+ return !AllWaitedOn;
+}
Counters SIInsertWaits::getHwCounts(MachineInstr &MI) {
uint64_t TSFlags = MI.getDesc().TSFlags;
@@ -475,6 +490,7 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
TRI =
static_cast<const SIRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
+ const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
MRI = &MF.getRegInfo();
WaitedOn = ZeroCounts;
@@ -493,6 +509,44 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
I != E; ++I) {
+ if (ST.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS) {
+ // There is a hardware bug on CI/SI where SMRD instruction may corrupt
+ // vccz bit, so when we detect that an instruction may read from a
+ // corrupt vccz bit, we need to:
+ // 1. Insert s_waitcnt lgkm(0) to wait for all outstanding SMRD operations to
+ // complete.
+ // 2. Restore the correct value of vccz by writing the current value
+ // of vcc back to vcc.
+
+ if (TII->isSMRD(I->getOpcode())) {
+ VCCZCorrupt = true;
+ } else if (!hasOutstandingLGKM() && I->modifiesRegister(AMDGPU::VCC, TRI)) {
+ // FIXME: We only care about SMRD instructions here, not LDS or GDS.
+ // Whenever we store a value in vcc, the correct value of vccz is
+ // restored.
+ VCCZCorrupt = false;
+ }
+
+ // Check if we need to apply the bug work-around
+ if (readsVCCZ(I->getOpcode()) && VCCZCorrupt) {
+ DEBUG(dbgs() << "Inserting vccz bug work-around before: " << *I << '\n');
+
+ // Wait on everything, not just LGKM. vccz reads usually come from
+ // terminators, and we always wait on everything at the end of the
+ // block, so if we only wait on LGKM here, we might end up with
+ // another s_waitcnt inserted right after this if there are non-LGKM
+ // instructions still outstanding.
+ insertWait(MBB, I, LastIssued);
+
+ // Restore the vccz bit. Any time a value is written to vcc, the vccz
+ // bit is updated, so we can restore the bit by reading the value of
+ // vcc and then writing it back to the register.
+ BuildMI(MBB, I, I->getDebugLoc(), TII->get(AMDGPU::S_MOV_B64),
+ AMDGPU::VCC)
+ .addReg(AMDGPU::VCC);
+ }
+ }
+
// Wait for everything before a barrier.
if (I->getOpcode() == AMDGPU::S_BARRIER)
Changes |= insertWait(MBB, I, LastIssued);
OpenPOWER on IntegriCloud