From 52d1b62a287e2c167896039e8f304c9d146d0360 Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Wed, 8 Mar 2017 01:06:58 +0000
Subject: AMDGPU: Don't wait at end of block with a trivial successor

If there is only one successor, and that successor only has
one predecessor the wait can obviously be delayed until uses
or the end of the next block. This avoids code quality regressions
when there are trivial fallthrough blocks inserted for structurization.

llvm-svn: 297251
---
 llvm/lib/Target/AMDGPU/SIInsertWaits.cpp | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

(limited to 'llvm/lib/Target/AMDGPU/SIInsertWaits.cpp')

diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp
index 3daf13cb4b5..285bc8a8144 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp
@@ -524,6 +524,16 @@ void SIInsertWaits::handleSendMsg(MachineBasicBlock &MBB,
   }
 }
 
+/// Return true if \p MBB has one successor immediately following, and is its
+/// only predecessor
+static bool hasTrivialSuccessor(const MachineBasicBlock &MBB) {
+  if (MBB.succ_size() != 1)
+    return false;
+
+  const MachineBasicBlock *Succ = *MBB.succ_begin();
+  return (Succ->pred_size() == 1) && MBB.isLayoutSuccessor(Succ);
+}
+
 // FIXME: Insert waits listed in Table 4.2 "Required User-Inserted Wait States"
 // around other non-memory instructions.
 bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
@@ -642,8 +652,10 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
         EndPgmBlocks.push_back(&MBB);
     }
 
-    // Wait for everything at the end of the MBB
-    Changes |= insertWait(MBB, MBB.getFirstTerminator(), LastIssued);
+    // Wait for everything at the end of the MBB. If there is only one
+    // successor, we can defer this until the uses there.
+    if (!hasTrivialSuccessor(MBB))
+      Changes |= insertWait(MBB, MBB.getFirstTerminator(), LastIssued);
   }
 
   if (HaveScalarStores) {
-- 
cgit v1.2.3