summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMatt Arsenault <Matthew.Arsenault@amd.com>2017-08-30 03:26:18 +0000
committerMatt Arsenault <Matthew.Arsenault@amd.com>2017-08-30 03:26:18 +0000
commit3cb61634ff3795ef65b1f618d3809053b6a0a515 (patch)
tree34e9788cbd49a6e0293926931d10d8ab9afb8b8f
parent06cab79e5000c2859f0deff758d7526b7f4aff68 (diff)
downloadbcm5719-llvm-3cb61634ff3795ef65b1f618d3809053b6a0a515.tar.gz
bcm5719-llvm-3cb61634ff3795ef65b1f618d3809053b6a0a515.zip
AMDGPU: Don't look for DS merge candidates with one use address
The merge is only possible if the base address register is the same for the two instructions. If there is only one use, there's no point in doing an expensive forward scan checking for memory interference looking for a merge candidate. This gives a significant improvement in one extreme testcase. The code to do the scan is still algorithmically terrible, so this is still the slowest pass in that example. llvm-svn: 312096
-rw-r--r--llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp13
1 files changed, 10 insertions, 3 deletions
diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index bb803b29f6d..1b2e5e6d0f7 100644
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -251,6 +251,16 @@ bool SILoadStoreOptimizer::offsetsCanBeCombined(CombineInfo &CI) {
bool SILoadStoreOptimizer::findMatchingDSInst(CombineInfo &CI) {
MachineBasicBlock::iterator E = CI.I->getParent()->end();
MachineBasicBlock::iterator MBBI = CI.I;
+
+ int AddrIdx = AMDGPU::getNamedOperandIdx(CI.I->getOpcode(),
+ AMDGPU::OpName::addr);
+ const MachineOperand &AddrReg0 = CI.I->getOperand(AddrIdx);
+
+ // We only ever merge operations with the same base address register, so don't
+ // bother scanning forward if there are no other uses.
+ if (MRI->hasOneNonDBGUse(AddrReg0.getReg()))
+ return false;
+
++MBBI;
SmallVector<const MachineOperand *, 8> DefsToMove;
@@ -300,9 +310,6 @@ bool SILoadStoreOptimizer::findMatchingDSInst(CombineInfo &CI) {
if (addToListsIfDependent(*MBBI, DefsToMove, CI.InstsToMove))
continue;
- int AddrIdx = AMDGPU::getNamedOperandIdx(CI.I->getOpcode(),
- AMDGPU::OpName::addr);
- const MachineOperand &AddrReg0 = CI.I->getOperand(AddrIdx);
const MachineOperand &AddrReg1 = MBBI->getOperand(AddrIdx);
// Check same base pointer. Be careful of subregisters, which can occur with
OpenPOWER on IntegriCloud