summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
diff options
context:
space:
mode:
authorMatt Arsenault <Matthew.Arsenault@amd.com>2017-08-30 03:26:18 +0000
committerMatt Arsenault <Matthew.Arsenault@amd.com>2017-08-30 03:26:18 +0000
commit3cb61634ff3795ef65b1f618d3809053b6a0a515 (patch)
tree34e9788cbd49a6e0293926931d10d8ab9afb8b8f /llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
parent06cab79e5000c2859f0deff758d7526b7f4aff68 (diff)
downloadbcm5719-llvm-3cb61634ff3795ef65b1f618d3809053b6a0a515.tar.gz
bcm5719-llvm-3cb61634ff3795ef65b1f618d3809053b6a0a515.zip
AMDGPU: Don't look for DS merge candidates with a one-use address
The merge is only possible if the base address register is the same for the two instructions. If there is only one use, there's no point in doing an expensive forward scan checking for memory interference looking for a merge candidate. This gives a significant improvement in one extreme testcase. The code to do the scan is still algorithmically terrible, so this is still the slowest pass in that example. llvm-svn: 312096
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp13
1 files changed, 10 insertions, 3 deletions
diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index bb803b29f6d..1b2e5e6d0f7 100644
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -251,6 +251,16 @@ bool SILoadStoreOptimizer::offsetsCanBeCombined(CombineInfo &CI) {
bool SILoadStoreOptimizer::findMatchingDSInst(CombineInfo &CI) {
MachineBasicBlock::iterator E = CI.I->getParent()->end();
MachineBasicBlock::iterator MBBI = CI.I;
+
+ int AddrIdx = AMDGPU::getNamedOperandIdx(CI.I->getOpcode(),
+ AMDGPU::OpName::addr);
+ const MachineOperand &AddrReg0 = CI.I->getOperand(AddrIdx);
+
+ // We only ever merge operations with the same base address register, so don't
+ // bother scanning forward if there are no other uses.
+ if (MRI->hasOneNonDBGUse(AddrReg0.getReg()))
+ return false;
+
++MBBI;
SmallVector<const MachineOperand *, 8> DefsToMove;
@@ -300,9 +310,6 @@ bool SILoadStoreOptimizer::findMatchingDSInst(CombineInfo &CI) {
if (addToListsIfDependent(*MBBI, DefsToMove, CI.InstsToMove))
continue;
- int AddrIdx = AMDGPU::getNamedOperandIdx(CI.I->getOpcode(),
- AMDGPU::OpName::addr);
- const MachineOperand &AddrReg0 = CI.I->getOperand(AddrIdx);
const MachineOperand &AddrReg1 = MBBI->getOperand(AddrIdx);
// Check same base pointer. Be careful of subregisters, which can occur with
OpenPOWER on IntegriCloud