summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTim Northover <tnorthover@apple.com>2016-02-17 21:16:53 +0000
committerTim Northover <tnorthover@apple.com>2016-02-17 21:16:53 +0000
commit3f2285615a687cfa55fa3f9480295ab60b5a2e41 (patch)
treeb174fb62289a445ac40b6c293fd925bfbf9f5711
parent0cdb055f2329c49b3a114deeef6891e0afa8b21b (diff)
downloadbcm5719-llvm-3f2285615a687cfa55fa3f9480295ab60b5a2e41.tar.gz
bcm5719-llvm-3f2285615a687cfa55fa3f9480295ab60b5a2e41.zip
AArch64: improve redundant copy elimination.
Mostly, this fixes the bug that if the CBZ guaranteed Xn but Wn was used, we didn't sort out the use-def chain properly. I've also made it check more than just the last instruction for a compatible CBZ (so it can cope without fallthroughs). I'd have liked to do that separately, but it helps with writing the test. Finally, I removed some custom loops in favour of MachineInstr helpers and refactored the control flow to flatten it and avoid possibly quadratic iterations in blocks with many copies. NFC for these, just a general tidy-up. llvm-svn: 261154
-rw-r--r--llvm/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp86
-rw-r--r--llvm/test/CodeGen/AArch64/machine-copy-remove.ll27
2 files changed, 69 insertions, 44 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp b/llvm/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp
index 4a1e25254fc..b52c19a026d 100644
--- a/llvm/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp
@@ -63,6 +63,20 @@ char AArch64RedundantCopyElimination::ID = 0;
INITIALIZE_PASS(AArch64RedundantCopyElimination, "aarch64-copyelim",
"AArch64 redundant copy elimination pass", false, false)
+static bool guaranteesZeroRegInBlock(MachineInstr *MI, MachineBasicBlock *MBB) {
+ unsigned Opc = MI->getOpcode();
+ // Check if the current basic block is the target block to which the
+ // CBZ/CBNZ instruction jumps when its Wt/Xt is zero.
+ if ((Opc == AArch64::CBZW || Opc == AArch64::CBZX) &&
+ MBB == MI->getOperand(1).getMBB())
+ return true;
+ else if ((Opc == AArch64::CBNZW || Opc == AArch64::CBNZX) &&
+ MBB != MI->getOperand(1).getMBB())
+ return true;
+
+ return false;
+}
+
bool AArch64RedundantCopyElimination::optimizeCopy(MachineBasicBlock *MBB) {
// Check if the current basic block has a single predecessor.
if (MBB->pred_size() != 1)
@@ -73,18 +87,16 @@ bool AArch64RedundantCopyElimination::optimizeCopy(MachineBasicBlock *MBB) {
if (CompBr == PredMBB->end() || PredMBB->succ_size() != 2)
return false;
- unsigned LastOpc = CompBr->getOpcode();
- // Check if the current basic block is the target block to which the cbz/cbnz
- // instruction jumps when its Wt/Xt is zero.
- if (LastOpc == AArch64::CBZW || LastOpc == AArch64::CBZX) {
- if (MBB != CompBr->getOperand(1).getMBB())
- return false;
- } else if (LastOpc == AArch64::CBNZW || LastOpc == AArch64::CBNZX) {
- if (MBB == CompBr->getOperand(1).getMBB())
- return false;
- } else {
+ ++CompBr;
+ do {
+ --CompBr;
+ if (guaranteesZeroRegInBlock(CompBr, MBB))
+ break;
+ } while (CompBr != PredMBB->begin() && CompBr->isTerminator());
+
+ // We've not found a CBZ/CBNZ, time to bail out.
+ if (!guaranteesZeroRegInBlock(CompBr, MBB))
return false;
- }
unsigned TargetReg = CompBr->getOperand(0).getReg();
if (!TargetReg)
@@ -98,6 +110,8 @@ bool AArch64RedundantCopyElimination::optimizeCopy(MachineBasicBlock *MBB) {
TargetRegs.insert(*AI);
bool Changed = false;
+ MachineBasicBlock::iterator LastChange = MBB->begin();
+ unsigned SmallestDef = TargetReg;
// Remove redundant Copy instructions unless TargetReg is modified.
for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;) {
MachineInstr *MI = &*I;
@@ -111,48 +125,40 @@ bool AArch64RedundantCopyElimination::optimizeCopy(MachineBasicBlock *MBB) {
if ((SrcReg == AArch64::XZR || SrcReg == AArch64::WZR) &&
!MRI->isReserved(DefReg) &&
(TargetReg == DefReg || TRI->isSuperRegister(DefReg, TargetReg))) {
-
- CompBr->clearRegisterKills(DefReg, TRI);
- if (MBB->isLiveIn(DefReg))
- // Clear any kills of TargetReg between CompBr and MI.
- for (MachineInstr &MMI :
- make_range(MBB->begin()->getIterator(), MI->getIterator()))
- MMI.clearRegisterKills(DefReg, TRI);
- else
- MBB->addLiveIn(DefReg);
-
DEBUG(dbgs() << "Remove redundant Copy : ");
DEBUG((MI)->print(dbgs()));
MI->eraseFromParent();
Changed = true;
+ LastChange = I;
NumCopiesRemoved++;
+ SmallestDef =
+ TRI->isSubRegister(SmallestDef, DefReg) ? DefReg : SmallestDef;
continue;
}
}
- for (const MachineOperand &MO : MI->operands()) {
- // FIXME: It is possible to use the register mask to check if all
- // registers in TargetRegs are not clobbered. For now, we treat it like
- // a basic block boundary.
- if (MO.isRegMask())
- return Changed;
- if (!MO.isReg())
- continue;
- unsigned Reg = MO.getReg();
+ if (MI->modifiesRegister(TargetReg, TRI))
+ break;
+ }
- if (!Reg)
- continue;
+ if (!Changed)
+ return false;
- assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
- "Expect physical register");
+ // Otherwise, we have to fixup the use-def chain, starting with the
+ // CBZ/CBNZ. Conservatively mark as much as we can live.
+ CompBr->clearRegisterKills(SmallestDef, TRI);
- // Stop if the TargetReg is modified.
- if (MO.isDef() && TargetRegs.count(Reg))
- return Changed;
- }
- }
- return Changed;
+ // Clear any kills of TargetReg between CompBr and MI.
+ if (std::any_of(TargetRegs.begin(), TargetRegs.end(),
+ [&](unsigned Reg) { return MBB->isLiveIn(Reg); })) {
+ for (MachineInstr &MMI :
+ make_range(MBB->begin()->getIterator(), LastChange->getIterator()))
+ MMI.clearRegisterKills(SmallestDef, TRI);
+ } else
+ MBB->addLiveIn(TargetReg);
+
+ return true;
}
bool AArch64RedundantCopyElimination::runOnMachineFunction(
diff --git a/llvm/test/CodeGen/AArch64/machine-copy-remove.ll b/llvm/test/CodeGen/AArch64/machine-copy-remove.ll
index 7196d432c9e..6a97ead0de8 100644
--- a/llvm/test/CodeGen/AArch64/machine-copy-remove.ll
+++ b/llvm/test/CodeGen/AArch64/machine-copy-remove.ll
@@ -1,6 +1,6 @@
; RUN: llc -mtriple=aarch64-linux-gnu -mcpu=cortex-a57 -verify-machineinstrs < %s | FileCheck %s
-; CHECK-LABEL: f_XX
+; CHECK-LABEL: f_XX:
; CHECK: cbz x[[REG:[0-9]+]], [[BB:.LBB.*]]
; CHECK: [[BB]]:
; CHECK-NOT: mov x[[REG]], xzr
@@ -18,7 +18,7 @@ if.end: ; preds = %entry, %if.then
ret i64 %a.0
}
-; CHECK-LABEL: f_WW
+; CHECK-LABEL: f_WW:
; CHECK: cbz w[[REG:[0-9]+]], [[BB:.LBB.*]]
; CHECK: [[BB]]:
; CHECK-NOT: mov w[[REG]], wzr
@@ -36,7 +36,7 @@ if.end: ; preds = %entry, %if.then
ret i32 %a.0
}
-; CHECK-LABEL: f_XW
+; CHECK-LABEL: f_XW:
; CHECK: cbz x[[REG:[0-9]+]], [[BB:.LBB.*]]
; CHECK: [[BB]]:
; CHECK-NOT: mov w[[REG]], wzr
@@ -54,7 +54,7 @@ if.end: ; preds = %entry, %if.then
ret i32 %a.0
}
-; CHECK-LABEL: f_WX
+; CHECK-LABEL: f_WX:
; CHECK: cbz w[[REG:[0-9]+]], [[BB:.LBB.*]]
; CHECK: [[BB]]:
; CHECK: mov x[[REG]], xzr
@@ -73,3 +73,22 @@ if.end: ; preds = %entry, %if.then
%a.0 = phi i64 [ %0, %if.then ], [ 0, %entry ]
ret i64 %a.0
}
+
+; CHECK-LABEL: test_superreg:
+; CHECK: cbz x[[REG:[0-9]+]], [[BB:.LBB.*]]
+; CHECK: [[BB]]:
+; CHECK: str x[[REG]], [x1]
+; CHECK-NOT: mov w[[REG]], wzr
+; Because we returned w0 but x0 was marked live-in to the block, we didn't
+; remove the <kill> on the str leading to a verification failure.
+define i32 @test_superreg(i64 %in, i64* %dest) {
+ %tst = icmp eq i64 %in, 0
+ br i1 %tst, label %true, label %false
+
+false:
+ ret i32 42
+
+true:
+ store volatile i64 %in, i64* %dest
+ ret i32 0
+} \ No newline at end of file
OpenPOWER on IntegriCloud