summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp 97
-rw-r--r-- llvm/lib/Target/ARM/ARMBaseInstrInfo.h 2
-rw-r--r-- llvm/test/CodeGen/ARM/intrinsics-overflow.ll 15
-rw-r--r-- llvm/test/CodeGen/ARM/su-addsub-overflow.ll 63
4 files changed, 130 insertions, 47 deletions
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index bd90fe90aab..9981b0586d6 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -2534,14 +2534,28 @@ inline static ARMCC::CondCodes getSwappedCondition(ARMCC::CondCodes CC) {
}
}
+/// getCmpToAddCondition - Given a condition code CC that tests flags set by
+/// CMP(a,b), return the code to use once those flags are instead set by
+/// ADD(a,b,X). HS and LO swap, VS and VC are kept; any other code gives AL.
+inline static ARMCC::CondCodes getCmpToAddCondition(ARMCC::CondCodes CC) {
+ switch (CC) {
+ default: return ARMCC::AL; // AL signals that CC cannot be converted.
+ case ARMCC::HS: return ARMCC::LO;
+ case ARMCC::LO: return ARMCC::HS;
+ case ARMCC::VS: return ARMCC::VS;
+ case ARMCC::VC: return ARMCC::VC;
+ }
+}
+
/// isRedundantFlagInstr - check whether the first instruction, whose only
/// purpose is to update flags, can be made redundant.
/// CMPrr can be made redundant by SUBrr if the operands are the same.
/// CMPri can be made redundant by SUBri if the operands are the same.
+/// CMPrr(r0, r1) can be made redundant by ADDr[ri](r0, r1, X).
/// This function can be extended later on.
-inline static bool isRedundantFlagInstr(MachineInstr *CmpI, unsigned SrcReg,
- unsigned SrcReg2, int ImmValue,
- MachineInstr *OI) {
+inline static bool isRedundantFlagInstr(const MachineInstr *CmpI,
+ unsigned SrcReg, unsigned SrcReg2,
+ int ImmValue, const MachineInstr *OI) {
if ((CmpI->getOpcode() == ARM::CMPrr ||
CmpI->getOpcode() == ARM::t2CMPrr) &&
(OI->getOpcode() == ARM::SUBrr ||
@@ -2559,6 +2573,14 @@ inline static bool isRedundantFlagInstr(MachineInstr *CmpI, unsigned SrcReg,
OI->getOperand(1).getReg() == SrcReg &&
OI->getOperand(2).getImm() == ImmValue)
return true;
+
+ if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) &&
+ (OI->getOpcode() == ARM::ADDrr || OI->getOpcode() == ARM::t2ADDrr ||
+ OI->getOpcode() == ARM::ADDri || OI->getOpcode() == ARM::t2ADDri) &&
+ OI->getOperand(0).isReg() && OI->getOperand(1).isReg() &&
+ OI->getOperand(0).getReg() == SrcReg &&
+ OI->getOperand(1).getReg() == SrcReg2)
+ return true;
return false;
}
@@ -2661,17 +2683,18 @@ bool ARMBaseInstrInfo::optimizeCompareInstr(
if (I == B) return false;
// There are two possible candidates which can be changed to set CPSR:
- // One is MI, the other is a SUB instruction.
- // For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1).
+ // One is MI, the other is a SUB or ADD instruction.
+ // For CMPrr(r1,r2), we are looking for SUB(r1,r2), SUB(r2,r1), or
+ // ADDr[ri](r1, r2, X).
// For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue).
- MachineInstr *Sub = nullptr;
+ MachineInstr *SubAdd = nullptr;
if (SrcReg2 != 0)
// MI is not a candidate for CMPrr.
MI = nullptr;
else if (MI->getParent() != CmpInstr.getParent() || CmpValue != 0) {
// Conservatively refuse to convert an instruction which isn't in the same
// BB as the comparison.
- // For CMPri w/ CmpValue != 0, a Sub may still be a candidate.
+ // For CMPri w/ CmpValue != 0, a SubAdd may still be a candidate.
// Thus we cannot return here.
if (CmpInstr.getOpcode() == ARM::CMPri ||
CmpInstr.getOpcode() == ARM::t2CMPri)
@@ -2713,38 +2736,43 @@ bool ARMBaseInstrInfo::optimizeCompareInstr(
}
I = CmpInstr;
E = MI;
+ } else {
+ // Allow the loop below to search E (which was initially MI). Since MI and
+ // SubAdd have different tests, even if that instruction could not be MI, it
+ // could still potentially be SubAdd.
+ --E;
}
// Check that CPSR isn't set between the comparison instruction and the one we
- // want to change. At the same time, search for Sub.
+ // want to change. At the same time, search for SubAdd.
const TargetRegisterInfo *TRI = &getRegisterInfo();
--I;
for (; I != E; --I) {
const MachineInstr &Instr = *I;
+ // Check whether CmpInstr can be made redundant by the current instruction.
+ if (isRedundantFlagInstr(&CmpInstr, SrcReg, SrcReg2, CmpValue, &*I)) {
+ SubAdd = &*I;
+ break;
+ }
+
if (Instr.modifiesRegister(ARM::CPSR, TRI) ||
Instr.readsRegister(ARM::CPSR, TRI))
// This instruction modifies or uses CPSR after the one we want to
// change. We can't do this transformation.
return false;
- // Check whether CmpInstr can be made redundant by the current instruction.
- if (isRedundantFlagInstr(&CmpInstr, SrcReg, SrcReg2, CmpValue, &*I)) {
- Sub = &*I;
- break;
- }
-
if (I == B)
// The 'and' is below the comparison instruction.
return false;
}
// Return false if no candidates exist.
- if (!MI && !Sub)
+ if (!MI && !SubAdd)
return false;
// The single candidate is called MI.
- if (!MI) MI = Sub;
+ if (!MI) MI = SubAdd;
// We can't use a predicated instruction - it doesn't always write the flags.
if (isPredicated(*MI))
@@ -2802,25 +2830,31 @@ bool ARMBaseInstrInfo::optimizeCompareInstr(
break;
}
- if (Sub) {
- ARMCC::CondCodes NewCC = getSwappedCondition(CC);
- if (NewCC == ARMCC::AL)
- return false;
+ if (SubAdd) {
// If we have SUB(r1, r2) and CMP(r2, r1), the condition code based
// on CMP needs to be updated to be based on SUB.
+ // If we have ADD(r1, r2, X) and CMP(r1, r2), the condition code also
+ // needs to be modified.
// Push the condition code operands to OperandsToUpdate.
// If it is safe to remove CmpInstr, the condition code of these
// operands will be modified.
- if (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
- Sub->getOperand(2).getReg() == SrcReg) {
+ unsigned Opc = SubAdd->getOpcode();
+ bool IsSub = Opc == ARM::SUBrr || Opc == ARM::t2SUBrr ||
+ Opc == ARM::SUBri || Opc == ARM::t2SUBri;
+ if (!IsSub || (SrcReg2 != 0 && SubAdd->getOperand(1).getReg() == SrcReg2 &&
+ SubAdd->getOperand(2).getReg() == SrcReg)) {
// VSel doesn't support condition code update.
if (IsInstrVSel)
return false;
+ // Ensure we can swap the condition.
+ ARMCC::CondCodes NewCC = (IsSub ? getSwappedCondition(CC) : getCmpToAddCondition(CC));
+ if (NewCC == ARMCC::AL)
+ return false;
OperandsToUpdate.push_back(
std::make_pair(&((*I).getOperand(IO - 1)), NewCC));
}
} else {
- // No Sub, so this is x = <op> y, z; cmp x, 0.
+ // No SubAdd, so this is x = <op> y, z; cmp x, 0.
switch (CC) {
case ARMCC::EQ: // Z
case ARMCC::NE: // Z
@@ -2874,6 +2908,23 @@ bool ARMBaseInstrInfo::optimizeCompareInstr(
return true;
}
+bool ARMBaseInstrInfo::shouldSink(const MachineInstr &MI) const {
+ // Return false (do not sink MI) when the instruction immediately after MI is
+ // a compare that MI could make redundant, and true otherwise. Only that one
+ // instruction is inspected, to avoid potentially searching the whole block.
+ if (isPredicated(MI)) // A predicated MI is always allowed to sink.
+ return true;
+ MachineBasicBlock::const_iterator Next = &MI;
+ ++Next; // Next now refers to the instruction following MI.
+ unsigned SrcReg, SrcReg2;
+ int CmpMask, CmpValue;
+ if (Next != MI.getParent()->end() &&
+ analyzeCompare(*Next, SrcReg, SrcReg2, CmpMask, CmpValue) &&
+ isRedundantFlagInstr(&*Next, SrcReg, SrcReg2, CmpValue, &MI))
+ return false; // Keep MI in place so the following compare can be removed.
+ return true;
+}
+
bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
unsigned Reg,
MachineRegisterInfo *MRI) const {
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
index d375f40d6e1..282a6874910 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -215,6 +215,8 @@ public:
bool expandPostRAPseudo(MachineInstr &MI) const override;
+ bool shouldSink(const MachineInstr &MI) const override;
+
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
unsigned DestReg, unsigned SubIdx,
const MachineInstr &Orig,
diff --git a/llvm/test/CodeGen/ARM/intrinsics-overflow.ll b/llvm/test/CodeGen/ARM/intrinsics-overflow.ll
index af555d2240c..5f78b13c18d 100644
--- a/llvm/test/CodeGen/ARM/intrinsics-overflow.ll
+++ b/llvm/test/CodeGen/ARM/intrinsics-overflow.ll
@@ -33,10 +33,10 @@ define i32 @sadd_overflow(i32 %a, i32 %b) #0 {
; CHECK-LABEL: sadd_overflow:
- ; ARM: add r[[R2:[0-9]+]], r[[R0:[0-9]+]], r[[R1:[0-9]+]]
- ; ARM: mov r[[R1]], #1
- ; ARM: cmp r[[R2]], r[[R0]]
- ; ARM: movvc r[[R1]], #0
+ ; ARM: adds r[[R2:[0-9]+]], r[[R0:[0-9]+]], r[[R1:[0-9]+]]
+ ; ARM: mov r[[R0]], #1
+ ; ARM: movvc r[[R0]], #0
+ ; ARM: mov pc, lr
; THUMBV6: mov r[[R2:[0-9]+]], r[[R0:[0-9]+]]
; THUMBV6: adds r[[R3:[0-9]+]], r[[R2]], r[[R1:[0-9]+]]
@@ -47,11 +47,10 @@ define i32 @sadd_overflow(i32 %a, i32 %b) #0 {
; THUMBV6: mov r[[R0]], r[[R1]]
; THUMBV6: .L[[LABEL]]:
- ; THUMBV7: movs r[[R1]], #1
- ; THUMBV7: cmp r[[R2]], r[[R0]]
+ ; THUMBV7: adds r[[R2:[0-9]+]], r[[R0]], r[[R1:[0-9]+]]
+ ; THUMBV7: mov.w r[[R0:[0-9]+]], #1
; THUMBV7: it vc
- ; THUMBV7: movvc r[[R1]], #0
- ; THUMBV7: mov r[[R0]], r[[R1]]
+ ; THUMBV7: movvc r[[R0]], #0
}
define i32 @usub_overflow(i32 %a, i32 %b) #0 {
diff --git a/llvm/test/CodeGen/ARM/su-addsub-overflow.ll b/llvm/test/CodeGen/ARM/su-addsub-overflow.ll
index eef53128203..04e59e05b6d 100644
--- a/llvm/test/CodeGen/ARM/su-addsub-overflow.ll
+++ b/llvm/test/CodeGen/ARM/su-addsub-overflow.ll
@@ -2,9 +2,7 @@
define i32 @sadd(i32 %a, i32 %b) local_unnamed_addr #0 {
; CHECK-LABEL: sadd:
-; CHECK: mov r[[R0:[0-9]+]], r0
-; CHECK-NEXT: add r[[R1:[0-9]+]], r[[R0]], r1
-; CHECK-NEXT: cmp r[[R1]], r[[R0]]
+; CHECK: adds r0, r0, r1
; CHECK-NEXT: movvc pc, lr
entry:
%0 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
@@ -23,10 +21,8 @@ cont:
define i32 @uadd(i32 %a, i32 %b) local_unnamed_addr #0 {
; CHECK-LABEL: uadd:
-; CHECK: mov r[[R0:[0-9]+]], r0
-; CHECK-NEXT: adds r[[R1:[0-9]+]], r[[R0]], r1
-; CHECK-NEXT: cmp r[[R1]], r[[R0]]
-; CHECK-NEXT: movhs pc, lr
+; CHECK: adds r0, r0, r1
+; CHECK-NEXT: movlo pc, lr
entry:
%0 = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
%1 = extractvalue { i32, i1 } %0, 1
@@ -44,8 +40,7 @@ cont:
define i32 @ssub(i32 %a, i32 %b) local_unnamed_addr #0 {
; CHECK-LABEL: ssub:
-; CHECK: cmp r0, r1
-; CHECK-NEXT: subvc r0, r0, r1
+; CHECK: subs r0, r0, r1
; CHECK-NEXT: movvc pc, lr
entry:
%0 = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %a, i32 %b)
@@ -64,9 +59,7 @@ cont:
define i32 @usub(i32 %a, i32 %b) local_unnamed_addr #0 {
; CHECK-LABEL: usub:
-; CHECK: mov r[[R0:[0-9]+]], r0
-; CHECK-NEXT: subs r[[R1:[0-9]+]], r[[R0]], r1
-; CHECK-NEXT: cmp r[[R0]], r1
+; CHECK: subs r0, r0, r1
; CHECK-NEXT: movhs pc, lr
entry:
%0 = tail call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
@@ -87,11 +80,9 @@ define void @sum(i32* %a, i32* %b, i32 %n) local_unnamed_addr #0 {
; CHECK-LABEL: sum:
; CHECK: ldr [[R0:r[0-9]+]],
; CHECK-NEXT: ldr [[R1:r[0-9]+|lr]],
-; CHECK-NEXT: add [[R2:r[0-9]+]], [[R1]], [[R0]]
-; CHECK-NEXT: cmp [[R2]], [[R1]]
+; CHECK-NEXT: adds [[R2:r[0-9]+]], [[R1]], [[R0]]
; CHECK-NEXT: strvc [[R2]],
-; CHECK-NEXT: addvc
-; CHECK-NEXT: cmpvc
+; CHECK-NEXT: addsvc
; CHECK-NEXT: bvs
entry:
%cmp7 = icmp eq i32 %n, 0
@@ -128,6 +119,46 @@ cont2:
}
+define void @extern_loop(i32 %n) local_unnamed_addr #0 {
+; The compare must stay: it cannot be merged into the add across the clobbering call.
+; CHECK: add {{r[0-9]+}}, {{r[0-9]+}}, #1
+; CHECK-NEXT: bl external_fn
+; CHECK: cmp
+entry:
+ %0 = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %n, i32 1)
+ %1 = extractvalue { i32, i1 } %0, 1
+ br i1 %1, label %trap, label %cont.lr.ph
+
+cont.lr.ph:
+ %2 = extractvalue { i32, i1 } %0, 0
+ %cmp5 = icmp sgt i32 %2, 0
+ br i1 %cmp5, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:
+ br label %for.body
+
+trap:
+ tail call void @llvm.trap() #2
+ unreachable
+
+for.cond.cleanup:
+ ret void
+
+for.body:
+ %i.046 = phi i32 [ %5, %cont1 ], [ 0, %for.body.preheader ]
+ tail call void bitcast (void (...)* @external_fn to void ()*)() #4
+ %3 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %i.046, i32 1)
+ %4 = extractvalue { i32, i1 } %3, 1
+ br i1 %4, label %trap, label %cont1
+
+cont1:
+ %5 = extractvalue { i32, i1 } %3, 0
+ %cmp = icmp slt i32 %5, %2
+ br i1 %cmp, label %for.body, label %for.cond.cleanup
+}
+
+declare void @external_fn(...) local_unnamed_addr #0
+
declare void @llvm.trap() #2
declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) #1
declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) #1
OpenPOWER on IntegriCloud