summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJuergen Ributzka <juergen@apple.com>2014-09-17 18:05:34 +0000
committerJuergen Ributzka <juergen@apple.com>2014-09-17 18:05:34 +0000
commitd8e30c0db81ee898a47f37e9673097d9b4a3314f (patch)
tree56192a90c754e052569bfdc9a0b75aa44012b295
parent1947bf99217a233bc7f147de19738f77845ec8b8 (diff)
downloadbcm5719-llvm-d8e30c0db81ee898a47f37e9673097d9b4a3314f.tar.gz
bcm5719-llvm-d8e30c0db81ee898a47f37e9673097d9b4a3314f.zip
[FastISel][AArch64] Fold compare with zero and branch into CBZ and CBNZ.
This takes advanatage of the CBZ and CBNZ instruction to further optimize the common null check pattern into a single instruction. This is related to rdar://problem/18358882. llvm-svn: 217972
-rw-r--r--llvm/lib/Target/AArch64/AArch64FastISel.cpp64
-rw-r--r--llvm/test/CodeGen/AArch64/fast-isel-cbz.ll57
2 files changed, 121 insertions, 0 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
index 826c4c089a8..da69735c8f1 100644
--- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
@@ -1673,6 +1673,32 @@ static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
}
}
+/// \brief Check if the comparison against zero and the following branch can be
+/// folded into a single instruction (CBZ or CBNZ).
+static bool canFoldZeroIntoBranch(const CmpInst *CI) {
+ CmpInst::Predicate Predicate = CI->getPredicate();
+ if ((Predicate != CmpInst::ICMP_EQ) && (Predicate != CmpInst::ICMP_NE))
+ return false;
+
+ Type *Ty = CI->getOperand(0)->getType();
+ if (!Ty->isIntegerTy())
+ return false;
+
+ unsigned BW = cast<IntegerType>(Ty)->getBitWidth();
+ if (BW != 1 && BW != 8 && BW != 16 && BW != 32 && BW != 64)
+ return false;
+
+ if (const auto *C = dyn_cast<ConstantInt>(CI->getOperand(0)))
+ if (C->isNullValue())
+ return true;
+
+ if (const auto *C = dyn_cast<ConstantInt>(CI->getOperand(1)))
+ if (C->isNullValue())
+ return true;
+
+ return false;
+}
+
bool AArch64FastISel::selectBranch(const Instruction *I) {
const BranchInst *BI = cast<BranchInst>(I);
if (BI->isUnconditional()) {
@@ -1706,6 +1732,44 @@ bool AArch64FastISel::selectBranch(const Instruction *I) {
Predicate = CmpInst::getInversePredicate(Predicate);
}
+ // Try to optimize comparisons against zero.
+ if (canFoldZeroIntoBranch(CI)) {
+ const Value *LHS = CI->getOperand(0);
+ const Value *RHS = CI->getOperand(1);
+
+ // Canonicalize zero values to the RHS.
+ if (const auto *C = dyn_cast<ConstantInt>(LHS))
+ if (C->isNullValue())
+ std::swap(LHS, RHS);
+
+ static const unsigned OpcTable[2][2] = {
+ {AArch64::CBZW, AArch64::CBZX }, {AArch64::CBNZW, AArch64::CBNZX}
+ };
+ bool IsCmpNE = Predicate == CmpInst::ICMP_NE;
+ bool Is64Bit = LHS->getType()->isIntegerTy(64);
+ unsigned Opc = OpcTable[IsCmpNE][Is64Bit];
+
+ unsigned SrcReg = getRegForValue(LHS);
+ if (!SrcReg)
+ return false;
+ bool SrcIsKill = hasTrivialKill(LHS);
+
+ // Emit the combined compare and branch instruction.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
+ .addReg(SrcReg, getKillRegState(SrcIsKill))
+ .addMBB(TBB);
+
+ // Obtain the branch weight and add the TrueBB to the successor list.
+ uint32_t BranchWeight = 0;
+ if (FuncInfo.BPI)
+ BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
+ TBB->getBasicBlock());
+ FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
+
+ fastEmitBranch(FBB, DbgLoc);
+ return true;
+ }
+
// Emit the cmp.
if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
return false;
diff --git a/llvm/test/CodeGen/AArch64/fast-isel-cbz.ll b/llvm/test/CodeGen/AArch64/fast-isel-cbz.ll
new file mode 100644
index 00000000000..43263406160
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/fast-isel-cbz.ll
@@ -0,0 +1,57 @@
+; RUN: llc -fast-isel -fast-isel-abort -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs -mtriple=aarch64-apple-darwin < %s | FileCheck %s
+
+define i32 @icmp_eq_i1(i1 signext %a) {
+; CHECK-LABEL: icmp_eq_i1
+; CHECK: cbz w0, {{LBB.+_2}}
+ %1 = icmp eq i1 %a, 0
+ br i1 %1, label %bb1, label %bb2
+bb2:
+ ret i32 1
+bb1:
+ ret i32 0
+}
+
+define i32 @icmp_eq_i8(i8 signext %a) {
+; CHECK-LABEL: icmp_eq_i8
+; CHECK: cbz w0, {{LBB.+_2}}
+ %1 = icmp eq i8 %a, 0
+ br i1 %1, label %bb1, label %bb2
+bb2:
+ ret i32 1
+bb1:
+ ret i32 0
+}
+
+define i32 @icmp_eq_i16(i16 signext %a) {
+; CHECK-LABEL: icmp_eq_i16
+; CHECK: cbz w0, {{LBB.+_2}}
+ %1 = icmp eq i16 %a, 0
+ br i1 %1, label %bb1, label %bb2
+bb2:
+ ret i32 1
+bb1:
+ ret i32 0
+}
+
+define i32 @icmp_eq_i32(i32 %a) {
+; CHECK-LABEL: icmp_eq_i32
+; CHECK: cbz w0, {{LBB.+_2}}
+ %1 = icmp eq i32 %a, 0
+ br i1 %1, label %bb1, label %bb2
+bb2:
+ ret i32 1
+bb1:
+ ret i32 0
+}
+
+define i32 @icmp_eq_i64(i64 %a) {
+; CHECK-LABEL: icmp_eq_i64
+; CHECK: cbz x0, {{LBB.+_2}}
+ %1 = icmp eq i64 %a, 0
+ br i1 %1, label %bb1, label %bb2
+bb2:
+ ret i32 1
+bb1:
+ ret i32 0
+}
+
OpenPOWER on IntegriCloud