diff options
author | Juergen Ributzka <juergen@apple.com> | 2014-09-17 18:05:34 +0000 |
---|---|---|
committer | Juergen Ributzka <juergen@apple.com> | 2014-09-17 18:05:34 +0000 |
commit | d8e30c0db81ee898a47f37e9673097d9b4a3314f (patch) | |
tree | 56192a90c754e052569bfdc9a0b75aa44012b295 | |
parent | 1947bf99217a233bc7f147de19738f77845ec8b8 (diff) | |
download | bcm5719-llvm-d8e30c0db81ee898a47f37e9673097d9b4a3314f.tar.gz bcm5719-llvm-d8e30c0db81ee898a47f37e9673097d9b4a3314f.zip |
[FastISel][AArch64] Fold compare with zero and branch into CBZ and CBNZ.
This takes advantage of the CBZ and CBNZ instructions to further optimize the
common null check pattern into a single instruction.
This is related to rdar://problem/18358882.
llvm-svn: 217972
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64FastISel.cpp | 64 | ||||
-rw-r--r-- | llvm/test/CodeGen/AArch64/fast-isel-cbz.ll | 57 |
2 files changed, 121 insertions, 0 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp index 826c4c089a8..da69735c8f1 100644 --- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp +++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp @@ -1673,6 +1673,32 @@ static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) { } } +/// \brief Check if the comparison against zero and the following branch can be +/// folded into a single instruction (CBZ or CBNZ). +static bool canFoldZeroIntoBranch(const CmpInst *CI) { + CmpInst::Predicate Predicate = CI->getPredicate(); + if ((Predicate != CmpInst::ICMP_EQ) && (Predicate != CmpInst::ICMP_NE)) + return false; + + Type *Ty = CI->getOperand(0)->getType(); + if (!Ty->isIntegerTy()) + return false; + + unsigned BW = cast<IntegerType>(Ty)->getBitWidth(); + if (BW != 1 && BW != 8 && BW != 16 && BW != 32 && BW != 64) + return false; + + if (const auto *C = dyn_cast<ConstantInt>(CI->getOperand(0))) + if (C->isNullValue()) + return true; + + if (const auto *C = dyn_cast<ConstantInt>(CI->getOperand(1))) + if (C->isNullValue()) + return true; + + return false; +} + bool AArch64FastISel::selectBranch(const Instruction *I) { const BranchInst *BI = cast<BranchInst>(I); if (BI->isUnconditional()) { @@ -1706,6 +1732,44 @@ bool AArch64FastISel::selectBranch(const Instruction *I) { Predicate = CmpInst::getInversePredicate(Predicate); } + // Try to optimize comparisons against zero. + if (canFoldZeroIntoBranch(CI)) { + const Value *LHS = CI->getOperand(0); + const Value *RHS = CI->getOperand(1); + + // Canonicalize zero values to the RHS. 
+ if (const auto *C = dyn_cast<ConstantInt>(LHS)) + if (C->isNullValue()) + std::swap(LHS, RHS); + + static const unsigned OpcTable[2][2] = { + {AArch64::CBZW, AArch64::CBZX }, {AArch64::CBNZW, AArch64::CBNZX} + }; + bool IsCmpNE = Predicate == CmpInst::ICMP_NE; + bool Is64Bit = LHS->getType()->isIntegerTy(64); + unsigned Opc = OpcTable[IsCmpNE][Is64Bit]; + + unsigned SrcReg = getRegForValue(LHS); + if (!SrcReg) + return false; + bool SrcIsKill = hasTrivialKill(LHS); + + // Emit the combined compare and branch instruction. + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) + .addReg(SrcReg, getKillRegState(SrcIsKill)) + .addMBB(TBB); + + // Obtain the branch weight and add the TrueBB to the successor list. + uint32_t BranchWeight = 0; + if (FuncInfo.BPI) + BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(), + TBB->getBasicBlock()); + FuncInfo.MBB->addSuccessor(TBB, BranchWeight); + + fastEmitBranch(FBB, DbgLoc); + return true; + } + // Emit the cmp. if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) return false; diff --git a/llvm/test/CodeGen/AArch64/fast-isel-cbz.ll b/llvm/test/CodeGen/AArch64/fast-isel-cbz.ll new file mode 100644 index 00000000000..43263406160 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/fast-isel-cbz.ll @@ -0,0 +1,57 @@ +; RUN: llc -fast-isel -fast-isel-abort -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs -mtriple=aarch64-apple-darwin < %s | FileCheck %s + +define i32 @icmp_eq_i1(i1 signext %a) { +; CHECK-LABEL: icmp_eq_i1 +; CHECK: cbz w0, {{LBB.+_2}} + %1 = icmp eq i1 %a, 0 + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @icmp_eq_i8(i8 signext %a) { +; CHECK-LABEL: icmp_eq_i8 +; CHECK: cbz w0, {{LBB.+_2}} + %1 = icmp eq i8 %a, 0 + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @icmp_eq_i16(i16 signext %a) { +; CHECK-LABEL: icmp_eq_i16 +; CHECK: cbz w0, {{LBB.+_2}} + %1 = icmp eq i16 %a, 0 + br i1 %1, label %bb1, label 
%bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @icmp_eq_i32(i32 %a) { +; CHECK-LABEL: icmp_eq_i32 +; CHECK: cbz w0, {{LBB.+_2}} + %1 = icmp eq i32 %a, 0 + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @icmp_eq_i64(i64 %a) { +; CHECK-LABEL: icmp_eq_i64 +; CHECK: cbz x0, {{LBB.+_2}} + %1 = icmp eq i64 %a, 0 + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + |