author     Juergen Ributzka <juergen@apple.com>    2014-09-30 19:59:35 +0000
committer  Juergen Ributzka <juergen@apple.com>    2014-09-30 19:59:35 +0000
commit     c110c0b99a2b1ef9f5bbad280abc42664971dc80 (patch)
tree       8906dc6ab5b0ee17d068c50f3527597e51aec8a5 /llvm/lib/Target
parent     9706978077b99d0da8273b7499af4d50812defae (diff)
Recommit r218010 [FastISel][AArch64] Fold bit test and branch into TBZ and TBNZ.
Note: This version fixes an issue with the TBZ/TBNZ instructions that were
generated in FastISel. The issue was that the 64-bit version of TBZ (TBZX)
automagically sets the upper bit of the immediate field that is used to specify
the bit we want to test, so it can only refer to bits 32 through 63. To test
any of the lower 32 bits we first have to extract the 32-bit subregister and
use the 32-bit version of the instruction (TBZW).
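To make the constraint concrete, here is a minimal standalone sketch of the
width rule the fix enforces; use64BitTestBranch is a hypothetical helper that
mirrors the Is64Bit logic in the diff below, not a FastISel API:

#include <cassert>

// TBZX/TBNZX can only encode bit indices 32..63, because the 64-bit
// encoding implicitly sets the top bit of the 6-bit immediate. Bits
// 0..31 therefore need TBZW/TBNZW on the 32-bit subregister.
static bool use64BitTestBranch(unsigned BitWidth, int TestBit) {
  bool Is64Bit = BitWidth == 64;
  if (TestBit >= 0 && TestBit < 32)
    Is64Bit = false; // low bit: fall back to the W-form
  return Is64Bit;
}

int main() {
  assert(!use64BitTestBranch(64, 3));  // bit 3 of an i64 -> TBZW on sub_32
  assert(use64BitTestBranch(64, 40));  // bit 40 -> TBZX works directly
  assert(!use64BitTestBranch(32, 5));  // 32-bit values always use the W-form
}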
Original commit message:
Teach selectBranch to fold bit test and branch into a single instruction (TBZ or
TBNZ).
llvm-svn: 218693
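As a rough source-level illustration (my example, not part of the commit), a
power-of-two mask test followed by a conditional branch is the shape this fold
targets:

// With this change, FastISel can lower the "x & (1u << 5)" test plus
// branch to a single "tbnz w0, #5, <target>" instead of emitting a
// separate AND, a compare against zero, and a conditional branch.
bool bit5IsSet(unsigned x) {
  if (x & (1u << 5))
    return true;
  return false;
}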
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--  llvm/lib/Target/AArch64/AArch64FastISel.cpp  170
1 file changed, 116 insertions, 54 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
index f20dcae7168..873b2ec328e 100644
--- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
@@ -173,6 +173,7 @@ private:
                        bool WantResult = true);
 
   // Emit functions.
+  bool emitCompareAndBranch(const BranchInst *BI);
   bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
   bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
   bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
@@ -1927,30 +1928,125 @@ static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
   }
 }
 
-/// \brief Check if the comparison against zero and the following branch can be
-/// folded into a single instruction (CBZ or CBNZ).
-static bool canFoldZeroIntoBranch(const CmpInst *CI) {
-  CmpInst::Predicate Predicate = CI->getPredicate();
-  if ((Predicate != CmpInst::ICMP_EQ) && (Predicate != CmpInst::ICMP_NE))
-    return false;
+/// \brief Try to emit a combined compare-and-branch instruction.
+bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
+  assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
+  const CmpInst *CI = cast<CmpInst>(BI->getCondition());
+  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
 
-  Type *Ty = CI->getOperand(0)->getType();
-  if (!Ty->isIntegerTy())
-    return false;
+  const Value *LHS = CI->getOperand(0);
+  const Value *RHS = CI->getOperand(1);
+
+  Type *Ty = LHS->getType();
+  if (!Ty->isIntegerTy())
+    return false;
 
   unsigned BW = cast<IntegerType>(Ty)->getBitWidth();
   if (BW != 1 && BW != 8 && BW != 16 && BW != 32 && BW != 64)
     return false;
 
-  if (const auto *C = dyn_cast<ConstantInt>(CI->getOperand(0)))
-    if (C->isNullValue())
-      return true;
+  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
+  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
 
-  if (const auto *C = dyn_cast<ConstantInt>(CI->getOperand(1)))
-    if (C->isNullValue())
-      return true;
+  // Try to take advantage of fallthrough opportunities.
+  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
+    std::swap(TBB, FBB);
+    Predicate = CmpInst::getInversePredicate(Predicate);
+  }
 
-  return false;
+  int TestBit = -1;
+  bool IsCmpNE;
+  if ((Predicate == CmpInst::ICMP_EQ) || (Predicate == CmpInst::ICMP_NE)) {
+    if (const auto *C = dyn_cast<ConstantInt>(LHS))
+      if (C->isNullValue())
+        std::swap(LHS, RHS);
+
+    if (!isa<ConstantInt>(RHS))
+      return false;
+
+    if (!cast<ConstantInt>(RHS)->isNullValue())
+      return false;
+
+    if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
+      if (AI->getOpcode() == Instruction::And) {
+        const Value *AndLHS = AI->getOperand(0);
+        const Value *AndRHS = AI->getOperand(1);
+
+        if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
+          if (C->getValue().isPowerOf2())
+            std::swap(AndLHS, AndRHS);
+
+        if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
+          if (C->getValue().isPowerOf2()) {
+            TestBit = C->getValue().logBase2();
+            LHS = AndLHS;
+          }
+      }
+    IsCmpNE = Predicate == CmpInst::ICMP_NE;
+  } else if (Predicate == CmpInst::ICMP_SLT) {
+    if (!isa<ConstantInt>(RHS))
+      return false;
+
+    if (!cast<ConstantInt>(RHS)->isNullValue())
+      return false;
+
+    TestBit = BW - 1;
+    IsCmpNE = true;
+  } else if (Predicate == CmpInst::ICMP_SGT) {
+    if (!isa<ConstantInt>(RHS))
+      return false;
+
+    if (cast<ConstantInt>(RHS)->getValue() != -1)
+      return false;
+
+    TestBit = BW - 1;
+    IsCmpNE = false;
+  } else
+    return false;
+
+  static const unsigned OpcTable[2][2][2] = {
+    { {AArch64::CBZW,  AArch64::CBZX },
+      {AArch64::CBNZW, AArch64::CBNZX} },
+    { {AArch64::TBZW,  AArch64::TBZX },
+      {AArch64::TBNZW, AArch64::TBNZX} }
+  };
+
+  bool IsBitTest = TestBit != -1;
+  bool Is64Bit = BW == 64;
+  if (TestBit < 32 && TestBit >= 0)
+    Is64Bit = false;
+
+  unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
+  const MCInstrDesc &II = TII.get(Opc);
+
+  unsigned SrcReg = getRegForValue(LHS);
+  if (!SrcReg)
+    return false;
+  bool SrcIsKill = hasTrivialKill(LHS);
+
+  if (BW == 64 && !Is64Bit) {
+    SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
+                                        AArch64::sub_32);
+    SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
+  }
+
+  // Emit the combined compare and branch instruction.
+  MachineInstrBuilder MIB =
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
+          .addReg(SrcReg, getKillRegState(SrcIsKill));
+  if (IsBitTest)
+    MIB.addImm(TestBit);
+  MIB.addMBB(TBB);
+
+  // Obtain the branch weight and add the TrueBB to the successor list.
+  uint32_t BranchWeight = 0;
+  if (FuncInfo.BPI)
+    BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
+                                               TBB->getBasicBlock());
+  FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
+  fastEmitBranch(FBB, DbgLoc);
+
+  return true;
 }
 
 bool AArch64FastISel::selectBranch(const Instruction *I) {
@@ -1980,50 +2076,16 @@ bool AArch64FastISel::selectBranch(const Instruction *I) {
       return true;
     }
 
+    // Try to emit a combined compare-and-branch first.
+    if (emitCompareAndBranch(BI))
+      return true;
+
     // Try to take advantage of fallthrough opportunities.
     if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
       std::swap(TBB, FBB);
       Predicate = CmpInst::getInversePredicate(Predicate);
    }
 
-    // Try to optimize comparisons against zero.
-    if (canFoldZeroIntoBranch(CI)) {
-      const Value *LHS = CI->getOperand(0);
-      const Value *RHS = CI->getOperand(1);
-
-      // Canonicalize zero values to the RHS.
-      if (const auto *C = dyn_cast<ConstantInt>(LHS))
-        if (C->isNullValue())
-          std::swap(LHS, RHS);
-
-      static const unsigned OpcTable[2][2] = {
-        {AArch64::CBZW,  AArch64::CBZX }, {AArch64::CBNZW, AArch64::CBNZX}
-      };
-      bool IsCmpNE = Predicate == CmpInst::ICMP_NE;
-      bool Is64Bit = LHS->getType()->isIntegerTy(64);
-      unsigned Opc = OpcTable[IsCmpNE][Is64Bit];
-
-      unsigned SrcReg = getRegForValue(LHS);
-      if (!SrcReg)
-        return false;
-      bool SrcIsKill = hasTrivialKill(LHS);
-
-      // Emit the combined compare and branch instruction.
-      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
-          .addReg(SrcReg, getKillRegState(SrcIsKill))
-          .addMBB(TBB);
-
-      // Obtain the branch weight and add the TrueBB to the successor list.
-      uint32_t BranchWeight = 0;
-      if (FuncInfo.BPI)
-        BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
-                                                   TBB->getBasicBlock());
-      FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
-
-      fastEmitBranch(FBB, DbgLoc);
-      return true;
-    }
-
     // Emit the cmp.
     if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
       return false;
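The ICMP_SLT and ICMP_SGT cases in the new emitCompareAndBranch rest on a
two's-complement sign-bit identity; the following standalone sketch
(illustrative, not part of the patch) checks that identity:

#include <cassert>
#include <cstdint>

// Why "icmp slt x, 0" can become TBNZ on bit BW-1 and "icmp sgt x, -1"
// can become TBZ on bit BW-1: for two's-complement x, x < 0 holds
// exactly when the sign bit is set, and x > -1 exactly when it is clear.
int main() {
  const int64_t Samples[] = {INT64_MIN, -5, -1, 0, 7, INT64_MAX};
  for (int64_t X : Samples) {
    bool SignBit = (static_cast<uint64_t>(X) >> 63) & 1;
    assert((X < 0) == SignBit);   // ICMP_SLT x, 0  -> TestBit = 63, TBNZ
    assert((X > -1) == !SignBit); // ICMP_SGT x, -1 -> TestBit = 63, TBZ
  }
}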