summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorQuentin Colombet <qcolombet@apple.com>2016-04-25 20:54:08 +0000
committerQuentin Colombet <qcolombet@apple.com>2016-04-25 20:54:08 +0000
commitabe2d016cf9e1e1ffefdc0cce03e125bec811493 (patch)
treeab0c01cd69bcf2defcd8d30f3cff0a711141c979
parent05fefa4a85cbafe99834ba02a96933e7fc65ec25 (diff)
downloadbcm5719-llvm-abe2d016cf9e1e1ffefdc0cce03e125bec811493.tar.gz
bcm5719-llvm-abe2d016cf9e1e1ffefdc0cce03e125bec811493.zip
Re-apply r267206 with a fix for the encoding problem: when the immediate,
log2(Mask), is smaller than 32, we must use the 32-bit variant because the 64-bit variant cannot encode it. Therefore, set the subreg part accordingly. [AArch64] Fix optimizeCondBranch logic. The opcode for the optimized branch depends not on the size of the active bits in the AND mask, but on the AND opcode itself. Indeed, we need to use an X or W variant based on the AND variant, not based on whether the mask fits into the related variant. Otherwise, we may end up using the W variant of the optimized branch for 64-bit register inputs! This fixes the last make check verifier issues for AArch64: PR27479. llvm-svn: 267465
-rw-r--r--llvm/lib/Target/AArch64/AArch64InstrInfo.cpp17
-rw-r--r--llvm/test/CodeGen/AArch64/aarch64-tbz.ll57
2 files changed, 66 insertions, 8 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 5a189f40ab1..2f06922f8c0 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -3863,9 +3863,9 @@ bool AArch64InstrInfo::optimizeCondBranch(MachineInstr *MI) const {
if (!MRI->hasOneNonDBGUse(VReg))
return false;
+ bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
uint64_t Mask = AArch64_AM::decodeLogicalImmediate(
- DefMI->getOperand(2).getImm(),
- (DefMI->getOpcode() == AArch64::ANDWri) ? 32 : 64);
+ DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
if (!isPowerOf2_64(Mask))
return false;
@@ -3883,7 +3883,18 @@ bool AArch64InstrInfo::optimizeCondBranch(MachineInstr *MI) const {
unsigned Opc = (Imm < 32)
? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
: (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
- BuildMI(RefToMBB, MI, DL, get(Opc)).addReg(NewReg).addImm(Imm).addMBB(TBB);
+ MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
+ .addReg(NewReg)
+ .addImm(Imm)
+ .addMBB(TBB);
+
+ // For immediates smaller than 32, we need to use the 32-bit
+ // variant (W) in all cases, because the 64-bit variant cannot
+ // encode them.
+ // Therefore, if the input register is 64-bit, we need to take the
+ // 32-bit sub-part.
+ if (!Is32Bit && Imm < 32)
+ NewMI->getOperand(0).setSubReg(AArch64::sub_32);
MI->eraseFromParent();
return true;
}
diff --git a/llvm/test/CodeGen/AArch64/aarch64-tbz.ll b/llvm/test/CodeGen/AArch64/aarch64-tbz.ll
index e24f2d9ce2b..f4ebcc70674 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-tbz.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-tbz.ll
@@ -1,9 +1,9 @@
-; RUN: llc -mtriple=aarch64-linux-gnueabi < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-linux-gnueabi < %s | FileCheck %s
; CHECK-LABEL: test1
; CHECK: tbz {{w[0-9]}}, #3, {{.LBB0_3}}
-; CHECK: tbz [[REG1:x[0-9]+]], #2, {{.LBB0_3}}
-; CHECK-NOT: and [[REG2:x[0-9]+]], [[REG1]], #0x4
+; CHECK: tbz w[[REG1:[0-9]+]], #2, {{.LBB0_3}}
+; CHECK-NOT: and [[REG2:x[0-9]+]], x[[REG1]], #0x4
; CHECK-NOT: cbz [[REG2]], {{.LBB0_3}}
; CHECK: b
@@ -26,8 +26,8 @@ if.end3: ; preds = %if.then2, %entry
; CHECK-LABEL: test2
; CHECK: cbz {{x[0-9]}}, {{.LBB1_3}}
-; CHECK: tbz [[REG1:x[0-9]+]], #3, {{.LBB1_3}}
-; CHECK-NOT: and [REG2:x[0-9]+], [[REG1]], #0x08
+; CHECK: tbz w[[REG1:[0-9]+]], #3, {{.LBB1_3}}
+; CHECK-NOT: and [REG2:x[0-9]+], x[[REG1]], #0x08
; CHECK-NOT: cbz [[REG2]], {{.LBB1_3}}
define void @test2(i64 %A, i64* readonly %B) #0 {
@@ -47,5 +47,52 @@ if.end3: ; preds = %entry, %if.then2
ret void
}
+; Make sure we use the W variant when log2(mask) is < 32.
+; CHECK-LABEL: test3
+; CHECK: tbz {{w[0-9]}}, #3, {{.LBB2_3}}
+; CHECK: tbz w[[REG1:[0-9]+]], #28, {{.LBB2_3}}
+; CHECK-NOT: and [[REG2:x[0-9]+]], x[[REG1]]
+; CHECK-NOT: cbz [[REG2]], {{.LBB2_3}}
+define void @test3(i64 %A, i64 %B) {
+entry:
+ %shift = shl i64 1, 28
+ %and = and i64 %A, %shift
+ %notlhs = icmp eq i64 %and, 0
+ %and.1 = and i64 %B, 8
+ %0 = icmp eq i64 %and.1, 0
+ %1 = or i1 %0, %notlhs
+ br i1 %1, label %if.then2, label %if.end3
+
+if.then2: ; preds = %entry
+ tail call void @foo(i64 %A, i64 %B)
+ br label %if.end3
+
+if.end3: ; preds = %if.then2, %entry
+ ret void
+}
+
+; CHECK-LABEL: test4
+; CHECK: tbz {{w[0-9]}}, #3, {{.LBB3_3}}
+; CHECK: tbz [[REG1:x[0-9]+]], #35, {{.LBB3_3}}
+; CHECK-NOT: and [[REG2:x[0-9]+]], x[[REG1]]
+; CHECK-NOT: cbz [[REG2]], {{.LBB2_3}}
+define void @test4(i64 %A, i64 %B) {
+entry:
+ %shift = shl i64 1, 35
+ %and = and i64 %A, %shift
+ %notlhs = icmp eq i64 %and, 0
+ %and.1 = and i64 %B, 8
+ %0 = icmp eq i64 %and.1, 0
+ %1 = or i1 %0, %notlhs
+ br i1 %1, label %if.then2, label %if.end3
+
+if.then2: ; preds = %entry
+ tail call void @foo(i64 %A, i64 %B)
+ br label %if.end3
+
+if.end3: ; preds = %if.then2, %entry
+ ret void
+}
+
declare void @foo(i64, i64)
OpenPOWER on IntegriCloud