summary | refs | log | tree | commit | diff | stats
path: root/llvm/test
diff options
context:
space:
mode:
author: Geoff Berry <gberry@codeaurora.org> 2017-02-21 18:53:14 +0000
committer: Geoff Berry <gberry@codeaurora.org> 2017-02-21 18:53:14 +0000
commit: 5d534b6a11d6428d173279e5155b4f966dab1be7 (patch)
tree: b706e6c1da467f7760c6fd7d2fce71bb608970c7 /llvm/test
parent: 16289cfcfca9fd8cd5699a00b1bd4769ffa79144 (diff)
download: bcm5719-llvm-5d534b6a11d6428d173279e5155b4f966dab1be7.tar.gz
bcm5719-llvm-5d534b6a11d6428d173279e5155b4f966dab1be7.zip
[CodeGenPrepare] Sink and duplicate more 'and' instructions.
Summary: Rework the code that was sinking/duplicating (icmp and, 0) sequences into blocks where they were being used by conditional branches to form more tbz instructions on AArch64. The new code is more general in that it just looks for 'and's that have all icmp 0's as users, with a target hook used to select which subset of 'and' instructions to consider. This change also enables 'and' sinking for X86, where it is more widely beneficial than on AArch64. The 'and' sinking/duplicating code is moved into the optimizeInst phase of CodeGenPrepare, where it can take advantage of the fact that OptimizeCmpExpression has already sunk/duplicated any icmps into the blocks where they are used. One minor complication from this change is that optimizeLoadExt needed to be updated to always mark 'and's it has determined should be in the same block as their feeding load in the InsertedInsts set to avoid an infinite loop of hoisting and sinking the same 'and'. This change fixes a regression on X86 in the tsan runtime caused by moving GVNHoist to a later place in the optimization pipeline (see PR31382). Reviewers: t.p.northover, qcolombet, MatzeB Subscribers: aemerson, mcrosier, sebpop, llvm-commits Differential Revision: https://reviews.llvm.org/D28813 llvm-svn: 295746
Diffstat (limited to 'llvm/test')
-rw-r--r-- llvm/test/CodeGen/AArch64/and-sink.ll | 90
-rw-r--r-- llvm/test/CodeGen/AArch64/fast-isel-tbz.ll | 18
-rw-r--r-- llvm/test/CodeGen/X86/and-sink.ll | 181
3 files changed, 288 insertions, 1 deletions
diff --git a/llvm/test/CodeGen/AArch64/and-sink.ll b/llvm/test/CodeGen/AArch64/and-sink.ll
new file mode 100644
index 00000000000..91b7bd0db17
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/and-sink.ll
@@ -0,0 +1,90 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs < %s | FileCheck %s
+; RUN: opt -S -codegenprepare -mtriple=aarch64-linux %s | FileCheck --check-prefix=CHECK-CGP %s
+
+@A = global i32 zeroinitializer
+@B = global i32 zeroinitializer
+@C = global i32 zeroinitializer
+
+; Test that and is sunk into cmp block to form tbz.
+define i32 @and_sink1(i32 %a, i1 %c) {
+; CHECK-LABEL: and_sink1:
+; CHECK: tbz w1, #0
+; CHECK: str wzr, [x{{[0-9]+}}, :lo12:A]
+; CHECK: tbnz {{w[0-9]+}}, #2
+
+; CHECK-CGP-LABEL: @and_sink1(
+; CHECK-CGP-NOT: and i32
+ %and = and i32 %a, 4
+ br i1 %c, label %bb0, label %bb2
+bb0:
+; CHECK-CGP-LABEL: bb0:
+; CHECK-CGP: and i32
+; CHECK-CGP-NEXT: icmp eq i32
+; CHECK-CGP-NEXT: store
+; CHECK-CGP-NEXT: br
+ %cmp = icmp eq i32 %and, 0
+ store i32 0, i32* @A
+ br i1 %cmp, label %bb1, label %bb2
+bb1:
+ ret i32 1
+bb2:
+ ret i32 0
+}
+
+; Test that both 'and' and cmp get sunk to form tbz.
+define i32 @and_sink2(i32 %a, i1 %c, i1 %c2) {
+; CHECK-LABEL: and_sink2:
+; CHECK: str wzr, [x{{[0-9]+}}, :lo12:A]
+; CHECK: tbz w1, #0
+; CHECK: str wzr, [x{{[0-9]+}}, :lo12:B]
+; CHECK: tbz w2, #0
+; CHECK: str wzr, [x{{[0-9]+}}, :lo12:C]
+; CHECK: tbnz {{w[0-9]+}}, #2
+
+; CHECK-CGP-LABEL: @and_sink2(
+; CHECK-CGP-NOT: and i32
+ %and = and i32 %a, 4
+ store i32 0, i32* @A
+ br i1 %c, label %bb0, label %bb3
+bb0:
+; CHECK-CGP-LABEL: bb0:
+; CHECK-CGP-NOT: and i32
+; CHECK-CGP-NOT: icmp
+ %cmp = icmp eq i32 %and, 0
+ store i32 0, i32* @B
+ br i1 %c2, label %bb1, label %bb3
+bb1:
+; CHECK-CGP-LABEL: bb1:
+; CHECK-CGP: and i32
+; CHECK-CGP-NEXT: icmp eq i32
+; CHECK-CGP-NEXT: store
+; CHECK-CGP-NEXT: br
+ store i32 0, i32* @C
+ br i1 %cmp, label %bb2, label %bb0
+bb2:
+ ret i32 1
+bb3:
+ ret i32 0
+}
+
+; Test that 'and' is not sunk since cbz is a better alternative.
+define i32 @and_sink3(i32 %a) {
+; CHECK-LABEL: and_sink3:
+; CHECK: and [[REG:w[0-9]+]], w0, #0x3
+; CHECK: [[LOOP:.L[A-Z0-9_]+]]:
+; CHECK: str wzr, [x{{[0-9]+}}, :lo12:A]
+; CHECK: cbz [[REG]], [[LOOP]]
+
+; CHECK-CGP-LABEL: @and_sink3(
+; CHECK-CGP-NEXT: and i32
+ %and = and i32 %a, 3
+ br label %bb0
+bb0:
+; CHECK-CGP-LABEL: bb0:
+; CHECK-CGP-NOT: and i32
+ %cmp = icmp eq i32 %and, 0
+ store i32 0, i32* @A
+ br i1 %cmp, label %bb0, label %bb2
+bb2:
+ ret i32 0
+}
diff --git a/llvm/test/CodeGen/AArch64/fast-isel-tbz.ll b/llvm/test/CodeGen/AArch64/fast-isel-tbz.ll
index af817777143..d6d10318bf0 100644
--- a/llvm/test/CodeGen/AArch64/fast-isel-tbz.ll
+++ b/llvm/test/CodeGen/AArch64/fast-isel-tbz.ll
@@ -278,8 +278,24 @@ bb2:
; Test that we don't fold the 'and' instruction into the compare.
define i32 @icmp_eq_and_i32(i32 %a, i1 %c) {
; CHECK-LABEL: icmp_eq_and_i32
-; CHECK: and [[REG:w[0-9]+]], w0, #0x4
+; CHECK: and [[REG:w[0-9]+]], w0, #0x3
; CHECK-NEXT: cbz [[REG]], {{LBB.+_3}}
+ %1 = and i32 %a, 3
+ br i1 %c, label %bb0, label %bb2
+bb0:
+ %2 = icmp eq i32 %1, 0
+ br i1 %2, label %bb1, label %bb2, !prof !0
+bb1:
+ ret i32 1
+bb2:
+ ret i32 0
+}
+
+; Test that we do fold the 'and' instruction into the compare and
+; generate a tbz instruction for the conditional branch.
+define i32 @icmp_eq_and1bit_i32(i32 %a, i1 %c) {
+; CHECK-LABEL: icmp_eq_and1bit_i32
+; CHECK: tbz {{w[0-9]+}}, #2, {{LBB.+_3}}
%1 = and i32 %a, 4
br i1 %c, label %bb0, label %bb2
bb0:
diff --git a/llvm/test/CodeGen/X86/and-sink.ll b/llvm/test/CodeGen/X86/and-sink.ll
new file mode 100644
index 00000000000..18120d7f786
--- /dev/null
+++ b/llvm/test/CodeGen/X86/and-sink.ll
@@ -0,0 +1,181 @@
+; RUN: llc -mtriple=i686-unknown -verify-machineinstrs < %s | FileCheck %s
+; RUN: opt < %s -codegenprepare -S -mtriple=x86_64-unknown-unknown | FileCheck --check-prefix=CHECK-CGP %s
+
+@A = global i32 zeroinitializer
+@B = global i32 zeroinitializer
+@C = global i32 zeroinitializer
+
+; Test that 'and' is sunk into bb0.
+define i32 @and_sink1(i32 %a, i1 %c) {
+; CHECK-LABEL: and_sink1:
+; CHECK: testb $1,
+; CHECK: je
+; CHECK-NOT: andl $4,
+; CHECK: movl $0, A
+; CHECK: testb $4,
+; CHECK: jne
+
+; CHECK-CGP-LABEL: @and_sink1(
+; CHECK-CGP-NOT: and i32
+ %and = and i32 %a, 4
+ br i1 %c, label %bb0, label %bb2
+bb0:
+; CHECK-CGP-LABEL: bb0:
+; CHECK-CGP: and i32
+; CHECK-CGP-NEXT: icmp eq i32
+; CHECK-CGP-NEXT: store
+; CHECK-CGP-NEXT: br
+ %cmp = icmp eq i32 %and, 0
+ store i32 0, i32* @A
+ br i1 %cmp, label %bb1, label %bb2
+bb1:
+ ret i32 1
+bb2:
+ ret i32 0
+}
+
+; Test that both 'and' and cmp get sunk to bb1.
+define i32 @and_sink2(i32 %a, i1 %c, i1 %c2) {
+; CHECK-LABEL: and_sink2:
+; CHECK: movl $0, A
+; CHECK: testb $1,
+; CHECK: je
+; CHECK-NOT: andl $4,
+; CHECK: movl $0, B
+; CHECK: testb $1,
+; CHECK: je
+; CHECK: movl $0, C
+; CHECK: testb $4,
+; CHECK: jne
+
+; CHECK-CGP-LABEL: @and_sink2(
+; CHECK-CGP-NOT: and i32
+ %and = and i32 %a, 4
+ store i32 0, i32* @A
+ br i1 %c, label %bb0, label %bb3
+bb0:
+; CHECK-CGP-LABEL: bb0:
+; CHECK-CGP-NOT: and i32
+; CHECK-CGP-NOT: icmp
+ %cmp = icmp eq i32 %and, 0
+ store i32 0, i32* @B
+ br i1 %c2, label %bb1, label %bb3
+bb1:
+; CHECK-CGP-LABEL: bb1:
+; CHECK-CGP: and i32
+; CHECK-CGP-NEXT: icmp eq i32
+; CHECK-CGP-NEXT: store
+; CHECK-CGP-NEXT: br
+ store i32 0, i32* @C
+ br i1 %cmp, label %bb2, label %bb0
+bb2:
+ ret i32 1
+bb3:
+ ret i32 0
+}
+
+; Test that CodeGenPrepare doesn't get stuck in a loop sinking and hoisting a masked load.
+define i32 @and_sink3(i1 %c, i32* %p) {
+; CHECK-LABEL: and_sink3:
+; CHECK: testb $1,
+; CHECK: je
+; CHECK: movzbl
+; CHECK: movl $0, A
+; CHECK: testl %
+; CHECK: je
+
+; CHECK-CGP-LABEL: @and_sink3(
+; CHECK-CGP: load i32
+; CHECK-CGP-NEXT: and i32
+ %load = load i32, i32* %p
+ %and = and i32 %load, 255
+ br i1 %c, label %bb0, label %bb2
+bb0:
+; CHECK-CGP-LABEL: bb0:
+; CHECK-CGP-NOT: and i32
+; CHECK-CGP: icmp eq i32
+ %cmp = icmp eq i32 %and, 0
+ store i32 0, i32* @A
+ br i1 %cmp, label %bb1, label %bb2
+bb1:
+ ret i32 1
+bb2:
+ ret i32 0
+}
+
+; Test that CodeGenPrepare sinks/duplicates non-immediate 'and'.
+define i32 @and_sink4(i32 %a, i32 %b, i1 %c) {
+; CHECK-LABEL: and_sink4:
+; CHECK: testb $1,
+; CHECK: je
+; CHECK-NOT: andl
+; CHECK: movl $0, A
+; CHECK: testl [[REG1:%[a-z0-9]+]], [[REG2:%[a-z0-9]+]]
+; CHECK: jne
+; CHECK: movl {{%[a-z0-9]+}}, B
+; CHECK: testl [[REG1]], [[REG2]]
+; CHECK: je
+
+; CHECK-CGP-LABEL: @and_sink4(
+; CHECK-CGP-NOT: and i32
+; CHECK-CGP-NOT: icmp
+ %and = and i32 %a, %b
+ %cmp = icmp eq i32 %and, 0
+ br i1 %c, label %bb0, label %bb3
+bb0:
+; CHECK-CGP-LABEL: bb0:
+; CHECK-CGP: and i32
+; CHECK-CGP-NEXT: icmp eq i32
+ store i32 0, i32* @A
+ br i1 %cmp, label %bb1, label %bb3
+bb1:
+; CHECK-CGP-LABEL: bb1:
+; CHECK-CGP: and i32
+; CHECK-CGP-NEXT: icmp eq i32
+ %add = add i32 %a, %b
+ store i32 %add, i32* @B
+ br i1 %cmp, label %bb2, label %bb3
+bb2:
+ ret i32 1
+bb3:
+ ret i32 0
+}
+
+
+; Test that CodeGenPrepare doesn't sink/duplicate non-immediate 'and'
+; when it would increase register pressure.
+define i32 @and_sink5(i32 %a, i32 %b, i32 %a2, i32 %b2, i1 %c) {
+; CHECK-LABEL: and_sink5:
+; CHECK: testb $1,
+; CHECK: je
+; CHECK: andl {{[0-9]+\(%[a-z0-9]+\)}}, [[REG:%[a-z0-9]+]]
+; CHECK: movl $0, A
+; CHECK: jne
+; CHECK: movl {{%[a-z0-9]+}}, B
+; CHECK: testl [[REG]], [[REG]]
+; CHECK: je
+
+; CHECK-CGP-LABEL: @and_sink5(
+; CHECK-CGP: and i32
+; CHECK-CGP-NOT: icmp
+ %and = and i32 %a, %b
+ %cmp = icmp eq i32 %and, 0
+ br i1 %c, label %bb0, label %bb3
+bb0:
+; CHECK-CGP-LABEL: bb0:
+; CHECK-CGP-NOT: and i32
+; CHECK-CGP: icmp eq i32
+ store i32 0, i32* @A
+ br i1 %cmp, label %bb1, label %bb3
+bb1:
+; CHECK-CGP-LABEL: bb1:
+; CHECK-CGP-NOT: and i32
+; CHECK-CGP: icmp eq i32
+ %add = add i32 %a2, %b2
+ store i32 %add, i32* @B
+ br i1 %cmp, label %bb2, label %bb3
+bb2:
+ ret i32 1
+bb3:
+ ret i32 0
+}
OpenPOWER on IntegriCloud