diff options
author | Geoff Berry <gberry@codeaurora.org> | 2017-02-21 18:53:14 +0000 |
---|---|---|
committer | Geoff Berry <gberry@codeaurora.org> | 2017-02-21 18:53:14 +0000 |
commit | 5d534b6a11d6428d173279e5155b4f966dab1be7 (patch) | |
tree | b706e6c1da467f7760c6fd7d2fce71bb608970c7 /llvm/test | |
parent | 16289cfcfca9fd8cd5699a00b1bd4769ffa79144 (diff) | |
download | bcm5719-llvm-5d534b6a11d6428d173279e5155b4f966dab1be7.tar.gz bcm5719-llvm-5d534b6a11d6428d173279e5155b4f966dab1be7.zip |
[CodeGenPrepare] Sink and duplicate more 'and' instructions.
Summary:
Rework the code that was sinking/duplicating (icmp and, 0) sequences
into blocks where they were being used by conditional branches to form
more tbz instructions on AArch64. The new code is more general in that
it just looks for 'and's whose users are all (icmp x, 0) comparisons, with a target
hook used to select which subset of 'and' instructions to consider.
This change also enables 'and' sinking for X86, where it is more widely
beneficial than on AArch64.
The 'and' sinking/duplicating code is moved into the optimizeInst phase
of CodeGenPrepare, where it can take advantage of the fact that
OptimizeCmpExpression has already sunk/duplicated any icmps into the
blocks where they are used. One minor complication from this change is
that optimizeLoadExt needed to be updated to always add to the
InsertedInsts set any 'and' that it has determined should be in the
same block as its feeding load; otherwise CodeGenPrepare could loop
forever, hoisting and sinking the same 'and'.
This change fixes a regression on X86 in the tsan runtime caused by
moving GVNHoist to a later place in the optimization pipeline (see
PR31382).
Reviewers: t.p.northover, qcolombet, MatzeB
Subscribers: aemerson, mcrosier, sebpop, llvm-commits
Differential Revision: https://reviews.llvm.org/D28813
llvm-svn: 295746
Diffstat (limited to 'llvm/test')
-rw-r--r-- | llvm/test/CodeGen/AArch64/and-sink.ll | 90 | ||||
-rw-r--r-- | llvm/test/CodeGen/AArch64/fast-isel-tbz.ll | 18 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/and-sink.ll | 181 |
3 files changed, 288 insertions, 1 deletion
diff --git a/llvm/test/CodeGen/AArch64/and-sink.ll b/llvm/test/CodeGen/AArch64/and-sink.ll new file mode 100644 index 00000000000..91b7bd0db17 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/and-sink.ll @@ -0,0 +1,90 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs < %s | FileCheck %s +; RUN: opt -S -codegenprepare -mtriple=aarch64-linux %s | FileCheck --check-prefix=CHECK-CGP %s + +@A = global i32 zeroinitializer +@B = global i32 zeroinitializer +@C = global i32 zeroinitializer + +; Test that and is sunk into cmp block to form tbz. +define i32 @and_sink1(i32 %a, i1 %c) { +; CHECK-LABEL: and_sink1: +; CHECK: tbz w1, #0 +; CHECK: str wzr, [x{{[0-9]+}}, :lo12:A] +; CHECK: tbnz {{w[0-9]+}}, #2 + +; CHECK-CGP-LABEL: @and_sink1( +; CHECK-CGP-NOT: and i32 + %and = and i32 %a, 4 + br i1 %c, label %bb0, label %bb2 +bb0: +; CHECK-CGP-LABEL: bb0: +; CHECK-CGP: and i32 +; CHECK-CGP-NEXT: icmp eq i32 +; CHECK-CGP-NEXT: store +; CHECK-CGP-NEXT: br + %cmp = icmp eq i32 %and, 0 + store i32 0, i32* @A + br i1 %cmp, label %bb1, label %bb2 +bb1: + ret i32 1 +bb2: + ret i32 0 +} + +; Test that both 'and' and cmp get sunk to form tbz. 
+define i32 @and_sink2(i32 %a, i1 %c, i1 %c2) { +; CHECK-LABEL: and_sink2: +; CHECK: str wzr, [x{{[0-9]+}}, :lo12:A] +; CHECK: tbz w1, #0 +; CHECK: str wzr, [x{{[0-9]+}}, :lo12:B] +; CHECK: tbz w2, #0 +; CHECK: str wzr, [x{{[0-9]+}}, :lo12:C] +; CHECK: tbnz {{w[0-9]+}}, #2 + +; CHECK-CGP-LABEL: @and_sink2( +; CHECK-CGP-NOT: and i32 + %and = and i32 %a, 4 + store i32 0, i32* @A + br i1 %c, label %bb0, label %bb3 +bb0: +; CHECK-CGP-LABEL: bb0: +; CHECK-CGP-NOT: and i32 +; CHECK-CGP-NOT: icmp + %cmp = icmp eq i32 %and, 0 + store i32 0, i32* @B + br i1 %c2, label %bb1, label %bb3 +bb1: +; CHECK-CGP-LABEL: bb1: +; CHECK-CGP: and i32 +; CHECK-CGP-NEXT: icmp eq i32 +; CHECK-CGP-NEXT: store +; CHECK-CGP-NEXT: br + store i32 0, i32* @C + br i1 %cmp, label %bb2, label %bb0 +bb2: + ret i32 1 +bb3: + ret i32 0 +} + +; Test that 'and' is not sunk since cbz is a better alternative. +define i32 @and_sink3(i32 %a) { +; CHECK-LABEL: and_sink3: +; CHECK: and [[REG:w[0-9]+]], w0, #0x3 +; CHECK: [[LOOP:.L[A-Z0-9_]+]]: +; CHECK: str wzr, [x{{[0-9]+}}, :lo12:A] +; CHECK: cbz [[REG]], [[LOOP]] + +; CHECK-CGP-LABEL: @and_sink3( +; CHECK-CGP-NEXT: and i32 + %and = and i32 %a, 3 + br label %bb0 +bb0: +; CHECK-CGP-LABEL: bb0: +; CHECK-CGP-NOT: and i32 + %cmp = icmp eq i32 %and, 0 + store i32 0, i32* @A + br i1 %cmp, label %bb0, label %bb2 +bb2: + ret i32 0 +} diff --git a/llvm/test/CodeGen/AArch64/fast-isel-tbz.ll b/llvm/test/CodeGen/AArch64/fast-isel-tbz.ll index af817777143..d6d10318bf0 100644 --- a/llvm/test/CodeGen/AArch64/fast-isel-tbz.ll +++ b/llvm/test/CodeGen/AArch64/fast-isel-tbz.ll @@ -278,8 +278,24 @@ bb2: ; Test that we don't fold the 'and' instruction into the compare. 
define i32 @icmp_eq_and_i32(i32 %a, i1 %c) { ; CHECK-LABEL: icmp_eq_and_i32 -; CHECK: and [[REG:w[0-9]+]], w0, #0x4 +; CHECK: and [[REG:w[0-9]+]], w0, #0x3 ; CHECK-NEXT: cbz [[REG]], {{LBB.+_3}} + %1 = and i32 %a, 3 + br i1 %c, label %bb0, label %bb2 +bb0: + %2 = icmp eq i32 %1, 0 + br i1 %2, label %bb1, label %bb2, !prof !0 +bb1: + ret i32 1 +bb2: + ret i32 0 +} + +; Test that we do fold the 'and' instruction into the compare and +; generate a tbz instruction for the conditional branch. +define i32 @icmp_eq_and1bit_i32(i32 %a, i1 %c) { +; CHECK-LABEL: icmp_eq_and1bit_i32 +; CHECK: tbz {{w[0-9]+}}, #2, {{LBB.+_3}} %1 = and i32 %a, 4 br i1 %c, label %bb0, label %bb2 bb0: diff --git a/llvm/test/CodeGen/X86/and-sink.ll b/llvm/test/CodeGen/X86/and-sink.ll new file mode 100644 index 00000000000..18120d7f786 --- /dev/null +++ b/llvm/test/CodeGen/X86/and-sink.ll @@ -0,0 +1,181 @@ +; RUN: llc -mtriple=i686-unknown -verify-machineinstrs < %s | FileCheck %s +; RUN: opt < %s -codegenprepare -S -mtriple=x86_64-unknown-unknown | FileCheck --check-prefix=CHECK-CGP %s + +@A = global i32 zeroinitializer +@B = global i32 zeroinitializer +@C = global i32 zeroinitializer + +; Test that 'and' is sunk into bb0. +define i32 @and_sink1(i32 %a, i1 %c) { +; CHECK-LABEL: and_sink1: +; CHECK: testb $1, +; CHECK: je +; CHECK-NOT: andl $4, +; CHECK: movl $0, A +; CHECK: testb $4, +; CHECK: jne + +; CHECK-CGP-LABEL: @and_sink1( +; CHECK-CGP-NOT: and i32 + %and = and i32 %a, 4 + br i1 %c, label %bb0, label %bb2 +bb0: +; CHECK-CGP-LABEL: bb0: +; CHECK-CGP: and i32 +; CHECK-CGP-NEXT: icmp eq i32 +; CHECK-CGP-NEXT: store +; CHECK-CGP-NEXT: br + %cmp = icmp eq i32 %and, 0 + store i32 0, i32* @A + br i1 %cmp, label %bb1, label %bb2 +bb1: + ret i32 1 +bb2: + ret i32 0 +} + +; Test that both 'and' and cmp get sunk to bb1. 
+define i32 @and_sink2(i32 %a, i1 %c, i1 %c2) { +; CHECK-LABEL: and_sink2: +; CHECK: movl $0, A +; CHECK: testb $1, +; CHECK: je +; CHECK-NOT: andl $4, +; CHECK: movl $0, B +; CHECK: testb $1, +; CHECK: je +; CHECK: movl $0, C +; CHECK: testb $4, +; CHECK: jne + +; CHECK-CGP-LABEL: @and_sink2( +; CHECK-CGP-NOT: and i32 + %and = and i32 %a, 4 + store i32 0, i32* @A + br i1 %c, label %bb0, label %bb3 +bb0: +; CHECK-CGP-LABEL: bb0: +; CHECK-CGP-NOT: and i32 +; CHECK-CGP-NOT: icmp + %cmp = icmp eq i32 %and, 0 + store i32 0, i32* @B + br i1 %c2, label %bb1, label %bb3 +bb1: +; CHECK-CGP-LABEL: bb1: +; CHECK-CGP: and i32 +; CHECK-CGP-NEXT: icmp eq i32 +; CHECK-CGP-NEXT: store +; CHECK-CGP-NEXT: br + store i32 0, i32* @C + br i1 %cmp, label %bb2, label %bb0 +bb2: + ret i32 1 +bb3: + ret i32 0 +} + +; Test that CodeGenPrepare doesn't get stuck in a loop sinking and hoisting a masked load. +define i32 @and_sink3(i1 %c, i32* %p) { +; CHECK-LABEL: and_sink3: +; CHECK: testb $1, +; CHECK: je +; CHECK: movzbl +; CHECK: movl $0, A +; CHECK: testl % +; CHECK: je + +; CHECK-CGP-LABEL: @and_sink3( +; CHECK-CGP: load i32 +; CHECK-CGP-NEXT: and i32 + %load = load i32, i32* %p + %and = and i32 %load, 255 + br i1 %c, label %bb0, label %bb2 +bb0: +; CHECK-CGP-LABEL: bb0: +; CHECK-CGP-NOT: and i32 +; CHECK-CGP: icmp eq i32 + %cmp = icmp eq i32 %and, 0 + store i32 0, i32* @A + br i1 %cmp, label %bb1, label %bb2 +bb1: + ret i32 1 +bb2: + ret i32 0 +} + +; Test that CodeGenPrepare sinks/duplicates non-immediate 'and'. 
+define i32 @and_sink4(i32 %a, i32 %b, i1 %c) { +; CHECK-LABEL: and_sink4: +; CHECK: testb $1, +; CHECK: je +; CHECK-NOT: andl +; CHECK: movl $0, A +; CHECK: testl [[REG1:%[a-z0-9]+]], [[REG2:%[a-z0-9]+]] +; CHECK: jne +; CHECK: movl {{%[a-z0-9]+}}, B +; CHECK: testl [[REG1]], [[REG2]] +; CHECK: je + +; CHECK-CGP-LABEL: @and_sink4( +; CHECK-CGP-NOT: and i32 +; CHECK-CGP-NOT: icmp + %and = and i32 %a, %b + %cmp = icmp eq i32 %and, 0 + br i1 %c, label %bb0, label %bb3 +bb0: +; CHECK-CGP-LABEL: bb0: +; CHECK-CGP: and i32 +; CHECK-CGP-NEXT: icmp eq i32 + store i32 0, i32* @A + br i1 %cmp, label %bb1, label %bb3 +bb1: +; CHECK-CGP-LABEL: bb1: +; CHECK-CGP: and i32 +; CHECK-CGP-NEXT: icmp eq i32 + %add = add i32 %a, %b + store i32 %add, i32* @B + br i1 %cmp, label %bb2, label %bb3 +bb2: + ret i32 1 +bb3: + ret i32 0 +} + + +; Test that CodeGenPrepare doesn't sink/duplicate non-immediate 'and' +; when it would increase register pressure. +define i32 @and_sink5(i32 %a, i32 %b, i32 %a2, i32 %b2, i1 %c) { +; CHECK-LABEL: and_sink5: +; CHECK: testb $1, +; CHECK: je +; CHECK: andl {{[0-9]+\(%[a-z0-9]+\)}}, [[REG:%[a-z0-9]+]] +; CHECK: movl $0, A +; CHECK: jne +; CHECK: movl {{%[a-z0-9]+}}, B +; CHECK: testl [[REG]], [[REG]] +; CHECK: je + +; CHECK-CGP-LABEL: @and_sink5( +; CHECK-CGP: and i32 +; CHECK-CGP-NOT: icmp + %and = and i32 %a, %b + %cmp = icmp eq i32 %and, 0 + br i1 %c, label %bb0, label %bb3 +bb0: +; CHECK-CGP-LABEL: bb0: +; CHECK-CGP-NOT: and i32 +; CHECK-CGP: icmp eq i32 + store i32 0, i32* @A + br i1 %cmp, label %bb1, label %bb3 +bb1: +; CHECK-CGP-LABEL: bb1: +; CHECK-CGP-NOT: and i32 +; CHECK-CGP: icmp eq i32 + %add = add i32 %a2, %b2 + store i32 %add, i32* @B + br i1 %cmp, label %bb2, label %bb3 +bb2: + ret i32 1 +bb3: + ret i32 0 +} |