diff options
Diffstat (limited to 'llvm/test/CodeGen/PowerPC/tail-dup-layout.ll')
-rw-r--r-- | llvm/test/CodeGen/PowerPC/tail-dup-layout.ll | 378 |
1 files changed, 352 insertions, 26 deletions
diff --git a/llvm/test/CodeGen/PowerPC/tail-dup-layout.ll b/llvm/test/CodeGen/PowerPC/tail-dup-layout.ll index 6790aa8e944..95c23ade513 100644 --- a/llvm/test/CodeGen/PowerPC/tail-dup-layout.ll +++ b/llvm/test/CodeGen/PowerPC/tail-dup-layout.ll @@ -1,59 +1,59 @@ -; RUN: llc -outline-optional-branches -O2 < %s | FileCheck %s +; RUN: llc -O2 < %s | FileCheck %s target datalayout = "e-m:e-i64:64-n32:64" target triple = "powerpc64le-grtev4-linux-gnu" ; Intended layout: -; The outlining flag produces the layout +; The chain-based outlining produces the layout ; test1 ; test2 ; test3 ; test4 -; exit ; optional1 ; optional2 ; optional3 ; optional4 +; exit ; Tail duplication puts test n+1 at the end of optional n ; so optional1 includes a copy of test2 at the end, and branches ; to test3 (at the top) or falls through to optional 2. -; The CHECK statements check for the whole string of tests and exit block, +; The CHECK statements check for the whole string of tests ; and then check that the correct test has been duplicated into the end of ; the optional blocks and that the optional blocks are in the correct order. -;CHECK-LABEL: f: +;CHECK-LABEL: straight_test: ; test1 may have been merged with entry ;CHECK: mr [[TAGREG:[0-9]+]], 3 ;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 1 -;CHECK-NEXT: bc 12, 1, [[OPT1LABEL:[._0-9A-Za-z]+]] -;CHECK-NEXT: [[TEST2LABEL:[._0-9A-Za-z]+]]: # %test2 +;CHECK-NEXT: bc 12, 1, .[[OPT1LABEL:[_0-9A-Za-z]+]] +;CHECK-NEXT: # %test2 ;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30 -;CHECK-NEXT: bne 0, [[OPT2LABEL:[._0-9A-Za-z]+]] -;CHECK-NEXT: [[TEST3LABEL:[._0-9A-Za-z]+]]: # %test3 +;CHECK-NEXT: bne 0, .[[OPT2LABEL:[_0-9A-Za-z]+]] +;CHECK-NEXT: .[[TEST3LABEL:[_0-9A-Za-z]+]]: # %test3 ;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 29, 29 -;CHECK-NEXT: bne 0, .[[OPT3LABEL:[._0-9A-Za-z]+]] -;CHECK-NEXT: [[TEST4LABEL:[._0-9A-Za-z]+]]: # %test4 +;CHECK-NEXT: bne 0, .[[OPT3LABEL:[_0-9A-Za-z]+]] +;CHECK-NEXT: .[[TEST4LABEL:[_0-9A-Za-z]+]]: # %test4 ;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 28, 28 -;CHECK-NEXT: bne 0, .[[OPT4LABEL:[._0-9A-Za-z]+]] -;CHECK-NEXT: [[EXITLABEL:[._0-9A-Za-z]+]]: # %exit +;CHECK-NEXT: bne 0, .[[OPT4LABEL:[_0-9A-Za-z]+]] +;CHECK-NEXT: .[[EXITLABEL:[_0-9A-Za-z]+]]: # %exit ;CHECK: blr -;CHECK-NEXT: [[OPT1LABEL]] +;CHECK-NEXT: .[[OPT1LABEL]]: ;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30 -;CHECK-NEXT: beq 0, [[TEST3LABEL]] -;CHECK-NEXT: [[OPT2LABEL]] +;CHECK-NEXT: beq 0, .[[TEST3LABEL]] +;CHECK-NEXT: .[[OPT2LABEL]]: ;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 29, 29 -;CHECK-NEXT: beq 0, [[TEST4LABEL]] -;CHECK-NEXT: [[OPT3LABEL]] +;CHECK-NEXT: beq 0, .[[TEST4LABEL]] +;CHECK-NEXT: .[[OPT3LABEL]]: ;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 28, 28 -;CHECK-NEXT: beq 0, [[EXITLABEL]] -;CHECK-NEXT: [[OPT4LABEL]] -;CHECK: b [[EXITLABEL]] +;CHECK-NEXT: beq 0, .[[EXITLABEL]] +;CHECK-NEXT: .[[OPT4LABEL]]: +;CHECK: b .[[EXITLABEL]] -define void @f(i32 %tag) { +define void @straight_test(i32 %tag) { entry: br label %test1 test1: %tagbit1 = and i32 %tag, 1 %tagbit1eq0 = icmp eq i32 %tagbit1, 0 - br i1 %tagbit1eq0, label %test2, label %optional1 + br i1 %tagbit1eq0, label %test2, label %optional1, !prof !1 optional1: call void @a() call void @a() @@ -63,7 +63,7 @@ optional1: test2: %tagbit2 = and i32 %tag, 2 %tagbit2eq0 = icmp eq i32 %tagbit2, 0 - br i1 %tagbit2eq0, label %test3, label %optional2 + br i1 %tagbit2eq0, label %test3, label %optional2, !prof !1 optional2: call void @b() call void @b() @@ -73,7 +73,7 @@ optional2: test3: %tagbit3 = and i32 %tag, 4 %tagbit3eq0 = icmp eq i32 %tagbit3, 0 - br i1 %tagbit3eq0, label %test4, label %optional3 + br i1 %tagbit3eq0, label %test4, label %optional3, !prof !1 optional3: call void @c() call void @c() @@ -83,7 +83,7 @@ optional3: test4: %tagbit4 = and i32 %tag, 8 %tagbit4eq0 = icmp eq i32 %tagbit4, 0 - br i1 %tagbit4eq0, label %exit, label %optional4 + br i1 %tagbit4eq0, label %exit, label %optional4, !prof !1 optional4: call void @d() call void @d() @@ -94,7 +94,333 @@ exit: ret void } +; Intended layout: +; The chain-based outlining produces the layout +; entry +; --- Begin loop --- +; for.latch +; for.check +; test1 +; test2 +; test3 +; test4 +; optional1 +; optional2 +; optional3 +; optional4 +; --- End loop --- +; exit +; The CHECK statements check for the whole string of tests and exit block, +; and then check that the correct test has been duplicated into the end of +; the optional blocks and that the optional blocks are in the correct order. +;CHECK-LABEL: loop_test: +;CHECK: add [[TAGPTRREG:[0-9]+]], 3, 4 +;CHECK: .[[LATCHLABEL:[._0-9A-Za-z]+]]: # %for.latch +;CHECK: addi +;CHECK: .[[CHECKLABEL:[._0-9A-Za-z]+]]: # %for.check +;CHECK: lwz [[TAGREG:[0-9]+]], 0([[TAGPTRREG]]) +;CHECK: # %test1 +;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 1 +;CHECK-NEXT: bc 12, 1, .[[OPT1LABEL:[._0-9A-Za-z]+]] +;CHECK-NEXT: # %test2 +;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30 +;CHECK-NEXT: bne 0, .[[OPT2LABEL:[._0-9A-Za-z]+]] +;CHECK-NEXT: .[[TEST3LABEL:[._0-9A-Za-z]+]]: # %test3 +;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 29, 29 +;CHECK-NEXT: bne 0, .[[OPT3LABEL:[._0-9A-Za-z]+]] +;CHECK-NEXT: .[[TEST4LABEL:[._0-9A-Za-z]+]]: # %{{(test4|optional3)}} +;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 28, 28 +;CHECK-NEXT: beq 0, .[[LATCHLABEL]] +;CHECK-NEXT: b .[[OPT4LABEL:[._0-9A-Za-z]+]] +;CHECK: [[OPT1LABEL]] +;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30 +;CHECK-NEXT: beq 0, .[[TEST3LABEL]] +;CHECK-NEXT: .[[OPT2LABEL]] +;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 29, 29 +;CHECK-NEXT: beq 0, .[[TEST4LABEL]] +;CHECK-NEXT: .[[OPT3LABEL]] +;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 28, 28 +;CHECK-NEXT: beq 0, .[[LATCHLABEL]] +;CHECK: [[OPT4LABEL]]: +;CHECK: b .[[LATCHLABEL]] +define void @loop_test(i32* %tags, i32 %count) { +entry: + br label %for.check +for.check: + %count.loop = phi i32 [%count, %entry], [%count.sub, %for.latch] + %done.count = icmp ugt i32 %count.loop, 0 + %tag_ptr = getelementptr inbounds i32, i32* %tags, i32 %count + %tag = load i32, i32* %tag_ptr + %done.tag = icmp eq i32 %tag, 0 + %done = and i1 %done.count, %done.tag + br i1 %done, label %test1, label %exit, !prof !1 +test1: + %tagbit1 = and i32 %tag, 1 + %tagbit1eq0 = icmp eq i32 %tagbit1, 0 + br i1 %tagbit1eq0, label %test2, label %optional1, !prof !1 +optional1: + call void @a() + call void @a() + call void @a() + call void @a() + br label %test2 +test2: + %tagbit2 = and i32 %tag, 2 + %tagbit2eq0 = icmp eq i32 %tagbit2, 0 + br i1 %tagbit2eq0, label %test3, label %optional2, !prof !1 +optional2: + call void @b() + call void @b() + call void @b() + call void @b() + br label %test3 +test3: + %tagbit3 = and i32 %tag, 4 + %tagbit3eq0 = icmp eq i32 %tagbit3, 0 + br i1 %tagbit3eq0, label %test4, label %optional3, !prof !1 +optional3: + call void @c() + call void @c() + call void @c() + call void @c() + br label %test4 +test4: + %tagbit4 = and i32 %tag, 8 + %tagbit4eq0 = icmp eq i32 %tagbit4, 0 + br i1 %tagbit4eq0, label %for.latch, label %optional4, !prof !1 +optional4: + call void @d() + call void @d() + call void @d() + call void @d() + br label %for.latch +for.latch: + %count.sub = sub i32 %count.loop, 1 + br label %for.check +exit: + ret void +} + +; The block then2 is not unavoidable, meaning it does not dominate the exit. +; But since it can be tail-duplicated, it should be placed as a fallthrough from +; test2 and copied. The purpose here is to make sure that the tail-duplication +; code is independent of the outlining code, which works by choosing the +; "unavoidable" blocks. +; CHECK-LABEL: avoidable_test: +; CHECK: # %entry +; CHECK: andi. +; CHECK: # %test2 +; Make sure then2 falls through from test2 +; CHECK-NOT: # %{{[-_a-zA-Z0-9]+}} +; CHECK: # %then2 +; CHECK: rlwinm. {{[0-9]+}}, {{[0-9]+}}, 0, 29, 29 +; CHECK: # %else1 +; CHECK: bl a +; CHECK: bl a +; Make sure then2 was copied into else1 +; CHECK: rlwinm. {{[0-9]+}}, {{[0-9]+}}, 0, 29, 29 +; CHECK: # %end1 +; CHECK: bl d +; CHECK: # %else2 +; CHECK: bl c +; CHECK: # %end2 +define void @avoidable_test(i32 %tag) { +entry: + br label %test1 +test1: + %tagbit1 = and i32 %tag, 1 + %tagbit1eq0 = icmp eq i32 %tagbit1, 0 + br i1 %tagbit1eq0, label %test2, label %else1, !prof !1 ; %test2 more likely +else1: + call void @a() + call void @a() + br label %then2 +test2: + %tagbit2 = and i32 %tag, 2 + %tagbit2eq0 = icmp eq i32 %tagbit2, 0 + br i1 %tagbit2eq0, label %then2, label %else2, !prof !1 ; %then2 more likely +then2: + %tagbit3 = and i32 %tag, 4 + %tagbit3eq0 = icmp eq i32 %tagbit3, 0 + br i1 %tagbit3eq0, label %end2, label %end1, !prof !1 ; %end2 more likely +else2: + call void @c() + br label %end2 +end2: + ret void +end1: + call void @d() + ret void +} + +; CHECK-LABEL: trellis_test +; The number in the block labels is the expected block frequency given the +; probabilities annotated. There is a conflict in the b;c->d;e trellis that +; should be resolved as c->e;b->d. +; The d;e->f;g trellis should be resolved as e->g;d->f. +; The f;g->h;i trellis should be resolved as f->i;g->h. +; The h;i->j;ret trellis contains a triangle edge, and should be resolved as +; h->j->ret +; CHECK: # %entry +; CHECK: # %c10 +; CHECK: # %e9 +; CHECK: # %g10 +; CHECK: # %h10 +; CHECK: # %j8 +; CHECK: # %ret +; CHECK: # %b6 +; CHECK: # %d7 +; CHECK: # %f6 +; CHECK: # %i6 +define void @trellis_test(i32 %tag) { +entry: + br label %a16 +a16: + call void @a() + call void @a() + %tagbits.a = and i32 %tag, 3 + %tagbits.a.eq0 = icmp eq i32 %tagbits.a, 0 + br i1 %tagbits.a.eq0, label %c10, label %b6, !prof !1 ; 10 to 6 +c10: + call void @c() + call void @c() + %tagbits.c = and i32 %tag, 12 + %tagbits.c.eq0 = icmp eq i32 %tagbits.c, 0 + ; Both of these edges should be hotter than the other incoming edge + ; for e9 or d7 + br i1 %tagbits.c.eq0, label %e9, label %d7, !prof !3 ; 6 to 4 +e9: + call void @e() + call void @e() + %tagbits.e = and i32 %tag, 48 + %tagbits.e.eq0 = icmp eq i32 %tagbits.e, 0 + br i1 %tagbits.e.eq0, label %g10, label %f6, !prof !4 ; 7 to 2 +g10: + call void @g() + call void @g() + %tagbits.g = and i32 %tag, 192 + %tagbits.g.eq0 = icmp eq i32 %tagbits.g, 0 + br i1 %tagbits.g.eq0, label %i6, label %h10, !prof !5 ; 2 to 8 +i6: + call void @i() + call void @i() + %tagbits.i = and i32 %tag, 768 + %tagbits.i.eq0 = icmp eq i32 %tagbits.i, 0 + br i1 %tagbits.i.eq0, label %ret, label %j8, !prof !2 ; balanced (3 to 3) +b6: + call void @b() + call void @b() + %tagbits.b = and i32 %tag, 12 + %tagbits.b.eq1 = icmp eq i32 %tagbits.b, 8 + br i1 %tagbits.b.eq1, label %e9, label %d7, !prof !2 ; balanced (3 to 3) +d7: + call void @d() + call void @d() + %tagbits.d = and i32 %tag, 48 + %tagbits.d.eq1 = icmp eq i32 %tagbits.d, 32 + br i1 %tagbits.d.eq1, label %g10, label %f6, !prof !6 ; 3 to 4 +f6: + call void @f() + call void @f() + %tagbits.f = and i32 %tag, 192 + %tagbits.f.eq1 = icmp eq i32 %tagbits.f, 128 + br i1 %tagbits.f.eq1, label %i6, label %h10, !prof !7 ; 4 to 2 +h10: + call void @h() + call void @h() + %tagbits.h = and i32 %tag, 768 + %tagbits.h.eq1 = icmp eq i32 %tagbits.h, 512 + br i1 %tagbits.h.eq1, label %ret, label %j8, !prof !2 ; balanced (5 to 5) +j8: + call void @j() + call void @j() + br label %ret +ret: + ret void +} + +; Verify that we still consider tail-duplication opportunities if we find a +; triangle trellis. Here D->F->G is the triangle, and D;E are both predecessors +; of both F and G. The basic trellis algorithm picks the F->G edge, but after +; checking, it's profitable to duplicate G into F. The weights here are not +; really important. They are there to help make the test stable. +; CHECK-LABEL: trellis_then_dup_test +; CHECK: # %entry +; CHECK: # %b +; CHECK: # %d +; CHECK: # %g +; CHECK: # %ret1 +; CHECK: # %c +; CHECK: # %e +; CHECK: # %f +; CHECK: # %ret2 +; CHECK: # %ret +define void @trellis_then_dup_test(i32 %tag) { +entry: + br label %a +a: + call void @a() + call void @a() + %tagbits.a = and i32 %tag, 3 + %tagbits.a.eq0 = icmp eq i32 %tagbits.a, 0 + br i1 %tagbits.a.eq0, label %b, label %c, !prof !1 ; 5 to 3 +b: + call void @b() + call void @b() + %tagbits.b = and i32 %tag, 12 + %tagbits.b.eq1 = icmp eq i32 %tagbits.b, 8 + br i1 %tagbits.b.eq1, label %d, label %e, !prof !1 ; 5 to 3 +d: + call void @d() + call void @d() + %tagbits.d = and i32 %tag, 48 + %tagbits.d.eq1 = icmp eq i32 %tagbits.d, 32 + br i1 %tagbits.d.eq1, label %g, label %f, !prof !1 ; 5 to 3 +f: + call void @f() + call void @f() + br label %g +g: + %tagbits.g = and i32 %tag, 192 + %tagbits.g.eq0 = icmp eq i32 %tagbits.g, 0 + br i1 %tagbits.g.eq0, label %ret1, label %ret2, !prof !2 ; balanced +c: + call void @c() + call void @c() + %tagbits.c = and i32 %tag, 12 + %tagbits.c.eq0 = icmp eq i32 %tagbits.c, 0 + br i1 %tagbits.c.eq0, label %d, label %e, !prof !1 ; 5 to 3 +e: + call void @e() + call void @e() + %tagbits.e = and i32 %tag, 48 + %tagbits.e.eq0 = icmp eq i32 %tagbits.e, 0 + br i1 %tagbits.e.eq0, label %g, label %f, !prof !1 ; 5 to 3 +ret1: + call void @a() + br label %ret +ret2: + call void @b() + br label %ret +ret: + ret void +} + declare void @a() declare void @b() declare void @c() declare void @d() +declare void @e() +declare void @f() +declare void @g() +declare void @h() +declare void @i() +declare void @j() + +!1 = !{!"branch_weights", i32 5, i32 3} +!2 = !{!"branch_weights", i32 50, i32 50} +!3 = !{!"branch_weights", i32 6, i32 4} +!4 = !{!"branch_weights", i32 7, i32 2} +!5 = !{!"branch_weights", i32 2, i32 8} +!6 = !{!"branch_weights", i32 3, i32 4} +!7 = !{!"branch_weights", i32 4, i32 2} |