diff options
Diffstat (limited to 'llvm/test/Transforms/LoopVersioning')
8 files changed, 481 insertions, 0 deletions
diff --git a/llvm/test/Transforms/LoopVersioning/add-phi-update-users.ll b/llvm/test/Transforms/LoopVersioning/add-phi-update-users.ll new file mode 100644 index 00000000000..22d5dcd095b --- /dev/null +++ b/llvm/test/Transforms/LoopVersioning/add-phi-update-users.ll @@ -0,0 +1,65 @@ +; RUN: opt < %s -loop-versioning -S -o - | FileCheck %s + +; This test case used to abort compilation with the following output: +; +; Instruction does not dominate all uses! +; %t2 = load i16, i16* @b, align 1, !tbaa !2, !alias.scope !6 +; %tobool = icmp eq i16 %t2, 0 +; LLVM ERROR: Broken function found, compilation aborted! +; +; due to a fault where we did not replace the use of %t2 in the icmp in +; for.end, when adding a new PHI node for the versioned loops based on the +; loop-defined values used outside of the loop. +; +; Verify that the code compiles, that we get a versioned loop, and that the +; uses of %t2 in for.end and if.then are updated to use the value from the +; added phi node. + +; CHECK: define void @f1 +; CHECK: for.end: +; CHECK-NEXT: %t2.lver = phi i16 [ %t2, %for.body ], [ %t2.lver.orig, %for.body.lver.orig ] +; CHECK-NEXT: %tobool = icmp eq i16 %t2.lver, 0 +; CHECK: if.then: +; CHECK-NEXT: store i16 %t2.lver + +@a = dso_local global i16 0, align 1 +@b = dso_local global i16 0, align 1 +@c = dso_local global i16* null, align 1 + +define void @f1() { +entry: + %t0 = load i16*, i16** @c, align 1 + br label %for.cond + +for.cond: ; preds = %for.cond.backedge, %entry + br label %for.body + +for.body: ; preds = %for.cond, %for.body + %t1 = phi i64 [ 0, %for.cond ], [ %inc, %for.body ] + %t2 = load i16, i16* @b, align 1, !tbaa !2 + store i16 %t2, i16* %t0, align 1, !tbaa !2 + %inc = add nuw nsw i64 %t1, 1 + %cmp = icmp ult i64 %inc, 3 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + %tobool = icmp eq i16 %t2, 0 + br i1 %tobool, label %for.cond.backedge, label %if.then + +for.cond.backedge: ; preds = %for.end, %if.then + br label %for.cond + +if.then: ; preds = 
%for.end + store i16 %t2, i16* @a, align 1, !tbaa !2 + br label %for.cond.backedge +} + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 1} +!1 = !{!"clang version 7.0.0"} +!2 = !{!3, !3, i64 0} +!3 = !{!"long long", !4, i64 0} +!4 = !{!"omnipotent char", !5, i64 0} +!5 = !{!"Simple C/C++ TBAA"} diff --git a/llvm/test/Transforms/LoopVersioning/basic.ll b/llvm/test/Transforms/LoopVersioning/basic.ll new file mode 100644 index 00000000000..f59caecadae --- /dev/null +++ b/llvm/test/Transforms/LoopVersioning/basic.ll @@ -0,0 +1,47 @@ +; RUN: opt -basicaa -loop-versioning -S < %s | FileCheck %s + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" + +; Version this loop with overlap checks between the pointer pairs (a, c) and (b, c). + +define void @f(i32* %a, i32* %b, i32* %c) { +entry: + br label %for.body + +; CHECK: for.body.lver.check: +; CHECK: icmp +; CHECK: icmp +; CHECK: icmp +; CHECK: icmp +; CHECK-NOT: icmp +; CHECK: br i1 %memcheck.conflict, label %for.body.ph.lver.orig, label %for.body.ph + +; CHECK: for.body.ph.lver.orig: +; CHECK: for.body.lver.orig: +; CHECK: br i1 %exitcond.lver.orig, label %for.end, label %for.body.lver.orig +; CHECK: for.body.ph: +; CHECK: for.body: +; CHECK: br i1 %exitcond, label %for.end, label %for.body +; CHECK: for.end: + +for.body: ; preds = %for.body, %entry + %ind = phi i64 [ 0, %entry ], [ %add, %for.body ] + + %arrayidxA = getelementptr inbounds i32, i32* %a, i64 %ind + %loadA = load i32, i32* %arrayidxA, align 4 + + %arrayidxB = getelementptr inbounds i32, i32* %b, i64 %ind + %loadB = load i32, i32* %arrayidxB, align 4 + + %mulC = mul i32 %loadA, %loadB + + %arrayidxC = getelementptr inbounds i32, i32* %c, i64 %ind + store i32 %mulC, i32* %arrayidxC, align 4 + + %add = add nuw nsw i64 %ind, 1 + %exitcond = icmp eq i64 %add, 20 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} diff --git 
a/llvm/test/Transforms/LoopVersioning/exit-block-dominates-rt-check-block.ll b/llvm/test/Transforms/LoopVersioning/exit-block-dominates-rt-check-block.ll new file mode 100644 index 00000000000..960c890516c --- /dev/null +++ b/llvm/test/Transforms/LoopVersioning/exit-block-dominates-rt-check-block.ll @@ -0,0 +1,38 @@ +; This test ensures loop versioning does not produce an invalid dominator tree +; if the exit block of the loop (bb0) dominates the runtime check block +; (bb1 will become the runtime check block). + +; RUN: opt -loop-distribute -enable-loop-distribute -verify-dom-info -S -o - %s > %t +; RUN: opt -loop-simplify -loop-distribute -enable-loop-distribute -verify-dom-info -S -o - %s > %t +; RUN: FileCheck --check-prefix CHECK-VERSIONING -input-file %t %s + +; RUN: opt -loop-versioning -verify-dom-info -S -o - %s > %t +; RUN: opt -loop-simplify -loop-versioning -verify-dom-info -S -o - %s > %t +; RUN: FileCheck --check-prefix CHECK-VERSIONING -input-file %t %s + +@c1 = external global i16 + +define void @f(i16 %a) { + br label %bb0 + +bb0: + br label %bb1 + +bb1: + %tmp1 = load i16, i16* @c1 + br label %bb2 + +bb2: + %tmp2 = phi i16 [ %tmp1, %bb1 ], [ %tmp3, %bb2 ] + %tmp4 = getelementptr inbounds [1 x i32], [1 x i32]* undef, i32 0, i32 4 + store i32 1, i32* %tmp4 + %tmp5 = getelementptr inbounds [1 x i32], [1 x i32]* undef, i32 0, i32 9 + store i32 0, i32* %tmp5 + %tmp3 = add i16 %tmp2, 1 + store i16 %tmp2, i16* @c1 + %tmp6 = icmp sle i16 %tmp3, 0 + br i1 %tmp6, label %bb2, label %bb0 +} + +; Simple check to make sure loop versioning happened. NOTE(review): each pair of RUN lines above redirects both opt runs to the same %t, so FileCheck only sees the output of the second (-loop-simplify) run. +; CHECK-VERSIONING: bb2.lver.check: diff --git a/llvm/test/Transforms/LoopVersioning/incorrect-phi.ll b/llvm/test/Transforms/LoopVersioning/incorrect-phi.ll new file mode 100644 index 00000000000..de170be1376 --- /dev/null +++ b/llvm/test/Transforms/LoopVersioning/incorrect-phi.ll @@ -0,0 +1,60 @@ +; RUN: opt -loop-versioning -S < %s | FileCheck %s + +; Make sure all PHIs are properly updated in the exit block. 
Based on +; PR28037. + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@x = external global [2 x [3 x [5 x i16]]] + +; CHECK-LABEL: @phi_with_undef +define void @phi_with_undef() { +bb6.lr.ph: ; entry block, no predecessors + br label %bb6 + +bb6: ; preds = %bb6.lr.ph, %bb6 + %_tmp1423 = phi i64 [ undef, %bb6.lr.ph ], [ %_tmp142, %bb6 ] + %_tmp123 = getelementptr [2 x [3 x [5 x i16]]], [2 x [3 x [5 x i16]]]* @x, i16 0, i64 undef + %_tmp126 = getelementptr [3 x [5 x i16]], [3 x [5 x i16]]* %_tmp123, i16 0, i64 %_tmp1423 + %_tmp129 = getelementptr [5 x i16], [5 x i16]* %_tmp126, i16 0, i64 undef + %_tmp130 = load i16, i16* %_tmp129 + store i16 undef, i16* getelementptr ([2 x [3 x [5 x i16]]], [2 x [3 x [5 x i16]]]* @x, i64 0, i64 undef, i64 undef, i64 undef) + %_tmp142 = add i64 %_tmp1423, 1 + br i1 false, label %bb6, label %loop.exit + +loop.exit: ; preds = %bb6 + %_tmp142.lcssa = phi i64 [ %_tmp142, %bb6 ] + %split = phi i16 [ undef, %bb6 ] +; CHECK: %split = phi i16 [ undef, %bb6 ], [ undef, %bb6.lver.orig ] + br label %bb9 + +bb9: ; preds = %loop.exit + ret void +} + +; CHECK-LABEL: @phi_with_non_loop_defined_value +define void @phi_with_non_loop_defined_value() { +bb6.lr.ph: ; entry block, no predecessors + %t = add i16 1, 1 + br label %bb6 + +bb6: ; preds = %bb6.lr.ph, %bb6 + %_tmp1423 = phi i64 [ undef, %bb6.lr.ph ], [ %_tmp142, %bb6 ] + %_tmp123 = getelementptr [2 x [3 x [5 x i16]]], [2 x [3 x [5 x i16]]]* @x, i16 0, i64 undef + %_tmp126 = getelementptr [3 x [5 x i16]], [3 x [5 x i16]]* %_tmp123, i16 0, i64 %_tmp1423 + %_tmp129 = getelementptr [5 x i16], [5 x i16]* %_tmp126, i16 0, i64 undef + %_tmp130 = load i16, i16* %_tmp129 + store i16 undef, i16* getelementptr ([2 x [3 x [5 x i16]]], [2 x [3 x [5 x i16]]]* @x, i64 0, i64 undef, i64 undef, i64 undef) + %_tmp142 = add i64 %_tmp1423, 1 + br i1 false, label %bb6, label %loop.exit + +loop.exit: ; preds = %bb6 + %_tmp142.lcssa = phi i64 [ %_tmp142, %bb6 
] + %split = phi i16 [ %t, %bb6 ] +; CHECK: %split = phi i16 [ %t, %bb6 ], [ %t, %bb6.lver.orig ] + br label %bb9 + +bb9: ; preds = %loop.exit + ret void +} diff --git a/llvm/test/Transforms/LoopVersioning/lcssa.ll b/llvm/test/Transforms/LoopVersioning/lcssa.ll new file mode 100644 index 00000000000..64993061008 --- /dev/null +++ b/llvm/test/Transforms/LoopVersioning/lcssa.ll @@ -0,0 +1,72 @@ +; RUN: opt -basicaa -loop-versioning -S < %s | FileCheck %s +target triple = "x86_64-unknown-linux-gnu" + +define void @fill(i8** %ls1.20, i8** %ls2.21, i8* %cse3.22) { +; CHECK: bb1.lver.check: +; CHECK: br i1 %memcheck.conflict, label %bb1.ph.lver.orig, label %bb1.ph +bb1.ph: + %ls1.20.promoted = load i8*, i8** %ls1.20 + %ls2.21.promoted = load i8*, i8** %ls2.21 + br label %bb1 + +bb1: + %_tmp302 = phi i8* [ %ls2.21.promoted, %bb1.ph ], [ %_tmp30, %bb1 ] + %_tmp281 = phi i8* [ %ls1.20.promoted, %bb1.ph ], [ %_tmp28, %bb1 ] + %_tmp14 = getelementptr i8, i8* %_tmp281, i16 -1 + %_tmp15 = load i8, i8* %_tmp14 + %add = add i8 %_tmp15, 1 + store i8 %add, i8* %_tmp281 + store i8 %add, i8* %_tmp302 + %_tmp28 = getelementptr i8, i8* %_tmp281, i16 1 + %_tmp30 = getelementptr i8, i8* %_tmp302, i16 1 + br i1 false, label %bb1, label %bb3.loopexit + +bb3.loopexit: + %_tmp30.lcssa = phi i8* [ %_tmp30, %bb1 ] + %_tmp15.lcssa = phi i8 [ %_tmp15, %bb1 ] + %_tmp28.lcssa = phi i8* [ %_tmp28, %bb1 ] + store i8* %_tmp28.lcssa, i8** %ls1.20 + store i8 %_tmp15.lcssa, i8* %cse3.22 + store i8* %_tmp30.lcssa, i8** %ls2.21 + br label %bb3 + +bb3: + ret void +} + +define void @fill_no_null_opt(i8** %ls1.20, i8** %ls2.21, i8* %cse3.22) #0 { +; CHECK-LABEL: fill_no_null_opt( +; CHECK: bb1.lver.check: +; CHECK: %lver.safe = or i1 %memcheck.conflict, %{{.*}} +; CHECK: br i1 %lver.safe, label %bb1.ph.lver.orig, label %bb1.ph +bb1.ph: + %ls1.20.promoted = load i8*, i8** %ls1.20 + %ls2.21.promoted = load i8*, i8** %ls2.21 + br label %bb1 + +bb1: + %_tmp302 = phi i8* [ %ls2.21.promoted, %bb1.ph ], [ 
%_tmp30, %bb1 ] + %_tmp281 = phi i8* [ %ls1.20.promoted, %bb1.ph ], [ %_tmp28, %bb1 ] + %_tmp14 = getelementptr i8, i8* %_tmp281, i16 -1 + %_tmp15 = load i8, i8* %_tmp14 + %add = add i8 %_tmp15, 1 + store i8 %add, i8* %_tmp281 + store i8 %add, i8* %_tmp302 + %_tmp28 = getelementptr i8, i8* %_tmp281, i16 1 + %_tmp30 = getelementptr i8, i8* %_tmp302, i16 1 + br i1 false, label %bb1, label %bb3.loopexit + +bb3.loopexit: + %_tmp30.lcssa = phi i8* [ %_tmp30, %bb1 ] + %_tmp15.lcssa = phi i8 [ %_tmp15, %bb1 ] + %_tmp28.lcssa = phi i8* [ %_tmp28, %bb1 ] + store i8* %_tmp28.lcssa, i8** %ls1.20 + store i8 %_tmp15.lcssa, i8* %cse3.22 + store i8* %_tmp30.lcssa, i8** %ls2.21 + br label %bb3 + +bb3: + ret void +} + +attributes #0 = { "null-pointer-is-valid"="true" } diff --git a/llvm/test/Transforms/LoopVersioning/loop-invariant-bound.ll b/llvm/test/Transforms/LoopVersioning/loop-invariant-bound.ll new file mode 100644 index 00000000000..01c5a55bd5b --- /dev/null +++ b/llvm/test/Transforms/LoopVersioning/loop-invariant-bound.ll @@ -0,0 +1,38 @@ +; RUN: opt -loop-versioning -S < %s | FileCheck %s +; Checks that when introducing the runtime checks, we don't accidentally introduce non-dominating instructions +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +%Dual.212 = type { %Dual.213, %Partials.215 } +%Dual.213 = type { double, %Partials.214 } +%Partials.214 = type { [2 x double] } +%Partials.215 = type { [2 x %Dual.213] } + +; Function Attrs: sspreq +define void @"julia_axpy!_65480"(%Dual.212*, %Dual.212* %other) { +top: + br label %if24 + +; CHECK-NOT: %bc = bitcast i64* %v2.sroa.0.0..sroa_cast +; CHECK: %bound0 = icmp ult i8* %[[x:[a-z0-9]+]], %[[y:[a-z0-9]+]] +; CHECK-NOT: %bound1 = icmp ult i8* %[[y]], %[[x]] + +if24: ; preds = %if24, %top + %"#temp#1.sroa.3.02" = phi i64 [ undef, %top ], [ %2, %if24 ] + %"#temp#1.sroa.0.01" = phi i64 [ undef, %top ], [ %1, %if24 ] + %1 = add i64 %"#temp#1.sroa.0.01", 1 + %2 = add i64 %"#temp#1.sroa.3.02", 1 + ; This pointer is loop 
invariant. LAA used to re-use it from memcheck, even though it didn't dominate. + %v2.sroa.0.0..sroa_cast = bitcast %Dual.212* %0 to i64* + %v2.sroa.0.0.copyload = load i64, i64* %v2.sroa.0.0..sroa_cast, align 1 + %3 = add i64 %"#temp#1.sroa.0.01", -1 + %4 = getelementptr inbounds %Dual.212, %Dual.212* %other, i64 0, i32 1, i32 0, i64 0, i32 1, i32 0, i64 0 + %5 = bitcast double* %4 to i64* + store i64 undef, i64* %5, align 8 + %notlhs27 = icmp eq i64 %2, undef + %notrhs28 = icmp eq i64 %1, undef + %6 = or i1 %notrhs28, %notlhs27 + br i1 %6, label %L41.L335_crit_edge, label %if24 + +L41.L335_crit_edge: ; preds = %if24 + ret void +} diff --git a/llvm/test/Transforms/LoopVersioning/noalias-version-twice.ll b/llvm/test/Transforms/LoopVersioning/noalias-version-twice.ll new file mode 100644 index 00000000000..c53dc858c5c --- /dev/null +++ b/llvm/test/Transforms/LoopVersioning/noalias-version-twice.ll @@ -0,0 +1,107 @@ +; RUN: opt -basicaa -loop-distribute -enable-loop-distribute -loop-simplify -scoped-noalias \ +; RUN: -loop-versioning -S < %s | FileCheck %s + +; Test the metadata generated when versioning an already versioned loop. Here +; we invoke loop distribution to perform the first round of versioning. It +; adds memchecks for accesses that can alias across the distribution boundary. +; Then we further version the distributed loops to fully disambiguate accesses +; within each. +; +; So as an example, we add noalias between C and A during the versioning +; within loop distribution and then add noalias between C and D during the +; second explicit versioning step: +; +; for (i = 0; i < n; i++) { +; A[i + 1] = A[i] * B[i]; +; ------------------------------- +; C[i] = D[i] * E[i]; +; } + +; To make it easier to see what's going on, I expanded every noalias/scope metadata +; reference below in a comment. For a scope I use the format scope(domain), +; e.g. scope 17 in domain 15 is written as 17(15). 
+ +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" + +@B = common global i32* null, align 8 +@A = common global i32* null, align 8 +@C = common global i32* null, align 8 +@D = common global i32* null, align 8 +@E = common global i32* null, align 8 + +define void @f() { +entry: + %a = load i32*, i32** @A, align 8 + %b = load i32*, i32** @B, align 8 + %c = load i32*, i32** @C, align 8 + %d = load i32*, i32** @D, align 8 + %e = load i32*, i32** @E, align 8 + br label %for.body + +for.body: ; preds = %for.body, %entry + %ind = phi i64 [ 0, %entry ], [ %add, %for.body ] + + %arrayidxA = getelementptr inbounds i32, i32* %a, i64 %ind + +; CHECK: %loadA.ldist1 = {{.*}} !noalias !25 +; A noalias C: !25 -> { 17(15), 18(15), 19(15), 26(24) } +; ^^^^^^ + %loadA = load i32, i32* %arrayidxA, align 4 + + %arrayidxB = getelementptr inbounds i32, i32* %b, i64 %ind + %loadB = load i32, i32* %arrayidxB, align 4 + + %mulA = mul i32 %loadB, %loadA + + %add = add nuw nsw i64 %ind, 1 + %arrayidxA_plus_4 = getelementptr inbounds i32, i32* %a, i64 %add + store i32 %mulA, i32* %arrayidxA_plus_4, align 4 + +; CHECK: for.body: + + %arrayidxD = getelementptr inbounds i32, i32* %d, i64 %ind + +; CHECK: %loadD = {{.*}} !alias.scope !31 +; D's scope: !31 -> { 18(15), 32(33) } +; ^^^^^^ + %loadD = load i32, i32* %arrayidxD, align 4 + + %arrayidxE = getelementptr inbounds i32, i32* %e, i64 %ind + +; CHECK: %loadE = {{.*}} !alias.scope !34 +; E's scope: !34 -> { 19(15), 35(33) } +; ^^^^^^ + %loadE = load i32, i32* %arrayidxE, align 4 + + %mulC = mul i32 %loadD, %loadE + + %arrayidxC = getelementptr inbounds i32, i32* %c, i64 %ind + +; CHECK: store i32 %mulC, {{.*}} !alias.scope !36, !noalias !38 +; C's scope: !36 -> { 17(15), 37(33) } +; ^^^^^^ +; C noalias D and E: !38 -> { 21(15), 32(33), 35(33) } +; ^^^^^^ ^^^^^^ + store i32 %mulC, i32* %arrayidxC, align 4 + + %exitcond = icmp eq i64 %add, 20 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void 
+} + +; Domain for the second loop versioning for the top loop after +; distribution. +; CHECK: !15 = distinct !{!15, !"LVerDomain"} +; CHECK: !17 = distinct !{!17, !15} +; CHECK: !25 = !{!17, !18, !19, !26} +; CHECK: !31 = !{!18, !32} +; CHECK: !32 = distinct !{!32, !33} +; Domain for the second loop versioning for the bottom loop after +; distribution. +; CHECK: !33 = distinct !{!33, !"LVerDomain"} +; CHECK: !34 = !{!19, !35} +; CHECK: !35 = distinct !{!35, !33} +; CHECK: !36 = !{!17, !37} +; CHECK: !38 = !{!21, !32, !35} diff --git a/llvm/test/Transforms/LoopVersioning/noalias.ll b/llvm/test/Transforms/LoopVersioning/noalias.ll new file mode 100644 index 00000000000..c2539726db4 --- /dev/null +++ b/llvm/test/Transforms/LoopVersioning/noalias.ll @@ -0,0 +1,54 @@ +; RUN: opt -basicaa -loop-versioning -S < %s | FileCheck %s + +; A very simple case. After versioning the %loadA and %loadB can't alias with +; the store. +; +; To make it easier to see what's going on, I expanded every noalias/scope metadata +; reference below in a comment. For a scope I use the format scope(domain), +; e.g. scope 17 in domain 15 is written as 17(15). 
+ +; CHECK-LABEL: @f( + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32* %a, i32* %b, i32* %c) { +entry: + br label %for.body + +; CHECK: for.body.lver.orig: +; CHECK: for.body: +for.body: ; preds = %for.body, %entry + %ind = phi i64 [ 0, %entry ], [ %add, %for.body ] + + %arrayidxA = getelementptr inbounds i32, i32* %a, i64 %ind +; CHECK: %loadA = {{.*}} !alias.scope !0 +; A's scope: !0 -> { 1(2) } + %loadA = load i32, i32* %arrayidxA, align 4 + + %arrayidxB = getelementptr inbounds i32, i32* %b, i64 %ind +; CHECK: %loadB = {{.*}} !alias.scope !3 +; B's scope: !3 -> { 4(2) } + %loadB = load i32, i32* %arrayidxB, align 4 + + %mulC = mul i32 %loadA, %loadB + + %arrayidxC = getelementptr inbounds i32, i32* %c, i64 %ind +; CHECK: store {{.*}} !alias.scope !5, !noalias !7 +; C's scope: !5 -> { 6(2) }; C noalias A and B: !7 -> { 1(2), 4(2) } + store i32 %mulC, i32* %arrayidxC, align 4 + + %add = add nuw nsw i64 %ind, 1 + %exitcond = icmp eq i64 %add, 20 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} +; CHECK: !0 = !{!1} +; CHECK: !1 = distinct !{!1, !2} +; CHECK: !2 = distinct !{!2, !"LVerDomain"} +; CHECK: !3 = !{!4} +; CHECK: !4 = distinct !{!4, !2} +; CHECK: !5 = !{!6} +; CHECK: !6 = distinct !{!6, !2} +; CHECK: !7 = !{!1, !4} |