8 files changed, 481 insertions, 0 deletions
diff --git a/llvm/test/Transforms/LoopVersioning/add-phi-update-users.ll b/llvm/test/Transforms/LoopVersioning/add-phi-update-users.ll
new file mode 100644
index 00000000000..22d5dcd095b
--- /dev/null
+++ b/llvm/test/Transforms/LoopVersioning/add-phi-update-users.ll
@@ -0,0 +1,65 @@
+; RUN: opt < %s -loop-versioning -S -o - | FileCheck %s
+
+; This test case used to end like this:
+;
+;    Instruction does not dominate all uses!
+;      %t2 = load i16, i16* @b, align 1, !tbaa !2, !alias.scope !6
+;      %tobool = icmp eq i16 %t2, 0
+;    LLVM ERROR: Broken function found, compilation aborted!
+;
+; due to a fault where we did not replace the use of %t2 in the icmp in
+; for.end, when adding a new PHI node for the versioned loops based on the
+; loop-defined values used outside of the loop.
+;
+; Verify that the code compiles, that we get a versioned loop, and that the
+; uses of %t2 in for.end and if.then are updated to use the value from the
+; added phi node.
+
+; CHECK:       define void @f1
+; CHECK:       for.end:
+; CHECK-NEXT:    %t2.lver = phi i16 [ %t2, %for.body ], [ %t2.lver.orig, %for.body.lver.orig ]
+; CHECK-NEXT:    %tobool = icmp eq i16 %t2.lver, 0
+; CHECK:       if.then:
+; CHECK-NEXT:    store i16 %t2.lver
+
+@a = dso_local global i16 0, align 1
+@b = dso_local global i16 0, align 1
+@c = dso_local global i16* null, align 1
+
+define void @f1() {  ; endless outer loop (no ret) wrapped around a 3-iteration inner loop
+entry:
+  %t0 = load i16*, i16** @c, align 1  ; pointer read from @c once, before any loop; store target below
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.cond.backedge, %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.cond, %for.body
+  %t1 = phi i64 [ 0, %for.cond ], [ %inc, %for.body ]
+  %t2 = load i16, i16* @b, align 1, !tbaa !2  ; defined inside the loop but also used after it
+  store i16 %t2, i16* %t0, align 1, !tbaa !2  ; store through %t0; presumably this load/store pair is what needs the runtime check
+  %inc = add nuw nsw i64 %t1, 1
+  %cmp = icmp ult i64 %inc, 3  ; stay in the loop while %inc < 3
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  %tobool = icmp eq i16 %t2, 0  ; first out-of-loop use of %t2; expected to become %t2.lver
+  br i1 %tobool, label %for.cond.backedge, label %if.then
+
+for.cond.backedge:                                ; preds = %for.end, %if.then
+  br label %for.cond
+
+if.then:                                          ; preds = %for.end
+  store i16 %t2, i16* @a, align 1, !tbaa !2  ; second out-of-loop use of %t2, in a block other than the exit block
+  br label %for.cond.backedge
+}
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 1}
+!1 = !{!"clang version 7.0.0"}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"long long", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C/C++ TBAA"}
diff --git a/llvm/test/Transforms/LoopVersioning/basic.ll b/llvm/test/Transforms/LoopVersioning/basic.ll
new file mode 100644
index 00000000000..f59caecadae
--- /dev/null
+++ b/llvm/test/Transforms/LoopVersioning/basic.ll
@@ -0,0 +1,47 @@
+; RUN: opt -basicaa -loop-versioning -S < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+; Version this loop with overlap checks between a, c and b, c.
+
+define void @f(i32* %a, i32* %b, i32* %c) {  ; c[i] = a[i] * b[i] for i = 0..19
+entry:
+  br label %for.body
+
+; CHECK: for.body.lver.check:
+; CHECK:   icmp
+; CHECK:   icmp
+; CHECK:   icmp
+; CHECK:   icmp
+; CHECK-NOT: icmp
+; CHECK:   br i1 %memcheck.conflict, label %for.body.ph.lver.orig, label %for.body.ph
+
+; CHECK: for.body.ph.lver.orig:
+; CHECK: for.body.lver.orig:
+; CHECK:   br i1 %exitcond.lver.orig, label %for.end, label %for.body.lver.orig
+; CHECK: for.body.ph:
+; CHECK: for.body:
+; CHECK:   br i1 %exitcond, label %for.end, label %for.body
+; CHECK: for.end:
+
+for.body:                                         ; preds = %for.body, %entry
+  %ind = phi i64 [ 0, %entry ], [ %add, %for.body ]  ; induction variable, starts at 0
+
+  %arrayidxA = getelementptr inbounds i32, i32* %a, i64 %ind
+  %loadA = load i32, i32* %arrayidxA, align 4
+
+  %arrayidxB = getelementptr inbounds i32, i32* %b, i64 %ind
+  %loadB = load i32, i32* %arrayidxB, align 4
+
+  %mulC = mul i32 %loadA, %loadB
+
+  %arrayidxC = getelementptr inbounds i32, i32* %c, i64 %ind
+  store i32 %mulC, i32* %arrayidxC, align 4  ; the only store in the loop; %a and %b are only read
+
+  %add = add nuw nsw i64 %ind, 1
+  %exitcond = icmp eq i64 %add, 20  ; leave the loop once %add reaches 20
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
diff --git a/llvm/test/Transforms/LoopVersioning/exit-block-dominates-rt-check-block.ll b/llvm/test/Transforms/LoopVersioning/exit-block-dominates-rt-check-block.ll
new file mode 100644
index 00000000000..960c890516c
--- /dev/null
+++ b/llvm/test/Transforms/LoopVersioning/exit-block-dominates-rt-check-block.ll
@@ -0,0 +1,38 @@
+; This test ensures loop versioning does not produce an invalid dominator tree
+; if the exit block of the loop (bb0) dominates the runtime check block
+; (bb1 will become the runtime check block).
+
+; RUN: opt -loop-distribute -enable-loop-distribute -verify-dom-info -S -o - %s > %t
+; RUN: opt -loop-simplify -loop-distribute -enable-loop-distribute -verify-dom-info -S -o - %s > %t
+; RUN: FileCheck --check-prefix CHECK-VERSIONING -input-file %t %s
+
+; RUN: opt -loop-versioning -verify-dom-info -S -o - %s > %t
+; RUN: opt -loop-simplify -loop-versioning -verify-dom-info -S -o - %s > %t
+; RUN: FileCheck --check-prefix CHECK-VERSIONING -input-file %t %s
+
+@c1 = external global i16
+
+define void @f(i16 %a) {  ; %a is never used; the function has no ret (bb2 exits back to bb0)
+  br label %bb0
+
+bb0:  ; exit target of the bb2 loop; every path to bb1 goes through this block
+  br label %bb1
+
+bb1:  ; only non-loop predecessor of bb2
+  %tmp1 = load i16, i16* @c1
+  br label %bb2
+
+bb2:  ; single-block loop
+  %tmp2 = phi i16 [ %tmp1, %bb1 ], [ %tmp3, %bb2 ]
+  %tmp4 = getelementptr inbounds [1 x i32], [1 x i32]* undef, i32 0, i32 4
+  store i32 1, i32* %tmp4  ; store through an address derived from undef
+  %tmp5 = getelementptr inbounds [1 x i32], [1 x i32]* undef, i32 0, i32 9
+  store i32 0, i32* %tmp5  ; second store through an address derived from undef
+  %tmp3 = add i16 %tmp2, 1
+  store i16 %tmp2, i16* @c1
+  %tmp6 = icmp sle i16 %tmp3, 0
+  br i1 %tmp6, label %bb2, label %bb0  ; loop again, or leave to bb0
+}
+
+; Simple check to make sure loop versioning happened.
+; CHECK-VERSIONING: bb2.lver.check:
diff --git a/llvm/test/Transforms/LoopVersioning/incorrect-phi.ll b/llvm/test/Transforms/LoopVersioning/incorrect-phi.ll
new file mode 100644
index 00000000000..de170be1376
--- /dev/null
+++ b/llvm/test/Transforms/LoopVersioning/incorrect-phi.ll
@@ -0,0 +1,60 @@
+; RUN: opt -loop-versioning -S < %s | FileCheck %s
+
+; Make sure all PHIs are properly updated in the exit block.  Based on
+; PR28037.
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@x = external global [2 x [3 x [5 x i16]]]
+
+; CHECK-LABEL: @phi_with_undef
+define void @phi_with_undef() {
+bb6.lr.ph:                                        ; entry block (no predecessors)
+  br label %bb6
+
+bb6:                                              ; preds = %bb6.lr.ph, %bb6
+  %_tmp1423 = phi i64 [ undef, %bb6.lr.ph ], [ %_tmp142, %bb6 ]
+  %_tmp123 = getelementptr [2 x [3 x [5 x i16]]], [2 x [3 x [5 x i16]]]* @x, i16 0, i64 undef
+  %_tmp126 = getelementptr [3 x [5 x i16]], [3 x [5 x i16]]* %_tmp123, i16 0, i64 %_tmp1423
+  %_tmp129 = getelementptr [5 x i16], [5 x i16]* %_tmp126, i16 0, i64 undef
+  %_tmp130 = load i16, i16* %_tmp129  ; load from @x at an index that depends on %_tmp1423
+  store i16 undef, i16* getelementptr ([2 x [3 x [5 x i16]]], [2 x [3 x [5 x i16]]]* @x, i64 0, i64 undef, i64 undef, i64 undef)
+  %_tmp142 = add i64 %_tmp1423, 1
+  br i1 false, label %bb6, label %loop.exit  ; constant-false condition: always goes to loop.exit
+
+loop.exit:                                ; preds = %bb6
+  %_tmp142.lcssa = phi i64 [ %_tmp142, %bb6 ]  ; single-input phi of a value defined in the loop
+  %split = phi i16 [ undef, %bb6 ]  ; single-input phi whose incoming value is undef, not a loop-defined value
+; CHECK: %split = phi i16 [ undef, %bb6 ], [ undef, %bb6.lver.orig ]
+  br label %bb9
+
+bb9:                                              ; preds = %loop.exit
+  ret void
+}
+
+; CHECK-LABEL: @phi_with_non_loop_defined_value
+define void @phi_with_non_loop_defined_value() {
+bb6.lr.ph:                                        ; entry block (no predecessors)
+  %t = add i16 1, 1  ; defined before the loop; fed into the exit-block phi %split
+  br label %bb6
+
+bb6:                                              ; preds = %bb6.lr.ph, %bb6
+  %_tmp1423 = phi i64 [ undef, %bb6.lr.ph ], [ %_tmp142, %bb6 ]
+  %_tmp123 = getelementptr [2 x [3 x [5 x i16]]], [2 x [3 x [5 x i16]]]* @x, i16 0, i64 undef
+  %_tmp126 = getelementptr [3 x [5 x i16]], [3 x [5 x i16]]* %_tmp123, i16 0, i64 %_tmp1423
+  %_tmp129 = getelementptr [5 x i16], [5 x i16]* %_tmp126, i16 0, i64 undef
+  %_tmp130 = load i16, i16* %_tmp129  ; load from @x at an index that depends on %_tmp1423
+  store i16 undef, i16* getelementptr ([2 x [3 x [5 x i16]]], [2 x [3 x [5 x i16]]]* @x, i64 0, i64 undef, i64 undef, i64 undef)
+  %_tmp142 = add i64 %_tmp1423, 1
+  br i1 false, label %bb6, label %loop.exit  ; constant-false condition: always goes to loop.exit
+
+loop.exit:                                ; preds = %bb6
+  %_tmp142.lcssa = phi i64 [ %_tmp142, %bb6 ]  ; single-input phi of a value defined in the loop
+  %split = phi i16 [ %t, %bb6 ]  ; single-input phi whose incoming value %t is defined outside the loop
+; CHECK: %split = phi i16 [ %t, %bb6 ], [ %t, %bb6.lver.orig ]
+  br label %bb9
+
+bb9:                                              ; preds = %loop.exit
+  ret void
+}
diff --git a/llvm/test/Transforms/LoopVersioning/lcssa.ll b/llvm/test/Transforms/LoopVersioning/lcssa.ll
new file mode 100644
index 00000000000..64993061008
--- /dev/null
+++ b/llvm/test/Transforms/LoopVersioning/lcssa.ll
@@ -0,0 +1,72 @@
+; RUN: opt -basicaa -loop-versioning -S < %s | FileCheck %s
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @fill(i8** %ls1.20, i8** %ls2.21, i8* %cse3.22) {
+; CHECK: bb1.lver.check:
+; CHECK:   br i1 %memcheck.conflict, label %bb1.ph.lver.orig, label %bb1.ph
+bb1.ph:
+  %ls1.20.promoted = load i8*, i8** %ls1.20  ; initial value of the first moving pointer
+  %ls2.21.promoted = load i8*, i8** %ls2.21  ; initial value of the second moving pointer
+  br label %bb1
+
+bb1:
+  %_tmp302 = phi i8* [ %ls2.21.promoted, %bb1.ph ], [ %_tmp30, %bb1 ]
+  %_tmp281 = phi i8* [ %ls1.20.promoted, %bb1.ph ], [ %_tmp28, %bb1 ]
+  %_tmp14 = getelementptr i8, i8* %_tmp281, i16 -1  ; address one byte before %_tmp281
+  %_tmp15 = load i8, i8* %_tmp14
+  %add = add i8 %_tmp15, 1
+  store i8 %add, i8* %_tmp281  ; same value is written through both pointers
+  store i8 %add, i8* %_tmp302
+  %_tmp28 = getelementptr i8, i8* %_tmp281, i16 1  ; advance both pointers by one byte
+  %_tmp30 = getelementptr i8, i8* %_tmp302, i16 1
+  br i1 false, label %bb1, label %bb3.loopexit  ; constant-false condition: always goes to bb3.loopexit
+
+bb3.loopexit:
+  %_tmp30.lcssa = phi i8* [ %_tmp30, %bb1 ]  ; three single-input phis carrying loop-defined values out of the loop
+  %_tmp15.lcssa = phi i8 [ %_tmp15, %bb1 ]
+  %_tmp28.lcssa = phi i8* [ %_tmp28, %bb1 ]
+  store i8* %_tmp28.lcssa, i8** %ls1.20  ; write the advanced pointers and the last loaded byte back
+  store i8 %_tmp15.lcssa, i8* %cse3.22
+  store i8* %_tmp30.lcssa, i8** %ls2.21
+  br label %bb3
+
+bb3:
+  ret void
+}
+
+define void @fill_no_null_opt(i8** %ls1.20, i8** %ls2.21, i8* %cse3.22) #0 {  ; same body as @fill; #0 sets "null-pointer-is-valid"="true"
+; CHECK-LABEL: fill_no_null_opt(
+; CHECK: bb1.lver.check:
+; CHECK: %lver.safe = or i1 %memcheck.conflict, %{{.*}}
+; CHECK:  br i1 %lver.safe, label %bb1.ph.lver.orig, label %bb1.ph
+bb1.ph:
+  %ls1.20.promoted = load i8*, i8** %ls1.20  ; initial value of the first moving pointer
+  %ls2.21.promoted = load i8*, i8** %ls2.21  ; initial value of the second moving pointer
+  br label %bb1
+
+bb1:
+  %_tmp302 = phi i8* [ %ls2.21.promoted, %bb1.ph ], [ %_tmp30, %bb1 ]
+  %_tmp281 = phi i8* [ %ls1.20.promoted, %bb1.ph ], [ %_tmp28, %bb1 ]
+  %_tmp14 = getelementptr i8, i8* %_tmp281, i16 -1  ; address one byte before %_tmp281
+  %_tmp15 = load i8, i8* %_tmp14
+  %add = add i8 %_tmp15, 1
+  store i8 %add, i8* %_tmp281  ; same value is written through both pointers
+  store i8 %add, i8* %_tmp302
+  %_tmp28 = getelementptr i8, i8* %_tmp281, i16 1  ; advance both pointers by one byte
+  %_tmp30 = getelementptr i8, i8* %_tmp302, i16 1
+  br i1 false, label %bb1, label %bb3.loopexit  ; constant-false condition: always goes to bb3.loopexit
+
+bb3.loopexit:
+  %_tmp30.lcssa = phi i8* [ %_tmp30, %bb1 ]  ; three single-input phis carrying loop-defined values out of the loop
+  %_tmp15.lcssa = phi i8 [ %_tmp15, %bb1 ]
+  %_tmp28.lcssa = phi i8* [ %_tmp28, %bb1 ]
+  store i8* %_tmp28.lcssa, i8** %ls1.20  ; write the advanced pointers and the last loaded byte back
+  store i8 %_tmp15.lcssa, i8* %cse3.22
+  store i8* %_tmp30.lcssa, i8** %ls2.21
+  br label %bb3
+
+bb3:
+  ret void
+}
+
+attributes #0 = { "null-pointer-is-valid"="true" }
diff --git a/llvm/test/Transforms/LoopVersioning/loop-invariant-bound.ll b/llvm/test/Transforms/LoopVersioning/loop-invariant-bound.ll
new file mode 100644
index 00000000000..01c5a55bd5b
--- /dev/null
+++ b/llvm/test/Transforms/LoopVersioning/loop-invariant-bound.ll
@@ -0,0 +1,38 @@
+; RUN: opt -loop-versioning -S < %s | FileCheck %s
+; Checks that when introducing runtime checks, we don't accidentally introduce non-dominating instructions.
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+%Dual.212 = type { %Dual.213, %Partials.215 }
+%Dual.213 = type { double, %Partials.214 }
+%Partials.214 = type { [2 x double] }
+%Partials.215 = type { [2 x %Dual.213] }
+
+; Function Attrs: sspreq (stale note: the definition below carries no attributes)
+define void @"julia_axpy!_65480"(%Dual.212*, %Dual.212* %other) {  ; the unnamed first argument is %0
+top:
+  br label %if24
+
+; CHECK-NOT: %bc = bitcast i64* %v2.sroa.0.0..sroa_cast
+; CHECK: %bound0 = icmp ult i8* %[[x:[a-z0-9]+]], %[[y:[a-z0-9]+]]
+; CHECK-NOT: %bound1 = icmp ult i8* %[[y]], %[[x]]
+
+if24:                                             ; preds = %if24, %top
+  %"#temp#1.sroa.3.02" = phi i64 [ undef, %top ], [ %2, %if24 ]
+  %"#temp#1.sroa.0.01" = phi i64 [ undef, %top ], [ %1, %if24 ]
+  %1 = add i64 %"#temp#1.sroa.0.01", 1
+  %2 = add i64 %"#temp#1.sroa.3.02", 1
+  ; This pointer is loop invariant. LAA used to re-use it from memcheck, even though it didn't dominate.
+  %v2.sroa.0.0..sroa_cast = bitcast %Dual.212* %0 to i64*
+  %v2.sroa.0.0.copyload = load i64, i64* %v2.sroa.0.0..sroa_cast, align 1  ; the loop's only load; reads through argument %0
+  %3 = add i64 %"#temp#1.sroa.0.01", -1
+  %4 = getelementptr inbounds %Dual.212, %Dual.212* %other, i64 0, i32 1, i32 0, i64 0, i32 1, i32 0, i64 0
+  %5 = bitcast double* %4 to i64*
+  store i64 undef, i64* %5, align 8  ; the loop's only store; writes into the object pointed to by %other
+  %notlhs27 = icmp eq i64 %2, undef
+  %notrhs28 = icmp eq i64 %1, undef
+  %6 = or i1 %notrhs28, %notlhs27
+  br i1 %6, label %L41.L335_crit_edge, label %if24  ; exit when either comparison holds
+
+L41.L335_crit_edge:                               ; preds = %if24
+  ret void
+}
diff --git a/llvm/test/Transforms/LoopVersioning/noalias-version-twice.ll b/llvm/test/Transforms/LoopVersioning/noalias-version-twice.ll
new file mode 100644
index 00000000000..c53dc858c5c
--- /dev/null
+++ b/llvm/test/Transforms/LoopVersioning/noalias-version-twice.ll
@@ -0,0 +1,107 @@
+; RUN: opt -basicaa -loop-distribute -enable-loop-distribute -loop-simplify -scoped-noalias \
+; RUN:     -loop-versioning -S < %s | FileCheck %s
+
+; Test the metadata generated when versioning an already versioned loop.  Here
+; we invoke loop distribution to perform the first round of versioning.  It
+; adds memchecks for accesses that can alias across the distribution boundary.
+; Then we further version the distributed loops to fully disambiguate accesses
+; within each.
+;
+; So as an example, we add noalias between C and A during the versioning
+; within loop distribution and then add noalias between C and D during the
+; second explicit versioning step:
+;
+;   for (i = 0; i < n; i++) {
+;     A[i + 1] = A[i] * B[i];
+; -------------------------------
+;     C[i] = D[i] * E[i];
+;   }
+
+; To make it easier to see what's going on, every noalias/scope metadata
+; reference below is expanded in a comment.  A scope is written in the format
+; scope(domain), e.g. scope 17 in domain 15 is written as 17(15).
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+@B = common global i32* null, align 8
+@A = common global i32* null, align 8
+@C = common global i32* null, align 8
+@D = common global i32* null, align 8
+@E = common global i32* null, align 8
+
+define void @f() {
+entry:
+  %a = load i32*, i32** @A, align 8  ; the five array base pointers are read from globals once, before the loop
+  %b = load i32*, i32** @B, align 8
+  %c = load i32*, i32** @C, align 8
+  %d = load i32*, i32** @D, align 8
+  %e = load i32*, i32** @E, align 8
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
+
+  %arrayidxA = getelementptr inbounds i32, i32* %a, i64 %ind
+
+; CHECK: %loadA.ldist1 = {{.*}} !noalias !25
+; A noalias C: !25 -> { 17(15), 18(15), 19(15), 26(24) }
+;                       ^^^^^^
+  %loadA = load i32, i32* %arrayidxA, align 4
+
+  %arrayidxB = getelementptr inbounds i32, i32* %b, i64 %ind
+  %loadB = load i32, i32* %arrayidxB, align 4
+
+  %mulA = mul i32 %loadB, %loadA
+
+  %add = add nuw nsw i64 %ind, 1
+  %arrayidxA_plus_4 = getelementptr inbounds i32, i32* %a, i64 %add  ; &A[i + 1]
+  store i32 %mulA, i32* %arrayidxA_plus_4, align 4  ; A[i + 1] = A[i] * B[i]
+
+; CHECK: for.body:
+
+  %arrayidxD = getelementptr inbounds i32, i32* %d, i64 %ind
+
+; CHECK: %loadD = {{.*}} !alias.scope !31
+; D's scope: !31 -> { 18(15), 32(33) }
+;                             ^^^^^^
+  %loadD = load i32, i32* %arrayidxD, align 4
+
+  %arrayidxE = getelementptr inbounds i32, i32* %e, i64 %ind
+
+; CHECK: %loadE = {{.*}} !alias.scope !34
+; E's scope: !34 -> { 19(15), 35(33) }
+;                             ^^^^^^
+  %loadE = load i32, i32* %arrayidxE, align 4
+
+  %mulC = mul i32 %loadD, %loadE
+
+  %arrayidxC = getelementptr inbounds i32, i32* %c, i64 %ind
+
+; CHECK: store i32 %mulC, {{.*}} !alias.scope !36, !noalias !38
+; C's scope: !36 -> { 17(15), 37(33) }
+;                     ^^^^^^
+; C noalias D and E: !38 -> { 21(15), 32(33), 35(33) }
+;                                     ^^^^^^  ^^^^^^
+  store i32 %mulC, i32* %arrayidxC, align 4  ; C[i] = D[i] * E[i]
+
+  %exitcond = icmp eq i64 %add, 20  ; leave the loop once %add reaches 20
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+; Domain for the second loop versioning for the top loop after
+; distribution.
+; CHECK: !15 = distinct !{!15, !"LVerDomain"}
+; CHECK: !17 = distinct !{!17, !15}
+; CHECK: !25 = !{!17, !18, !19, !26}
+; CHECK: !31 = !{!18, !32}
+; CHECK: !32 = distinct !{!32, !33}
+; Domain for the second loop versioning for the bottom loop after
+; distribution.
+; CHECK: !33 = distinct !{!33, !"LVerDomain"}
+; CHECK: !34 = !{!19, !35}
+; CHECK: !35 = distinct !{!35, !33}
+; CHECK: !36 = !{!17, !37}
+; CHECK: !38 = !{!21, !32, !35}
diff --git a/llvm/test/Transforms/LoopVersioning/noalias.ll b/llvm/test/Transforms/LoopVersioning/noalias.ll
new file mode 100644
index 00000000000..c2539726db4
--- /dev/null
+++ b/llvm/test/Transforms/LoopVersioning/noalias.ll
@@ -0,0 +1,54 @@
+; RUN: opt -basicaa -loop-versioning -S < %s | FileCheck %s
+
+; A very simple case.  After versioning the %loadA and %loadB can't alias with
+; the store.
+;
+; To make it easier to see what's going on, every noalias/scope metadata
+; reference below is expanded in a comment.  A scope is written in the format
+; scope(domain), e.g. scope 17 in domain 15 is written as 17(15).
+
+; CHECK-LABEL: @f(
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @f(i32* %a, i32* %b, i32* %c) {  ; c[i] = a[i] * b[i] for i = 0..19
+entry:
+  br label %for.body
+
+; CHECK: for.body.lver.orig:
+; CHECK: for.body:
+for.body:                                         ; preds = %for.body, %entry
+  %ind = phi i64 [ 0, %entry ], [ %add, %for.body ]  ; induction variable, starts at 0
+
+  %arrayidxA = getelementptr inbounds i32, i32* %a, i64 %ind
+; CHECK: %loadA = {{.*}} !alias.scope !0
+; A's scope: !0 -> { 1(2) }
+  %loadA = load i32, i32* %arrayidxA, align 4
+
+  %arrayidxB = getelementptr inbounds i32, i32* %b, i64 %ind
+; CHECK: %loadB = {{.*}} !alias.scope !3
+; B's scope: !3 -> { 4(2) }
+  %loadB = load i32, i32* %arrayidxB, align 4
+
+  %mulC = mul i32 %loadA, %loadB
+
+  %arrayidxC = getelementptr inbounds i32, i32* %c, i64 %ind
+; CHECK: store {{.*}} !alias.scope !5, !noalias !7
+; C noalias A and B: !7 -> { 1(2), 4(2) }
+  store i32 %mulC, i32* %arrayidxC, align 4  ; the only store in the loop
+
+  %add = add nuw nsw i64 %ind, 1
+  %exitcond = icmp eq i64 %add, 20  ; leave the loop once %add reaches 20
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+; CHECK: !0 = !{!1}
+; CHECK: !1 = distinct !{!1, !2}
+; CHECK: !2 = distinct !{!2, !"LVerDomain"}
+; CHECK: !3 = !{!4}
+; CHECK: !4 = distinct !{!4, !2}
+; CHECK: !5 = !{!6}
+; CHECK: !6 = distinct !{!6, !2}
+; CHECK: !7 = !{!1, !4}