[LoopVectorize] Annotate versioned loop with noalias metadata

Summary: Use the new LoopVersioning facility (D16712) to add noalias metadata in the vector loop if we versioned with memchecks. This can enable some optimization opportunities further down the pipeline (see the included test or the benchmark improvement quoted in D16712). The test also covers the bug I had in the initial version in D16712. The vectorizer did not previously use LoopVersioning. The reason is that the vectorizer performs its transformations in a single shot. It creates an empty single-block vector loop that it then populates with the widened, if-converted instructions. Thus creating an intermediate versioned scalar loop seems wasteful. So this patch (rather than bringing in LoopVersioning fully) adds a special interface to LoopVersioning to allow the vectorizer to add no-alias annotations while still performing its own versioning. As the vectorizer propagates metadata from the instructions in the original loop to the vector instructions, we also check the pointer in the original instruction and see if LoopVersioning can add no-alias metadata based on the issued memchecks. Reviewers: hfinkel, nadav, mzolotukhin Subscribers: mzolotukhin, llvm-commits Differential Revision: http://reviews.llvm.org/D17191 llvm-svn: 263744
author: Adam Nemet <anemet@apple.com> 2016-03-17 20:32:37 +0000
committer: Adam Nemet <anemet@apple.com> 2016-03-17 20:32:37 +0000
commit: b0c4eae07339dce7d0d6e7e697fba37b41cb28cf (patch)
tree: 1efa158666c93c6a61aaaf66008bd36863fb5e2a /llvm/test/Transforms
parent: 5eccf07df359955e5dd378bd886c1cfaee5e11f3 (diff)
download: bcm5719-llvm-b0c4eae07339dce7d0d6e7e697fba37b41cb28cf.tar.gz
bcm5719-llvm-b0c4eae07339dce7d0d6e7e697fba37b41cb28cf.zip
2 files changed, 137 insertions, 0 deletions
diff --git a/llvm/test/Transforms/LoopVectorize/noalias-md-licm.ll b/llvm/test/Transforms/LoopVectorize/noalias-md-licm.ll
new file mode 100644
index 00000000000..233d530dc10
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/noalias-md-licm.ll
@@ -0,0 +1,59 @@
+; RUN: opt -basicaa -scoped-noalias -loop-vectorize -licm -force-vector-width=2 \
+; RUN:     -force-vector-interleave=1 -S < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+; In order to vectorize the inner loop, it needs to be versioned with
+; memchecks between {A} x {B, C} first:
+;
+;   for (i = 0; i < n; i++)
+;     for (j = 0; j < m; j++)
+;         A[j] += B[i] + C[j];
+;
+; Since in the versioned vector loop A and B can no longer alias, B[i] can be
+; LICM'ed from the inner loop.
+
+
+define void @f(i32* %a, i32* %b, i32* %c) {
+entry:
+  br label %outer
+; Outer loop of the nest A[j] += B[i] + C[j]: %i.2 runs 0..29 and &B[i] is formed here.
+outer:
+  %i.2 = phi i64 [ 0, %entry ], [ %i, %inner.end ]
+  %arrayidxB = getelementptr inbounds i32, i32* %b, i64 %i.2
+  br label %inner.ph
+; Inner-loop preheader. The directives below expect the B[i] load in vector.ph.
+inner.ph:
+; CHECK: vector.ph:
+; CHECK: load i32, i32* %arrayidxB,
+; CHECK: br label %vector.body
+  br label %inner
+; Inner loop: %j.2 runs 0..19; A and C are indexed by j, B only by i.
+inner:
+  %j.2 = phi i64 [ 0, %inner.ph ], [ %j, %inner ]
+
+  %arrayidxA = getelementptr inbounds i32, i32* %a, i64 %j.2
+  %loadA = load i32, i32* %arrayidxA, align 4
+; Load of B[i]: its address is computed in %outer, so it is invariant in this loop.
+  %loadB = load i32, i32* %arrayidxB, align 4
+
+  %arrayidxC = getelementptr inbounds i32, i32* %c, i64 %j.2
+  %loadC = load i32, i32* %arrayidxC, align 4
+
+  %add = add nuw i32 %loadA, %loadB
+  %add2 = add nuw i32 %add, %loadC
+; Write A[j] + B[i] + C[j] back to A[j].
+  store i32 %add2, i32* %arrayidxA, align 4
+
+  %j = add nuw nsw i64 %j.2, 1
+  %cond1 = icmp eq i64 %j, 20
+  br i1 %cond1, label %inner.end, label %inner
+; Outer-loop latch: advance i and leave the nest once i reaches 30.
+inner.end:
+  %i = add nuw nsw i64 %i.2, 1
+  %cond2 = icmp eq i64 %i, 30
+  br i1 %cond2, label %outer.end, label %outer
+
+outer.end:
+  ret void
+}
diff --git a/llvm/test/Transforms/LoopVectorize/noalias-md.ll b/llvm/test/Transforms/LoopVectorize/noalias-md.ll
new file mode 100644
index 00000000000..787ea88f945
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/noalias-md.ll
@@ -0,0 +1,78 @@
+; RUN: opt -basicaa -loop-vectorize -force-vector-width=2 \
+; RUN:     -force-vector-interleave=1 -S < %s \
+; RUN:     | FileCheck %s -check-prefix=BOTH -check-prefix=LV
+; RUN: opt -basicaa -scoped-noalias -loop-vectorize -dse -force-vector-width=2 \
+; RUN:     -force-vector-interleave=1 -S < %s \
+; RUN:     | FileCheck %s -check-prefix=BOTH -check-prefix=DSE
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+; This loop needs to be versioned with memchecks between {A, B} x {C} before
+; it can be vectorized.
+;
+;   for (i = 0; i < n; i++) {
+;     C[i] = A[i] + 2;
+;     C[i] += B[i];
+;   }
+;
+; Check that the corresponding noalias metadata is added to the vector loop
+; but not to the scalar loop.
+;
+; Since in the versioned vector loop C and B can no longer alias, the first
+; store to C[i] can be DSE'd.
+
+
+define void @f(i32* %a, i32* %b, i32* %c) {
+entry:
+  br label %for.body
+; Both runs expect a vector.memcheck block followed by a vector.body block.
+; BOTH: vector.memcheck:
+; BOTH: vector.body:
+for.body:                                         ; preds = %for.body, %entry
+  %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+; %ind is the induction variable; it runs from 0 to 19.
+  %arrayidxA = getelementptr inbounds i32, i32* %a, i64 %ind
+; Scope 1
+; LV: = load {{.*}} !alias.scope !0
+  %loadA = load i32, i32* %arrayidxA, align 4
+; First value written to C[i]: A[i] + 2.
+  %add = add nuw i32 %loadA, 2
+
+  %arrayidxC = getelementptr inbounds i32, i32* %c, i64 %ind
+; Noalias with scope 1 and 6
+; LV: store {{.*}} !alias.scope !3, !noalias !5
+; DSE-NOT: store
+  store i32 %add, i32* %arrayidxC, align 4
+; The store above is overwritten by the second store to the same address below.
+  %arrayidxB = getelementptr inbounds i32, i32* %b, i64 %ind
+; Scope 6
+; LV: = load {{.*}} !alias.scope !7
+  %loadB = load i32, i32* %arrayidxB, align 4
+
+  %add2 = add nuw i32 %add, %loadB
+; Final value of C[i]: (A[i] + 2) + B[i]; this store is expected to remain.
+; Noalias with scope 1 and 6
+; LV: store {{.*}} !alias.scope !3, !noalias !5
+; DSE: store
+  store i32 %add2, i32* %arrayidxC, align 4
+
+  %inc = add nuw nsw i64 %ind, 1
+  %exitcond = icmp eq i64 %inc, 20
+  br i1 %exitcond, label %for.end, label %for.body
+; The remaining scalar loop must carry neither !alias.scope nor !noalias.
+; BOTH: for.body:
+; BOTH-NOT: !alias.scope
+; BOTH-NOT: !noalias
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+; LV: !0 = !{!1}
+; LV: !1 = distinct !{!1, !2}
+; LV: !2 = distinct !{!2, !"LVerDomain"}
+; LV: !3 = !{!4}
+; LV: !4 = distinct !{!4, !2}
+; LV: !5 = !{!1, !6}
+; LV: !6 = distinct !{!6, !2}
+; LV: !7 = !{!6}
author	Adam Nemet <anemet@apple.com>	2016-03-17 20:32:37 +0000
committer	Adam Nemet <anemet@apple.com>	2016-03-17 20:32:37 +0000
commit	b0c4eae07339dce7d0d6e7e697fba37b41cb28cf (patch)
tree	1efa158666c93c6a61aaaf66008bd36863fb5e2a /llvm/test/Transforms
parent	5eccf07df359955e5dd378bd886c1cfaee5e11f3 (diff)
download	bcm5719-llvm-b0c4eae07339dce7d0d6e7e697fba37b41cb28cf.tar.gz bcm5719-llvm-b0c4eae07339dce7d0d6e7e697fba37b41cb28cf.zip