summaryrefslogtreecommitdiffstats
path: root/llvm/test
diff options
context:
space:
mode:
authorAnna Thomas <anna@azul.com>2018-08-21 14:40:27 +0000
committerAnna Thomas <anna@azul.com>2018-08-21 14:40:27 +0000
commitb02b0ad8c78eab94b2c24333c5c378ffe90ea309 (patch)
treed4e4d5a5eb176d5b0091cc28f58b0dfebf4540d3 /llvm/test
parent89632b84f5f1ba86eb61cace5d6f03982bd0952c (diff)
downloadbcm5719-llvm-b02b0ad8c78eab94b2c24333c5c378ffe90ea309.tar.gz
bcm5719-llvm-b02b0ad8c78eab94b2c24333c5c378ffe90ea309.zip
[LV] Vectorize loops where non-phi instructions used outside loop
Summary: Follow up change to rL339703, where we now vectorize loops with non-phi instructions used outside the loop. Note that the cyclic dependency identification occurs when identifying reduction/induction vars. We also need to identify that we do not allow users where the PSCEV information within and outside the loop are different. This was the fix added in rL307837 for PR33706. Reviewers: Ayal, mkuper, fhahn Subscribers: javed.absar, llvm-commits Differential Revision: https://reviews.llvm.org/D50778 llvm-svn: 340278
Diffstat (limited to 'llvm/test')
-rw-r--r--llvm/test/Transforms/LoopVectorize/no_outside_user.ll147
1 files changed, 147 insertions, 0 deletions
diff --git a/llvm/test/Transforms/LoopVectorize/no_outside_user.ll b/llvm/test/Transforms/LoopVectorize/no_outside_user.ll
index d0b5d7cd301..f2dd557e80a 100644
--- a/llvm/test/Transforms/LoopVectorize/no_outside_user.ll
+++ b/llvm/test/Transforms/LoopVectorize/no_outside_user.ll
@@ -265,3 +265,150 @@ for.end: ; preds = %for.body, %entry
%x.0.lcssa = phi i32 [ 0, %entry ], [ %tmp17 , %latch ]
ret i32 %x.0.lcssa
}
+
+
+; CHECK-LABEL: @outside_user_non_phi(
+; CHECK: %vec.ind = phi <2 x i32>
+; CHECK: [[CMP:%[a-zA-Z0-9.]+]] = icmp sgt <2 x i32> %vec.ind, <i32 10, i32 10>
+; CHECK: %predphi = select <2 x i1> [[CMP]], <2 x i32> <i32 1, i32 1>, <2 x i32> zeroinitializer
+; CHECK: [[TRUNC:%[a-zA-Z0-9.]+]] = trunc <2 x i32> %predphi to <2 x i8>
+
+; CHECK-LABEL: middle.block:
+; CHECK: [[E1:%[a-zA-Z0-9.]+]] = extractelement <2 x i8> [[TRUNC]], i32 1
+
+; CHECK-LABEL: f1.exit.loopexit:
+; CHECK: %.lcssa = phi i8 [ %tmp17.trunc, %bb16 ], [ [[E1]], %middle.block ]
+define i8 @outside_user_non_phi() {
+bb:
+ %b.promoted = load i32, i32* @b, align 4
+ br label %.lr.ph.i
+
+.lr.ph.i:
+ %tmp8 = phi i32 [ %tmp18, %bb16 ], [ %b.promoted, %bb ]
+ %tmp2 = icmp sgt i32 %tmp8, 10
+ br i1 %tmp2, label %bb16, label %bb10
+
+bb10:
+ br label %bb16
+
+bb16:
+ %tmp17 = phi i32 [ 0, %bb10 ], [ 1, %.lr.ph.i ]
+ %tmp17.trunc = trunc i32 %tmp17 to i8
+ %tmp18 = add nsw i32 %tmp8, 1
+ %tmp19 = icmp slt i32 %tmp18, 4
+ br i1 %tmp19, label %.lr.ph.i, label %f1.exit.loopexit
+
+f1.exit.loopexit:
+ %.lcssa = phi i8 [ %tmp17.trunc, %bb16 ]
+ ret i8 %.lcssa
+}
+
+; CHECK-LABEL: no_vectorize_reduction_with_outside_use(
+; CHECK-NOT: <2 x i32>
+define i32 @no_vectorize_reduction_with_outside_use(i32 %n, i32* nocapture %A, i32* nocapture %B) nounwind uwtable readonly {
+entry:
+ %cmp7 = icmp sgt i32 %n, 0
+ br i1 %cmp7, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %result.08 = phi i32 [ %or, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+ %0 = load i32, i32* %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
+ %1 = load i32, i32* %arrayidx2, align 4
+ %add = add nsw i32 %1, %0
+ %or = or i32 %add, %result.08
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ %result.0.lcssa = phi i32 [ 0, %entry ], [ %1, %for.body ]
+ ret i32 %result.0.lcssa
+}
+
+
+; vectorize c[i] = a[i] + b[i] loop where result of c[i] is used outside the
+; loop
+; CHECK-LABEL: sum_arrays_outside_use(
+; CHECK-LABEL: vector.memcheck:
+; CHECK: br i1 %memcheck.conflict, label %scalar.ph, label %vector.ph
+
+; CHECK-LABEL: vector.body:
+; CHECK: %wide.load = load <2 x i32>, <2 x i32>*
+; CHECK: %wide.load16 = load <2 x i32>, <2 x i32>*
+; CHECK: [[ADD:%[a-zA-Z0-9.]+]] = add nsw <2 x i32> %wide.load, %wide.load16
+; CHECK: store <2 x i32>
+
+; CHECK-LABEL: middle.block:
+; CHECK: [[E1:%[a-zA-Z0-9.]+]] = extractelement <2 x i32> [[ADD]], i32 1
+
+; CHECK-LABEL: f1.exit.loopexit:
+; CHECK: %.lcssa = phi i32 [ %sum, %.lr.ph.i ], [ [[E1]], %middle.block ]
+define i32 @sum_arrays_outside_use(i32* %B, i32* %A, i32* %C, i32 %N) {
+bb:
+ %b.promoted = load i32, i32* @b, align 4
+ br label %.lr.ph.i
+
+.lr.ph.i:
+ %iv = phi i32 [ %ivnext, %.lr.ph.i ], [ %b.promoted, %bb ]
+ %indvars.iv = sext i32 %iv to i64
+ %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
+ %Bload = load i32, i32* %arrayidx2, align 4
+ %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+ %Aload = load i32, i32* %arrayidx, align 4
+ %sum = add nsw i32 %Bload, %Aload
+ %arrayidx3 = getelementptr inbounds i32, i32* %C, i64 %indvars.iv
+ store i32 %sum, i32* %arrayidx3, align 4
+ %ivnext = add nsw i32 %iv, 1
+ %tmp19 = icmp slt i32 %ivnext, %N
+ br i1 %tmp19, label %.lr.ph.i, label %f1.exit.loopexit
+
+f1.exit.loopexit:
+ %.lcssa = phi i32 [ %sum, %.lr.ph.i ]
+ ret i32 %.lcssa
+}
+
+@tab = common global [32 x i8] zeroinitializer, align 1
+
+; CHECK-LABEL: non_uniform_live_out()
+; CHECK-LABEL: vector.body:
+; CHECK: %vec.ind = phi <2 x i32> [ <i32 0, i32 1>, %vector.ph ], [ %vec.ind.next, %vector.body ]
+; CHECK: [[ADD:%[a-zA-Z0-9.]+]] = add <2 x i32> %vec.ind, <i32 7, i32 7>
+; CHECK: [[EE:%[a-zA-Z0-9.]+]] = extractelement <2 x i32> [[ADD]], i32 0
+; CHECK: [[GEP:%[a-zA-Z0-9.]+]] = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 [[EE]]
+; CHECK-NEXT: [[GEP2:%[a-zA-Z0-9.]+]] = getelementptr inbounds i8, i8* [[GEP]], i32 0
+; CHECK-NEXT: [[BC:%[a-zA-Z0-9.]+]] = bitcast i8* [[GEP2]] to <2 x i8>*
+; CHECK-NEXT: %wide.load = load <2 x i8>, <2 x i8>* [[BC]]
+; CHECK-NEXT: [[ADD2:%[a-zA-Z0-9.]+]] = add <2 x i8> %wide.load, <i8 1, i8 1>
+; CHECK: store <2 x i8> [[ADD2]], <2 x i8>*
+
+; CHECK-LABEL: middle.block:
+; CHECK: [[ADDEE:%[a-zA-Z0-9.]+]] = extractelement <2 x i32> [[ADD]], i32 1
+
+; CHECK-LABEL: for.end:
+; CHECK: %lcssa = phi i32 [ %i.09, %for.body ], [ [[ADDEE]], %middle.block ]
+; CHECK: %arrayidx.out = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %lcssa
+define i32 @non_uniform_live_out() {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %i.09 = add i32 %i.08, 7
+ %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.09
+ %0 = load i8, i8* %arrayidx, align 1
+ %bump = add i8 %0, 1
+ store i8 %bump, i8* %arrayidx, align 1
+ %inc = add nsw i32 %i.08, 1
+ %exitcond = icmp eq i32 %i.08, 20000
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ %lcssa = phi i32 [%i.09, %for.body]
+ %arrayidx.out = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %lcssa
+ store i8 42, i8* %arrayidx.out, align 1
+ ret i32 0
+}
OpenPOWER on IntegriCloud