diff options
Diffstat (limited to 'llvm/test')
-rw-r--r-- | llvm/test/Transforms/LoopVectorize/vect-phiscev-sext-trunc.ll | 211 |
1 files changed, 211 insertions, 0 deletions
diff --git a/llvm/test/Transforms/LoopVectorize/vect-phiscev-sext-trunc.ll b/llvm/test/Transforms/LoopVectorize/vect-phiscev-sext-trunc.ll new file mode 100644 index 00000000000..4ddc6a65217 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/vect-phiscev-sext-trunc.ll @@ -0,0 +1,211 @@ +; RUN: opt -S -loop-vectorize -force-vector-width=8 -force-vector-interleave=1 < %s | FileCheck %s -check-prefix=VF8 +; RUN: opt -S -loop-vectorize -force-vector-width=1 -force-vector-interleave=4 < %s | FileCheck %s -check-prefix=VF1 + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +; Given a loop with an induction variable which is being +; truncated/extended using casts that had been proven to +; be redundant under a runtime test, we want to make sure +; that these casts, do not get vectorized/scalarized/widened. +; This is the case for inductions whose SCEV expression is +; of the form "ExtTrunc(%phi) + %step", where "ExtTrunc" +; can be a result of the IR sequences we check below. +; +; See also pr30654. +; + +; Case1: Check the following induction pattern: +; +; %p.09 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ] +; %sext = shl i32 %p.09, 24 +; %conv = ashr exact i32 %sext, 24 +; %add = add nsw i32 %conv, %step +; +; This is the case in the following code: +; +; void doit1(int n, int step) { +; int i; +; char p = 0; +; for (i = 0; i < n; i++) { +; a[i] = p; +; p = p + step; +; } +; } +; +; The "ExtTrunc" IR sequence here is: +; "%sext = shl i32 %p.09, 24" +; "%conv = ashr exact i32 %sext, 24" +; We check that it does not appear in the vector loop body, whether +; we vectorize or scalarize the induction. +; In the case of widened induction, this means that the induction phi +; is directly used, without shl/ashr on the way. + +; VF8-LABEL: @doit1 +; VF8: vector.body: +; VF8: %vec.ind = phi <8 x i32> +; VF8: store <8 x i32> %vec.ind +; VF8: middle.block: + +; VF1-LABEL: @doit1 +; VF1: vector.body: +; VF1-NOT: %{{.*}} = shl i32 +; VF1: middle.block: + +@a = common local_unnamed_addr global [250 x i32] zeroinitializer, align 16 + +define void @doit1(i32 %n, i32 %step) { +entry: + %cmp7 = icmp sgt i32 %n, 0 + br i1 %cmp7, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: + %wide.trip.count = zext i32 %n to i64 + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ] + %p.09 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ] + %sext = shl i32 %p.09, 24 + %conv = ashr exact i32 %sext, 24 + %arrayidx = getelementptr inbounds [250 x i32], [250 x i32]* @a, i64 0, i64 %indvars.iv + store i32 %conv, i32* %arrayidx, align 4 + %add = add nsw i32 %conv, %step + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count + br i1 %exitcond, label %for.end.loopexit, label %for.body + +for.end.loopexit: + br label %for.end + +for.end: + ret void +} + + +; Case2: Another variant of the above pattern is where the induction variable +; is used only for address compuation (i.e. it is a GEP index) and therefore +; the induction is not vectorized but rather only the step is widened. +; +; This is the case in the following code, where the induction variable 'w_ix' +; is only used to access the array 'in': +; +; void doit2(int *in, int *out, size_t size, size_t step) +; { +; int w_ix = 0; +; for (size_t offset = 0; offset < size; ++offset) +; { +; int w = in[w_ix]; +; out[offset] = w; +; w_ix += step; +; } +; } +; +; The "ExtTrunc" IR sequence here is similar to the previous case: +; "%sext = shl i64 %w_ix.012, 32 +; %idxprom = ashr exact i64 %sext, 32" +; We check that it does not appear in the vector loop body, whether +; we widen or scalarize the induction. +; In the case of widened induction, this means that the induction phi +; is directly used, without shl/ashr on the way. + +; VF8-LABEL: @doit2 +; VF8: vector.body: +; VF8: %vec.ind = phi <8 x i64> +; VF8: %{{.*}} = extractelement <8 x i64> %vec.ind +; VF8: middle.block: + +; VF1-LABEL: @doit2 +; VF1: vector.body: +; VF1-NOT: %{{.*}} = shl i64 +; VF1: middle.block: +; + +define void @doit2(i32* nocapture readonly %in, i32* nocapture %out, i64 %size, i64 %step) { +entry: + %cmp9 = icmp eq i64 %size, 0 + br i1 %cmp9, label %for.cond.cleanup, label %for.body.lr.ph + +for.body.lr.ph: + br label %for.body + +for.cond.cleanup.loopexit: + br label %for.cond.cleanup + +for.cond.cleanup: + ret void + +for.body: + %w_ix.011 = phi i64 [ 0, %for.body.lr.ph ], [ %add, %for.body ] + %offset.010 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] + %sext = shl i64 %w_ix.011, 32 + %idxprom = ashr exact i64 %sext, 32 + %arrayidx = getelementptr inbounds i32, i32* %in, i64 %idxprom + %0 = load i32, i32* %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds i32, i32* %out, i64 %offset.010 + store i32 %0, i32* %arrayidx1, align 4 + %add = add i64 %idxprom, %step + %inc = add nuw i64 %offset.010, 1 + %exitcond = icmp eq i64 %inc, %size + br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body +} + +; Case3: Lastly, check also the following induction pattern: +; +; %p.09 = phi i32 [ %val0, %scalar.ph ], [ %add, %for.body ] +; %conv = and i32 %p.09, 255 +; %add = add nsw i32 %conv, %step +; +; This is the case in the following code: +; +; int a[N]; +; void doit3(int n, int step) { +; int i; +; unsigned char p = 0; +; for (i = 0; i < n; i++) { +; a[i] = p; +; p = p + step; +; } +; } +; +; The "ExtTrunc" IR sequence here is: +; "%conv = and i32 %p.09, 255". +; We check that it does not appear in the vector loop body, whether +; we vectorize or scalarize the induction. + +; VF8-LABEL: @doit3 +; VF8: vector.body: +; VF8: %vec.ind = phi <8 x i32> +; VF8: store <8 x i32> %vec.ind +; VF8: middle.block: + +; VF1-LABEL: @doit3 +; VF1: vector.body: +; VF1-NOT: %{{.*}} = and i32 +; VF1: middle.block: + +define void @doit3(i32 %n, i32 %step) { +entry: + %cmp7 = icmp sgt i32 %n, 0 + br i1 %cmp7, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: + %wide.trip.count = zext i32 %n to i64 + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ] + %p.09 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ] + %conv = and i32 %p.09, 255 + %arrayidx = getelementptr inbounds [250 x i32], [250 x i32]* @a, i64 0, i64 %indvars.iv + store i32 %conv, i32* %arrayidx, align 4 + %add = add nsw i32 %conv, %step + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count + br i1 %exitcond, label %for.end.loopexit, label %for.body + +for.end.loopexit: + br label %for.end + +for.end: + ret void +} |