diff options
author | Matthew Simpson <mssimpso@codeaurora.org> | 2016-07-06 14:26:59 +0000 |
---|---|---|
committer | Matthew Simpson <mssimpso@codeaurora.org> | 2016-07-06 14:26:59 +0000 |
commit | 433cb1dfe31a85e5e39743032a18c96bf12ce955 (patch) | |
tree | fc9919257eaf7d0e5944e0c4057d99727de3c1dd /llvm/test/Transforms/LoopVectorize/reverse_induction.ll | |
parent | ad0a56f3da287000ba0b64642db99b17186c3a5b (diff) | |
download | bcm5719-llvm-433cb1dfe31a85e5e39743032a18c96bf12ce955.tar.gz bcm5719-llvm-433cb1dfe31a85e5e39743032a18c96bf12ce955.zip |
[LV] Don't widen trivial induction variables
We currently always vectorize induction variables. However, if an induction
variable is only used for counting loop iterations or computing addresses with
getelementptr instructions, we don't need to do this. Vectorizing these trivial
induction variables can create vector code that is difficult to simplify later
on. This is especially true when the unroll factor is greater than one, and we
create vector arithmetic when computing step vectors. With this patch, we check
if an induction variable is only used for counting iterations or computing
addresses, and if so, scalarize the arithmetic when computing step vectors
instead. This allows for greater simplification.
This patch addresses the suboptimal pointer arithmetic sequence seen in
PR27881.
Reference: https://llvm.org/bugs/show_bug.cgi?id=27881
Differential Revision: http://reviews.llvm.org/D21620
llvm-svn: 274627
Diffstat (limited to 'llvm/test/Transforms/LoopVectorize/reverse_induction.ll')
-rw-r--r-- | llvm/test/Transforms/LoopVectorize/reverse_induction.ll | 62 |
1 file changed, 55 insertions, 7 deletions
diff --git a/llvm/test/Transforms/LoopVectorize/reverse_induction.ll b/llvm/test/Transforms/LoopVectorize/reverse_induction.ll
index c19e438bc71..7eb35100c75 100644
--- a/llvm/test/Transforms/LoopVectorize/reverse_induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/reverse_induction.ll
@@ -5,9 +5,24 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ; Make sure consecutive vector generates correct negative indices.
 ; PR15882
 
-; CHECK-LABEL: @reverse_induction_i64(
-; CHECK: %step.add = add <4 x i64> %vec.ind, <i64 -4, i64 -4, i64 -4, i64 -4>
-; CHECK: %step.add2 = add <4 x i64> %step.add, <i64 -4, i64 -4, i64 -4, i64 -4>
+; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+; CHECK: %offset.idx = sub i64 %startval, %index
+; CHECK: %[[a0:.+]] = add i64 %offset.idx, 0
+; CHECK: %[[v0:.+]] = insertelement <4 x i64> undef, i64 %[[a0]], i64 0
+; CHECK: %[[a1:.+]] = add i64 %offset.idx, -1
+; CHECK: %[[v1:.+]] = insertelement <4 x i64> %[[v0]], i64 %[[a1]], i64 1
+; CHECK: %[[a2:.+]] = add i64 %offset.idx, -2
+; CHECK: %[[v2:.+]] = insertelement <4 x i64> %[[v1]], i64 %[[a2]], i64 2
+; CHECK: %[[a3:.+]] = add i64 %offset.idx, -3
+; CHECK: %[[v3:.+]] = insertelement <4 x i64> %[[v2]], i64 %[[a3]], i64 3
+; CHECK: %[[a4:.+]] = add i64 %offset.idx, -4
+; CHECK: %[[v4:.+]] = insertelement <4 x i64> undef, i64 %[[a4]], i64 0
+; CHECK: %[[a5:.+]] = add i64 %offset.idx, -5
+; CHECK: %[[v5:.+]] = insertelement <4 x i64> %[[v4]], i64 %[[a5]], i64 1
+; CHECK: %[[a6:.+]] = add i64 %offset.idx, -6
+; CHECK: %[[v6:.+]] = insertelement <4 x i64> %[[v5]], i64 %[[a6]], i64 2
+; CHECK: %[[a7:.+]] = add i64 %offset.idx, -7
+; CHECK: %[[v7:.+]] = insertelement <4 x i64> %[[v6]], i64 %[[a7]], i64 3
 
 define i32 @reverse_induction_i64(i64 %startval, i32 * %ptr) {
 entry:
@@ -30,8 +45,25 @@ loopend:
 }
 
 ; CHECK-LABEL: @reverse_induction_i128(
-; CHECK: %step.add = add <4 x i128> %vec.ind, <i128 -4, i128 -4, i128 -4, i128 -4>
-; CHECK: %step.add2 = add <4 x i128> %step.add, <i128 -4, i128 -4, i128 -4, i128 -4>
+; CHECK: %index = phi i128 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+; CHECK: %offset.idx = sub i128 %startval, %index
+; CHECK: %[[a0:.+]] = add i128 %offset.idx, 0
+; CHECK: %[[v0:.+]] = insertelement <4 x i128> undef, i128 %[[a0]], i64 0
+; CHECK: %[[a1:.+]] = add i128 %offset.idx, -1
+; CHECK: %[[v1:.+]] = insertelement <4 x i128> %[[v0]], i128 %[[a1]], i64 1
+; CHECK: %[[a2:.+]] = add i128 %offset.idx, -2
+; CHECK: %[[v2:.+]] = insertelement <4 x i128> %[[v1]], i128 %[[a2]], i64 2
+; CHECK: %[[a3:.+]] = add i128 %offset.idx, -3
+; CHECK: %[[v3:.+]] = insertelement <4 x i128> %[[v2]], i128 %[[a3]], i64 3
+; CHECK: %[[a4:.+]] = add i128 %offset.idx, -4
+; CHECK: %[[v4:.+]] = insertelement <4 x i128> undef, i128 %[[a4]], i64 0
+; CHECK: %[[a5:.+]] = add i128 %offset.idx, -5
+; CHECK: %[[v5:.+]] = insertelement <4 x i128> %[[v4]], i128 %[[a5]], i64 1
+; CHECK: %[[a6:.+]] = add i128 %offset.idx, -6
+; CHECK: %[[v6:.+]] = insertelement <4 x i128> %[[v5]], i128 %[[a6]], i64 2
+; CHECK: %[[a7:.+]] = add i128 %offset.idx, -7
+; CHECK: %[[v7:.+]] = insertelement <4 x i128> %[[v6]], i128 %[[a7]], i64 3
+
 define i32 @reverse_induction_i128(i128 %startval, i32 * %ptr) {
 entry:
   br label %for.body
@@ -53,8 +85,24 @@ loopend:
 }
 
 ; CHECK-LABEL: @reverse_induction_i16(
-; CHECK: add <4 x i16> %[[SPLAT:.*]], <i16 0, i16 -1, i16 -2, i16 -3>
-; CHECK: add <4 x i16> %[[SPLAT]], <i16 -4, i16 -5, i16 -6, i16 -7>
+; CHECK: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+; CHECK: %offset.idx = sub i16 %startval, {{.*}}
+; CHECK: %[[a0:.+]] = add i16 %offset.idx, 0
+; CHECK: %[[v0:.+]] = insertelement <4 x i16> undef, i16 %[[a0]], i64 0
+; CHECK: %[[a1:.+]] = add i16 %offset.idx, -1
+; CHECK: %[[v1:.+]] = insertelement <4 x i16> %[[v0]], i16 %[[a1]], i64 1
+; CHECK: %[[a2:.+]] = add i16 %offset.idx, -2
+; CHECK: %[[v2:.+]] = insertelement <4 x i16> %[[v1]], i16 %[[a2]], i64 2
+; CHECK: %[[a3:.+]] = add i16 %offset.idx, -3
+; CHECK: %[[v3:.+]] = insertelement <4 x i16> %[[v2]], i16 %[[a3]], i64 3
+; CHECK: %[[a4:.+]] = add i16 %offset.idx, -4
+; CHECK: %[[v4:.+]] = insertelement <4 x i16> undef, i16 %[[a4]], i64 0
+; CHECK: %[[a5:.+]] = add i16 %offset.idx, -5
+; CHECK: %[[v5:.+]] = insertelement <4 x i16> %[[v4]], i16 %[[a5]], i64 1
+; CHECK: %[[a6:.+]] = add i16 %offset.idx, -6
+; CHECK: %[[v6:.+]] = insertelement <4 x i16> %[[v5]], i16 %[[a6]], i64 2
+; CHECK: %[[a7:.+]] = add i16 %offset.idx, -7
+; CHECK: %[[v7:.+]] = insertelement <4 x i16> %[[v6]], i16 %[[a7]], i64 3
 
 define i32 @reverse_induction_i16(i16 %startval, i32 * %ptr) {
 entry: