summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMatthew Simpson <mssimpso@codeaurora.org>2017-03-02 13:55:05 +0000
committerMatthew Simpson <mssimpso@codeaurora.org>2017-03-02 13:55:05 +0000
commit455c2ee39463ca0779849c9248e10c765ad11a77 (patch)
tree06a54b88746fffe44b3e829b4b85906dd40c816e
parent2855dc7ddc0e653dbc932dd3de7e3c0e99a41346 (diff)
downloadbcm5719-llvm-455c2ee39463ca0779849c9248e10c765ad11a77.tar.gz
bcm5719-llvm-455c2ee39463ca0779849c9248e10c765ad11a77.zip
[LV] Consider non-consecutive but vectorizable accesses for VF selection
When computing the smallest and largest types for selecting the maximum vectorization factor, we currently ignore loads and stores of pointer types if the memory access is non-consecutive. We do this because such accesses must be scalarized regardless of vectorization factor, and thus shouldn't be considered when determining the factor. This patch makes this check less aggressive by also considering non-consecutive accesses that may be vectorized, such as interleaved accesses. Because we don't know at the time of the check if an access will certainly be vectorized (this is a cost model decision given a particular VF), we consider all accesses that can potentially be vectorized. Differential Revision: https://reviews.llvm.org/D30305 llvm-svn: 296747
-rw-r--r--llvm/lib/Transforms/Vectorize/LoopVectorize.cpp13
-rw-r--r--llvm/test/Transforms/LoopVectorize/AArch64/smallest-and-widest-types.ll33
2 files changed, 43 insertions, 3 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 36570b49bb0..763ce083837 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -6326,9 +6326,16 @@ LoopVectorizationCostModel::getSmallestAndWidestTypes() {
T = ST->getValueOperand()->getType();
// Ignore loaded pointer types and stored pointer types that are not
- // consecutive. However, we do want to take consecutive stores/loads of
- // pointer vectors into account.
- if (T->isPointerTy() && !isConsecutiveLoadOrStore(&I))
+ // vectorizable.
+ //
+ // FIXME: The check here attempts to predict whether a load or store will
+ // be vectorized. We only know this for certain after a VF has
+ // been selected. Here, we assume that if an access can be
+ // vectorized, it will be. We should also look at extending this
+ // optimization to non-pointer types.
+ //
+ if (T->isPointerTy() && !isConsecutiveLoadOrStore(&I) &&
+ !Legal->isAccessInterleaved(&I) && !Legal->isLegalGatherOrScatter(&I))
continue;
MinWidth = std::min(MinWidth,
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/smallest-and-widest-types.ll b/llvm/test/Transforms/LoopVectorize/AArch64/smallest-and-widest-types.ll
new file mode 100644
index 00000000000..1ae7dadeffd
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/smallest-and-widest-types.ll
@@ -0,0 +1,33 @@
+; REQUIRES: asserts
+; RUN: opt < %s -loop-vectorize -debug-only=loop-vectorize -disable-output 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+; CHECK-LABEL: Checking a loop in "interleaved_access"
+; CHECK: The Smallest and Widest types: 64 / 64 bits
+;
+define void @interleaved_access(i8** %A, i64 %N) {
+for.ph:
+ br label %for.body
+
+for.body:
+ %i = phi i64 [ %i.next.3, %for.body ], [ 0, %for.ph ]
+ %tmp0 = getelementptr inbounds i8*, i8** %A, i64 %i
+ store i8* null, i8** %tmp0, align 8
+ %i.next.0 = add nuw nsw i64 %i, 1
+ %tmp1 = getelementptr inbounds i8*, i8** %A, i64 %i.next.0
+ store i8* null, i8** %tmp1, align 8
+ %i.next.1 = add nsw i64 %i, 2
+ %tmp2 = getelementptr inbounds i8*, i8** %A, i64 %i.next.1
+ store i8* null, i8** %tmp2, align 8
+ %i.next.2 = add nsw i64 %i, 3
+ %tmp3 = getelementptr inbounds i8*, i8** %A, i64 %i.next.2
+ store i8* null, i8** %tmp3, align 8
+ %i.next.3 = add nsw i64 %i, 4
+ %cond = icmp slt i64 %i.next.3, %N
+ br i1 %cond, label %for.body, label %for.end
+
+for.end:
+ ret void
+}
OpenPOWER on IntegriCloud