summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
diff options
context:
space:
mode:
author: Hao Liu <Hao.Liu@arm.com> 2015-06-11 09:05:02 +0000
committer: Hao Liu <Hao.Liu@arm.com> 2015-06-11 09:05:02 +0000
commit: 4566d18e8926e2a255ad2ed13e54e2fad28afb40 (patch)
tree: ae6baf0fce7a02c95b3bbfcfb7da5bd2295da149 /llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
parent: 229628b39e1eaedc9870824786ee32ca8cd0c5c2 (diff)
download: bcm5719-llvm-4566d18e8926e2a255ad2ed13e54e2fad28afb40.tar.gz
download: bcm5719-llvm-4566d18e8926e2a255ad2ed13e54e2fad28afb40.zip
[AArch64] Match interleaved memory accesses into ldN/stN instructions.
Add a pass, AArch64InterleavedAccess, to identify and match interleaved memory accesses. This pass transforms an interleaved load/store into an ldN/stN intrinsic. As the Loop Vectorizer disables optimization on interleaved accesses by default, this optimization is also disabled by default. To enable it, pass "-aarch64-interleaved-access-opt=true".

E.g. Transform an interleaved load (Factor = 2):
  %wide.vec = load <8 x i32>, <8 x i32>* %ptr
  %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
  %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
Into:
  %ld2 = { <4 x i32>, <4 x i32> } call aarch64.neon.ld2(%ptr)
  %v0 = extractvalue { <4 x i32>, <4 x i32> } %ld2, 0
  %v1 = extractvalue { <4 x i32>, <4 x i32> } %ld2, 1

E.g. Transform an interleaved store (Factor = 2):
  %i.vec = shuffle %v0, %v1, <0, 4, 1, 5, 2, 6, 3, 7> ; Interleaved vec
  store <8 x i32> %i.vec, <8 x i32>* %ptr
Into:
  %v0 = shuffle %i.vec, undef, <0, 1, 2, 3>
  %v1 = shuffle %i.vec, undef, <4, 5, 6, 7>
  call void aarch64.neon.st2(%v0, %v1, %ptr)

llvm-svn: 239514
Diffstat (limited to 'llvm/lib/Transforms/Vectorize/LoopVectorize.cpp')
-rw-r--r-- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 2
1 files changed, 1 insertions, 1 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 95c9381985a..be6c542abde 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -139,7 +139,7 @@ static cl::opt<bool> EnableMemAccessVersioning(
cl::desc("Enable symblic stride memory access versioning"));
static cl::opt<bool> EnableInterleavedMemAccesses(
- "enable-interleaved-mem-accesses", cl::init(false), cl::Hidden,
+ "enable-interleaved-mem-accesses", cl::init(true), cl::Hidden,
cl::desc("Enable vectorization on interleaved memory accesses in a loop"));
/// Maximum factor for an interleaved memory access.
OpenPOWER on IntegriCloud