author     Amara Emerson <amara.emerson@arm.com>    2017-05-10 15:15:38 +0000
committer  Amara Emerson <amara.emerson@arm.com>    2017-05-10 15:15:38 +0000
commit     816542ceb39ed51c942fa94b5c27041b82fb4d1c (patch)
tree       3b34faee402ffd5d71accb479ae5ff4c8df9f54f /llvm/test/Transforms/LoopVectorize
parent     0381cc74c773fe3e73475b982375fb66cb7ef9f9 (diff)
[AArch64] Enable use of reduction intrinsics.
The new experimental reduction intrinsics can now be used, so I'm enabling this for AArch64. We will need this for SVE anyway, so it makes sense to do this for NEON reductions as well.

The existing code to match shufflevector patterns is replaced with a direct lowering of the reductions to AArch64-specific nodes. Tests are updated with the new, simpler representation.

Differential Revision: https://reviews.llvm.org/D32247

llvm-svn: 302678
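For reference, a minimal before/after sketch of the middle.block reduction tail that the updated CHECK lines below match. Value names (%vec, %rdx.shuf, %bin.rdx, %rdx, %ext) are hypothetical placeholders, not taken from actual vectorizer output; only the instruction shapes matter.

Before (a log2(VF) tree of shufflevector/add pairs, then an extract of lane 0):

    %rdx.shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
    %bin.rdx  = add <16 x i8> %vec, %rdx.shuf
    ; ...three more shufflevector/add pairs with strides 4, 2 and 1...
    %rdx      = extractelement <16 x i8> %bin.rdx3, i32 0
    %ext      = zext i8 %rdx to i32

After (a single call to the experimental reduction intrinsic):

    %rdx = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> %vec)
    %ext = zext i8 %rdx to i32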
Diffstat (limited to 'llvm/test/Transforms/LoopVectorize')
-rw-r--r--  llvm/test/Transforms/LoopVectorize/AArch64/reduction-small-size.ll  26
1 file changed, 3 insertions(+), 23 deletions(-)
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-small-size.ll b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-small-size.ll
index be08a63b212..9d9aea00e9a 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-small-size.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-small-size.ll
@@ -20,15 +20,7 @@ target triple = "aarch64--linux-gnu"
; CHECK: add <16 x i8>
;
; CHECK: middle.block:
-; CHECK: shufflevector <16 x i8>
-; CHECK: add <16 x i8>
-; CHECK: shufflevector <16 x i8>
-; CHECK: add <16 x i8>
-; CHECK: shufflevector <16 x i8>
-; CHECK: add <16 x i8>
-; CHECK: shufflevector <16 x i8>
-; CHECK: add <16 x i8>
-; CHECK: [[Rdx:%[a-zA-Z0-9.]+]] = extractelement <16 x i8>
+; CHECK: [[Rdx:%[a-zA-Z0-9.]+]] = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8>
; CHECK: zext i8 [[Rdx]] to i32
;
define i8 @reduction_i8(i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %n) {
@@ -83,13 +75,7 @@ for.body:
; CHECK: add <8 x i16>
;
; CHECK: middle.block:
-; CHECK: shufflevector <8 x i16>
-; CHECK: add <8 x i16>
-; CHECK: shufflevector <8 x i16>
-; CHECK: add <8 x i16>
-; CHECK: shufflevector <8 x i16>
-; CHECK: add <8 x i16>
-; CHECK: [[Rdx:%[a-zA-Z0-9.]+]] = extractelement <8 x i16>
+; CHECK: [[Rdx:%[a-zA-Z0-9.]+]] = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16>
; CHECK: zext i16 [[Rdx]] to i32
;
define i16 @reduction_i16_1(i16* nocapture readonly %a, i16* nocapture readonly %b, i32 %n) {
@@ -146,13 +132,7 @@ for.body:
; CHECK: add <8 x i16>
;
; CHECK: middle.block:
-; CHECK: shufflevector <8 x i16>
-; CHECK: add <8 x i16>
-; CHECK: shufflevector <8 x i16>
-; CHECK: add <8 x i16>
-; CHECK: shufflevector <8 x i16>
-; CHECK: add <8 x i16>
-; CHECK: [[Rdx:%[a-zA-Z0-9.]+]] = extractelement <8 x i16>
+; CHECK: [[Rdx:%[a-zA-Z0-9.]+]] = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16>
; CHECK: zext i16 [[Rdx]] to i32
;
define i16 @reduction_i16_2(i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %n) {