| author | Amara Emerson <amara.emerson@arm.com> | 2017-05-10 15:15:38 +0000 |
|---|---|---|
| committer | Amara Emerson <amara.emerson@arm.com> | 2017-05-10 15:15:38 +0000 |
| commit | 816542ceb39ed51c942fa94b5c27041b82fb4d1c | |
| tree | 3b34faee402ffd5d71accb479ae5ff4c8df9f54f | |
| parent | 0381cc74c773fe3e73475b982375fb66cb7ef9f9 | |
[AArch64] Enable use of reduction intrinsics.
The new experimental reduction intrinsics can now be used, so I'm enabling this
for AArch64. We will need this for SVE anyway, so it makes sense to do this for
NEON reductions as well.
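As an illustration, here is a minimal IR sketch of the reduction form this enables; the intrinsic name is the one the updated test below checks for, while the function and value names are invented:

```llvm
; Horizontal add of a <16 x i8> vector using the (then-)experimental
; reduction intrinsic instead of a manual shuffle-and-add tree.
declare i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8>)

define i8 @sum_bytes(<16 x i8> %v) {
entry:
  %sum = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> %v)
  ret i8 %sum
}
```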
The existing code to match shufflevector patterns is replaced with a direct
lowering of the reductions to AArch64-specific nodes. Tests are updated with
the new, simpler representation.
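For comparison, here is a hand-written sketch of the shufflevector-based representation that the old CHECK lines below matched: a log2-step shuffle/add tree followed by an extract of lane 0. Only the overall shape follows the removed CHECK lines; the function and value names are invented.

```llvm
; Reduce <8 x i16> by pairwise halving: 8 -> 4 -> 2 -> 1 active lanes, then extract lane 0.
define i16 @sum_halfwords_shuffle_tree(<8 x i16> %v) {
entry:
  %s1 = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %a1 = add <8 x i16> %v, %s1
  %s2 = shufflevector <8 x i16> %a1, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %a2 = add <8 x i16> %a1, %s2
  %s3 = shufflevector <8 x i16> %a2, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %a3 = add <8 x i16> %a2, %s3
  %rdx = extractelement <8 x i16> %a3, i32 0
  ret i16 %rdx
}
```

The %rdx value here is equivalent to a single call to @llvm.experimental.vector.reduce.add.i16.v8i16, which is the form the vectorizer now emits in the middle block.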
Differential Revision: https://reviews.llvm.org/D32247
llvm-svn: 302678
Diffstat (limited to 'llvm/test/Transforms/LoopVectorize')
| -rw-r--r-- | llvm/test/Transforms/LoopVectorize/AArch64/reduction-small-size.ll | 26 |
1 file changed, 3 insertions(+), 23 deletions(-)
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-small-size.ll b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-small-size.ll
index be08a63b212..9d9aea00e9a 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-small-size.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-small-size.ll
@@ -20,15 +20,7 @@ target triple = "aarch64--linux-gnu"
 ; CHECK: add <16 x i8>
 ;
 ; CHECK: middle.block:
-; CHECK: shufflevector <16 x i8>
-; CHECK: add <16 x i8>
-; CHECK: shufflevector <16 x i8>
-; CHECK: add <16 x i8>
-; CHECK: shufflevector <16 x i8>
-; CHECK: add <16 x i8>
-; CHECK: shufflevector <16 x i8>
-; CHECK: add <16 x i8>
-; CHECK: [[Rdx:%[a-zA-Z0-9.]+]] = extractelement <16 x i8>
+; CHECK: [[Rdx:%[a-zA-Z0-9.]+]] = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8>
 ; CHECK: zext i8 [[Rdx]] to i32
 ;
 define i8 @reduction_i8(i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %n) {
@@ -83,13 +75,7 @@ for.body:
 ; CHECK: add <8 x i16>
 ;
 ; CHECK: middle.block:
-; CHECK: shufflevector <8 x i16>
-; CHECK: add <8 x i16>
-; CHECK: shufflevector <8 x i16>
-; CHECK: add <8 x i16>
-; CHECK: shufflevector <8 x i16>
-; CHECK: add <8 x i16>
-; CHECK: [[Rdx:%[a-zA-Z0-9.]+]] = extractelement <8 x i16>
+; CHECK: [[Rdx:%[a-zA-Z0-9.]+]] = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16>
 ; CHECK: zext i16 [[Rdx]] to i32
 ;
 define i16 @reduction_i16_1(i16* nocapture readonly %a, i16* nocapture readonly %b, i32 %n) {
@@ -146,13 +132,7 @@ for.body:
 ; CHECK: add <8 x i16>
 ;
 ; CHECK: middle.block:
-; CHECK: shufflevector <8 x i16>
-; CHECK: add <8 x i16>
-; CHECK: shufflevector <8 x i16>
-; CHECK: add <8 x i16>
-; CHECK: shufflevector <8 x i16>
-; CHECK: add <8 x i16>
-; CHECK: [[Rdx:%[a-zA-Z0-9.]+]] = extractelement <8 x i16>
+; CHECK: [[Rdx:%[a-zA-Z0-9.]+]] = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16>
 ; CHECK: zext i16 [[Rdx]] to i32
 ;
 define i16 @reduction_i16_2(i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %n) {

