summaryrefslogtreecommitdiffstats
path: root/llvm/test
diff options
context:
space:
mode:
author: James Molloy <james.molloy@arm.com> 2014-10-17 17:06:31 +0000
committer: James Molloy <james.molloy@arm.com> 2014-10-17 17:06:31 +0000
commit: f497d5511d1aff061c3af95b13796f8a4b05a4d0 (patch)
tree: b75871ef972ad46173228992a510b7f2ac1782c5 /llvm/test
parent: ff73fc9547e927a07468f7ee34aac022b522452e (diff)
downloadbcm5719-llvm-f497d5511d1aff061c3af95b13796f8a4b05a4d0.tar.gz
bcm5719-llvm-f497d5511d1aff061c3af95b13796f8a4b05a4d0.zip
[AArch64] Fix a silent codegen fault in BUILD_VECTOR lowering.
The lowering should use the number of source elements, not the number of destination elements, given that we know at this point that the source and destination element counts are not the same. While we're at it, avoid writing to std::vector::end(). Bug found with random testing and a lot of coffee. llvm-svn: 220051
Diffstat (limited to 'llvm/test')
-rw-r--r-- llvm/test/CodeGen/AArch64/aarch64-wide-shuffle.ll 22
1 files changed, 22 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/AArch64/aarch64-wide-shuffle.ll b/llvm/test/CodeGen/AArch64/aarch64-wide-shuffle.ll
new file mode 100644
index 00000000000..d06df7a87fd
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/aarch64-wide-shuffle.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+
+define <4 x i16> @f(<4 x i32> %vqdmlal_v3.i, <8 x i16> %x5) {
+entry:
+ ; Lanes 0, 2 and 3 of the result come from one <4 x i32> lane (truncated to i16) and lane 1
+ ; from %x5, so we must not just dup the input vector. The code emitted is ext, dup, ext, ext,
+ ; but only the last three are matched, as the first two could later be combined into a dup2.
+ ; CHECK: dup
+ ; CHECK: ext
+ ; CHECK: ext
+ %x4 = extractelement <4 x i32> %vqdmlal_v3.i, i32 2
+ %vgetq_lane = trunc i32 %x4 to i16
+ %vecinit.i = insertelement <4 x i16> undef, i16 %vgetq_lane, i32 0
+ %vecinit2.i = insertelement <4 x i16> %vecinit.i, i16 %vgetq_lane, i32 2
+ %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vgetq_lane, i32 3
+ %vgetq_lane261 = extractelement <8 x i16> %x5, i32 0
+ %vset_lane267 = insertelement <4 x i16> %vecinit3.i, i16 %vgetq_lane261, i32 1
+ ret <4 x i16> %vset_lane267
+}
OpenPOWER on IntegriCloud