summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/ARM/vlddup.ll
diff options
context:
space:
mode:
author Tim Northover <tnorthover@apple.com> 2017-04-20 19:54:02 +0000
committer Tim Northover <tnorthover@apple.com> 2017-04-20 19:54:02 +0000
commit 8b1240b0f09d53863cb298c7519508ddead3c957 (patch)
tree 1cac9a03fa15e5a1c881534db5a93e0cf571cdd3 /llvm/test/CodeGen/ARM/vlddup.ll
parent 175d70ee5c2f03f640151488f5f33b7bd9b96f8d (diff)
download bcm5719-llvm-8b1240b0f09d53863cb298c7519508ddead3c957.tar.gz
download bcm5719-llvm-8b1240b0f09d53863cb298c7519508ddead3c957.zip
ARM: handle post-indexed NEON ops where the offset isn't the access width.
Before, we assumed that any ConstantInt offset was precisely the access width, so we could use the "[rN]!" form. ISelLowering only ever created that kind, but further simplification during combining could lead to unexpected constants and incorrect codegen.

Should fix PR32658.

llvm-svn: 300878
Diffstat (limited to 'llvm/test/CodeGen/ARM/vlddup.ll')
-rw-r--r-- llvm/test/CodeGen/ARM/vlddup.ll 17
1 files changed, 17 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/ARM/vlddup.ll b/llvm/test/CodeGen/ARM/vlddup.ll
index c6d5747f350..71ca0f79152 100644
--- a/llvm/test/CodeGen/ARM/vlddup.ll
+++ b/llvm/test/CodeGen/ARM/vlddup.ll
@@ -310,6 +310,23 @@ define <4 x i16> @vld2dupi16_update(i16** %ptr) nounwind {
ret <4 x i16> %tmp5
}
+define <4 x i16> @vld2dupi16_odd_update(i16** %ptr) nounwind { ; regression test: post-indexed vld2 dup whose pointer step (6 bytes) must go in a register rather than the "[rN]!" form
+;CHECK-LABEL: vld2dupi16_odd_update:
+;CHECK: mov [[INC:r[0-9]+]], #6
+;CHECK: vld2.16 {d16[], d17[]}, [r1], [[INC]]
+ %A = load i16*, i16** %ptr ; fetch the current element pointer stored at %ptr
+ %A2 = bitcast i16* %A to i8* ; the intrinsic below takes its address as i8*
+ %tmp0 = tail call %struct.__neon_int4x16x2_t @llvm.arm.neon.vld2lane.v4i16.p0i8(i8* %A2, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2) ; NOTE(review): trailing i32 0 / i32 2 look like lane index and alignment - confirm against the intrinsic definition
+ %tmp1 = extractvalue %struct.__neon_int4x16x2_t %tmp0, 0 ; first vector of the returned pair
+ %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer ; all-zero mask: replicate element 0 into every lane
+ %tmp3 = extractvalue %struct.__neon_int4x16x2_t %tmp0, 1 ; second vector of the returned pair
+ %tmp4 = shufflevector <4 x i16> %tmp3, <4 x i16> undef, <4 x i32> zeroinitializer ; all-zero mask: replicate element 0 into every lane
+ %tmp5 = add <4 x i16> %tmp2, %tmp4 ; combine both splats so both loaded values are used by the result
+ %tmp6 = getelementptr i16, i16* %A, i32 3 ; step by 3 x i16 = 6 bytes, the #6 the CHECK lines expect in [[INC]]
+ store i16* %tmp6, i16** %ptr ; write the advanced pointer back through %ptr
+ ret <4 x i16> %tmp5
+}
+
define <2 x i32> @vld2dupi32(i8* %A) nounwind {
;CHECK-LABEL: vld2dupi32:
;Check the alignment value. Max for this instruction is 64 bits:
OpenPOWER on IntegriCloud