diff options
Diffstat (limited to 'llvm/test/CodeGen')
| -rw-r--r-- | llvm/test/CodeGen/ARM/2012-05-04-vmov.ll | 5 | ||||
| -rw-r--r-- | llvm/test/CodeGen/ARM/vector-DAGCombine.ll | 24 |
2 files changed, 28 insertions, 1 deletion
diff --git a/llvm/test/CodeGen/ARM/2012-05-04-vmov.ll b/llvm/test/CodeGen/ARM/2012-05-04-vmov.ll index d52ef2cc5a1..14dbf7ff4ac 100644 --- a/llvm/test/CodeGen/ARM/2012-05-04-vmov.ll +++ b/llvm/test/CodeGen/ARM/2012-05-04-vmov.ll @@ -7,5 +7,8 @@ entry: %div = udiv <2 x i32> %A, %B ret <2 x i32> %div ; A9-CHECK: vmov.32 -; SWIFT-CHECK-NOT: vmov.32 +; vmov.32 should not be used to get a lane: +; vmov.32 <dst>, <src>[<lane>]. +; but vmov.32 <dst>[<lane>], <src> is fine. +; SWIFT-CHECK-NOT: vmov.32 {{r[0-9]+}}, {{d[0-9]\[[0-9]+\]}} } diff --git a/llvm/test/CodeGen/ARM/vector-DAGCombine.ll b/llvm/test/CodeGen/ARM/vector-DAGCombine.ll index 42964deb0b5..6d586f24264 100644 --- a/llvm/test/CodeGen/ARM/vector-DAGCombine.ll +++ b/llvm/test/CodeGen/ARM/vector-DAGCombine.ll @@ -160,3 +160,27 @@ define void @reverse_v16i8(<16 x i8>* %loadaddr, <16 x i8>* %storeaddr) { store <16 x i8> %v1, <16 x i8>* %storeaddr ret void } + +; <rdar://problem/14170854>. +; vldr cannot handle unaligned loads. +; Fall back to vld1.32, which can, instead of using the general purpose loads +; followed by a costly sequence of instructions to build the vector register. 
+; CHECK: t3 +; CHECK: vld1.32 {[[REG:d[0-9]+]][0]} +; CHECK: vld1.32 {[[REG]][1]} +; CHECK: vmull.u8 q{{[0-9]+}}, [[REG]], [[REG]] +define <8 x i16> @t3(i8 zeroext %xf, i8* nocapture %sp0, i8* nocapture %sp1, i32* nocapture %outp) { +entry: + %pix_sp0.0.cast = bitcast i8* %sp0 to i32* + %pix_sp0.0.copyload = load i32* %pix_sp0.0.cast, align 1 + %pix_sp1.0.cast = bitcast i8* %sp1 to i32* + %pix_sp1.0.copyload = load i32* %pix_sp1.0.cast, align 1 + %vecinit = insertelement <2 x i32> undef, i32 %pix_sp0.0.copyload, i32 0 + %vecinit1 = insertelement <2 x i32> %vecinit, i32 %pix_sp1.0.copyload, i32 1 + %0 = bitcast <2 x i32> %vecinit1 to <8 x i8> + %vmull.i = tail call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %0, <8 x i8> %0) + ret <8 x i16> %vmull.i +} + +; Function Attrs: nounwind readnone +declare <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8>, <8 x i8>) |

