diff options
author | Logan Chien <tzuhsiang.chien@gmail.com> | 2015-07-13 15:37:30 +0000 |
---|---|---|
committer | Logan Chien <tzuhsiang.chien@gmail.com> | 2015-07-13 15:37:30 +0000 |
commit | 0a43abc9f841246ba2974fe189628d7c45484cb6 (patch) | |
tree | 9e64cc838db184a3b43bc4c81dcac37fa15a14d8 /llvm/test | |
parent | 69bf1ce03a52bac5050ffaae5f311541d573bb99 (diff) | |
download | bcm5719-llvm-0a43abc9f841246ba2974fe189628d7c45484cb6.tar.gz bcm5719-llvm-0a43abc9f841246ba2974fe189628d7c45484cb6.zip |
ARM: Fix cttz expansion on vector types.
The 64/128-bit vector types are legal if NEON instructions are
available. However, there was no matching patterns for @llvm.cttz.*()
intrinsics and result in fatal error.
This commit fixes the problem by lowering cttz to:
a. ctpop((x & -x) - 1)
b. width - ctlz(x & -x) - 1
llvm-svn: 242037
Diffstat (limited to 'llvm/test')
-rw-r--r-- | llvm/test/CodeGen/ARM/cttz.ll | 90 | ||||
-rw-r--r-- | llvm/test/CodeGen/ARM/cttz_vector.ll | 383 | ||||
-rw-r--r-- | llvm/test/CodeGen/ARM/ctz.ll | 11 |
3 files changed, 473 insertions, 11 deletions
diff --git a/llvm/test/CodeGen/ARM/cttz.ll b/llvm/test/CodeGen/ARM/cttz.ll new file mode 100644 index 00000000000..dacfca50593 --- /dev/null +++ b/llvm/test/CodeGen/ARM/cttz.ll @@ -0,0 +1,90 @@ +; RUN: llc < %s -mtriple arm-eabi -mattr=+v6t2 | FileCheck %s +; RUN: llc < %s -mtriple arm-eabi -mattr=+v6t2 -mattr=+neon | FileCheck %s + +; This test checks the @llvm.cttz.* intrinsics for integers. + +declare i8 @llvm.cttz.i8(i8, i1) +declare i16 @llvm.cttz.i16(i16, i1) +declare i32 @llvm.cttz.i32(i32, i1) +declare i64 @llvm.cttz.i64(i64, i1) + +;------------------------------------------------------------------------------ + +define i8 @test_i8(i8 %a) { +; CHECK-LABEL: test_i8: +; CHECK: orr [[REG:r[0-9]+]], [[REG]], #256 +; CHECK: rbit +; CHECK: clz + %tmp = call i8 @llvm.cttz.i8(i8 %a, i1 false) + ret i8 %tmp +} + +define i16 @test_i16(i16 %a) { +; CHECK-LABEL: test_i16: +; CHECK: orr [[REG:r[0-9]+]], [[REG]], #65536 +; CHECK: rbit +; CHECK: clz + %tmp = call i16 @llvm.cttz.i16(i16 %a, i1 false) + ret i16 %tmp +} + +define i32 @test_i32(i32 %a) { +; CHECK-LABEL: test_i32: +; CHECK: rbit +; CHECK: clz + %tmp = call i32 @llvm.cttz.i32(i32 %a, i1 false) + ret i32 %tmp +} + +define i64 @test_i64(i64 %a) { +; CHECK-LABEL: test_i64: +; CHECK: rbit +; CHECK: rbit +; CHECK: cmp +; CHECK: clz +; CHECK: add +; CHECK: clzne + %tmp = call i64 @llvm.cttz.i64(i64 %a, i1 false) + ret i64 %tmp +} + +;------------------------------------------------------------------------------ + +define i8 @test_i8_zero_undef(i8 %a) { +; CHECK-LABEL: test_i8_zero_undef: +; CHECK-NOT: orr +; CHECK: rbit +; CHECK: clz + %tmp = call i8 @llvm.cttz.i8(i8 %a, i1 true) + ret i8 %tmp +} + +define i16 @test_i16_zero_undef(i16 %a) { +; CHECK-LABEL: test_i16_zero_undef: +; CHECK-NOT: orr +; CHECK: rbit +; CHECK: clz + %tmp = call i16 @llvm.cttz.i16(i16 %a, i1 true) + ret i16 %tmp +} + + +define i32 @test_i32_zero_undef(i32 %a) { +; CHECK-LABEL: test_i32_zero_undef: +; CHECK: rbit +; CHECK: clz + %tmp = call i32 @llvm.cttz.i32(i32 %a, i1 true) + ret i32 %tmp +} + +define i64 @test_i64_zero_undef(i64 %a) { +; CHECK-LABEL: test_i64_zero_undef: +; CHECK: rbit +; CHECK: rbit +; CHECK: cmp +; CHECK: clz +; CHECK: add +; CHECK: clzne + %tmp = call i64 @llvm.cttz.i64(i64 %a, i1 true) + ret i64 %tmp +} diff --git a/llvm/test/CodeGen/ARM/cttz_vector.ll b/llvm/test/CodeGen/ARM/cttz_vector.ll new file mode 100644 index 00000000000..9480d75db47 --- /dev/null +++ b/llvm/test/CodeGen/ARM/cttz_vector.ll @@ -0,0 +1,383 @@ +; RUN: llc < %s -mtriple armv7-linux-gnueabihf -mattr=+neon | FileCheck %s + +; This test checks the @llvm.cttz.* intrinsics for vectors. + +declare <1 x i8> @llvm.cttz.v1i8(<1 x i8>, i1) +declare <2 x i8> @llvm.cttz.v2i8(<2 x i8>, i1) +declare <4 x i8> @llvm.cttz.v4i8(<4 x i8>, i1) +declare <8 x i8> @llvm.cttz.v8i8(<8 x i8>, i1) +declare <16 x i8> @llvm.cttz.v16i8(<16 x i8>, i1) + +declare <1 x i16> @llvm.cttz.v1i16(<1 x i16>, i1) +declare <2 x i16> @llvm.cttz.v2i16(<2 x i16>, i1) +declare <4 x i16> @llvm.cttz.v4i16(<4 x i16>, i1) +declare <8 x i16> @llvm.cttz.v8i16(<8 x i16>, i1) + +declare <1 x i32> @llvm.cttz.v1i32(<1 x i32>, i1) +declare <2 x i32> @llvm.cttz.v2i32(<2 x i32>, i1) +declare <4 x i32> @llvm.cttz.v4i32(<4 x i32>, i1) + +declare <1 x i64> @llvm.cttz.v1i64(<1 x i64>, i1) +declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>, i1) + +;------------------------------------------------------------------------------ + +define void @test_v1i8(<1 x i8>* %p) { +; CHECK-LABEL: test_v1i8 + %a = load <1 x i8>, <1 x i8>* %p + %tmp = call <1 x i8> @llvm.cttz.v1i8(<1 x i8> %a, i1 false) + store <1 x i8> %tmp, <1 x i8>* %p + ret void +} + +define void @test_v2i8(<2 x i8>* %p) { +; CHECK-LABEL: test_v2i8: + %a = load <2 x i8>, <2 x i8>* %p + %tmp = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> %a, i1 false) + store <2 x i8> %tmp, <2 x i8>* %p + ret void +} + +define void @test_v4i8(<4 x i8>* %p) { +; CHECK-LABEL: test_v4i8: + %a = load <4 x i8>, <4 x i8>* %p + %tmp = call <4 x i8> @llvm.cttz.v4i8(<4 x i8> %a, i1 false) + store <4 x i8> %tmp, <4 x i8>* %p + ret void +} + +define void @test_v8i8(<8 x i8>* %p) { +; CHECK-LABEL: test_v8i8: +; CHECK: vldr [[D1:d[0-9]+]], [r0] +; CHECK: vmov.i8 [[D2:d[0-9]+]], #0x1 +; CHECK: vneg.s8 [[D3:d[0-9]+]], [[D1]] +; CHECK: vand [[D1]], [[D1]], [[D3]] +; CHECK: vsub.i8 [[D1]], [[D1]], [[D2]] +; CHECK: vcnt.8 [[D1]], [[D1]] +; CHECK: vstr [[D1]], [r0] + %a = load <8 x i8>, <8 x i8>* %p + %tmp = call <8 x i8> @llvm.cttz.v8i8(<8 x i8> %a, i1 false) + store <8 x i8> %tmp, <8 x i8>* %p + ret void +} + +define void @test_v16i8(<16 x i8>* %p) { +; CHECK-LABEL: test_v16i8: +; CHECK: vld1.64 {[[D1:d[0-9]+]], [[D2:d[0-9]+]]}, [r0] +; CHECK: vmov.i8 [[Q2:q[0-9]+]], #0x1 +; CHECK: vneg.s8 [[Q3:q[0-9]+]], [[Q1:q[0-9]+]] +; CHECK: vand [[Q1]], [[Q1]], [[Q3]] +; CHECK: vsub.i8 [[Q1]], [[Q1]], [[Q2]] +; CHECK: vcnt.8 [[Q1]], [[Q1]] +; CHECK: vst1.64 {[[D1]], [[D2]]}, [r0] + %a = load <16 x i8>, <16 x i8>* %p + %tmp = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false) + store <16 x i8> %tmp, <16 x i8>* %p + ret void +} + +define void @test_v1i16(<1 x i16>* %p) { +; CHECK-LABEL: test_v1i16: + %a = load <1 x i16>, <1 x i16>* %p + %tmp = call <1 x i16> @llvm.cttz.v1i16(<1 x i16> %a, i1 false) + store <1 x i16> %tmp, <1 x i16>* %p + ret void +} + +define void @test_v2i16(<2 x i16>* %p) { +; CHECK-LABEL: test_v2i16: + %a = load <2 x i16>, <2 x i16>* %p + %tmp = call <2 x i16> @llvm.cttz.v2i16(<2 x i16> %a, i1 false) + store <2 x i16> %tmp, <2 x i16>* %p + ret void +} + +define void @test_v4i16(<4 x i16>* %p) { +; CHECK-LABEL: test_v4i16: +; CHECK: vldr [[D1:d[0-9]+]], [r0] +; CHECK: vmov.i16 [[D2:d[0-9]+]], #0x1 +; CHECK: vneg.s16 [[D3:d[0-9]+]], [[D1]] +; CHECK: vand [[D1]], [[D1]], [[D3]] +; CHECK: vsub.i16 [[D1]], [[D1]], [[D2]] +; CHECK: vcnt.8 [[D1]], [[D1]] +; CHECK: vpaddl.u8 [[D1]], [[D1]] +; CHECK: vstr [[D1]], [r0] + %a = load <4 x i16>, <4 x i16>* %p + %tmp = call <4 x i16> @llvm.cttz.v4i16(<4 x i16> %a, i1 false) + store <4 x i16> %tmp, <4 x i16>* %p + ret void +} + +define void @test_v8i16(<8 x i16>* %p) { +; CHECK-LABEL: test_v8i16: +; CHECK: vld1.64 {[[D1:d[0-9]+]], [[D2:d[0-9]+]]}, [r0] +; CHECK: vmov.i16 [[Q2:q[0-9]+]], #0x1 +; CHECK: vneg.s16 [[Q3:q[0-9]+]], [[Q1:q[0-9]+]] +; CHECK: vand [[Q1]], [[Q1]], [[Q3]] +; CHECK: vsub.i16 [[Q1]], [[Q1]], [[Q2]] +; CHECK: vcnt.8 [[Q1]], [[Q1]] +; CHECK: vpaddl.u8 [[Q1]], [[Q1]] +; CHECK: vst1.64 {[[D1]], [[D2]]}, [r0] + %a = load <8 x i16>, <8 x i16>* %p + %tmp = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false) + store <8 x i16> %tmp, <8 x i16>* %p + ret void +} + +define void @test_v1i32(<1 x i32>* %p) { +; CHECK-LABEL: test_v1i32: + %a = load <1 x i32>, <1 x i32>* %p + %tmp = call <1 x i32> @llvm.cttz.v1i32(<1 x i32> %a, i1 false) + store <1 x i32> %tmp, <1 x i32>* %p + ret void +} + +define void @test_v2i32(<2 x i32>* %p) { +; CHECK-LABEL: test_v2i32: +; CHECK: vldr [[D1:d[0-9]+]], [r0] +; CHECK: vmov.i32 [[D2:d[0-9]+]], #0x1 +; CHECK: vneg.s32 [[D3:d[0-9]+]], [[D1]] +; CHECK: vand [[D1]], [[D1]], [[D3]] +; CHECK: vsub.i32 [[D1]], [[D1]], [[D2]] +; CHECK: vcnt.8 [[D1]], [[D1]] +; CHECK: vpaddl.u8 [[D1]], [[D1]] +; CHECK: vpaddl.u16 [[D1]], [[D1]] +; CHECK: vstr [[D1]], [r0] + %a = load <2 x i32>, <2 x i32>* %p + %tmp = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %a, i1 false) + store <2 x i32> %tmp, <2 x i32>* %p + ret void +} + +define void @test_v4i32(<4 x i32>* %p) { +; CHECK-LABEL: test_v4i32: +; CHECK: vld1.64 {[[D1:d[0-9]+]], [[D2:d[0-9]+]]}, [r0] +; CHECK: vmov.i32 [[Q2:q[0-9]+]], #0x1 +; CHECK: vneg.s32 [[Q3:q[0-9]+]], [[Q1:q[0-9]+]] +; CHECK: vand [[Q1]], [[Q1]], [[Q3]] +; CHECK: vsub.i32 [[Q1]], [[Q1]], [[Q2]] +; CHECK: vcnt.8 [[Q1]], [[Q1]] +; CHECK: vpaddl.u8 [[Q1]], [[Q1]] +; CHECK: vpaddl.u16 [[Q1]], [[Q1]] +; CHECK: vst1.64 {[[D1]], [[D2]]}, [r0] + %a = load <4 x i32>, <4 x i32>* %p + %tmp = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false) + store <4 x i32> %tmp, <4 x i32>* %p + ret void +} + +define void @test_v1i64(<1 x i64>* %p) { +; CHECK-LABEL: test_v1i64: +; CHECK: vldr [[D1:d[0-9]+]], [r0] +; CHECK: vmov.i32 [[D2:d[0-9]+]], #0x0 +; CHECK: vmov.i64 [[D3:d[0-9]+]], #0xffffffffffffffff +; CHECK: vsub.i64 [[D2]], [[D2]], [[D1]] +; CHECK: vand [[D1]], [[D1]], [[D2]] +; CHECK: vadd.i64 [[D1]], [[D1]], [[D3]] +; CHECK: vcnt.8 [[D1]], [[D1]] +; CHECK: vpaddl.u8 [[D1]], [[D1]] +; CHECK: vpaddl.u16 [[D1]], [[D1]] +; CHECK: vpaddl.u32 [[D1]], [[D1]] +; CHECK: vstr [[D1]], [r0] + %a = load <1 x i64>, <1 x i64>* %p + %tmp = call <1 x i64> @llvm.cttz.v1i64(<1 x i64> %a, i1 false) + store <1 x i64> %tmp, <1 x i64>* %p + ret void +} + +define void @test_v2i64(<2 x i64>* %p) { +; CHECK-LABEL: test_v2i64: +; CHECK: vld1.64 {[[D1:d[0-9]+]], [[D2:d[0-9]+]]}, [r0] +; CHECK: vmov.i32 [[Q2:q[0-9]+]], #0x0 +; CHECK: vmov.i64 [[Q3:q[0-9]+]], #0xffffffffffffffff +; CHECK: vsub.i64 [[Q2]], [[Q2]], [[Q1:q[0-9]+]] +; CHECK: vand [[Q1]], [[Q1]], [[Q2]] +; CHECK: vadd.i64 [[Q1]], [[Q1]], [[Q3]] +; CHECK: vcnt.8 [[Q1]], [[Q1]] +; CHECK: vpaddl.u8 [[Q1]], [[Q1]] +; CHECK: vpaddl.u16 [[Q1]], [[Q1]] +; CHECK: vpaddl.u32 [[Q1]], [[Q1]] +; CHECK: vst1.64 {[[D1]], [[D2]]}, [r0] + %a = load <2 x i64>, <2 x i64>* %p + %tmp = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false) + store <2 x i64> %tmp, <2 x i64>* %p + ret void +} + +;------------------------------------------------------------------------------ + +define void @test_v1i8_zero_undef(<1 x i8>* %p) { +; CHECK-LABEL: test_v1i8_zero_undef + %a = load <1 x i8>, <1 x i8>* %p + %tmp = call <1 x i8> @llvm.cttz.v1i8(<1 x i8> %a, i1 true) + store <1 x i8> %tmp, <1 x i8>* %p + ret void +} + +define void @test_v2i8_zero_undef(<2 x i8>* %p) { +; CHECK-LABEL: test_v2i8_zero_undef: + %a = load <2 x i8>, <2 x i8>* %p + %tmp = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> %a, i1 true) + store <2 x i8> %tmp, <2 x i8>* %p + ret void +} + +define void @test_v4i8_zero_undef(<4 x i8>* %p) { +; CHECK-LABEL: test_v4i8_zero_undef: + %a = load <4 x i8>, <4 x i8>* %p + %tmp = call <4 x i8> @llvm.cttz.v4i8(<4 x i8> %a, i1 true) + store <4 x i8> %tmp, <4 x i8>* %p + ret void +} + +define void @test_v8i8_zero_undef(<8 x i8>* %p) { +; CHECK-LABEL: test_v8i8_zero_undef: +; CHECK: vldr [[D1:d[0-9]+]], [r0] +; CHECK: vmov.i8 [[D2:d[0-9]+]], #0x1 +; CHECK: vneg.s8 [[D3:d[0-9]+]], [[D1]] +; CHECK: vand [[D1]], [[D1]], [[D3]] +; CHECK: vsub.i8 [[D1]], [[D1]], [[D2]] +; CHECK: vcnt.8 [[D1]], [[D1]] +; CHECK: vstr [[D1]], [r0] + %a = load <8 x i8>, <8 x i8>* %p + %tmp = call <8 x i8> @llvm.cttz.v8i8(<8 x i8> %a, i1 true) + store <8 x i8> %tmp, <8 x i8>* %p + ret void +} + +define void @test_v16i8_zero_undef(<16 x i8>* %p) { +; CHECK-LABEL: test_v16i8_zero_undef: +; CHECK: vld1.64 {[[D1:d[0-9]+]], [[D2:d[0-9]+]]}, [r0] +; CHECK: vmov.i8 [[Q2:q[0-9]+]], #0x1 +; CHECK: vneg.s8 [[Q3:q[0-9]+]], [[Q1:q[0-9]+]] +; CHECK: vand [[Q1]], [[Q1]], [[Q3]] +; CHECK: vsub.i8 [[Q1]], [[Q1]], [[Q2]] +; CHECK: vcnt.8 [[Q1]], [[Q1]] +; CHECK: vst1.64 {[[D1]], [[D2]]}, [r0] + %a = load <16 x i8>, <16 x i8>* %p + %tmp = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true) + store <16 x i8> %tmp, <16 x i8>* %p + ret void +} + +define void @test_v1i16_zero_undef(<1 x i16>* %p) { +; CHECK-LABEL: test_v1i16_zero_undef: + %a = load <1 x i16>, <1 x i16>* %p + %tmp = call <1 x i16> @llvm.cttz.v1i16(<1 x i16> %a, i1 true) + store <1 x i16> %tmp, <1 x i16>* %p + ret void +} + +define void @test_v2i16_zero_undef(<2 x i16>* %p) { +; CHECK-LABEL: test_v2i16_zero_undef: + %a = load <2 x i16>, <2 x i16>* %p + %tmp = call <2 x i16> @llvm.cttz.v2i16(<2 x i16> %a, i1 true) + store <2 x i16> %tmp, <2 x i16>* %p + ret void +} + +define void @test_v4i16_zero_undef(<4 x i16>* %p) { +; CHECK-LABEL: test_v4i16_zero_undef: +; CHECK: vldr [[D1:d[0-9]+]], [r0] +; CHECK: vneg.s16 [[D2:d[0-9]+]], [[D1]] +; CHECK: vand [[D1]], [[D1]], [[D2]] +; CHECK: vmov.i16 [[D3:d[0-9]+]], #0xf +; CHECK: vclz.i16 [[D1]], [[D1]] +; CHECK: vsub.i16 [[D1]], [[D3]], [[D1]] +; CHECK: vstr [[D1]], [r0] + %a = load <4 x i16>, <4 x i16>* %p + %tmp = call <4 x i16> @llvm.cttz.v4i16(<4 x i16> %a, i1 true) + store <4 x i16> %tmp, <4 x i16>* %p + ret void +} + +define void @test_v8i16_zero_undef(<8 x i16>* %p) { +; CHECK-LABEL: test_v8i16_zero_undef: +; CHECK: vld1.64 {[[D1:d[0-9]+]], [[D2:d[0-9]+]]}, [r0] +; CHECK: vneg.s16 [[Q2:q[0-9]+]], [[Q1:q[0-9]+]] +; CHECK: vand [[Q1]], [[Q1]], [[Q2]] +; CHECK: vmov.i16 [[Q3:q[0-9]+]], #0xf +; CHECK: vclz.i16 [[Q1]], [[Q1]] +; CHECK: vsub.i16 [[Q1]], [[Q3]], [[Q1]] +; CHECK: vst1.64 {[[D1]], [[D2]]}, [r0] + %a = load <8 x i16>, <8 x i16>* %p + %tmp = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true) + store <8 x i16> %tmp, <8 x i16>* %p + ret void +} + +define void @test_v1i32_zero_undef(<1 x i32>* %p) { +; CHECK-LABEL: test_v1i32_zero_undef: + %a = load <1 x i32>, <1 x i32>* %p + %tmp = call <1 x i32> @llvm.cttz.v1i32(<1 x i32> %a, i1 true) + store <1 x i32> %tmp, <1 x i32>* %p + ret void +} + +define void @test_v2i32_zero_undef(<2 x i32>* %p) { +; CHECK-LABEL: test_v2i32_zero_undef: +; CHECK: vldr [[D1:d[0-9]+]], [r0] +; CHECK: vneg.s32 [[D2:d[0-9]+]], [[D1]] +; CHECK: vand [[D1]], [[D1]], [[D2]] +; CHECK: vmov.i32 [[D3:d[0-9]+]], #0x1f +; CHECK: vclz.i32 [[D1]], [[D1]] +; CHECK: vsub.i32 [[D1]], [[D3]], [[D1]] +; CHECK: vstr [[D1]], [r0] + %a = load <2 x i32>, <2 x i32>* %p + %tmp = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %a, i1 true) + store <2 x i32> %tmp, <2 x i32>* %p + ret void +} + +define void @test_v4i32_zero_undef(<4 x i32>* %p) { +; CHECK-LABEL: test_v4i32_zero_undef: +; CHECK: vld1.64 {[[D1:d[0-9]+]], [[D2:d[0-9]+]]}, [r0] +; CHECK: vneg.s32 [[Q2:q[0-9]+]], [[Q1:q[0-9]+]] +; CHECK: vand [[Q1]], [[Q1]], [[Q2]] +; CHECK: vmov.i32 [[Q3:q[0-9]+]], #0x1f +; CHECK: vclz.i32 [[Q1]], [[Q1]] +; CHECK: vsub.i32 [[Q1]], [[Q3]], [[Q1]] +; CHECK: vst1.64 {[[D1]], [[D2]]}, [r0] + %a = load <4 x i32>, <4 x i32>* %p + %tmp = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true) + store <4 x i32> %tmp, <4 x i32>* %p + ret void +} + +define void @test_v1i64_zero_undef(<1 x i64>* %p) { +; CHECK-LABEL: test_v1i64_zero_undef: +; CHECK: vldr [[D1:d[0-9]+]], [r0] +; CHECK: vmov.i32 [[D2:d[0-9]+]], #0x0 +; CHECK: vmov.i64 [[D3:d[0-9]+]], #0xffffffffffffffff +; CHECK: vsub.i64 [[D2]], [[D2]], [[D1]] +; CHECK: vand [[D1]], [[D1]], [[D2]] +; CHECK: vadd.i64 [[D1]], [[D1]], [[D3]] +; CHECK: vcnt.8 [[D1]], [[D1]] +; CHECK: vpaddl.u8 [[D1]], [[D1]] +; CHECK: vpaddl.u16 [[D1]], [[D1]] +; CHECK: vpaddl.u32 [[D1]], [[D1]] +; CHECK: vstr [[D1]], [r0] + %a = load <1 x i64>, <1 x i64>* %p + %tmp = call <1 x i64> @llvm.cttz.v1i64(<1 x i64> %a, i1 true) + store <1 x i64> %tmp, <1 x i64>* %p + ret void +} + +define void @test_v2i64_zero_undef(<2 x i64>* %p) { +; CHECK-LABEL: test_v2i64_zero_undef: +; CHECK: vld1.64 {[[D1:d[0-9]+]], [[D2:d[0-9]+]]}, [r0] +; CHECK: vmov.i32 [[Q2:q[0-9]+]], #0x0 +; CHECK: vmov.i64 [[Q3:q[0-9]+]], #0xffffffffffffffff +; CHECK: vsub.i64 [[Q2]], [[Q2]], [[Q1:q[0-9]+]] +; CHECK: vand [[Q1]], [[Q1]], [[Q2]] +; CHECK: vadd.i64 [[Q1]], [[Q1]], [[Q3]] +; CHECK: vcnt.8 [[Q1]], [[Q1]] +; CHECK: vpaddl.u8 [[Q1]], [[Q1]] +; CHECK: vpaddl.u16 [[Q1]], [[Q1]] +; CHECK: vpaddl.u32 [[Q1]], [[Q1]] +; CHECK: vst1.64 {[[D1]], [[D2]]}, [r0] + %a = load <2 x i64>, <2 x i64>* %p + %tmp = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true) + store <2 x i64> %tmp, <2 x i64>* %p + ret void +} diff --git a/llvm/test/CodeGen/ARM/ctz.ll b/llvm/test/CodeGen/ARM/ctz.ll deleted file mode 100644 index 2d88b0351cf..00000000000 --- a/llvm/test/CodeGen/ARM/ctz.ll +++ /dev/null @@ -1,11 +0,0 @@ -; RUN: llc -mtriple=arm-eabi -mattr=+v6t2 %s -o - | FileCheck %s - -declare i32 @llvm.cttz.i32(i32, i1) - -define i32 @f1(i32 %a) { -; CHECK-LABEL: f1: -; CHECK: rbit -; CHECK: clz - %tmp = call i32 @llvm.cttz.i32( i32 %a, i1 true ) - ret i32 %tmp -} |