diff options
| author | Oliver Cruickshank <oliver.cruickshank@arm.com> | 2019-09-16 15:19:56 +0000 | 
|---|---|---|
| committer | Oliver Cruickshank <oliver.cruickshank@arm.com> | 2019-09-16 15:19:56 +0000 | 
| commit | 5f799ef1627f6f4f548f411a40fb94c620af25b6 (patch) | |
| tree | 6165f0798a07f4266bab061ae99a88063b0bbe7f | |
| parent | cd1a0b92710e567c00f6d2b932b197e9a1773f7d (diff) | |
| download | bcm5719-llvm-5f799ef1627f6f4f548f411a40fb94c620af25b6.tar.gz bcm5719-llvm-5f799ef1627f6f4f548f411a40fb94c620af25b6.zip  | |
[ARM] Lower CTTZ on MVE
Lower CTTZ on MVE using VBRSR and VCLS, which will reverse the bits and
count the leading zeros, equivalent to a count of trailing zeros (CTTZ).
llvm-svn: 372000
| -rw-r--r-- | llvm/lib/Target/ARM/ARMISelLowering.cpp | 4 |
| -rw-r--r-- | llvm/test/CodeGen/Thumb2/mve-cttz.ll | 178 |
2 files changed, 180 insertions, 2 deletions
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index f7d5496303a..d8e3b0973b9 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -262,6 +262,7 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {      setOperationAction(ISD::MLOAD, VT, Custom);      setOperationAction(ISD::MSTORE, VT, Legal);      setOperationAction(ISD::CTLZ, VT, Legal); +    setOperationAction(ISD::CTTZ, VT, Expand);      // No native support for these.      setOperationAction(ISD::UDIV, VT, Expand); @@ -5805,8 +5806,7 @@ static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,                           const ARMSubtarget *ST) {    SDLoc dl(N);    EVT VT = N->getValueType(0); -  if (VT.isVector()) { -    assert(ST->hasNEON()); +  if (VT.isVector() && ST->hasNEON()) {      // Compute the least significant set bit: LSB = X & -X      SDValue X = N->getOperand(0); diff --git a/llvm/test/CodeGen/Thumb2/mve-cttz.ll b/llvm/test/CodeGen/Thumb2/mve-cttz.ll new file mode 100644 index 00000000000..c30906fdbb1 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/mve-cttz.ll @@ -0,0 +1,178 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -verify-machineinstrs -mattr=+mve %s -o - | FileCheck %s + +define arm_aapcs_vfpcc <2 x i64> @cttz_2i64_0_t(<2 x i64> %src){ +; CHECK-LABEL: cttz_2i64_0_t: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    vmov r0, s2 +; CHECK-NEXT:    cmp r0, #0 +; CHECK-NEXT:    rbit r0, r0 +; CHECK-NEXT:    cset r1, ne +; CHECK-NEXT:    lsls r1, r1, #31 +; CHECK-NEXT:    vmov r1, s3 +; CHECK-NEXT:    rbit r1, r1 +; CHECK-NEXT:    clz r1, r1 +; CHECK-NEXT:    add.w r1, r1, #32 +; CHECK-NEXT:    it ne +; CHECK-NEXT:    clzne r1, r0 +; CHECK-NEXT:    vmov r0, s0 +; CHECK-NEXT:    vmov s6, r1 +; CHECK-NEXT:    cmp r0, #0 +; CHECK-NEXT:    rbit r0, r0 +; CHECK-NEXT:    cset r1, ne +; CHECK-NEXT:    lsls r1, r1, 
#31 +; CHECK-NEXT:    vmov r1, s1 +; CHECK-NEXT:    rbit r1, r1 +; CHECK-NEXT:    clz r1, r1 +; CHECK-NEXT:    add.w r1, r1, #32 +; CHECK-NEXT:    it ne +; CHECK-NEXT:    clzne r1, r0 +; CHECK-NEXT:    vmov s4, r1 +; CHECK-NEXT:    vldr s5, .LCPI0_0 +; CHECK-NEXT:    vmov.f32 s7, s5 +; CHECK-NEXT:    vmov q0, q1 +; CHECK-NEXT:    bx lr +; CHECK-NEXT:    .p2align 2 +; CHECK-NEXT:  @ %bb.1: +; CHECK-NEXT:  .LCPI0_0: +; CHECK-NEXT:    .long 0 @ float 0 +entry: +  %0 = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %src, i1 0) +  ret <2 x i64> %0 +} + +define arm_aapcs_vfpcc <4 x i32> @cttz_4i32_0_t(<4 x i32> %src){ +; CHECK-LABEL: cttz_4i32_0_t: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    vmov.i32 q1, #0x1 +; CHECK-NEXT:    vsub.i32 q1, q0, q1 +; CHECK-NEXT:    vbic q0, q1, q0 +; CHECK-NEXT:    vmov.i32 q1, #0x20 +; CHECK-NEXT:    vclz.i32 q0, q0 +; CHECK-NEXT:    vsub.i32 q0, q1, q0 +; CHECK-NEXT:    bx lr +entry: +  %0 = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %src, i1 0) +  ret <4 x i32> %0 +} + +define arm_aapcs_vfpcc <8 x i16> @cttz_8i16_0_t(<8 x i16> %src){ +; CHECK-LABEL: cttz_8i16_0_t: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    vmov.i16 q1, #0x1 +; CHECK-NEXT:    vsub.i16 q1, q0, q1 +; CHECK-NEXT:    vbic q0, q1, q0 +; CHECK-NEXT:    vmov.i16 q1, #0x10 +; CHECK-NEXT:    vclz.i16 q0, q0 +; CHECK-NEXT:    vsub.i16 q0, q1, q0 +; CHECK-NEXT:    bx lr +entry: +  %0 = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %src, i1 0) +  ret <8 x i16> %0 +} + +define arm_aapcs_vfpcc <16 x i8> @cttz_16i8_0_t(<16 x i8> %src) { +; CHECK-LABEL: cttz_16i8_0_t: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    vmov.i8 q1, #0x1 +; CHECK-NEXT:    vsub.i8 q1, q0, q1 +; CHECK-NEXT:    vbic q0, q1, q0 +; CHECK-NEXT:    vmov.i8 q1, #0x8 +; CHECK-NEXT:    vclz.i8 q0, q0 +; CHECK-NEXT:    vsub.i8 q0, q1, q0 +; CHECK-NEXT:    bx lr +entry: +  %0 = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %src, i1 0) +  ret <16 x i8> %0 +} + +define arm_aapcs_vfpcc <2 x i64> @cttz_2i64_1_t(<2 x 
i64> %src){ +; CHECK-LABEL: cttz_2i64_1_t: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    vmov r0, s2 +; CHECK-NEXT:    cmp r0, #0 +; CHECK-NEXT:    rbit r0, r0 +; CHECK-NEXT:    cset r1, ne +; CHECK-NEXT:    lsls r1, r1, #31 +; CHECK-NEXT:    vmov r1, s3 +; CHECK-NEXT:    rbit r1, r1 +; CHECK-NEXT:    clz r1, r1 +; CHECK-NEXT:    add.w r1, r1, #32 +; CHECK-NEXT:    it ne +; CHECK-NEXT:    clzne r1, r0 +; CHECK-NEXT:    vmov r0, s0 +; CHECK-NEXT:    vmov s6, r1 +; CHECK-NEXT:    cmp r0, #0 +; CHECK-NEXT:    rbit r0, r0 +; CHECK-NEXT:    cset r1, ne +; CHECK-NEXT:    lsls r1, r1, #31 +; CHECK-NEXT:    vmov r1, s1 +; CHECK-NEXT:    rbit r1, r1 +; CHECK-NEXT:    clz r1, r1 +; CHECK-NEXT:    add.w r1, r1, #32 +; CHECK-NEXT:    it ne +; CHECK-NEXT:    clzne r1, r0 +; CHECK-NEXT:    vmov s4, r1 +; CHECK-NEXT:    vldr s5, .LCPI4_0 +; CHECK-NEXT:    vmov.f32 s7, s5 +; CHECK-NEXT:    vmov q0, q1 +; CHECK-NEXT:    bx lr +; CHECK-NEXT:    .p2align 2 +; CHECK-NEXT:  @ %bb.1: +; CHECK-NEXT:  .LCPI4_0: +; CHECK-NEXT:    .long 0 @ float 0 +entry: +  %0 = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %src, i1 1) +  ret <2 x i64> %0 +} + +define arm_aapcs_vfpcc <4 x i32> @cttz_4i32_1_t(<4 x i32> %src){ +; CHECK-LABEL: cttz_4i32_1_t: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    vmov.i32 q1, #0x1 +; CHECK-NEXT:    vsub.i32 q1, q0, q1 +; CHECK-NEXT:    vbic q0, q1, q0 +; CHECK-NEXT:    vmov.i32 q1, #0x20 +; CHECK-NEXT:    vclz.i32 q0, q0 +; CHECK-NEXT:    vsub.i32 q0, q1, q0 +; CHECK-NEXT:    bx lr +entry: +  %0 = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %src, i1 1) +  ret <4 x i32> %0 +} + +define arm_aapcs_vfpcc <8 x i16> @cttz_8i16_1_t(<8 x i16> %src){ +; CHECK-LABEL: cttz_8i16_1_t: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    vmov.i16 q1, #0x1 +; CHECK-NEXT:    vsub.i16 q1, q0, q1 +; CHECK-NEXT:    vbic q0, q1, q0 +; CHECK-NEXT:    vmov.i16 q1, #0x10 +; CHECK-NEXT:    vclz.i16 q0, q0 +; CHECK-NEXT:    vsub.i16 q0, q1, q0 +; CHECK-NEXT:    bx lr +entry: +  %0 = call 
<8 x i16> @llvm.cttz.v8i16(<8 x i16> %src, i1 1) +  ret <8 x i16> %0 +} + +define arm_aapcs_vfpcc <16 x i8> @cttz_16i8_1_t(<16 x i8> %src) { +; CHECK-LABEL: cttz_16i8_1_t: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    vmov.i8 q1, #0x1 +; CHECK-NEXT:    vsub.i8 q1, q0, q1 +; CHECK-NEXT:    vbic q0, q1, q0 +; CHECK-NEXT:    vmov.i8 q1, #0x8 +; CHECK-NEXT:    vclz.i8 q0, q0 +; CHECK-NEXT:    vsub.i8 q0, q1, q0 +; CHECK-NEXT:    bx lr +entry: +  %0 = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %src, i1 1) +  ret <16 x i8> %0 +} + + +declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>, i1) +declare <4 x i32> @llvm.cttz.v4i32(<4 x i32>, i1) +declare <8 x i16> @llvm.cttz.v8i16(<8 x i16>, i1) +declare <16 x i8> @llvm.cttz.v16i8(<16 x i8>, i1)  | 

