Diffstat (limited to 'llvm')
-rw-r--r--  llvm/lib/Target/ARM/ARMISelLowering.cpp        |   6
-rw-r--r--  llvm/lib/Target/ARM/ARMInstrMVE.td              |  14
-rw-r--r--  llvm/test/CodeGen/Thumb2/mve-abs.ll             |  47
-rw-r--r--  llvm/test/CodeGen/Thumb2/mve-bitarith.ll        |  65
-rw-r--r--  llvm/test/CodeGen/Thumb2/mve-div-expand.ll      | 194
-rw-r--r--  llvm/test/CodeGen/Thumb2/mve-fmath.ll           | 250
-rw-r--r--  llvm/test/CodeGen/Thumb2/mve-fp-negabs.ll       |  57
-rw-r--r--  llvm/test/CodeGen/Thumb2/mve-frint.ll           | 150
-rw-r--r--  llvm/test/CodeGen/Thumb2/mve-minmax.ll          | 215
-rw-r--r--  llvm/test/CodeGen/Thumb2/mve-neg.ll             |  23
-rw-r--r--  llvm/test/CodeGen/Thumb2/mve-sext.ll            |  47
-rw-r--r--  llvm/test/CodeGen/Thumb2/mve-shifts.ll          | 205
-rw-r--r--  llvm/test/CodeGen/Thumb2/mve-shuffle.ll         | 146
-rw-r--r--  llvm/test/CodeGen/Thumb2/mve-simple-arith.ll    | 166
-rw-r--r--  llvm/test/CodeGen/Thumb2/mve-soft-float-abi.ll  |  68
-rw-r--r--  llvm/test/CodeGen/Thumb2/mve-vcvt.ll            | 108
-rw-r--r--  llvm/test/CodeGen/Thumb2/mve-vdup.ll            |  49
-rw-r--r--  llvm/test/CodeGen/Thumb2/mve-vmovimm.ll         |  89
-rw-r--r--  llvm/test/CodeGen/Thumb2/mve-vmvnimm.ll         |  16
19 files changed, 1875 insertions(+), 40 deletions(-)
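In short, the patch marks 128-bit bitwise AND/OR/XOR as legal on v2i64 under MVE, adds the matching VAND/VORR/VEOR/VBIC/VORN/VMVN selection patterns to ARMInstrMVE.td, and makes PerformShiftCombine bail out on v2i64 (for which MVE has no vector shift instruction). The bulk of the diff is test coverage locking in the resulting code generation for v2i64 and v2f64. A minimal sketch of the kind of IR this affects, adapted from the mve-bitarith.ll test added below (the function name is illustrative):

; With v2i64 AND now Legal, MVE selects a single q-register instruction,
;   vand q0, q0, q1
; instead of scalarising the operation through general-purpose registers.
define arm_aapcs_vfpcc <2 x i64> @and_v2i64(<2 x i64> %a, <2 x i64> %b) {
entry:
  %0 = and <2 x i64> %a, %b
  ret <2 x i64> %0
}

This works because the MVE bitwise instructions operate on the full 128-bit Q register regardless of element size, so no new instruction semantics are needed, only legality and patterns.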
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index a67adde262d..4eb8e0738a9 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -320,6 +320,10 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);    } +  // We can do bitwise operations on v2i64 vectors +  setOperationAction(ISD::AND, MVT::v2i64, Legal); +  setOperationAction(ISD::OR, MVT::v2i64, Legal); +  setOperationAction(ISD::XOR, MVT::v2i64, Legal);    // It is legal to extload from v4i8 to v4i16 or v4i32.    addAllExtLoads(MVT::v8i16, MVT::v8i8, Legal); @@ -12855,6 +12859,8 @@ static SDValue PerformShiftCombine(SDNode *N,    const TargetLowering &TLI = DAG.getTargetLoweringInfo();    if (!VT.isVector() || !TLI.isTypeLegal(VT))      return SDValue(); +  if (ST->hasMVEIntegerOps() && VT == MVT::v2i64) +    return SDValue();    int64_t Cnt; diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td index 1880daa6294..3e7ae55c7fc 100644 --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -1035,6 +1035,8 @@ let Predicates = [HasMVEInt] in {              (v8i16 (MVE_VMVN (v8i16 MQPR:$val1)))>;    def : Pat<(v4i32 (vnotq  (v4i32 MQPR:$val1))),              (v4i32 (MVE_VMVN (v4i32 MQPR:$val1)))>; +  def : Pat<(v2i64 (vnotq  (v2i64 MQPR:$val1))), +            (v2i64 (MVE_VMVN (v2i64 MQPR:$val1)))>;  }  class MVE_bit_ops<string iname, bits<2> bit_21_20, bit bit_28> @@ -1081,6 +1083,8 @@ let Predicates = [HasMVEInt] in {              (v8i16 (MVE_VAND (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;    def : Pat<(v4i32 (and (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),              (v4i32 (MVE_VAND (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>; +  def : Pat<(v2i64 (and (v2i64 MQPR:$val1), (v2i64 MQPR:$val2))), +            (v2i64 (MVE_VAND (v2i64 MQPR:$val1), (v2i64 MQPR:$val2)))>;    def : Pat<(v16i8 (or (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),              (v16i8 (MVE_VORR (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>; @@ -1088,6 +1092,8 @@ let Predicates = [HasMVEInt] in {              (v8i16 (MVE_VORR (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;    def : Pat<(v4i32 (or (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),              (v4i32 (MVE_VORR (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>; +  def : Pat<(v2i64 (or (v2i64 MQPR:$val1), (v2i64 MQPR:$val2))), +            (v2i64 (MVE_VORR (v2i64 MQPR:$val1), (v2i64 MQPR:$val2)))>;    def : Pat<(v16i8 (xor (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),              (v16i8 (MVE_VEOR (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>; @@ -1095,6 +1101,8 @@ let Predicates = [HasMVEInt] in {              (v8i16 (MVE_VEOR (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;    def : Pat<(v4i32 (xor (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),              (v4i32 (MVE_VEOR (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>; +  def : Pat<(v2i64 (xor (v2i64 MQPR:$val1), (v2i64 MQPR:$val2))), +            (v2i64 (MVE_VEOR (v2i64 MQPR:$val1), (v2i64 MQPR:$val2)))>;    def : Pat<(v16i8 (and (v16i8 MQPR:$val1), (vnotq MQPR:$val2))),              (v16i8 (MVE_VBIC (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>; @@ -1102,13 +1110,17 @@ let Predicates = [HasMVEInt] in {              (v8i16 (MVE_VBIC (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;    def : Pat<(v4i32 (and (v4i32 MQPR:$val1), (vnotq MQPR:$val2))),              (v4i32 (MVE_VBIC (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>; +  def : Pat<(v2i64 
(and (v2i64 MQPR:$val1), (vnotq MQPR:$val2))), +            (v2i64 (MVE_VBIC (v2i64 MQPR:$val1), (v2i64 MQPR:$val2)))>; -  def : Pat<(v16i8 (or (v16i8 MQPR:$val1), (vnotq (v16i8 MQPR:$val2)))), +  def : Pat<(v16i8 (or (v16i8 MQPR:$val1), (vnotq MQPR:$val2))),              (v16i8 (MVE_VORN (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;    def : Pat<(v8i16 (or (v8i16 MQPR:$val1), (vnotq MQPR:$val2))),              (v8i16 (MVE_VORN (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;    def : Pat<(v4i32 (or (v4i32 MQPR:$val1), (vnotq MQPR:$val2))),              (v4i32 (MVE_VORN (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>; +  def : Pat<(v2i64 (or (v2i64 MQPR:$val1), (vnotq MQPR:$val2))), +            (v2i64 (MVE_VORN (v2i64 MQPR:$val1), (v2i64 MQPR:$val2)))>;  }  class MVE_bit_cmode<string iname, string suffix, bits<4> cmode, dag inOps> diff --git a/llvm/test/CodeGen/Thumb2/mve-abs.ll b/llvm/test/CodeGen/Thumb2/mve-abs.ll index e0af56abf12..6e2100e2f46 100644 --- a/llvm/test/CodeGen/Thumb2/mve-abs.ll +++ b/llvm/test/CodeGen/Thumb2/mve-abs.ll @@ -36,3 +36,50 @@ entry:    %2 = select <4 x i1> %0, <4 x i32> %1, <4 x i32> %s1    ret <4 x i32> %2  } + +define arm_aapcs_vfpcc <2 x i64> @abs_v2i64(<2 x i64> %s1) { +; CHECK-LABEL: abs_v2i64: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    .save {r4, r5, r6, r7, lr} +; CHECK-NEXT:    push {r4, r5, r6, r7, lr} +; CHECK-NEXT:    vmov r12, s2 +; CHECK-NEXT:    movs r0, #0 +; CHECK-NEXT:    vmov r3, s3 +; CHECK-NEXT:    vmov r1, s0 +; CHECK-NEXT:    rsbs.w lr, r12, #0 +; CHECK-NEXT:    sbc.w r5, r0, r3 +; CHECK-NEXT:    cmp r3, #0 +; CHECK-NEXT:    mov r2, lr +; CHECK-NEXT:    lsrl r2, r5, #32 +; CHECK-NEXT:    mov.w r5, #0 +; CHECK-NEXT:    it mi +; CHECK-NEXT:    movmi r5, #1 +; CHECK-NEXT:    cmp r5, #0 +; CHECK-NEXT:    it eq +; CHECK-NEXT:    moveq r2, r3 +; CHECK-NEXT:    vmov r3, s1 +; CHECK-NEXT:    rsbs r4, r1, #0 +; CHECK-NEXT:    mov r6, r4 +; CHECK-NEXT:    sbc.w r7, r0, r3 +; CHECK-NEXT:    cmp r3, #0 +; CHECK-NEXT:    lsrl r6, r7, #32 +; CHECK-NEXT:    it mi +; CHECK-NEXT:    movmi r0, #1 +; CHECK-NEXT:    cmp r0, #0 +; CHECK-NEXT:    ite eq +; CHECK-NEXT:    moveq r6, r3 +; CHECK-NEXT:    movne r1, r4 +; CHECK-NEXT:    vmov.32 q0[0], r1 +; CHECK-NEXT:    cmp r5, #0 +; CHECK-NEXT:    vmov.32 q0[1], r6 +; CHECK-NEXT:    it eq +; CHECK-NEXT:    moveq lr, r12 +; CHECK-NEXT:    vmov.32 q0[2], lr +; CHECK-NEXT:    vmov.32 q0[3], r2 +; CHECK-NEXT:    pop {r4, r5, r6, r7, pc} +entry: +  %0 = icmp slt <2 x i64> %s1, zeroinitializer +  %1 = sub nsw <2 x i64> zeroinitializer, %s1 +  %2 = select <2 x i1> %0, <2 x i64> %1, <2 x i64> %s1 +  ret <2 x i64> %2 +} diff --git a/llvm/test/CodeGen/Thumb2/mve-bitarith.ll b/llvm/test/CodeGen/Thumb2/mve-bitarith.ll index 1ee57124a60..30981816922 100644 --- a/llvm/test/CodeGen/Thumb2/mve-bitarith.ll +++ b/llvm/test/CodeGen/Thumb2/mve-bitarith.ll @@ -31,6 +31,16 @@ entry:    ret <4 x i32> %0  } +define arm_aapcs_vfpcc <2 x i64> @and_int64_t(<2 x i64> %src1, <2 x i64> %src2) { +; CHECK-LABEL: and_int64_t: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    vand q0, q0, q1 +; CHECK-NEXT:    bx lr +entry: +  %0 = and <2 x i64> %src1, %src2 +  ret <2 x i64> %0 +} +  define arm_aapcs_vfpcc <16 x i8> @or_int8_t(<16 x i8> %src1, <16 x i8> %src2) {  ; CHECK-LABEL: or_int8_t: @@ -62,6 +72,16 @@ entry:    ret <4 x i32> %0  } +define arm_aapcs_vfpcc <2 x i64> @or_int64_t(<2 x i64> %src1, <2 x i64> %src2) { +; CHECK-LABEL: or_int64_t: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    vorr q0, q0, q1 +; CHECK-NEXT:    bx lr +entry: +  %0 = 
or <2 x i64> %src1, %src2 +  ret <2 x i64> %0 +} +  define arm_aapcs_vfpcc <16 x i8> @xor_int8_t(<16 x i8> %src1, <16 x i8> %src2) {  ; CHECK-LABEL: xor_int8_t: @@ -93,6 +113,16 @@ entry:    ret <4 x i32> %0  } +define arm_aapcs_vfpcc <2 x i64> @xor_int64_t(<2 x i64> %src1, <2 x i64> %src2) { +; CHECK-LABEL: xor_int64_t: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    veor q0, q0, q1 +; CHECK-NEXT:    bx lr +entry: +  %0 = xor <2 x i64> %src1, %src2 +  ret <2 x i64> %0 +} +  define arm_aapcs_vfpcc <16 x i8> @v_mvn_i8(<16 x i8> %src) {  ; CHECK-LABEL: v_mvn_i8:  ; CHECK:       @ %bb.0: @ %entry @@ -123,6 +153,17 @@ entry:    ret <4 x i32> %0  } +define arm_aapcs_vfpcc <2 x i64> @v_mvn_i64(<2 x i64> %src) { +; CHECK-LABEL: v_mvn_i64: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    vmvn q0, q0 +; CHECK-NEXT:    bx lr +entry: +  %0 = xor <2 x i64> %src, <i64 -1, i64 -1> +  ret <2 x i64> %0 +} + +  define arm_aapcs_vfpcc <16 x i8> @v_bic_i8(<16 x i8> %src1, <16 x i8> %src2) {  ; CHECK-LABEL: v_bic_i8:  ; CHECK:       @ %bb.0: @ %entry @@ -156,6 +197,18 @@ entry:    ret <4 x i32> %1  } +define arm_aapcs_vfpcc <2 x i64> @v_bic_i64(<2 x i64> %src1, <2 x i64> %src2) { +; CHECK-LABEL: v_bic_i64: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    vbic q0, q1, q0 +; CHECK-NEXT:    bx lr +entry: +  %0 = xor <2 x i64> %src1, <i64 -1, i64 -1> +  %1 = and <2 x i64> %src2, %0 +  ret <2 x i64> %1 +} + +  define arm_aapcs_vfpcc <16 x i8> @v_or_i8(<16 x i8> %src1, <16 x i8> %src2) {  ; CHECK-LABEL: v_or_i8:  ; CHECK:       @ %bb.0: @ %entry @@ -188,3 +241,15 @@ entry:    %1 = or <4 x i32> %src2, %0    ret <4 x i32> %1  } + +define arm_aapcs_vfpcc <2 x i64> @v_or_i64(<2 x i64> %src1, <2 x i64> %src2) { +; CHECK-LABEL: v_or_i64: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    vorn q0, q1, q0 +; CHECK-NEXT:    bx lr +entry: +  %0 = xor <2 x i64> %src1, <i64 -1, i64 -1> +  %1 = or <2 x i64> %src2, %0 +  ret <2 x i64> %1 +} + diff --git a/llvm/test/CodeGen/Thumb2/mve-div-expand.ll b/llvm/test/CodeGen/Thumb2/mve-div-expand.ll index 02f2225c177..e0dddcd273c 100644 --- a/llvm/test/CodeGen/Thumb2/mve-div-expand.ll +++ b/llvm/test/CodeGen/Thumb2/mve-div-expand.ll @@ -736,6 +736,144 @@ entry:    ret <16 x i8> %out  } +define arm_aapcs_vfpcc <2 x i64> @udiv_i64(<2 x i64> %in1, <2 x i64> %in2) { +; CHECK-LABEL: udiv_i64: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    .save {r7, lr} +; CHECK-NEXT:    push {r7, lr} +; CHECK-NEXT:    .vsave {d8, d9, d10, d11} +; CHECK-NEXT:    vpush {d8, d9, d10, d11} +; CHECK-NEXT:    vmov q4, q1 +; CHECK-NEXT:    vmov q5, q0 +; CHECK-NEXT:    vmov r0, s20 +; CHECK-NEXT:    vmov r1, s21 +; CHECK-NEXT:    vmov r2, s16 +; CHECK-NEXT:    vmov r3, s17 +; CHECK-NEXT:    bl __aeabi_uldivmod +; CHECK-NEXT:    vmov r12, s22 +; CHECK-NEXT:    vmov lr, s23 +; CHECK-NEXT:    vmov r2, s18 +; CHECK-NEXT:    vmov r3, s19 +; CHECK-NEXT:    vmov.32 q4[0], r0 +; CHECK-NEXT:    lsrl r0, r1, #32 +; CHECK-NEXT:    vmov.32 q4[1], r0 +; CHECK-NEXT:    mov r0, r12 +; CHECK-NEXT:    mov r1, lr +; CHECK-NEXT:    bl __aeabi_uldivmod +; CHECK-NEXT:    vmov.32 q4[2], r0 +; CHECK-NEXT:    lsrl r0, r1, #32 +; CHECK-NEXT:    vmov.32 q4[3], r0 +; CHECK-NEXT:    vmov q0, q4 +; CHECK-NEXT:    vpop {d8, d9, d10, d11} +; CHECK-NEXT:    pop {r7, pc} +entry: +  %out = udiv <2 x i64> %in1, %in2 +  ret <2 x i64> %out +} + +define arm_aapcs_vfpcc <2 x i64> @sdiv_i64(<2 x i64> %in1, <2 x i64> %in2) { +; CHECK-LABEL: sdiv_i64: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    .save {r7, lr} +; CHECK-NEXT:    push 
{r7, lr} +; CHECK-NEXT:    .vsave {d8, d9, d10, d11} +; CHECK-NEXT:    vpush {d8, d9, d10, d11} +; CHECK-NEXT:    vmov q4, q1 +; CHECK-NEXT:    vmov q5, q0 +; CHECK-NEXT:    vmov r0, s20 +; CHECK-NEXT:    vmov r1, s21 +; CHECK-NEXT:    vmov r2, s16 +; CHECK-NEXT:    vmov r3, s17 +; CHECK-NEXT:    bl __aeabi_ldivmod +; CHECK-NEXT:    vmov r12, s22 +; CHECK-NEXT:    vmov lr, s23 +; CHECK-NEXT:    vmov r2, s18 +; CHECK-NEXT:    vmov r3, s19 +; CHECK-NEXT:    vmov.32 q4[0], r0 +; CHECK-NEXT:    lsrl r0, r1, #32 +; CHECK-NEXT:    vmov.32 q4[1], r0 +; CHECK-NEXT:    mov r0, r12 +; CHECK-NEXT:    mov r1, lr +; CHECK-NEXT:    bl __aeabi_ldivmod +; CHECK-NEXT:    vmov.32 q4[2], r0 +; CHECK-NEXT:    lsrl r0, r1, #32 +; CHECK-NEXT:    vmov.32 q4[3], r0 +; CHECK-NEXT:    vmov q0, q4 +; CHECK-NEXT:    vpop {d8, d9, d10, d11} +; CHECK-NEXT:    pop {r7, pc} +entry: +  %out = sdiv <2 x i64> %in1, %in2 +  ret <2 x i64> %out +} + +define arm_aapcs_vfpcc <2 x i64> @urem_i64(<2 x i64> %in1, <2 x i64> %in2) { +; CHECK-LABEL: urem_i64: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    .save {r7, lr} +; CHECK-NEXT:    push {r7, lr} +; CHECK-NEXT:    .vsave {d8, d9, d10, d11} +; CHECK-NEXT:    vpush {d8, d9, d10, d11} +; CHECK-NEXT:    vmov q4, q1 +; CHECK-NEXT:    vmov q5, q0 +; CHECK-NEXT:    vmov r0, s20 +; CHECK-NEXT:    vmov r1, s21 +; CHECK-NEXT:    vmov r2, s16 +; CHECK-NEXT:    vmov r3, s17 +; CHECK-NEXT:    bl __aeabi_uldivmod +; CHECK-NEXT:    vmov r12, s18 +; CHECK-NEXT:    vmov lr, s19 +; CHECK-NEXT:    vmov.32 q4[0], r2 +; CHECK-NEXT:    vmov r0, s22 +; CHECK-NEXT:    vmov.32 q4[1], r3 +; CHECK-NEXT:    vmov r1, s23 +; CHECK-NEXT:    mov r2, r12 +; CHECK-NEXT:    mov r3, lr +; CHECK-NEXT:    bl __aeabi_uldivmod +; CHECK-NEXT:    vmov.32 q4[2], r2 +; CHECK-NEXT:    vmov.32 q4[3], r3 +; CHECK-NEXT:    vmov q0, q4 +; CHECK-NEXT:    vpop {d8, d9, d10, d11} +; CHECK-NEXT:    pop {r7, pc} +entry: +  %out = urem <2 x i64> %in1, %in2 +  ret <2 x i64> %out +} + +define arm_aapcs_vfpcc <2 x i64> @srem_i64(<2 x i64> %in1, <2 x i64> %in2) { +; CHECK-LABEL: srem_i64: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    .save {r7, lr} +; CHECK-NEXT:    push {r7, lr} +; CHECK-NEXT:    .vsave {d8, d9, d10, d11} +; CHECK-NEXT:    vpush {d8, d9, d10, d11} +; CHECK-NEXT:    vmov q4, q1 +; CHECK-NEXT:    vmov q5, q0 +; CHECK-NEXT:    vmov r0, s20 +; CHECK-NEXT:    vmov r1, s21 +; CHECK-NEXT:    vmov r2, s16 +; CHECK-NEXT:    vmov r3, s17 +; CHECK-NEXT:    bl __aeabi_ldivmod +; CHECK-NEXT:    vmov r12, s18 +; CHECK-NEXT:    vmov lr, s19 +; CHECK-NEXT:    vmov.32 q4[0], r2 +; CHECK-NEXT:    vmov r0, s22 +; CHECK-NEXT:    vmov.32 q4[1], r3 +; CHECK-NEXT:    vmov r1, s23 +; CHECK-NEXT:    mov r2, r12 +; CHECK-NEXT:    mov r3, lr +; CHECK-NEXT:    bl __aeabi_ldivmod +; CHECK-NEXT:    vmov.32 q4[2], r2 +; CHECK-NEXT:    vmov.32 q4[3], r3 +; CHECK-NEXT:    vmov q0, q4 +; CHECK-NEXT:    vpop {d8, d9, d10, d11} +; CHECK-NEXT:    pop {r7, pc} +entry: +  %out = srem <2 x i64> %in1, %in2 +  ret <2 x i64> %out +} + + +  define arm_aapcs_vfpcc <4 x float> @fdiv_f32(<4 x float> %in1, <4 x float> %in2) {  ; CHECK-LABEL: fdiv_f32: @@ -992,3 +1130,59 @@ entry:    %out = frem <8 x half> %in1, %in2    ret <8 x half> %out  } + +define arm_aapcs_vfpcc <2 x double> @fdiv_f64(<2 x double> %in1, <2 x double> %in2) { +; CHECK-LABEL: fdiv_f64: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    .save {r7, lr} +; CHECK-NEXT:    push {r7, lr} +; CHECK-NEXT:    .vsave {d8, d9, d10, d11} +; CHECK-NEXT:    vpush {d8, d9, d10, d11} +; CHECK-NEXT:    
vmov q4, q1 +; CHECK-NEXT:    vmov q5, q0 +; CHECK-NEXT:    vmov r0, r1, d11 +; CHECK-NEXT:    vmov r2, r3, d9 +; CHECK-NEXT:    bl __aeabi_ddiv +; CHECK-NEXT:    vmov lr, r12, d10 +; CHECK-NEXT:    vmov r2, r3, d8 +; CHECK-NEXT:    vmov d9, r0, r1 +; CHECK-NEXT:    mov r0, lr +; CHECK-NEXT:    mov r1, r12 +; CHECK-NEXT:    bl __aeabi_ddiv +; CHECK-NEXT:    vmov d8, r0, r1 +; CHECK-NEXT:    vmov q0, q4 +; CHECK-NEXT:    vpop {d8, d9, d10, d11} +; CHECK-NEXT:    pop {r7, pc} +entry: +  %out = fdiv <2 x double> %in1, %in2 +  ret <2 x double> %out +} + +define arm_aapcs_vfpcc <2 x double> @frem_f64(<2 x double> %in1, <2 x double> %in2) { +; CHECK-LABEL: frem_f64: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    .save {r7, lr} +; CHECK-NEXT:    push {r7, lr} +; CHECK-NEXT:    .vsave {d8, d9, d10, d11} +; CHECK-NEXT:    vpush {d8, d9, d10, d11} +; CHECK-NEXT:    vmov q4, q1 +; CHECK-NEXT:    vmov q5, q0 +; CHECK-NEXT:    vmov r0, r1, d11 +; CHECK-NEXT:    vmov r2, r3, d9 +; CHECK-NEXT:    bl fmod +; CHECK-NEXT:    vmov lr, r12, d10 +; CHECK-NEXT:    vmov r2, r3, d8 +; CHECK-NEXT:    vmov d9, r0, r1 +; CHECK-NEXT:    mov r0, lr +; CHECK-NEXT:    mov r1, r12 +; CHECK-NEXT:    bl fmod +; CHECK-NEXT:    vmov d8, r0, r1 +; CHECK-NEXT:    vmov q0, q4 +; CHECK-NEXT:    vpop {d8, d9, d10, d11} +; CHECK-NEXT:    pop {r7, pc} +entry: +  %out = frem <2 x double> %in1, %in2 +  ret <2 x double> %out +} + + diff --git a/llvm/test/CodeGen/Thumb2/mve-fmath.ll b/llvm/test/CodeGen/Thumb2/mve-fmath.ll index 41054e2d34d..31956979dd9 100644 --- a/llvm/test/CodeGen/Thumb2/mve-fmath.ll +++ b/llvm/test/CodeGen/Thumb2/mve-fmath.ll @@ -66,6 +66,30 @@ entry:    ret <8 x half> %0  } +define arm_aapcs_vfpcc <2 x double> @sqrt_float64_t(<2 x double> %src) { +; CHECK-LABEL: sqrt_float64_t: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    .save {r7, lr} +; CHECK-NEXT:    push {r7, lr} +; CHECK-NEXT:    .vsave {d8, d9} +; CHECK-NEXT:    vpush {d8, d9} +; CHECK-NEXT:    vmov q4, q0 +; CHECK-NEXT:    vmov r0, r1, d9 +; CHECK-NEXT:    bl sqrt +; CHECK-NEXT:    vmov r2, r3, d8 +; CHECK-NEXT:    vmov d9, r0, r1 +; CHECK-NEXT:    mov r0, r2 +; CHECK-NEXT:    mov r1, r3 +; CHECK-NEXT:    bl sqrt +; CHECK-NEXT:    vmov d8, r0, r1 +; CHECK-NEXT:    vmov q0, q4 +; CHECK-NEXT:    vpop {d8, d9} +; CHECK-NEXT:    pop {r7, pc} +entry: +  %0 = call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> %src) +  ret <2 x double> %0 +} +  define arm_aapcs_vfpcc <4 x float> @cos_float32_t(<4 x float> %src) {  ; CHECK-LABEL: cos_float32_t:  ; CHECK:       @ %bb.0: @ %entry @@ -198,6 +222,30 @@ entry:    ret <8 x half> %0  } +define arm_aapcs_vfpcc <2 x double> @cos_float64_t(<2 x double> %src) { +; CHECK-LABEL: cos_float64_t: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    .save {r7, lr} +; CHECK-NEXT:    push {r7, lr} +; CHECK-NEXT:    .vsave {d8, d9} +; CHECK-NEXT:    vpush {d8, d9} +; CHECK-NEXT:    vmov q4, q0 +; CHECK-NEXT:    vmov r0, r1, d9 +; CHECK-NEXT:    bl cos +; CHECK-NEXT:    vmov r2, r3, d8 +; CHECK-NEXT:    vmov d9, r0, r1 +; CHECK-NEXT:    mov r0, r2 +; CHECK-NEXT:    mov r1, r3 +; CHECK-NEXT:    bl cos +; CHECK-NEXT:    vmov d8, r0, r1 +; CHECK-NEXT:    vmov q0, q4 +; CHECK-NEXT:    vpop {d8, d9} +; CHECK-NEXT:    pop {r7, pc} +entry: +  %0 = call fast <2 x double> @llvm.cos.v2f64(<2 x double> %src) +  ret <2 x double> %0 +} +  define arm_aapcs_vfpcc <4 x float> @sin_float32_t(<4 x float> %src) {  ; CHECK-LABEL: sin_float32_t:  ; CHECK:       @ %bb.0: @ %entry @@ -330,6 +378,30 @@ entry:    ret <8 x half> %0  } +define 
arm_aapcs_vfpcc <2 x double> @sin_float64_t(<2 x double> %src) { +; CHECK-LABEL: sin_float64_t: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    .save {r7, lr} +; CHECK-NEXT:    push {r7, lr} +; CHECK-NEXT:    .vsave {d8, d9} +; CHECK-NEXT:    vpush {d8, d9} +; CHECK-NEXT:    vmov q4, q0 +; CHECK-NEXT:    vmov r0, r1, d9 +; CHECK-NEXT:    bl sin +; CHECK-NEXT:    vmov r2, r3, d8 +; CHECK-NEXT:    vmov d9, r0, r1 +; CHECK-NEXT:    mov r0, r2 +; CHECK-NEXT:    mov r1, r3 +; CHECK-NEXT:    bl sin +; CHECK-NEXT:    vmov d8, r0, r1 +; CHECK-NEXT:    vmov q0, q4 +; CHECK-NEXT:    vpop {d8, d9} +; CHECK-NEXT:    pop {r7, pc} +entry: +  %0 = call fast <2 x double> @llvm.sin.v2f64(<2 x double> %src) +  ret <2 x double> %0 +} +  define arm_aapcs_vfpcc <4 x float> @exp_float32_t(<4 x float> %src) {  ; CHECK-LABEL: exp_float32_t:  ; CHECK:       @ %bb.0: @ %entry @@ -462,6 +534,30 @@ entry:    ret <8 x half> %0  } +define arm_aapcs_vfpcc <2 x double> @exp_float64_t(<2 x double> %src) { +; CHECK-LABEL: exp_float64_t: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    .save {r7, lr} +; CHECK-NEXT:    push {r7, lr} +; CHECK-NEXT:    .vsave {d8, d9} +; CHECK-NEXT:    vpush {d8, d9} +; CHECK-NEXT:    vmov q4, q0 +; CHECK-NEXT:    vmov r0, r1, d9 +; CHECK-NEXT:    bl exp +; CHECK-NEXT:    vmov r2, r3, d8 +; CHECK-NEXT:    vmov d9, r0, r1 +; CHECK-NEXT:    mov r0, r2 +; CHECK-NEXT:    mov r1, r3 +; CHECK-NEXT:    bl exp +; CHECK-NEXT:    vmov d8, r0, r1 +; CHECK-NEXT:    vmov q0, q4 +; CHECK-NEXT:    vpop {d8, d9} +; CHECK-NEXT:    pop {r7, pc} +entry: +  %0 = call fast <2 x double> @llvm.exp.v2f64(<2 x double> %src) +  ret <2 x double> %0 +} +  define arm_aapcs_vfpcc <4 x float> @exp2_float32_t(<4 x float> %src) {  ; CHECK-LABEL: exp2_float32_t:  ; CHECK:       @ %bb.0: @ %entry @@ -594,6 +690,30 @@ entry:    ret <8 x half> %0  } +define arm_aapcs_vfpcc <2 x double> @exp2_float64_t(<2 x double> %src) { +; CHECK-LABEL: exp2_float64_t: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    .save {r7, lr} +; CHECK-NEXT:    push {r7, lr} +; CHECK-NEXT:    .vsave {d8, d9} +; CHECK-NEXT:    vpush {d8, d9} +; CHECK-NEXT:    vmov q4, q0 +; CHECK-NEXT:    vmov r0, r1, d9 +; CHECK-NEXT:    bl exp2 +; CHECK-NEXT:    vmov r2, r3, d8 +; CHECK-NEXT:    vmov d9, r0, r1 +; CHECK-NEXT:    mov r0, r2 +; CHECK-NEXT:    mov r1, r3 +; CHECK-NEXT:    bl exp2 +; CHECK-NEXT:    vmov d8, r0, r1 +; CHECK-NEXT:    vmov q0, q4 +; CHECK-NEXT:    vpop {d8, d9} +; CHECK-NEXT:    pop {r7, pc} +entry: +  %0 = call fast <2 x double> @llvm.exp2.v2f64(<2 x double> %src) +  ret <2 x double> %0 +} +  define arm_aapcs_vfpcc <4 x float> @log_float32_t(<4 x float> %src) {  ; CHECK-LABEL: log_float32_t:  ; CHECK:       @ %bb.0: @ %entry @@ -726,6 +846,30 @@ entry:    ret <8 x half> %0  } +define arm_aapcs_vfpcc <2 x double> @log_float64_t(<2 x double> %src) { +; CHECK-LABEL: log_float64_t: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    .save {r7, lr} +; CHECK-NEXT:    push {r7, lr} +; CHECK-NEXT:    .vsave {d8, d9} +; CHECK-NEXT:    vpush {d8, d9} +; CHECK-NEXT:    vmov q4, q0 +; CHECK-NEXT:    vmov r0, r1, d9 +; CHECK-NEXT:    bl log +; CHECK-NEXT:    vmov r2, r3, d8 +; CHECK-NEXT:    vmov d9, r0, r1 +; CHECK-NEXT:    mov r0, r2 +; CHECK-NEXT:    mov r1, r3 +; CHECK-NEXT:    bl log +; CHECK-NEXT:    vmov d8, r0, r1 +; CHECK-NEXT:    vmov q0, q4 +; CHECK-NEXT:    vpop {d8, d9} +; CHECK-NEXT:    pop {r7, pc} +entry: +  %0 = call fast <2 x double> @llvm.log.v2f64(<2 x double> %src) +  ret <2 x double> %0 +} +  define arm_aapcs_vfpcc <4 x float> 
@log2_float32_t(<4 x float> %src) {  ; CHECK-LABEL: log2_float32_t:  ; CHECK:       @ %bb.0: @ %entry @@ -858,6 +1002,30 @@ entry:    ret <8 x half> %0  } +define arm_aapcs_vfpcc <2 x double> @log2_float64_t(<2 x double> %src) { +; CHECK-LABEL: log2_float64_t: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    .save {r7, lr} +; CHECK-NEXT:    push {r7, lr} +; CHECK-NEXT:    .vsave {d8, d9} +; CHECK-NEXT:    vpush {d8, d9} +; CHECK-NEXT:    vmov q4, q0 +; CHECK-NEXT:    vmov r0, r1, d9 +; CHECK-NEXT:    bl log2 +; CHECK-NEXT:    vmov r2, r3, d8 +; CHECK-NEXT:    vmov d9, r0, r1 +; CHECK-NEXT:    mov r0, r2 +; CHECK-NEXT:    mov r1, r3 +; CHECK-NEXT:    bl log2 +; CHECK-NEXT:    vmov d8, r0, r1 +; CHECK-NEXT:    vmov q0, q4 +; CHECK-NEXT:    vpop {d8, d9} +; CHECK-NEXT:    pop {r7, pc} +entry: +  %0 = call fast <2 x double> @llvm.log2.v2f64(<2 x double> %src) +  ret <2 x double> %0 +} +  define arm_aapcs_vfpcc <4 x float> @log10_float32_t(<4 x float> %src) {  ; CHECK-LABEL: log10_float32_t:  ; CHECK:       @ %bb.0: @ %entry @@ -990,6 +1158,30 @@ entry:    ret <8 x half> %0  } +define arm_aapcs_vfpcc <2 x double> @log10_float64_t(<2 x double> %src) { +; CHECK-LABEL: log10_float64_t: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    .save {r7, lr} +; CHECK-NEXT:    push {r7, lr} +; CHECK-NEXT:    .vsave {d8, d9} +; CHECK-NEXT:    vpush {d8, d9} +; CHECK-NEXT:    vmov q4, q0 +; CHECK-NEXT:    vmov r0, r1, d9 +; CHECK-NEXT:    bl log10 +; CHECK-NEXT:    vmov r2, r3, d8 +; CHECK-NEXT:    vmov d9, r0, r1 +; CHECK-NEXT:    mov r0, r2 +; CHECK-NEXT:    mov r1, r3 +; CHECK-NEXT:    bl log10 +; CHECK-NEXT:    vmov d8, r0, r1 +; CHECK-NEXT:    vmov q0, q4 +; CHECK-NEXT:    vpop {d8, d9} +; CHECK-NEXT:    pop {r7, pc} +entry: +  %0 = call fast <2 x double> @llvm.log10.v2f64(<2 x double> %src) +  ret <2 x double> %0 +} +  define arm_aapcs_vfpcc <4 x float> @pow_float32_t(<4 x float> %src1, <4 x float> %src2) {  ; CHECK-LABEL: pow_float32_t:  ; CHECK:       @ %bb.0: @ %entry @@ -1165,6 +1357,33 @@ entry:    ret <8 x half> %0  } +define arm_aapcs_vfpcc <2 x double> @pow_float64_t(<2 x double> %src1, <2 x double> %src2) { +; CHECK-LABEL: pow_float64_t: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    .save {r7, lr} +; CHECK-NEXT:    push {r7, lr} +; CHECK-NEXT:    .vsave {d8, d9, d10, d11} +; CHECK-NEXT:    vpush {d8, d9, d10, d11} +; CHECK-NEXT:    vmov q4, q1 +; CHECK-NEXT:    vmov q5, q0 +; CHECK-NEXT:    vmov r0, r1, d11 +; CHECK-NEXT:    vmov r2, r3, d9 +; CHECK-NEXT:    bl pow +; CHECK-NEXT:    vmov lr, r12, d10 +; CHECK-NEXT:    vmov r2, r3, d8 +; CHECK-NEXT:    vmov d9, r0, r1 +; CHECK-NEXT:    mov r0, lr +; CHECK-NEXT:    mov r1, r12 +; CHECK-NEXT:    bl pow +; CHECK-NEXT:    vmov d8, r0, r1 +; CHECK-NEXT:    vmov q0, q4 +; CHECK-NEXT:    vpop {d8, d9, d10, d11} +; CHECK-NEXT:    pop {r7, pc} +entry: +  %0 = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %src1, <2 x double> %src2) +  ret <2 x double> %0 +} +  define arm_aapcs_vfpcc <4 x float> @copysign_float32_t(<4 x float> %src1, <4 x float> %src2) {  ; CHECK-LABEL: copysign_float32_t:  ; CHECK:       @ %bb.0: @ %entry @@ -1340,6 +1559,27 @@ entry:    ret <8 x half> %0  } +define arm_aapcs_vfpcc <2 x double> @copysign_float64_t(<2 x double> %src1, <2 x double> %src2) { +; CHECK-LABEL: copysign_float64_t: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    .save {r7, lr} +; CHECK-NEXT:    push {r7, lr} +; CHECK-NEXT:    vmov r0, r1, d3 +; CHECK-NEXT:    vmov r0, lr, d2 +; CHECK-NEXT:    vmov r0, r3, d1 +; CHECK-NEXT:    vmov r12, r2, d0 
+; CHECK-NEXT:    lsrs r1, r1, #31 +; CHECK-NEXT:    bfi r3, r1, #31, #1 +; CHECK-NEXT:    lsr.w r1, lr, #31 +; CHECK-NEXT:    bfi r2, r1, #31, #1 +; CHECK-NEXT:    vmov d1, r0, r3 +; CHECK-NEXT:    vmov d0, r12, r2 +; CHECK-NEXT:    pop {r7, pc} +entry: +  %0 = call fast <2 x double> @llvm.copysign.v2f64(<2 x double> %src1, <2 x double> %src2) +  ret <2 x double> %0 +} +  declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)  declare <4 x float> @llvm.cos.v4f32(<4 x float>)  declare <4 x float> @llvm.sin.v4f32(<4 x float>) @@ -1360,4 +1600,14 @@ declare <8 x half> @llvm.log2.v8f16(<8 x half>)  declare <8 x half> @llvm.log10.v8f16(<8 x half>)  declare <8 x half> @llvm.pow.v8f16(<8 x half>, <8 x half>)  declare <8 x half> @llvm.copysign.v8f16(<8 x half>, <8 x half>) +declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) +declare <2 x double> @llvm.cos.v2f64(<2 x double>) +declare <2 x double> @llvm.sin.v2f64(<2 x double>) +declare <2 x double> @llvm.exp.v2f64(<2 x double>) +declare <2 x double> @llvm.exp2.v2f64(<2 x double>) +declare <2 x double> @llvm.log.v2f64(<2 x double>) +declare <2 x double> @llvm.log2.v2f64(<2 x double>) +declare <2 x double> @llvm.log10.v2f64(<2 x double>) +declare <2 x double> @llvm.pow.v2f64(<2 x double>, <2 x double>) +declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>) diff --git a/llvm/test/CodeGen/Thumb2/mve-fp-negabs.ll b/llvm/test/CodeGen/Thumb2/mve-fp-negabs.ll index 2489646ad6d..d1fd4b26762 100644 --- a/llvm/test/CodeGen/Thumb2/mve-fp-negabs.ll +++ b/llvm/test/CodeGen/Thumb2/mve-fp-negabs.ll @@ -76,6 +76,39 @@ entry:    ret <4 x float> %0  } +define arm_aapcs_vfpcc <2 x double> @fneg_float64_t(<2 x double> %src) { +; CHECK-LABEL: fneg_float64_t: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    .save {r4, r5, r7, lr} +; CHECK-NEXT:    push {r4, r5, r7, lr} +; CHECK-NEXT:    .vsave {d8, d9} +; CHECK-NEXT:    vpush {d8, d9} +; CHECK-NEXT:    vmov q4, q0 +; CHECK-NEXT:    vldr d0, .LCPI2_0 +; CHECK-NEXT:    vmov r2, r3, d9 +; CHECK-NEXT:    vmov r4, r5, d0 +; CHECK-NEXT:    mov r0, r4 +; CHECK-NEXT:    mov r1, r5 +; CHECK-NEXT:    bl __aeabi_dsub +; CHECK-NEXT:    vmov r2, r3, d8 +; CHECK-NEXT:    vmov d9, r0, r1 +; CHECK-NEXT:    mov r0, r4 +; CHECK-NEXT:    mov r1, r5 +; CHECK-NEXT:    bl __aeabi_dsub +; CHECK-NEXT:    vmov d8, r0, r1 +; CHECK-NEXT:    vmov q0, q4 +; CHECK-NEXT:    vpop {d8, d9} +; CHECK-NEXT:    pop {r4, r5, r7, pc} +; CHECK-NEXT:    .p2align 3 +; CHECK-NEXT:  @ %bb.1: +; CHECK-NEXT:  .LCPI2_0: +; CHECK-NEXT:    .long 0 @ double -0 +; CHECK-NEXT:    .long 2147483648 +entry: +  %0 = fsub nnan ninf nsz <2 x double> <double 0.0e0, double 0.0e0>, %src +  ret <2 x double> %0 +} +  define arm_aapcs_vfpcc <8 x half> @fabs_float16_t(<8 x half> %src) {  ; CHECK-MVE-LABEL: fabs_float16_t:  ; CHECK-MVE:       @ %bb.0: @ %entry @@ -150,6 +183,30 @@ entry:    ret <4 x float> %0  } +define arm_aapcs_vfpcc <2 x double> @fabs_float64_t(<2 x double> %src) { +; CHECK-LABEL: fabs_float64_t: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    vldr d2, .LCPI5_0 +; CHECK-NEXT:    vmov r12, r3, d0 +; CHECK-NEXT:    vmov r0, r1, d2 +; CHECK-NEXT:    vmov r0, r2, d1 +; CHECK-NEXT:    lsrs r1, r1, #31 +; CHECK-NEXT:    bfi r2, r1, #31, #1 +; CHECK-NEXT:    bfi r3, r1, #31, #1 +; CHECK-NEXT:    vmov d1, r0, r2 +; CHECK-NEXT:    vmov d0, r12, r3 +; CHECK-NEXT:    bx lr +; CHECK-NEXT:    .p2align 3 +; CHECK-NEXT:  @ %bb.1: +; CHECK-NEXT:  .LCPI5_0: +; CHECK-NEXT:    .long 0 @ double 0 +; CHECK-NEXT:    .long 0 +entry: +  %0 = call nnan ninf nsz <2 x 
double> @llvm.fabs.v2f64(<2 x double> %src) +  ret <2 x double> %0 +} +  declare <4 x float> @llvm.fabs.v4f32(<4 x float>)  declare <8 x half> @llvm.fabs.v8f16(<8 x half>) +declare <2 x double> @llvm.fabs.v2f64(<2 x double>) diff --git a/llvm/test/CodeGen/Thumb2/mve-frint.ll b/llvm/test/CodeGen/Thumb2/mve-frint.ll index 847d7ede1d7..e1758d5ed3b 100644 --- a/llvm/test/CodeGen/Thumb2/mve-frint.ll +++ b/llvm/test/CodeGen/Thumb2/mve-frint.ll @@ -76,6 +76,30 @@ entry:    ret <8 x half> %0  } +define arm_aapcs_vfpcc <2 x double> @fceil_float64_t(<2 x double> %src) { +; CHECK-LABEL: fceil_float64_t: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    .save {r7, lr} +; CHECK-NEXT:    push {r7, lr} +; CHECK-NEXT:    .vsave {d8, d9} +; CHECK-NEXT:    vpush {d8, d9} +; CHECK-NEXT:    vmov q4, q0 +; CHECK-NEXT:    vmov r0, r1, d9 +; CHECK-NEXT:    bl ceil +; CHECK-NEXT:    vmov r2, r3, d8 +; CHECK-NEXT:    vmov d9, r0, r1 +; CHECK-NEXT:    mov r0, r2 +; CHECK-NEXT:    mov r1, r3 +; CHECK-NEXT:    bl ceil +; CHECK-NEXT:    vmov d8, r0, r1 +; CHECK-NEXT:    vmov q0, q4 +; CHECK-NEXT:    vpop {d8, d9} +; CHECK-NEXT:    pop {r7, pc} +entry: +  %0 = call fast <2 x double> @llvm.ceil.v2f64(<2 x double> %src) +  ret <2 x double> %0 +} +  define arm_aapcs_vfpcc <4 x float> @ftrunc_float32_t(<4 x float> %src) {  ; CHECK-MVE-LABEL: ftrunc_float32_t:  ; CHECK-MVE:       @ %bb.0: @ %entry @@ -150,6 +174,30 @@ entry:    ret <8 x half> %0  } +define arm_aapcs_vfpcc <2 x double> @ftrunc_float64_t(<2 x double> %src) { +; CHECK-LABEL: ftrunc_float64_t: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    .save {r7, lr} +; CHECK-NEXT:    push {r7, lr} +; CHECK-NEXT:    .vsave {d8, d9} +; CHECK-NEXT:    vpush {d8, d9} +; CHECK-NEXT:    vmov q4, q0 +; CHECK-NEXT:    vmov r0, r1, d9 +; CHECK-NEXT:    bl trunc +; CHECK-NEXT:    vmov r2, r3, d8 +; CHECK-NEXT:    vmov d9, r0, r1 +; CHECK-NEXT:    mov r0, r2 +; CHECK-NEXT:    mov r1, r3 +; CHECK-NEXT:    bl trunc +; CHECK-NEXT:    vmov d8, r0, r1 +; CHECK-NEXT:    vmov q0, q4 +; CHECK-NEXT:    vpop {d8, d9} +; CHECK-NEXT:    pop {r7, pc} +entry: +  %0 = call fast <2 x double> @llvm.trunc.v2f64(<2 x double> %src) +  ret <2 x double> %0 +} +  define arm_aapcs_vfpcc <4 x float> @frint_float32_t(<4 x float> %src) {  ; CHECK-MVE-LABEL: frint_float32_t:  ; CHECK-MVE:       @ %bb.0: @ %entry @@ -224,6 +272,30 @@ entry:    ret <8 x half> %0  } +define arm_aapcs_vfpcc <2 x double> @frint_float64_t(<2 x double> %src) { +; CHECK-LABEL: frint_float64_t: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    .save {r7, lr} +; CHECK-NEXT:    push {r7, lr} +; CHECK-NEXT:    .vsave {d8, d9} +; CHECK-NEXT:    vpush {d8, d9} +; CHECK-NEXT:    vmov q4, q0 +; CHECK-NEXT:    vmov r0, r1, d9 +; CHECK-NEXT:    bl rint +; CHECK-NEXT:    vmov r2, r3, d8 +; CHECK-NEXT:    vmov d9, r0, r1 +; CHECK-NEXT:    mov r0, r2 +; CHECK-NEXT:    mov r1, r3 +; CHECK-NEXT:    bl rint +; CHECK-NEXT:    vmov d8, r0, r1 +; CHECK-NEXT:    vmov q0, q4 +; CHECK-NEXT:    vpop {d8, d9} +; CHECK-NEXT:    pop {r7, pc} +entry: +  %0 = call fast <2 x double> @llvm.rint.v2f64(<2 x double> %src) +  ret <2 x double> %0 +} +  define arm_aapcs_vfpcc <4 x float> @fnearbyint_float32_t(<4 x float> %src) {  ; CHECK-LABEL: fnearbyint_float32_t:  ; CHECK:       @ %bb.0: @ %entry @@ -288,6 +360,30 @@ entry:    ret <8 x half> %0  } +define arm_aapcs_vfpcc <2 x double> @fnearbyint_float64_t(<2 x double> %src) { +; CHECK-LABEL: fnearbyint_float64_t: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    .save {r7, lr} +; CHECK-NEXT:    push {r7, 
lr} +; CHECK-NEXT:    .vsave {d8, d9} +; CHECK-NEXT:    vpush {d8, d9} +; CHECK-NEXT:    vmov q4, q0 +; CHECK-NEXT:    vmov r0, r1, d9 +; CHECK-NEXT:    bl nearbyint +; CHECK-NEXT:    vmov r2, r3, d8 +; CHECK-NEXT:    vmov d9, r0, r1 +; CHECK-NEXT:    mov r0, r2 +; CHECK-NEXT:    mov r1, r3 +; CHECK-NEXT:    bl nearbyint +; CHECK-NEXT:    vmov d8, r0, r1 +; CHECK-NEXT:    vmov q0, q4 +; CHECK-NEXT:    vpop {d8, d9} +; CHECK-NEXT:    pop {r7, pc} +entry: +  %0 = call fast <2 x double> @llvm.nearbyint.v2f64(<2 x double> %src) +  ret <2 x double> %0 +} +  define arm_aapcs_vfpcc <4 x float> @ffloor_float32_t(<4 x float> %src) {  ; CHECK-MVE-LABEL: ffloor_float32_t:  ; CHECK-MVE:       @ %bb.0: @ %entry @@ -362,6 +458,30 @@ entry:    ret <8 x half> %0  } +define arm_aapcs_vfpcc <2 x double> @ffloor_float64_t(<2 x double> %src) { +; CHECK-LABEL: ffloor_float64_t: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    .save {r7, lr} +; CHECK-NEXT:    push {r7, lr} +; CHECK-NEXT:    .vsave {d8, d9} +; CHECK-NEXT:    vpush {d8, d9} +; CHECK-NEXT:    vmov q4, q0 +; CHECK-NEXT:    vmov r0, r1, d9 +; CHECK-NEXT:    bl floor +; CHECK-NEXT:    vmov r2, r3, d8 +; CHECK-NEXT:    vmov d9, r0, r1 +; CHECK-NEXT:    mov r0, r2 +; CHECK-NEXT:    mov r1, r3 +; CHECK-NEXT:    bl floor +; CHECK-NEXT:    vmov d8, r0, r1 +; CHECK-NEXT:    vmov q0, q4 +; CHECK-NEXT:    vpop {d8, d9} +; CHECK-NEXT:    pop {r7, pc} +entry: +  %0 = call fast <2 x double> @llvm.floor.v2f64(<2 x double> %src) +  ret <2 x double> %0 +} +  define arm_aapcs_vfpcc <4 x float> @fround_float32_t(<4 x float> %src) {  ; CHECK-MVE-LABEL: fround_float32_t:  ; CHECK-MVE:       @ %bb.0: @ %entry @@ -436,6 +556,30 @@ entry:    ret <8 x half> %0  } +define arm_aapcs_vfpcc <2 x double> @fround_float64_t(<2 x double> %src) { +; CHECK-LABEL: fround_float64_t: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    .save {r7, lr} +; CHECK-NEXT:    push {r7, lr} +; CHECK-NEXT:    .vsave {d8, d9} +; CHECK-NEXT:    vpush {d8, d9} +; CHECK-NEXT:    vmov q4, q0 +; CHECK-NEXT:    vmov r0, r1, d9 +; CHECK-NEXT:    bl round +; CHECK-NEXT:    vmov r2, r3, d8 +; CHECK-NEXT:    vmov d9, r0, r1 +; CHECK-NEXT:    mov r0, r2 +; CHECK-NEXT:    mov r1, r3 +; CHECK-NEXT:    bl round +; CHECK-NEXT:    vmov d8, r0, r1 +; CHECK-NEXT:    vmov q0, q4 +; CHECK-NEXT:    vpop {d8, d9} +; CHECK-NEXT:    pop {r7, pc} +entry: +  %0 = call fast <2 x double> @llvm.round.v2f64(<2 x double> %src) +  ret <2 x double> %0 +} +  declare <4 x float> @llvm.ceil.v4f32(<4 x float>)  declare <4 x float> @llvm.trunc.v4f32(<4 x float>)  declare <4 x float> @llvm.rint.v4f32(<4 x float>) @@ -448,3 +592,9 @@ declare <8 x half> @llvm.rint.v8f16(<8 x half>)  declare <8 x half> @llvm.nearbyint.v8f16(<8 x half>)  declare <8 x half> @llvm.floor.v8f16(<8 x half>)  declare <8 x half> @llvm.round.v8f16(<8 x half>) +declare <2 x double> @llvm.ceil.v2f64(<2 x double>) +declare <2 x double> @llvm.trunc.v2f64(<2 x double>) +declare <2 x double> @llvm.rint.v2f64(<2 x double>) +declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>) +declare <2 x double> @llvm.floor.v2f64(<2 x double>) +declare <2 x double> @llvm.round.v2f64(<2 x double>) diff --git a/llvm/test/CodeGen/Thumb2/mve-minmax.ll b/llvm/test/CodeGen/Thumb2/mve-minmax.ll index 38990d35717..0b6308f8800 100644 --- a/llvm/test/CodeGen/Thumb2/mve-minmax.ll +++ b/llvm/test/CodeGen/Thumb2/mve-minmax.ll @@ -35,6 +35,49 @@ entry:    ret <4 x i32> %1  } +define arm_aapcs_vfpcc <2 x i64> @smin_v2i64(<2 x i64> %s1, <2 x i64> %s2) { +; CHECK-LABEL: smin_v2i64: +; CHECK:    
   @ %bb.0: @ %entry +; CHECK-NEXT:    .save {r7, lr} +; CHECK-NEXT:    push {r7, lr} +; CHECK-NEXT:    vmov r2, s6 +; CHECK-NEXT:    movs r0, #0 +; CHECK-NEXT:    vmov r3, s2 +; CHECK-NEXT:    vmov r12, s7 +; CHECK-NEXT:    vmov r1, s3 +; CHECK-NEXT:    vmov lr, s1 +; CHECK-NEXT:    subs r2, r3, r2 +; CHECK-NEXT:    vmov r3, s0 +; CHECK-NEXT:    vmov r2, s4 +; CHECK-NEXT:    sbcs.w r1, r1, r12 +; CHECK-NEXT:    vmov r12, s5 +; CHECK-NEXT:    mov.w r1, #0 +; CHECK-NEXT:    it lt +; CHECK-NEXT:    movlt r1, #1 +; CHECK-NEXT:    cmp r1, #0 +; CHECK-NEXT:    it ne +; CHECK-NEXT:    movne.w r1, #-1 +; CHECK-NEXT:    subs r2, r3, r2 +; CHECK-NEXT:    sbcs.w r2, lr, r12 +; CHECK-NEXT:    it lt +; CHECK-NEXT:    movlt r0, #1 +; CHECK-NEXT:    cmp r0, #0 +; CHECK-NEXT:    it ne +; CHECK-NEXT:    movne.w r0, #-1 +; CHECK-NEXT:    vmov.32 q2[0], r0 +; CHECK-NEXT:    vmov.32 q2[1], r0 +; CHECK-NEXT:    vmov.32 q2[2], r1 +; CHECK-NEXT:    vmov.32 q2[3], r1 +; CHECK-NEXT:    vbic q1, q1, q2 +; CHECK-NEXT:    vand q0, q0, q2 +; CHECK-NEXT:    vorr q0, q0, q1 +; CHECK-NEXT:    pop {r7, pc} +entry: +  %0 = icmp slt <2 x i64> %s1, %s2 +  %1 = select <2 x i1> %0, <2 x i64> %s1, <2 x i64> %s2 +  ret <2 x i64> %1 +} +  define arm_aapcs_vfpcc <16 x i8> @umin_v16i8(<16 x i8> %s1, <16 x i8> %s2) {  ; CHECK-LABEL: umin_v16i8:  ; CHECK:       @ %bb.0: @ %entry @@ -68,6 +111,49 @@ entry:    ret <4 x i32> %1  } +define arm_aapcs_vfpcc <2 x i64> @umin_v2i64(<2 x i64> %s1, <2 x i64> %s2) { +; CHECK-LABEL: umin_v2i64: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    .save {r7, lr} +; CHECK-NEXT:    push {r7, lr} +; CHECK-NEXT:    vmov r2, s6 +; CHECK-NEXT:    movs r0, #0 +; CHECK-NEXT:    vmov r3, s2 +; CHECK-NEXT:    vmov r12, s7 +; CHECK-NEXT:    vmov r1, s3 +; CHECK-NEXT:    vmov lr, s1 +; CHECK-NEXT:    subs r2, r3, r2 +; CHECK-NEXT:    vmov r3, s0 +; CHECK-NEXT:    vmov r2, s4 +; CHECK-NEXT:    sbcs.w r1, r1, r12 +; CHECK-NEXT:    vmov r12, s5 +; CHECK-NEXT:    mov.w r1, #0 +; CHECK-NEXT:    it lo +; CHECK-NEXT:    movlo r1, #1 +; CHECK-NEXT:    cmp r1, #0 +; CHECK-NEXT:    it ne +; CHECK-NEXT:    movne.w r1, #-1 +; CHECK-NEXT:    subs r2, r3, r2 +; CHECK-NEXT:    sbcs.w r2, lr, r12 +; CHECK-NEXT:    it lo +; CHECK-NEXT:    movlo r0, #1 +; CHECK-NEXT:    cmp r0, #0 +; CHECK-NEXT:    it ne +; CHECK-NEXT:    movne.w r0, #-1 +; CHECK-NEXT:    vmov.32 q2[0], r0 +; CHECK-NEXT:    vmov.32 q2[1], r0 +; CHECK-NEXT:    vmov.32 q2[2], r1 +; CHECK-NEXT:    vmov.32 q2[3], r1 +; CHECK-NEXT:    vbic q1, q1, q2 +; CHECK-NEXT:    vand q0, q0, q2 +; CHECK-NEXT:    vorr q0, q0, q1 +; CHECK-NEXT:    pop {r7, pc} +entry: +  %0 = icmp ult <2 x i64> %s1, %s2 +  %1 = select <2 x i1> %0, <2 x i64> %s1, <2 x i64> %s2 +  ret <2 x i64> %1 +} +  define arm_aapcs_vfpcc <16 x i8> @smax_v16i8(<16 x i8> %s1, <16 x i8> %s2) {  ; CHECK-LABEL: smax_v16i8: @@ -102,6 +188,49 @@ entry:    ret <4 x i32> %1  } +define arm_aapcs_vfpcc <2 x i64> @smax_v2i64(<2 x i64> %s1, <2 x i64> %s2) { +; CHECK-LABEL: smax_v2i64: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    .save {r7, lr} +; CHECK-NEXT:    push {r7, lr} +; CHECK-NEXT:    vmov r2, s2 +; CHECK-NEXT:    movs r0, #0 +; CHECK-NEXT:    vmov r3, s6 +; CHECK-NEXT:    vmov r12, s3 +; CHECK-NEXT:    vmov r1, s7 +; CHECK-NEXT:    vmov lr, s5 +; CHECK-NEXT:    subs r2, r3, r2 +; CHECK-NEXT:    vmov r3, s4 +; CHECK-NEXT:    vmov r2, s0 +; CHECK-NEXT:    sbcs.w r1, r1, r12 +; CHECK-NEXT:    vmov r12, s1 +; CHECK-NEXT:    mov.w r1, #0 +; CHECK-NEXT:    it lt +; CHECK-NEXT:    movlt r1, #1 +; CHECK-NEXT:    
cmp r1, #0 +; CHECK-NEXT:    it ne +; CHECK-NEXT:    movne.w r1, #-1 +; CHECK-NEXT:    subs r2, r3, r2 +; CHECK-NEXT:    sbcs.w r2, lr, r12 +; CHECK-NEXT:    it lt +; CHECK-NEXT:    movlt r0, #1 +; CHECK-NEXT:    cmp r0, #0 +; CHECK-NEXT:    it ne +; CHECK-NEXT:    movne.w r0, #-1 +; CHECK-NEXT:    vmov.32 q2[0], r0 +; CHECK-NEXT:    vmov.32 q2[1], r0 +; CHECK-NEXT:    vmov.32 q2[2], r1 +; CHECK-NEXT:    vmov.32 q2[3], r1 +; CHECK-NEXT:    vbic q1, q1, q2 +; CHECK-NEXT:    vand q0, q0, q2 +; CHECK-NEXT:    vorr q0, q0, q1 +; CHECK-NEXT:    pop {r7, pc} +entry: +  %0 = icmp sgt <2 x i64> %s1, %s2 +  %1 = select <2 x i1> %0, <2 x i64> %s1, <2 x i64> %s2 +  ret <2 x i64> %1 +} +  define arm_aapcs_vfpcc <16 x i8> @umax_v16i8(<16 x i8> %s1, <16 x i8> %s2) {  ; CHECK-LABEL: umax_v16i8:  ; CHECK:       @ %bb.0: @ %entry @@ -135,6 +264,49 @@ entry:    ret <4 x i32> %1  } +define arm_aapcs_vfpcc <2 x i64> @umax_v2i64(<2 x i64> %s1, <2 x i64> %s2) { +; CHECK-LABEL: umax_v2i64: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    .save {r7, lr} +; CHECK-NEXT:    push {r7, lr} +; CHECK-NEXT:    vmov r2, s2 +; CHECK-NEXT:    movs r0, #0 +; CHECK-NEXT:    vmov r3, s6 +; CHECK-NEXT:    vmov r12, s3 +; CHECK-NEXT:    vmov r1, s7 +; CHECK-NEXT:    vmov lr, s5 +; CHECK-NEXT:    subs r2, r3, r2 +; CHECK-NEXT:    vmov r3, s4 +; CHECK-NEXT:    vmov r2, s0 +; CHECK-NEXT:    sbcs.w r1, r1, r12 +; CHECK-NEXT:    vmov r12, s1 +; CHECK-NEXT:    mov.w r1, #0 +; CHECK-NEXT:    it lo +; CHECK-NEXT:    movlo r1, #1 +; CHECK-NEXT:    cmp r1, #0 +; CHECK-NEXT:    it ne +; CHECK-NEXT:    movne.w r1, #-1 +; CHECK-NEXT:    subs r2, r3, r2 +; CHECK-NEXT:    sbcs.w r2, lr, r12 +; CHECK-NEXT:    it lo +; CHECK-NEXT:    movlo r0, #1 +; CHECK-NEXT:    cmp r0, #0 +; CHECK-NEXT:    it ne +; CHECK-NEXT:    movne.w r0, #-1 +; CHECK-NEXT:    vmov.32 q2[0], r0 +; CHECK-NEXT:    vmov.32 q2[1], r0 +; CHECK-NEXT:    vmov.32 q2[2], r1 +; CHECK-NEXT:    vmov.32 q2[3], r1 +; CHECK-NEXT:    vbic q1, q1, q2 +; CHECK-NEXT:    vand q0, q0, q2 +; CHECK-NEXT:    vorr q0, q0, q1 +; CHECK-NEXT:    pop {r7, pc} +entry: +  %0 = icmp ugt <2 x i64> %s1, %s2 +  %1 = select <2 x i1> %0, <2 x i64> %s1, <2 x i64> %s2 +  ret <2 x i64> %1 +} +  define arm_aapcs_vfpcc <4 x float> @maxnm_float32_t(<4 x float> %src1, <4 x float> %src2) {  ; CHECK-MVE-LABEL: maxnm_float32_t: @@ -227,3 +399,46 @@ entry:    %0 = select <8 x i1> %cmp, <8 x half> %src1, <8 x half> %src2    ret <8 x half> %0  } + +define arm_aapcs_vfpcc <2 x double> @maxnm_float64_t(<2 x double> %src1, <2 x double> %src2) { +; CHECK-LABEL: maxnm_float64_t: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    .save {r4, lr} +; CHECK-NEXT:    push {r4, lr} +; CHECK-NEXT:    .vsave {d8, d9, d10, d11} +; CHECK-NEXT:    vpush {d8, d9, d10, d11} +; CHECK-NEXT:    vmov q4, q1 +; CHECK-NEXT:    vmov q5, q0 +; CHECK-NEXT:    vmov r0, r1, d9 +; CHECK-NEXT:    vmov r2, r3, d11 +; CHECK-NEXT:    bl __aeabi_dcmpgt +; CHECK-NEXT:    mov r4, r0 +; CHECK-NEXT:    vmov r0, r1, d8 +; CHECK-NEXT:    vmov r2, r3, d10 +; CHECK-NEXT:    cmp r4, #0 +; CHECK-NEXT:    it ne +; CHECK-NEXT:    movne r4, #1 +; CHECK-NEXT:    cmp r4, #0 +; CHECK-NEXT:    it ne +; CHECK-NEXT:    movne.w r4, #-1 +; CHECK-NEXT:    bl __aeabi_dcmpgt +; CHECK-NEXT:    cmp r0, #0 +; CHECK-NEXT:    it ne +; CHECK-NEXT:    movne r0, #1 +; CHECK-NEXT:    cmp r0, #0 +; CHECK-NEXT:    it ne +; CHECK-NEXT:    movne.w r0, #-1 +; CHECK-NEXT:    vmov.32 q0[0], r0 +; CHECK-NEXT:    vmov.32 q0[1], r0 +; CHECK-NEXT:    vmov.32 q0[2], r4 +; CHECK-NEXT:    vmov.32 
q0[3], r4 +; CHECK-NEXT:    vbic q1, q5, q0 +; CHECK-NEXT:    vand q0, q4, q0 +; CHECK-NEXT:    vorr q0, q0, q1 +; CHECK-NEXT:    vpop {d8, d9, d10, d11} +; CHECK-NEXT:    pop {r4, pc} +entry: +  %cmp = fcmp fast ogt <2 x double> %src2, %src1 +  %0 = select <2 x i1> %cmp, <2 x double> %src2, <2 x double> %src1 +  ret <2 x double> %0 +} diff --git a/llvm/test/CodeGen/Thumb2/mve-neg.ll b/llvm/test/CodeGen/Thumb2/mve-neg.ll index f1c4352e3ed..602ce3d5f9b 100644 --- a/llvm/test/CodeGen/Thumb2/mve-neg.ll +++ b/llvm/test/CodeGen/Thumb2/mve-neg.ll @@ -30,3 +30,26 @@ entry:    %0 = sub nsw <4 x i32> zeroinitializer, %s1    ret <4 x i32> %0  } + +define arm_aapcs_vfpcc <2 x i64> @neg_v2i64(<2 x i64> %s1) { +; CHECK-LABEL: neg_v2i64: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    vmov r1, s2 +; CHECK-NEXT:    mov.w r12, #0 +; CHECK-NEXT:    vmov r0, s3 +; CHECK-NEXT:    vmov r2, s0 +; CHECK-NEXT:    vmov r3, s1 +; CHECK-NEXT:    rsbs r1, r1, #0 +; CHECK-NEXT:    sbc.w r0, r12, r0 +; CHECK-NEXT:    rsbs r2, r2, #0 +; CHECK-NEXT:    sbc.w r3, r12, r3 +; CHECK-NEXT:    vmov.32 q0[0], r2 +; CHECK-NEXT:    vmov.32 q0[1], r3 +; CHECK-NEXT:    vmov.32 q0[2], r1 +; CHECK-NEXT:    vmov.32 q0[3], r0 +; CHECK-NEXT:    bx lr +entry: +  %0 = sub nsw <2 x i64> zeroinitializer, %s1 +  ret <2 x i64> %0 +} + diff --git a/llvm/test/CodeGen/Thumb2/mve-sext.ll b/llvm/test/CodeGen/Thumb2/mve-sext.ll index 9458fdc47e5..452e68405fe 100644 --- a/llvm/test/CodeGen/Thumb2/mve-sext.ll +++ b/llvm/test/CodeGen/Thumb2/mve-sext.ll @@ -32,6 +32,24 @@ entry:    ret <4 x i32> %0  } +define arm_aapcs_vfpcc <2 x i64> @sext_v2i32_v2i64(<2 x i32> %src) { +; CHECK-LABEL: sext_v2i32_v2i64: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    vmov r0, s0 +; CHECK-NEXT:    vmov.32 q1[0], r0 +; CHECK-NEXT:    asrs r0, r0, #31 +; CHECK-NEXT:    vmov.32 q1[1], r0 +; CHECK-NEXT:    vmov r0, s2 +; CHECK-NEXT:    vmov.32 q1[2], r0 +; CHECK-NEXT:    asrs r0, r0, #31 +; CHECK-NEXT:    vmov.32 q1[3], r0 +; CHECK-NEXT:    vmov q0, q1 +; CHECK-NEXT:    bx lr +entry: +  %0 = sext <2 x i32> %src to <2 x i64> +  ret <2 x i64> %0 +} +  define arm_aapcs_vfpcc <8 x i16> @zext_v8i8_v8i16(<8 x i8> %src) {  ; CHECK-LABEL: zext_v8i8_v8i16: @@ -64,6 +82,25 @@ entry:    ret <4 x i32> %0  } +define arm_aapcs_vfpcc <2 x i64> @zext_v2i32_v2i64(<2 x i32> %src) { +; CHECK-LABEL: zext_v2i32_v2i64: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    adr r0, .LCPI7_0 +; CHECK-NEXT:    vldrw.u32 q1, [r0] +; CHECK-NEXT:    vand q0, q0, q1 +; CHECK-NEXT:    bx lr +; CHECK-NEXT:    .p2align 4 +; CHECK-NEXT:  @ %bb.1: +; CHECK-NEXT:  .LCPI7_0: +; CHECK-NEXT:    .long 4294967295 @ 0xffffffff +; CHECK-NEXT:    .long 0 @ 0x0 +; CHECK-NEXT:    .long 4294967295 @ 0xffffffff +; CHECK-NEXT:    .long 0 @ 0x0 +entry: +  %0 = zext <2 x i32> %src to <2 x i64> +  ret <2 x i64> %0 +} +  define arm_aapcs_vfpcc <8 x i8> @trunc_v8i16_v8i8(<8 x i16> %src) {  ; CHECK-LABEL: trunc_v8i16_v8i8: @@ -91,3 +128,13 @@ entry:    %0 = trunc <4 x i32> %src to <4 x i8>    ret <4 x i8> %0  } + +define arm_aapcs_vfpcc <2 x i32> @trunc_v2i64_v2i32(<2 x i64> %src) { +; CHECK-LABEL: trunc_v2i64_v2i32: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    bx lr +entry: +  %0 = trunc <2 x i64> %src to <2 x i32> +  ret <2 x i32> %0 +} + diff --git a/llvm/test/CodeGen/Thumb2/mve-shifts.ll b/llvm/test/CodeGen/Thumb2/mve-shifts.ll index 4e6e5ae7f6a..a321c2dd383 100644 --- a/llvm/test/CodeGen/Thumb2/mve-shifts.ll +++ b/llvm/test/CodeGen/Thumb2/mve-shifts.ll @@ -31,6 +31,28 @@ entry:    ret <4 x i32> %0  } +define 
arm_aapcs_vfpcc <2 x i64> @shl_qq_int64_t(<2 x i64> %src1, <2 x i64> %src2) { +; CHECK-LABEL: shl_qq_int64_t: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    vmov r0, s4 +; CHECK-NEXT:    vmov r1, s1 +; CHECK-NEXT:    vmov r2, s0 +; CHECK-NEXT:    lsll r2, r1, r0 +; CHECK-NEXT:    vmov r0, s6 +; CHECK-NEXT:    vmov.32 q2[0], r2 +; CHECK-NEXT:    vmov r2, s2 +; CHECK-NEXT:    vmov.32 q2[1], r1 +; CHECK-NEXT:    vmov r1, s3 +; CHECK-NEXT:    lsll r2, r1, r0 +; CHECK-NEXT:    vmov.32 q2[2], r2 +; CHECK-NEXT:    vmov.32 q2[3], r1 +; CHECK-NEXT:    vmov q0, q2 +; CHECK-NEXT:    bx lr +entry: +  %0 = shl <2 x i64> %src1, %src2 +  ret <2 x i64> %0 +} +  define arm_aapcs_vfpcc <16 x i8> @shru_qq_int8_t(<16 x i8> %src1, <16 x i8> %src2) {  ; CHECK-LABEL: shru_qq_int8_t: @@ -65,6 +87,30 @@ entry:    ret <4 x i32> %0  } +define arm_aapcs_vfpcc <2 x i64> @shru_qq_int64_t(<2 x i64> %src1, <2 x i64> %src2) { +; CHECK-LABEL: shru_qq_int64_t: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    vmov r2, s4 +; CHECK-NEXT:    vmov r1, s1 +; CHECK-NEXT:    vmov r0, s0 +; CHECK-NEXT:    rsbs r2, r2, #0 +; CHECK-NEXT:    lsll r0, r1, r2 +; CHECK-NEXT:    vmov r2, s6 +; CHECK-NEXT:    vmov.32 q2[0], r0 +; CHECK-NEXT:    vmov r0, s2 +; CHECK-NEXT:    vmov.32 q2[1], r1 +; CHECK-NEXT:    vmov r1, s3 +; CHECK-NEXT:    rsbs r2, r2, #0 +; CHECK-NEXT:    lsll r0, r1, r2 +; CHECK-NEXT:    vmov.32 q2[2], r0 +; CHECK-NEXT:    vmov.32 q2[3], r1 +; CHECK-NEXT:    vmov q0, q2 +; CHECK-NEXT:    bx lr +entry: +  %0 = lshr <2 x i64> %src1, %src2 +  ret <2 x i64> %0 +} +  define arm_aapcs_vfpcc <16 x i8> @shrs_qq_int8_t(<16 x i8> %src1, <16 x i8> %src2) {  ; CHECK-LABEL: shrs_qq_int8_t: @@ -99,6 +145,28 @@ entry:    ret <4 x i32> %0  } +define arm_aapcs_vfpcc <2 x i64> @shrs_qq_int64_t(<2 x i64> %src1, <2 x i64> %src2) { +; CHECK-LABEL: shrs_qq_int64_t: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    vmov r0, s4 +; CHECK-NEXT:    vmov r1, s1 +; CHECK-NEXT:    vmov r2, s0 +; CHECK-NEXT:    asrl r2, r1, r0 +; CHECK-NEXT:    vmov r0, s6 +; CHECK-NEXT:    vmov.32 q2[0], r2 +; CHECK-NEXT:    vmov r2, s2 +; CHECK-NEXT:    vmov.32 q2[1], r1 +; CHECK-NEXT:    vmov r1, s3 +; CHECK-NEXT:    asrl r2, r1, r0 +; CHECK-NEXT:    vmov.32 q2[2], r2 +; CHECK-NEXT:    vmov.32 q2[3], r1 +; CHECK-NEXT:    vmov q0, q2 +; CHECK-NEXT:    bx lr +entry: +  %0 = ashr <2 x i64> %src1, %src2 +  ret <2 x i64> %0 +} +  define arm_aapcs_vfpcc <16 x i8> @shl_qi_int8_t(<16 x i8> %src1) {  ; CHECK-LABEL: shl_qi_int8_t: @@ -130,6 +198,26 @@ entry:    ret <4 x i32> %0  } +define arm_aapcs_vfpcc <2 x i64> @shl_qi_int64_t(<2 x i64> %src1) { +; CHECK-LABEL: shl_qi_int64_t: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    vmov r1, s1 +; CHECK-NEXT:    vmov r0, s0 +; CHECK-NEXT:    lsll r0, r1, #4 +; CHECK-NEXT:    vmov.32 q1[0], r0 +; CHECK-NEXT:    vmov r0, s2 +; CHECK-NEXT:    vmov.32 q1[1], r1 +; CHECK-NEXT:    vmov r1, s3 +; CHECK-NEXT:    lsll r0, r1, #4 +; CHECK-NEXT:    vmov.32 q1[2], r0 +; CHECK-NEXT:    vmov.32 q1[3], r1 +; CHECK-NEXT:    vmov q0, q1 +; CHECK-NEXT:    bx lr +entry: +  %0 = shl <2 x i64> %src1, <i64 4, i64 4> +  ret <2 x i64> %0 +} +  define arm_aapcs_vfpcc <16 x i8> @shru_qi_int8_t(<16 x i8> %src1) {  ; CHECK-LABEL: shru_qi_int8_t: @@ -161,6 +249,26 @@ entry:    ret <4 x i32> %0  } +define arm_aapcs_vfpcc <2 x i64> @shru_qi_int64_t(<2 x i64> %src1) { +; CHECK-LABEL: shru_qi_int64_t: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    vmov r1, s1 +; CHECK-NEXT:    vmov r0, s0 +; CHECK-NEXT:    lsrl r0, r1, #4 +; CHECK-NEXT:    
vmov.32 q1[0], r0 +; CHECK-NEXT:    vmov r0, s2 +; CHECK-NEXT:    vmov.32 q1[1], r1 +; CHECK-NEXT:    vmov r1, s3 +; CHECK-NEXT:    lsrl r0, r1, #4 +; CHECK-NEXT:    vmov.32 q1[2], r0 +; CHECK-NEXT:    vmov.32 q1[3], r1 +; CHECK-NEXT:    vmov q0, q1 +; CHECK-NEXT:    bx lr +entry: +  %0 = lshr <2 x i64> %src1, <i64 4, i64 4> +  ret <2 x i64> %0 +} +  define arm_aapcs_vfpcc <16 x i8> @shrs_qi_int8_t(<16 x i8> %src1) {  ; CHECK-LABEL: shrs_qi_int8_t: @@ -192,6 +300,25 @@ entry:    ret <4 x i32> %0  } +define arm_aapcs_vfpcc <2 x i64> @shrs_qi_int64_t(<2 x i64> %src1) { +; CHECK-LABEL: shrs_qi_int64_t: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    vmov r1, s1 +; CHECK-NEXT:    vmov r0, s0 +; CHECK-NEXT:    asrl r0, r1, #4 +; CHECK-NEXT:    vmov.32 q1[0], r0 +; CHECK-NEXT:    vmov r0, s2 +; CHECK-NEXT:    vmov.32 q1[1], r1 +; CHECK-NEXT:    vmov r1, s3 +; CHECK-NEXT:    asrl r0, r1, #4 +; CHECK-NEXT:    vmov.32 q1[2], r0 +; CHECK-NEXT:    vmov.32 q1[3], r1 +; CHECK-NEXT:    vmov q0, q1 +; CHECK-NEXT:    bx lr +entry: +  %0 = ashr <2 x i64> %src1, <i64 4, i64 4> +  ret <2 x i64> %0 +}  define arm_aapcs_vfpcc <16 x i8> @shl_qr_int8_t(<16 x i8> %src1, i8 %src2) { @@ -230,6 +357,28 @@ entry:    ret <4 x i32> %0  } +define arm_aapcs_vfpcc <2 x i64> @shl_qr_int64_t(<2 x i64> %src1, i64 %src2) { +; CHECK-LABEL: shl_qr_int64_t: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    vmov r1, s1 +; CHECK-NEXT:    vmov r2, s0 +; CHECK-NEXT:    lsll r2, r1, r0 +; CHECK-NEXT:    vmov.32 q1[0], r2 +; CHECK-NEXT:    vmov r2, s2 +; CHECK-NEXT:    vmov.32 q1[1], r1 +; CHECK-NEXT:    vmov r1, s3 +; CHECK-NEXT:    lsll r2, r1, r0 +; CHECK-NEXT:    vmov.32 q1[2], r2 +; CHECK-NEXT:    vmov.32 q1[3], r1 +; CHECK-NEXT:    vmov q0, q1 +; CHECK-NEXT:    bx lr +entry: +  %i = insertelement <2 x i64> undef, i64 %src2, i32 0 +  %s = shufflevector <2 x i64> %i, <2 x i64> undef, <2 x i32> zeroinitializer +  %0 = shl <2 x i64> %src1, %s +  ret <2 x i64> %0 +} +  define arm_aapcs_vfpcc <16 x i8> @shru_qr_int8_t(<16 x i8> %src1, i8 %src2) {  ; CHECK-LABEL: shru_qr_int8_t: @@ -273,6 +422,29 @@ entry:    ret <4 x i32> %0  } +define arm_aapcs_vfpcc <2 x i64> @shru_qr_int64_t(<2 x i64> %src1, i64 %src2) { +; CHECK-LABEL: shru_qr_int64_t: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    rsbs r0, r0, #0 +; CHECK-NEXT:    vmov r1, s1 +; CHECK-NEXT:    vmov r2, s0 +; CHECK-NEXT:    lsll r2, r1, r0 +; CHECK-NEXT:    vmov.32 q1[0], r2 +; CHECK-NEXT:    vmov r2, s2 +; CHECK-NEXT:    vmov.32 q1[1], r1 +; CHECK-NEXT:    vmov r1, s3 +; CHECK-NEXT:    lsll r2, r1, r0 +; CHECK-NEXT:    vmov.32 q1[2], r2 +; CHECK-NEXT:    vmov.32 q1[3], r1 +; CHECK-NEXT:    vmov q0, q1 +; CHECK-NEXT:    bx lr +entry: +  %i = insertelement <2 x i64> undef, i64 %src2, i32 0 +  %s = shufflevector <2 x i64> %i, <2 x i64> undef, <2 x i32> zeroinitializer +  %0 = lshr <2 x i64> %src1, %s +  ret <2 x i64> %0 +} +  define arm_aapcs_vfpcc <16 x i8> @shrs_qr_int8_t(<16 x i8> %src1, i8 %src2) {  ; CHECK-LABEL: shrs_qr_int8_t: @@ -316,17 +488,38 @@ entry:    ret <4 x i32> %0  } +define arm_aapcs_vfpcc <2 x i64> @shrs_qr_int64_t(<2 x i64> %src1, i64 %src2) { +; CHECK-LABEL: shrs_qr_int64_t: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    vmov r1, s1 +; CHECK-NEXT:    vmov r2, s0 +; CHECK-NEXT:    asrl r2, r1, r0 +; CHECK-NEXT:    vmov.32 q1[0], r2 +; CHECK-NEXT:    vmov r2, s2 +; CHECK-NEXT:    vmov.32 q1[1], r1 +; CHECK-NEXT:    vmov r1, s3 +; CHECK-NEXT:    asrl r2, r1, r0 +; CHECK-NEXT:    vmov.32 q1[2], r2 +; CHECK-NEXT:    vmov.32 q1[3], r1 +; CHECK-NEXT:   
 vmov q0, q1 +; CHECK-NEXT:    bx lr +entry: +  %i = insertelement <2 x i64> undef, i64 %src2, i32 0 +  %s = shufflevector <2 x i64> %i, <2 x i64> undef, <2 x i32> zeroinitializer +  %0 = ashr <2 x i64> %src1, %s +  ret <2 x i64> %0 +}  define arm_aapcs_vfpcc <16 x i8> @shl_qiv_int8_t(<16 x i8> %src1) {  ; CHECK-LABEL: shl_qiv_int8_t:  ; CHECK:       @ %bb.0: @ %entry -; CHECK-NEXT:    adr r0, .LCPI27_0 +; CHECK-NEXT:    adr r0, .LCPI36_0  ; CHECK-NEXT:    vldrw.u32 q1, [r0]  ; CHECK-NEXT:    vshl.u8 q0, q0, q1  ; CHECK-NEXT:    bx lr  ; CHECK-NEXT:    .p2align 4  ; CHECK-NEXT:  @ %bb.1: -; CHECK-NEXT:  .LCPI27_0: +; CHECK-NEXT:  .LCPI36_0:  ; CHECK-NEXT:    .byte 1 @ 0x1  ; CHECK-NEXT:    .byte 2 @ 0x2  ; CHECK-NEXT:    .byte 3 @ 0x3 @@ -351,13 +544,13 @@ entry:  define arm_aapcs_vfpcc <8 x i16> @shl_qiv_int16_t(<8 x i16> %src1) {  ; CHECK-LABEL: shl_qiv_int16_t:  ; CHECK:       @ %bb.0: @ %entry -; CHECK-NEXT:    adr r0, .LCPI28_0 +; CHECK-NEXT:    adr r0, .LCPI37_0  ; CHECK-NEXT:    vldrw.u32 q1, [r0]  ; CHECK-NEXT:    vshl.u16 q0, q0, q1  ; CHECK-NEXT:    bx lr  ; CHECK-NEXT:    .p2align 4  ; CHECK-NEXT:  @ %bb.1: -; CHECK-NEXT:  .LCPI28_0: +; CHECK-NEXT:  .LCPI37_0:  ; CHECK-NEXT:    .short 1 @ 0x1  ; CHECK-NEXT:    .short 2 @ 0x2  ; CHECK-NEXT:    .short 3 @ 0x3 @@ -374,13 +567,13 @@ entry:  define arm_aapcs_vfpcc <4 x i32> @shl_qiv_int32_t(<4 x i32> %src1) {  ; CHECK-LABEL: shl_qiv_int32_t:  ; CHECK:       @ %bb.0: @ %entry -; CHECK-NEXT:    adr r0, .LCPI29_0 +; CHECK-NEXT:    adr r0, .LCPI38_0  ; CHECK-NEXT:    vldrw.u32 q1, [r0]  ; CHECK-NEXT:    vshl.u32 q0, q0, q1  ; CHECK-NEXT:    bx lr  ; CHECK-NEXT:    .p2align 4  ; CHECK-NEXT:  @ %bb.1: -; CHECK-NEXT:  .LCPI29_0: +; CHECK-NEXT:  .LCPI38_0:  ; CHECK-NEXT:    .long 1 @ 0x1  ; CHECK-NEXT:    .long 2 @ 0x2  ; CHECK-NEXT:    .long 3 @ 0x3 diff --git a/llvm/test/CodeGen/Thumb2/mve-shuffle.ll b/llvm/test/CodeGen/Thumb2/mve-shuffle.ll index 87d209b5d2f..b5c96f9f86d 100644 --- a/llvm/test/CodeGen/Thumb2/mve-shuffle.ll +++ b/llvm/test/CodeGen/Thumb2/mve-shuffle.ll @@ -262,6 +262,38 @@ entry:    ret <16 x i8> %out  } +define arm_aapcs_vfpcc <2 x i64> @shuffle1_i64(<2 x i64> %src) { +; CHECK-LABEL: shuffle1_i64: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    bx lr +entry: +  %out = shufflevector <2 x i64> %src, <2 x i64> undef, <2 x i32> <i32 0, i32 1> +  ret <2 x i64> %out +} + +define arm_aapcs_vfpcc <2 x i64> @shuffle2_i64(<2 x i64> %src) { +; CHECK-LABEL: shuffle2_i64: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    vmov.f32 s4, s2 +; CHECK-NEXT:    vmov.f32 s5, s3 +; CHECK-NEXT:    vmov.f32 s6, s0 +; CHECK-NEXT:    vmov.f32 s7, s1 +; CHECK-NEXT:    vmov q0, q1 +; CHECK-NEXT:    bx lr +entry: +  %out = shufflevector <2 x i64> %src, <2 x i64> undef, <2 x i32> <i32 1, i32 0> +  ret <2 x i64> %out +} + +define arm_aapcs_vfpcc <2 x i64> @shuffle3_i64(<2 x i64> %src) { +; CHECK-LABEL: shuffle3_i64: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    bx lr +entry: +  %out = shufflevector <2 x i64> %src, <2 x i64> undef, <2 x i32> <i32 undef, i32 1> +  ret <2 x i64> %out +} +  define arm_aapcs_vfpcc <4 x float> @shuffle1_f32(<4 x float> %src) {  ; CHECK-LABEL: shuffle1_f32:  ; CHECK:       @ %bb.0: @ %entry @@ -390,6 +422,38 @@ entry:    ret <8 x half> %out  } +define arm_aapcs_vfpcc <2 x double> @shuffle1_f64(<2 x double> %src) { +; CHECK-LABEL: shuffle1_f64: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    bx lr +entry: +  %out = shufflevector <2 x double> %src, <2 x double> undef, <2 x i32> <i32 0, i32 1> +  ret <2 x double> 
%out +} + +define arm_aapcs_vfpcc <2 x double> @shuffle2_f64(<2 x double> %src) { +; CHECK-LABEL: shuffle2_f64: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    vmov.f32 s4, s2 +; CHECK-NEXT:    vmov.f32 s5, s3 +; CHECK-NEXT:    vmov.f32 s6, s0 +; CHECK-NEXT:    vmov.f32 s7, s1 +; CHECK-NEXT:    vmov q0, q1 +; CHECK-NEXT:    bx lr +entry: +  %out = shufflevector <2 x double> %src, <2 x double> undef, <2 x i32> <i32 1, i32 0> +  ret <2 x double> %out +} + +define arm_aapcs_vfpcc <2 x double> @shuffle3_f64(<2 x double> %src) { +; CHECK-LABEL: shuffle3_f64: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    bx lr +entry: +  %out = shufflevector <2 x double> %src, <2 x double> undef, <2 x i32> <i32 undef, i32 1> +  ret <2 x double> %out +} +  define arm_aapcs_vfpcc <4 x i32> @insert_i32(i32 %a) {  ; CHECK-LABEL: insert_i32: @@ -421,6 +485,17 @@ entry:    ret <16 x i8> %res  } +define arm_aapcs_vfpcc <2 x i64> @insert_i64(i64 %a) { +; CHECK-LABEL: insert_i64: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    vmov.32 q0[0], r0 +; CHECK-NEXT:    vmov.32 q0[1], r1 +; CHECK-NEXT:    bx lr +entry: +  %res = insertelement <2 x i64> undef, i64 %a, i32 0 +  ret <2 x i64> %res +} +  define arm_aapcs_vfpcc <4 x float> @insert_f32(float %a) {  ; CHECK-LABEL: insert_f32:  ; CHECK:       @ %bb.0: @ %entry @@ -443,12 +518,35 @@ entry:    ret <8 x half> %res  } +define arm_aapcs_vfpcc <2 x double> @insert_f64(double %a) { +; CHECK-LABEL: insert_f64: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    .save {r4, r6, r7, lr} +; CHECK-NEXT:    push {r4, r6, r7, lr} +; CHECK-NEXT:    .setfp r7, sp, #8 +; CHECK-NEXT:    add r7, sp, #8 +; CHECK-NEXT:    .pad #16 +; CHECK-NEXT:    sub sp, #16 +; CHECK-NEXT:    mov r4, sp +; CHECK-NEXT:    bfc r4, #0, #4 +; CHECK-NEXT:    mov sp, r4 +; CHECK-NEXT:    sub.w r4, r7, #8 +; CHECK-NEXT:    vstr d0, [sp] +; CHECK-NEXT:    mov r0, sp +; CHECK-NEXT:    vldrw.u32 q0, [r0] +; CHECK-NEXT:    mov sp, r4 +; CHECK-NEXT:    pop {r4, r6, r7, pc} +entry: +  %res = insertelement <2 x double> undef, double %a, i32 0 +  ret <2 x double> %res +} +  define arm_aapcs_vfpcc i64 @scalar_to_vector_i32(<8 x i16> %v) {  ; CHECK-LABEL: scalar_to_vector_i32:  ; CHECK:       @ %bb.0: @ %entry  ; CHECK-NEXT:    .pad #8  ; CHECK-NEXT:    sub sp, #8 -; CHECK-NEXT:    adr r1, .LCPI30_0 +; CHECK-NEXT:    adr r1, .LCPI38_0  ; CHECK-NEXT:    vmov.u16 r0, q0[0]  ; CHECK-NEXT:    vldrw.u32 q1, [r1]  ; CHECK-NEXT:    vmov.32 q0[0], r0 @@ -461,7 +559,7 @@ define arm_aapcs_vfpcc i64 @scalar_to_vector_i32(<8 x i16> %v) {  ; CHECK-NEXT:    bx lr  ; CHECK-NEXT:    .p2align 4  ; CHECK-NEXT:  @ %bb.1: -; CHECK-NEXT:  .LCPI30_0: +; CHECK-NEXT:  .LCPI38_0:  ; CHECK-NEXT:    .zero 4  ; CHECK-NEXT:    .long 7 @ 0x7  ; CHECK-NEXT:    .long 1 @ 0x1 @@ -533,6 +631,28 @@ entry:    ret i8 %res  } +define arm_aapcs_vfpcc i64 @extract_i64_0(<2 x i64> %a) { +; CHECK-LABEL: extract_i64_0: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    vmov r0, s0 +; CHECK-NEXT:    vmov r1, s1 +; CHECK-NEXT:    bx lr +entry: +  %res = extractelement <2 x i64> %a, i32 0 +  ret i64 %res +} + +define arm_aapcs_vfpcc i64 @extract_i64_1(<2 x i64> %a) { +; CHECK-LABEL: extract_i64_1: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    vmov r0, s2 +; CHECK-NEXT:    vmov r1, s3 +; CHECK-NEXT:    bx lr +entry: +  %res = extractelement <2 x i64> %a, i32 1 +  ret i64 %res +} +  define arm_aapcs_vfpcc float @extract_f32_0(<4 x float> %a) {  ; CHECK-LABEL: extract_f32_0:  ; CHECK:       @ %bb.0: @ %entry @@ -576,3 +696,25 @@ entry:    %res = 
extractelement <8 x half> %a, i32 3    ret half %res  } + +define arm_aapcs_vfpcc double @extract_f64_0(<2 x double> %a) { +; CHECK-LABEL: extract_f64_0: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    @ kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT:    bx lr +entry: +  %res = extractelement <2 x double> %a, i32 0 +  ret double %res +} + +define arm_aapcs_vfpcc double @extract_f64_1(<2 x double> %a) { +; CHECK-LABEL: extract_f64_1: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    vmov.f32 s0, s2 +; CHECK-NEXT:    vmov.f32 s1, s3 +; CHECK-NEXT:    bx lr +entry: +  %res = extractelement <2 x double> %a, i32 1 +  ret double %res +} + diff --git a/llvm/test/CodeGen/Thumb2/mve-simple-arith.ll b/llvm/test/CodeGen/Thumb2/mve-simple-arith.ll index 570aae3f21b..ecad0c1c5df 100644 --- a/llvm/test/CodeGen/Thumb2/mve-simple-arith.ll +++ b/llvm/test/CodeGen/Thumb2/mve-simple-arith.ll @@ -32,6 +32,33 @@ entry:    ret <4 x i32> %0  } +define arm_aapcs_vfpcc <2 x i64> @add_int64_t(<2 x i64> %src1, <2 x i64> %src2) { +; CHECK-LABEL: add_int64_t: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    .save {r7, lr} +; CHECK-NEXT:    push {r7, lr} +; CHECK-NEXT:    vmov r2, s6 +; CHECK-NEXT:    vmov r3, s2 +; CHECK-NEXT:    vmov r0, s7 +; CHECK-NEXT:    vmov r1, s3 +; CHECK-NEXT:    adds.w lr, r3, r2 +; CHECK-NEXT:    vmov r2, s0 +; CHECK-NEXT:    vmov r3, s1 +; CHECK-NEXT:    adc.w r12, r1, r0 +; CHECK-NEXT:    vmov r0, s4 +; CHECK-NEXT:    vmov r1, s5 +; CHECK-NEXT:    adds r0, r0, r2 +; CHECK-NEXT:    adcs r1, r3 +; CHECK-NEXT:    vmov.32 q0[0], r0 +; CHECK-NEXT:    vmov.32 q0[1], r1 +; CHECK-NEXT:    vmov.32 q0[2], lr +; CHECK-NEXT:    vmov.32 q0[3], r12 +; CHECK-NEXT:    pop {r7, pc} +entry: +  %0 = add nsw <2 x i64> %src1, %src2 +  ret <2 x i64> %0 +} +  define arm_aapcs_vfpcc <4 x float> @add_float32_t(<4 x float> %src1, <4 x float> %src2) {  ; CHECK-MVE-LABEL: add_float32_t:  ; CHECK-MVE:       @ %bb.0: @ %entry @@ -122,6 +149,33 @@ entry:    ret <8 x half> %0  } +define arm_aapcs_vfpcc <2 x double> @add_float64_t(<2 x double> %src1, <2 x double> %src2) { +; CHECK-LABEL: add_float64_t: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    .save {r7, lr} +; CHECK-NEXT:    push {r7, lr} +; CHECK-NEXT:    .vsave {d8, d9, d10, d11} +; CHECK-NEXT:    vpush {d8, d9, d10, d11} +; CHECK-NEXT:    vmov q4, q1 +; CHECK-NEXT:    vmov q5, q0 +; CHECK-NEXT:    vmov r0, r1, d9 +; CHECK-NEXT:    vmov r2, r3, d11 +; CHECK-NEXT:    bl __aeabi_dadd +; CHECK-NEXT:    vmov lr, r12, d8 +; CHECK-NEXT:    vmov r2, r3, d10 +; CHECK-NEXT:    vmov d9, r0, r1 +; CHECK-NEXT:    mov r0, lr +; CHECK-NEXT:    mov r1, r12 +; CHECK-NEXT:    bl __aeabi_dadd +; CHECK-NEXT:    vmov d8, r0, r1 +; CHECK-NEXT:    vmov q0, q4 +; CHECK-NEXT:    vpop {d8, d9, d10, d11} +; CHECK-NEXT:    pop {r7, pc} +entry: +  %0 = fadd nnan ninf nsz <2 x double> %src2, %src1 +  ret <2 x double> %0 +} +  define arm_aapcs_vfpcc <16 x i8> @sub_int8_t(<16 x i8> %src1, <16 x i8> %src2) {  ; CHECK-LABEL: sub_int8_t: @@ -153,6 +207,33 @@ entry:    ret <4 x i32> %0  } +define arm_aapcs_vfpcc <2 x i64> @sub_int64_t(<2 x i64> %src1, <2 x i64> %src2) { +; CHECK-LABEL: sub_int64_t: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    .save {r7, lr} +; CHECK-NEXT:    push {r7, lr} +; CHECK-NEXT:    vmov r2, s2 +; CHECK-NEXT:    vmov r3, s6 +; CHECK-NEXT:    vmov r0, s3 +; CHECK-NEXT:    vmov r1, s7 +; CHECK-NEXT:    subs.w lr, r3, r2 +; CHECK-NEXT:    vmov r2, s4 +; CHECK-NEXT:    vmov r3, s5 +; CHECK-NEXT:    sbc.w r12, r1, r0 +; CHECK-NEXT:    vmov r0, s0 +; 
CHECK-NEXT:    vmov r1, s1 +; CHECK-NEXT:    subs r0, r2, r0 +; CHECK-NEXT:    sbc.w r1, r3, r1 +; CHECK-NEXT:    vmov.32 q0[0], r0 +; CHECK-NEXT:    vmov.32 q0[1], r1 +; CHECK-NEXT:    vmov.32 q0[2], lr +; CHECK-NEXT:    vmov.32 q0[3], r12 +; CHECK-NEXT:    pop {r7, pc} +entry: +  %0 = sub nsw <2 x i64> %src2, %src1 +  ret <2 x i64> %0 +} +  define arm_aapcs_vfpcc <4 x float> @sub_float32_t(<4 x float> %src1, <4 x float> %src2) {  ; CHECK-MVE-LABEL: sub_float32_t:  ; CHECK-MVE:       @ %bb.0: @ %entry @@ -243,6 +324,34 @@ entry:    ret <8 x half> %0  } +define arm_aapcs_vfpcc <2 x double> @sub_float64_t(<2 x double> %src1, <2 x double> %src2) { +; CHECK-LABEL: sub_float64_t: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    .save {r7, lr} +; CHECK-NEXT:    push {r7, lr} +; CHECK-NEXT:    .vsave {d8, d9, d10, d11} +; CHECK-NEXT:    vpush {d8, d9, d10, d11} +; CHECK-NEXT:    vmov q4, q1 +; CHECK-NEXT:    vmov q5, q0 +; CHECK-NEXT:    vmov r0, r1, d9 +; CHECK-NEXT:    vmov r2, r3, d11 +; CHECK-NEXT:    bl __aeabi_dsub +; CHECK-NEXT:    vmov lr, r12, d8 +; CHECK-NEXT:    vmov r2, r3, d10 +; CHECK-NEXT:    vmov d9, r0, r1 +; CHECK-NEXT:    mov r0, lr +; CHECK-NEXT:    mov r1, r12 +; CHECK-NEXT:    bl __aeabi_dsub +; CHECK-NEXT:    vmov d8, r0, r1 +; CHECK-NEXT:    vmov q0, q4 +; CHECK-NEXT:    vpop {d8, d9, d10, d11} +; CHECK-NEXT:    pop {r7, pc} +entry: +  %0 = fsub nnan ninf nsz <2 x double> %src2, %src1 +  ret <2 x double> %0 +} + +  define arm_aapcs_vfpcc <16 x i8> @mul_int8_t(<16 x i8> %src1, <16 x i8> %src2) {  ; CHECK-LABEL: mul_int8_t:  ; CHECK:       @ %bb.0: @ %entry @@ -273,6 +382,35 @@ entry:    ret <4 x i32> %0  } +define arm_aapcs_vfpcc <2 x i64> @mul_int64_t(<2 x i64> %src1, <2 x i64> %src2) { +; CHECK-LABEL: mul_int64_t: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    .save {r4, r5, r7, lr} +; CHECK-NEXT:    push {r4, r5, r7, lr} +; CHECK-NEXT:    vmov r0, s4 +; CHECK-NEXT:    vmov r1, s0 +; CHECK-NEXT:    vmov r2, s5 +; CHECK-NEXT:    umull r12, r3, r1, r0 +; CHECK-NEXT:    mla lr, r1, r2, r3 +; CHECK-NEXT:    vmov r3, s6 +; CHECK-NEXT:    vmov r1, s2 +; CHECK-NEXT:    vmov r2, s7 +; CHECK-NEXT:    umull r4, r5, r1, r3 +; CHECK-NEXT:    mla r1, r1, r2, r5 +; CHECK-NEXT:    vmov r2, s1 +; CHECK-NEXT:    mla r0, r2, r0, lr +; CHECK-NEXT:    vmov r2, s3 +; CHECK-NEXT:    vmov.32 q0[0], r12 +; CHECK-NEXT:    vmov.32 q0[1], r0 +; CHECK-NEXT:    vmov.32 q0[2], r4 +; CHECK-NEXT:    mla r1, r2, r3, r1 +; CHECK-NEXT:    vmov.32 q0[3], r1 +; CHECK-NEXT:    pop {r4, r5, r7, pc} +entry: +  %0 = mul nsw <2 x i64> %src1, %src2 +  ret <2 x i64> %0 +} +  define arm_aapcs_vfpcc <8 x half> @mul_float16_t(<8 x half> %src1, <8 x half> %src2) {  ; CHECK-MVE-LABEL: mul_float16_t:  ; CHECK-MVE:       @ %bb.0: @ %entry @@ -362,3 +500,31 @@ entry:    %0 = fmul nnan ninf nsz <4 x float> %src2, %src1    ret <4 x float> %0  } + +define arm_aapcs_vfpcc <2 x double> @mul_float64_t(<2 x double> %src1, <2 x double> %src2) { +; CHECK-LABEL: mul_float64_t: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    .save {r7, lr} +; CHECK-NEXT:    push {r7, lr} +; CHECK-NEXT:    .vsave {d8, d9, d10, d11} +; CHECK-NEXT:    vpush {d8, d9, d10, d11} +; CHECK-NEXT:    vmov q4, q1 +; CHECK-NEXT:    vmov q5, q0 +; CHECK-NEXT:    vmov r0, r1, d9 +; CHECK-NEXT:    vmov r2, r3, d11 +; CHECK-NEXT:    bl __aeabi_dmul +; CHECK-NEXT:    vmov lr, r12, d8 +; CHECK-NEXT:    vmov r2, r3, d10 +; CHECK-NEXT:    vmov d9, r0, r1 +; CHECK-NEXT:    mov r0, lr +; CHECK-NEXT:    mov r1, r12 +; CHECK-NEXT:    bl __aeabi_dmul +; 
CHECK-NEXT:    vmov d8, r0, r1 +; CHECK-NEXT:    vmov q0, q4 +; CHECK-NEXT:    vpop {d8, d9, d10, d11} +; CHECK-NEXT:    pop {r7, pc} +entry: +  %0 = fmul nnan ninf nsz <2 x double> %src2, %src1 +  ret <2 x double> %0 +} + diff --git a/llvm/test/CodeGen/Thumb2/mve-soft-float-abi.ll b/llvm/test/CodeGen/Thumb2/mve-soft-float-abi.ll index 794f7ba20c2..37ca5a2f202 100644 --- a/llvm/test/CodeGen/Thumb2/mve-soft-float-abi.ll +++ b/llvm/test/CodeGen/Thumb2/mve-soft-float-abi.ll @@ -50,6 +50,39 @@ entry:    ret <4 x i32> %sum  } +define <2 x i64> @vector_add_i64(<2 x i64> %lhs, <2 x i64> %rhs) { +; CHECK-FP-LABEL: vector_add_i64: +; CHECK-FP:       @ %bb.0: @ %entry +; CHECK-FP-NEXT:    .save {r7, lr} +; CHECK-FP-NEXT:    push {r7, lr} +; CHECK-FP-NEXT:    vmov d1, r2, r3 +; CHECK-FP-NEXT:    vmov d0, r0, r1 +; CHECK-FP-NEXT:    add r0, sp, #8 +; CHECK-FP-NEXT:    vldrw.u32 q1, [r0] +; CHECK-FP-NEXT:    vmov r1, s2 +; CHECK-FP-NEXT:    vmov r0, s3 +; CHECK-FP-NEXT:    vmov r3, s6 +; CHECK-FP-NEXT:    vmov r2, s7 +; CHECK-FP-NEXT:    adds.w lr, r1, r3 +; CHECK-FP-NEXT:    vmov r3, s0 +; CHECK-FP-NEXT:    vmov r1, s4 +; CHECK-FP-NEXT:    adc.w r12, r0, r2 +; CHECK-FP-NEXT:    vmov r2, s1 +; CHECK-FP-NEXT:    vmov r0, s5 +; CHECK-FP-NEXT:    adds r1, r1, r3 +; CHECK-FP-NEXT:    vmov.32 q0[0], r1 +; CHECK-FP-NEXT:    adcs r0, r2 +; CHECK-FP-NEXT:    vmov.32 q0[1], r0 +; CHECK-FP-NEXT:    vmov.32 q0[2], lr +; CHECK-FP-NEXT:    vmov.32 q0[3], r12 +; CHECK-FP-NEXT:    vmov r0, r1, d0 +; CHECK-FP-NEXT:    vmov r2, r3, d1 +; CHECK-FP-NEXT:    pop {r7, pc} +entry: +  %sum = add <2 x i64> %lhs, %rhs +  ret <2 x i64> %sum +} +  define <8 x half> @vector_add_f16(<8 x half> %lhs, <8 x half> %rhs) {  ; CHECK-FP-LABEL: vector_add_f16:  ; CHECK-FP:       @ %bb.0: @ %entry @@ -81,3 +114,38 @@ entry:    %sum = fadd <4 x float> %lhs, %rhs    ret <4 x float> %sum  } + +define <2 x double> @vector_add_f64(<2 x double> %lhs, <2 x double> %rhs) { +; CHECK-FP-LABEL: vector_add_f64: +; CHECK-FP:       @ %bb.0: @ %entry +; CHECK-FP-NEXT:    .save {r4, r5, r6, r7, lr} +; CHECK-FP-NEXT:    push {r4, r5, r6, r7, lr} +; CHECK-FP-NEXT:    .pad #4 +; CHECK-FP-NEXT:    sub sp, #4 +; CHECK-FP-NEXT:    .vsave {d8, d9} +; CHECK-FP-NEXT:    vpush {d8, d9} +; CHECK-FP-NEXT:    mov r5, r0 +; CHECK-FP-NEXT:    add r0, sp, #40 +; CHECK-FP-NEXT:    vldrw.u32 q4, [r0] +; CHECK-FP-NEXT:    mov r4, r2 +; CHECK-FP-NEXT:    mov r6, r3 +; CHECK-FP-NEXT:    mov r7, r1 +; CHECK-FP-NEXT:    vmov r2, r3, d9 +; CHECK-FP-NEXT:    mov r0, r4 +; CHECK-FP-NEXT:    mov r1, r6 +; CHECK-FP-NEXT:    bl __aeabi_dadd +; CHECK-FP-NEXT:    vmov r2, r3, d8 +; CHECK-FP-NEXT:    vmov d9, r0, r1 +; CHECK-FP-NEXT:    mov r0, r5 +; CHECK-FP-NEXT:    mov r1, r7 +; CHECK-FP-NEXT:    bl __aeabi_dadd +; CHECK-FP-NEXT:    vmov d8, r0, r1 +; CHECK-FP-NEXT:    vmov r2, r3, d9 +; CHECK-FP-NEXT:    vmov r0, r1, d8 +; CHECK-FP-NEXT:    vpop {d8, d9} +; CHECK-FP-NEXT:    add sp, #4 +; CHECK-FP-NEXT:    pop {r4, r5, r6, r7, pc} +entry: +  %sum = fadd <2 x double> %lhs, %rhs +  ret <2 x double> %sum +} diff --git a/llvm/test/CodeGen/Thumb2/mve-vcvt.ll b/llvm/test/CodeGen/Thumb2/mve-vcvt.ll index 55d354ae639..524ec692c8c 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vcvt.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vcvt.ll @@ -317,3 +317,111 @@ entry:    %out = fptoui <8 x half> %src to <8 x i16>    ret <8 x i16> %out  } + +define arm_aapcs_vfpcc <2 x double> @foo_float_int64(<2 x i64> %src) { +; CHECK-LABEL: foo_float_int64: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    .save {r7, lr} 
+; CHECK-NEXT:    push {r7, lr} +; CHECK-NEXT:    .vsave {d8, d9} +; CHECK-NEXT:    vpush {d8, d9} +; CHECK-NEXT:    vmov q4, q0 +; CHECK-NEXT:    vmov r0, s18 +; CHECK-NEXT:    vmov r1, s19 +; CHECK-NEXT:    bl __aeabi_l2d +; CHECK-NEXT:    vmov r2, s16 +; CHECK-NEXT:    vmov r3, s17 +; CHECK-NEXT:    vmov d9, r0, r1 +; CHECK-NEXT:    mov r0, r2 +; CHECK-NEXT:    mov r1, r3 +; CHECK-NEXT:    bl __aeabi_l2d +; CHECK-NEXT:    vmov d8, r0, r1 +; CHECK-NEXT:    vmov q0, q4 +; CHECK-NEXT:    vpop {d8, d9} +; CHECK-NEXT:    pop {r7, pc} +entry: +  %out = sitofp <2 x i64> %src to <2 x double> +  ret <2 x double> %out +} + +define arm_aapcs_vfpcc <2 x double> @foo_float_uint64(<2 x i64> %src) { +; CHECK-LABEL: foo_float_uint64: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    .save {r7, lr} +; CHECK-NEXT:    push {r7, lr} +; CHECK-NEXT:    .vsave {d8, d9} +; CHECK-NEXT:    vpush {d8, d9} +; CHECK-NEXT:    vmov q4, q0 +; CHECK-NEXT:    vmov r0, s18 +; CHECK-NEXT:    vmov r1, s19 +; CHECK-NEXT:    bl __aeabi_ul2d +; CHECK-NEXT:    vmov r2, s16 +; CHECK-NEXT:    vmov r3, s17 +; CHECK-NEXT:    vmov d9, r0, r1 +; CHECK-NEXT:    mov r0, r2 +; CHECK-NEXT:    mov r1, r3 +; CHECK-NEXT:    bl __aeabi_ul2d +; CHECK-NEXT:    vmov d8, r0, r1 +; CHECK-NEXT:    vmov q0, q4 +; CHECK-NEXT:    vpop {d8, d9} +; CHECK-NEXT:    pop {r7, pc} +entry: +  %out = uitofp <2 x i64> %src to <2 x double> +  ret <2 x double> %out +} + +define arm_aapcs_vfpcc <2 x i64> @foo_int64_float(<2 x double> %src) { +; CHECK-LABEL: foo_int64_float: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    .save {r7, lr} +; CHECK-NEXT:    push {r7, lr} +; CHECK-NEXT:    .vsave {d8, d9} +; CHECK-NEXT:    vpush {d8, d9} +; CHECK-NEXT:    vmov q4, q0 +; CHECK-NEXT:    vmov r0, r1, d8 +; CHECK-NEXT:    bl __aeabi_d2lz +; CHECK-NEXT:    vmov r2, r3, d9 +; CHECK-NEXT:    vmov.32 q4[0], r0 +; CHECK-NEXT:    lsrl r0, r1, #32 +; CHECK-NEXT:    vmov.32 q4[1], r0 +; CHECK-NEXT:    mov r0, r2 +; CHECK-NEXT:    mov r1, r3 +; CHECK-NEXT:    bl __aeabi_d2lz +; CHECK-NEXT:    vmov.32 q4[2], r0 +; CHECK-NEXT:    lsrl r0, r1, #32 +; CHECK-NEXT:    vmov.32 q4[3], r0 +; CHECK-NEXT:    vmov q0, q4 +; CHECK-NEXT:    vpop {d8, d9} +; CHECK-NEXT:    pop {r7, pc} +entry: +  %out = fptosi <2 x double> %src to <2 x i64> +  ret <2 x i64> %out +} + +define arm_aapcs_vfpcc <2 x i64> @foo_uint64_float(<2 x double> %src) { +; CHECK-LABEL: foo_uint64_float: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    .save {r7, lr} +; CHECK-NEXT:    push {r7, lr} +; CHECK-NEXT:    .vsave {d8, d9} +; CHECK-NEXT:    vpush {d8, d9} +; CHECK-NEXT:    vmov q4, q0 +; CHECK-NEXT:    vmov r0, r1, d8 +; CHECK-NEXT:    bl __aeabi_d2ulz +; CHECK-NEXT:    vmov r2, r3, d9 +; CHECK-NEXT:    vmov.32 q4[0], r0 +; CHECK-NEXT:    lsrl r0, r1, #32 +; CHECK-NEXT:    vmov.32 q4[1], r0 +; CHECK-NEXT:    mov r0, r2 +; CHECK-NEXT:    mov r1, r3 +; CHECK-NEXT:    bl __aeabi_d2ulz +; CHECK-NEXT:    vmov.32 q4[2], r0 +; CHECK-NEXT:    lsrl r0, r1, #32 +; CHECK-NEXT:    vmov.32 q4[3], r0 +; CHECK-NEXT:    vmov q0, q4 +; CHECK-NEXT:    vpop {d8, d9} +; CHECK-NEXT:    pop {r7, pc} +entry: +  %out = fptoui <2 x double> %src to <2 x i64> +  ret <2 x i64> %out +} diff --git a/llvm/test/CodeGen/Thumb2/mve-vdup.ll b/llvm/test/CodeGen/Thumb2/mve-vdup.ll index 9629024f63e..3cc9cfd3f44 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vdup.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vdup.ll @@ -35,6 +35,20 @@ entry:    ret <16 x i8> %out  } +define arm_aapcs_vfpcc <2 x i64> @vdup_i64(i64 %src) { +; CHECK-LABEL: vdup_i64: +; CHECK:    
   @ %bb.0: @ %entry +; CHECK-NEXT:    vmov.32 q0[0], r0 +; CHECK-NEXT:    vmov.32 q0[1], r1 +; CHECK-NEXT:    vmov.32 q0[2], r0 +; CHECK-NEXT:    vmov.32 q0[3], r1 +; CHECK-NEXT:    bx lr +entry: +  %0 = insertelement <2 x i64> undef, i64 %src, i32 0 +  %out = shufflevector <2 x i64> %0, <2 x i64> undef, <2 x i32> zeroinitializer +  ret <2 x i64> %out +} +  define arm_aapcs_vfpcc <4 x float> @vdup_f32_1(float %src) {  ; CHECK-LABEL: vdup_f32_1:  ; CHECK:       @ %bb.0: @ %entry @@ -80,6 +94,19 @@ entry:    ret <8 x half> %out  } +define arm_aapcs_vfpcc <2 x double> @vdup_f64(double %src) { +; CHECK-LABEL: vdup_f64: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    @ kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT:    vmov.f32 s2, s0 +; CHECK-NEXT:    vmov.f32 s3, s1 +; CHECK-NEXT:    bx lr +entry: +  %0 = insertelement <2 x double> undef, double %src, i32 0 +  %out = shufflevector <2 x double> %0, <2 x double> undef, <2 x i32> zeroinitializer +  ret <2 x double> %out +} +  define arm_aapcs_vfpcc <4 x i32> @vduplane_i32(<4 x i32> %src) { @@ -115,6 +142,17 @@ entry:    ret <16 x i8> %out  } +define arm_aapcs_vfpcc <2 x i64> @vduplane_i64(<2 x i64> %src) { +; CHECK-LABEL: vduplane_i64: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    vmov.f32 s0, s2 +; CHECK-NEXT:    vmov.f32 s1, s3 +; CHECK-NEXT:    bx lr +entry: +  %out = shufflevector <2 x i64> %src, <2 x i64> undef, <2 x i32> <i32 1, i32 1> +  ret <2 x i64> %out +} +  define arm_aapcs_vfpcc <4 x float> @vduplane_f32(<4 x float> %src) {  ; CHECK-LABEL: vduplane_f32:  ; CHECK:       @ %bb.0: @ %entry @@ -136,3 +174,14 @@ entry:    %out = shufflevector <8 x half> %src, <8 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>    ret <8 x half> %out  } + +define arm_aapcs_vfpcc <2 x double> @vduplane_f64(<2 x double> %src) { +; CHECK-LABEL: vduplane_f64: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    vmov.f32 s0, s2 +; CHECK-NEXT:    vmov.f32 s1, s3 +; CHECK-NEXT:    bx lr +entry: +  %out = shufflevector <2 x double> %src, <2 x double> undef, <2 x i32> <i32 1, i32 1> +  ret <2 x double> %out +} diff --git a/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll b/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll index 5281ecd17c3..94721a54b94 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll @@ -11,7 +11,7 @@ entry:    ret <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>  } -define arm_aapcs_vfpcc <16 x i8> @mov_int8_m1(i8 *%dest) { +define arm_aapcs_vfpcc <16 x i8> @mov_int8_m1() {  ; CHECK-LABEL: mov_int8_m1:  ; CHECK:       @ %bb.0: @ %entry  ; CHECK-NEXT:    vmov.i8 q0, #0xff @@ -20,7 +20,7 @@ entry:    ret <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>  } -define arm_aapcs_vfpcc <8 x i16> @mov_int16_1(i16 *%dest) { +define arm_aapcs_vfpcc <8 x i16> @mov_int16_1() {  ; CHECK-LABEL: mov_int16_1:  ; CHECK:       @ %bb.0: @ %entry  ; CHECK-NEXT:    vmov.i16 q0, #0x1 @@ -29,7 +29,7 @@ entry:    ret <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>  } -define arm_aapcs_vfpcc <8 x i16> @mov_int16_m1(i16 *%dest) { +define arm_aapcs_vfpcc <8 x i16> @mov_int16_m1() {  ; CHECK-LABEL: mov_int16_m1:  ; CHECK:       @ %bb.0: @ %entry  ; CHECK-NEXT:    vmov.i8 q0, #0xff @@ -38,7 +38,7 @@ entry:    ret <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>  } -define arm_aapcs_vfpcc <8 x i16> @mov_int16_256(i16 *%dest) { 
+define arm_aapcs_vfpcc <8 x i16> @mov_int16_256() {  ; CHECK-LABEL: mov_int16_256:  ; CHECK:       @ %bb.0: @ %entry  ; CHECK-NEXT:    vmov.i16 q0, #0x100 @@ -56,7 +56,7 @@ entry:    ret <8 x i16> <i16 257, i16 257, i16 257, i16 257, i16 257, i16 257, i16 257, i16 257>  } -define arm_aapcs_vfpcc <8 x i16> @mov_int16_258(i16 *%dest) { +define arm_aapcs_vfpcc <8 x i16> @mov_int16_258() {  ; CHECK-LABEL: mov_int16_258:  ; CHECK:       @ %bb.0: @ %entry  ; CHECK-NEXT:    adr r0, .LCPI6_0 @@ -73,7 +73,7 @@ entry:    ret <8 x i16> <i16 258, i16 258, i16 258, i16 258, i16 258, i16 258, i16 258, i16 258>  } -define arm_aapcs_vfpcc <4 x i32> @mov_int32_1(i32 *%dest) { +define arm_aapcs_vfpcc <4 x i32> @mov_int32_1() {  ; CHECK-LABEL: mov_int32_1:  ; CHECK:       @ %bb.0: @ %entry  ; CHECK-NEXT:    vmov.i32 q0, #0x1 @@ -82,7 +82,7 @@ entry:    ret <4 x i32> <i32 1, i32 1, i32 1, i32 1>  } -define arm_aapcs_vfpcc <4 x i32> @mov_int32_256(i32 *%dest) { +define arm_aapcs_vfpcc <4 x i32> @mov_int32_256() {  ; CHECK-LABEL: mov_int32_256:  ; CHECK:       @ %bb.0: @ %entry  ; CHECK-NEXT:    vmov.i32 q0, #0x100 @@ -91,7 +91,7 @@ entry:    ret <4 x i32> <i32 256, i32 256, i32 256, i32 256>  } -define arm_aapcs_vfpcc <4 x i32> @mov_int32_65536(i32 *%dest) { +define arm_aapcs_vfpcc <4 x i32> @mov_int32_65536() {  ; CHECK-LABEL: mov_int32_65536:  ; CHECK:       @ %bb.0: @ %entry  ; CHECK-NEXT:    vmov.i32 q0, #0x10000 @@ -100,7 +100,7 @@ entry:    ret <4 x i32> <i32 65536, i32 65536, i32 65536, i32 65536>  } -define arm_aapcs_vfpcc <4 x i32> @mov_int32_16777216(i32 *%dest) { +define arm_aapcs_vfpcc <4 x i32> @mov_int32_16777216() {  ; CHECK-LABEL: mov_int32_16777216:  ; CHECK:       @ %bb.0: @ %entry  ; CHECK-NEXT:    vmov.i32 q0, #0x1000000 @@ -109,7 +109,7 @@ entry:    ret <4 x i32> <i32 16777216, i32 16777216, i32 16777216, i32 16777216>  } -define arm_aapcs_vfpcc <4 x i32> @mov_int32_16777217(i32 *%dest) { +define arm_aapcs_vfpcc <4 x i32> @mov_int32_16777217() {  ; CHECK-LABEL: mov_int32_16777217:  ; CHECK:       @ %bb.0: @ %entry  ; CHECK-NEXT:    adr r0, .LCPI11_0 @@ -126,7 +126,7 @@ entry:    ret <4 x i32> <i32 16777217, i32 16777217, i32 16777217, i32 16777217>  } -define arm_aapcs_vfpcc <4 x i32> @mov_int32_17919(i32 *%dest) { +define arm_aapcs_vfpcc <4 x i32> @mov_int32_17919() {  ; CHECK-LABEL: mov_int32_17919:  ; CHECK:       @ %bb.0: @ %entry  ; CHECK-NEXT:    vmov.i32 q0, #0x45ff @@ -135,7 +135,7 @@ entry:    ret <4 x i32> <i32 17919, i32 17919, i32 17919, i32 17919>  } -define arm_aapcs_vfpcc <4 x i32> @mov_int32_4587519(i32 *%dest) { +define arm_aapcs_vfpcc <4 x i32> @mov_int32_4587519() {  ; CHECK-LABEL: mov_int32_4587519:  ; CHECK:       @ %bb.0: @ %entry  ; CHECK-NEXT:    vmov.i32 q0, #0x45ffff @@ -144,7 +144,7 @@ entry:    ret <4 x i32> <i32 4587519, i32 4587519, i32 4587519, i32 4587519>  } -define arm_aapcs_vfpcc <4 x i32> @mov_int32_m1(i32 *%dest) { +define arm_aapcs_vfpcc <4 x i32> @mov_int32_m1() {  ; CHECK-LABEL: mov_int32_m1:  ; CHECK:       @ %bb.0: @ %entry  ; CHECK-NEXT:    vmov.i8 q0, #0xff @@ -153,7 +153,7 @@ entry:    ret <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>  } -define arm_aapcs_vfpcc <4 x i32> @mov_int32_4294901760(i32 *%dest) { +define arm_aapcs_vfpcc <4 x i32> @mov_int32_4294901760() {  ; CHECK-LABEL: mov_int32_4294901760:  ; CHECK:       @ %bb.0: @ %entry  ; CHECK-NEXT:    vmvn.i32 q0, #0xffff @@ -162,7 +162,7 @@ entry:    ret <4 x i32> <i32 4294901760, i32 4294901760, i32 4294901760, i32 4294901760>  } -define arm_aapcs_vfpcc <4 x i32> @mov_int32_4278190335(i32 
*%dest) { +define arm_aapcs_vfpcc <4 x i32> @mov_int32_4278190335() {  ; CHECK-LABEL: mov_int32_4278190335:  ; CHECK:       @ %bb.0: @ %entry  ; CHECK-NEXT:    adr r0, .LCPI16_0 @@ -179,7 +179,7 @@ entry:    ret <4 x i32> <i32 4278190335, i32 4278190335, i32 4278190335, i32 4278190335>  } -define arm_aapcs_vfpcc <4 x i32> @mov_int32_4278255615(i32 *%dest) { +define arm_aapcs_vfpcc <4 x i32> @mov_int32_4278255615() {  ; CHECK-LABEL: mov_int32_4278255615:  ; CHECK:       @ %bb.0: @ %entry  ; CHECK-NEXT:    vmvn.i32 q0, #0xff0000 @@ -188,8 +188,8 @@ entry:    ret <4 x i32> <i32 4278255615, i32 4278255615, i32 4278255615, i32 4278255615>  } -define arm_aapcs_vfpcc <4 x float> @mov_float_1(float *%dest) { -; CHECK-LABEL: mov_float_1: +define arm_aapcs_vfpcc <2 x i64> @mov_int64_1() { +; CHECK-LABEL: mov_int64_1:  ; CHECK:       @ %bb.0: @ %entry  ; CHECK-NEXT:    adr r0, .LCPI18_0  ; CHECK-NEXT:    vldrw.u32 q0, [r0] @@ -197,6 +197,32 @@ define arm_aapcs_vfpcc <4 x float> @mov_float_1(float *%dest) {  ; CHECK-NEXT:    .p2align 4  ; CHECK-NEXT:  @ %bb.1:  ; CHECK-NEXT:  .LCPI18_0: +; CHECK-NEXT:    .long 1 @ double 4.9406564584124654E-324 +; CHECK-NEXT:    .long 0 +; CHECK-NEXT:    .long 1 @ double 4.9406564584124654E-324 +; CHECK-NEXT:    .long 0 +entry: +  ret <2 x i64> <i64 1, i64 1> +} + +define arm_aapcs_vfpcc <2 x i64> @mov_int64_m1() { +; CHECK-LABEL: mov_int64_m1: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    vmov.i8 q0, #0xff +; CHECK-NEXT:    bx lr +entry: +  ret <2 x i64> <i64 -1, i64 -1> +} + +define arm_aapcs_vfpcc <4 x float> @mov_float_1() { +; CHECK-LABEL: mov_float_1: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    adr r0, .LCPI20_0 +; CHECK-NEXT:    vldrw.u32 q0, [r0] +; CHECK-NEXT:    bx lr +; CHECK-NEXT:    .p2align 4 +; CHECK-NEXT:  @ %bb.1: +; CHECK-NEXT:  .LCPI20_0:  ; CHECK-NEXT:    .long 1065353216 @ double 0.007812501848093234  ; CHECK-NEXT:    .long 1065353216  ; CHECK-NEXT:    .long 1065353216 @ double 0.007812501848093234 @@ -205,15 +231,15 @@ entry:    ret <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>  } -define arm_aapcs_vfpcc <4 x float> @mov_float_m3(float *%dest) { +define arm_aapcs_vfpcc <4 x float> @mov_float_m3() {  ; CHECK-LABEL: mov_float_m3:  ; CHECK:       @ %bb.0: @ %entry -; CHECK-NEXT:    adr r0, .LCPI19_0 +; CHECK-NEXT:    adr r0, .LCPI21_0  ; CHECK-NEXT:    vldrw.u32 q0, [r0]  ; CHECK-NEXT:    bx lr  ; CHECK-NEXT:    .p2align 4  ; CHECK-NEXT:  @ %bb.1: -; CHECK-NEXT:  .LCPI19_0: +; CHECK-NEXT:  .LCPI21_0:  ; CHECK-NEXT:    .long 3225419776 @ double -32.000022917985916  ; CHECK-NEXT:    .long 3225419776  ; CHECK-NEXT:    .long 3225419776 @ double -32.000022917985916 @@ -222,7 +248,7 @@ entry:    ret <4 x float> <float -3.000000e+00, float -3.000000e+00, float -3.000000e+00, float -3.000000e+00>  } -define arm_aapcs_vfpcc <8 x half> @mov_float16_1(half *%dest) { +define arm_aapcs_vfpcc <8 x half> @mov_float16_1() {  ; CHECK-LABEL: mov_float16_1:  ; CHECK:       @ %bb.0: @ %entry  ; CHECK-NEXT:    vmov.i16 q0, #0x3c00 @@ -232,7 +258,7 @@ entry:    ret <8 x half> <half 1.000000e+00, half 1.000000e+00, half 1.000000e+00, half 1.000000e+00, half 1.000000e+00, half 1.000000e+00, half 1.000000e+00, half 1.000000e+00>  } -define arm_aapcs_vfpcc <8 x half> @mov_float16_m3(half *%dest) { +define arm_aapcs_vfpcc <8 x half> @mov_float16_m3() {  ; CHECK-LABEL: mov_float16_m3:  ; CHECK:       @ %bb.0: @ %entry  ; CHECK-NEXT:    vmov.i16 q0, #0xc200 @@ -241,3 +267,20 @@ define arm_aapcs_vfpcc <8 x half> 
@mov_float16_m3(half *%dest) {  entry:    ret <8 x half> <half -3.000000e+00, half -3.000000e+00, half -3.000000e+00, half -3.000000e+00, half -3.000000e+00, half -3.000000e+00, half -3.000000e+00, half -3.000000e+00>  } + +define arm_aapcs_vfpcc <2 x double> @mov_double_1() { +; CHECK-LABEL: mov_double_1: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    adr r0, .LCPI24_0 +; CHECK-NEXT:    vldrw.u32 q0, [r0] +; CHECK-NEXT:    bx lr +; CHECK-NEXT:    .p2align 4 +; CHECK-NEXT:  @ %bb.1: +; CHECK-NEXT:  .LCPI24_0: +; CHECK-NEXT:    .long 0 @ double 1 +; CHECK-NEXT:    .long 1072693248 +; CHECK-NEXT:    .long 0 @ double 1 +; CHECK-NEXT:    .long 1072693248 +entry: +  ret <2 x double> <double 1.000000e+00, double 1.000000e+00> +} diff --git a/llvm/test/CodeGen/Thumb2/mve-vmvnimm.ll b/llvm/test/CodeGen/Thumb2/mve-vmvnimm.ll index fb27d895484..8f6ea13befc 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vmvnimm.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vmvnimm.ll @@ -2,7 +2,7 @@  ; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s  ; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s -define arm_aapcs_vfpcc <8 x i16> @mov_int16_511(i16 *%dest) { +define arm_aapcs_vfpcc <8 x i16> @mov_int16_511() {  ; CHECK-LABEL: mov_int16_511:  ; CHECK:       @ %bb.0: @ %entry  ; CHECK-NEXT:    vmvn.i16 q0, #0xfe00 @@ -11,7 +11,7 @@ entry:    ret <8 x i16> <i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511>  } -define arm_aapcs_vfpcc <8 x i16> @mov_int16_65281(i16 *%dest) { +define arm_aapcs_vfpcc <8 x i16> @mov_int16_65281() {  ; CHECK-LABEL: mov_int16_65281:  ; CHECK:       @ %bb.0: @ %entry  ; CHECK-NEXT:    vmvn.i16 q0, #0xfe @@ -20,7 +20,7 @@ entry:    ret <8 x i16> <i16 65281, i16 65281, i16 65281, i16 65281, i16 65281, i16 65281, i16 65281, i16 65281>  } -define arm_aapcs_vfpcc <4 x i32> @mov_int32_m7(i32 *%dest) { +define arm_aapcs_vfpcc <4 x i32> @mov_int32_m7() {  ; CHECK-LABEL: mov_int32_m7:  ; CHECK:       @ %bb.0: @ %entry  ; CHECK-NEXT:    vmvn.i32 q0, #0x6 @@ -29,7 +29,7 @@ entry:    ret <4 x i32> <i32 -7, i32 -7, i32 -7, i32 -7>  } -define arm_aapcs_vfpcc <4 x i32> @mov_int32_m769(i32 *%dest) { +define arm_aapcs_vfpcc <4 x i32> @mov_int32_m769() {  ; CHECK-LABEL: mov_int32_m769:  ; CHECK:       @ %bb.0: @ %entry  ; CHECK-NEXT:    vmvn.i32 q0, #0x300 @@ -38,7 +38,7 @@ entry:    ret <4 x i32> <i32 -769, i32 -769, i32 -769, i32 -769>  } -define arm_aapcs_vfpcc <4 x i32> @mov_int32_m262145(i32 *%dest) { +define arm_aapcs_vfpcc <4 x i32> @mov_int32_m262145() {  ; CHECK-LABEL: mov_int32_m262145:  ; CHECK:       @ %bb.0: @ %entry  ; CHECK-NEXT:    vmvn.i32 q0, #0x40000 @@ -47,7 +47,7 @@ entry:    ret <4 x i32> <i32 -262145, i32 -262145, i32 -262145, i32 -262145>  } -define arm_aapcs_vfpcc <4 x i32> @mov_int32_m134217729(i32 *%dest) { +define arm_aapcs_vfpcc <4 x i32> @mov_int32_m134217729() {  ; CHECK-LABEL: mov_int32_m134217729:  ; CHECK:       @ %bb.0: @ %entry  ; CHECK-NEXT:    vmvn.i32 q0, #0x8000000 @@ -56,7 +56,7 @@ entry:    ret <4 x i32> <i32 -134217729, i32 -134217729, i32 -134217729, i32 -134217729>  } -define arm_aapcs_vfpcc <4 x i32> @mov_int32_4294902528(i32 *%dest) { +define arm_aapcs_vfpcc <4 x i32> @mov_int32_4294902528() {  ; CHECK-LABEL: mov_int32_4294902528:  ; CHECK:       @ %bb.0: @ %entry  ; CHECK-NEXT:    vmvn.i32 q0, #0xfcff @@ -65,7 +65,7 @@ entry:    ret <4 x i32> <i32 4294902528, i32 4294902528, i32 4294902528, i32 4294902528>  } -define arm_aapcs_vfpcc 
<4 x i32> @mov_int32_4278386688(i32 *%dest) {
+define arm_aapcs_vfpcc <4 x i32> @mov_int32_4278386688() {
 ; CHECK-LABEL: mov_int32_4278386688:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    adr r0, .LCPI7_0