-rw-r--r--  llvm/lib/Target/ARM/ARMISelLowering.cpp          6
-rw-r--r--  llvm/lib/Target/ARM/ARMInstrMVE.td              14
-rw-r--r--  llvm/test/CodeGen/Thumb2/mve-abs.ll             47
-rw-r--r--  llvm/test/CodeGen/Thumb2/mve-bitarith.ll        65
-rw-r--r--  llvm/test/CodeGen/Thumb2/mve-div-expand.ll     194
-rw-r--r--  llvm/test/CodeGen/Thumb2/mve-fmath.ll          250
-rw-r--r--  llvm/test/CodeGen/Thumb2/mve-fp-negabs.ll       57
-rw-r--r--  llvm/test/CodeGen/Thumb2/mve-frint.ll          150
-rw-r--r--  llvm/test/CodeGen/Thumb2/mve-minmax.ll         215
-rw-r--r--  llvm/test/CodeGen/Thumb2/mve-neg.ll             23
-rw-r--r--  llvm/test/CodeGen/Thumb2/mve-sext.ll            47
-rw-r--r--  llvm/test/CodeGen/Thumb2/mve-shifts.ll         205
-rw-r--r--  llvm/test/CodeGen/Thumb2/mve-shuffle.ll        146
-rw-r--r--  llvm/test/CodeGen/Thumb2/mve-simple-arith.ll   166
-rw-r--r--  llvm/test/CodeGen/Thumb2/mve-soft-float-abi.ll  68
-rw-r--r--  llvm/test/CodeGen/Thumb2/mve-vcvt.ll           108
-rw-r--r--  llvm/test/CodeGen/Thumb2/mve-vdup.ll            49
-rw-r--r--  llvm/test/CodeGen/Thumb2/mve-vmovimm.ll         89
-rw-r--r--  llvm/test/CodeGen/Thumb2/mve-vmvnimm.ll         16
19 files changed, 1875 insertions, 40 deletions
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index a67adde262d..4eb8e0738a9 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -320,6 +320,10 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
}
+ // We can do bitwise operations on v2i64 vectors
+ setOperationAction(ISD::AND, MVT::v2i64, Legal);
+ setOperationAction(ISD::OR, MVT::v2i64, Legal);
+ setOperationAction(ISD::XOR, MVT::v2i64, Legal);
// It is legal to extload from v4i8 to v4i16 or v4i32.
addAllExtLoads(MVT::v8i16, MVT::v8i8, Legal);
@@ -12855,6 +12859,8 @@ static SDValue PerformShiftCombine(SDNode *N,
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!VT.isVector() || !TLI.isTypeLegal(VT))
return SDValue();
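+  // MVE has no vector shift instructions for 64-bit lanes, so skip the
+  // combine here and let v2i64 shifts be expanded instead.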
+ if (ST->hasMVEIntegerOps() && VT == MVT::v2i64)
+ return SDValue();
int64_t Cnt;
diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
index 1880daa6294..3e7ae55c7fc 100644
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -1035,6 +1035,8 @@ let Predicates = [HasMVEInt] in {
(v8i16 (MVE_VMVN (v8i16 MQPR:$val1)))>;
def : Pat<(v4i32 (vnotq (v4i32 MQPR:$val1))),
(v4i32 (MVE_VMVN (v4i32 MQPR:$val1)))>;
+ def : Pat<(v2i64 (vnotq (v2i64 MQPR:$val1))),
+ (v2i64 (MVE_VMVN (v2i64 MQPR:$val1)))>;
}
class MVE_bit_ops<string iname, bits<2> bit_21_20, bit bit_28>
@@ -1081,6 +1083,8 @@ let Predicates = [HasMVEInt] in {
(v8i16 (MVE_VAND (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
def : Pat<(v4i32 (and (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
(v4i32 (MVE_VAND (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
+ def : Pat<(v2i64 (and (v2i64 MQPR:$val1), (v2i64 MQPR:$val2))),
+ (v2i64 (MVE_VAND (v2i64 MQPR:$val1), (v2i64 MQPR:$val2)))>;
def : Pat<(v16i8 (or (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
(v16i8 (MVE_VORR (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
@@ -1088,6 +1092,8 @@ let Predicates = [HasMVEInt] in {
(v8i16 (MVE_VORR (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
def : Pat<(v4i32 (or (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
(v4i32 (MVE_VORR (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
+ def : Pat<(v2i64 (or (v2i64 MQPR:$val1), (v2i64 MQPR:$val2))),
+ (v2i64 (MVE_VORR (v2i64 MQPR:$val1), (v2i64 MQPR:$val2)))>;
def : Pat<(v16i8 (xor (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
(v16i8 (MVE_VEOR (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
@@ -1095,6 +1101,8 @@ let Predicates = [HasMVEInt] in {
(v8i16 (MVE_VEOR (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
def : Pat<(v4i32 (xor (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
(v4i32 (MVE_VEOR (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
+ def : Pat<(v2i64 (xor (v2i64 MQPR:$val1), (v2i64 MQPR:$val2))),
+ (v2i64 (MVE_VEOR (v2i64 MQPR:$val1), (v2i64 MQPR:$val2)))>;
def : Pat<(v16i8 (and (v16i8 MQPR:$val1), (vnotq MQPR:$val2))),
(v16i8 (MVE_VBIC (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
@@ -1102,13 +1110,17 @@ let Predicates = [HasMVEInt] in {
(v8i16 (MVE_VBIC (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
def : Pat<(v4i32 (and (v4i32 MQPR:$val1), (vnotq MQPR:$val2))),
(v4i32 (MVE_VBIC (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
+ def : Pat<(v2i64 (and (v2i64 MQPR:$val1), (vnotq MQPR:$val2))),
+ (v2i64 (MVE_VBIC (v2i64 MQPR:$val1), (v2i64 MQPR:$val2)))>;
- def : Pat<(v16i8 (or (v16i8 MQPR:$val1), (vnotq (v16i8 MQPR:$val2)))),
+ def : Pat<(v16i8 (or (v16i8 MQPR:$val1), (vnotq MQPR:$val2))),
(v16i8 (MVE_VORN (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
def : Pat<(v8i16 (or (v8i16 MQPR:$val1), (vnotq MQPR:$val2))),
(v8i16 (MVE_VORN (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
def : Pat<(v4i32 (or (v4i32 MQPR:$val1), (vnotq MQPR:$val2))),
(v4i32 (MVE_VORN (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
+ def : Pat<(v2i64 (or (v2i64 MQPR:$val1), (vnotq MQPR:$val2))),
+ (v2i64 (MVE_VORN (v2i64 MQPR:$val1), (v2i64 MQPR:$val2)))>;
}
class MVE_bit_cmode<string iname, string suffix, bits<4> cmode, dag inOps>
diff --git a/llvm/test/CodeGen/Thumb2/mve-abs.ll b/llvm/test/CodeGen/Thumb2/mve-abs.ll
index e0af56abf12..6e2100e2f46 100644
--- a/llvm/test/CodeGen/Thumb2/mve-abs.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-abs.ll
@@ -36,3 +36,50 @@ entry:
%2 = select <4 x i1> %0, <4 x i32> %1, <4 x i32> %s1
ret <4 x i32> %2
}
+
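+; There is no single-instruction abs for 64-bit lanes, so this expands to
+; scalar compare/negate/select code.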
+define arm_aapcs_vfpcc <2 x i64> @abs_v2i64(<2 x i64> %s1) {
+; CHECK-LABEL: abs_v2i64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
+; CHECK-NEXT: push {r4, r5, r6, r7, lr}
+; CHECK-NEXT: vmov r12, s2
+; CHECK-NEXT: movs r0, #0
+; CHECK-NEXT: vmov r3, s3
+; CHECK-NEXT: vmov r1, s0
+; CHECK-NEXT: rsbs.w lr, r12, #0
+; CHECK-NEXT: sbc.w r5, r0, r3
+; CHECK-NEXT: cmp r3, #0
+; CHECK-NEXT: mov r2, lr
+; CHECK-NEXT: lsrl r2, r5, #32
+; CHECK-NEXT: mov.w r5, #0
+; CHECK-NEXT: it mi
+; CHECK-NEXT: movmi r5, #1
+; CHECK-NEXT: cmp r5, #0
+; CHECK-NEXT: it eq
+; CHECK-NEXT: moveq r2, r3
+; CHECK-NEXT: vmov r3, s1
+; CHECK-NEXT: rsbs r4, r1, #0
+; CHECK-NEXT: mov r6, r4
+; CHECK-NEXT: sbc.w r7, r0, r3
+; CHECK-NEXT: cmp r3, #0
+; CHECK-NEXT: lsrl r6, r7, #32
+; CHECK-NEXT: it mi
+; CHECK-NEXT: movmi r0, #1
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: ite eq
+; CHECK-NEXT: moveq r6, r3
+; CHECK-NEXT: movne r1, r4
+; CHECK-NEXT: vmov.32 q0[0], r1
+; CHECK-NEXT: cmp r5, #0
+; CHECK-NEXT: vmov.32 q0[1], r6
+; CHECK-NEXT: it eq
+; CHECK-NEXT: moveq lr, r12
+; CHECK-NEXT: vmov.32 q0[2], lr
+; CHECK-NEXT: vmov.32 q0[3], r2
+; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
+entry:
+ %0 = icmp slt <2 x i64> %s1, zeroinitializer
+ %1 = sub nsw <2 x i64> zeroinitializer, %s1
+ %2 = select <2 x i1> %0, <2 x i64> %1, <2 x i64> %s1
+ ret <2 x i64> %2
+}
diff --git a/llvm/test/CodeGen/Thumb2/mve-bitarith.ll b/llvm/test/CodeGen/Thumb2/mve-bitarith.ll
index 1ee57124a60..30981816922 100644
--- a/llvm/test/CodeGen/Thumb2/mve-bitarith.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-bitarith.ll
@@ -31,6 +31,16 @@ entry:
ret <4 x i32> %0
}
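+; Bitwise operations are lane-size agnostic, so the v2i64 and/or/xor cases
+; below select the same VAND/VORR/VEOR as the narrower element types.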
+define arm_aapcs_vfpcc <2 x i64> @and_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
+; CHECK-LABEL: and_int64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vand q0, q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = and <2 x i64> %src1, %src2
+ ret <2 x i64> %0
+}
+
define arm_aapcs_vfpcc <16 x i8> @or_int8_t(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: or_int8_t:
@@ -62,6 +72,16 @@ entry:
ret <4 x i32> %0
}
+define arm_aapcs_vfpcc <2 x i64> @or_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
+; CHECK-LABEL: or_int64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vorr q0, q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = or <2 x i64> %src1, %src2
+ ret <2 x i64> %0
+}
+
define arm_aapcs_vfpcc <16 x i8> @xor_int8_t(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: xor_int8_t:
@@ -93,6 +113,16 @@ entry:
ret <4 x i32> %0
}
+define arm_aapcs_vfpcc <2 x i64> @xor_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
+; CHECK-LABEL: xor_int64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: veor q0, q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = xor <2 x i64> %src1, %src2
+ ret <2 x i64> %0
+}
+
define arm_aapcs_vfpcc <16 x i8> @v_mvn_i8(<16 x i8> %src) {
; CHECK-LABEL: v_mvn_i8:
; CHECK: @ %bb.0: @ %entry
@@ -123,6 +153,17 @@ entry:
ret <4 x i32> %0
}
+define arm_aapcs_vfpcc <2 x i64> @v_mvn_i64(<2 x i64> %src) {
+; CHECK-LABEL: v_mvn_i64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmvn q0, q0
+; CHECK-NEXT: bx lr
+entry:
+ %0 = xor <2 x i64> %src, <i64 -1, i64 -1>
+ ret <2 x i64> %0
+}
+
+
define arm_aapcs_vfpcc <16 x i8> @v_bic_i8(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: v_bic_i8:
; CHECK: @ %bb.0: @ %entry
@@ -156,6 +197,18 @@ entry:
ret <4 x i32> %1
}
+define arm_aapcs_vfpcc <2 x i64> @v_bic_i64(<2 x i64> %src1, <2 x i64> %src2) {
+; CHECK-LABEL: v_bic_i64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vbic q0, q1, q0
+; CHECK-NEXT: bx lr
+entry:
+ %0 = xor <2 x i64> %src1, <i64 -1, i64 -1>
+ %1 = and <2 x i64> %src2, %0
+ ret <2 x i64> %1
+}
+
+
define arm_aapcs_vfpcc <16 x i8> @v_or_i8(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: v_or_i8:
; CHECK: @ %bb.0: @ %entry
@@ -188,3 +241,15 @@ entry:
%1 = or <4 x i32> %src2, %0
ret <4 x i32> %1
}
+
+define arm_aapcs_vfpcc <2 x i64> @v_or_i64(<2 x i64> %src1, <2 x i64> %src2) {
+; CHECK-LABEL: v_or_i64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vorn q0, q1, q0
+; CHECK-NEXT: bx lr
+entry:
+ %0 = xor <2 x i64> %src1, <i64 -1, i64 -1>
+ %1 = or <2 x i64> %src2, %0
+ ret <2 x i64> %1
+}
+
diff --git a/llvm/test/CodeGen/Thumb2/mve-div-expand.ll b/llvm/test/CodeGen/Thumb2/mve-div-expand.ll
index 02f2225c177..e0dddcd273c 100644
--- a/llvm/test/CodeGen/Thumb2/mve-div-expand.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-div-expand.ll
@@ -736,6 +736,144 @@ entry:
ret <16 x i8> %out
}
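+; There is no vector integer division, so each i64 lane is divided with a
+; call to __aeabi_uldivmod (or __aeabi_ldivmod for the signed cases).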
+define arm_aapcs_vfpcc <2 x i64> @udiv_i64(<2 x i64> %in1, <2 x i64> %in2) {
+; CHECK-LABEL: udiv_i64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-NEXT: vmov q4, q1
+; CHECK-NEXT: vmov q5, q0
+; CHECK-NEXT: vmov r0, s20
+; CHECK-NEXT: vmov r1, s21
+; CHECK-NEXT: vmov r2, s16
+; CHECK-NEXT: vmov r3, s17
+; CHECK-NEXT: bl __aeabi_uldivmod
+; CHECK-NEXT: vmov r12, s22
+; CHECK-NEXT: vmov lr, s23
+; CHECK-NEXT: vmov r2, s18
+; CHECK-NEXT: vmov r3, s19
+; CHECK-NEXT: vmov.32 q4[0], r0
+; CHECK-NEXT: lsrl r0, r1, #32
+; CHECK-NEXT: vmov.32 q4[1], r0
+; CHECK-NEXT: mov r0, r12
+; CHECK-NEXT: mov r1, lr
+; CHECK-NEXT: bl __aeabi_uldivmod
+; CHECK-NEXT: vmov.32 q4[2], r0
+; CHECK-NEXT: lsrl r0, r1, #32
+; CHECK-NEXT: vmov.32 q4[3], r0
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %out = udiv <2 x i64> %in1, %in2
+ ret <2 x i64> %out
+}
+
+define arm_aapcs_vfpcc <2 x i64> @sdiv_i64(<2 x i64> %in1, <2 x i64> %in2) {
+; CHECK-LABEL: sdiv_i64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-NEXT: vmov q4, q1
+; CHECK-NEXT: vmov q5, q0
+; CHECK-NEXT: vmov r0, s20
+; CHECK-NEXT: vmov r1, s21
+; CHECK-NEXT: vmov r2, s16
+; CHECK-NEXT: vmov r3, s17
+; CHECK-NEXT: bl __aeabi_ldivmod
+; CHECK-NEXT: vmov r12, s22
+; CHECK-NEXT: vmov lr, s23
+; CHECK-NEXT: vmov r2, s18
+; CHECK-NEXT: vmov r3, s19
+; CHECK-NEXT: vmov.32 q4[0], r0
+; CHECK-NEXT: lsrl r0, r1, #32
+; CHECK-NEXT: vmov.32 q4[1], r0
+; CHECK-NEXT: mov r0, r12
+; CHECK-NEXT: mov r1, lr
+; CHECK-NEXT: bl __aeabi_ldivmod
+; CHECK-NEXT: vmov.32 q4[2], r0
+; CHECK-NEXT: lsrl r0, r1, #32
+; CHECK-NEXT: vmov.32 q4[3], r0
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %out = sdiv <2 x i64> %in1, %in2
+ ret <2 x i64> %out
+}
+
+define arm_aapcs_vfpcc <2 x i64> @urem_i64(<2 x i64> %in1, <2 x i64> %in2) {
+; CHECK-LABEL: urem_i64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-NEXT: vmov q4, q1
+; CHECK-NEXT: vmov q5, q0
+; CHECK-NEXT: vmov r0, s20
+; CHECK-NEXT: vmov r1, s21
+; CHECK-NEXT: vmov r2, s16
+; CHECK-NEXT: vmov r3, s17
+; CHECK-NEXT: bl __aeabi_uldivmod
+; CHECK-NEXT: vmov r12, s18
+; CHECK-NEXT: vmov lr, s19
+; CHECK-NEXT: vmov.32 q4[0], r2
+; CHECK-NEXT: vmov r0, s22
+; CHECK-NEXT: vmov.32 q4[1], r3
+; CHECK-NEXT: vmov r1, s23
+; CHECK-NEXT: mov r2, r12
+; CHECK-NEXT: mov r3, lr
+; CHECK-NEXT: bl __aeabi_uldivmod
+; CHECK-NEXT: vmov.32 q4[2], r2
+; CHECK-NEXT: vmov.32 q4[3], r3
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %out = urem <2 x i64> %in1, %in2
+ ret <2 x i64> %out
+}
+
+define arm_aapcs_vfpcc <2 x i64> @srem_i64(<2 x i64> %in1, <2 x i64> %in2) {
+; CHECK-LABEL: srem_i64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-NEXT: vmov q4, q1
+; CHECK-NEXT: vmov q5, q0
+; CHECK-NEXT: vmov r0, s20
+; CHECK-NEXT: vmov r1, s21
+; CHECK-NEXT: vmov r2, s16
+; CHECK-NEXT: vmov r3, s17
+; CHECK-NEXT: bl __aeabi_ldivmod
+; CHECK-NEXT: vmov r12, s18
+; CHECK-NEXT: vmov lr, s19
+; CHECK-NEXT: vmov.32 q4[0], r2
+; CHECK-NEXT: vmov r0, s22
+; CHECK-NEXT: vmov.32 q4[1], r3
+; CHECK-NEXT: vmov r1, s23
+; CHECK-NEXT: mov r2, r12
+; CHECK-NEXT: mov r3, lr
+; CHECK-NEXT: bl __aeabi_ldivmod
+; CHECK-NEXT: vmov.32 q4[2], r2
+; CHECK-NEXT: vmov.32 q4[3], r3
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %out = srem <2 x i64> %in1, %in2
+ ret <2 x i64> %out
+}
+
+
+
define arm_aapcs_vfpcc <4 x float> @fdiv_f32(<4 x float> %in1, <4 x float> %in2) {
; CHECK-LABEL: fdiv_f32:
@@ -992,3 +1130,59 @@ entry:
%out = frem <8 x half> %in1, %in2
ret <8 x half> %out
}
+
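+; Double-precision vector arithmetic is not available, so each f64 lane goes
+; through a libcall (__aeabi_ddiv here, fmod for frem).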
+define arm_aapcs_vfpcc <2 x double> @fdiv_f64(<2 x double> %in1, <2 x double> %in2) {
+; CHECK-LABEL: fdiv_f64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-NEXT: vmov q4, q1
+; CHECK-NEXT: vmov q5, q0
+; CHECK-NEXT: vmov r0, r1, d11
+; CHECK-NEXT: vmov r2, r3, d9
+; CHECK-NEXT: bl __aeabi_ddiv
+; CHECK-NEXT: vmov lr, r12, d10
+; CHECK-NEXT: vmov r2, r3, d8
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, lr
+; CHECK-NEXT: mov r1, r12
+; CHECK-NEXT: bl __aeabi_ddiv
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %out = fdiv <2 x double> %in1, %in2
+ ret <2 x double> %out
+}
+
+define arm_aapcs_vfpcc <2 x double> @frem_f64(<2 x double> %in1, <2 x double> %in2) {
+; CHECK-LABEL: frem_f64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-NEXT: vmov q4, q1
+; CHECK-NEXT: vmov q5, q0
+; CHECK-NEXT: vmov r0, r1, d11
+; CHECK-NEXT: vmov r2, r3, d9
+; CHECK-NEXT: bl fmod
+; CHECK-NEXT: vmov lr, r12, d10
+; CHECK-NEXT: vmov r2, r3, d8
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, lr
+; CHECK-NEXT: mov r1, r12
+; CHECK-NEXT: bl fmod
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %out = frem <2 x double> %in1, %in2
+ ret <2 x double> %out
+}
+
+
diff --git a/llvm/test/CodeGen/Thumb2/mve-fmath.ll b/llvm/test/CodeGen/Thumb2/mve-fmath.ll
index 41054e2d34d..31956979dd9 100644
--- a/llvm/test/CodeGen/Thumb2/mve-fmath.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-fmath.ll
@@ -66,6 +66,30 @@ entry:
ret <8 x half> %0
}
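+; Each of the f64 math intrinsics below is expanded to two scalar libcalls,
+; one per lane.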
+define arm_aapcs_vfpcc <2 x double> @sqrt_float64_t(<2 x double> %src) {
+; CHECK-LABEL: sqrt_float64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vmov q4, q0
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: bl sqrt
+; CHECK-NEXT: vmov r2, r3, d8
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: mov r1, r3
+; CHECK-NEXT: bl sqrt
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %0 = call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> %src)
+ ret <2 x double> %0
+}
+
define arm_aapcs_vfpcc <4 x float> @cos_float32_t(<4 x float> %src) {
; CHECK-LABEL: cos_float32_t:
; CHECK: @ %bb.0: @ %entry
@@ -198,6 +222,30 @@ entry:
ret <8 x half> %0
}
+define arm_aapcs_vfpcc <2 x double> @cos_float64_t(<2 x double> %src) {
+; CHECK-LABEL: cos_float64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vmov q4, q0
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: bl cos
+; CHECK-NEXT: vmov r2, r3, d8
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: mov r1, r3
+; CHECK-NEXT: bl cos
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %0 = call fast <2 x double> @llvm.cos.v2f64(<2 x double> %src)
+ ret <2 x double> %0
+}
+
define arm_aapcs_vfpcc <4 x float> @sin_float32_t(<4 x float> %src) {
; CHECK-LABEL: sin_float32_t:
; CHECK: @ %bb.0: @ %entry
@@ -330,6 +378,30 @@ entry:
ret <8 x half> %0
}
+define arm_aapcs_vfpcc <2 x double> @sin_float64_t(<2 x double> %src) {
+; CHECK-LABEL: sin_float64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vmov q4, q0
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: bl sin
+; CHECK-NEXT: vmov r2, r3, d8
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: mov r1, r3
+; CHECK-NEXT: bl sin
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %0 = call fast <2 x double> @llvm.sin.v2f64(<2 x double> %src)
+ ret <2 x double> %0
+}
+
define arm_aapcs_vfpcc <4 x float> @exp_float32_t(<4 x float> %src) {
; CHECK-LABEL: exp_float32_t:
; CHECK: @ %bb.0: @ %entry
@@ -462,6 +534,30 @@ entry:
ret <8 x half> %0
}
+define arm_aapcs_vfpcc <2 x double> @exp_float64_t(<2 x double> %src) {
+; CHECK-LABEL: exp_float64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vmov q4, q0
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: bl exp
+; CHECK-NEXT: vmov r2, r3, d8
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: mov r1, r3
+; CHECK-NEXT: bl exp
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %0 = call fast <2 x double> @llvm.exp.v2f64(<2 x double> %src)
+ ret <2 x double> %0
+}
+
define arm_aapcs_vfpcc <4 x float> @exp2_float32_t(<4 x float> %src) {
; CHECK-LABEL: exp2_float32_t:
; CHECK: @ %bb.0: @ %entry
@@ -594,6 +690,30 @@ entry:
ret <8 x half> %0
}
+define arm_aapcs_vfpcc <2 x double> @exp2_float64_t(<2 x double> %src) {
+; CHECK-LABEL: exp2_float64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vmov q4, q0
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: bl exp2
+; CHECK-NEXT: vmov r2, r3, d8
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: mov r1, r3
+; CHECK-NEXT: bl exp2
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %0 = call fast <2 x double> @llvm.exp2.v2f64(<2 x double> %src)
+ ret <2 x double> %0
+}
+
define arm_aapcs_vfpcc <4 x float> @log_float32_t(<4 x float> %src) {
; CHECK-LABEL: log_float32_t:
; CHECK: @ %bb.0: @ %entry
@@ -726,6 +846,30 @@ entry:
ret <8 x half> %0
}
+define arm_aapcs_vfpcc <2 x double> @log_float64_t(<2 x double> %src) {
+; CHECK-LABEL: log_float64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vmov q4, q0
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: bl log
+; CHECK-NEXT: vmov r2, r3, d8
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: mov r1, r3
+; CHECK-NEXT: bl log
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %0 = call fast <2 x double> @llvm.log.v2f64(<2 x double> %src)
+ ret <2 x double> %0
+}
+
define arm_aapcs_vfpcc <4 x float> @log2_float32_t(<4 x float> %src) {
; CHECK-LABEL: log2_float32_t:
; CHECK: @ %bb.0: @ %entry
@@ -858,6 +1002,30 @@ entry:
ret <8 x half> %0
}
+define arm_aapcs_vfpcc <2 x double> @log2_float64_t(<2 x double> %src) {
+; CHECK-LABEL: log2_float64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vmov q4, q0
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: bl log2
+; CHECK-NEXT: vmov r2, r3, d8
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: mov r1, r3
+; CHECK-NEXT: bl log2
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %0 = call fast <2 x double> @llvm.log2.v2f64(<2 x double> %src)
+ ret <2 x double> %0
+}
+
define arm_aapcs_vfpcc <4 x float> @log10_float32_t(<4 x float> %src) {
; CHECK-LABEL: log10_float32_t:
; CHECK: @ %bb.0: @ %entry
@@ -990,6 +1158,30 @@ entry:
ret <8 x half> %0
}
+define arm_aapcs_vfpcc <2 x double> @log10_float64_t(<2 x double> %src) {
+; CHECK-LABEL: log10_float64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vmov q4, q0
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: bl log10
+; CHECK-NEXT: vmov r2, r3, d8
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: mov r1, r3
+; CHECK-NEXT: bl log10
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %0 = call fast <2 x double> @llvm.log10.v2f64(<2 x double> %src)
+ ret <2 x double> %0
+}
+
define arm_aapcs_vfpcc <4 x float> @pow_float32_t(<4 x float> %src1, <4 x float> %src2) {
; CHECK-LABEL: pow_float32_t:
; CHECK: @ %bb.0: @ %entry
@@ -1165,6 +1357,33 @@ entry:
ret <8 x half> %0
}
+define arm_aapcs_vfpcc <2 x double> @pow_float64_t(<2 x double> %src1, <2 x double> %src2) {
+; CHECK-LABEL: pow_float64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-NEXT: vmov q4, q1
+; CHECK-NEXT: vmov q5, q0
+; CHECK-NEXT: vmov r0, r1, d11
+; CHECK-NEXT: vmov r2, r3, d9
+; CHECK-NEXT: bl pow
+; CHECK-NEXT: vmov lr, r12, d10
+; CHECK-NEXT: vmov r2, r3, d8
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, lr
+; CHECK-NEXT: mov r1, r12
+; CHECK-NEXT: bl pow
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %0 = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %src1, <2 x double> %src2)
+ ret <2 x double> %0
+}
+
define arm_aapcs_vfpcc <4 x float> @copysign_float32_t(<4 x float> %src1, <4 x float> %src2) {
; CHECK-LABEL: copysign_float32_t:
; CHECK: @ %bb.0: @ %entry
@@ -1340,6 +1559,27 @@ entry:
ret <8 x half> %0
}
+define arm_aapcs_vfpcc <2 x double> @copysign_float64_t(<2 x double> %src1, <2 x double> %src2) {
+; CHECK-LABEL: copysign_float64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: vmov r0, r1, d3
+; CHECK-NEXT: vmov r0, lr, d2
+; CHECK-NEXT: vmov r0, r3, d1
+; CHECK-NEXT: vmov r12, r2, d0
+; CHECK-NEXT: lsrs r1, r1, #31
+; CHECK-NEXT: bfi r3, r1, #31, #1
+; CHECK-NEXT: lsr.w r1, lr, #31
+; CHECK-NEXT: bfi r2, r1, #31, #1
+; CHECK-NEXT: vmov d1, r0, r3
+; CHECK-NEXT: vmov d0, r12, r2
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %0 = call fast <2 x double> @llvm.copysign.v2f64(<2 x double> %src1, <2 x double> %src2)
+ ret <2 x double> %0
+}
+
declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
declare <4 x float> @llvm.cos.v4f32(<4 x float>)
declare <4 x float> @llvm.sin.v4f32(<4 x float>)
@@ -1360,4 +1600,14 @@ declare <8 x half> @llvm.log2.v8f16(<8 x half>)
declare <8 x half> @llvm.log10.v8f16(<8 x half>)
declare <8 x half> @llvm.pow.v8f16(<8 x half>, <8 x half>)
declare <8 x half> @llvm.copysign.v8f16(<8 x half>, <8 x half>)
+declare <2 x double> @llvm.sqrt.v2f64(<2 x double>)
+declare <2 x double> @llvm.cos.v2f64(<2 x double>)
+declare <2 x double> @llvm.sin.v2f64(<2 x double>)
+declare <2 x double> @llvm.exp.v2f64(<2 x double>)
+declare <2 x double> @llvm.exp2.v2f64(<2 x double>)
+declare <2 x double> @llvm.log.v2f64(<2 x double>)
+declare <2 x double> @llvm.log2.v2f64(<2 x double>)
+declare <2 x double> @llvm.log10.v2f64(<2 x double>)
+declare <2 x double> @llvm.pow.v2f64(<2 x double>, <2 x double>)
+declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>)
diff --git a/llvm/test/CodeGen/Thumb2/mve-fp-negabs.ll b/llvm/test/CodeGen/Thumb2/mve-fp-negabs.ll
index 2489646ad6d..d1fd4b26762 100644
--- a/llvm/test/CodeGen/Thumb2/mve-fp-negabs.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-fp-negabs.ll
@@ -76,6 +76,39 @@ entry:
ret <4 x float> %0
}
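+; f64 negation is expanded: each lane is negated via an __aeabi_dsub libcall
+; against a constant-pool zero.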
+define arm_aapcs_vfpcc <2 x double> @fneg_float64_t(<2 x double> %src) {
+; CHECK-LABEL: fneg_float64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r4, r5, r7, lr}
+; CHECK-NEXT: push {r4, r5, r7, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vmov q4, q0
+; CHECK-NEXT: vldr d0, .LCPI2_0
+; CHECK-NEXT: vmov r2, r3, d9
+; CHECK-NEXT: vmov r4, r5, d0
+; CHECK-NEXT: mov r0, r4
+; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: bl __aeabi_dsub
+; CHECK-NEXT: vmov r2, r3, d8
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, r4
+; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: bl __aeabi_dsub
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r4, r5, r7, pc}
+; CHECK-NEXT: .p2align 3
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI2_0:
+; CHECK-NEXT: .long 0 @ double -0
+; CHECK-NEXT: .long 2147483648
+entry:
+ %0 = fsub nnan ninf nsz <2 x double> <double 0.0e0, double 0.0e0>, %src
+ ret <2 x double> %0
+}
+
define arm_aapcs_vfpcc <8 x half> @fabs_float16_t(<8 x half> %src) {
; CHECK-MVE-LABEL: fabs_float16_t:
; CHECK-MVE: @ %bb.0: @ %entry
@@ -150,6 +183,30 @@ entry:
ret <4 x float> %0
}
+define arm_aapcs_vfpcc <2 x double> @fabs_float64_t(<2 x double> %src) {
+; CHECK-LABEL: fabs_float64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vldr d2, .LCPI5_0
+; CHECK-NEXT: vmov r12, r3, d0
+; CHECK-NEXT: vmov r0, r1, d2
+; CHECK-NEXT: vmov r0, r2, d1
+; CHECK-NEXT: lsrs r1, r1, #31
+; CHECK-NEXT: bfi r2, r1, #31, #1
+; CHECK-NEXT: bfi r3, r1, #31, #1
+; CHECK-NEXT: vmov d1, r0, r2
+; CHECK-NEXT: vmov d0, r12, r3
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 3
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI5_0:
+; CHECK-NEXT: .long 0 @ double 0
+; CHECK-NEXT: .long 0
+entry:
+ %0 = call nnan ninf nsz <2 x double> @llvm.fabs.v2f64(<2 x double> %src)
+ ret <2 x double> %0
+}
+
declare <4 x float> @llvm.fabs.v4f32(<4 x float>)
declare <8 x half> @llvm.fabs.v8f16(<8 x half>)
+declare <2 x double> @llvm.fabs.v2f64(<2 x double>)
diff --git a/llvm/test/CodeGen/Thumb2/mve-frint.ll b/llvm/test/CodeGen/Thumb2/mve-frint.ll
index 847d7ede1d7..e1758d5ed3b 100644
--- a/llvm/test/CodeGen/Thumb2/mve-frint.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-frint.ll
@@ -76,6 +76,30 @@ entry:
ret <8 x half> %0
}
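+; The f64 rounding operations below each expand to two calls to the
+; corresponding libm function (ceil, trunc, rint, nearbyint, floor, round).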
+define arm_aapcs_vfpcc <2 x double> @fceil_float64_t(<2 x double> %src) {
+; CHECK-LABEL: fceil_float64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vmov q4, q0
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: bl ceil
+; CHECK-NEXT: vmov r2, r3, d8
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: mov r1, r3
+; CHECK-NEXT: bl ceil
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %0 = call fast <2 x double> @llvm.ceil.v2f64(<2 x double> %src)
+ ret <2 x double> %0
+}
+
define arm_aapcs_vfpcc <4 x float> @ftrunc_float32_t(<4 x float> %src) {
; CHECK-MVE-LABEL: ftrunc_float32_t:
; CHECK-MVE: @ %bb.0: @ %entry
@@ -150,6 +174,30 @@ entry:
ret <8 x half> %0
}
+define arm_aapcs_vfpcc <2 x double> @ftrunc_float64_t(<2 x double> %src) {
+; CHECK-LABEL: ftrunc_float64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vmov q4, q0
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: bl trunc
+; CHECK-NEXT: vmov r2, r3, d8
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: mov r1, r3
+; CHECK-NEXT: bl trunc
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %0 = call fast <2 x double> @llvm.trunc.v2f64(<2 x double> %src)
+ ret <2 x double> %0
+}
+
define arm_aapcs_vfpcc <4 x float> @frint_float32_t(<4 x float> %src) {
; CHECK-MVE-LABEL: frint_float32_t:
; CHECK-MVE: @ %bb.0: @ %entry
@@ -224,6 +272,30 @@ entry:
ret <8 x half> %0
}
+define arm_aapcs_vfpcc <2 x double> @frint_float64_t(<2 x double> %src) {
+; CHECK-LABEL: frint_float64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vmov q4, q0
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: bl rint
+; CHECK-NEXT: vmov r2, r3, d8
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: mov r1, r3
+; CHECK-NEXT: bl rint
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %0 = call fast <2 x double> @llvm.rint.v2f64(<2 x double> %src)
+ ret <2 x double> %0
+}
+
define arm_aapcs_vfpcc <4 x float> @fnearbyint_float32_t(<4 x float> %src) {
; CHECK-LABEL: fnearbyint_float32_t:
; CHECK: @ %bb.0: @ %entry
@@ -288,6 +360,30 @@ entry:
ret <8 x half> %0
}
+define arm_aapcs_vfpcc <2 x double> @fnearbyint_float64_t(<2 x double> %src) {
+; CHECK-LABEL: fnearbyint_float64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vmov q4, q0
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: bl nearbyint
+; CHECK-NEXT: vmov r2, r3, d8
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: mov r1, r3
+; CHECK-NEXT: bl nearbyint
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %0 = call fast <2 x double> @llvm.nearbyint.v2f64(<2 x double> %src)
+ ret <2 x double> %0
+}
+
define arm_aapcs_vfpcc <4 x float> @ffloor_float32_t(<4 x float> %src) {
; CHECK-MVE-LABEL: ffloor_float32_t:
; CHECK-MVE: @ %bb.0: @ %entry
@@ -362,6 +458,30 @@ entry:
ret <8 x half> %0
}
+define arm_aapcs_vfpcc <2 x double> @ffloor_float64_t(<2 x double> %src) {
+; CHECK-LABEL: ffloor_float64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vmov q4, q0
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: bl floor
+; CHECK-NEXT: vmov r2, r3, d8
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: mov r1, r3
+; CHECK-NEXT: bl floor
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %0 = call fast <2 x double> @llvm.floor.v2f64(<2 x double> %src)
+ ret <2 x double> %0
+}
+
define arm_aapcs_vfpcc <4 x float> @fround_float32_t(<4 x float> %src) {
; CHECK-MVE-LABEL: fround_float32_t:
; CHECK-MVE: @ %bb.0: @ %entry
@@ -436,6 +556,30 @@ entry:
ret <8 x half> %0
}
+define arm_aapcs_vfpcc <2 x double> @fround_float64_t(<2 x double> %src) {
+; CHECK-LABEL: fround_float64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vmov q4, q0
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: bl round
+; CHECK-NEXT: vmov r2, r3, d8
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: mov r1, r3
+; CHECK-NEXT: bl round
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %0 = call fast <2 x double> @llvm.round.v2f64(<2 x double> %src)
+ ret <2 x double> %0
+}
+
declare <4 x float> @llvm.ceil.v4f32(<4 x float>)
declare <4 x float> @llvm.trunc.v4f32(<4 x float>)
declare <4 x float> @llvm.rint.v4f32(<4 x float>)
@@ -448,3 +592,9 @@ declare <8 x half> @llvm.rint.v8f16(<8 x half>)
declare <8 x half> @llvm.nearbyint.v8f16(<8 x half>)
declare <8 x half> @llvm.floor.v8f16(<8 x half>)
declare <8 x half> @llvm.round.v8f16(<8 x half>)
+declare <2 x double> @llvm.ceil.v2f64(<2 x double>)
+declare <2 x double> @llvm.trunc.v2f64(<2 x double>)
+declare <2 x double> @llvm.rint.v2f64(<2 x double>)
+declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>)
+declare <2 x double> @llvm.floor.v2f64(<2 x double>)
+declare <2 x double> @llvm.round.v2f64(<2 x double>)
diff --git a/llvm/test/CodeGen/Thumb2/mve-minmax.ll b/llvm/test/CodeGen/Thumb2/mve-minmax.ll
index 38990d35717..0b6308f8800 100644
--- a/llvm/test/CodeGen/Thumb2/mve-minmax.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-minmax.ll
@@ -35,6 +35,49 @@ entry:
ret <4 x i32> %1
}
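+; v2i64 min/max is expanded: scalar subs/sbcs compares build a per-lane mask,
+; which then selects the result with VBIC/VAND/VORR.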
+define arm_aapcs_vfpcc <2 x i64> @smin_v2i64(<2 x i64> %s1, <2 x i64> %s2) {
+; CHECK-LABEL: smin_v2i64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: vmov r2, s6
+; CHECK-NEXT: movs r0, #0
+; CHECK-NEXT: vmov r3, s2
+; CHECK-NEXT: vmov r12, s7
+; CHECK-NEXT: vmov r1, s3
+; CHECK-NEXT: vmov lr, s1
+; CHECK-NEXT: subs r2, r3, r2
+; CHECK-NEXT: vmov r3, s0
+; CHECK-NEXT: vmov r2, s4
+; CHECK-NEXT: sbcs.w r1, r1, r12
+; CHECK-NEXT: vmov r12, s5
+; CHECK-NEXT: mov.w r1, #0
+; CHECK-NEXT: it lt
+; CHECK-NEXT: movlt r1, #1
+; CHECK-NEXT: cmp r1, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r1, #-1
+; CHECK-NEXT: subs r2, r3, r2
+; CHECK-NEXT: sbcs.w r2, lr, r12
+; CHECK-NEXT: it lt
+; CHECK-NEXT: movlt r0, #1
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r0, #-1
+; CHECK-NEXT: vmov.32 q2[0], r0
+; CHECK-NEXT: vmov.32 q2[1], r0
+; CHECK-NEXT: vmov.32 q2[2], r1
+; CHECK-NEXT: vmov.32 q2[3], r1
+; CHECK-NEXT: vbic q1, q1, q2
+; CHECK-NEXT: vand q0, q0, q2
+; CHECK-NEXT: vorr q0, q0, q1
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %0 = icmp slt <2 x i64> %s1, %s2
+ %1 = select <2 x i1> %0, <2 x i64> %s1, <2 x i64> %s2
+ ret <2 x i64> %1
+}
+
define arm_aapcs_vfpcc <16 x i8> @umin_v16i8(<16 x i8> %s1, <16 x i8> %s2) {
; CHECK-LABEL: umin_v16i8:
; CHECK: @ %bb.0: @ %entry
@@ -68,6 +111,49 @@ entry:
ret <4 x i32> %1
}
+define arm_aapcs_vfpcc <2 x i64> @umin_v2i64(<2 x i64> %s1, <2 x i64> %s2) {
+; CHECK-LABEL: umin_v2i64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: vmov r2, s6
+; CHECK-NEXT: movs r0, #0
+; CHECK-NEXT: vmov r3, s2
+; CHECK-NEXT: vmov r12, s7
+; CHECK-NEXT: vmov r1, s3
+; CHECK-NEXT: vmov lr, s1
+; CHECK-NEXT: subs r2, r3, r2
+; CHECK-NEXT: vmov r3, s0
+; CHECK-NEXT: vmov r2, s4
+; CHECK-NEXT: sbcs.w r1, r1, r12
+; CHECK-NEXT: vmov r12, s5
+; CHECK-NEXT: mov.w r1, #0
+; CHECK-NEXT: it lo
+; CHECK-NEXT: movlo r1, #1
+; CHECK-NEXT: cmp r1, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r1, #-1
+; CHECK-NEXT: subs r2, r3, r2
+; CHECK-NEXT: sbcs.w r2, lr, r12
+; CHECK-NEXT: it lo
+; CHECK-NEXT: movlo r0, #1
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r0, #-1
+; CHECK-NEXT: vmov.32 q2[0], r0
+; CHECK-NEXT: vmov.32 q2[1], r0
+; CHECK-NEXT: vmov.32 q2[2], r1
+; CHECK-NEXT: vmov.32 q2[3], r1
+; CHECK-NEXT: vbic q1, q1, q2
+; CHECK-NEXT: vand q0, q0, q2
+; CHECK-NEXT: vorr q0, q0, q1
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %0 = icmp ult <2 x i64> %s1, %s2
+ %1 = select <2 x i1> %0, <2 x i64> %s1, <2 x i64> %s2
+ ret <2 x i64> %1
+}
+
define arm_aapcs_vfpcc <16 x i8> @smax_v16i8(<16 x i8> %s1, <16 x i8> %s2) {
; CHECK-LABEL: smax_v16i8:
@@ -102,6 +188,49 @@ entry:
ret <4 x i32> %1
}
+define arm_aapcs_vfpcc <2 x i64> @smax_v2i64(<2 x i64> %s1, <2 x i64> %s2) {
+; CHECK-LABEL: smax_v2i64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: vmov r2, s2
+; CHECK-NEXT: movs r0, #0
+; CHECK-NEXT: vmov r3, s6
+; CHECK-NEXT: vmov r12, s3
+; CHECK-NEXT: vmov r1, s7
+; CHECK-NEXT: vmov lr, s5
+; CHECK-NEXT: subs r2, r3, r2
+; CHECK-NEXT: vmov r3, s4
+; CHECK-NEXT: vmov r2, s0
+; CHECK-NEXT: sbcs.w r1, r1, r12
+; CHECK-NEXT: vmov r12, s1
+; CHECK-NEXT: mov.w r1, #0
+; CHECK-NEXT: it lt
+; CHECK-NEXT: movlt r1, #1
+; CHECK-NEXT: cmp r1, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r1, #-1
+; CHECK-NEXT: subs r2, r3, r2
+; CHECK-NEXT: sbcs.w r2, lr, r12
+; CHECK-NEXT: it lt
+; CHECK-NEXT: movlt r0, #1
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r0, #-1
+; CHECK-NEXT: vmov.32 q2[0], r0
+; CHECK-NEXT: vmov.32 q2[1], r0
+; CHECK-NEXT: vmov.32 q2[2], r1
+; CHECK-NEXT: vmov.32 q2[3], r1
+; CHECK-NEXT: vbic q1, q1, q2
+; CHECK-NEXT: vand q0, q0, q2
+; CHECK-NEXT: vorr q0, q0, q1
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %0 = icmp sgt <2 x i64> %s1, %s2
+ %1 = select <2 x i1> %0, <2 x i64> %s1, <2 x i64> %s2
+ ret <2 x i64> %1
+}
+
define arm_aapcs_vfpcc <16 x i8> @umax_v16i8(<16 x i8> %s1, <16 x i8> %s2) {
; CHECK-LABEL: umax_v16i8:
; CHECK: @ %bb.0: @ %entry
@@ -135,6 +264,49 @@ entry:
ret <4 x i32> %1
}
+define arm_aapcs_vfpcc <2 x i64> @umax_v2i64(<2 x i64> %s1, <2 x i64> %s2) {
+; CHECK-LABEL: umax_v2i64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: vmov r2, s2
+; CHECK-NEXT: movs r0, #0
+; CHECK-NEXT: vmov r3, s6
+; CHECK-NEXT: vmov r12, s3
+; CHECK-NEXT: vmov r1, s7
+; CHECK-NEXT: vmov lr, s5
+; CHECK-NEXT: subs r2, r3, r2
+; CHECK-NEXT: vmov r3, s4
+; CHECK-NEXT: vmov r2, s0
+; CHECK-NEXT: sbcs.w r1, r1, r12
+; CHECK-NEXT: vmov r12, s1
+; CHECK-NEXT: mov.w r1, #0
+; CHECK-NEXT: it lo
+; CHECK-NEXT: movlo r1, #1
+; CHECK-NEXT: cmp r1, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r1, #-1
+; CHECK-NEXT: subs r2, r3, r2
+; CHECK-NEXT: sbcs.w r2, lr, r12
+; CHECK-NEXT: it lo
+; CHECK-NEXT: movlo r0, #1
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r0, #-1
+; CHECK-NEXT: vmov.32 q2[0], r0
+; CHECK-NEXT: vmov.32 q2[1], r0
+; CHECK-NEXT: vmov.32 q2[2], r1
+; CHECK-NEXT: vmov.32 q2[3], r1
+; CHECK-NEXT: vbic q1, q1, q2
+; CHECK-NEXT: vand q0, q0, q2
+; CHECK-NEXT: vorr q0, q0, q1
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %0 = icmp ugt <2 x i64> %s1, %s2
+ %1 = select <2 x i1> %0, <2 x i64> %s1, <2 x i64> %s2
+ ret <2 x i64> %1
+}
+
define arm_aapcs_vfpcc <4 x float> @maxnm_float32_t(<4 x float> %src1, <4 x float> %src2) {
; CHECK-MVE-LABEL: maxnm_float32_t:
@@ -227,3 +399,46 @@ entry:
%0 = select <8 x i1> %cmp, <8 x half> %src1, <8 x half> %src2
ret <8 x half> %0
}
+
+define arm_aapcs_vfpcc <2 x double> @maxnm_float64_t(<2 x double> %src1, <2 x double> %src2) {
+; CHECK-LABEL: maxnm_float64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r4, lr}
+; CHECK-NEXT: push {r4, lr}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-NEXT: vmov q4, q1
+; CHECK-NEXT: vmov q5, q0
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: vmov r2, r3, d11
+; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: vmov r0, r1, d8
+; CHECK-NEXT: vmov r2, r3, d10
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r4, #1
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r4, #-1
+; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r0, #1
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r0, #-1
+; CHECK-NEXT: vmov.32 q0[0], r0
+; CHECK-NEXT: vmov.32 q0[1], r0
+; CHECK-NEXT: vmov.32 q0[2], r4
+; CHECK-NEXT: vmov.32 q0[3], r4
+; CHECK-NEXT: vbic q1, q5, q0
+; CHECK-NEXT: vand q0, q4, q0
+; CHECK-NEXT: vorr q0, q0, q1
+; CHECK-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-NEXT: pop {r4, pc}
+entry:
+ %cmp = fcmp fast ogt <2 x double> %src2, %src1
+ %0 = select <2 x i1> %cmp, <2 x double> %src2, <2 x double> %src1
+ ret <2 x double> %0
+}
diff --git a/llvm/test/CodeGen/Thumb2/mve-neg.ll b/llvm/test/CodeGen/Thumb2/mve-neg.ll
index f1c4352e3ed..602ce3d5f9b 100644
--- a/llvm/test/CodeGen/Thumb2/mve-neg.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-neg.ll
@@ -30,3 +30,26 @@ entry:
%0 = sub nsw <4 x i32> zeroinitializer, %s1
ret <4 x i32> %0
}
+
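+; 64-bit lane negation expands to scalar rsbs/sbc pairs.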
+define arm_aapcs_vfpcc <2 x i64> @neg_v2i64(<2 x i64> %s1) {
+; CHECK-LABEL: neg_v2i64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov r1, s2
+; CHECK-NEXT: mov.w r12, #0
+; CHECK-NEXT: vmov r0, s3
+; CHECK-NEXT: vmov r2, s0
+; CHECK-NEXT: vmov r3, s1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: sbc.w r0, r12, r0
+; CHECK-NEXT: rsbs r2, r2, #0
+; CHECK-NEXT: sbc.w r3, r12, r3
+; CHECK-NEXT: vmov.32 q0[0], r2
+; CHECK-NEXT: vmov.32 q0[1], r3
+; CHECK-NEXT: vmov.32 q0[2], r1
+; CHECK-NEXT: vmov.32 q0[3], r0
+; CHECK-NEXT: bx lr
+entry:
+ %0 = sub nsw <2 x i64> zeroinitializer, %s1
+ ret <2 x i64> %0
+}
+
diff --git a/llvm/test/CodeGen/Thumb2/mve-sext.ll b/llvm/test/CodeGen/Thumb2/mve-sext.ll
index 9458fdc47e5..452e68405fe 100644
--- a/llvm/test/CodeGen/Thumb2/mve-sext.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-sext.ll
@@ -32,6 +32,24 @@ entry:
ret <4 x i32> %0
}
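+; Sign extension to 64-bit lanes is done lane by lane, computing each high
+; word with an arithmetic shift right by 31.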
+define arm_aapcs_vfpcc <2 x i64> @sext_v2i32_v2i64(<2 x i32> %src) {
+; CHECK-LABEL: sext_v2i32_v2i64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: vmov.32 q1[0], r0
+; CHECK-NEXT: asrs r0, r0, #31
+; CHECK-NEXT: vmov.32 q1[1], r0
+; CHECK-NEXT: vmov r0, s2
+; CHECK-NEXT: vmov.32 q1[2], r0
+; CHECK-NEXT: asrs r0, r0, #31
+; CHECK-NEXT: vmov.32 q1[3], r0
+; CHECK-NEXT: vmov q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = sext <2 x i32> %src to <2 x i64>
+ ret <2 x i64> %0
+}
+
define arm_aapcs_vfpcc <8 x i16> @zext_v8i8_v8i16(<8 x i8> %src) {
; CHECK-LABEL: zext_v8i8_v8i16:
@@ -64,6 +82,25 @@ entry:
ret <4 x i32> %0
}
+define arm_aapcs_vfpcc <2 x i64> @zext_v2i32_v2i64(<2 x i32> %src) {
+; CHECK-LABEL: zext_v2i32_v2i64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: adr r0, .LCPI7_0
+; CHECK-NEXT: vldrw.u32 q1, [r0]
+; CHECK-NEXT: vand q0, q0, q1
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 4
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI7_0:
+; CHECK-NEXT: .long 4294967295 @ 0xffffffff
+; CHECK-NEXT: .long 0 @ 0x0
+; CHECK-NEXT: .long 4294967295 @ 0xffffffff
+; CHECK-NEXT: .long 0 @ 0x0
+entry:
+ %0 = zext <2 x i32> %src to <2 x i64>
+ ret <2 x i64> %0
+}
+
define arm_aapcs_vfpcc <8 x i8> @trunc_v8i16_v8i8(<8 x i16> %src) {
; CHECK-LABEL: trunc_v8i16_v8i8:
@@ -91,3 +128,13 @@ entry:
%0 = trunc <4 x i32> %src to <4 x i8>
ret <4 x i8> %0
}
+
+define arm_aapcs_vfpcc <2 x i32> @trunc_v2i64_v2i32(<2 x i64> %src) {
+; CHECK-LABEL: trunc_v2i64_v2i32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: bx lr
+entry:
+ %0 = trunc <2 x i64> %src to <2 x i32>
+ ret <2 x i32> %0
+}
+
diff --git a/llvm/test/CodeGen/Thumb2/mve-shifts.ll b/llvm/test/CodeGen/Thumb2/mve-shifts.ll
index 4e6e5ae7f6a..a321c2dd383 100644
--- a/llvm/test/CodeGen/Thumb2/mve-shifts.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-shifts.ll
@@ -31,6 +31,28 @@ entry:
ret <4 x i32> %0
}
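+; 64-bit lane shifts use the scalar two-register long-shift instructions
+; (lsll/lsrl/asrl) on each lane in turn.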
+define arm_aapcs_vfpcc <2 x i64> @shl_qq_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
+; CHECK-LABEL: shl_qq_int64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov r0, s4
+; CHECK-NEXT: vmov r1, s1
+; CHECK-NEXT: vmov r2, s0
+; CHECK-NEXT: lsll r2, r1, r0
+; CHECK-NEXT: vmov r0, s6
+; CHECK-NEXT: vmov.32 q2[0], r2
+; CHECK-NEXT: vmov r2, s2
+; CHECK-NEXT: vmov.32 q2[1], r1
+; CHECK-NEXT: vmov r1, s3
+; CHECK-NEXT: lsll r2, r1, r0
+; CHECK-NEXT: vmov.32 q2[2], r2
+; CHECK-NEXT: vmov.32 q2[3], r1
+; CHECK-NEXT: vmov q0, q2
+; CHECK-NEXT: bx lr
+entry:
+ %0 = shl <2 x i64> %src1, %src2
+ ret <2 x i64> %0
+}
+
define arm_aapcs_vfpcc <16 x i8> @shru_qq_int8_t(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: shru_qq_int8_t:
@@ -65,6 +87,30 @@ entry:
ret <4 x i32> %0
}
+define arm_aapcs_vfpcc <2 x i64> @shru_qq_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
+; CHECK-LABEL: shru_qq_int64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov r2, s4
+; CHECK-NEXT: vmov r1, s1
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: rsbs r2, r2, #0
+; CHECK-NEXT: lsll r0, r1, r2
+; CHECK-NEXT: vmov r2, s6
+; CHECK-NEXT: vmov.32 q2[0], r0
+; CHECK-NEXT: vmov r0, s2
+; CHECK-NEXT: vmov.32 q2[1], r1
+; CHECK-NEXT: vmov r1, s3
+; CHECK-NEXT: rsbs r2, r2, #0
+; CHECK-NEXT: lsll r0, r1, r2
+; CHECK-NEXT: vmov.32 q2[2], r0
+; CHECK-NEXT: vmov.32 q2[3], r1
+; CHECK-NEXT: vmov q0, q2
+; CHECK-NEXT: bx lr
+entry:
+ %0 = lshr <2 x i64> %src1, %src2
+ ret <2 x i64> %0
+}
+
define arm_aapcs_vfpcc <16 x i8> @shrs_qq_int8_t(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: shrs_qq_int8_t:
@@ -99,6 +145,28 @@ entry:
ret <4 x i32> %0
}
+define arm_aapcs_vfpcc <2 x i64> @shrs_qq_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
+; CHECK-LABEL: shrs_qq_int64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov r0, s4
+; CHECK-NEXT: vmov r1, s1
+; CHECK-NEXT: vmov r2, s0
+; CHECK-NEXT: asrl r2, r1, r0
+; CHECK-NEXT: vmov r0, s6
+; CHECK-NEXT: vmov.32 q2[0], r2
+; CHECK-NEXT: vmov r2, s2
+; CHECK-NEXT: vmov.32 q2[1], r1
+; CHECK-NEXT: vmov r1, s3
+; CHECK-NEXT: asrl r2, r1, r0
+; CHECK-NEXT: vmov.32 q2[2], r2
+; CHECK-NEXT: vmov.32 q2[3], r1
+; CHECK-NEXT: vmov q0, q2
+; CHECK-NEXT: bx lr
+entry:
+ %0 = ashr <2 x i64> %src1, %src2
+ ret <2 x i64> %0
+}
+
define arm_aapcs_vfpcc <16 x i8> @shl_qi_int8_t(<16 x i8> %src1) {
; CHECK-LABEL: shl_qi_int8_t:
@@ -130,6 +198,26 @@ entry:
ret <4 x i32> %0
}
+define arm_aapcs_vfpcc <2 x i64> @shl_qi_int64_t(<2 x i64> %src1) {
+; CHECK-LABEL: shl_qi_int64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov r1, s1
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: lsll r0, r1, #4
+; CHECK-NEXT: vmov.32 q1[0], r0
+; CHECK-NEXT: vmov r0, s2
+; CHECK-NEXT: vmov.32 q1[1], r1
+; CHECK-NEXT: vmov r1, s3
+; CHECK-NEXT: lsll r0, r1, #4
+; CHECK-NEXT: vmov.32 q1[2], r0
+; CHECK-NEXT: vmov.32 q1[3], r1
+; CHECK-NEXT: vmov q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = shl <2 x i64> %src1, <i64 4, i64 4>
+ ret <2 x i64> %0
+}
+
define arm_aapcs_vfpcc <16 x i8> @shru_qi_int8_t(<16 x i8> %src1) {
; CHECK-LABEL: shru_qi_int8_t:
@@ -161,6 +249,26 @@ entry:
ret <4 x i32> %0
}
+define arm_aapcs_vfpcc <2 x i64> @shru_qi_int64_t(<2 x i64> %src1) {
+; CHECK-LABEL: shru_qi_int64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov r1, s1
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: lsrl r0, r1, #4
+; CHECK-NEXT: vmov.32 q1[0], r0
+; CHECK-NEXT: vmov r0, s2
+; CHECK-NEXT: vmov.32 q1[1], r1
+; CHECK-NEXT: vmov r1, s3
+; CHECK-NEXT: lsrl r0, r1, #4
+; CHECK-NEXT: vmov.32 q1[2], r0
+; CHECK-NEXT: vmov.32 q1[3], r1
+; CHECK-NEXT: vmov q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = lshr <2 x i64> %src1, <i64 4, i64 4>
+ ret <2 x i64> %0
+}
+
define arm_aapcs_vfpcc <16 x i8> @shrs_qi_int8_t(<16 x i8> %src1) {
; CHECK-LABEL: shrs_qi_int8_t:
@@ -192,6 +300,25 @@ entry:
ret <4 x i32> %0
}
+define arm_aapcs_vfpcc <2 x i64> @shrs_qi_int64_t(<2 x i64> %src1) {
+; CHECK-LABEL: shrs_qi_int64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov r1, s1
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: asrl r0, r1, #4
+; CHECK-NEXT: vmov.32 q1[0], r0
+; CHECK-NEXT: vmov r0, s2
+; CHECK-NEXT: vmov.32 q1[1], r1
+; CHECK-NEXT: vmov r1, s3
+; CHECK-NEXT: asrl r0, r1, #4
+; CHECK-NEXT: vmov.32 q1[2], r0
+; CHECK-NEXT: vmov.32 q1[3], r1
+; CHECK-NEXT: vmov q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = ashr <2 x i64> %src1, <i64 4, i64 4>
+ ret <2 x i64> %0
+}
define arm_aapcs_vfpcc <16 x i8> @shl_qr_int8_t(<16 x i8> %src1, i8 %src2) {
@@ -230,6 +357,28 @@ entry:
ret <4 x i32> %0
}
+define arm_aapcs_vfpcc <2 x i64> @shl_qr_int64_t(<2 x i64> %src1, i64 %src2) {
+; CHECK-LABEL: shl_qr_int64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov r1, s1
+; CHECK-NEXT: vmov r2, s0
+; CHECK-NEXT: lsll r2, r1, r0
+; CHECK-NEXT: vmov.32 q1[0], r2
+; CHECK-NEXT: vmov r2, s2
+; CHECK-NEXT: vmov.32 q1[1], r1
+; CHECK-NEXT: vmov r1, s3
+; CHECK-NEXT: lsll r2, r1, r0
+; CHECK-NEXT: vmov.32 q1[2], r2
+; CHECK-NEXT: vmov.32 q1[3], r1
+; CHECK-NEXT: vmov q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %i = insertelement <2 x i64> undef, i64 %src2, i32 0
+ %s = shufflevector <2 x i64> %i, <2 x i64> undef, <2 x i32> zeroinitializer
+ %0 = shl <2 x i64> %src1, %s
+ ret <2 x i64> %0
+}
+
define arm_aapcs_vfpcc <16 x i8> @shru_qr_int8_t(<16 x i8> %src1, i8 %src2) {
; CHECK-LABEL: shru_qr_int8_t:
@@ -273,6 +422,29 @@ entry:
ret <4 x i32> %0
}
+define arm_aapcs_vfpcc <2 x i64> @shru_qr_int64_t(<2 x i64> %src1, i64 %src2) {
+; CHECK-LABEL: shru_qr_int64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: rsbs r0, r0, #0
+; CHECK-NEXT: vmov r1, s1
+; CHECK-NEXT: vmov r2, s0
+; CHECK-NEXT: lsll r2, r1, r0
+; CHECK-NEXT: vmov.32 q1[0], r2
+; CHECK-NEXT: vmov r2, s2
+; CHECK-NEXT: vmov.32 q1[1], r1
+; CHECK-NEXT: vmov r1, s3
+; CHECK-NEXT: lsll r2, r1, r0
+; CHECK-NEXT: vmov.32 q1[2], r2
+; CHECK-NEXT: vmov.32 q1[3], r1
+; CHECK-NEXT: vmov q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %i = insertelement <2 x i64> undef, i64 %src2, i32 0
+ %s = shufflevector <2 x i64> %i, <2 x i64> undef, <2 x i32> zeroinitializer
+ %0 = lshr <2 x i64> %src1, %s
+ ret <2 x i64> %0
+}
+
define arm_aapcs_vfpcc <16 x i8> @shrs_qr_int8_t(<16 x i8> %src1, i8 %src2) {
; CHECK-LABEL: shrs_qr_int8_t:
@@ -316,17 +488,38 @@ entry:
ret <4 x i32> %0
}
+define arm_aapcs_vfpcc <2 x i64> @shrs_qr_int64_t(<2 x i64> %src1, i64 %src2) {
+; CHECK-LABEL: shrs_qr_int64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov r1, s1
+; CHECK-NEXT: vmov r2, s0
+; CHECK-NEXT: asrl r2, r1, r0
+; CHECK-NEXT: vmov.32 q1[0], r2
+; CHECK-NEXT: vmov r2, s2
+; CHECK-NEXT: vmov.32 q1[1], r1
+; CHECK-NEXT: vmov r1, s3
+; CHECK-NEXT: asrl r2, r1, r0
+; CHECK-NEXT: vmov.32 q1[2], r2
+; CHECK-NEXT: vmov.32 q1[3], r1
+; CHECK-NEXT: vmov q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %i = insertelement <2 x i64> undef, i64 %src2, i32 0
+ %s = shufflevector <2 x i64> %i, <2 x i64> undef, <2 x i32> zeroinitializer
+ %0 = ashr <2 x i64> %src1, %s
+ ret <2 x i64> %0
+}
define arm_aapcs_vfpcc <16 x i8> @shl_qiv_int8_t(<16 x i8> %src1) {
; CHECK-LABEL: shl_qiv_int8_t:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: adr r0, .LCPI27_0
+; CHECK-NEXT: adr r0, .LCPI36_0
; CHECK-NEXT: vldrw.u32 q1, [r0]
; CHECK-NEXT: vshl.u8 q0, q0, q1
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.1:
-; CHECK-NEXT: .LCPI27_0:
+; CHECK-NEXT: .LCPI36_0:
; CHECK-NEXT: .byte 1 @ 0x1
; CHECK-NEXT: .byte 2 @ 0x2
; CHECK-NEXT: .byte 3 @ 0x3
@@ -351,13 +544,13 @@ entry:
define arm_aapcs_vfpcc <8 x i16> @shl_qiv_int16_t(<8 x i16> %src1) {
; CHECK-LABEL: shl_qiv_int16_t:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: adr r0, .LCPI28_0
+; CHECK-NEXT: adr r0, .LCPI37_0
; CHECK-NEXT: vldrw.u32 q1, [r0]
; CHECK-NEXT: vshl.u16 q0, q0, q1
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.1:
-; CHECK-NEXT: .LCPI28_0:
+; CHECK-NEXT: .LCPI37_0:
; CHECK-NEXT: .short 1 @ 0x1
; CHECK-NEXT: .short 2 @ 0x2
; CHECK-NEXT: .short 3 @ 0x3
@@ -374,13 +567,13 @@ entry:
define arm_aapcs_vfpcc <4 x i32> @shl_qiv_int32_t(<4 x i32> %src1) {
; CHECK-LABEL: shl_qiv_int32_t:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: adr r0, .LCPI29_0
+; CHECK-NEXT: adr r0, .LCPI38_0
; CHECK-NEXT: vldrw.u32 q1, [r0]
; CHECK-NEXT: vshl.u32 q0, q0, q1
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.1:
-; CHECK-NEXT: .LCPI29_0:
+; CHECK-NEXT: .LCPI38_0:
; CHECK-NEXT: .long 1 @ 0x1
; CHECK-NEXT: .long 2 @ 0x2
; CHECK-NEXT: .long 3 @ 0x3
diff --git a/llvm/test/CodeGen/Thumb2/mve-shuffle.ll b/llvm/test/CodeGen/Thumb2/mve-shuffle.ll
index 87d209b5d2f..b5c96f9f86d 100644
--- a/llvm/test/CodeGen/Thumb2/mve-shuffle.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-shuffle.ll
@@ -262,6 +262,38 @@ entry:
ret <16 x i8> %out
}
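+; Identity shuffles of v2i64 fold away entirely; the lane swap is done with
+; vmov.f32 lane moves.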
+define arm_aapcs_vfpcc <2 x i64> @shuffle1_i64(<2 x i64> %src) {
+; CHECK-LABEL: shuffle1_i64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: bx lr
+entry:
+ %out = shufflevector <2 x i64> %src, <2 x i64> undef, <2 x i32> <i32 0, i32 1>
+ ret <2 x i64> %out
+}
+
+define arm_aapcs_vfpcc <2 x i64> @shuffle2_i64(<2 x i64> %src) {
+; CHECK-LABEL: shuffle2_i64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov.f32 s4, s2
+; CHECK-NEXT: vmov.f32 s5, s3
+; CHECK-NEXT: vmov.f32 s6, s0
+; CHECK-NEXT: vmov.f32 s7, s1
+; CHECK-NEXT: vmov q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %out = shufflevector <2 x i64> %src, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
+ ret <2 x i64> %out
+}
+
+define arm_aapcs_vfpcc <2 x i64> @shuffle3_i64(<2 x i64> %src) {
+; CHECK-LABEL: shuffle3_i64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: bx lr
+entry:
+ %out = shufflevector <2 x i64> %src, <2 x i64> undef, <2 x i32> <i32 undef, i32 1>
+ ret <2 x i64> %out
+}
+
define arm_aapcs_vfpcc <4 x float> @shuffle1_f32(<4 x float> %src) {
; CHECK-LABEL: shuffle1_f32:
; CHECK: @ %bb.0: @ %entry
@@ -390,6 +422,38 @@ entry:
ret <8 x half> %out
}
+define arm_aapcs_vfpcc <2 x double> @shuffle1_f64(<2 x double> %src) {
+; CHECK-LABEL: shuffle1_f64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: bx lr
+entry:
+ %out = shufflevector <2 x double> %src, <2 x double> undef, <2 x i32> <i32 0, i32 1>
+ ret <2 x double> %out
+}
+
+define arm_aapcs_vfpcc <2 x double> @shuffle2_f64(<2 x double> %src) {
+; CHECK-LABEL: shuffle2_f64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov.f32 s4, s2
+; CHECK-NEXT: vmov.f32 s5, s3
+; CHECK-NEXT: vmov.f32 s6, s0
+; CHECK-NEXT: vmov.f32 s7, s1
+; CHECK-NEXT: vmov q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %out = shufflevector <2 x double> %src, <2 x double> undef, <2 x i32> <i32 1, i32 0>
+ ret <2 x double> %out
+}
+
+define arm_aapcs_vfpcc <2 x double> @shuffle3_f64(<2 x double> %src) {
+; CHECK-LABEL: shuffle3_f64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: bx lr
+entry:
+ %out = shufflevector <2 x double> %src, <2 x double> undef, <2 x i32> <i32 undef, i32 1>
+ ret <2 x double> %out
+}
+
define arm_aapcs_vfpcc <4 x i32> @insert_i32(i32 %a) {
; CHECK-LABEL: insert_i32:
@@ -421,6 +485,17 @@ entry:
ret <16 x i8> %res
}
+define arm_aapcs_vfpcc <2 x i64> @insert_i64(i64 %a) {
+; CHECK-LABEL: insert_i64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov.32 q0[0], r0
+; CHECK-NEXT: vmov.32 q0[1], r1
+; CHECK-NEXT: bx lr
+entry:
+ %res = insertelement <2 x i64> undef, i64 %a, i32 0
+ ret <2 x i64> %res
+}
+
define arm_aapcs_vfpcc <4 x float> @insert_f32(float %a) {
; CHECK-LABEL: insert_f32:
; CHECK: @ %bb.0: @ %entry
@@ -443,12 +518,35 @@ entry:
ret <8 x half> %res
}
+define arm_aapcs_vfpcc <2 x double> @insert_f64(double %a) {
+; CHECK-LABEL: insert_f64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r4, r6, r7, lr}
+; CHECK-NEXT: push {r4, r6, r7, lr}
+; CHECK-NEXT: .setfp r7, sp, #8
+; CHECK-NEXT: add r7, sp, #8
+; CHECK-NEXT: .pad #16
+; CHECK-NEXT: sub sp, #16
+; CHECK-NEXT: mov r4, sp
+; CHECK-NEXT: bfc r4, #0, #4
+; CHECK-NEXT: mov sp, r4
+; CHECK-NEXT: sub.w r4, r7, #8
+; CHECK-NEXT: vstr d0, [sp]
+; CHECK-NEXT: mov r0, sp
+; CHECK-NEXT: vldrw.u32 q0, [r0]
+; CHECK-NEXT: mov sp, r4
+; CHECK-NEXT: pop {r4, r6, r7, pc}
+entry:
+ %res = insertelement <2 x double> undef, double %a, i32 0
+ ret <2 x double> %res
+}
+
define arm_aapcs_vfpcc i64 @scalar_to_vector_i32(<8 x i16> %v) {
; CHECK-LABEL: scalar_to_vector_i32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .pad #8
; CHECK-NEXT: sub sp, #8
-; CHECK-NEXT: adr r1, .LCPI30_0
+; CHECK-NEXT: adr r1, .LCPI38_0
; CHECK-NEXT: vmov.u16 r0, q0[0]
; CHECK-NEXT: vldrw.u32 q1, [r1]
; CHECK-NEXT: vmov.32 q0[0], r0
@@ -461,7 +559,7 @@ define arm_aapcs_vfpcc i64 @scalar_to_vector_i32(<8 x i16> %v) {
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.1:
-; CHECK-NEXT: .LCPI30_0:
+; CHECK-NEXT: .LCPI38_0:
; CHECK-NEXT: .zero 4
; CHECK-NEXT: .long 7 @ 0x7
; CHECK-NEXT: .long 1 @ 0x1
@@ -533,6 +631,28 @@ entry:
ret i8 %res
}
+define arm_aapcs_vfpcc i64 @extract_i64_0(<2 x i64> %a) {
+; CHECK-LABEL: extract_i64_0:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: vmov r1, s1
+; CHECK-NEXT: bx lr
+entry:
+ %res = extractelement <2 x i64> %a, i32 0
+ ret i64 %res
+}
+
+define arm_aapcs_vfpcc i64 @extract_i64_1(<2 x i64> %a) {
+; CHECK-LABEL: extract_i64_1:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov r0, s2
+; CHECK-NEXT: vmov r1, s3
+; CHECK-NEXT: bx lr
+entry:
+ %res = extractelement <2 x i64> %a, i32 1
+ ret i64 %res
+}
+
define arm_aapcs_vfpcc float @extract_f32_0(<4 x float> %a) {
; CHECK-LABEL: extract_f32_0:
; CHECK: @ %bb.0: @ %entry
@@ -576,3 +696,25 @@ entry:
%res = extractelement <8 x half> %a, i32 3
ret half %res
}
+
+define arm_aapcs_vfpcc double @extract_f64_0(<2 x double> %a) {
+; CHECK-LABEL: extract_f64_0:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: @ kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: bx lr
+entry:
+ %res = extractelement <2 x double> %a, i32 0
+ ret double %res
+}
+
+define arm_aapcs_vfpcc double @extract_f64_1(<2 x double> %a) {
+; CHECK-LABEL: extract_f64_1:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov.f32 s0, s2
+; CHECK-NEXT: vmov.f32 s1, s3
+; CHECK-NEXT: bx lr
+entry:
+ %res = extractelement <2 x double> %a, i32 1
+ ret double %res
+}
+
diff --git a/llvm/test/CodeGen/Thumb2/mve-simple-arith.ll b/llvm/test/CodeGen/Thumb2/mve-simple-arith.ll
index 570aae3f21b..ecad0c1c5df 100644
--- a/llvm/test/CodeGen/Thumb2/mve-simple-arith.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-simple-arith.ll
@@ -32,6 +32,33 @@ entry:
ret <4 x i32> %0
}
+define arm_aapcs_vfpcc <2 x i64> @add_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
+; CHECK-LABEL: add_int64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: vmov r2, s6
+; CHECK-NEXT: vmov r3, s2
+; CHECK-NEXT: vmov r0, s7
+; CHECK-NEXT: vmov r1, s3
+; CHECK-NEXT: adds.w lr, r3, r2
+; CHECK-NEXT: vmov r2, s0
+; CHECK-NEXT: vmov r3, s1
+; CHECK-NEXT: adc.w r12, r1, r0
+; CHECK-NEXT: vmov r0, s4
+; CHECK-NEXT: vmov r1, s5
+; CHECK-NEXT: adds r0, r0, r2
+; CHECK-NEXT: adcs r1, r3
+; CHECK-NEXT: vmov.32 q0[0], r0
+; CHECK-NEXT: vmov.32 q0[1], r1
+; CHECK-NEXT: vmov.32 q0[2], lr
+; CHECK-NEXT: vmov.32 q0[3], r12
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %0 = add nsw <2 x i64> %src1, %src2
+ ret <2 x i64> %0
+}
+
define arm_aapcs_vfpcc <4 x float> @add_float32_t(<4 x float> %src1, <4 x float> %src2) {
; CHECK-MVE-LABEL: add_float32_t:
; CHECK-MVE: @ %bb.0: @ %entry
@@ -122,6 +149,33 @@ entry:
ret <8 x half> %0
}
+define arm_aapcs_vfpcc <2 x double> @add_float64_t(<2 x double> %src1, <2 x double> %src2) {
+; CHECK-LABEL: add_float64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-NEXT: vmov q4, q1
+; CHECK-NEXT: vmov q5, q0
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: vmov r2, r3, d11
+; CHECK-NEXT: bl __aeabi_dadd
+; CHECK-NEXT: vmov lr, r12, d8
+; CHECK-NEXT: vmov r2, r3, d10
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, lr
+; CHECK-NEXT: mov r1, r12
+; CHECK-NEXT: bl __aeabi_dadd
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %0 = fadd nnan ninf nsz <2 x double> %src2, %src1
+ ret <2 x double> %0
+}
+
define arm_aapcs_vfpcc <16 x i8> @sub_int8_t(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: sub_int8_t:
@@ -153,6 +207,33 @@ entry:
ret <4 x i32> %0
}
+define arm_aapcs_vfpcc <2 x i64> @sub_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
+; CHECK-LABEL: sub_int64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: vmov r2, s2
+; CHECK-NEXT: vmov r3, s6
+; CHECK-NEXT: vmov r0, s3
+; CHECK-NEXT: vmov r1, s7
+; CHECK-NEXT: subs.w lr, r3, r2
+; CHECK-NEXT: vmov r2, s4
+; CHECK-NEXT: vmov r3, s5
+; CHECK-NEXT: sbc.w r12, r1, r0
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: vmov r1, s1
+; CHECK-NEXT: subs r0, r2, r0
+; CHECK-NEXT: sbc.w r1, r3, r1
+; CHECK-NEXT: vmov.32 q0[0], r0
+; CHECK-NEXT: vmov.32 q0[1], r1
+; CHECK-NEXT: vmov.32 q0[2], lr
+; CHECK-NEXT: vmov.32 q0[3], r12
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %0 = sub nsw <2 x i64> %src2, %src1
+ ret <2 x i64> %0
+}
+
define arm_aapcs_vfpcc <4 x float> @sub_float32_t(<4 x float> %src1, <4 x float> %src2) {
; CHECK-MVE-LABEL: sub_float32_t:
; CHECK-MVE: @ %bb.0: @ %entry
@@ -243,6 +324,34 @@ entry:
ret <8 x half> %0
}
+define arm_aapcs_vfpcc <2 x double> @sub_float64_t(<2 x double> %src1, <2 x double> %src2) {
+; CHECK-LABEL: sub_float64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-NEXT: vmov q4, q1
+; CHECK-NEXT: vmov q5, q0
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: vmov r2, r3, d11
+; CHECK-NEXT: bl __aeabi_dsub
+; CHECK-NEXT: vmov lr, r12, d8
+; CHECK-NEXT: vmov r2, r3, d10
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, lr
+; CHECK-NEXT: mov r1, r12
+; CHECK-NEXT: bl __aeabi_dsub
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %0 = fsub nnan ninf nsz <2 x double> %src2, %src1
+ ret <2 x double> %0
+}
+
+
define arm_aapcs_vfpcc <16 x i8> @mul_int8_t(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: mul_int8_t:
; CHECK: @ %bb.0: @ %entry
@@ -273,6 +382,35 @@ entry:
ret <4 x i32> %0
}
+define arm_aapcs_vfpcc <2 x i64> @mul_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
+; CHECK-LABEL: mul_int64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r4, r5, r7, lr}
+; CHECK-NEXT: push {r4, r5, r7, lr}
+; CHECK-NEXT: vmov r0, s4
+; CHECK-NEXT: vmov r1, s0
+; CHECK-NEXT: vmov r2, s5
+; CHECK-NEXT: umull r12, r3, r1, r0
+; CHECK-NEXT: mla lr, r1, r2, r3
+; CHECK-NEXT: vmov r3, s6
+; CHECK-NEXT: vmov r1, s2
+; CHECK-NEXT: vmov r2, s7
+; CHECK-NEXT: umull r4, r5, r1, r3
+; CHECK-NEXT: mla r1, r1, r2, r5
+; CHECK-NEXT: vmov r2, s1
+; CHECK-NEXT: mla r0, r2, r0, lr
+; CHECK-NEXT: vmov r2, s3
+; CHECK-NEXT: vmov.32 q0[0], r12
+; CHECK-NEXT: vmov.32 q0[1], r0
+; CHECK-NEXT: vmov.32 q0[2], r4
+; CHECK-NEXT: mla r1, r2, r3, r1
+; CHECK-NEXT: vmov.32 q0[3], r1
+; CHECK-NEXT: pop {r4, r5, r7, pc}
+entry:
+ %0 = mul nsw <2 x i64> %src1, %src2
+ ret <2 x i64> %0
+}
+
define arm_aapcs_vfpcc <8 x half> @mul_float16_t(<8 x half> %src1, <8 x half> %src2) {
; CHECK-MVE-LABEL: mul_float16_t:
; CHECK-MVE: @ %bb.0: @ %entry
@@ -362,3 +500,31 @@ entry:
%0 = fmul nnan ninf nsz <4 x float> %src2, %src1
ret <4 x float> %0
}
+
+define arm_aapcs_vfpcc <2 x double> @mul_float64_t(<2 x double> %src1, <2 x double> %src2) {
+; CHECK-LABEL: mul_float64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-NEXT: vmov q4, q1
+; CHECK-NEXT: vmov q5, q0
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: vmov r2, r3, d11
+; CHECK-NEXT: bl __aeabi_dmul
+; CHECK-NEXT: vmov lr, r12, d8
+; CHECK-NEXT: vmov r2, r3, d10
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, lr
+; CHECK-NEXT: mov r1, r12
+; CHECK-NEXT: bl __aeabi_dmul
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %0 = fmul nnan ninf nsz <2 x double> %src2, %src1
+ ret <2 x double> %0
+}
+
diff --git a/llvm/test/CodeGen/Thumb2/mve-soft-float-abi.ll b/llvm/test/CodeGen/Thumb2/mve-soft-float-abi.ll
index 794f7ba20c2..37ca5a2f202 100644
--- a/llvm/test/CodeGen/Thumb2/mve-soft-float-abi.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-soft-float-abi.ll
@@ -50,6 +50,39 @@ entry:
ret <4 x i32> %sum
}
+define <2 x i64> @vector_add_i64(<2 x i64> %lhs, <2 x i64> %rhs) {
+; CHECK-FP-LABEL: vector_add_i64:
+; CHECK-FP: @ %bb.0: @ %entry
+; CHECK-FP-NEXT: .save {r7, lr}
+; CHECK-FP-NEXT: push {r7, lr}
+; CHECK-FP-NEXT: vmov d1, r2, r3
+; CHECK-FP-NEXT: vmov d0, r0, r1
+; CHECK-FP-NEXT: add r0, sp, #8
+; CHECK-FP-NEXT: vldrw.u32 q1, [r0]
+; CHECK-FP-NEXT: vmov r1, s2
+; CHECK-FP-NEXT: vmov r0, s3
+; CHECK-FP-NEXT: vmov r3, s6
+; CHECK-FP-NEXT: vmov r2, s7
+; CHECK-FP-NEXT: adds.w lr, r1, r3
+; CHECK-FP-NEXT: vmov r3, s0
+; CHECK-FP-NEXT: vmov r1, s4
+; CHECK-FP-NEXT: adc.w r12, r0, r2
+; CHECK-FP-NEXT: vmov r2, s1
+; CHECK-FP-NEXT: vmov r0, s5
+; CHECK-FP-NEXT: adds r1, r1, r3
+; CHECK-FP-NEXT: vmov.32 q0[0], r1
+; CHECK-FP-NEXT: adcs r0, r2
+; CHECK-FP-NEXT: vmov.32 q0[1], r0
+; CHECK-FP-NEXT: vmov.32 q0[2], lr
+; CHECK-FP-NEXT: vmov.32 q0[3], r12
+; CHECK-FP-NEXT: vmov r0, r1, d0
+; CHECK-FP-NEXT: vmov r2, r3, d1
+; CHECK-FP-NEXT: pop {r7, pc}
+entry:
+ %sum = add <2 x i64> %lhs, %rhs
+ ret <2 x i64> %sum
+}
+
define <8 x half> @vector_add_f16(<8 x half> %lhs, <8 x half> %rhs) {
; CHECK-FP-LABEL: vector_add_f16:
; CHECK-FP: @ %bb.0: @ %entry
@@ -81,3 +114,38 @@ entry:
%sum = fadd <4 x float> %lhs, %rhs
ret <4 x float> %sum
}
+
+define <2 x double> @vector_add_f64(<2 x double> %lhs, <2 x double> %rhs) {
+; CHECK-FP-LABEL: vector_add_f64:
+; CHECK-FP: @ %bb.0: @ %entry
+; CHECK-FP-NEXT: .save {r4, r5, r6, r7, lr}
+; CHECK-FP-NEXT: push {r4, r5, r6, r7, lr}
+; CHECK-FP-NEXT: .pad #4
+; CHECK-FP-NEXT: sub sp, #4
+; CHECK-FP-NEXT: .vsave {d8, d9}
+; CHECK-FP-NEXT: vpush {d8, d9}
+; CHECK-FP-NEXT: mov r5, r0
+; CHECK-FP-NEXT: add r0, sp, #40
+; CHECK-FP-NEXT: vldrw.u32 q4, [r0]
+; CHECK-FP-NEXT: mov r4, r2
+; CHECK-FP-NEXT: mov r6, r3
+; CHECK-FP-NEXT: mov r7, r1
+; CHECK-FP-NEXT: vmov r2, r3, d9
+; CHECK-FP-NEXT: mov r0, r4
+; CHECK-FP-NEXT: mov r1, r6
+; CHECK-FP-NEXT: bl __aeabi_dadd
+; CHECK-FP-NEXT: vmov r2, r3, d8
+; CHECK-FP-NEXT: vmov d9, r0, r1
+; CHECK-FP-NEXT: mov r0, r5
+; CHECK-FP-NEXT: mov r1, r7
+; CHECK-FP-NEXT: bl __aeabi_dadd
+; CHECK-FP-NEXT: vmov d8, r0, r1
+; CHECK-FP-NEXT: vmov r2, r3, d9
+; CHECK-FP-NEXT: vmov r0, r1, d8
+; CHECK-FP-NEXT: vpop {d8, d9}
+; CHECK-FP-NEXT: add sp, #4
+; CHECK-FP-NEXT: pop {r4, r5, r6, r7, pc}
+entry:
+ %sum = fadd <2 x double> %lhs, %rhs
+ ret <2 x double> %sum
+}
diff --git a/llvm/test/CodeGen/Thumb2/mve-vcvt.ll b/llvm/test/CodeGen/Thumb2/mve-vcvt.ll
index 55d354ae639..524ec692c8c 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vcvt.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vcvt.ll
@@ -317,3 +317,111 @@ entry:
%out = fptoui <8 x half> %src to <8 x i16>
ret <8 x i16> %out
}
+
+define arm_aapcs_vfpcc <2 x double> @foo_float_int64(<2 x i64> %src) {
+; CHECK-LABEL: foo_float_int64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vmov q4, q0
+; CHECK-NEXT: vmov r0, s18
+; CHECK-NEXT: vmov r1, s19
+; CHECK-NEXT: bl __aeabi_l2d
+; CHECK-NEXT: vmov r2, s16
+; CHECK-NEXT: vmov r3, s17
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: mov r1, r3
+; CHECK-NEXT: bl __aeabi_l2d
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %out = sitofp <2 x i64> %src to <2 x double>
+ ret <2 x double> %out
+}
+
+define arm_aapcs_vfpcc <2 x double> @foo_float_uint64(<2 x i64> %src) {
+; CHECK-LABEL: foo_float_uint64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vmov q4, q0
+; CHECK-NEXT: vmov r0, s18
+; CHECK-NEXT: vmov r1, s19
+; CHECK-NEXT: bl __aeabi_ul2d
+; CHECK-NEXT: vmov r2, s16
+; CHECK-NEXT: vmov r3, s17
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: mov r1, r3
+; CHECK-NEXT: bl __aeabi_ul2d
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %out = uitofp <2 x i64> %src to <2 x double>
+ ret <2 x double> %out
+}
+
+define arm_aapcs_vfpcc <2 x i64> @foo_int64_float(<2 x double> %src) {
+; CHECK-LABEL: foo_int64_float:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vmov q4, q0
+; CHECK-NEXT: vmov r0, r1, d8
+; CHECK-NEXT: bl __aeabi_d2lz
+; CHECK-NEXT: vmov r2, r3, d9
+; CHECK-NEXT: vmov.32 q4[0], r0
+; CHECK-NEXT: lsrl r0, r1, #32
+; CHECK-NEXT: vmov.32 q4[1], r0
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: mov r1, r3
+; CHECK-NEXT: bl __aeabi_d2lz
+; CHECK-NEXT: vmov.32 q4[2], r0
+; CHECK-NEXT: lsrl r0, r1, #32
+; CHECK-NEXT: vmov.32 q4[3], r0
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %out = fptosi <2 x double> %src to <2 x i64>
+ ret <2 x i64> %out
+}
+
+define arm_aapcs_vfpcc <2 x i64> @foo_uint64_float(<2 x double> %src) {
+; CHECK-LABEL: foo_uint64_float:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vmov q4, q0
+; CHECK-NEXT: vmov r0, r1, d8
+; CHECK-NEXT: bl __aeabi_d2ulz
+; CHECK-NEXT: vmov r2, r3, d9
+; CHECK-NEXT: vmov.32 q4[0], r0
+; CHECK-NEXT: lsrl r0, r1, #32
+; CHECK-NEXT: vmov.32 q4[1], r0
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: mov r1, r3
+; CHECK-NEXT: bl __aeabi_d2ulz
+; CHECK-NEXT: vmov.32 q4[2], r0
+; CHECK-NEXT: lsrl r0, r1, #32
+; CHECK-NEXT: vmov.32 q4[3], r0
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %out = fptoui <2 x double> %src to <2 x i64>
+ ret <2 x i64> %out
+}
diff --git a/llvm/test/CodeGen/Thumb2/mve-vdup.ll b/llvm/test/CodeGen/Thumb2/mve-vdup.ll
index 9629024f63e..3cc9cfd3f44 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vdup.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vdup.ll
@@ -35,6 +35,20 @@ entry:
ret <16 x i8> %out
}
+define arm_aapcs_vfpcc <2 x i64> @vdup_i64(i64 %src) {
+; CHECK-LABEL: vdup_i64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov.32 q0[0], r0
+; CHECK-NEXT: vmov.32 q0[1], r1
+; CHECK-NEXT: vmov.32 q0[2], r0
+; CHECK-NEXT: vmov.32 q0[3], r1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = insertelement <2 x i64> undef, i64 %src, i32 0
+ %out = shufflevector <2 x i64> %0, <2 x i64> undef, <2 x i32> zeroinitializer
+ ret <2 x i64> %out
+}
+
define arm_aapcs_vfpcc <4 x float> @vdup_f32_1(float %src) {
; CHECK-LABEL: vdup_f32_1:
; CHECK: @ %bb.0: @ %entry
@@ -80,6 +94,19 @@ entry:
ret <8 x half> %out
}
+define arm_aapcs_vfpcc <2 x double> @vdup_f64(double %src) {
+; CHECK-LABEL: vdup_f64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: @ kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: vmov.f32 s2, s0
+; CHECK-NEXT: vmov.f32 s3, s1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = insertelement <2 x double> undef, double %src, i32 0
+ %out = shufflevector <2 x double> %0, <2 x double> undef, <2 x i32> zeroinitializer
+ ret <2 x double> %out
+}
+
define arm_aapcs_vfpcc <4 x i32> @vduplane_i32(<4 x i32> %src) {
@@ -115,6 +142,17 @@ entry:
ret <16 x i8> %out
}
+define arm_aapcs_vfpcc <2 x i64> @vduplane_i64(<2 x i64> %src) {
+; CHECK-LABEL: vduplane_i64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov.f32 s0, s2
+; CHECK-NEXT: vmov.f32 s1, s3
+; CHECK-NEXT: bx lr
+entry:
+ %out = shufflevector <2 x i64> %src, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
+ ret <2 x i64> %out
+}
+
define arm_aapcs_vfpcc <4 x float> @vduplane_f32(<4 x float> %src) {
; CHECK-LABEL: vduplane_f32:
; CHECK: @ %bb.0: @ %entry
@@ -136,3 +174,14 @@ entry:
%out = shufflevector <8 x half> %src, <8 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
ret <8 x half> %out
}
+
+define arm_aapcs_vfpcc <2 x double> @vduplane_f64(<2 x double> %src) {
+; CHECK-LABEL: vduplane_f64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov.f32 s0, s2
+; CHECK-NEXT: vmov.f32 s1, s3
+; CHECK-NEXT: bx lr
+entry:
+ %out = shufflevector <2 x double> %src, <2 x double> undef, <2 x i32> <i32 1, i32 1>
+ ret <2 x double> %out
+}
diff --git a/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll b/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll
index 5281ecd17c3..94721a54b94 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll
@@ -11,7 +11,7 @@ entry:
ret <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
}
-define arm_aapcs_vfpcc <16 x i8> @mov_int8_m1(i8 *%dest) {
+define arm_aapcs_vfpcc <16 x i8> @mov_int8_m1() {
; CHECK-LABEL: mov_int8_m1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.i8 q0, #0xff
@@ -20,7 +20,7 @@ entry:
ret <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
}
-define arm_aapcs_vfpcc <8 x i16> @mov_int16_1(i16 *%dest) {
+define arm_aapcs_vfpcc <8 x i16> @mov_int16_1() {
; CHECK-LABEL: mov_int16_1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.i16 q0, #0x1
@@ -29,7 +29,7 @@ entry:
ret <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
}
-define arm_aapcs_vfpcc <8 x i16> @mov_int16_m1(i16 *%dest) {
+define arm_aapcs_vfpcc <8 x i16> @mov_int16_m1() {
; CHECK-LABEL: mov_int16_m1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.i8 q0, #0xff
@@ -38,7 +38,7 @@ entry:
ret <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
}
-define arm_aapcs_vfpcc <8 x i16> @mov_int16_256(i16 *%dest) {
+define arm_aapcs_vfpcc <8 x i16> @mov_int16_256() {
; CHECK-LABEL: mov_int16_256:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.i16 q0, #0x100
@@ -56,7 +56,7 @@ entry:
ret <8 x i16> <i16 257, i16 257, i16 257, i16 257, i16 257, i16 257, i16 257, i16 257>
}
-define arm_aapcs_vfpcc <8 x i16> @mov_int16_258(i16 *%dest) {
+define arm_aapcs_vfpcc <8 x i16> @mov_int16_258() {
; CHECK-LABEL: mov_int16_258:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adr r0, .LCPI6_0
@@ -73,7 +73,7 @@ entry:
ret <8 x i16> <i16 258, i16 258, i16 258, i16 258, i16 258, i16 258, i16 258, i16 258>
}
-define arm_aapcs_vfpcc <4 x i32> @mov_int32_1(i32 *%dest) {
+define arm_aapcs_vfpcc <4 x i32> @mov_int32_1() {
; CHECK-LABEL: mov_int32_1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.i32 q0, #0x1
@@ -82,7 +82,7 @@ entry:
ret <4 x i32> <i32 1, i32 1, i32 1, i32 1>
}
-define arm_aapcs_vfpcc <4 x i32> @mov_int32_256(i32 *%dest) {
+define arm_aapcs_vfpcc <4 x i32> @mov_int32_256() {
; CHECK-LABEL: mov_int32_256:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.i32 q0, #0x100
@@ -91,7 +91,7 @@ entry:
ret <4 x i32> <i32 256, i32 256, i32 256, i32 256>
}
-define arm_aapcs_vfpcc <4 x i32> @mov_int32_65536(i32 *%dest) {
+define arm_aapcs_vfpcc <4 x i32> @mov_int32_65536() {
; CHECK-LABEL: mov_int32_65536:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.i32 q0, #0x10000
@@ -100,7 +100,7 @@ entry:
ret <4 x i32> <i32 65536, i32 65536, i32 65536, i32 65536>
}
-define arm_aapcs_vfpcc <4 x i32> @mov_int32_16777216(i32 *%dest) {
+define arm_aapcs_vfpcc <4 x i32> @mov_int32_16777216() {
; CHECK-LABEL: mov_int32_16777216:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.i32 q0, #0x1000000
@@ -109,7 +109,7 @@ entry:
ret <4 x i32> <i32 16777216, i32 16777216, i32 16777216, i32 16777216>
}
-define arm_aapcs_vfpcc <4 x i32> @mov_int32_16777217(i32 *%dest) {
+define arm_aapcs_vfpcc <4 x i32> @mov_int32_16777217() {
; CHECK-LABEL: mov_int32_16777217:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adr r0, .LCPI11_0
@@ -126,7 +126,7 @@ entry:
ret <4 x i32> <i32 16777217, i32 16777217, i32 16777217, i32 16777217>
}
-define arm_aapcs_vfpcc <4 x i32> @mov_int32_17919(i32 *%dest) {
+define arm_aapcs_vfpcc <4 x i32> @mov_int32_17919() {
; CHECK-LABEL: mov_int32_17919:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.i32 q0, #0x45ff
@@ -135,7 +135,7 @@ entry:
ret <4 x i32> <i32 17919, i32 17919, i32 17919, i32 17919>
}
-define arm_aapcs_vfpcc <4 x i32> @mov_int32_4587519(i32 *%dest) {
+define arm_aapcs_vfpcc <4 x i32> @mov_int32_4587519() {
; CHECK-LABEL: mov_int32_4587519:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.i32 q0, #0x45ffff
@@ -144,7 +144,7 @@ entry:
ret <4 x i32> <i32 4587519, i32 4587519, i32 4587519, i32 4587519>
}
-define arm_aapcs_vfpcc <4 x i32> @mov_int32_m1(i32 *%dest) {
+define arm_aapcs_vfpcc <4 x i32> @mov_int32_m1() {
; CHECK-LABEL: mov_int32_m1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.i8 q0, #0xff
@@ -153,7 +153,7 @@ entry:
ret <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
}
-define arm_aapcs_vfpcc <4 x i32> @mov_int32_4294901760(i32 *%dest) {
+define arm_aapcs_vfpcc <4 x i32> @mov_int32_4294901760() {
; CHECK-LABEL: mov_int32_4294901760:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmvn.i32 q0, #0xffff
@@ -162,7 +162,7 @@ entry:
ret <4 x i32> <i32 4294901760, i32 4294901760, i32 4294901760, i32 4294901760>
}
-define arm_aapcs_vfpcc <4 x i32> @mov_int32_4278190335(i32 *%dest) {
+define arm_aapcs_vfpcc <4 x i32> @mov_int32_4278190335() {
; CHECK-LABEL: mov_int32_4278190335:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adr r0, .LCPI16_0
@@ -179,7 +179,7 @@ entry:
ret <4 x i32> <i32 4278190335, i32 4278190335, i32 4278190335, i32 4278190335>
}
-define arm_aapcs_vfpcc <4 x i32> @mov_int32_4278255615(i32 *%dest) {
+define arm_aapcs_vfpcc <4 x i32> @mov_int32_4278255615() {
; CHECK-LABEL: mov_int32_4278255615:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmvn.i32 q0, #0xff0000
@@ -188,8 +188,8 @@ entry:
ret <4 x i32> <i32 4278255615, i32 4278255615, i32 4278255615, i32 4278255615>
}
-define arm_aapcs_vfpcc <4 x float> @mov_float_1(float *%dest) {
-; CHECK-LABEL: mov_float_1:
+define arm_aapcs_vfpcc <2 x i64> @mov_int64_1() {
+; CHECK-LABEL: mov_int64_1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adr r0, .LCPI18_0
; CHECK-NEXT: vldrw.u32 q0, [r0]
@@ -197,6 +197,32 @@ define arm_aapcs_vfpcc <4 x float> @mov_float_1(float *%dest) {
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI18_0:
+; CHECK-NEXT: .long 1 @ double 4.9406564584124654E-324
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long 1 @ double 4.9406564584124654E-324
+; CHECK-NEXT: .long 0
+entry:
+ ret <2 x i64> <i64 1, i64 1>
+}
+
+define arm_aapcs_vfpcc <2 x i64> @mov_int64_m1() {
+; CHECK-LABEL: mov_int64_m1:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov.i8 q0, #0xff
+; CHECK-NEXT: bx lr
+entry:
+ ret <2 x i64> <i64 -1, i64 -1>
+}
+
+define arm_aapcs_vfpcc <4 x float> @mov_float_1() {
+; CHECK-LABEL: mov_float_1:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: adr r0, .LCPI20_0
+; CHECK-NEXT: vldrw.u32 q0, [r0]
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 4
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI20_0:
; CHECK-NEXT: .long 1065353216 @ double 0.007812501848093234
; CHECK-NEXT: .long 1065353216
; CHECK-NEXT: .long 1065353216 @ double 0.007812501848093234
@@ -205,15 +231,15 @@ entry:
ret <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
}
-define arm_aapcs_vfpcc <4 x float> @mov_float_m3(float *%dest) {
+define arm_aapcs_vfpcc <4 x float> @mov_float_m3() {
; CHECK-LABEL: mov_float_m3:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: adr r0, .LCPI19_0
+; CHECK-NEXT: adr r0, .LCPI21_0
; CHECK-NEXT: vldrw.u32 q0, [r0]
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.1:
-; CHECK-NEXT: .LCPI19_0:
+; CHECK-NEXT: .LCPI21_0:
; CHECK-NEXT: .long 3225419776 @ double -32.000022917985916
; CHECK-NEXT: .long 3225419776
; CHECK-NEXT: .long 3225419776 @ double -32.000022917985916
@@ -222,7 +248,7 @@ entry:
ret <4 x float> <float -3.000000e+00, float -3.000000e+00, float -3.000000e+00, float -3.000000e+00>
}
-define arm_aapcs_vfpcc <8 x half> @mov_float16_1(half *%dest) {
+define arm_aapcs_vfpcc <8 x half> @mov_float16_1() {
; CHECK-LABEL: mov_float16_1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.i16 q0, #0x3c00
@@ -232,7 +258,7 @@ entry:
ret <8 x half> <half 1.000000e+00, half 1.000000e+00, half 1.000000e+00, half 1.000000e+00, half 1.000000e+00, half 1.000000e+00, half 1.000000e+00, half 1.000000e+00>
}
-define arm_aapcs_vfpcc <8 x half> @mov_float16_m3(half *%dest) {
+define arm_aapcs_vfpcc <8 x half> @mov_float16_m3() {
; CHECK-LABEL: mov_float16_m3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.i16 q0, #0xc200
@@ -241,3 +267,20 @@ define arm_aapcs_vfpcc <8 x half> @mov_float16_m3(half *%dest) {
entry:
ret <8 x half> <half -3.000000e+00, half -3.000000e+00, half -3.000000e+00, half -3.000000e+00, half -3.000000e+00, half -3.000000e+00, half -3.000000e+00, half -3.000000e+00>
}
+
+define arm_aapcs_vfpcc <2 x double> @mov_double_1() {
+; CHECK-LABEL: mov_double_1:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: adr r0, .LCPI24_0
+; CHECK-NEXT: vldrw.u32 q0, [r0]
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 4
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI24_0:
+; CHECK-NEXT: .long 0 @ double 1
+; CHECK-NEXT: .long 1072693248
+; CHECK-NEXT: .long 0 @ double 1
+; CHECK-NEXT: .long 1072693248
+entry:
+ ret <2 x double> <double 1.000000e+00, double 1.000000e+00>
+}
diff --git a/llvm/test/CodeGen/Thumb2/mve-vmvnimm.ll b/llvm/test/CodeGen/Thumb2/mve-vmvnimm.ll
index fb27d895484..8f6ea13befc 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vmvnimm.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vmvnimm.ll
@@ -2,7 +2,7 @@
; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s
; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s
-define arm_aapcs_vfpcc <8 x i16> @mov_int16_511(i16 *%dest) {
+define arm_aapcs_vfpcc <8 x i16> @mov_int16_511() {
; CHECK-LABEL: mov_int16_511:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmvn.i16 q0, #0xfe00
@@ -11,7 +11,7 @@ entry:
ret <8 x i16> <i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511>
}
-define arm_aapcs_vfpcc <8 x i16> @mov_int16_65281(i16 *%dest) {
+define arm_aapcs_vfpcc <8 x i16> @mov_int16_65281() {
; CHECK-LABEL: mov_int16_65281:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmvn.i16 q0, #0xfe
@@ -20,7 +20,7 @@ entry:
ret <8 x i16> <i16 65281, i16 65281, i16 65281, i16 65281, i16 65281, i16 65281, i16 65281, i16 65281>
}
-define arm_aapcs_vfpcc <4 x i32> @mov_int32_m7(i32 *%dest) {
+define arm_aapcs_vfpcc <4 x i32> @mov_int32_m7() {
; CHECK-LABEL: mov_int32_m7:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmvn.i32 q0, #0x6
@@ -29,7 +29,7 @@ entry:
ret <4 x i32> <i32 -7, i32 -7, i32 -7, i32 -7>
}
-define arm_aapcs_vfpcc <4 x i32> @mov_int32_m769(i32 *%dest) {
+define arm_aapcs_vfpcc <4 x i32> @mov_int32_m769() {
; CHECK-LABEL: mov_int32_m769:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmvn.i32 q0, #0x300
@@ -38,7 +38,7 @@ entry:
ret <4 x i32> <i32 -769, i32 -769, i32 -769, i32 -769>
}
-define arm_aapcs_vfpcc <4 x i32> @mov_int32_m262145(i32 *%dest) {
+define arm_aapcs_vfpcc <4 x i32> @mov_int32_m262145() {
; CHECK-LABEL: mov_int32_m262145:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmvn.i32 q0, #0x40000
@@ -47,7 +47,7 @@ entry:
ret <4 x i32> <i32 -262145, i32 -262145, i32 -262145, i32 -262145>
}
-define arm_aapcs_vfpcc <4 x i32> @mov_int32_m134217729(i32 *%dest) {
+define arm_aapcs_vfpcc <4 x i32> @mov_int32_m134217729() {
; CHECK-LABEL: mov_int32_m134217729:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmvn.i32 q0, #0x8000000
@@ -56,7 +56,7 @@ entry:
ret <4 x i32> <i32 -134217729, i32 -134217729, i32 -134217729, i32 -134217729>
}
-define arm_aapcs_vfpcc <4 x i32> @mov_int32_4294902528(i32 *%dest) {
+define arm_aapcs_vfpcc <4 x i32> @mov_int32_4294902528() {
; CHECK-LABEL: mov_int32_4294902528:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmvn.i32 q0, #0xfcff
@@ -65,7 +65,7 @@ entry:
ret <4 x i32> <i32 4294902528, i32 4294902528, i32 4294902528, i32 4294902528>
}
-define arm_aapcs_vfpcc <4 x i32> @mov_int32_4278386688(i32 *%dest) {
+define arm_aapcs_vfpcc <4 x i32> @mov_int32_4278386688() {
; CHECK-LABEL: mov_int32_4278386688:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adr r0, .LCPI7_0