diff options
-rw-r--r-- | llvm/include/llvm/IR/IntrinsicsAArch64.td | 32 | ||||
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td | 42 | ||||
-rw-r--r-- | llvm/lib/Target/AArch64/SVEInstrFormats.td | 19 | ||||
-rw-r--r-- | llvm/test/CodeGen/AArch64/sve-int-arith-pred.ll | 273 | ||||
-rw-r--r-- | llvm/test/CodeGen/AArch64/sve-int-div-pred.ll | 91 | ||||
-rw-r--r-- | llvm/test/CodeGen/AArch64/sve-int-log-pred.ll | 140 | ||||
-rw-r--r-- | llvm/test/CodeGen/AArch64/sve-int-mad-pred.ll | 199 | ||||
-rw-r--r-- | llvm/test/CodeGen/AArch64/sve-int-mul-pred.ll | 134 |
8 files changed, 901 insertions, 29 deletions
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index 15030d0cd18..e4913148aa9 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -834,6 +834,11 @@ class AdvSIMD_Pred2VectorArg_Intrinsic [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; +class AdvSIMD_Pred3VectorArg_Intrinsic + : Intrinsic<[llvm_anyvector_ty], + [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem]>; + // // Integer arithmetic @@ -843,7 +848,32 @@ def int_aarch64_sve_add : AdvSIMD_Pred2VectorArg_Intrinsic; def int_aarch64_sve_sub : AdvSIMD_Pred2VectorArg_Intrinsic; def int_aarch64_sve_subr : AdvSIMD_Pred2VectorArg_Intrinsic; -def int_aarch64_sve_bic : AdvSIMD_2VectorArg_Intrinsic; +def int_aarch64_sve_and : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_or : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_xor : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_bic : AdvSIMD_2VectorArg_Intrinsic; +def int_aarch64_sve_bic_pred : AdvSIMD_Pred2VectorArg_Intrinsic; + +def int_aarch64_sve_mul : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_smulh : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_umulh : AdvSIMD_Pred2VectorArg_Intrinsic; + +def int_aarch64_sve_sdiv : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_udiv : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_sdivr : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_udivr : AdvSIMD_Pred2VectorArg_Intrinsic; + +def int_aarch64_sve_smax : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_umax : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_smin : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_umin : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_sabd : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_uabd : AdvSIMD_Pred2VectorArg_Intrinsic; + +def 
int_aarch64_sve_mad : AdvSIMD_Pred3VectorArg_Intrinsic; +def int_aarch64_sve_msb : AdvSIMD_Pred3VectorArg_Intrinsic; +def int_aarch64_sve_mla : AdvSIMD_Pred3VectorArg_Intrinsic; +def int_aarch64_sve_mls : AdvSIMD_Pred3VectorArg_Intrinsic; def int_aarch64_sve_abs : AdvSIMD_Merged1VectorArg_Intrinsic; def int_aarch64_sve_neg : AdvSIMD_Merged1VectorArg_Intrinsic; diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index 8d8ec199f8a..a732c6e1e31 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -34,10 +34,10 @@ let Predicates = [HasSVE] in { defm SUB_ZPmZ : sve_int_bin_pred_arit_0<0b001, "sub", int_aarch64_sve_sub>; defm SUBR_ZPmZ : sve_int_bin_pred_arit_0<0b011, "subr", int_aarch64_sve_subr>; - defm ORR_ZPmZ : sve_int_bin_pred_log<0b000, "orr", null_frag>; - defm EOR_ZPmZ : sve_int_bin_pred_log<0b001, "eor", null_frag>; - defm AND_ZPmZ : sve_int_bin_pred_log<0b010, "and", null_frag>; - defm BIC_ZPmZ : sve_int_bin_pred_log<0b011, "bic", null_frag>; + defm ORR_ZPmZ : sve_int_bin_pred_log<0b000, "orr", int_aarch64_sve_or>; + defm EOR_ZPmZ : sve_int_bin_pred_log<0b001, "eor", int_aarch64_sve_xor>; + defm AND_ZPmZ : sve_int_bin_pred_log<0b010, "and", int_aarch64_sve_and>; + defm BIC_ZPmZ : sve_int_bin_pred_log<0b011, "bic", int_aarch64_sve_bic_pred>; defm ADD_ZI : sve_int_arith_imm0<0b000, "add">; defm SUB_ZI : sve_int_arith_imm0<0b001, "sub">; @@ -47,10 +47,10 @@ let Predicates = [HasSVE] in { defm SQSUB_ZI : sve_int_arith_imm0<0b110, "sqsub">; defm UQSUB_ZI : sve_int_arith_imm0<0b111, "uqsub">; - defm MAD_ZPmZZ : sve_int_mladdsub_vvv_pred<0b0, "mad">; - defm MSB_ZPmZZ : sve_int_mladdsub_vvv_pred<0b1, "msb">; - defm MLA_ZPmZZ : sve_int_mlas_vvv_pred<0b0, "mla">; - defm MLS_ZPmZZ : sve_int_mlas_vvv_pred<0b1, "mls">; + defm MAD_ZPmZZ : sve_int_mladdsub_vvv_pred<0b0, "mad", int_aarch64_sve_mad>; + defm MSB_ZPmZZ : sve_int_mladdsub_vvv_pred<0b1, "msb", 
int_aarch64_sve_msb>; + defm MLA_ZPmZZ : sve_int_mlas_vvv_pred<0b0, "mla", int_aarch64_sve_mla>; + defm MLS_ZPmZZ : sve_int_mlas_vvv_pred<0b1, "mls", int_aarch64_sve_mls>; // SVE predicated integer reductions. defm SADDV_VPZ : sve_int_reduce_0_saddv<0b000, "saddv">; @@ -73,14 +73,14 @@ let Predicates = [HasSVE] in { defm UMIN_ZI : sve_int_arith_imm1<0b11, "umin", imm0_255>; defm MUL_ZI : sve_int_arith_imm2<"mul">; - defm MUL_ZPmZ : sve_int_bin_pred_arit_2<0b000, "mul", null_frag>; - defm SMULH_ZPmZ : sve_int_bin_pred_arit_2<0b010, "smulh", null_frag>; - defm UMULH_ZPmZ : sve_int_bin_pred_arit_2<0b011, "umulh", null_frag>; + defm MUL_ZPmZ : sve_int_bin_pred_arit_2<0b000, "mul", int_aarch64_sve_mul>; + defm SMULH_ZPmZ : sve_int_bin_pred_arit_2<0b010, "smulh", int_aarch64_sve_smulh>; + defm UMULH_ZPmZ : sve_int_bin_pred_arit_2<0b011, "umulh", int_aarch64_sve_umulh>; - defm SDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b100, "sdiv", null_frag>; - defm UDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b101, "udiv", null_frag>; - defm SDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b110, "sdivr", null_frag>; - defm UDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b111, "udivr", null_frag>; + defm SDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b100, "sdiv", int_aarch64_sve_sdiv>; + defm UDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b101, "udiv", int_aarch64_sve_udiv>; + defm SDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b110, "sdivr", int_aarch64_sve_sdivr>; + defm UDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b111, "udivr", int_aarch64_sve_udivr>; defm SDOT_ZZZ : sve_intx_dot<0b0, "sdot", int_aarch64_sve_sdot>; defm UDOT_ZZZ : sve_intx_dot<0b1, "udot", int_aarch64_sve_udot>; @@ -105,12 +105,12 @@ let Predicates = [HasSVE] in { defm FABS_ZPmZ : sve_int_un_pred_arit_1_fp<0b100, "fabs">; defm FNEG_ZPmZ : sve_int_un_pred_arit_1_fp<0b101, "fneg">; - defm SMAX_ZPmZ : sve_int_bin_pred_arit_1<0b000, "smax", null_frag>; - defm UMAX_ZPmZ : sve_int_bin_pred_arit_1<0b001, "umax", null_frag>; - defm SMIN_ZPmZ : 
sve_int_bin_pred_arit_1<0b010, "smin", null_frag>; - defm UMIN_ZPmZ : sve_int_bin_pred_arit_1<0b011, "umin", null_frag>; - defm SABD_ZPmZ : sve_int_bin_pred_arit_1<0b100, "sabd", null_frag>; - defm UABD_ZPmZ : sve_int_bin_pred_arit_1<0b101, "uabd", null_frag>; + defm SMAX_ZPmZ : sve_int_bin_pred_arit_1<0b000, "smax", int_aarch64_sve_smax>; + defm UMAX_ZPmZ : sve_int_bin_pred_arit_1<0b001, "umax", int_aarch64_sve_umax>; + defm SMIN_ZPmZ : sve_int_bin_pred_arit_1<0b010, "smin", int_aarch64_sve_smin>; + defm UMIN_ZPmZ : sve_int_bin_pred_arit_1<0b011, "umin", int_aarch64_sve_umin>; + defm SABD_ZPmZ : sve_int_bin_pred_arit_1<0b100, "sabd", int_aarch64_sve_sabd>; + defm UABD_ZPmZ : sve_int_bin_pred_arit_1<0b101, "uabd", int_aarch64_sve_uabd>; defm FRECPE_ZZ : sve_fp_2op_u_zd<0b110, "frecpe">; defm FRSQRTE_ZZ : sve_fp_2op_u_zd<0b111, "frsqrte">; diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index 8dc4efc7c88..467b573627f 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -298,6 +298,11 @@ class SVE_3_Op_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1, : Pat<(vtd (op vt1:$Op1, vt2:$Op2, vt3:$Op3)), (inst $Op1, $Op2, $Op3)>; +class SVE_4_Op_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1, + ValueType vt2, ValueType vt3, ValueType vt4, Instruction inst> +: Pat<(vtd (op vt1:$Op1, vt2:$Op2, vt3:$Op3, vt4:$Op4)), + (inst $Op1, $Op2, $Op3, $Op4)>; + def SVEDup0Undef : ComplexPattern<i64, 0, "SelectDupZeroOrUndef", []>; //===----------------------------------------------------------------------===// @@ -1926,11 +1931,16 @@ class sve_int_mladdsub_vvv_pred<bits<2> sz8_64, bits<1> opc, string asm, let ElementSize = zprty.ElementSize; } -multiclass sve_int_mladdsub_vvv_pred<bits<1> opc, string asm> { +multiclass sve_int_mladdsub_vvv_pred<bits<1> opc, string asm, SDPatternOperator op> { def _B : sve_int_mladdsub_vvv_pred<0b00, opc, asm, ZPR8>; def _H : 
sve_int_mladdsub_vvv_pred<0b01, opc, asm, ZPR16>; def _S : sve_int_mladdsub_vvv_pred<0b10, opc, asm, ZPR32>; def _D : sve_int_mladdsub_vvv_pred<0b11, opc, asm, ZPR64>; + + def : SVE_4_Op_Pat<nxv16i8, op, nxv16i1, nxv16i8, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>; + def : SVE_4_Op_Pat<nxv8i16, op, nxv8i1, nxv8i16, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>; + def : SVE_4_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>; + def : SVE_4_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>; } class sve_int_mlas_vvv_pred<bits<2> sz8_64, bits<1> opc, string asm, @@ -1958,11 +1968,16 @@ class sve_int_mlas_vvv_pred<bits<2> sz8_64, bits<1> opc, string asm, let ElementSize = zprty.ElementSize; } -multiclass sve_int_mlas_vvv_pred<bits<1> opc, string asm> { +multiclass sve_int_mlas_vvv_pred<bits<1> opc, string asm, SDPatternOperator op> { def _B : sve_int_mlas_vvv_pred<0b00, opc, asm, ZPR8>; def _H : sve_int_mlas_vvv_pred<0b01, opc, asm, ZPR16>; def _S : sve_int_mlas_vvv_pred<0b10, opc, asm, ZPR32>; def _D : sve_int_mlas_vvv_pred<0b11, opc, asm, ZPR64>; + + def : SVE_4_Op_Pat<nxv16i8, op, nxv16i1, nxv16i8, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>; + def : SVE_4_Op_Pat<nxv8i16, op, nxv8i1, nxv8i16, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>; + def : SVE_4_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>; + def : SVE_4_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>; } //===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/AArch64/sve-int-arith-pred.ll b/llvm/test/CodeGen/AArch64/sve-int-arith-pred.ll index 7f642e7a146..d0660e733d7 100644 --- a/llvm/test/CodeGen/AArch64/sve-int-arith-pred.ll +++ b/llvm/test/CodeGen/AArch64/sve-int-arith-pred.ll @@ -40,9 +40,6 @@ define <vscale x 2 x i64> @add_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, ret <vscale 
x 2 x i64> %out } - - - define <vscale x 16 x i8> @sub_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) { ; CHECK-LABEL: sub_i8: ; CHECK: sub z0.b, p0/m, z0.b, z1.b @@ -83,8 +80,6 @@ define <vscale x 2 x i64> @sub_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, ret <vscale x 2 x i64> %out } - - define <vscale x 16 x i8> @subr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) { ; CHECK-LABEL: subr_i8: ; CHECK: subr z0.b, p0/m, z0.b, z1.b @@ -125,7 +120,245 @@ define <vscale x 2 x i64> @subr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a ret <vscale x 2 x i64> %out } +define <vscale x 16 x i8> @smax_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) { +; CHECK-LABEL: smax_i8: +; CHECK: smax z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call <vscale x 16 x i8> @llvm.aarch64.sve.smax.nxv16i8(<vscale x 16 x i1> %pg, + <vscale x 16 x i8> %a, + <vscale x 16 x i8> %b) + ret <vscale x 16 x i8> %out +} + +define <vscale x 8 x i16> @smax_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) { +; CHECK-LABEL: smax_i16: +; CHECK: smax z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call <vscale x 8 x i16> @llvm.aarch64.sve.smax.nxv8i16(<vscale x 8 x i1> %pg, + <vscale x 8 x i16> %a, + <vscale x 8 x i16> %b) + ret <vscale x 8 x i16> %out +} +define <vscale x 4 x i32> @smax_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { +; CHECK-LABEL: smax_i32: +; CHECK: smax z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call <vscale x 4 x i32> @llvm.aarch64.sve.smax.nxv4i32(<vscale x 4 x i1> %pg, + <vscale x 4 x i32> %a, + <vscale x 4 x i32> %b) + ret <vscale x 4 x i32> %out +} + +define <vscale x 2 x i64> @smax_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { +; CHECK-LABEL: smax_i64: +; CHECK: smax z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call <vscale x 2 x i64> @llvm.aarch64.sve.smax.nxv2i64(<vscale x 2 x i1> %pg, + <vscale x 
2 x i64> %a, + <vscale x 2 x i64> %b) + ret <vscale x 2 x i64> %out +} + +define <vscale x 16 x i8> @umax_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) { +; CHECK-LABEL: umax_i8: +; CHECK: umax z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call <vscale x 16 x i8> @llvm.aarch64.sve.umax.nxv16i8(<vscale x 16 x i1> %pg, + <vscale x 16 x i8> %a, + <vscale x 16 x i8> %b) + ret <vscale x 16 x i8> %out +} + +define <vscale x 8 x i16> @umax_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) { +; CHECK-LABEL: umax_i16: +; CHECK: umax z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call <vscale x 8 x i16> @llvm.aarch64.sve.umax.nxv8i16(<vscale x 8 x i1> %pg, + <vscale x 8 x i16> %a, + <vscale x 8 x i16> %b) + ret <vscale x 8 x i16> %out +} + +define <vscale x 4 x i32> @umax_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { +; CHECK-LABEL: umax_i32: +; CHECK: umax z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call <vscale x 4 x i32> @llvm.aarch64.sve.umax.nxv4i32(<vscale x 4 x i1> %pg, + <vscale x 4 x i32> %a, + <vscale x 4 x i32> %b) + ret <vscale x 4 x i32> %out +} + +define <vscale x 2 x i64> @umax_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { +; CHECK-LABEL: umax_i64: +; CHECK: umax z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call <vscale x 2 x i64> @llvm.aarch64.sve.umax.nxv2i64(<vscale x 2 x i1> %pg, + <vscale x 2 x i64> %a, + <vscale x 2 x i64> %b) + ret <vscale x 2 x i64> %out +} + +define <vscale x 16 x i8> @smin_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) { +; CHECK-LABEL: smin_i8: +; CHECK: smin z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call <vscale x 16 x i8> @llvm.aarch64.sve.smin.nxv16i8(<vscale x 16 x i1> %pg, + <vscale x 16 x i8> %a, + <vscale x 16 x i8> %b) + ret <vscale x 16 x i8> %out +} + +define <vscale x 8 x i16> @smin_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) { +; 
CHECK-LABEL: smin_i16: +; CHECK: smin z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call <vscale x 8 x i16> @llvm.aarch64.sve.smin.nxv8i16(<vscale x 8 x i1> %pg, + <vscale x 8 x i16> %a, + <vscale x 8 x i16> %b) + ret <vscale x 8 x i16> %out +} + +define <vscale x 4 x i32> @smin_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { +; CHECK-LABEL: smin_i32: +; CHECK: smin z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call <vscale x 4 x i32> @llvm.aarch64.sve.smin.nxv4i32(<vscale x 4 x i1> %pg, + <vscale x 4 x i32> %a, + <vscale x 4 x i32> %b) + ret <vscale x 4 x i32> %out +} + +define <vscale x 2 x i64> @smin_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { +; CHECK-LABEL: smin_i64: +; CHECK: smin z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call <vscale x 2 x i64> @llvm.aarch64.sve.smin.nxv2i64(<vscale x 2 x i1> %pg, + <vscale x 2 x i64> %a, + <vscale x 2 x i64> %b) + ret <vscale x 2 x i64> %out +} + +define <vscale x 16 x i8> @umin_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) { +; CHECK-LABEL: umin_i8: +; CHECK: umin z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call <vscale x 16 x i8> @llvm.aarch64.sve.umin.nxv16i8(<vscale x 16 x i1> %pg, + <vscale x 16 x i8> %a, + <vscale x 16 x i8> %b) + ret <vscale x 16 x i8> %out +} + +define <vscale x 8 x i16> @umin_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) { +; CHECK-LABEL: umin_i16: +; CHECK: umin z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call <vscale x 8 x i16> @llvm.aarch64.sve.umin.nxv8i16(<vscale x 8 x i1> %pg, + <vscale x 8 x i16> %a, + <vscale x 8 x i16> %b) + ret <vscale x 8 x i16> %out +} + +define <vscale x 4 x i32> @umin_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { +; CHECK-LABEL: umin_i32: +; CHECK: umin z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call <vscale x 4 x i32> @llvm.aarch64.sve.umin.nxv4i32(<vscale x 4 x i1> %pg, + <vscale x 4 x 
i32> %a, + <vscale x 4 x i32> %b) + ret <vscale x 4 x i32> %out +} + +define <vscale x 2 x i64> @umin_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { +; CHECK-LABEL: umin_i64: +; CHECK: umin z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call <vscale x 2 x i64> @llvm.aarch64.sve.umin.nxv2i64(<vscale x 2 x i1> %pg, + <vscale x 2 x i64> %a, + <vscale x 2 x i64> %b) + ret <vscale x 2 x i64> %out +} + +define <vscale x 16 x i8> @sabd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) { +; CHECK-LABEL: sabd_i8: +; CHECK: sabd z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sabd.nxv16i8(<vscale x 16 x i1> %pg, + <vscale x 16 x i8> %a, + <vscale x 16 x i8> %b) + ret <vscale x 16 x i8> %out +} + +define <vscale x 8 x i16> @sabd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) { +; CHECK-LABEL: sabd_i16: +; CHECK: sabd z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sabd.nxv8i16(<vscale x 8 x i1> %pg, + <vscale x 8 x i16> %a, + <vscale x 8 x i16> %b) + ret <vscale x 8 x i16> %out +} + +define <vscale x 4 x i32> @sabd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { +; CHECK-LABEL: sabd_i32: +; CHECK: sabd z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sabd.nxv4i32(<vscale x 4 x i1> %pg, + <vscale x 4 x i32> %a, + <vscale x 4 x i32> %b) + ret <vscale x 4 x i32> %out +} + +define <vscale x 2 x i64> @sabd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { +; CHECK-LABEL: sabd_i64: +; CHECK: sabd z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sabd.nxv2i64(<vscale x 2 x i1> %pg, + <vscale x 2 x i64> %a, + <vscale x 2 x i64> %b) + ret <vscale x 2 x i64> %out +} + +define <vscale x 16 x i8> @uabd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) { +; 
CHECK-LABEL: uabd_i8: +; CHECK: uabd z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uabd.nxv16i8(<vscale x 16 x i1> %pg, + <vscale x 16 x i8> %a, + <vscale x 16 x i8> %b) + ret <vscale x 16 x i8> %out +} + +define <vscale x 8 x i16> @uabd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) { +; CHECK-LABEL: uabd_i16: +; CHECK: uabd z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uabd.nxv8i16(<vscale x 8 x i1> %pg, + <vscale x 8 x i16> %a, + <vscale x 8 x i16> %b) + ret <vscale x 8 x i16> %out +} + +define <vscale x 4 x i32> @uabd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { +; CHECK-LABEL: uabd_i32: +; CHECK: uabd z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uabd.nxv4i32(<vscale x 4 x i1> %pg, + <vscale x 4 x i32> %a, + <vscale x 4 x i32> %b) + ret <vscale x 4 x i32> %out +} + +define <vscale x 2 x i64> @uabd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { +; CHECK-LABEL: uabd_i64: +; CHECK: uabd z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uabd.nxv2i64(<vscale x 2 x i1> %pg, + <vscale x 2 x i64> %a, + <vscale x 2 x i64> %b) + ret <vscale x 2 x i64> %out +} declare <vscale x 16 x i8> @llvm.aarch64.sve.add.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>) declare <vscale x 8 x i16> @llvm.aarch64.sve.add.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>) @@ -141,3 +374,33 @@ declare <vscale x 16 x i8> @llvm.aarch64.sve.subr.nxv16i8(<vscale x 16 x i1>, < declare <vscale x 8 x i16> @llvm.aarch64.sve.subr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>) declare <vscale x 4 x i32> @llvm.aarch64.sve.subr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>) declare <vscale x 2 x i64> @llvm.aarch64.sve.subr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x 
i64>, <vscale x 2 x i64>) + +declare <vscale x 16 x i8> @llvm.aarch64.sve.smax.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>) +declare <vscale x 8 x i16> @llvm.aarch64.sve.smax.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>) +declare <vscale x 4 x i32> @llvm.aarch64.sve.smax.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>) +declare <vscale x 2 x i64> @llvm.aarch64.sve.smax.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>) + +declare <vscale x 16 x i8> @llvm.aarch64.sve.umax.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>) +declare <vscale x 8 x i16> @llvm.aarch64.sve.umax.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>) +declare <vscale x 4 x i32> @llvm.aarch64.sve.umax.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>) +declare <vscale x 2 x i64> @llvm.aarch64.sve.umax.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>) + +declare <vscale x 16 x i8> @llvm.aarch64.sve.smin.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>) +declare <vscale x 8 x i16> @llvm.aarch64.sve.smin.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>) +declare <vscale x 4 x i32> @llvm.aarch64.sve.smin.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>) +declare <vscale x 2 x i64> @llvm.aarch64.sve.smin.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>) + +declare <vscale x 16 x i8> @llvm.aarch64.sve.umin.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>) +declare <vscale x 8 x i16> @llvm.aarch64.sve.umin.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>) +declare <vscale x 4 x i32> @llvm.aarch64.sve.umin.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>) +declare <vscale x 2 x i64> @llvm.aarch64.sve.umin.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>) + +declare <vscale x 16 x i8> @llvm.aarch64.sve.sabd.nxv16i8(<vscale x 16 
x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>) +declare <vscale x 8 x i16> @llvm.aarch64.sve.sabd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>) +declare <vscale x 4 x i32> @llvm.aarch64.sve.sabd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>) +declare <vscale x 2 x i64> @llvm.aarch64.sve.sabd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>) + +declare <vscale x 16 x i8> @llvm.aarch64.sve.uabd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>) +declare <vscale x 8 x i16> @llvm.aarch64.sve.uabd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>) +declare <vscale x 4 x i32> @llvm.aarch64.sve.uabd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>) +declare <vscale x 2 x i64> @llvm.aarch64.sve.uabd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>) diff --git a/llvm/test/CodeGen/AArch64/sve-int-div-pred.ll b/llvm/test/CodeGen/AArch64/sve-int-div-pred.ll new file mode 100644 index 00000000000..dd25f27ab4e --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-int-div-pred.ll @@ -0,0 +1,91 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s + +define <vscale x 4 x i32> @sdiv_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { +; CHECK-LABEL: sdiv_i32: +; CHECK: sdiv z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1> %pg, + <vscale x 4 x i32> %a, + <vscale x 4 x i32> %b) + ret <vscale x 4 x i32> %out +} + +define <vscale x 2 x i64> @sdiv_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { +; CHECK-LABEL: sdiv_i64: +; CHECK: sdiv z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sdiv.nxv2i64(<vscale x 2 x i1> %pg, + <vscale x 2 x i64> %a, + <vscale x 2 x i64> %b) + ret <vscale x 2 x i64> %out +} + +define <vscale x 4 x i32> @udiv_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, 
<vscale x 4 x i32> %b) { +; CHECK-LABEL: udiv_i32: +; CHECK: udiv z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call <vscale x 4 x i32> @llvm.aarch64.sve.udiv.nxv4i32(<vscale x 4 x i1> %pg, + <vscale x 4 x i32> %a, + <vscale x 4 x i32> %b) + ret <vscale x 4 x i32> %out +} + +define <vscale x 2 x i64> @udiv_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { +; CHECK-LABEL: udiv_i64: +; CHECK: udiv z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call <vscale x 2 x i64> @llvm.aarch64.sve.udiv.nxv2i64(<vscale x 2 x i1> %pg, + <vscale x 2 x i64> %a, + <vscale x 2 x i64> %b) + ret <vscale x 2 x i64> %out +} + +define <vscale x 4 x i32> @sdivr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { +; CHECK-LABEL: sdivr_i32: +; CHECK: sdivr z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sdivr.nxv4i32(<vscale x 4 x i1> %pg, + <vscale x 4 x i32> %a, + <vscale x 4 x i32> %b) + ret <vscale x 4 x i32> %out +} + +define <vscale x 2 x i64> @sdivr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { +; CHECK-LABEL: sdivr_i64: +; CHECK: sdivr z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sdivr.nxv2i64(<vscale x 2 x i1> %pg, + <vscale x 2 x i64> %a, + <vscale x 2 x i64> %b) + ret <vscale x 2 x i64> %out +} + +define <vscale x 4 x i32> @udivr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { +; CHECK-LABEL: udivr_i32: +; CHECK: udivr z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call <vscale x 4 x i32> @llvm.aarch64.sve.udivr.nxv4i32(<vscale x 4 x i1> %pg, + <vscale x 4 x i32> %a, + <vscale x 4 x i32> %b) + ret <vscale x 4 x i32> %out +} + +define <vscale x 2 x i64> @udivr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { +; CHECK-LABEL: udivr_i64: +; CHECK: udivr z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call <vscale x 2 x i64> 
@llvm.aarch64.sve.udivr.nxv2i64(<vscale x 2 x i1> %pg, + <vscale x 2 x i64> %a, + <vscale x 2 x i64> %b) + ret <vscale x 2 x i64> %out +} + +declare <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>) +declare <vscale x 2 x i64> @llvm.aarch64.sve.sdiv.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>) +declare <vscale x 4 x i32> @llvm.aarch64.sve.udiv.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>) +declare <vscale x 2 x i64> @llvm.aarch64.sve.udiv.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>) +declare <vscale x 4 x i32> @llvm.aarch64.sve.sdivr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>) +declare <vscale x 2 x i64> @llvm.aarch64.sve.sdivr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>) +declare <vscale x 4 x i32> @llvm.aarch64.sve.udivr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>) +declare <vscale x 2 x i64> @llvm.aarch64.sve.udivr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>) + diff --git a/llvm/test/CodeGen/AArch64/sve-int-log-pred.ll b/llvm/test/CodeGen/AArch64/sve-int-log-pred.ll new file mode 100644 index 00000000000..5e12981fd67 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-int-log-pred.ll @@ -0,0 +1,140 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s + +define <vscale x 16 x i8> @and_pred_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) { +; CHECK-LABEL: and_pred_i8: +; CHECK: and z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call <vscale x 16 x i8> @llvm.aarch64.sve.and.nxv16i8(<vscale x 16 x i1> %pg, + <vscale x 16 x i8> %a, + <vscale x 16 x i8> %b) + ret <vscale x 16 x i8> %out +} + +define <vscale x 8 x i16> @and_pred_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) { +; CHECK-LABEL: and_pred_i16: +; CHECK: and z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call <vscale x 8 x
i16> @llvm.aarch64.sve.and.nxv8i16(<vscale x 8 x i1> %pg, + <vscale x 8 x i16> %a, + <vscale x 8 x i16> %b) + ret <vscale x 8 x i16> %out +} + + +define <vscale x 4 x i32> @and_pred_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { +; CHECK-LABEL: and_pred_i32: +; CHECK: and z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call <vscale x 4 x i32> @llvm.aarch64.sve.and.nxv4i32(<vscale x 4 x i1> %pg, + <vscale x 4 x i32> %a, + <vscale x 4 x i32> %b) + ret <vscale x 4 x i32> %out +} + +define <vscale x 2 x i64> @and_pred_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { +; CHECK-LABEL: and_pred_i64: +; CHECK: and z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call <vscale x 2 x i64> @llvm.aarch64.sve.and.nxv2i64(<vscale x 2 x i1> %pg, + <vscale x 2 x i64> %a, + <vscale x 2 x i64> %b) + ret <vscale x 2 x i64> %out +} + + +define <vscale x 16 x i8> @or_pred_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) { +; CHECK-LABEL: or_pred_i8: +; CHECK: orr z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call <vscale x 16 x i8> @llvm.aarch64.sve.or.nxv16i8(<vscale x 16 x i1> %pg, + <vscale x 16 x i8> %a, + <vscale x 16 x i8> %b) + ret <vscale x 16 x i8> %out +} + +define <vscale x 8 x i16> @or_pred_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) { +; CHECK-LABEL: or_pred_i16: +; CHECK: orr z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call <vscale x 8 x i16> @llvm.aarch64.sve.or.nxv8i16(<vscale x 8 x i1> %pg, + <vscale x 8 x i16> %a, + <vscale x 8 x i16> %b) + ret <vscale x 8 x i16> %out +} + + +define <vscale x 4 x i32> @or_pred_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { +; CHECK-LABEL: or_pred_i32: +; CHECK: orr z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call <vscale x 4 x i32> @llvm.aarch64.sve.or.nxv4i32(<vscale x 4 x i1> %pg, + <vscale x 4 x i32> %a, + <vscale x 4 x i32> %b) + ret <vscale x 4 x i32> %out +} + +define <vscale
x 2 x i64> @or_pred_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { +; CHECK-LABEL: or_pred_i64: +; CHECK: orr z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call <vscale x 2 x i64> @llvm.aarch64.sve.or.nxv2i64(<vscale x 2 x i1> %pg, + <vscale x 2 x i64> %a, + <vscale x 2 x i64> %b) + ret <vscale x 2 x i64> %out +} + + +define <vscale x 16 x i8> @xor_pred_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) { +; CHECK-LABEL: xor_pred_i8: +; CHECK: eor z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call <vscale x 16 x i8> @llvm.aarch64.sve.xor.nxv16i8(<vscale x 16 x i1> %pg, + <vscale x 16 x i8> %a, + <vscale x 16 x i8> %b) + ret <vscale x 16 x i8> %out +} + +define <vscale x 8 x i16> @xor_pred_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) { +; CHECK-LABEL: xor_pred_i16: +; CHECK: eor z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call <vscale x 8 x i16> @llvm.aarch64.sve.xor.nxv8i16(<vscale x 8 x i1> %pg, + <vscale x 8 x i16> %a, + <vscale x 8 x i16> %b) + ret <vscale x 8 x i16> %out +} + + +define <vscale x 4 x i32> @xor_pred_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { +; CHECK-LABEL: xor_pred_i32: +; CHECK: eor z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call <vscale x 4 x i32> @llvm.aarch64.sve.xor.nxv4i32(<vscale x 4 x i1> %pg, + <vscale x 4 x i32> %a, + <vscale x 4 x i32> %b) + ret <vscale x 4 x i32> %out +} + +define <vscale x 2 x i64> @xor_pred_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { +; CHECK-LABEL: xor_pred_i64: +; CHECK: eor z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call <vscale x 2 x i64> @llvm.aarch64.sve.xor.nxv2i64(<vscale x 2 x i1> %pg, + <vscale x 2 x i64> %a, + <vscale x 2 x i64> %b) + ret <vscale x 2 x i64> %out +} + + +declare <vscale x 16 x i8> @llvm.aarch64.sve.and.nxv16i8(<vscale x 16 x i1>,<vscale x 16 x i8>,<vscale x 16 x i8>) +declare <vscale x 8 x i16>
@llvm.aarch64.sve.and.nxv8i16(<vscale x 8 x i1>,<vscale x 8 x i16>,<vscale x 8 x i16>) +declare <vscale x 4 x i32> @llvm.aarch64.sve.and.nxv4i32(<vscale x 4 x i1>,<vscale x 4 x i32>,<vscale x 4 x i32>) +declare <vscale x 2 x i64> @llvm.aarch64.sve.and.nxv2i64(<vscale x 2 x i1>,<vscale x 2 x i64>,<vscale x 2 x i64>) +declare <vscale x 16 x i8> @llvm.aarch64.sve.or.nxv16i8(<vscale x 16 x i1>,<vscale x 16 x i8>,<vscale x 16 x i8>) +declare <vscale x 8 x i16> @llvm.aarch64.sve.or.nxv8i16(<vscale x 8 x i1>,<vscale x 8 x i16>,<vscale x 8 x i16>) +declare <vscale x 4 x i32> @llvm.aarch64.sve.or.nxv4i32(<vscale x 4 x i1>,<vscale x 4 x i32>,<vscale x 4 x i32>) +declare <vscale x 2 x i64> @llvm.aarch64.sve.or.nxv2i64(<vscale x 2 x i1>,<vscale x 2 x i64>,<vscale x 2 x i64>) +declare <vscale x 16 x i8> @llvm.aarch64.sve.xor.nxv16i8(<vscale x 16 x i1>,<vscale x 16 x i8>,<vscale x 16 x i8>) +declare <vscale x 8 x i16> @llvm.aarch64.sve.xor.nxv8i16(<vscale x 8 x i1>,<vscale x 8 x i16>,<vscale x 8 x i16>) +declare <vscale x 4 x i32> @llvm.aarch64.sve.xor.nxv4i32(<vscale x 4 x i1>,<vscale x 4 x i32>,<vscale x 4 x i32>) +declare <vscale x 2 x i64> @llvm.aarch64.sve.xor.nxv2i64(<vscale x 2 x i1>,<vscale x 2 x i64>,<vscale x 2 x i64>) diff --git a/llvm/test/CodeGen/AArch64/sve-int-mad-pred.ll b/llvm/test/CodeGen/AArch64/sve-int-mad-pred.ll new file mode 100644 index 00000000000..30dc76daa16 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-int-mad-pred.ll @@ -0,0 +1,199 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s + +define <vscale x 16 x i8> @mad_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) { +; CHECK-LABEL: mad_i8: +; CHECK: mad z0.b, p0/m, z1.b, z2.b +; CHECK-NEXT: ret + %out = call <vscale x 16 x i8> @llvm.aarch64.sve.mad.nxv16i8(<vscale x 16 x i1> %pg, + <vscale x 16 x i8> %a, + <vscale x 16 x i8> %b, + <vscale x 16 x i8> %c) + ret <vscale x 16 x i8> %out +} + +define <vscale x 8 x i16> @mad_i16(<vscale
x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) { +; CHECK-LABEL: mad_i16: +; CHECK: mad z0.h, p0/m, z1.h, z2.h +; CHECK-NEXT: ret + %out = call <vscale x 8 x i16> @llvm.aarch64.sve.mad.nxv8i16(<vscale x 8 x i1> %pg, + <vscale x 8 x i16> %a, + <vscale x 8 x i16> %b, + <vscale x 8 x i16> %c) + ret <vscale x 8 x i16> %out +} + +define <vscale x 4 x i32> @mad_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) { +; CHECK-LABEL: mad_i32: +; CHECK: mad z0.s, p0/m, z1.s, z2.s +; CHECK-NEXT: ret + %out = call <vscale x 4 x i32> @llvm.aarch64.sve.mad.nxv4i32(<vscale x 4 x i1> %pg, + <vscale x 4 x i32> %a, + <vscale x 4 x i32> %b, + <vscale x 4 x i32> %c) + ret <vscale x 4 x i32> %out +} + +define <vscale x 2 x i64> @mad_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) { +; CHECK-LABEL: mad_i64: +; CHECK: mad z0.d, p0/m, z1.d, z2.d +; CHECK-NEXT: ret + %out = call <vscale x 2 x i64> @llvm.aarch64.sve.mad.nxv2i64(<vscale x 2 x i1> %pg, + <vscale x 2 x i64> %a, + <vscale x 2 x i64> %b, + <vscale x 2 x i64> %c) + ret <vscale x 2 x i64> %out +} + +define <vscale x 16 x i8> @msb_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) { +; CHECK-LABEL: msb_i8: +; CHECK: msb z0.b, p0/m, z1.b, z2.b +; CHECK-NEXT: ret + %out = call <vscale x 16 x i8> @llvm.aarch64.sve.msb.nxv16i8(<vscale x 16 x i1> %pg, + <vscale x 16 x i8> %a, + <vscale x 16 x i8> %b, + <vscale x 16 x i8> %c) + ret <vscale x 16 x i8> %out +} + +define <vscale x 8 x i16> @msb_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) { +; CHECK-LABEL: msb_i16: +; CHECK: msb z0.h, p0/m, z1.h, z2.h +; CHECK-NEXT: ret + %out = call <vscale x 8 x i16> @llvm.aarch64.sve.msb.nxv8i16(<vscale x 8 x i1> %pg, + <vscale x 8 x i16> %a, + <vscale x 8 x i16> %b, + <vscale x 8 x i16> %c) + ret <vscale x 8 x i16> %out +} + +define 
<vscale x 4 x i32> @msb_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) { +; CHECK-LABEL: msb_i32: +; CHECK: msb z0.s, p0/m, z1.s, z2.s +; CHECK-NEXT: ret + %out = call <vscale x 4 x i32> @llvm.aarch64.sve.msb.nxv4i32(<vscale x 4 x i1> %pg, + <vscale x 4 x i32> %a, + <vscale x 4 x i32> %b, + <vscale x 4 x i32> %c) + ret <vscale x 4 x i32> %out +} + +define <vscale x 2 x i64> @msb_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) { +; CHECK-LABEL: msb_i64: +; CHECK: msb z0.d, p0/m, z1.d, z2.d +; CHECK-NEXT: ret + %out = call <vscale x 2 x i64> @llvm.aarch64.sve.msb.nxv2i64(<vscale x 2 x i1> %pg, + <vscale x 2 x i64> %a, + <vscale x 2 x i64> %b, + <vscale x 2 x i64> %c) + ret <vscale x 2 x i64> %out +} + + +define <vscale x 16 x i8> @mla_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) { +; CHECK-LABEL: mla_i8: +; CHECK: mla z0.b, p0/m, z1.b, z2.b +; CHECK-NEXT: ret + %out = call <vscale x 16 x i8> @llvm.aarch64.sve.mla.nxv16i8(<vscale x 16 x i1> %pg, + <vscale x 16 x i8> %a, + <vscale x 16 x i8> %b, + <vscale x 16 x i8> %c) + ret <vscale x 16 x i8> %out +} + +define <vscale x 8 x i16> @mla_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) { +; CHECK-LABEL: mla_i16: +; CHECK: mla z0.h, p0/m, z1.h, z2.h +; CHECK-NEXT: ret + %out = call <vscale x 8 x i16> @llvm.aarch64.sve.mla.nxv8i16(<vscale x 8 x i1> %pg, + <vscale x 8 x i16> %a, + <vscale x 8 x i16> %b, + <vscale x 8 x i16> %c) + ret <vscale x 8 x i16> %out +} + +define <vscale x 4 x i32> @mla_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) { +; CHECK-LABEL: mla_i32: +; CHECK: mla z0.s, p0/m, z1.s, z2.s +; CHECK-NEXT: ret + %out = call <vscale x 4 x i32> @llvm.aarch64.sve.mla.nxv4i32(<vscale x 4 x i1> %pg, + <vscale x 4 x i32> %a, + <vscale x 4 x i32> %b, + <vscale x 4 x i32> %c) + ret 
<vscale x 4 x i32> %out +} + +define <vscale x 2 x i64> @mla_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) { +; CHECK-LABEL: mla_i64: +; CHECK: mla z0.d, p0/m, z1.d, z2.d +; CHECK-NEXT: ret + %out = call <vscale x 2 x i64> @llvm.aarch64.sve.mla.nxv2i64(<vscale x 2 x i1> %pg, + <vscale x 2 x i64> %a, + <vscale x 2 x i64> %b, + <vscale x 2 x i64> %c) + ret <vscale x 2 x i64> %out +} + + +define <vscale x 16 x i8> @mls_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) { +; CHECK-LABEL: mls_i8: +; CHECK: mls z0.b, p0/m, z1.b, z2.b +; CHECK-NEXT: ret + %out = call <vscale x 16 x i8> @llvm.aarch64.sve.mls.nxv16i8(<vscale x 16 x i1> %pg, + <vscale x 16 x i8> %a, + <vscale x 16 x i8> %b, + <vscale x 16 x i8> %c) + ret <vscale x 16 x i8> %out +} + +define <vscale x 8 x i16> @mls_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) { +; CHECK-LABEL: mls_i16: +; CHECK: mls z0.h, p0/m, z1.h, z2.h +; CHECK-NEXT: ret + %out = call <vscale x 8 x i16> @llvm.aarch64.sve.mls.nxv8i16(<vscale x 8 x i1> %pg, + <vscale x 8 x i16> %a, + <vscale x 8 x i16> %b, + <vscale x 8 x i16> %c) + ret <vscale x 8 x i16> %out +} + +define <vscale x 4 x i32> @mls_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) { +; CHECK-LABEL: mls_i32: +; CHECK: mls z0.s, p0/m, z1.s, z2.s +; CHECK-NEXT: ret + %out = call <vscale x 4 x i32> @llvm.aarch64.sve.mls.nxv4i32(<vscale x 4 x i1> %pg, + <vscale x 4 x i32> %a, + <vscale x 4 x i32> %b, + <vscale x 4 x i32> %c) + ret <vscale x 4 x i32> %out +} + +define <vscale x 2 x i64> @mls_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) { +; CHECK-LABEL: mls_i64: +; CHECK: mls z0.d, p0/m, z1.d, z2.d +; CHECK-NEXT: ret + %out = call <vscale x 2 x i64> @llvm.aarch64.sve.mls.nxv2i64(<vscale x 2 x i1> %pg, + <vscale x 2 x i64> %a, + <vscale x 2 x 
i64> %b, + <vscale x 2 x i64> %c) + ret <vscale x 2 x i64> %out +} + +declare <vscale x 16 x i8> @llvm.aarch64.sve.mad.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>,<vscale x 16 x i8>,<vscale x 16 x i8>) +declare <vscale x 8 x i16> @llvm.aarch64.sve.mad.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>,<vscale x 8 x i16>,<vscale x 8 x i16>) +declare <vscale x 4 x i32> @llvm.aarch64.sve.mad.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>,<vscale x 4 x i32>,<vscale x 4 x i32>) +declare <vscale x 2 x i64> @llvm.aarch64.sve.mad.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>,<vscale x 2 x i64>,<vscale x 2 x i64>) + +declare <vscale x 16 x i8> @llvm.aarch64.sve.msb.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>,<vscale x 16 x i8>,<vscale x 16 x i8>) +declare <vscale x 8 x i16> @llvm.aarch64.sve.msb.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>,<vscale x 8 x i16>,<vscale x 8 x i16>) +declare <vscale x 4 x i32> @llvm.aarch64.sve.msb.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>,<vscale x 4 x i32>,<vscale x 4 x i32>) +declare <vscale x 2 x i64> @llvm.aarch64.sve.msb.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>,<vscale x 2 x i64>,<vscale x 2 x i64>) + +declare <vscale x 16 x i8> @llvm.aarch64.sve.mla.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>,<vscale x 16 x i8>,<vscale x 16 x i8>) +declare <vscale x 8 x i16> @llvm.aarch64.sve.mla.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>,<vscale x 8 x i16>,<vscale x 8 x i16>) +declare <vscale x 4 x i32> @llvm.aarch64.sve.mla.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>,<vscale x 4 x i32>,<vscale x 4 x i32>) +declare <vscale x 2 x i64> @llvm.aarch64.sve.mla.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>,<vscale x 2 x i64>,<vscale x 2 x i64>) + +declare <vscale x 16 x i8> @llvm.aarch64.sve.mls.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>,<vscale x 16 x i8>,<vscale x 16 x i8>) +declare <vscale x 8 x i16> @llvm.aarch64.sve.mls.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>,<vscale x 8 x i16>,<vscale x 8 x i16>) +declare <vscale x 4 x i32> 
@llvm.aarch64.sve.mls.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>,<vscale x 4 x i32>,<vscale x 4 x i32>) +declare <vscale x 2 x i64> @llvm.aarch64.sve.mls.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>,<vscale x 2 x i64>,<vscale x 2 x i64>) diff --git a/llvm/test/CodeGen/AArch64/sve-int-mul-pred.ll b/llvm/test/CodeGen/AArch64/sve-int-mul-pred.ll new file mode 100644 index 00000000000..287a3372907 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-int-mul-pred.ll @@ -0,0 +1,134 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s + +define <vscale x 16 x i8> @mul_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) { +; CHECK-LABEL: mul_i8: +; CHECK: mul z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call <vscale x 16 x i8> @llvm.aarch64.sve.mul.nxv16i8(<vscale x 16 x i1> %pg, + <vscale x 16 x i8> %a, + <vscale x 16 x i8> %b) + ret <vscale x 16 x i8> %out +} + +define <vscale x 8 x i16> @mul_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) { +; CHECK-LABEL: mul_i16: +; CHECK: mul z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call <vscale x 8 x i16> @llvm.aarch64.sve.mul.nxv8i16(<vscale x 8 x i1> %pg, + <vscale x 8 x i16> %a, + <vscale x 8 x i16> %b) + ret <vscale x 8 x i16> %out +} + +define <vscale x 4 x i32> @mul_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { +; CHECK-LABEL: mul_i32: +; CHECK: mul z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1> %pg, + <vscale x 4 x i32> %a, + <vscale x 4 x i32> %b) + ret <vscale x 4 x i32> %out +} + +define <vscale x 2 x i64> @mul_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { +; CHECK-LABEL: mul_i64: +; CHECK: mul z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call <vscale x 2 x i64> @llvm.aarch64.sve.mul.nxv2i64(<vscale x 2 x i1> %pg, + <vscale x 2 x i64> %a, + <vscale x 2 x i64> %b) + ret <vscale x 2 x i64> %out +} + 
+define <vscale x 16 x i8> @smulh_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) { +; CHECK-LABEL: smulh_i8: +; CHECK: smulh z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call <vscale x 16 x i8> @llvm.aarch64.sve.smulh.nxv16i8(<vscale x 16 x i1> %pg, + <vscale x 16 x i8> %a, + <vscale x 16 x i8> %b) + ret <vscale x 16 x i8> %out +} + +define <vscale x 8 x i16> @smulh_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) { +; CHECK-LABEL: smulh_i16: +; CHECK: smulh z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call <vscale x 8 x i16> @llvm.aarch64.sve.smulh.nxv8i16(<vscale x 8 x i1> %pg, + <vscale x 8 x i16> %a, + <vscale x 8 x i16> %b) + ret <vscale x 8 x i16> %out +} + +define <vscale x 4 x i32> @smulh_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { +; CHECK-LABEL: smulh_i32: +; CHECK: smulh z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call <vscale x 4 x i32> @llvm.aarch64.sve.smulh.nxv4i32(<vscale x 4 x i1> %pg, + <vscale x 4 x i32> %a, + <vscale x 4 x i32> %b) + ret <vscale x 4 x i32> %out +} + +define <vscale x 2 x i64> @smulh_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { +; CHECK-LABEL: smulh_i64: +; CHECK: smulh z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call <vscale x 2 x i64> @llvm.aarch64.sve.smulh.nxv2i64(<vscale x 2 x i1> %pg, + <vscale x 2 x i64> %a, + <vscale x 2 x i64> %b) + ret <vscale x 2 x i64> %out +} + +define <vscale x 16 x i8> @umulh_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) { +; CHECK-LABEL: umulh_i8: +; CHECK: umulh z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call <vscale x 16 x i8> @llvm.aarch64.sve.umulh.nxv16i8(<vscale x 16 x i1> %pg, + <vscale x 16 x i8> %a, + <vscale x 16 x i8> %b) + ret <vscale x 16 x i8> %out +} + +define <vscale x 8 x i16> @umulh_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) { +; CHECK-LABEL: umulh_i16: +; CHECK: umulh z0.h, p0/m, 
z0.h, z1.h +; CHECK-NEXT: ret + %out = call <vscale x 8 x i16> @llvm.aarch64.sve.umulh.nxv8i16(<vscale x 8 x i1> %pg, + <vscale x 8 x i16> %a, + <vscale x 8 x i16> %b) + ret <vscale x 8 x i16> %out +} + +define <vscale x 4 x i32> @umulh_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { +; CHECK-LABEL: umulh_i32: +; CHECK: umulh z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call <vscale x 4 x i32> @llvm.aarch64.sve.umulh.nxv4i32(<vscale x 4 x i1> %pg, + <vscale x 4 x i32> %a, + <vscale x 4 x i32> %b) + ret <vscale x 4 x i32> %out +} + +define <vscale x 2 x i64> @umulh_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { +; CHECK-LABEL: umulh_i64: +; CHECK: umulh z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call <vscale x 2 x i64> @llvm.aarch64.sve.umulh.nxv2i64(<vscale x 2 x i1> %pg, + <vscale x 2 x i64> %a, + <vscale x 2 x i64> %b) + ret <vscale x 2 x i64> %out +} + +declare <vscale x 16 x i8> @llvm.aarch64.sve.mul.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>) +declare <vscale x 8 x i16> @llvm.aarch64.sve.mul.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>) +declare <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>) +declare <vscale x 2 x i64> @llvm.aarch64.sve.mul.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>) +declare <vscale x 16 x i8> @llvm.aarch64.sve.smulh.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>) +declare <vscale x 8 x i16> @llvm.aarch64.sve.smulh.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>) +declare <vscale x 4 x i32> @llvm.aarch64.sve.smulh.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>) +declare <vscale x 2 x i64> @llvm.aarch64.sve.smulh.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>) +declare <vscale x 16 x i8> @llvm.aarch64.sve.umulh.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x 
i8>) +declare <vscale x 8 x i16> @llvm.aarch64.sve.umulh.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>) +declare <vscale x 4 x i32> @llvm.aarch64.sve.umulh.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>) +declare <vscale x 2 x i64> @llvm.aarch64.sve.umulh.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>) |