Diffstat (limited to 'llvm')
 -rw-r--r--  llvm/include/llvm/IR/IntrinsicsAArch64.td                          |  39
 -rw-r--r--  llvm/lib/Target/AArch64/AArch64InstrFormats.td                     |  24
 -rw-r--r--  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td                     |  32
 -rw-r--r--  llvm/lib/Target/AArch64/SVEInstrFormats.td                         |  25
 -rw-r--r--  llvm/test/CodeGen/AArch64/sve2-intrinsics-binary-narrowing-shr.ll  | 512
 5 files changed, 610 insertions(+), 22 deletions(-)
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index ffafe83d25f..fa1a3c918be 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -1021,6 +1021,17 @@ let TargetPrefix = "aarch64" in {  // All intrinsics start with "llvm.aarch64.".
                 [LLVMSubdivide2VectorType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
                 [IntrNoMem]>;
 
+  class SVE2_1VectorArg_Imm_Narrowing_Intrinsic
+    : Intrinsic<[LLVMSubdivide2VectorType<0>],
+                [llvm_anyvector_ty, llvm_i32_ty],
+                [IntrNoMem, ImmArg<1>]>;
+
+  class SVE2_2VectorArg_Imm_Narrowing_Intrinsic
+    : Intrinsic<[LLVMSubdivide2VectorType<0>],
+                [LLVMSubdivide2VectorType<0>, llvm_anyvector_ty,
+                 llvm_i32_ty],
+                [IntrNoMem, ImmArg<2>]>;
+
   // NOTE: There is no relationship between these intrinsics beyond an attempt
   // to reuse currently identical class definitions.
   class AdvSIMD_SVE_LOGB_Intrinsic : AdvSIMD_SVE_CNT_Intrinsic;
@@ -1559,4 +1570,32 @@ def int_aarch64_sve_subhnt : SVE2_Merged2VectorArg_Narrowing_Intrinsic;
 
 def int_aarch64_sve_rsubhnb : SVE2_2VectorArg_Narrowing_Intrinsic;
 def int_aarch64_sve_rsubhnt : SVE2_Merged2VectorArg_Narrowing_Intrinsic;
+
+// Narrowing shift right
+def int_aarch64_sve_shrnb : SVE2_1VectorArg_Imm_Narrowing_Intrinsic;
+def int_aarch64_sve_shrnt : SVE2_2VectorArg_Imm_Narrowing_Intrinsic;
+
+def int_aarch64_sve_rshrnb : SVE2_1VectorArg_Imm_Narrowing_Intrinsic;
+def int_aarch64_sve_rshrnt : SVE2_2VectorArg_Imm_Narrowing_Intrinsic;
+
+// Saturating shift right - signed input/output
+def int_aarch64_sve_sqshrnb : SVE2_1VectorArg_Imm_Narrowing_Intrinsic;
+def int_aarch64_sve_sqshrnt : SVE2_2VectorArg_Imm_Narrowing_Intrinsic;
+
+def int_aarch64_sve_sqrshrnb : SVE2_1VectorArg_Imm_Narrowing_Intrinsic;
+def int_aarch64_sve_sqrshrnt : SVE2_2VectorArg_Imm_Narrowing_Intrinsic;
+
+// Saturating shift right - unsigned input/output
+def int_aarch64_sve_uqshrnb : SVE2_1VectorArg_Imm_Narrowing_Intrinsic;
+def int_aarch64_sve_uqshrnt : SVE2_2VectorArg_Imm_Narrowing_Intrinsic;
+
+def int_aarch64_sve_uqrshrnb : SVE2_1VectorArg_Imm_Narrowing_Intrinsic;
+def int_aarch64_sve_uqrshrnt : SVE2_2VectorArg_Imm_Narrowing_Intrinsic;
+
+// Saturating shift right - signed input, unsigned output
+def int_aarch64_sve_sqshrunb : SVE2_1VectorArg_Imm_Narrowing_Intrinsic;
+def int_aarch64_sve_sqshrunt : SVE2_2VectorArg_Imm_Narrowing_Intrinsic;
+
+def int_aarch64_sve_sqrshrunb : SVE2_1VectorArg_Imm_Narrowing_Intrinsic;
+def int_aarch64_sve_sqrshrunt : SVE2_2VectorArg_Imm_Narrowing_Intrinsic;
 }
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 222365fd787..4ac52a48b3a 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -624,6 +624,30 @@ def vecshiftR64Narrow : Operand<i32>, ImmLeaf<i32, [{
   let ParserMatchClass = Imm1_32Operand;
 }
 
+// Same as vecshiftR#N, but use TargetConstant (TimmLeaf) instead of Constant
+// (ImmLeaf)
+def tvecshiftR8 : Operand<i32>, TImmLeaf<i32, [{
+  return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 9);
+}]> {
+  let EncoderMethod = "getVecShiftR8OpValue";
+  let DecoderMethod = "DecodeVecShiftR8Imm";
+  let ParserMatchClass = Imm1_8Operand;
+}
+def tvecshiftR16 : Operand<i32>, TImmLeaf<i32, [{
+  return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 17);
+}]> {
+  let EncoderMethod = "getVecShiftR16OpValue";
+  let DecoderMethod = "DecodeVecShiftR16Imm";
+  let ParserMatchClass = Imm1_16Operand;
+}
+def tvecshiftR32 : Operand<i32>, TImmLeaf<i32, [{
+  return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 33);
+}]> {
+  let EncoderMethod = "getVecShiftR32OpValue";
+  let DecoderMethod = "DecodeVecShiftR32Imm";
+  let ParserMatchClass = Imm1_32Operand;
+}
+
 def Imm0_1Operand : AsmImmRange<0, 1>;
 def Imm0_7Operand : AsmImmRange<0, 7>;
 def Imm0_15Operand : AsmImmRange<0, 15>;
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 92bc59a3b72..42d8c36277b 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1426,24 +1426,24 @@ let Predicates = [HasSVE2] in {
   defm SBCLT_ZZZ : sve2_int_addsub_long_carry<0b11, "sbclt">;
 
   // SVE2 bitwise shift right narrow (bottom)
-  defm SQSHRUNB_ZZI  : sve2_int_bin_shift_imm_right_narrow_bottom<0b000, "sqshrunb">;
-  defm SQRSHRUNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b001, "sqrshrunb">;
-  defm SHRNB_ZZI     : sve2_int_bin_shift_imm_right_narrow_bottom<0b010, "shrnb">;
-  defm RSHRNB_ZZI    : sve2_int_bin_shift_imm_right_narrow_bottom<0b011, "rshrnb">;
-  defm SQSHRNB_ZZI   : sve2_int_bin_shift_imm_right_narrow_bottom<0b100, "sqshrnb">;
-  defm SQRSHRNB_ZZI  : sve2_int_bin_shift_imm_right_narrow_bottom<0b101, "sqrshrnb">;
-  defm UQSHRNB_ZZI   : sve2_int_bin_shift_imm_right_narrow_bottom<0b110, "uqshrnb">;
-  defm UQRSHRNB_ZZI  : sve2_int_bin_shift_imm_right_narrow_bottom<0b111, "uqrshrnb">;
+  defm SQSHRUNB_ZZI  : sve2_int_bin_shift_imm_right_narrow_bottom<0b000, "sqshrunb",  int_aarch64_sve_sqshrunb>;
+  defm SQRSHRUNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b001, "sqrshrunb", int_aarch64_sve_sqrshrunb>;
+  defm SHRNB_ZZI     : sve2_int_bin_shift_imm_right_narrow_bottom<0b010, "shrnb",     int_aarch64_sve_shrnb>;
+  defm RSHRNB_ZZI    : sve2_int_bin_shift_imm_right_narrow_bottom<0b011, "rshrnb",    int_aarch64_sve_rshrnb>;
+  defm SQSHRNB_ZZI   : sve2_int_bin_shift_imm_right_narrow_bottom<0b100, "sqshrnb",   int_aarch64_sve_sqshrnb>;
+  defm SQRSHRNB_ZZI  : sve2_int_bin_shift_imm_right_narrow_bottom<0b101, "sqrshrnb",  int_aarch64_sve_sqrshrnb>;
+  defm UQSHRNB_ZZI   : sve2_int_bin_shift_imm_right_narrow_bottom<0b110, "uqshrnb",   int_aarch64_sve_uqshrnb>;
+  defm UQRSHRNB_ZZI  : sve2_int_bin_shift_imm_right_narrow_bottom<0b111, "uqrshrnb",  int_aarch64_sve_uqrshrnb>;
 
   // SVE2 bitwise shift right narrow (top)
-  defm SQSHRUNT_ZZI  : sve2_int_bin_shift_imm_right_narrow_top<0b000, "sqshrunt">;
-  defm SQRSHRUNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b001, "sqrshrunt">;
-  defm SHRNT_ZZI     : sve2_int_bin_shift_imm_right_narrow_top<0b010, "shrnt">;
-  defm RSHRNT_ZZI    : sve2_int_bin_shift_imm_right_narrow_top<0b011, "rshrnt">;
-  defm SQSHRNT_ZZI   : sve2_int_bin_shift_imm_right_narrow_top<0b100, "sqshrnt">;
-  defm SQRSHRNT_ZZI  : sve2_int_bin_shift_imm_right_narrow_top<0b101, "sqrshrnt">;
-  defm UQSHRNT_ZZI   : sve2_int_bin_shift_imm_right_narrow_top<0b110, "uqshrnt">;
-  defm UQRSHRNT_ZZI  : sve2_int_bin_shift_imm_right_narrow_top<0b111, "uqrshrnt">;
+  defm SQSHRUNT_ZZI  : sve2_int_bin_shift_imm_right_narrow_top<0b000, "sqshrunt",  int_aarch64_sve_sqshrunt>;
+  defm SQRSHRUNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b001, "sqrshrunt", int_aarch64_sve_sqrshrunt>;
+  defm SHRNT_ZZI     : sve2_int_bin_shift_imm_right_narrow_top<0b010, "shrnt",     int_aarch64_sve_shrnt>;
+  defm RSHRNT_ZZI    : sve2_int_bin_shift_imm_right_narrow_top<0b011, "rshrnt",    int_aarch64_sve_rshrnt>;
+  defm SQSHRNT_ZZI   : sve2_int_bin_shift_imm_right_narrow_top<0b100, "sqshrnt",   int_aarch64_sve_sqshrnt>;
+  defm SQRSHRNT_ZZI  : sve2_int_bin_shift_imm_right_narrow_top<0b101, "sqrshrnt",  int_aarch64_sve_sqrshrnt>;
+  defm UQSHRNT_ZZI   : sve2_int_bin_shift_imm_right_narrow_top<0b110, "uqshrnt",   int_aarch64_sve_uqshrnt>;
+  defm UQRSHRNT_ZZI  : sve2_int_bin_shift_imm_right_narrow_top<0b111, "uqrshrnt",  int_aarch64_sve_uqrshrnt>;
 
   // SVE2 integer add/subtract narrow high part (bottom)
   defm ADDHNB_ZZZ : sve2_int_addsub_narrow_high_bottom<0b00, "addhnb", int_aarch64_sve_addhnb>;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 63214215add..31823118bff 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -334,6 +334,11 @@ class SVE_4_Op_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
 : Pat<(vtd (op vt1:$Op1, vt2:$Op2, vt3:$Op3, vt4:$Op4)),
       (inst $Op1, $Op2, $Op3, $Op4)>;
 
+class SVE_2_Op_Imm_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
+                       ValueType vt2, Operand ImmTy, Instruction inst>
+: Pat<(vtd (op vt1:$Op1, (vt2 ImmTy:$Op2))),
+      (inst $Op1, ImmTy:$Op2)>;
+
 class SVE_3_Op_Imm_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
                        ValueType vt2, ValueType vt3, Operand ImmTy,
                        Instruction inst>
@@ -2965,17 +2970,21 @@ class sve2_int_bin_shift_imm_narrow_bottom<bits<3> tsz8_64, bits<3> opc,
   let Inst{4-0} = Zd;
 }
 
-multiclass sve2_int_bin_shift_imm_right_narrow_bottom<bits<3> opc, string asm> {
+multiclass sve2_int_bin_shift_imm_right_narrow_bottom<bits<3> opc, string asm,
+                                                      SDPatternOperator op> {
   def _B : sve2_int_bin_shift_imm_narrow_bottom<{0,0,1}, opc, asm, ZPR8, ZPR16,
-                                                vecshiftR8>;
+                                                tvecshiftR8>;
   def _H : sve2_int_bin_shift_imm_narrow_bottom<{0,1,?}, opc, asm, ZPR16, ZPR32,
-                                                vecshiftR16> {
+                                                tvecshiftR16> {
     let Inst{19} = imm{3};
   }
   def _S : sve2_int_bin_shift_imm_narrow_bottom<{1,?,?}, opc, asm, ZPR32, ZPR64,
                                                 vecshiftR32> {
     let Inst{20-19} = imm{4-3};
   }
+  def : SVE_2_Op_Imm_Pat<nxv16i8, op, nxv8i16, i32, tvecshiftR8,  !cast<Instruction>(NAME # _B)>;
+  def : SVE_2_Op_Imm_Pat<nxv8i16, op, nxv4i32, i32, tvecshiftR16, !cast<Instruction>(NAME # _H)>;
+  def : SVE_2_Op_Imm_Pat<nxv4i32, op, nxv2i64, i32, tvecshiftR32, !cast<Instruction>(NAME # _S)>;
 }
 
 class sve2_int_bin_shift_imm_narrow_top<bits<3> tsz8_64, bits<3> opc,
@@ -3001,17 +3010,21 @@ class sve2_int_bin_shift_imm_narrow_top<bits<3> tsz8_64, bits<3> opc,
   let Constraints = "$Zd = $_Zd";
 }
 
-multiclass sve2_int_bin_shift_imm_right_narrow_top<bits<3> opc, string asm> {
+multiclass sve2_int_bin_shift_imm_right_narrow_top<bits<3> opc, string asm,
+                                                   SDPatternOperator op> {
   def _B : sve2_int_bin_shift_imm_narrow_top<{0,0,1}, opc, asm, ZPR8, ZPR16,
-                                             vecshiftR8>;
+                                             tvecshiftR8>;
   def _H : sve2_int_bin_shift_imm_narrow_top<{0,1,?}, opc, asm, ZPR16, ZPR32,
-                                             vecshiftR16> {
+                                             tvecshiftR16> {
     let Inst{19} = imm{3};
   }
   def _S : sve2_int_bin_shift_imm_narrow_top<{1,?,?}, opc, asm, ZPR32, ZPR64,
                                              vecshiftR32> {
     let Inst{20-19} = imm{4-3};
   }
+  def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv8i16, i32, tvecshiftR8,  !cast<Instruction>(NAME # _B)>;
+  def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv4i32, i32, tvecshiftR16, !cast<Instruction>(NAME # _H)>;
+  def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv2i64, i32, tvecshiftR32, !cast<Instruction>(NAME # _S)>;
 }
 
 class sve2_int_addsub_narrow_high_bottom<bits<2> sz, bits<2> opc, string asm,
diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-binary-narrowing-shr.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-binary-narrowing-shr.ll
new file mode 100644
index 00000000000..27f7d71c578
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-binary-narrowing-shr.ll
@@ -0,0 +1,512 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
+
+;
+; SHRNB
+;
+
+define <vscale x 16 x i8> @shrnb_h(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: shrnb_h:
+; CHECK: shrnb z0.b, z0.h, #8
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.shrnb.nxv8i16(<vscale x 8 x i16> %a,
+                                                                 i32 8)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @shrnb_s(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: shrnb_s:
+; CHECK: shrnb z0.h, z0.s, #16
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.shrnb.nxv4i32(<vscale x 4 x i32> %a,
+                                                                 i32 16)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @shrnb_d(<vscale x 2 x i64> %a) {
+; CHECK-LABEL: shrnb_d:
+; CHECK: shrnb z0.s, z0.d, #32
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.shrnb.nxv2i64(<vscale x 2 x i64> %a,
+                                                                 i32 32)
+  ret <vscale x 4 x i32> %out
+}
+
+;
+; UQSHRNB
+;
+
+define <vscale x 16 x i8> @uqshrnb_h(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: uqshrnb_h:
+; CHECK: uqshrnb z0.b, z0.h, #1
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqshrnb.nxv8i16(<vscale x 8 x i16> %a,
+                                                                   i32 1)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @uqshrnb_s(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: uqshrnb_s:
+; CHECK: uqshrnb z0.h, z0.s, #1
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqshrnb.nxv4i32(<vscale x 4 x i32> %a,
+                                                                   i32 1)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @uqshrnb_d(<vscale x 2 x i64> %a) {
+; CHECK-LABEL: uqshrnb_d:
+; CHECK: uqshrnb z0.s, z0.d, #1
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqshrnb.nxv2i64(<vscale x 2 x i64> %a,
+                                                                   i32 1)
+  ret <vscale x 4 x i32> %out
+}
+
+;
+; SQSHRNB
+;
+
+define <vscale x 16 x i8> @sqshrnb_h(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: sqshrnb_h:
+; CHECK: sqshrnb z0.b, z0.h, #1
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqshrnb.nxv8i16(<vscale x 8 x i16> %a,
+                                                                   i32 1)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @sqshrnb_s(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: sqshrnb_s:
+; CHECK: sqshrnb z0.h, z0.s, #1
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqshrnb.nxv4i32(<vscale x 4 x i32> %a,
+                                                                   i32 1)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @sqshrnb_d(<vscale x 2 x i64> %a) {
+; CHECK-LABEL: sqshrnb_d:
+; CHECK: sqshrnb z0.s, z0.d, #1
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqshrnb.nxv2i64(<vscale x 2 x i64> %a,
+                                                                   i32 1)
+  ret <vscale x 4 x i32> %out
+}
+
+;
+; SQSHRUNB
+;
+
+define <vscale x 16 x i8> @sqshrunb_h(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: sqshrunb_h:
+; CHECK: sqshrunb z0.b, z0.h, #7
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqshrunb.nxv8i16(<vscale x 8 x i16> %a,
+                                                                    i32 7)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @sqshrunb_s(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: sqshrunb_s:
+; CHECK: sqshrunb z0.h, z0.s, #15
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqshrunb.nxv4i32(<vscale x 4 x i32> %a,
+                                                                    i32 15)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @sqshrunb_d(<vscale x 2 x i64> %a) {
+; CHECK-LABEL: sqshrunb_d:
+; CHECK: sqshrunb z0.s, z0.d, #31
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqshrunb.nxv2i64(<vscale x 2 x i64> %a,
+                                                                    i32 31)
+  ret <vscale x 4 x i32> %out
+}
+
+;
+; UQRSHRNB
+;
+
+define <vscale x 16 x i8> @uqrshrnb_h(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: uqrshrnb_h:
+; CHECK: uqrshrnb z0.b, z0.h, #2
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqrshrnb.nxv8i16(<vscale x 8 x i16> %a,
+                                                                    i32 2)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @uqrshrnb_s(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: uqrshrnb_s:
+; CHECK: uqrshrnb z0.h, z0.s, #2
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqrshrnb.nxv4i32(<vscale x 4 x i32> %a,
+                                                                    i32 2)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @uqrshrnb_d(<vscale x 2 x i64> %a) {
+; CHECK-LABEL: uqrshrnb_d:
+; CHECK: uqrshrnb z0.s, z0.d, #2
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqrshrnb.nxv2i64(<vscale x 2 x i64> %a,
+                                                                    i32 2)
+  ret <vscale x 4 x i32> %out
+}
+
+;
+; SQRSHRNB
+;
+
+define <vscale x 16 x i8> @sqrshrnb_h(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: sqrshrnb_h:
+; CHECK: sqrshrnb z0.b, z0.h, #2
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrshrnb.nxv8i16(<vscale x 8 x i16> %a,
+                                                                    i32 2)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @sqrshrnb_s(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: sqrshrnb_s:
+; CHECK: sqrshrnb z0.h, z0.s, #2
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrshrnb.nxv4i32(<vscale x 4 x i32> %a,
+                                                                    i32 2)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @sqrshrnb_d(<vscale x 2 x i64> %a) {
+; CHECK-LABEL: sqrshrnb_d:
+; CHECK: sqrshrnb z0.s, z0.d, #2
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrshrnb.nxv2i64(<vscale x 2 x i64> %a,
+                                                                    i32 2)
+  ret <vscale x 4 x i32> %out
+}
+
+;
+; SQRSHRUNB
+;
+
+define <vscale x 16 x i8> @sqrshrunb_h(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: sqrshrunb_h:
+; CHECK: sqrshrunb z0.b, z0.h, #6
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrshrunb.nxv8i16(<vscale x 8 x i16> %a,
+                                                                     i32 6)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @sqrshrunb_s(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: sqrshrunb_s:
+; CHECK: sqrshrunb z0.h, z0.s, #14
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrshrunb.nxv4i32(<vscale x 4 x i32> %a,
+                                                                     i32 14)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @sqrshrunb_d(<vscale x 2 x i64> %a) {
+; CHECK-LABEL: sqrshrunb_d:
+; CHECK: sqrshrunb z0.s, z0.d, #30
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrshrunb.nxv2i64(<vscale x 2 x i64> %a,
+                                                                     i32 30)
+  ret <vscale x 4 x i32> %out
+}
+
+;
+; SHRNT
+;
+
+define <vscale x 16 x i8> @shrnt_h(<vscale x 16 x i8> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: shrnt_h:
+; CHECK: shrnt z0.b, z1.h, #3
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.shrnt.nxv8i16(<vscale x 16 x i8> %a,
+                                                                 <vscale x 8 x i16> %b,
+                                                                 i32 3)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @shrnt_s(<vscale x 8 x i16> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: shrnt_s:
+; CHECK: shrnt z0.h, z1.s, #3
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.shrnt.nxv4i32(<vscale x 8 x i16> %a,
+                                                                 <vscale x 4 x i32> %b,
+                                                                 i32 3)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @shrnt_d(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: shrnt_d:
+; CHECK: shrnt z0.s, z1.d, #3
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.shrnt.nxv2i64(<vscale x 4 x i32> %a,
+                                                                 <vscale x 2 x i64> %b,
+                                                                 i32 3)
+  ret <vscale x 4 x i32> %out
+}
+
+;
+; UQSHRNT
+;
+
+define <vscale x 16 x i8> @uqshrnt_h(<vscale x 16 x i8> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: uqshrnt_h:
+; CHECK: uqshrnt z0.b, z1.h, #5
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqshrnt.nxv8i16(<vscale x 16 x i8> %a,
+                                                                   <vscale x 8 x i16> %b,
+                                                                   i32 5)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @uqshrnt_s(<vscale x 8 x i16> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: uqshrnt_s:
+; CHECK: uqshrnt z0.h, z1.s, #13
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqshrnt.nxv4i32(<vscale x 8 x i16> %a,
+                                                                   <vscale x 4 x i32> %b,
+                                                                   i32 13)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @uqshrnt_d(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: uqshrnt_d:
+; CHECK: uqshrnt z0.s, z1.d, #29
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqshrnt.nxv2i64(<vscale x 4 x i32> %a,
+                                                                   <vscale x 2 x i64> %b,
+                                                                   i32 29)
+  ret <vscale x 4 x i32> %out
+}
+
+;
+; SQSHRNT
+;
+
+define <vscale x 16 x i8> @sqshrnt_h(<vscale x 16 x i8> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: sqshrnt_h:
+; CHECK: sqshrnt z0.b, z1.h, #5
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqshrnt.nxv8i16(<vscale x 16 x i8> %a,
+                                                                   <vscale x 8 x i16> %b,
+                                                                   i32 5)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @sqshrnt_s(<vscale x 8 x i16> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: sqshrnt_s:
+; CHECK: sqshrnt z0.h, z1.s, #13
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqshrnt.nxv4i32(<vscale x 8 x i16> %a,
+                                                                   <vscale x 4 x i32> %b,
+                                                                   i32 13)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @sqshrnt_d(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: sqshrnt_d:
+; CHECK: sqshrnt z0.s, z1.d, #29
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqshrnt.nxv2i64(<vscale x 4 x i32> %a,
+                                                                   <vscale x 2 x i64> %b,
+                                                                   i32 29)
+  ret <vscale x 4 x i32> %out
+}
+
+;
+; SQSHRUNT
+;
+
+define <vscale x 16 x i8> @sqshrunt_h(<vscale x 16 x i8> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: sqshrunt_h:
+; CHECK: sqshrunt z0.b, z1.h, #4
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqshrunt.nxv8i16(<vscale x 16 x i8> %a,
+                                                                    <vscale x 8 x i16> %b,
+                                                                    i32 4)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @sqshrunt_s(<vscale x 8 x i16> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: sqshrunt_s:
+; CHECK: sqshrunt z0.h, z1.s, #4
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqshrunt.nxv4i32(<vscale x 8 x i16> %a,
+                                                                    <vscale x 4 x i32> %b,
+                                                                    i32 4)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @sqshrunt_d(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: sqshrunt_d:
+; CHECK: sqshrunt z0.s, z1.d, #4
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqshrunt.nxv2i64(<vscale x 4 x i32> %a,
+                                                                    <vscale x 2 x i64> %b,
+                                                                    i32 4)
+  ret <vscale x 4 x i32> %out
+}
+
+;
+; UQRSHRNT
+;
+
+define <vscale x 16 x i8> @uqrshrnt_h(<vscale x 16 x i8> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: uqrshrnt_h:
+; CHECK: uqrshrnt z0.b, z1.h, #8
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqrshrnt.nxv8i16(<vscale x 16 x i8> %a,
+                                                                    <vscale x 8 x i16> %b,
+                                                                    i32 8)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @uqrshrnt_s(<vscale x 8 x i16> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: uqrshrnt_s:
+; CHECK: uqrshrnt z0.h, z1.s, #12
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqrshrnt.nxv4i32(<vscale x 8 x i16> %a,
+                                                                    <vscale x 4 x i32> %b,
+                                                                    i32 12)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @uqrshrnt_d(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: uqrshrnt_d:
+; CHECK: uqrshrnt z0.s, z1.d, #28
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqrshrnt.nxv2i64(<vscale x 4 x i32> %a,
+                                                                    <vscale x 2 x i64> %b,
+                                                                    i32 28)
+  ret <vscale x 4 x i32> %out
+}
+
+;
+; SQRSHRNT
+;
+
+define <vscale x 16 x i8> @sqrshrnt_h(<vscale x 16 x i8> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: sqrshrnt_h:
+; CHECK: sqrshrnt z0.b, z1.h, #8
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrshrnt.nxv8i16(<vscale x 16 x i8> %a,
+                                                                    <vscale x 8 x i16> %b,
+                                                                    i32 8)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @sqrshrnt_s(<vscale x 8 x i16> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: sqrshrnt_s:
+; CHECK: sqrshrnt z0.h, z1.s, #12
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrshrnt.nxv4i32(<vscale x 8 x i16> %a,
+                                                                    <vscale x 4 x i32> %b,
+                                                                    i32 12)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @sqrshrnt_d(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: sqrshrnt_d:
+; CHECK: sqrshrnt z0.s, z1.d, #28
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrshrnt.nxv2i64(<vscale x 4 x i32> %a,
+                                                                    <vscale x 2 x i64> %b,
+                                                                    i32 28)
+  ret <vscale x 4 x i32> %out
+}
+
+;
+; SQRSHRUNT
+;
+
+define <vscale x 16 x i8> @sqrshrunt_h(<vscale x 16 x i8> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: sqrshrunt_h:
+; CHECK: sqrshrunt z0.b, z1.h, #1
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrshrunt.nxv8i16(<vscale x 16 x i8> %a,
+                                                                     <vscale x 8 x i16> %b,
+                                                                     i32 1)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @sqrshrunt_s(<vscale x 8 x i16> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: sqrshrunt_s:
+; CHECK: sqrshrunt z0.h, z1.s, #5
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrshrunt.nxv4i32(<vscale x 8 x i16> %a,
+                                                                     <vscale x 4 x i32> %b,
+                                                                     i32 5)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @sqrshrunt_d(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: sqrshrunt_d:
+; CHECK: sqrshrunt z0.s, z1.d, #5
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrshrunt.nxv2i64(<vscale x 4 x i32> %a,
+                                                                     <vscale x 2 x i64> %b,
+                                                                     i32 5)
+  ret <vscale x 4 x i32> %out
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.shrnb.nxv8i16(<vscale x 8 x i16>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.shrnb.nxv4i32(<vscale x 4 x i32>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.shrnb.nxv2i64(<vscale x 2 x i64>, i32)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.uqshrnb.nxv8i16(<vscale x 8 x i16>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.uqshrnb.nxv4i32(<vscale x 4 x i32>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uqshrnb.nxv2i64(<vscale x 2 x i64>, i32)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.sqshrnb.nxv8i16(<vscale x 8 x i16>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqshrnb.nxv4i32(<vscale x 4 x i32>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sqshrnb.nxv2i64(<vscale x 2 x i64>, i32)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.uqrshrnb.nxv8i16(<vscale x 8 x i16>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.uqrshrnb.nxv4i32(<vscale x 4 x i32>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uqrshrnb.nxv2i64(<vscale x 2 x i64>, i32)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.sqrshrnb.nxv8i16(<vscale x 8 x i16>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqrshrnb.nxv4i32(<vscale x 4 x i32>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sqrshrnb.nxv2i64(<vscale x 2 x i64>, i32)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.sqshrunb.nxv8i16(<vscale x 8 x i16>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqshrunb.nxv4i32(<vscale x 4 x i32>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sqshrunb.nxv2i64(<vscale x 2 x i64>, i32)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.sqrshrunb.nxv8i16(<vscale x 8 x i16>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqrshrunb.nxv4i32(<vscale x 4 x i32>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sqrshrunb.nxv2i64(<vscale x 2 x i64>, i32)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.shrnt.nxv8i16(<vscale x 16 x i8>, <vscale x 8 x i16>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.shrnt.nxv4i32(<vscale x 8 x i16>, <vscale x 4 x i32>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.shrnt.nxv2i64(<vscale x 4 x i32>, <vscale x 2 x i64>, i32)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.uqshrnt.nxv8i16(<vscale x 16 x i8>, <vscale x 8 x i16>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.uqshrnt.nxv4i32(<vscale x 8 x i16>, <vscale x 4 x i32>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uqshrnt.nxv2i64(<vscale x 4 x i32>, <vscale x 2 x i64>, i32)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.sqshrnt.nxv8i16(<vscale x 16 x i8>, <vscale x 8 x i16>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqshrnt.nxv4i32(<vscale x 8 x i16>, <vscale x 4 x i32>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sqshrnt.nxv2i64(<vscale x 4 x i32>, <vscale x 2 x i64>, i32)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.sqshrunt.nxv8i16(<vscale x 16 x i8>, <vscale x 8 x i16>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqshrunt.nxv4i32(<vscale x 8 x i16>, <vscale x 4 x i32>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sqshrunt.nxv2i64(<vscale x 4 x i32>, <vscale x 2 x i64>, i32)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.uqrshrnt.nxv8i16(<vscale x 16 x i8>, <vscale x 8 x i16>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.uqrshrnt.nxv4i32(<vscale x 8 x i16>, <vscale x 4 x i32>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uqrshrnt.nxv2i64(<vscale x 4 x i32>, <vscale x 2 x i64>, i32)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.sqrshrnt.nxv8i16(<vscale x 16 x i8>, <vscale x 8 x i16>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqrshrnt.nxv4i32(<vscale x 8 x i16>, <vscale x 4 x i32>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sqrshrnt.nxv2i64(<vscale x 4 x i32>, <vscale x 2 x i64>, i32)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.sqrshrunt.nxv8i16(<vscale x 16 x i8>, <vscale x 8 x i16>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqrshrunt.nxv4i32(<vscale x 8 x i16>, <vscale x 4 x i32>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sqrshrunt.nxv2i64(<vscale x 4 x i32>, <vscale x 2 x i64>, i32)

