diff options
Diffstat (limited to 'clang')
-rw-r--r-- | clang/include/clang/Basic/arm_mve.td | 162 | ||||
-rw-r--r-- | clang/include/clang/Basic/arm_mve_defs.td | 25 | ||||
-rw-r--r-- | clang/test/CodeGen/arm-mve-intrinsics/scatter-gather.c | 2146 | ||||
-rw-r--r-- | clang/test/Sema/arm-mve-immediates.c | 56 | ||||
-rw-r--r-- | clang/utils/TableGen/MveEmitter.cpp | 40 |
5 files changed, 2403 insertions, 26 deletions
diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td index aca0d9fa925..6e0e8ce4e5e 100644 --- a/clang/include/clang/Basic/arm_mve.td +++ b/clang/include/clang/Basic/arm_mve.td @@ -72,22 +72,158 @@ def vcvt#half#q_m_f16: Intrinsic< } // loop over half = "b", "t" -let params = T.All32, pnt = PNT_None in -def vldrwq_gather_base_wb: Intrinsic< - Vector, (args Ptr<VecOf<Unsigned<Scalar>>>:$addr, imm_mem7bit<4>:$offset), - (seq (IRInt<"vldr_gather_base_wb", [Vector, VecOf<Unsigned<Scalar>>]> +multiclass gather_base<list<Type> types, int size> { + let params = types, pnt = PNT_None in { + def _gather_base: Intrinsic< + Vector, (args UVector:$addr, imm_mem7bit<size>:$offset), + (IRInt<"vldr_gather_base", [Vector, UVector]> $addr, $offset)>; + + def _gather_base_z: Intrinsic< + Vector, (args UVector:$addr, imm_mem7bit<size>:$offset, Predicate:$pred), + (IRInt<"vldr_gather_base_predicated", [Vector, UVector, Predicate]> + $addr, $offset, $pred)>; + + def _gather_base_wb: Intrinsic< + Vector, (args Ptr<UVector>:$addr, imm_mem7bit<size>:$offset), + (seq (IRInt<"vldr_gather_base_wb", [Vector, UVector]> (load $addr), $offset):$pair, - (store (xval $pair, 1), $addr), - (xval $pair, 0))>; + (store (xval $pair, 1), $addr), + (xval $pair, 0))>; -let params = T.All64, pnt = PNT_None in -def vldrdq_gather_base_wb_z: Intrinsic< - Vector, (args Ptr<VecOf<Unsigned<Scalar>>>:$addr, imm_mem7bit<8>:$offset, - Predicate:$pred), - (seq (IRInt<"vldr_gather_base_wb_predicated", [Vector, VecOf<Unsigned<Scalar>>, Predicate]> + def _gather_base_wb_z: Intrinsic< + Vector, (args Ptr<UVector>:$addr, imm_mem7bit<size>:$offset, + Predicate:$pred), + (seq (IRInt<"vldr_gather_base_wb_predicated", + [Vector, UVector, Predicate]> (load $addr), $offset, $pred):$pair, - (store (xval $pair, 1), $addr), - (xval $pair, 0))>; + (store (xval $pair, 1), $addr), + (xval $pair, 0))>; + } +} + +defm vldrwq: gather_base<T.All32, 4>; +defm vldrdq: gather_base<T.All64, 8>; + +multiclass scatter_base<list<Type> types, int size> { + let params = types in { + def _scatter_base: Intrinsic< + Void, (args UVector:$addr, imm_mem7bit<size>:$offset, Vector:$data), + (IRInt<"vstr_scatter_base", [UVector, Vector]> $addr, $offset, $data)>; + + def _scatter_base_p: Intrinsic< + Void, (args UVector:$addr, imm_mem7bit<size>:$offset, Vector:$data, + Predicate:$pred), + (IRInt<"vstr_scatter_base_predicated", [UVector, Vector, Predicate]> + $addr, $offset, $data, $pred)>; + + def _scatter_base_wb: Intrinsic< + Void, (args Ptr<UVector>:$addr, imm_mem7bit<size>:$offset, Vector:$data), + (seq (IRInt<"vstr_scatter_base_wb", [UVector, Vector]> + (load $addr), $offset, $data):$wbaddr, + (store $wbaddr, $addr))>; + + def _scatter_base_wb_p: Intrinsic< + Void, (args Ptr<UVector>:$addr, imm_mem7bit<size>:$offset, + Vector:$data, Predicate:$pred), + (seq (IRInt<"vstr_scatter_base_wb_predicated", + [UVector, Vector, Predicate]> + (load $addr), $offset, $data, $pred):$wbaddr, + (store $wbaddr, $addr))>; + } +} + +defm vstrwq: scatter_base<T.All32, 4>; +defm vstrdq: scatter_base<T.All64, 8>; + +multiclass gather_offset_unshifted<list<Type> types, PrimitiveType memtype> { + let params = types in { + def _gather_offset: Intrinsic< + Vector, (args CPtr<CopyKind<memtype, Scalar>>:$base, UVector:$offsets), + (IRInt<"vldr_gather_offset", + [Vector, CPtr<CopyKind<memtype, Scalar>>, UVector]> + $base, $offsets, memtype.size, 0, (unsignedflag Scalar))>; + def _gather_offset_z: Intrinsic< + Vector, (args CPtr<CopyKind<memtype, Scalar>>:$base, UVector:$offsets, + Predicate:$pred), + (IRInt<"vldr_gather_offset_predicated", + [Vector, CPtr<CopyKind<memtype, Scalar>>, UVector, Predicate]> + $base, $offsets, memtype.size, 0, (unsignedflag Scalar), $pred)>; + } +} + +multiclass gather_offset_shifted<list<Type> types, PrimitiveType memtype, + int shift> { + let params = types in { + def _gather_shifted_offset: Intrinsic< + Vector, (args CPtr<CopyKind<memtype, Scalar>>:$base, UVector:$offsets), + (IRInt<"vldr_gather_offset", + [Vector, CPtr<CopyKind<memtype, Scalar>>, UVector]> + $base, $offsets, memtype.size, shift, (unsignedflag Scalar))>; + def _gather_shifted_offset_z: Intrinsic< + Vector, (args CPtr<CopyKind<memtype, Scalar>>:$base, UVector:$offsets, + Predicate:$pred), + (IRInt<"vldr_gather_offset_predicated", + [Vector, CPtr<CopyKind<memtype, Scalar>>, UVector, Predicate]> + $base, $offsets, memtype.size, shift, (unsignedflag Scalar), $pred)>; + } +} + +multiclass gather_offset_both<list<Type> types, PrimitiveType memtype, + int shift> { + defm "": gather_offset_unshifted<types, memtype>; + defm "": gather_offset_shifted<types, memtype, shift>; +} + +defm vldrbq: gather_offset_unshifted<!listconcat(T.All8, T.Int16, T.Int32), u8>; +defm vldrhq: gather_offset_both<!listconcat(T.All16, T.Int32), u16, 1>; +defm vldrwq: gather_offset_both<T.All32, u32, 2>; +defm vldrdq: gather_offset_both<T.Int64, u64, 3>; + +multiclass scatter_offset_unshifted<list<Type> types, PrimitiveType memtype> { + let params = types in { + def _scatter_offset: Intrinsic< + Void, (args Ptr<CopyKind<memtype, Scalar>>:$base, UVector:$offsets, + Vector:$data), + (IRInt<"vstr_scatter_offset", + [Ptr<CopyKind<memtype, Scalar>>, UVector, Vector]> + $base, $offsets, $data, memtype.size, 0)>; + def _scatter_offset_p: Intrinsic< + Void, (args Ptr<CopyKind<memtype, Scalar>>:$base, UVector:$offsets, + Vector:$data, Predicate:$pred), + (IRInt<"vstr_scatter_offset_predicated", + [Ptr<CopyKind<memtype, Scalar>>, UVector, Vector, Predicate]> + $base, $offsets, $data, memtype.size, 0, $pred)>; + } +} + +multiclass scatter_offset_shifted<list<Type> types, PrimitiveType memtype, + int shift> { + let params = types in { + def _scatter_shifted_offset: Intrinsic< + Void, (args Ptr<CopyKind<memtype, Scalar>>:$base, UVector:$offsets, + Vector:$data), + (IRInt<"vstr_scatter_offset", + [Ptr<CopyKind<memtype, Scalar>>, UVector, Vector]> + $base, $offsets, $data, memtype.size, shift)>; + def _scatter_shifted_offset_p: Intrinsic< + Void, (args Ptr<CopyKind<memtype, Scalar>>:$base, UVector:$offsets, + Vector:$data, Predicate:$pred), + (IRInt<"vstr_scatter_offset_predicated", + [Ptr<CopyKind<memtype, Scalar>>, UVector, Vector, Predicate]> + $base, $offsets, $data, memtype.size, shift, $pred)>; + } +} + +multiclass scatter_offset_both<list<Type> types, PrimitiveType memtype, + int shift> { + defm "": scatter_offset_unshifted<types, memtype>; + defm "": scatter_offset_shifted<types, memtype, shift>; +} + +defm vstrbq: scatter_offset_unshifted<!listconcat(T.All8,T.Int16,T.Int32), u8>; +defm vstrhq: scatter_offset_both<!listconcat(T.All16, T.Int32), u16, 1>; +defm vstrwq: scatter_offset_both<T.All32, u32, 2>; +defm vstrdq: scatter_offset_both<T.Int64, u64, 3>; let params = [Void], pnt = PNT_None in def urshrl: Intrinsic<u64, (args u64:$value, imm_1to32:$shift), diff --git a/clang/include/clang/Basic/arm_mve_defs.td b/clang/include/clang/Basic/arm_mve_defs.td index 14afc04a825..3d9333f3d44 100644 --- a/clang/include/clang/Basic/arm_mve_defs.td +++ b/clang/include/clang/Basic/arm_mve_defs.td @@ -82,6 +82,11 @@ class IRInt<string name_, list<Type> params_ = [], bit appendKind_ = 0> { // the return value of the seq construction as a whole. def seq; +// Another magic operation is 'unsignedflag', which you give a scalar +// _type_ as an argument, and it expands into 1 for an unsigned type +// and 0 for a signed (or floating) one. +def unsignedflag; + // If you put CustomCodegen<"foo"> in an intrinsic's codegen field, it // indicates that the IR generation for that intrinsic is done by handwritten // C++ and not autogenerated at all. The effect in the MVE builtin codegen @@ -109,7 +114,7 @@ def CTO_Vec: ComplexTypeOp; def CTO_Pred: ComplexTypeOp; class CTO_Tuple<int n_>: ComplexTypeOp { int n = n_; } class CTO_Pointer<bit const_>: ComplexTypeOp { bit const = const_; } -class CTO_Sign<bit signed_>: ComplexTypeOp { bit signed = signed_; } +def CTO_CopyKind: ComplexTypeOp; // ----------------------------------------------------------------------------- // Instances of Type intended to be used directly in the specification of an @@ -167,10 +172,20 @@ class MultiVector<int n>: ComplexType<(CTO_Tuple<n> Vector)>; class Ptr<Type t>: ComplexType<(CTO_Pointer<0> t)>; class CPtr<Type t>: ComplexType<(CTO_Pointer<1> t)>; -// Unsigned<t> expects t to be a scalar, and expands to the unsigned integer -// scalar of the same size. So it returns u16 if you give it s16 or f16 (or -// u16 itself). -class Unsigned<Type t>: ComplexType<(CTO_Sign<0> t)>; +// CopyKind<s,k> expects s and k to be scalar types. It returns a scalar type +// whose kind (signed, unsigned or float) matches that of k, and whose size +// matches that of s. +class CopyKind<Type s, Type k>: ComplexType<(CTO_CopyKind s, k)>; + +// Unsigned<t> expects t to be a scalar type, and expands to the unsigned +// integer scalar of the same size. So it returns u16 if you give it s16 or +// f16 (or u16 itself). +class Unsigned<Type t>: ComplexType<(CTO_CopyKind t, u32)>; + +// UScalar and UVector expand to the unsigned-integer versions of +// Scalar and Vector. +def UScalar: Unsigned<Scalar>; +def UVector: VecOf<UScalar>; // ----------------------------------------------------------------------------- // Internal definitions for specifying immediate arguments for an intrinsic. diff --git a/clang/test/CodeGen/arm-mve-intrinsics/scatter-gather.c b/clang/test/CodeGen/arm-mve-intrinsics/scatter-gather.c new file mode 100644 index 00000000000..830f62442c3 --- /dev/null +++ b/clang/test/CodeGen/arm-mve-intrinsics/scatter-gather.c @@ -0,0 +1,2146 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s + +#include <arm_mve.h> + +// CHECK-LABEL: @test_vldrbq_gather_offset_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i8.v8i16(i8* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], i32 8, i32 0, i32 0) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +int16x8_t test_vldrbq_gather_offset_s16(const int8_t *base, uint16x8_t offset) +{ +#ifdef POLYMORPHIC + return vldrbq_gather_offset(base, offset); +#else /* POLYMORPHIC */ + return vldrbq_gather_offset_s16(base, offset); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vldrbq_gather_offset_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i8.v4i32(i8* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 8, i32 0, i32 0) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +int32x4_t test_vldrbq_gather_offset_s32(const int8_t *base, uint32x4_t offset) +{ +#ifdef POLYMORPHIC + return vldrbq_gather_offset(base, offset); +#else /* POLYMORPHIC */ + return vldrbq_gather_offset_s32(base, offset); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vldrbq_gather_offset_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vldr.gather.offset.v16i8.p0i8.v16i8(i8* [[BASE:%.*]], <16 x i8> [[OFFSET:%.*]], i32 8, i32 0, i32 0) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +int8x16_t test_vldrbq_gather_offset_s8(const int8_t *base, uint8x16_t offset) +{ +#ifdef POLYMORPHIC + return vldrbq_gather_offset(base, offset); +#else /* POLYMORPHIC */ + return vldrbq_gather_offset_s8(base, offset); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vldrbq_gather_offset_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i8.v8i16(i8* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], i32 8, i32 0, i32 1) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +uint16x8_t test_vldrbq_gather_offset_u16(const uint8_t *base, uint16x8_t offset) +{ +#ifdef POLYMORPHIC + return vldrbq_gather_offset(base, offset); +#else /* POLYMORPHIC */ + return vldrbq_gather_offset_u16(base, offset); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vldrbq_gather_offset_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i8.v4i32(i8* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 8, i32 0, i32 1) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +uint32x4_t test_vldrbq_gather_offset_u32(const uint8_t *base, uint32x4_t offset) +{ +#ifdef POLYMORPHIC + return vldrbq_gather_offset(base, offset); +#else /* POLYMORPHIC */ + return vldrbq_gather_offset_u32(base, offset); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vldrbq_gather_offset_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vldr.gather.offset.v16i8.p0i8.v16i8(i8* [[BASE:%.*]], <16 x i8> [[OFFSET:%.*]], i32 8, i32 0, i32 1) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +uint8x16_t test_vldrbq_gather_offset_u8(const uint8_t *base, uint8x16_t offset) +{ +#ifdef POLYMORPHIC + return vldrbq_gather_offset(base, offset); +#else /* POLYMORPHIC */ + return vldrbq_gather_offset_u8(base, offset); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vldrbq_gather_offset_z_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i8.v8i16.v8i1(i8* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], i32 8, i32 0, i32 0, <8 x i1> [[TMP1]]) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +int16x8_t test_vldrbq_gather_offset_z_s16(const int8_t *base, uint16x8_t offset, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vldrbq_gather_offset_z(base, offset, p); +#else /* POLYMORPHIC */ + return vldrbq_gather_offset_z_s16(base, offset, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vldrbq_gather_offset_z_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i8.v4i32.v4i1(i8* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 8, i32 0, i32 0, <4 x i1> [[TMP1]]) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +int32x4_t test_vldrbq_gather_offset_z_s32(const int8_t *base, uint32x4_t offset, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vldrbq_gather_offset_z(base, offset, p); +#else /* POLYMORPHIC */ + return vldrbq_gather_offset_z_s32(base, offset, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vldrbq_gather_offset_z_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vldr.gather.offset.predicated.v16i8.p0i8.v16i8.v16i1(i8* [[BASE:%.*]], <16 x i8> [[OFFSET:%.*]], i32 8, i32 0, i32 0, <16 x i1> [[TMP1]]) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +int8x16_t test_vldrbq_gather_offset_z_s8(const int8_t *base, uint8x16_t offset, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vldrbq_gather_offset_z(base, offset, p); +#else /* POLYMORPHIC */ + return vldrbq_gather_offset_z_s8(base, offset, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vldrbq_gather_offset_z_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i8.v8i16.v8i1(i8* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], i32 8, i32 0, i32 1, <8 x i1> [[TMP1]]) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +uint16x8_t test_vldrbq_gather_offset_z_u16(const uint8_t *base, uint16x8_t offset, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vldrbq_gather_offset_z(base, offset, p); +#else /* POLYMORPHIC */ + return vldrbq_gather_offset_z_u16(base, offset, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vldrbq_gather_offset_z_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i8.v4i32.v4i1(i8* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 8, i32 0, i32 1, <4 x i1> [[TMP1]]) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +uint32x4_t test_vldrbq_gather_offset_z_u32(const uint8_t *base, uint32x4_t offset, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vldrbq_gather_offset_z(base, offset, p); +#else /* POLYMORPHIC */ + return vldrbq_gather_offset_z_u32(base, offset, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vldrbq_gather_offset_z_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vldr.gather.offset.predicated.v16i8.p0i8.v16i8.v16i1(i8* [[BASE:%.*]], <16 x i8> [[OFFSET:%.*]], i32 8, i32 0, i32 1, <16 x i1> [[TMP1]]) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +uint8x16_t test_vldrbq_gather_offset_z_u8(const uint8_t *base, uint8x16_t offset, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vldrbq_gather_offset_z(base, offset, p); +#else /* POLYMORPHIC */ + return vldrbq_gather_offset_z_u8(base, offset, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vldrdq_gather_base_s64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <2 x i64> @llvm.arm.mve.vldr.gather.base.v2i64.v2i64(<2 x i64> [[ADDR:%.*]], i32 616) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +int64x2_t test_vldrdq_gather_base_s64(uint64x2_t addr) +{ + return vldrdq_gather_base_s64(addr, 0x268); +} + +// CHECK-LABEL: @test_vldrdq_gather_base_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <2 x i64> @llvm.arm.mve.vldr.gather.base.v2i64.v2i64(<2 x i64> [[ADDR:%.*]], i32 336) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +uint64x2_t test_vldrdq_gather_base_u64(uint64x2_t addr) +{ + return vldrdq_gather_base_u64(addr, 0x150); +} + +// CHECK-LABEL: @test_vldrdq_gather_base_wb_s64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, <2 x i64>* [[ADDR:%.*]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.v2i64.v2i64(<2 x i64> [[TMP0]], i32 576) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[TMP1]], 1 +// CHECK-NEXT: store <2 x i64> [[TMP2]], <2 x i64>* [[ADDR]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[TMP1]], 0 +// CHECK-NEXT: ret <2 x i64> [[TMP3]] +// +int64x2_t test_vldrdq_gather_base_wb_s64(uint64x2_t *addr) +{ + return vldrdq_gather_base_wb_s64(addr, 0x240); +} + +// CHECK-LABEL: @test_vldrdq_gather_base_wb_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, <2 x i64>* [[ADDR:%.*]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.v2i64.v2i64(<2 x i64> [[TMP0]], i32 328) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[TMP1]], 1 +// CHECK-NEXT: store <2 x i64> [[TMP2]], <2 x i64>* [[ADDR]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[TMP1]], 0 +// CHECK-NEXT: ret <2 x i64> [[TMP3]] +// +uint64x2_t test_vldrdq_gather_base_wb_u64(uint64x2_t *addr) +{ + return vldrdq_gather_base_wb_u64(addr, 0x148); +} + +// CHECK-LABEL: @test_vldrdq_gather_base_wb_z_s64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, <2 x i64>* [[ADDR:%.*]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64> [[TMP0]], i32 664, <4 x i1> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[TMP3]], 1 +// CHECK-NEXT: store <2 x i64> [[TMP4]], <2 x i64>* [[ADDR]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[TMP3]], 0 +// CHECK-NEXT: ret <2 x i64> [[TMP5]] +// +int64x2_t test_vldrdq_gather_base_wb_z_s64(uint64x2_t *addr, mve_pred16_t p) +{ + return vldrdq_gather_base_wb_z_s64(addr, 0x298, p); +} + +// CHECK-LABEL: @test_vldrdq_gather_base_wb_z_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, <2 x i64>* [[ADDR:%.*]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64> [[TMP0]], i32 656, <4 x i1> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[TMP3]], 1 +// CHECK-NEXT: store <2 x i64> [[TMP4]], <2 x i64>* [[ADDR]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[TMP3]], 0 +// CHECK-NEXT: ret <2 x i64> [[TMP5]] +// +uint64x2_t test_vldrdq_gather_base_wb_z_u64(uint64x2_t *addr, mve_pred16_t p) +{ + return vldrdq_gather_base_wb_z_u64(addr, 0x290, p); +} + +// CHECK-LABEL: @test_vldrdq_gather_base_z_s64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <2 x i64> @llvm.arm.mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1(<2 x i64> [[ADDR:%.*]], i32 888, <4 x i1> [[TMP1]]) +// CHECK-NEXT: ret <2 x i64> [[TMP2]] +// +int64x2_t test_vldrdq_gather_base_z_s64(uint64x2_t addr, mve_pred16_t p) +{ + return vldrdq_gather_base_z_s64(addr, 0x378, p); +} + +// CHECK-LABEL: @test_vldrdq_gather_base_z_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <2 x i64> @llvm.arm.mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1(<2 x i64> [[ADDR:%.*]], i32 1000, <4 x i1> [[TMP1]]) +// CHECK-NEXT: ret <2 x i64> [[TMP2]] +// +uint64x2_t test_vldrdq_gather_base_z_u64(uint64x2_t addr, mve_pred16_t p) +{ + return vldrdq_gather_base_z_u64(addr, 0x3e8, p); +} + +// CHECK-LABEL: @test_vldrdq_gather_offset_s64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.v2i64.p0i64.v2i64(i64* [[BASE:%.*]], <2 x i64> [[OFFSET:%.*]], i32 64, i32 0, i32 0) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +int64x2_t test_vldrdq_gather_offset_s64(const int64_t *base, uint64x2_t offset) +{ +#ifdef POLYMORPHIC + return vldrdq_gather_offset(base, offset); +#else /* POLYMORPHIC */ + return vldrdq_gather_offset_s64(base, offset); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vldrdq_gather_offset_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.v2i64.p0i64.v2i64(i64* [[BASE:%.*]], <2 x i64> [[OFFSET:%.*]], i32 64, i32 0, i32 1) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +uint64x2_t test_vldrdq_gather_offset_u64(const uint64_t *base, uint64x2_t offset) +{ +#ifdef POLYMORPHIC + return vldrdq_gather_offset(base, offset); +#else /* POLYMORPHIC */ + return vldrdq_gather_offset_u64(base, offset); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vldrdq_gather_offset_z_s64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1(i64* [[BASE:%.*]], <2 x i64> [[OFFSET:%.*]], i32 64, i32 0, i32 0, <4 x i1> [[TMP1]]) +// CHECK-NEXT: ret <2 x i64> [[TMP2]] +// +int64x2_t test_vldrdq_gather_offset_z_s64(const int64_t *base, uint64x2_t offset, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vldrdq_gather_offset_z(base, offset, p); +#else /* POLYMORPHIC */ + return vldrdq_gather_offset_z_s64(base, offset, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vldrdq_gather_offset_z_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1(i64* [[BASE:%.*]], <2 x i64> [[OFFSET:%.*]], i32 64, i32 0, i32 1, <4 x i1> [[TMP1]]) +// CHECK-NEXT: ret <2 x i64> [[TMP2]] +// +uint64x2_t test_vldrdq_gather_offset_z_u64(const uint64_t *base, uint64x2_t offset, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vldrdq_gather_offset_z(base, offset, p); +#else /* POLYMORPHIC */ + return vldrdq_gather_offset_z_u64(base, offset, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vldrdq_gather_shifted_offset_s64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.v2i64.p0i64.v2i64(i64* [[BASE:%.*]], <2 x i64> [[OFFSET:%.*]], i32 64, i32 3, i32 0) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +int64x2_t test_vldrdq_gather_shifted_offset_s64(const int64_t *base, uint64x2_t offset) +{ +#ifdef POLYMORPHIC + return vldrdq_gather_shifted_offset(base, offset); +#else /* POLYMORPHIC */ + return vldrdq_gather_shifted_offset_s64(base, offset); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vldrdq_gather_shifted_offset_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.v2i64.p0i64.v2i64(i64* [[BASE:%.*]], <2 x i64> [[OFFSET:%.*]], i32 64, i32 3, i32 1) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +uint64x2_t test_vldrdq_gather_shifted_offset_u64(const uint64_t *base, uint64x2_t offset) +{ +#ifdef POLYMORPHIC + return vldrdq_gather_shifted_offset(base, offset); +#else /* POLYMORPHIC */ + return vldrdq_gather_shifted_offset_u64(base, offset); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vldrdq_gather_shifted_offset_z_s64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1(i64* [[BASE:%.*]], <2 x i64> [[OFFSET:%.*]], i32 64, i32 3, i32 0, <4 x i1> [[TMP1]]) +// CHECK-NEXT: ret <2 x i64> [[TMP2]] +// +int64x2_t test_vldrdq_gather_shifted_offset_z_s64(const int64_t *base, uint64x2_t offset, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vldrdq_gather_shifted_offset_z(base, offset, p); +#else /* POLYMORPHIC */ + return vldrdq_gather_shifted_offset_z_s64(base, offset, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vldrdq_gather_shifted_offset_z_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1(i64* [[BASE:%.*]], <2 x i64> [[OFFSET:%.*]], i32 64, i32 3, i32 1, <4 x i1> [[TMP1]]) +// CHECK-NEXT: ret <2 x i64> [[TMP2]] +// +uint64x2_t test_vldrdq_gather_shifted_offset_z_u64(const uint64_t *base, uint64x2_t offset, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vldrdq_gather_shifted_offset_z(base, offset, p); +#else /* POLYMORPHIC */ + return vldrdq_gather_shifted_offset_z_u64(base, offset, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vldrhq_gather_offset_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vldr.gather.offset.v8f16.p0f16.v8i16(half* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], i32 16, i32 0, i32 0) +// CHECK-NEXT: ret <8 x half> [[TMP0]] +// +float16x8_t test_vldrhq_gather_offset_f16(const float16_t *base, uint16x8_t offset) +{ +#ifdef POLYMORPHIC + return vldrhq_gather_offset(base, offset); +#else /* POLYMORPHIC */ + return vldrhq_gather_offset_f16(base, offset); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vldrhq_gather_offset_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i16.v8i16(i16* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], i32 16, i32 0, i32 0) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +int16x8_t test_vldrhq_gather_offset_s16(const int16_t *base, uint16x8_t offset) +{ +#ifdef POLYMORPHIC + return vldrhq_gather_offset(base, offset); +#else /* POLYMORPHIC */ + return vldrhq_gather_offset_s16(base, offset); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vldrhq_gather_offset_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i16.v4i32(i16* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 16, i32 0, i32 0) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +int32x4_t test_vldrhq_gather_offset_s32(const int16_t *base, uint32x4_t offset) +{ +#ifdef POLYMORPHIC + return vldrhq_gather_offset(base, offset); +#else /* POLYMORPHIC */ + return vldrhq_gather_offset_s32(base, offset); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vldrhq_gather_offset_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i16.v8i16(i16* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], i32 16, i32 0, i32 1) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +uint16x8_t test_vldrhq_gather_offset_u16(const uint16_t *base, uint16x8_t offset) +{ +#ifdef POLYMORPHIC + return vldrhq_gather_offset(base, offset); +#else /* POLYMORPHIC */ + return vldrhq_gather_offset_u16(base, offset); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vldrhq_gather_offset_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i16.v4i32(i16* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 16, i32 0, i32 1) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +uint32x4_t test_vldrhq_gather_offset_u32(const uint16_t *base, uint32x4_t offset) +{ +#ifdef POLYMORPHIC + return vldrhq_gather_offset(base, offset); +#else /* POLYMORPHIC */ + return vldrhq_gather_offset_u32(base, offset); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vldrhq_gather_offset_z_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vldr.gather.offset.predicated.v8f16.p0f16.v8i16.v8i1(half* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], i32 16, i32 0, i32 0, <8 x i1> [[TMP1]]) +// CHECK-NEXT: ret <8 x half> [[TMP2]] +// +float16x8_t test_vldrhq_gather_offset_z_f16(const float16_t *base, uint16x8_t offset, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vldrhq_gather_offset_z(base, offset, p); +#else /* POLYMORPHIC */ + return vldrhq_gather_offset_z_f16(base, offset, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vldrhq_gather_offset_z_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i16.v8i16.v8i1(i16* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], i32 16, i32 0, i32 0, <8 x i1> [[TMP1]]) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +int16x8_t test_vldrhq_gather_offset_z_s16(const int16_t *base, uint16x8_t offset, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vldrhq_gather_offset_z(base, offset, p); +#else /* POLYMORPHIC */ + return vldrhq_gather_offset_z_s16(base, offset, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vldrhq_gather_offset_z_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i16.v4i32.v4i1(i16* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 16, i32 0, i32 0, <4 x i1> [[TMP1]]) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +int32x4_t test_vldrhq_gather_offset_z_s32(const int16_t *base, uint32x4_t offset, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vldrhq_gather_offset_z(base, offset, p); +#else /* POLYMORPHIC */ + return vldrhq_gather_offset_z_s32(base, offset, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vldrhq_gather_offset_z_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i16.v8i16.v8i1(i16* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], i32 16, i32 0, i32 1, <8 x i1> [[TMP1]]) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +uint16x8_t test_vldrhq_gather_offset_z_u16(const uint16_t *base, uint16x8_t offset, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vldrhq_gather_offset_z(base, offset, p); +#else /* POLYMORPHIC */ + return vldrhq_gather_offset_z_u16(base, offset, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vldrhq_gather_offset_z_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i16.v4i32.v4i1(i16* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 16, i32 0, i32 1, <4 x i1> [[TMP1]]) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +uint32x4_t test_vldrhq_gather_offset_z_u32(const uint16_t *base, uint32x4_t offset, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vldrhq_gather_offset_z(base, offset, p); +#else /* POLYMORPHIC */ + return vldrhq_gather_offset_z_u32(base, offset, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vldrhq_gather_shifted_offset_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vldr.gather.offset.v8f16.p0f16.v8i16(half* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], i32 16, i32 1, i32 0) +// CHECK-NEXT: ret <8 x half> [[TMP0]] +// +float16x8_t test_vldrhq_gather_shifted_offset_f16(const float16_t *base, uint16x8_t offset) +{ +#ifdef POLYMORPHIC + return vldrhq_gather_shifted_offset(base, offset); +#else /* POLYMORPHIC */ + return vldrhq_gather_shifted_offset_f16(base, offset); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vldrhq_gather_shifted_offset_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i16.v8i16(i16* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], i32 16, i32 1, i32 0) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +int16x8_t test_vldrhq_gather_shifted_offset_s16(const int16_t *base, uint16x8_t offset) +{ +#ifdef POLYMORPHIC + return vldrhq_gather_shifted_offset(base, offset); +#else /* POLYMORPHIC */ + return vldrhq_gather_shifted_offset_s16(base, offset); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vldrhq_gather_shifted_offset_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i16.v4i32(i16* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 16, i32 1, i32 0) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +int32x4_t test_vldrhq_gather_shifted_offset_s32(const int16_t *base, uint32x4_t offset) +{ +#ifdef POLYMORPHIC + return vldrhq_gather_shifted_offset(base, offset); +#else /* POLYMORPHIC */ + return vldrhq_gather_shifted_offset_s32(base, offset); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vldrhq_gather_shifted_offset_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i16.v8i16(i16* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], i32 16, i32 1, i32 1) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +uint16x8_t test_vldrhq_gather_shifted_offset_u16(const uint16_t *base, uint16x8_t offset) +{ +#ifdef POLYMORPHIC + return vldrhq_gather_shifted_offset(base, offset); +#else /* POLYMORPHIC */ + return vldrhq_gather_shifted_offset_u16(base, offset); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vldrhq_gather_shifted_offset_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i16.v4i32(i16* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 16, i32 1, i32 1) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +uint32x4_t test_vldrhq_gather_shifted_offset_u32(const uint16_t *base, uint32x4_t offset) +{ +#ifdef POLYMORPHIC + return vldrhq_gather_shifted_offset(base, offset); +#else /* POLYMORPHIC */ + return vldrhq_gather_shifted_offset_u32(base, offset); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vldrhq_gather_shifted_offset_z_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vldr.gather.offset.predicated.v8f16.p0f16.v8i16.v8i1(half* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], i32 16, i32 1, i32 0, <8 x i1> [[TMP1]]) +// CHECK-NEXT: ret <8 x half> [[TMP2]] +// +float16x8_t test_vldrhq_gather_shifted_offset_z_f16(const float16_t *base, uint16x8_t offset, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vldrhq_gather_shifted_offset_z(base, offset, p); +#else /* POLYMORPHIC */ + return vldrhq_gather_shifted_offset_z_f16(base, offset, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vldrhq_gather_shifted_offset_z_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i16.v8i16.v8i1(i16* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], i32 16, i32 1, i32 0, <8 x i1> [[TMP1]]) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +int16x8_t test_vldrhq_gather_shifted_offset_z_s16(const int16_t *base, uint16x8_t offset, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vldrhq_gather_shifted_offset_z(base, offset, p); +#else /* POLYMORPHIC */ + return vldrhq_gather_shifted_offset_z_s16(base, offset, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vldrhq_gather_shifted_offset_z_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i16.v4i32.v4i1(i16* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 16, i32 1, i32 0, <4 x i1> [[TMP1]]) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +int32x4_t test_vldrhq_gather_shifted_offset_z_s32(const int16_t *base, uint32x4_t offset, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vldrhq_gather_shifted_offset_z(base, offset, p); +#else /* POLYMORPHIC */ + return vldrhq_gather_shifted_offset_z_s32(base, offset, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vldrhq_gather_shifted_offset_z_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i16.v8i16.v8i1(i16* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], i32 16, i32 1, i32 1, <8 x i1> [[TMP1]]) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +uint16x8_t test_vldrhq_gather_shifted_offset_z_u16(const uint16_t *base, uint16x8_t offset, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vldrhq_gather_shifted_offset_z(base, offset, p); +#else /* POLYMORPHIC */ + return vldrhq_gather_shifted_offset_z_u16(base, offset, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vldrhq_gather_shifted_offset_z_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i16.v4i32.v4i1(i16* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 16, i32 1, i32 1, <4 x i1> [[TMP1]]) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +uint32x4_t test_vldrhq_gather_shifted_offset_z_u32(const uint16_t *base, uint32x4_t offset, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vldrhq_gather_shifted_offset_z(base, offset, p); +#else /* POLYMORPHIC */ + return vldrhq_gather_shifted_offset_z_u32(base, offset, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vldrwq_gather_base_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.vldr.gather.base.v4f32.v4i32(<4 x i32> [[ADDR:%.*]], i32 12) +// CHECK-NEXT: ret <4 x float> [[TMP0]] +// +float32x4_t test_vldrwq_gather_base_f32(uint32x4_t addr) +{ + return vldrwq_gather_base_f32(addr, 0xc); +} + +// CHECK-LABEL: @test_vldrwq_gather_base_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.base.v4i32.v4i32(<4 x i32> [[ADDR:%.*]], i32 400) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +int32x4_t test_vldrwq_gather_base_s32(uint32x4_t addr) +{ + return vldrwq_gather_base_s32(addr, 0x190); +} + +// CHECK-LABEL: @test_vldrwq_gather_base_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.base.v4i32.v4i32(<4 x i32> [[ADDR:%.*]], i32 284) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +uint32x4_t test_vldrwq_gather_base_u32(uint32x4_t addr) +{ + return vldrwq_gather_base_u32(addr, 0x11c); +} + +// CHECK-LABEL: @test_vldrwq_gather_base_wb_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* [[ADDR:%.*]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = call { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4f32.v4i32(<4 x i32> [[TMP0]], i32 64) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <4 x float>, <4 x i32> } [[TMP1]], 1 +// CHECK-NEXT: store <4 x i32> [[TMP2]], <4 x i32>* [[ADDR]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x float>, <4 x i32> } [[TMP1]], 0 +// CHECK-NEXT: ret <4 x float> [[TMP3]] +// +float32x4_t test_vldrwq_gather_base_wb_f32(uint32x4_t *addr) +{ + return vldrwq_gather_base_wb_f32(addr, 0x40); +} + +// CHECK-LABEL: @test_vldrwq_gather_base_wb_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* [[ADDR:%.*]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4i32.v4i32(<4 x i32> [[TMP0]], i32 80) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[TMP1]], 1 +// CHECK-NEXT: store <4 x i32> [[TMP2]], <4 x i32>* [[ADDR]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[TMP1]], 0 +// CHECK-NEXT: ret <4 x i32> [[TMP3]] +// +int32x4_t test_vldrwq_gather_base_wb_s32(uint32x4_t *addr) +{ + return vldrwq_gather_base_wb_s32(addr, 0x50); +} + +// CHECK-LABEL: @test_vldrwq_gather_base_wb_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* [[ADDR:%.*]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4i32.v4i32(<4 x i32> [[TMP0]], i32 480) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[TMP1]], 1 +// CHECK-NEXT: store <4 x i32> [[TMP2]], <4 x i32>* [[ADDR]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[TMP1]], 0 +// CHECK-NEXT: ret <4 x i32> [[TMP3]] +// +uint32x4_t test_vldrwq_gather_base_wb_u32(uint32x4_t *addr) +{ + return vldrwq_gather_base_wb_u32(addr, 0x1e0); +} + +// CHECK-LABEL: @test_vldrwq_gather_base_wb_z_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* [[ADDR:%.*]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = call { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4f32.v4i32.v4i1(<4 x i32> [[TMP0]], i32 352, <4 x i1> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x float>, <4 x i32> } [[TMP3]], 1 +// CHECK-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* [[ADDR]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x float>, <4 x i32> } [[TMP3]], 0 +// CHECK-NEXT: ret <4 x float> [[TMP5]] +// +float32x4_t test_vldrwq_gather_base_wb_z_f32(uint32x4_t *addr, mve_pred16_t p) +{ + return vldrwq_gather_base_wb_z_f32(addr, 0x160, p); +} + +// CHECK-LABEL: @test_vldrwq_gather_base_wb_z_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* [[ADDR:%.*]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32> [[TMP0]], i32 276, <4 x i1> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[TMP3]], 1 +// CHECK-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* [[ADDR]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[TMP3]], 0 +// CHECK-NEXT: ret <4 x i32> [[TMP5]] +// +int32x4_t test_vldrwq_gather_base_wb_z_s32(uint32x4_t *addr, mve_pred16_t p) +{ + return vldrwq_gather_base_wb_z_s32(addr, 0x114, p); +} + +// CHECK-LABEL: @test_vldrwq_gather_base_wb_z_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* [[ADDR:%.*]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32> [[TMP0]], i32 88, <4 x i1> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[TMP3]], 1 +// CHECK-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* [[ADDR]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[TMP3]], 0 +// CHECK-NEXT: ret <4 x i32> [[TMP5]] +// +uint32x4_t test_vldrwq_gather_base_wb_z_u32(uint32x4_t *addr, mve_pred16_t p) +{ + return vldrwq_gather_base_wb_z_u32(addr, 0x58, p); +} + +// CHECK-LABEL: @test_vldrwq_gather_base_z_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vldr.gather.base.predicated.v4f32.v4i32.v4i1(<4 x i32> [[ADDR:%.*]], i32 300, <4 x i1> [[TMP1]]) +// CHECK-NEXT: ret <4 x float> [[TMP2]] +// +float32x4_t test_vldrwq_gather_base_z_f32(uint32x4_t addr, mve_pred16_t p) +{ + return vldrwq_gather_base_z_f32(addr, 0x12c, p); +} + +// CHECK-LABEL: @test_vldrwq_gather_base_z_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.base.predicated.v4i32.v4i32.v4i1(<4 x i32> [[ADDR:%.*]], i32 440, <4 x i1> [[TMP1]]) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +int32x4_t test_vldrwq_gather_base_z_s32(uint32x4_t addr, mve_pred16_t p) +{ + return vldrwq_gather_base_z_s32(addr, 0x1b8, p); +} + +// CHECK-LABEL: @test_vldrwq_gather_base_z_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.base.predicated.v4i32.v4i32.v4i1(<4 x i32> [[ADDR:%.*]], i32 300, <4 x i1> [[TMP1]]) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +uint32x4_t test_vldrwq_gather_base_z_u32(uint32x4_t addr, mve_pred16_t p) +{ + return vldrwq_gather_base_z_u32(addr, 0x12c, p); +} + +// CHECK-LABEL: @test_vldrwq_gather_offset_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.vldr.gather.offset.v4f32.p0f32.v4i32(float* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 32, i32 0, i32 0) +// CHECK-NEXT: ret <4 x float> [[TMP0]] +// +float32x4_t test_vldrwq_gather_offset_f32(const float32_t *base, uint32x4_t offset) +{ +#ifdef POLYMORPHIC + return vldrwq_gather_offset(base, offset); +#else /* POLYMORPHIC */ + return vldrwq_gather_offset_f32(base, offset); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vldrwq_gather_offset_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i32.v4i32(i32* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 32, i32 0, i32 0) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +int32x4_t test_vldrwq_gather_offset_s32(const int32_t *base, uint32x4_t offset) +{ +#ifdef POLYMORPHIC + return vldrwq_gather_offset(base, offset); +#else /* POLYMORPHIC */ + return vldrwq_gather_offset_s32(base, offset); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vldrwq_gather_offset_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i32.v4i32(i32* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 32, i32 0, i32 1) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +uint32x4_t test_vldrwq_gather_offset_u32(const uint32_t *base, uint32x4_t offset) +{ +#ifdef POLYMORPHIC + return vldrwq_gather_offset(base, offset); +#else /* POLYMORPHIC */ + return vldrwq_gather_offset_u32(base, offset); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vldrwq_gather_offset_z_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vldr.gather.offset.predicated.v4f32.p0f32.v4i32.v4i1(float* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 32, i32 0, i32 0, <4 x i1> [[TMP1]]) +// CHECK-NEXT: ret <4 x float> [[TMP2]] +// +float32x4_t test_vldrwq_gather_offset_z_f32(const float32_t *base, uint32x4_t offset, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vldrwq_gather_offset_z(base, offset, p); +#else /* POLYMORPHIC */ + return vldrwq_gather_offset_z_f32(base, offset, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vldrwq_gather_offset_z_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i32.v4i32.v4i1(i32* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 32, i32 0, i32 0, <4 x i1> [[TMP1]]) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +int32x4_t test_vldrwq_gather_offset_z_s32(const int32_t *base, uint32x4_t offset, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vldrwq_gather_offset_z(base, offset, p); +#else /* POLYMORPHIC */ + return vldrwq_gather_offset_z_s32(base, offset, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vldrwq_gather_offset_z_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i32.v4i32.v4i1(i32* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 32, i32 0, i32 1, <4 x i1> [[TMP1]]) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +uint32x4_t test_vldrwq_gather_offset_z_u32(const uint32_t *base, uint32x4_t offset, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vldrwq_gather_offset_z(base, offset, p); +#else /* POLYMORPHIC */ + return vldrwq_gather_offset_z_u32(base, offset, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vldrwq_gather_shifted_offset_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.vldr.gather.offset.v4f32.p0f32.v4i32(float* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 32, i32 2, i32 0) +// CHECK-NEXT: ret <4 x float> [[TMP0]] +// +float32x4_t test_vldrwq_gather_shifted_offset_f32(const float32_t *base, uint32x4_t offset) +{ +#ifdef POLYMORPHIC + return vldrwq_gather_shifted_offset(base, offset); +#else /* POLYMORPHIC */ + return vldrwq_gather_shifted_offset_f32(base, offset); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vldrwq_gather_shifted_offset_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i32.v4i32(i32* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 32, i32 2, i32 0) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +int32x4_t test_vldrwq_gather_shifted_offset_s32(const int32_t *base, uint32x4_t offset) +{ +#ifdef POLYMORPHIC + return vldrwq_gather_shifted_offset(base, offset); +#else /* POLYMORPHIC */ + return vldrwq_gather_shifted_offset_s32(base, offset); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vldrwq_gather_shifted_offset_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i32.v4i32(i32* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 32, i32 2, i32 1) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +uint32x4_t test_vldrwq_gather_shifted_offset_u32(const uint32_t *base, uint32x4_t offset) +{ +#ifdef POLYMORPHIC + return vldrwq_gather_shifted_offset(base, offset); +#else /* POLYMORPHIC */ + return vldrwq_gather_shifted_offset_u32(base, offset); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vldrwq_gather_shifted_offset_z_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vldr.gather.offset.predicated.v4f32.p0f32.v4i32.v4i1(float* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 32, i32 2, i32 0, <4 x i1> [[TMP1]]) +// CHECK-NEXT: ret <4 x float> [[TMP2]] +// +float32x4_t test_vldrwq_gather_shifted_offset_z_f32(const float32_t *base, uint32x4_t offset, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vldrwq_gather_shifted_offset_z(base, offset, p); +#else /* POLYMORPHIC */ + return vldrwq_gather_shifted_offset_z_f32(base, offset, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vldrwq_gather_shifted_offset_z_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i32.v4i32.v4i1(i32* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 32, i32 2, i32 0, <4 x i1> [[TMP1]]) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +int32x4_t test_vldrwq_gather_shifted_offset_z_s32(const int32_t *base, uint32x4_t offset, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vldrwq_gather_shifted_offset_z(base, offset, p); +#else /* POLYMORPHIC */ + return vldrwq_gather_shifted_offset_z_s32(base, offset, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vldrwq_gather_shifted_offset_z_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i32.v4i32.v4i1(i32* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 32, i32 2, i32 1, <4 x i1> [[TMP1]]) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +uint32x4_t test_vldrwq_gather_shifted_offset_z_u32(const uint32_t *base, uint32x4_t offset, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vldrwq_gather_shifted_offset_z(base, offset, p); +#else /* POLYMORPHIC */ + return vldrwq_gather_shifted_offset_z_u32(base, offset, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrbq_scatter_offset_p_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v8i16.v8i16.v8i1(i8* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], <8 x i16> [[VALUE:%.*]], i32 8, i32 0, <8 x i1> [[TMP1]]) +// CHECK-NEXT: ret void +// +void test_vstrbq_scatter_offset_p_s16(int8_t *base, uint16x8_t offset, int16x8_t value, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + vstrbq_scatter_offset_p(base, offset, value, p); +#else /* POLYMORPHIC */ + vstrbq_scatter_offset_p_s16(base, offset, value, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrbq_scatter_offset_p_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v4i32.v4i32.v4i1(i8* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x i32> [[VALUE:%.*]], i32 8, i32 0, <4 x i1> [[TMP1]]) +// CHECK-NEXT: ret void +// +void test_vstrbq_scatter_offset_p_s32(int8_t *base, uint32x4_t offset, int32x4_t value, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + vstrbq_scatter_offset_p(base, offset, value, p); +#else /* POLYMORPHIC */ + vstrbq_scatter_offset_p_s32(base, offset, value, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrbq_scatter_offset_p_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v16i8.v16i8.v16i1(i8* [[BASE:%.*]], <16 x i8> [[OFFSET:%.*]], <16 x i8> [[VALUE:%.*]], i32 8, i32 0, <16 x i1> [[TMP1]]) +// CHECK-NEXT: ret void +// +void test_vstrbq_scatter_offset_p_s8(int8_t *base, uint8x16_t offset, int8x16_t value, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + vstrbq_scatter_offset_p(base, offset, value, p); +#else /* POLYMORPHIC */ + vstrbq_scatter_offset_p_s8(base, offset, value, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrbq_scatter_offset_p_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v8i16.v8i16.v8i1(i8* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], <8 x i16> [[VALUE:%.*]], i32 8, i32 0, <8 x i1> [[TMP1]]) +// CHECK-NEXT: ret void +// +void test_vstrbq_scatter_offset_p_u16(uint8_t *base, uint16x8_t offset, uint16x8_t value, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + vstrbq_scatter_offset_p(base, offset, value, p); +#else /* POLYMORPHIC */ + vstrbq_scatter_offset_p_u16(base, offset, value, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrbq_scatter_offset_p_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v4i32.v4i32.v4i1(i8* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x i32> [[VALUE:%.*]], i32 8, i32 0, <4 x i1> [[TMP1]]) +// CHECK-NEXT: ret void +// +void test_vstrbq_scatter_offset_p_u32(uint8_t *base, uint32x4_t offset, uint32x4_t value, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + vstrbq_scatter_offset_p(base, offset, value, p); +#else /* POLYMORPHIC */ + vstrbq_scatter_offset_p_u32(base, offset, value, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrbq_scatter_offset_p_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v16i8.v16i8.v16i1(i8* [[BASE:%.*]], <16 x i8> [[OFFSET:%.*]], <16 x i8> [[VALUE:%.*]], i32 8, i32 0, <16 x i1> [[TMP1]]) +// CHECK-NEXT: ret void +// +void test_vstrbq_scatter_offset_p_u8(uint8_t *base, uint8x16_t offset, uint8x16_t value, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + vstrbq_scatter_offset_p(base, offset, value, p); +#else /* POLYMORPHIC */ + vstrbq_scatter_offset_p_u8(base, offset, value, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrbq_scatter_offset_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.p0i8.v8i16.v8i16(i8* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], <8 x i16> [[VALUE:%.*]], i32 8, i32 0) +// CHECK-NEXT: ret void +// +void test_vstrbq_scatter_offset_s16(int8_t *base, uint16x8_t offset, int16x8_t value) +{ +#ifdef POLYMORPHIC + vstrbq_scatter_offset(base, offset, value); +#else /* POLYMORPHIC */ + vstrbq_scatter_offset_s16(base, offset, value); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrbq_scatter_offset_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.p0i8.v4i32.v4i32(i8* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x i32> [[VALUE:%.*]], i32 8, i32 0) +// CHECK-NEXT: ret void +// +void test_vstrbq_scatter_offset_s32(int8_t *base, uint32x4_t offset, int32x4_t value) +{ +#ifdef POLYMORPHIC + vstrbq_scatter_offset(base, offset, value); +#else /* POLYMORPHIC */ + vstrbq_scatter_offset_s32(base, offset, value); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrbq_scatter_offset_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.p0i8.v16i8.v16i8(i8* [[BASE:%.*]], <16 x i8> [[OFFSET:%.*]], <16 x i8> [[VALUE:%.*]], i32 8, i32 0) +// CHECK-NEXT: ret void +// +void test_vstrbq_scatter_offset_s8(int8_t *base, uint8x16_t offset, int8x16_t value) +{ +#ifdef POLYMORPHIC + vstrbq_scatter_offset(base, offset, value); +#else /* POLYMORPHIC */ + vstrbq_scatter_offset_s8(base, offset, value); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrbq_scatter_offset_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.p0i8.v8i16.v8i16(i8* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], <8 x i16> [[VALUE:%.*]], i32 8, i32 0) +// CHECK-NEXT: ret void +// +void test_vstrbq_scatter_offset_u16(uint8_t *base, uint16x8_t offset, uint16x8_t value) +{ +#ifdef POLYMORPHIC + vstrbq_scatter_offset(base, offset, value); +#else /* POLYMORPHIC */ + vstrbq_scatter_offset_u16(base, offset, value); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrbq_scatter_offset_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.p0i8.v4i32.v4i32(i8* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x i32> [[VALUE:%.*]], i32 8, i32 0) +// CHECK-NEXT: ret void +// +void test_vstrbq_scatter_offset_u32(uint8_t *base, uint32x4_t offset, uint32x4_t value) +{ +#ifdef POLYMORPHIC + vstrbq_scatter_offset(base, offset, value); +#else /* POLYMORPHIC */ + vstrbq_scatter_offset_u32(base, offset, value); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrbq_scatter_offset_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.p0i8.v16i8.v16i8(i8* [[BASE:%.*]], <16 x i8> [[OFFSET:%.*]], <16 x i8> [[VALUE:%.*]], i32 8, i32 0) +// CHECK-NEXT: ret void +// +void test_vstrbq_scatter_offset_u8(uint8_t *base, uint8x16_t offset, uint8x16_t value) +{ +#ifdef POLYMORPHIC + vstrbq_scatter_offset(base, offset, value); +#else /* POLYMORPHIC */ + vstrbq_scatter_offset_u8(base, offset, value); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrdq_scatter_base_p_s64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1(<2 x i64> [[ADDR:%.*]], i32 888, <2 x i64> [[VALUE:%.*]], <4 x i1> [[TMP1]]) +// CHECK-NEXT: ret void +// +void test_vstrdq_scatter_base_p_s64(uint64x2_t addr, int64x2_t value, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + vstrdq_scatter_base_p(addr, 0x378, value, p); +#else /* POLYMORPHIC */ + vstrdq_scatter_base_p_s64(addr, 0x378, value, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrdq_scatter_base_p_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1(<2 x i64> [[ADDR:%.*]], i32 264, <2 x i64> [[VALUE:%.*]], <4 x i1> [[TMP1]]) +// CHECK-NEXT: ret void +// +void test_vstrdq_scatter_base_p_u64(uint64x2_t addr, uint64x2_t value, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + vstrdq_scatter_base_p(addr, 0x108, value, p); +#else /* POLYMORPHIC */ + vstrdq_scatter_base_p_u64(addr, 0x108, value, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrdq_scatter_base_s64( +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.base.v2i64.v2i64(<2 x i64> [[ADDR:%.*]], i32 408, <2 x i64> [[VALUE:%.*]]) +// CHECK-NEXT: ret void +// +void test_vstrdq_scatter_base_s64(uint64x2_t addr, int64x2_t value) +{ +#ifdef POLYMORPHIC + vstrdq_scatter_base(addr, 0x198, value); +#else /* POLYMORPHIC */ + vstrdq_scatter_base_s64(addr, 0x198, value); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrdq_scatter_base_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.base.v2i64.v2i64(<2 x i64> [[ADDR:%.*]], i32 472, <2 x i64> [[VALUE:%.*]]) +// CHECK-NEXT: ret void +// +void test_vstrdq_scatter_base_u64(uint64x2_t addr, uint64x2_t value) +{ +#ifdef POLYMORPHIC + vstrdq_scatter_base(addr, 0x1d8, value); +#else /* POLYMORPHIC */ + vstrdq_scatter_base_u64(addr, 0x1d8, value); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrdq_scatter_base_wb_p_s64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, <2 x i64>* [[ADDR:%.*]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64> [[TMP0]], i32 248, <2 x i64> [[VALUE:%.*]], <4 x i1> [[TMP2]]) +// CHECK-NEXT: store <2 x i64> [[TMP3]], <2 x i64>* [[ADDR]], align 8 +// CHECK-NEXT: ret void +// +void test_vstrdq_scatter_base_wb_p_s64(uint64x2_t *addr, int64x2_t value, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + vstrdq_scatter_base_wb_p(addr, 0xf8, value, p); +#else /* POLYMORPHIC */ + vstrdq_scatter_base_wb_p_s64(addr, 0xf8, value, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrdq_scatter_base_wb_p_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, <2 x i64>* [[ADDR:%.*]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64> [[TMP0]], i32 136, <2 x i64> [[VALUE:%.*]], <4 x i1> [[TMP2]]) +// CHECK-NEXT: store <2 x i64> [[TMP3]], <2 x i64>* [[ADDR]], align 8 +// CHECK-NEXT: ret void +// +void test_vstrdq_scatter_base_wb_p_u64(uint64x2_t *addr, uint64x2_t value, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + vstrdq_scatter_base_wb_p(addr, 0x88, value, p); +#else /* POLYMORPHIC */ + vstrdq_scatter_base_wb_p_u64(addr, 0x88, value, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrdq_scatter_base_wb_s64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, <2 x i64>* [[ADDR:%.*]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.v2i64.v2i64(<2 x i64> [[TMP0]], i32 208, <2 x i64> [[VALUE:%.*]]) +// CHECK-NEXT: store <2 x i64> [[TMP1]], <2 x i64>* [[ADDR]], align 8 +// CHECK-NEXT: ret void +// +void test_vstrdq_scatter_base_wb_s64(uint64x2_t *addr, int64x2_t value) +{ +#ifdef POLYMORPHIC + vstrdq_scatter_base_wb(addr, 0xd0, value); +#else /* POLYMORPHIC */ + vstrdq_scatter_base_wb_s64(addr, 0xd0, value); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrdq_scatter_base_wb_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, <2 x i64>* [[ADDR:%.*]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.v2i64.v2i64(<2 x i64> [[TMP0]], i32 168, <2 x i64> [[VALUE:%.*]]) +// CHECK-NEXT: store <2 x i64> [[TMP1]], <2 x i64>* [[ADDR]], align 8 +// CHECK-NEXT: ret void +// +void test_vstrdq_scatter_base_wb_u64(uint64x2_t *addr, uint64x2_t value) +{ +#ifdef POLYMORPHIC + vstrdq_scatter_base_wb(addr, 0xa8, value); +#else /* POLYMORPHIC */ + vstrdq_scatter_base_wb_u64(addr, 0xa8, value); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrdq_scatter_offset_p_s64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1(i64* [[BASE:%.*]], <2 x i64> [[OFFSET:%.*]], <2 x i64> [[VALUE:%.*]], i32 64, i32 0, <4 x i1> [[TMP1]]) +// CHECK-NEXT: ret void +// +void test_vstrdq_scatter_offset_p_s64(int64_t *base, uint64x2_t offset, int64x2_t value, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + vstrdq_scatter_offset_p(base, offset, value, p); +#else /* POLYMORPHIC */ + vstrdq_scatter_offset_p_s64(base, offset, value, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrdq_scatter_offset_p_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1(i64* [[BASE:%.*]], <2 x i64> [[OFFSET:%.*]], <2 x i64> [[VALUE:%.*]], i32 64, i32 0, <4 x i1> [[TMP1]]) +// CHECK-NEXT: ret void +// +void test_vstrdq_scatter_offset_p_u64(uint64_t *base, uint64x2_t offset, uint64x2_t value, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + vstrdq_scatter_offset_p(base, offset, value, p); +#else /* POLYMORPHIC */ + vstrdq_scatter_offset_p_u64(base, offset, value, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrdq_scatter_offset_s64( +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.p0i64.v2i64.v2i64(i64* [[BASE:%.*]], <2 x i64> [[OFFSET:%.*]], <2 x i64> [[VALUE:%.*]], i32 64, i32 0) +// CHECK-NEXT: ret void +// +void test_vstrdq_scatter_offset_s64(int64_t *base, uint64x2_t offset, int64x2_t value) +{ +#ifdef POLYMORPHIC + vstrdq_scatter_offset(base, offset, value); +#else /* POLYMORPHIC */ + vstrdq_scatter_offset_s64(base, offset, value); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrdq_scatter_offset_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.p0i64.v2i64.v2i64(i64* [[BASE:%.*]], <2 x i64> [[OFFSET:%.*]], <2 x i64> [[VALUE:%.*]], i32 64, i32 0) +// CHECK-NEXT: ret void +// +void test_vstrdq_scatter_offset_u64(uint64_t *base, uint64x2_t offset, uint64x2_t value) +{ +#ifdef POLYMORPHIC + vstrdq_scatter_offset(base, offset, value); +#else /* POLYMORPHIC */ + vstrdq_scatter_offset_u64(base, offset, value); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrdq_scatter_shifted_offset_p_s64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1(i64* [[BASE:%.*]], <2 x i64> [[OFFSET:%.*]], <2 x i64> [[VALUE:%.*]], i32 64, i32 3, <4 x i1> [[TMP1]]) +// CHECK-NEXT: ret void +// +void test_vstrdq_scatter_shifted_offset_p_s64(int64_t *base, uint64x2_t offset, int64x2_t value, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + vstrdq_scatter_shifted_offset_p(base, offset, value, p); +#else /* POLYMORPHIC */ + vstrdq_scatter_shifted_offset_p_s64(base, offset, value, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrdq_scatter_shifted_offset_p_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1(i64* [[BASE:%.*]], <2 x i64> [[OFFSET:%.*]], <2 x i64> [[VALUE:%.*]], i32 64, i32 3, <4 x i1> [[TMP1]]) +// CHECK-NEXT: ret void +// +void test_vstrdq_scatter_shifted_offset_p_u64(uint64_t *base, uint64x2_t offset, uint64x2_t value, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + vstrdq_scatter_shifted_offset_p(base, offset, value, p); +#else /* POLYMORPHIC */ + vstrdq_scatter_shifted_offset_p_u64(base, offset, value, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrdq_scatter_shifted_offset_s64( +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.p0i64.v2i64.v2i64(i64* [[BASE:%.*]], <2 x i64> [[OFFSET:%.*]], <2 x i64> [[VALUE:%.*]], i32 64, i32 3) +// CHECK-NEXT: ret void +// +void test_vstrdq_scatter_shifted_offset_s64(int64_t *base, uint64x2_t offset, int64x2_t value) +{ +#ifdef POLYMORPHIC + vstrdq_scatter_shifted_offset(base, offset, value); +#else /* POLYMORPHIC */ + vstrdq_scatter_shifted_offset_s64(base, offset, value); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrdq_scatter_shifted_offset_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.p0i64.v2i64.v2i64(i64* [[BASE:%.*]], <2 x i64> [[OFFSET:%.*]], <2 x i64> [[VALUE:%.*]], i32 64, i32 3) +// CHECK-NEXT: ret void +// +void test_vstrdq_scatter_shifted_offset_u64(uint64_t *base, uint64x2_t offset, uint64x2_t value) +{ +#ifdef POLYMORPHIC + vstrdq_scatter_shifted_offset(base, offset, value); +#else /* POLYMORPHIC */ + vstrdq_scatter_shifted_offset_u64(base, offset, value); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrhq_scatter_offset_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.p0f16.v8i16.v8f16(half* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], <8 x half> [[VALUE:%.*]], i32 16, i32 0) +// CHECK-NEXT: ret void +// +void test_vstrhq_scatter_offset_f16(float16_t *base, uint16x8_t offset, float16x8_t value) +{ +#ifdef POLYMORPHIC + vstrhq_scatter_offset(base, offset, value); +#else /* POLYMORPHIC */ + vstrhq_scatter_offset_f16(base, offset, value); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrhq_scatter_offset_p_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0f16.v8i16.v8f16.v8i1(half* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], <8 x half> [[VALUE:%.*]], i32 16, i32 0, <8 x i1> [[TMP1]]) +// CHECK-NEXT: ret void +// +void test_vstrhq_scatter_offset_p_f16(float16_t *base, uint16x8_t offset, float16x8_t value, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + vstrhq_scatter_offset_p(base, offset, value, p); +#else /* POLYMORPHIC */ + vstrhq_scatter_offset_p_f16(base, offset, value, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrhq_scatter_offset_p_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v8i16.v8i16.v8i1(i16* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], <8 x i16> [[VALUE:%.*]], i32 16, i32 0, <8 x i1> [[TMP1]]) +// CHECK-NEXT: ret void +// +void test_vstrhq_scatter_offset_p_s16(int16_t *base, uint16x8_t offset, int16x8_t value, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + vstrhq_scatter_offset_p(base, offset, value, p); +#else /* POLYMORPHIC */ + vstrhq_scatter_offset_p_s16(base, offset, value, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrhq_scatter_offset_p_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v4i32.v4i32.v4i1(i16* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x i32> [[VALUE:%.*]], i32 16, i32 0, <4 x i1> [[TMP1]]) +// CHECK-NEXT: ret void +// +void test_vstrhq_scatter_offset_p_s32(int16_t *base, uint32x4_t offset, int32x4_t value, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + vstrhq_scatter_offset_p(base, offset, value, p); +#else /* POLYMORPHIC */ + vstrhq_scatter_offset_p_s32(base, offset, value, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrhq_scatter_offset_p_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v8i16.v8i16.v8i1(i16* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], <8 x i16> [[VALUE:%.*]], i32 16, i32 0, <8 x i1> [[TMP1]]) +// CHECK-NEXT: ret void +// +void test_vstrhq_scatter_offset_p_u16(uint16_t *base, uint16x8_t offset, uint16x8_t value, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + vstrhq_scatter_offset_p(base, offset, value, p); +#else /* POLYMORPHIC */ + vstrhq_scatter_offset_p_u16(base, offset, value, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrhq_scatter_offset_p_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v4i32.v4i32.v4i1(i16* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x i32> [[VALUE:%.*]], i32 16, i32 0, <4 x i1> [[TMP1]]) +// CHECK-NEXT: ret void +// +void test_vstrhq_scatter_offset_p_u32(uint16_t *base, uint32x4_t offset, uint32x4_t value, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + vstrhq_scatter_offset_p(base, offset, value, p); +#else /* POLYMORPHIC */ + vstrhq_scatter_offset_p_u32(base, offset, value, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrhq_scatter_offset_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v8i16.v8i16(i16* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], <8 x i16> [[VALUE:%.*]], i32 16, i32 0) +// CHECK-NEXT: ret void +// +void test_vstrhq_scatter_offset_s16(int16_t *base, uint16x8_t offset, int16x8_t value) +{ +#ifdef POLYMORPHIC + vstrhq_scatter_offset(base, offset, value); +#else /* POLYMORPHIC */ + vstrhq_scatter_offset_s16(base, offset, value); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrhq_scatter_offset_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v4i32.v4i32(i16* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x i32> [[VALUE:%.*]], i32 16, i32 0) +// CHECK-NEXT: ret void +// +void test_vstrhq_scatter_offset_s32(int16_t *base, uint32x4_t offset, int32x4_t value) +{ +#ifdef POLYMORPHIC + vstrhq_scatter_offset(base, offset, value); +#else /* POLYMORPHIC */ + vstrhq_scatter_offset_s32(base, offset, value); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrhq_scatter_offset_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v8i16.v8i16(i16* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], <8 x i16> [[VALUE:%.*]], i32 16, i32 0) +// CHECK-NEXT: ret void +// +void test_vstrhq_scatter_offset_u16(uint16_t *base, uint16x8_t offset, uint16x8_t value) +{ +#ifdef POLYMORPHIC + vstrhq_scatter_offset(base, offset, value); +#else /* POLYMORPHIC */ + vstrhq_scatter_offset_u16(base, offset, value); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrhq_scatter_offset_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v4i32.v4i32(i16* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x i32> [[VALUE:%.*]], i32 16, i32 0) +// CHECK-NEXT: ret void +// +void test_vstrhq_scatter_offset_u32(uint16_t *base, uint32x4_t offset, uint32x4_t value) +{ +#ifdef POLYMORPHIC + vstrhq_scatter_offset(base, offset, value); +#else /* POLYMORPHIC */ + vstrhq_scatter_offset_u32(base, offset, value); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrhq_scatter_shifted_offset_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.p0f16.v8i16.v8f16(half* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], <8 x half> [[VALUE:%.*]], i32 16, i32 1) +// CHECK-NEXT: ret void +// +void test_vstrhq_scatter_shifted_offset_f16(float16_t *base, uint16x8_t offset, float16x8_t value) +{ +#ifdef POLYMORPHIC + vstrhq_scatter_shifted_offset(base, offset, value); +#else /* POLYMORPHIC */ + vstrhq_scatter_shifted_offset_f16(base, offset, value); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrhq_scatter_shifted_offset_p_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0f16.v8i16.v8f16.v8i1(half* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], <8 x half> [[VALUE:%.*]], i32 16, i32 1, <8 x i1> [[TMP1]]) +// CHECK-NEXT: ret void +// +void test_vstrhq_scatter_shifted_offset_p_f16(float16_t *base, uint16x8_t offset, float16x8_t value, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + vstrhq_scatter_shifted_offset_p(base, offset, value, p); +#else /* POLYMORPHIC */ + vstrhq_scatter_shifted_offset_p_f16(base, offset, value, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrhq_scatter_shifted_offset_p_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v8i16.v8i16.v8i1(i16* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], <8 x i16> [[VALUE:%.*]], i32 16, i32 1, <8 x i1> [[TMP1]]) +// CHECK-NEXT: ret void +// +void test_vstrhq_scatter_shifted_offset_p_s16(int16_t *base, uint16x8_t offset, int16x8_t value, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + vstrhq_scatter_shifted_offset_p(base, offset, value, p); +#else /* POLYMORPHIC */ + vstrhq_scatter_shifted_offset_p_s16(base, offset, value, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrhq_scatter_shifted_offset_p_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v4i32.v4i32.v4i1(i16* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x i32> [[VALUE:%.*]], i32 16, i32 1, <4 x i1> [[TMP1]]) +// CHECK-NEXT: ret void +// +void test_vstrhq_scatter_shifted_offset_p_s32(int16_t *base, uint32x4_t offset, int32x4_t value, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + vstrhq_scatter_shifted_offset_p(base, offset, value, p); +#else /* POLYMORPHIC */ + vstrhq_scatter_shifted_offset_p_s32(base, offset, value, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrhq_scatter_shifted_offset_p_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v8i16.v8i16.v8i1(i16* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], <8 x i16> [[VALUE:%.*]], i32 16, i32 1, <8 x i1> [[TMP1]]) +// CHECK-NEXT: ret void +// +void test_vstrhq_scatter_shifted_offset_p_u16(uint16_t *base, uint16x8_t offset, uint16x8_t value, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + vstrhq_scatter_shifted_offset_p(base, offset, value, p); +#else /* POLYMORPHIC */ + vstrhq_scatter_shifted_offset_p_u16(base, offset, value, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrhq_scatter_shifted_offset_p_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v4i32.v4i32.v4i1(i16* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x i32> [[VALUE:%.*]], i32 16, i32 1, <4 x i1> [[TMP1]]) +// CHECK-NEXT: ret void +// +void test_vstrhq_scatter_shifted_offset_p_u32(uint16_t *base, uint32x4_t offset, uint32x4_t value, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + vstrhq_scatter_shifted_offset_p(base, offset, value, p); +#else /* POLYMORPHIC */ + vstrhq_scatter_shifted_offset_p_u32(base, offset, value, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrhq_scatter_shifted_offset_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v8i16.v8i16(i16* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], <8 x i16> [[VALUE:%.*]], i32 16, i32 1) +// CHECK-NEXT: ret void +// +void test_vstrhq_scatter_shifted_offset_s16(int16_t *base, uint16x8_t offset, int16x8_t value) +{ +#ifdef POLYMORPHIC + vstrhq_scatter_shifted_offset(base, offset, value); +#else /* POLYMORPHIC */ + vstrhq_scatter_shifted_offset_s16(base, offset, value); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrhq_scatter_shifted_offset_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v4i32.v4i32(i16* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x i32> [[VALUE:%.*]], i32 16, i32 1) +// CHECK-NEXT: ret void +// +void test_vstrhq_scatter_shifted_offset_s32(int16_t *base, uint32x4_t offset, int32x4_t value) +{ +#ifdef POLYMORPHIC + vstrhq_scatter_shifted_offset(base, offset, value); +#else /* POLYMORPHIC */ + vstrhq_scatter_shifted_offset_s32(base, offset, value); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrhq_scatter_shifted_offset_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v8i16.v8i16(i16* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], <8 x i16> [[VALUE:%.*]], i32 16, i32 1) +// CHECK-NEXT: ret void +// +void test_vstrhq_scatter_shifted_offset_u16(uint16_t *base, uint16x8_t offset, uint16x8_t value) +{ +#ifdef POLYMORPHIC + vstrhq_scatter_shifted_offset(base, offset, value); +#else /* POLYMORPHIC */ + vstrhq_scatter_shifted_offset_u16(base, offset, value); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrhq_scatter_shifted_offset_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v4i32.v4i32(i16* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x i32> [[VALUE:%.*]], i32 16, i32 1) +// CHECK-NEXT: ret void +// +void test_vstrhq_scatter_shifted_offset_u32(uint16_t *base, uint32x4_t offset, uint32x4_t value) +{ +#ifdef POLYMORPHIC + vstrhq_scatter_shifted_offset(base, offset, value); +#else /* POLYMORPHIC */ + vstrhq_scatter_shifted_offset_u32(base, offset, value); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrwq_scatter_base_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.base.v4i32.v4f32(<4 x i32> [[ADDR:%.*]], i32 380, <4 x float> [[VALUE:%.*]]) +// CHECK-NEXT: ret void +// +void test_vstrwq_scatter_base_f32(uint32x4_t addr, float32x4_t value) +{ +#ifdef POLYMORPHIC + vstrwq_scatter_base(addr, 0x17c, value); +#else /* POLYMORPHIC */ + vstrwq_scatter_base_f32(addr, 0x17c, value); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrwq_scatter_base_p_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.base.predicated.v4i32.v4f32.v4i1(<4 x i32> [[ADDR:%.*]], i32 400, <4 x float> [[VALUE:%.*]], <4 x i1> [[TMP1]]) +// CHECK-NEXT: ret void +// +void test_vstrwq_scatter_base_p_f32(uint32x4_t addr, float32x4_t value, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + vstrwq_scatter_base_p(addr, 0x190, value, p); +#else /* POLYMORPHIC */ + vstrwq_scatter_base_p_f32(addr, 0x190, value, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrwq_scatter_base_p_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.base.predicated.v4i32.v4i32.v4i1(<4 x i32> [[ADDR:%.*]], i32 48, <4 x i32> [[VALUE:%.*]], <4 x i1> [[TMP1]]) +// CHECK-NEXT: ret void +// +void test_vstrwq_scatter_base_p_s32(uint32x4_t addr, int32x4_t value, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + vstrwq_scatter_base_p(addr, 0x30, value, p); +#else /* POLYMORPHIC */ + vstrwq_scatter_base_p_s32(addr, 0x30, value, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrwq_scatter_base_p_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.base.predicated.v4i32.v4i32.v4i1(<4 x i32> [[ADDR:%.*]], i32 376, <4 x i32> [[VALUE:%.*]], <4 x i1> [[TMP1]]) +// CHECK-NEXT: ret void +// +void test_vstrwq_scatter_base_p_u32(uint32x4_t addr, uint32x4_t value, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + vstrwq_scatter_base_p(addr, 0x178, value, p); +#else /* POLYMORPHIC */ + vstrwq_scatter_base_p_u32(addr, 0x178, value, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrwq_scatter_base_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.base.v4i32.v4i32(<4 x i32> [[ADDR:%.*]], i32 156, <4 x i32> [[VALUE:%.*]]) +// CHECK-NEXT: ret void +// +void test_vstrwq_scatter_base_s32(uint32x4_t addr, int32x4_t value) +{ +#ifdef POLYMORPHIC + vstrwq_scatter_base(addr, 0x9c, value); +#else /* POLYMORPHIC */ + vstrwq_scatter_base_s32(addr, 0x9c, value); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrwq_scatter_base_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.base.v4i32.v4i32(<4 x i32> [[ADDR:%.*]], i32 212, <4 x i32> [[VALUE:%.*]]) +// CHECK-NEXT: ret void +// +void test_vstrwq_scatter_base_u32(uint32x4_t addr, uint32x4_t value) +{ +#ifdef POLYMORPHIC + vstrwq_scatter_base(addr, 0xd4, value); +#else /* POLYMORPHIC */ + vstrwq_scatter_base_u32(addr, 0xd4, value); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrwq_scatter_base_wb_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* [[ADDR:%.*]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.v4i32.v4f32(<4 x i32> [[TMP0]], i32 412, <4 x float> [[VALUE:%.*]]) +// CHECK-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* [[ADDR]], align 8 +// CHECK-NEXT: ret void +// +void test_vstrwq_scatter_base_wb_f32(uint32x4_t *addr, float32x4_t value) +{ +#ifdef POLYMORPHIC + vstrwq_scatter_base_wb(addr, 0x19c, value); +#else /* POLYMORPHIC */ + vstrwq_scatter_base_wb_f32(addr, 0x19c, value); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrwq_scatter_base_wb_p_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* [[ADDR:%.*]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v4i32.v4f32.v4i1(<4 x i32> [[TMP0]], i32 236, <4 x float> [[VALUE:%.*]], <4 x i1> [[TMP2]]) +// CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[ADDR]], align 8 +// CHECK-NEXT: ret void +// +void test_vstrwq_scatter_base_wb_p_f32(uint32x4_t *addr, float32x4_t value, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + vstrwq_scatter_base_wb_p(addr, 0xec, value, p); +#else /* POLYMORPHIC */ + vstrwq_scatter_base_wb_p_f32(addr, 0xec, value, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrwq_scatter_base_wb_p_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* [[ADDR:%.*]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32> [[TMP0]], i32 328, <4 x i32> [[VALUE:%.*]], <4 x i1> [[TMP2]]) +// CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[ADDR]], align 8 +// CHECK-NEXT: ret void +// +void test_vstrwq_scatter_base_wb_p_s32(uint32x4_t *addr, int32x4_t value, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + vstrwq_scatter_base_wb_p(addr, 0x148, value, p); +#else /* POLYMORPHIC */ + vstrwq_scatter_base_wb_p_s32(addr, 0x148, value, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrwq_scatter_base_wb_p_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* [[ADDR:%.*]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32> [[TMP0]], i32 412, <4 x i32> [[VALUE:%.*]], <4 x i1> [[TMP2]]) +// CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[ADDR]], align 8 +// CHECK-NEXT: ret void +// +void test_vstrwq_scatter_base_wb_p_u32(uint32x4_t *addr, uint32x4_t value, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + vstrwq_scatter_base_wb_p(addr, 0x19c, value, p); +#else /* POLYMORPHIC */ + vstrwq_scatter_base_wb_p_u32(addr, 0x19c, value, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrwq_scatter_base_wb_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* [[ADDR:%.*]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.v4i32.v4i32(<4 x i32> [[TMP0]], i32 152, <4 x i32> [[VALUE:%.*]]) +// CHECK-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* [[ADDR]], align 8 +// CHECK-NEXT: ret void +// +void test_vstrwq_scatter_base_wb_s32(uint32x4_t *addr, int32x4_t value) +{ +#ifdef POLYMORPHIC + vstrwq_scatter_base_wb(addr, 0x98, value); +#else /* POLYMORPHIC */ + vstrwq_scatter_base_wb_s32(addr, 0x98, value); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrwq_scatter_base_wb_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* [[ADDR:%.*]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.v4i32.v4i32(<4 x i32> [[TMP0]], i32 64, <4 x i32> [[VALUE:%.*]]) +// CHECK-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* [[ADDR]], align 8 +// CHECK-NEXT: ret void +// +void test_vstrwq_scatter_base_wb_u32(uint32x4_t *addr, uint32x4_t value) +{ +#ifdef POLYMORPHIC + vstrwq_scatter_base_wb(addr, 0x40, value); +#else /* POLYMORPHIC */ + vstrwq_scatter_base_wb_u32(addr, 0x40, value); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrwq_scatter_offset_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.p0f32.v4i32.v4f32(float* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x float> [[VALUE:%.*]], i32 32, i32 0) +// CHECK-NEXT: ret void +// +void test_vstrwq_scatter_offset_f32(float32_t *base, uint32x4_t offset, float32x4_t value) +{ +#ifdef POLYMORPHIC + vstrwq_scatter_offset(base, offset, value); +#else /* POLYMORPHIC */ + vstrwq_scatter_offset_f32(base, offset, value); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrwq_scatter_offset_p_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0f32.v4i32.v4f32.v4i1(float* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x float> [[VALUE:%.*]], i32 32, i32 0, <4 x i1> [[TMP1]]) +// CHECK-NEXT: ret void +// +void test_vstrwq_scatter_offset_p_f32(float32_t *base, uint32x4_t offset, float32x4_t value, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + vstrwq_scatter_offset_p(base, offset, value, p); +#else /* POLYMORPHIC */ + vstrwq_scatter_offset_p_f32(base, offset, value, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrwq_scatter_offset_p_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i32.v4i32.v4i32.v4i1(i32* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x i32> [[VALUE:%.*]], i32 32, i32 0, <4 x i1> [[TMP1]]) +// CHECK-NEXT: ret void +// +void test_vstrwq_scatter_offset_p_s32(int32_t *base, uint32x4_t offset, int32x4_t value, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + vstrwq_scatter_offset_p(base, offset, value, p); +#else /* POLYMORPHIC */ + vstrwq_scatter_offset_p_s32(base, offset, value, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrwq_scatter_offset_p_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i32.v4i32.v4i32.v4i1(i32* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x i32> [[VALUE:%.*]], i32 32, i32 0, <4 x i1> [[TMP1]]) +// CHECK-NEXT: ret void +// +void test_vstrwq_scatter_offset_p_u32(uint32_t *base, uint32x4_t offset, uint32x4_t value, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + vstrwq_scatter_offset_p(base, offset, value, p); +#else /* POLYMORPHIC */ + vstrwq_scatter_offset_p_u32(base, offset, value, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrwq_scatter_offset_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.p0i32.v4i32.v4i32(i32* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x i32> [[VALUE:%.*]], i32 32, i32 0) +// CHECK-NEXT: ret void +// +void test_vstrwq_scatter_offset_s32(int32_t *base, uint32x4_t offset, int32x4_t value) +{ +#ifdef POLYMORPHIC + vstrwq_scatter_offset(base, offset, value); +#else /* POLYMORPHIC */ + vstrwq_scatter_offset_s32(base, offset, value); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrwq_scatter_offset_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.p0i32.v4i32.v4i32(i32* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x i32> [[VALUE:%.*]], i32 32, i32 0) +// CHECK-NEXT: ret void +// +void test_vstrwq_scatter_offset_u32(uint32_t *base, uint32x4_t offset, uint32x4_t value) +{ +#ifdef POLYMORPHIC + vstrwq_scatter_offset(base, offset, value); +#else /* POLYMORPHIC */ + vstrwq_scatter_offset_u32(base, offset, value); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrwq_scatter_shifted_offset_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.p0f32.v4i32.v4f32(float* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x float> [[VALUE:%.*]], i32 32, i32 2) +// CHECK-NEXT: ret void +// +void test_vstrwq_scatter_shifted_offset_f32(float32_t *base, uint32x4_t offset, float32x4_t value) +{ +#ifdef POLYMORPHIC + vstrwq_scatter_shifted_offset(base, offset, value); +#else /* POLYMORPHIC */ + vstrwq_scatter_shifted_offset_f32(base, offset, value); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrwq_scatter_shifted_offset_p_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0f32.v4i32.v4f32.v4i1(float* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x float> [[VALUE:%.*]], i32 32, i32 2, <4 x i1> [[TMP1]]) +// CHECK-NEXT: ret void +// +void test_vstrwq_scatter_shifted_offset_p_f32(float32_t *base, uint32x4_t offset, float32x4_t value, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + vstrwq_scatter_shifted_offset_p(base, offset, value, p); +#else /* POLYMORPHIC */ + vstrwq_scatter_shifted_offset_p_f32(base, offset, value, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrwq_scatter_shifted_offset_p_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i32.v4i32.v4i32.v4i1(i32* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x i32> [[VALUE:%.*]], i32 32, i32 2, <4 x i1> [[TMP1]]) +// CHECK-NEXT: ret void +// +void test_vstrwq_scatter_shifted_offset_p_s32(int32_t *base, uint32x4_t offset, int32x4_t value, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + vstrwq_scatter_shifted_offset_p(base, offset, value, p); +#else /* POLYMORPHIC */ + vstrwq_scatter_shifted_offset_p_s32(base, offset, value, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrwq_scatter_shifted_offset_p_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i32.v4i32.v4i32.v4i1(i32* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x i32> [[VALUE:%.*]], i32 32, i32 2, <4 x i1> [[TMP1]]) +// CHECK-NEXT: ret void +// +void test_vstrwq_scatter_shifted_offset_p_u32(uint32_t *base, uint32x4_t offset, uint32x4_t value, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + vstrwq_scatter_shifted_offset_p(base, offset, value, p); +#else /* POLYMORPHIC */ + vstrwq_scatter_shifted_offset_p_u32(base, offset, value, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrwq_scatter_shifted_offset_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.p0i32.v4i32.v4i32(i32* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x i32> [[VALUE:%.*]], i32 32, i32 2) +// CHECK-NEXT: ret void +// +void test_vstrwq_scatter_shifted_offset_s32(int32_t *base, uint32x4_t offset, int32x4_t value) +{ +#ifdef POLYMORPHIC + vstrwq_scatter_shifted_offset(base, offset, value); +#else /* POLYMORPHIC */ + vstrwq_scatter_shifted_offset_s32(base, offset, value); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vstrwq_scatter_shifted_offset_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.p0i32.v4i32.v4i32(i32* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x i32> [[VALUE:%.*]], i32 32, i32 2) +// CHECK-NEXT: ret void +// +void test_vstrwq_scatter_shifted_offset_u32(uint32_t *base, uint32x4_t offset, uint32x4_t value) +{ +#ifdef POLYMORPHIC + vstrwq_scatter_shifted_offset(base, offset, value); +#else /* POLYMORPHIC */ + vstrwq_scatter_shifted_offset_u32(base, offset, value); +#endif /* POLYMORPHIC */ +} + diff --git a/clang/test/Sema/arm-mve-immediates.c b/clang/test/Sema/arm-mve-immediates.c new file mode 100644 index 00000000000..cdf68b8a949 --- /dev/null +++ b/clang/test/Sema/arm-mve-immediates.c @@ -0,0 +1,56 @@ +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -verify -fsyntax-only %s + +#include <arm_mve.h> + +void test_load_offsets(uint32x4_t addr32, uint64x2_t addr64) +{ + // Offsets that should be a multiple of 8 times 0,1,...,127 + vldrdq_gather_base_s64(addr64, 0); + vldrdq_gather_base_s64(addr64, 8); + vldrdq_gather_base_s64(addr64, 2*8); + vldrdq_gather_base_s64(addr64, 125*8); + vldrdq_gather_base_s64(addr64, 126*8); + vldrdq_gather_base_s64(addr64, 127*8); + vldrdq_gather_base_s64(addr64, -8); // expected-error {{argument value -8 is outside the valid range [0, 1016]}} + vldrdq_gather_base_s64(addr64, 128*8); // expected-error {{argument value 1024 is outside the valid range [0, 1016]}} + vldrdq_gather_base_s64(addr64, 4); // expected-error {{argument should be a multiple of 8}} + vldrdq_gather_base_s64(addr64, 1); // expected-error {{argument should be a multiple of 8}} + + // Offsets that should be a multiple of 4 times 0,1,...,127 + vldrwq_gather_base_s32(addr32, 0); + vldrwq_gather_base_s32(addr32, 4); + vldrwq_gather_base_s32(addr32, 2*4); + vldrwq_gather_base_s32(addr32, 125*4); + vldrwq_gather_base_s32(addr32, 126*4); + vldrwq_gather_base_s32(addr32, 127*4); + vldrwq_gather_base_s32(addr32, -4); // expected-error {{argument value -4 is outside the valid range [0, 508]}} + vldrwq_gather_base_s32(addr32, 128*4); // expected-error {{argument value 512 is outside the valid range [0, 508]}} + vldrwq_gather_base_s32(addr32, 2); // expected-error {{argument should be a multiple of 4}} + vldrwq_gather_base_s32(addr32, 1); // expected-error {{argument should be a multiple of 4}} + + // Show that the polymorphic store intrinsics get the right set of + // error checks after overload resolution. These ones expand to the + // 8-byte granular versions... + vstrdq_scatter_base(addr64, 0, addr64); + vstrdq_scatter_base(addr64, 8, addr64); + vstrdq_scatter_base(addr64, 2*8, addr64); + vstrdq_scatter_base(addr64, 125*8, addr64); + vstrdq_scatter_base(addr64, 126*8, addr64); + vstrdq_scatter_base(addr64, 127*8, addr64); + vstrdq_scatter_base(addr64, -8, addr64); // expected-error {{argument value -8 is outside the valid range [0, 1016]}} + vstrdq_scatter_base(addr64, 128*8, addr64); // expected-error {{argument value 1024 is outside the valid range [0, 1016]}} + vstrdq_scatter_base(addr64, 4, addr64); // expected-error {{argument should be a multiple of 8}} + vstrdq_scatter_base(addr64, 1, addr64); // expected-error {{argument should be a multiple of 8}} + + /// ... and these ones to the 4-byte. + vstrwq_scatter_base(addr32, 0, addr32); + vstrwq_scatter_base(addr32, 4, addr32); + vstrwq_scatter_base(addr32, 2*4, addr32); + vstrwq_scatter_base(addr32, 125*4, addr32); + vstrwq_scatter_base(addr32, 126*4, addr32); + vstrwq_scatter_base(addr32, 127*4, addr32); + vstrwq_scatter_base(addr32, -4, addr32); // expected-error {{argument value -4 is outside the valid range [0, 508]}} + vstrwq_scatter_base(addr32, 128*4, addr32); // expected-error {{argument value 512 is outside the valid range [0, 508]}} + vstrwq_scatter_base(addr32, 2, addr32); // expected-error {{argument should be a multiple of 4}} + vstrwq_scatter_base(addr32, 1, addr32); // expected-error {{argument should be a multiple of 4}} +} diff --git a/clang/utils/TableGen/MveEmitter.cpp b/clang/utils/TableGen/MveEmitter.cpp index ddec171d671..aa3b475ea7b 100644 --- a/clang/utils/TableGen/MveEmitter.cpp +++ b/clang/utils/TableGen/MveEmitter.cpp @@ -204,6 +204,9 @@ public: Name = "const " + Name; return Name + " *"; } + std::string llvmName() const override { + return "llvm::PointerType::getUnqual(" + Pointee->llvmName() + ")"; + } static bool classof(const Type *T) { return T->typeKind() == TypeKind::Pointer; @@ -512,6 +515,11 @@ public: void setVarname(const StringRef s) { VarName = s; } bool varnameUsed() const { return VarNameUsed; } + // Emit code to generate this result as a Value *. + virtual std::string asValue() { + return varname(); + } + // Code generation happens in multiple passes. This method tracks whether a // Result has yet been visited in a given pass, without the need for a // tedious loop in between passes that goes through and resets a 'visited' @@ -547,6 +555,12 @@ public: std::string typeName() const override { return AddressType ? "Address" : Result::typeName(); } + // Emit code to generate this result as a Value *. + std::string asValue() override { + if (AddressType) + return "(" + varname() + ".getPointer())"; + return Result::asValue(); + } }; // Result subclass for an integer literal appearing in Tablegen. This may need @@ -665,7 +679,7 @@ public: OS << "), llvm::SmallVector<Value *, " << Args.size() << "> {"; const char *Sep = ""; for (auto Arg : Args) { - OS << Sep << Arg->varname(); + OS << Sep << Arg->asValue(); Sep = ", "; } OS << "})"; @@ -974,17 +988,15 @@ const Type *MveEmitter::getType(DagInit *D, const Type *Param) { return getPointerType(Pointee, Op->getValueAsBit("const")); } - if (Op->isSubClassOf("CTO_Sign")) { - const ScalarType *ST = cast<ScalarType>(getType(D->getArg(0), Param)); - ScalarTypeKind NewKind = Op->getValueAsBit("signed") - ? ScalarTypeKind::SignedInt - : ScalarTypeKind::UnsignedInt; + if (Op->getName() == "CTO_CopyKind") { + const ScalarType *STSize = cast<ScalarType>(getType(D->getArg(0), Param)); + const ScalarType *STKind = cast<ScalarType>(getType(D->getArg(1), Param)); for (const auto &kv : ScalarTypes) { const ScalarType *RT = kv.second.get(); - if (RT->kind() == NewKind && RT->sizeInBits() == ST->sizeInBits()) + if (RT->kind() == STKind->kind() && RT->sizeInBits() == STSize->sizeInBits()) return RT; } - PrintFatalError("Cannot change sign of this type"); + PrintFatalError("Cannot find a type to satisfy CopyKind"); } PrintFatalError("Bad operator in type dag expression"); @@ -1025,6 +1037,18 @@ Result::Ptr MveEmitter::getCodeForDag(DagInit *D, const Result::Scope &Scope, } } PrintFatalError("Unsupported type cast"); + } else if (Op->getName() == "unsignedflag") { + if (D->getNumArgs() != 1) + PrintFatalError("unsignedflag should have exactly one argument"); + Record *TypeRec = cast<DefInit>(D->getArg(0))->getDef(); + if (!TypeRec->isSubClassOf("Type")) + PrintFatalError("unsignedflag's argument should be a type"); + if (const auto *ST = dyn_cast<ScalarType>(getType(TypeRec, Param))) { + return std::make_shared<IntLiteralResult>( + getScalarType("u32"), ST->kind() == ScalarTypeKind::UnsignedInt); + } else { + PrintFatalError("unsignedflag's argument should be a scalar type"); + } } else { std::vector<Result::Ptr> Args; for (unsigned i = 0, e = D->getNumArgs(); i < e; ++i) |