summaryrefslogtreecommitdiffstats
path: root/clang
diff options
context:
space:
mode:
Diffstat (limited to 'clang')
-rw-r--r--clang/include/clang/Basic/arm_mve.td162
-rw-r--r--clang/include/clang/Basic/arm_mve_defs.td25
-rw-r--r--clang/test/CodeGen/arm-mve-intrinsics/scatter-gather.c2146
-rw-r--r--clang/test/Sema/arm-mve-immediates.c56
-rw-r--r--clang/utils/TableGen/MveEmitter.cpp40
5 files changed, 2403 insertions, 26 deletions
diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td
index aca0d9fa925..6e0e8ce4e5e 100644
--- a/clang/include/clang/Basic/arm_mve.td
+++ b/clang/include/clang/Basic/arm_mve.td
@@ -72,22 +72,158 @@ def vcvt#half#q_m_f16: Intrinsic<
} // loop over half = "b", "t"
-let params = T.All32, pnt = PNT_None in
-def vldrwq_gather_base_wb: Intrinsic<
- Vector, (args Ptr<VecOf<Unsigned<Scalar>>>:$addr, imm_mem7bit<4>:$offset),
- (seq (IRInt<"vldr_gather_base_wb", [Vector, VecOf<Unsigned<Scalar>>]>
+multiclass gather_base<list<Type> types, int size> {
+ let params = types, pnt = PNT_None in {
+ def _gather_base: Intrinsic<
+ Vector, (args UVector:$addr, imm_mem7bit<size>:$offset),
+ (IRInt<"vldr_gather_base", [Vector, UVector]> $addr, $offset)>;
+
+ def _gather_base_z: Intrinsic<
+ Vector, (args UVector:$addr, imm_mem7bit<size>:$offset, Predicate:$pred),
+ (IRInt<"vldr_gather_base_predicated", [Vector, UVector, Predicate]>
+ $addr, $offset, $pred)>;
+
+ def _gather_base_wb: Intrinsic<
+ Vector, (args Ptr<UVector>:$addr, imm_mem7bit<size>:$offset),
+ (seq (IRInt<"vldr_gather_base_wb", [Vector, UVector]>
(load $addr), $offset):$pair,
- (store (xval $pair, 1), $addr),
- (xval $pair, 0))>;
+ (store (xval $pair, 1), $addr),
+ (xval $pair, 0))>;
-let params = T.All64, pnt = PNT_None in
-def vldrdq_gather_base_wb_z: Intrinsic<
- Vector, (args Ptr<VecOf<Unsigned<Scalar>>>:$addr, imm_mem7bit<8>:$offset,
- Predicate:$pred),
- (seq (IRInt<"vldr_gather_base_wb_predicated", [Vector, VecOf<Unsigned<Scalar>>, Predicate]>
+ def _gather_base_wb_z: Intrinsic<
+ Vector, (args Ptr<UVector>:$addr, imm_mem7bit<size>:$offset,
+ Predicate:$pred),
+ (seq (IRInt<"vldr_gather_base_wb_predicated",
+ [Vector, UVector, Predicate]>
(load $addr), $offset, $pred):$pair,
- (store (xval $pair, 1), $addr),
- (xval $pair, 0))>;
+ (store (xval $pair, 1), $addr),
+ (xval $pair, 0))>;
+ }
+}
+
+defm vldrwq: gather_base<T.All32, 4>;
+defm vldrdq: gather_base<T.All64, 8>;
+
+multiclass scatter_base<list<Type> types, int size> {
+ let params = types in {
+ def _scatter_base: Intrinsic<
+ Void, (args UVector:$addr, imm_mem7bit<size>:$offset, Vector:$data),
+ (IRInt<"vstr_scatter_base", [UVector, Vector]> $addr, $offset, $data)>;
+
+ def _scatter_base_p: Intrinsic<
+ Void, (args UVector:$addr, imm_mem7bit<size>:$offset, Vector:$data,
+ Predicate:$pred),
+ (IRInt<"vstr_scatter_base_predicated", [UVector, Vector, Predicate]>
+ $addr, $offset, $data, $pred)>;
+
+ def _scatter_base_wb: Intrinsic<
+ Void, (args Ptr<UVector>:$addr, imm_mem7bit<size>:$offset, Vector:$data),
+ (seq (IRInt<"vstr_scatter_base_wb", [UVector, Vector]>
+ (load $addr), $offset, $data):$wbaddr,
+ (store $wbaddr, $addr))>;
+
+ def _scatter_base_wb_p: Intrinsic<
+ Void, (args Ptr<UVector>:$addr, imm_mem7bit<size>:$offset,
+ Vector:$data, Predicate:$pred),
+ (seq (IRInt<"vstr_scatter_base_wb_predicated",
+ [UVector, Vector, Predicate]>
+ (load $addr), $offset, $data, $pred):$wbaddr,
+ (store $wbaddr, $addr))>;
+ }
+}
+
+defm vstrwq: scatter_base<T.All32, 4>;
+defm vstrdq: scatter_base<T.All64, 8>;
+
+multiclass gather_offset_unshifted<list<Type> types, PrimitiveType memtype> {
+ let params = types in {
+ def _gather_offset: Intrinsic<
+ Vector, (args CPtr<CopyKind<memtype, Scalar>>:$base, UVector:$offsets),
+ (IRInt<"vldr_gather_offset",
+ [Vector, CPtr<CopyKind<memtype, Scalar>>, UVector]>
+ $base, $offsets, memtype.size, 0, (unsignedflag Scalar))>;
+ def _gather_offset_z: Intrinsic<
+ Vector, (args CPtr<CopyKind<memtype, Scalar>>:$base, UVector:$offsets,
+ Predicate:$pred),
+ (IRInt<"vldr_gather_offset_predicated",
+ [Vector, CPtr<CopyKind<memtype, Scalar>>, UVector, Predicate]>
+ $base, $offsets, memtype.size, 0, (unsignedflag Scalar), $pred)>;
+ }
+}
+
+multiclass gather_offset_shifted<list<Type> types, PrimitiveType memtype,
+ int shift> {
+ let params = types in {
+ def _gather_shifted_offset: Intrinsic<
+ Vector, (args CPtr<CopyKind<memtype, Scalar>>:$base, UVector:$offsets),
+ (IRInt<"vldr_gather_offset",
+ [Vector, CPtr<CopyKind<memtype, Scalar>>, UVector]>
+ $base, $offsets, memtype.size, shift, (unsignedflag Scalar))>;
+ def _gather_shifted_offset_z: Intrinsic<
+ Vector, (args CPtr<CopyKind<memtype, Scalar>>:$base, UVector:$offsets,
+ Predicate:$pred),
+ (IRInt<"vldr_gather_offset_predicated",
+ [Vector, CPtr<CopyKind<memtype, Scalar>>, UVector, Predicate]>
+ $base, $offsets, memtype.size, shift, (unsignedflag Scalar), $pred)>;
+ }
+}
+
+multiclass gather_offset_both<list<Type> types, PrimitiveType memtype,
+ int shift> {
+ defm "": gather_offset_unshifted<types, memtype>;
+ defm "": gather_offset_shifted<types, memtype, shift>;
+}
+
+defm vldrbq: gather_offset_unshifted<!listconcat(T.All8, T.Int16, T.Int32), u8>;
+defm vldrhq: gather_offset_both<!listconcat(T.All16, T.Int32), u16, 1>;
+defm vldrwq: gather_offset_both<T.All32, u32, 2>;
+defm vldrdq: gather_offset_both<T.Int64, u64, 3>;
+
+multiclass scatter_offset_unshifted<list<Type> types, PrimitiveType memtype> {
+ let params = types in {
+ def _scatter_offset: Intrinsic<
+ Void, (args Ptr<CopyKind<memtype, Scalar>>:$base, UVector:$offsets,
+ Vector:$data),
+ (IRInt<"vstr_scatter_offset",
+ [Ptr<CopyKind<memtype, Scalar>>, UVector, Vector]>
+ $base, $offsets, $data, memtype.size, 0)>;
+ def _scatter_offset_p: Intrinsic<
+ Void, (args Ptr<CopyKind<memtype, Scalar>>:$base, UVector:$offsets,
+ Vector:$data, Predicate:$pred),
+ (IRInt<"vstr_scatter_offset_predicated",
+ [Ptr<CopyKind<memtype, Scalar>>, UVector, Vector, Predicate]>
+ $base, $offsets, $data, memtype.size, 0, $pred)>;
+ }
+}
+
+multiclass scatter_offset_shifted<list<Type> types, PrimitiveType memtype,
+ int shift> {
+ let params = types in {
+ def _scatter_shifted_offset: Intrinsic<
+ Void, (args Ptr<CopyKind<memtype, Scalar>>:$base, UVector:$offsets,
+ Vector:$data),
+ (IRInt<"vstr_scatter_offset",
+ [Ptr<CopyKind<memtype, Scalar>>, UVector, Vector]>
+ $base, $offsets, $data, memtype.size, shift)>;
+ def _scatter_shifted_offset_p: Intrinsic<
+ Void, (args Ptr<CopyKind<memtype, Scalar>>:$base, UVector:$offsets,
+ Vector:$data, Predicate:$pred),
+ (IRInt<"vstr_scatter_offset_predicated",
+ [Ptr<CopyKind<memtype, Scalar>>, UVector, Vector, Predicate]>
+ $base, $offsets, $data, memtype.size, shift, $pred)>;
+ }
+}
+
+multiclass scatter_offset_both<list<Type> types, PrimitiveType memtype,
+ int shift> {
+ defm "": scatter_offset_unshifted<types, memtype>;
+ defm "": scatter_offset_shifted<types, memtype, shift>;
+}
+
+defm vstrbq: scatter_offset_unshifted<!listconcat(T.All8,T.Int16,T.Int32), u8>;
+defm vstrhq: scatter_offset_both<!listconcat(T.All16, T.Int32), u16, 1>;
+defm vstrwq: scatter_offset_both<T.All32, u32, 2>;
+defm vstrdq: scatter_offset_both<T.Int64, u64, 3>;
let params = [Void], pnt = PNT_None in
def urshrl: Intrinsic<u64, (args u64:$value, imm_1to32:$shift),
diff --git a/clang/include/clang/Basic/arm_mve_defs.td b/clang/include/clang/Basic/arm_mve_defs.td
index 14afc04a825..3d9333f3d44 100644
--- a/clang/include/clang/Basic/arm_mve_defs.td
+++ b/clang/include/clang/Basic/arm_mve_defs.td
@@ -82,6 +82,11 @@ class IRInt<string name_, list<Type> params_ = [], bit appendKind_ = 0> {
// the return value of the seq construction as a whole.
def seq;
+// Another magic operation is 'unsignedflag', which you give a scalar
+// _type_ as an argument, and it expands into 1 for an unsigned type
+// and 0 for a signed (or floating) one.
+def unsignedflag;
+
// If you put CustomCodegen<"foo"> in an intrinsic's codegen field, it
// indicates that the IR generation for that intrinsic is done by handwritten
// C++ and not autogenerated at all. The effect in the MVE builtin codegen
@@ -109,7 +114,7 @@ def CTO_Vec: ComplexTypeOp;
def CTO_Pred: ComplexTypeOp;
class CTO_Tuple<int n_>: ComplexTypeOp { int n = n_; }
class CTO_Pointer<bit const_>: ComplexTypeOp { bit const = const_; }
-class CTO_Sign<bit signed_>: ComplexTypeOp { bit signed = signed_; }
+def CTO_CopyKind: ComplexTypeOp;
// -----------------------------------------------------------------------------
// Instances of Type intended to be used directly in the specification of an
@@ -167,10 +172,20 @@ class MultiVector<int n>: ComplexType<(CTO_Tuple<n> Vector)>;
class Ptr<Type t>: ComplexType<(CTO_Pointer<0> t)>;
class CPtr<Type t>: ComplexType<(CTO_Pointer<1> t)>;
-// Unsigned<t> expects t to be a scalar, and expands to the unsigned integer
-// scalar of the same size. So it returns u16 if you give it s16 or f16 (or
-// u16 itself).
-class Unsigned<Type t>: ComplexType<(CTO_Sign<0> t)>;
+// CopyKind<s,k> expects s and k to be scalar types. It returns a scalar type
+// whose kind (signed, unsigned or float) matches that of k, and whose size
+// matches that of s.
+class CopyKind<Type s, Type k>: ComplexType<(CTO_CopyKind s, k)>;
+
+// Unsigned<t> expects t to be a scalar type, and expands to the unsigned
+// integer scalar of the same size. So it returns u16 if you give it s16 or
+// f16 (or u16 itself).
+class Unsigned<Type t>: ComplexType<(CTO_CopyKind t, u32)>;
+
+// UScalar and UVector expand to the unsigned-integer versions of
+// Scalar and Vector.
+def UScalar: Unsigned<Scalar>;
+def UVector: VecOf<UScalar>;
// -----------------------------------------------------------------------------
// Internal definitions for specifying immediate arguments for an intrinsic.
diff --git a/clang/test/CodeGen/arm-mve-intrinsics/scatter-gather.c b/clang/test/CodeGen/arm-mve-intrinsics/scatter-gather.c
new file mode 100644
index 00000000000..830f62442c3
--- /dev/null
+++ b/clang/test/CodeGen/arm-mve-intrinsics/scatter-gather.c
@@ -0,0 +1,2146 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+
+#include <arm_mve.h>
+
+// CHECK-LABEL: @test_vldrbq_gather_offset_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i8.v8i16(i8* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], i32 8, i32 0, i32 0)
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+int16x8_t test_vldrbq_gather_offset_s16(const int8_t *base, uint16x8_t offset)
+{
+#ifdef POLYMORPHIC
+ return vldrbq_gather_offset(base, offset);
+#else /* POLYMORPHIC */
+ return vldrbq_gather_offset_s16(base, offset);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vldrbq_gather_offset_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i8.v4i32(i8* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 8, i32 0, i32 0)
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+int32x4_t test_vldrbq_gather_offset_s32(const int8_t *base, uint32x4_t offset)
+{
+#ifdef POLYMORPHIC
+ return vldrbq_gather_offset(base, offset);
+#else /* POLYMORPHIC */
+ return vldrbq_gather_offset_s32(base, offset);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vldrbq_gather_offset_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vldr.gather.offset.v16i8.p0i8.v16i8(i8* [[BASE:%.*]], <16 x i8> [[OFFSET:%.*]], i32 8, i32 0, i32 0)
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
+int8x16_t test_vldrbq_gather_offset_s8(const int8_t *base, uint8x16_t offset)
+{
+#ifdef POLYMORPHIC
+ return vldrbq_gather_offset(base, offset);
+#else /* POLYMORPHIC */
+ return vldrbq_gather_offset_s8(base, offset);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vldrbq_gather_offset_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i8.v8i16(i8* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], i32 8, i32 0, i32 1)
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+uint16x8_t test_vldrbq_gather_offset_u16(const uint8_t *base, uint16x8_t offset)
+{
+#ifdef POLYMORPHIC
+ return vldrbq_gather_offset(base, offset);
+#else /* POLYMORPHIC */
+ return vldrbq_gather_offset_u16(base, offset);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vldrbq_gather_offset_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i8.v4i32(i8* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 8, i32 0, i32 1)
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+uint32x4_t test_vldrbq_gather_offset_u32(const uint8_t *base, uint32x4_t offset)
+{
+#ifdef POLYMORPHIC
+ return vldrbq_gather_offset(base, offset);
+#else /* POLYMORPHIC */
+ return vldrbq_gather_offset_u32(base, offset);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vldrbq_gather_offset_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vldr.gather.offset.v16i8.p0i8.v16i8(i8* [[BASE:%.*]], <16 x i8> [[OFFSET:%.*]], i32 8, i32 0, i32 1)
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
+uint8x16_t test_vldrbq_gather_offset_u8(const uint8_t *base, uint8x16_t offset)
+{
+#ifdef POLYMORPHIC
+ return vldrbq_gather_offset(base, offset);
+#else /* POLYMORPHIC */
+ return vldrbq_gather_offset_u8(base, offset);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vldrbq_gather_offset_z_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i8.v8i16.v8i1(i8* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], i32 8, i32 0, i32 0, <8 x i1> [[TMP1]])
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vldrbq_gather_offset_z_s16(const int8_t *base, uint16x8_t offset, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vldrbq_gather_offset_z(base, offset, p);
+#else /* POLYMORPHIC */
+ return vldrbq_gather_offset_z_s16(base, offset, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vldrbq_gather_offset_z_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i8.v4i32.v4i1(i8* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 8, i32 0, i32 0, <4 x i1> [[TMP1]])
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vldrbq_gather_offset_z_s32(const int8_t *base, uint32x4_t offset, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vldrbq_gather_offset_z(base, offset, p);
+#else /* POLYMORPHIC */
+ return vldrbq_gather_offset_z_s32(base, offset, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vldrbq_gather_offset_z_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vldr.gather.offset.predicated.v16i8.p0i8.v16i8.v16i1(i8* [[BASE:%.*]], <16 x i8> [[OFFSET:%.*]], i32 8, i32 0, i32 0, <16 x i1> [[TMP1]])
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vldrbq_gather_offset_z_s8(const int8_t *base, uint8x16_t offset, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vldrbq_gather_offset_z(base, offset, p);
+#else /* POLYMORPHIC */
+ return vldrbq_gather_offset_z_s8(base, offset, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vldrbq_gather_offset_z_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i8.v8i16.v8i1(i8* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], i32 8, i32 0, i32 1, <8 x i1> [[TMP1]])
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vldrbq_gather_offset_z_u16(const uint8_t *base, uint16x8_t offset, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vldrbq_gather_offset_z(base, offset, p);
+#else /* POLYMORPHIC */
+ return vldrbq_gather_offset_z_u16(base, offset, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vldrbq_gather_offset_z_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i8.v4i32.v4i1(i8* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 8, i32 0, i32 1, <4 x i1> [[TMP1]])
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vldrbq_gather_offset_z_u32(const uint8_t *base, uint32x4_t offset, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vldrbq_gather_offset_z(base, offset, p);
+#else /* POLYMORPHIC */
+ return vldrbq_gather_offset_z_u32(base, offset, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vldrbq_gather_offset_z_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vldr.gather.offset.predicated.v16i8.p0i8.v16i8.v16i1(i8* [[BASE:%.*]], <16 x i8> [[OFFSET:%.*]], i32 8, i32 0, i32 1, <16 x i1> [[TMP1]])
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+uint8x16_t test_vldrbq_gather_offset_z_u8(const uint8_t *base, uint8x16_t offset, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vldrbq_gather_offset_z(base, offset, p);
+#else /* POLYMORPHIC */
+ return vldrbq_gather_offset_z_u8(base, offset, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vldrdq_gather_base_s64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = call <2 x i64> @llvm.arm.mve.vldr.gather.base.v2i64.v2i64(<2 x i64> [[ADDR:%.*]], i32 616)
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+int64x2_t test_vldrdq_gather_base_s64(uint64x2_t addr)
+{
+ return vldrdq_gather_base_s64(addr, 0x268);
+}
+
+// CHECK-LABEL: @test_vldrdq_gather_base_u64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = call <2 x i64> @llvm.arm.mve.vldr.gather.base.v2i64.v2i64(<2 x i64> [[ADDR:%.*]], i32 336)
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+uint64x2_t test_vldrdq_gather_base_u64(uint64x2_t addr)
+{
+ return vldrdq_gather_base_u64(addr, 0x150);
+}
+
+// CHECK-LABEL: @test_vldrdq_gather_base_wb_s64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, <2 x i64>* [[ADDR:%.*]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.v2i64.v2i64(<2 x i64> [[TMP0]], i32 576)
+// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[TMP1]], 1
+// CHECK-NEXT: store <2 x i64> [[TMP2]], <2 x i64>* [[ADDR]], align 8
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[TMP1]], 0
+// CHECK-NEXT: ret <2 x i64> [[TMP3]]
+//
+int64x2_t test_vldrdq_gather_base_wb_s64(uint64x2_t *addr)
+{
+ return vldrdq_gather_base_wb_s64(addr, 0x240);
+}
+
+// CHECK-LABEL: @test_vldrdq_gather_base_wb_u64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, <2 x i64>* [[ADDR:%.*]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.v2i64.v2i64(<2 x i64> [[TMP0]], i32 328)
+// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[TMP1]], 1
+// CHECK-NEXT: store <2 x i64> [[TMP2]], <2 x i64>* [[ADDR]], align 8
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[TMP1]], 0
+// CHECK-NEXT: ret <2 x i64> [[TMP3]]
+//
+uint64x2_t test_vldrdq_gather_base_wb_u64(uint64x2_t *addr)
+{
+ return vldrdq_gather_base_wb_u64(addr, 0x148);
+}
+
+// CHECK-LABEL: @test_vldrdq_gather_base_wb_z_s64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, <2 x i64>* [[ADDR:%.*]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64> [[TMP0]], i32 664, <4 x i1> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[TMP3]], 1
+// CHECK-NEXT: store <2 x i64> [[TMP4]], <2 x i64>* [[ADDR]], align 8
+// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[TMP3]], 0
+// CHECK-NEXT: ret <2 x i64> [[TMP5]]
+//
+int64x2_t test_vldrdq_gather_base_wb_z_s64(uint64x2_t *addr, mve_pred16_t p)
+{
+ return vldrdq_gather_base_wb_z_s64(addr, 0x298, p);
+}
+
+// CHECK-LABEL: @test_vldrdq_gather_base_wb_z_u64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, <2 x i64>* [[ADDR:%.*]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64> [[TMP0]], i32 656, <4 x i1> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[TMP3]], 1
+// CHECK-NEXT: store <2 x i64> [[TMP4]], <2 x i64>* [[ADDR]], align 8
+// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[TMP3]], 0
+// CHECK-NEXT: ret <2 x i64> [[TMP5]]
+//
+uint64x2_t test_vldrdq_gather_base_wb_z_u64(uint64x2_t *addr, mve_pred16_t p)
+{
+ return vldrdq_gather_base_wb_z_u64(addr, 0x290, p);
+}
+
+// CHECK-LABEL: @test_vldrdq_gather_base_z_s64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <2 x i64> @llvm.arm.mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1(<2 x i64> [[ADDR:%.*]], i32 888, <4 x i1> [[TMP1]])
+// CHECK-NEXT: ret <2 x i64> [[TMP2]]
+//
+int64x2_t test_vldrdq_gather_base_z_s64(uint64x2_t addr, mve_pred16_t p)
+{
+ return vldrdq_gather_base_z_s64(addr, 0x378, p);
+}
+
+// CHECK-LABEL: @test_vldrdq_gather_base_z_u64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <2 x i64> @llvm.arm.mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1(<2 x i64> [[ADDR:%.*]], i32 1000, <4 x i1> [[TMP1]])
+// CHECK-NEXT: ret <2 x i64> [[TMP2]]
+//
+uint64x2_t test_vldrdq_gather_base_z_u64(uint64x2_t addr, mve_pred16_t p)
+{
+ return vldrdq_gather_base_z_u64(addr, 0x3e8, p);
+}
+
+// CHECK-LABEL: @test_vldrdq_gather_offset_s64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.v2i64.p0i64.v2i64(i64* [[BASE:%.*]], <2 x i64> [[OFFSET:%.*]], i32 64, i32 0, i32 0)
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+int64x2_t test_vldrdq_gather_offset_s64(const int64_t *base, uint64x2_t offset)
+{
+#ifdef POLYMORPHIC
+ return vldrdq_gather_offset(base, offset);
+#else /* POLYMORPHIC */
+ return vldrdq_gather_offset_s64(base, offset);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vldrdq_gather_offset_u64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.v2i64.p0i64.v2i64(i64* [[BASE:%.*]], <2 x i64> [[OFFSET:%.*]], i32 64, i32 0, i32 1)
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+uint64x2_t test_vldrdq_gather_offset_u64(const uint64_t *base, uint64x2_t offset)
+{
+#ifdef POLYMORPHIC
+ return vldrdq_gather_offset(base, offset);
+#else /* POLYMORPHIC */
+ return vldrdq_gather_offset_u64(base, offset);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vldrdq_gather_offset_z_s64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1(i64* [[BASE:%.*]], <2 x i64> [[OFFSET:%.*]], i32 64, i32 0, i32 0, <4 x i1> [[TMP1]])
+// CHECK-NEXT: ret <2 x i64> [[TMP2]]
+//
+int64x2_t test_vldrdq_gather_offset_z_s64(const int64_t *base, uint64x2_t offset, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vldrdq_gather_offset_z(base, offset, p);
+#else /* POLYMORPHIC */
+ return vldrdq_gather_offset_z_s64(base, offset, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vldrdq_gather_offset_z_u64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1(i64* [[BASE:%.*]], <2 x i64> [[OFFSET:%.*]], i32 64, i32 0, i32 1, <4 x i1> [[TMP1]])
+// CHECK-NEXT: ret <2 x i64> [[TMP2]]
+//
+uint64x2_t test_vldrdq_gather_offset_z_u64(const uint64_t *base, uint64x2_t offset, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vldrdq_gather_offset_z(base, offset, p);
+#else /* POLYMORPHIC */
+ return vldrdq_gather_offset_z_u64(base, offset, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vldrdq_gather_shifted_offset_s64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.v2i64.p0i64.v2i64(i64* [[BASE:%.*]], <2 x i64> [[OFFSET:%.*]], i32 64, i32 3, i32 0)
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+int64x2_t test_vldrdq_gather_shifted_offset_s64(const int64_t *base, uint64x2_t offset)
+{
+#ifdef POLYMORPHIC
+ return vldrdq_gather_shifted_offset(base, offset);
+#else /* POLYMORPHIC */
+ return vldrdq_gather_shifted_offset_s64(base, offset);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vldrdq_gather_shifted_offset_u64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.v2i64.p0i64.v2i64(i64* [[BASE:%.*]], <2 x i64> [[OFFSET:%.*]], i32 64, i32 3, i32 1)
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+uint64x2_t test_vldrdq_gather_shifted_offset_u64(const uint64_t *base, uint64x2_t offset)
+{
+#ifdef POLYMORPHIC
+ return vldrdq_gather_shifted_offset(base, offset);
+#else /* POLYMORPHIC */
+ return vldrdq_gather_shifted_offset_u64(base, offset);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vldrdq_gather_shifted_offset_z_s64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1(i64* [[BASE:%.*]], <2 x i64> [[OFFSET:%.*]], i32 64, i32 3, i32 0, <4 x i1> [[TMP1]])
+// CHECK-NEXT: ret <2 x i64> [[TMP2]]
+//
+int64x2_t test_vldrdq_gather_shifted_offset_z_s64(const int64_t *base, uint64x2_t offset, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vldrdq_gather_shifted_offset_z(base, offset, p);
+#else /* POLYMORPHIC */
+ return vldrdq_gather_shifted_offset_z_s64(base, offset, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vldrdq_gather_shifted_offset_z_u64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1(i64* [[BASE:%.*]], <2 x i64> [[OFFSET:%.*]], i32 64, i32 3, i32 1, <4 x i1> [[TMP1]])
+// CHECK-NEXT: ret <2 x i64> [[TMP2]]
+//
+uint64x2_t test_vldrdq_gather_shifted_offset_z_u64(const uint64_t *base, uint64x2_t offset, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vldrdq_gather_shifted_offset_z(base, offset, p);
+#else /* POLYMORPHIC */
+ return vldrdq_gather_shifted_offset_z_u64(base, offset, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vldrhq_gather_offset_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vldr.gather.offset.v8f16.p0f16.v8i16(half* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], i32 16, i32 0, i32 0)
+// CHECK-NEXT: ret <8 x half> [[TMP0]]
+//
+float16x8_t test_vldrhq_gather_offset_f16(const float16_t *base, uint16x8_t offset)
+{
+#ifdef POLYMORPHIC
+ return vldrhq_gather_offset(base, offset);
+#else /* POLYMORPHIC */
+ return vldrhq_gather_offset_f16(base, offset);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vldrhq_gather_offset_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i16.v8i16(i16* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], i32 16, i32 0, i32 0)
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+int16x8_t test_vldrhq_gather_offset_s16(const int16_t *base, uint16x8_t offset)
+{
+#ifdef POLYMORPHIC
+ return vldrhq_gather_offset(base, offset);
+#else /* POLYMORPHIC */
+ return vldrhq_gather_offset_s16(base, offset);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vldrhq_gather_offset_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i16.v4i32(i16* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 16, i32 0, i32 0)
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+int32x4_t test_vldrhq_gather_offset_s32(const int16_t *base, uint32x4_t offset)
+{
+#ifdef POLYMORPHIC
+ return vldrhq_gather_offset(base, offset);
+#else /* POLYMORPHIC */
+ return vldrhq_gather_offset_s32(base, offset);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vldrhq_gather_offset_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i16.v8i16(i16* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], i32 16, i32 0, i32 1)
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+uint16x8_t test_vldrhq_gather_offset_u16(const uint16_t *base, uint16x8_t offset)
+{
+#ifdef POLYMORPHIC
+ return vldrhq_gather_offset(base, offset);
+#else /* POLYMORPHIC */
+ return vldrhq_gather_offset_u16(base, offset);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vldrhq_gather_offset_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i16.v4i32(i16* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 16, i32 0, i32 1)
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+uint32x4_t test_vldrhq_gather_offset_u32(const uint16_t *base, uint32x4_t offset)
+{
+#ifdef POLYMORPHIC
+ return vldrhq_gather_offset(base, offset);
+#else /* POLYMORPHIC */
+ return vldrhq_gather_offset_u32(base, offset);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vldrhq_gather_offset_z_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vldr.gather.offset.predicated.v8f16.p0f16.v8i16.v8i1(half* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], i32 16, i32 0, i32 0, <8 x i1> [[TMP1]])
+// CHECK-NEXT: ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vldrhq_gather_offset_z_f16(const float16_t *base, uint16x8_t offset, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vldrhq_gather_offset_z(base, offset, p);
+#else /* POLYMORPHIC */
+ return vldrhq_gather_offset_z_f16(base, offset, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vldrhq_gather_offset_z_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i16.v8i16.v8i1(i16* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], i32 16, i32 0, i32 0, <8 x i1> [[TMP1]])
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vldrhq_gather_offset_z_s16(const int16_t *base, uint16x8_t offset, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vldrhq_gather_offset_z(base, offset, p);
+#else /* POLYMORPHIC */
+ return vldrhq_gather_offset_z_s16(base, offset, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vldrhq_gather_offset_z_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i16.v4i32.v4i1(i16* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 16, i32 0, i32 0, <4 x i1> [[TMP1]])
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vldrhq_gather_offset_z_s32(const int16_t *base, uint32x4_t offset, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vldrhq_gather_offset_z(base, offset, p);
+#else /* POLYMORPHIC */
+ return vldrhq_gather_offset_z_s32(base, offset, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vldrhq_gather_offset_z_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i16.v8i16.v8i1(i16* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], i32 16, i32 0, i32 1, <8 x i1> [[TMP1]])
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vldrhq_gather_offset_z_u16(const uint16_t *base, uint16x8_t offset, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vldrhq_gather_offset_z(base, offset, p);
+#else /* POLYMORPHIC */
+ return vldrhq_gather_offset_z_u16(base, offset, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vldrhq_gather_offset_z_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i16.v4i32.v4i1(i16* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 16, i32 0, i32 1, <4 x i1> [[TMP1]])
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vldrhq_gather_offset_z_u32(const uint16_t *base, uint32x4_t offset, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vldrhq_gather_offset_z(base, offset, p);
+#else /* POLYMORPHIC */
+ return vldrhq_gather_offset_z_u32(base, offset, p);
+#endif /* POLYMORPHIC */
+}
+
+// Tests for the vldrhq "shifted offset" gather variants: per the CHECK lines,
+// the same gather.offset intrinsic is emitted with the shift argument set to 1
+// (offsets scaled by the 16-bit element size) instead of 0.
+// CHECK-LABEL: @test_vldrhq_gather_shifted_offset_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vldr.gather.offset.v8f16.p0f16.v8i16(half* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], i32 16, i32 1, i32 0)
+// CHECK-NEXT: ret <8 x half> [[TMP0]]
+//
+float16x8_t test_vldrhq_gather_shifted_offset_f16(const float16_t *base, uint16x8_t offset)
+{
+#ifdef POLYMORPHIC
+ return vldrhq_gather_shifted_offset(base, offset);
+#else /* POLYMORPHIC */
+ return vldrhq_gather_shifted_offset_f16(base, offset);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vldrhq_gather_shifted_offset_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i16.v8i16(i16* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], i32 16, i32 1, i32 0)
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+int16x8_t test_vldrhq_gather_shifted_offset_s16(const int16_t *base, uint16x8_t offset)
+{
+#ifdef POLYMORPHIC
+ return vldrhq_gather_shifted_offset(base, offset);
+#else /* POLYMORPHIC */
+ return vldrhq_gather_shifted_offset_s16(base, offset);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vldrhq_gather_shifted_offset_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i16.v4i32(i16* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 16, i32 1, i32 0)
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+int32x4_t test_vldrhq_gather_shifted_offset_s32(const int16_t *base, uint32x4_t offset)
+{
+#ifdef POLYMORPHIC
+ return vldrhq_gather_shifted_offset(base, offset);
+#else /* POLYMORPHIC */
+ return vldrhq_gather_shifted_offset_s32(base, offset);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vldrhq_gather_shifted_offset_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i16.v8i16(i16* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], i32 16, i32 1, i32 1)
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+uint16x8_t test_vldrhq_gather_shifted_offset_u16(const uint16_t *base, uint16x8_t offset)
+{
+#ifdef POLYMORPHIC
+ return vldrhq_gather_shifted_offset(base, offset);
+#else /* POLYMORPHIC */
+ return vldrhq_gather_shifted_offset_u16(base, offset);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vldrhq_gather_shifted_offset_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i16.v4i32(i16* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 16, i32 1, i32 1)
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+uint32x4_t test_vldrhq_gather_shifted_offset_u32(const uint16_t *base, uint32x4_t offset)
+{
+#ifdef POLYMORPHIC
+ return vldrhq_gather_shifted_offset(base, offset);
+#else /* POLYMORPHIC */
+ return vldrhq_gather_shifted_offset_u32(base, offset);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vldrhq_gather_shifted_offset_z_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vldr.gather.offset.predicated.v8f16.p0f16.v8i16.v8i1(half* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], i32 16, i32 1, i32 0, <8 x i1> [[TMP1]])
+// CHECK-NEXT: ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vldrhq_gather_shifted_offset_z_f16(const float16_t *base, uint16x8_t offset, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vldrhq_gather_shifted_offset_z(base, offset, p);
+#else /* POLYMORPHIC */
+ return vldrhq_gather_shifted_offset_z_f16(base, offset, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vldrhq_gather_shifted_offset_z_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i16.v8i16.v8i1(i16* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], i32 16, i32 1, i32 0, <8 x i1> [[TMP1]])
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vldrhq_gather_shifted_offset_z_s16(const int16_t *base, uint16x8_t offset, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vldrhq_gather_shifted_offset_z(base, offset, p);
+#else /* POLYMORPHIC */
+ return vldrhq_gather_shifted_offset_z_s16(base, offset, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vldrhq_gather_shifted_offset_z_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i16.v4i32.v4i1(i16* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 16, i32 1, i32 0, <4 x i1> [[TMP1]])
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vldrhq_gather_shifted_offset_z_s32(const int16_t *base, uint32x4_t offset, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vldrhq_gather_shifted_offset_z(base, offset, p);
+#else /* POLYMORPHIC */
+ return vldrhq_gather_shifted_offset_z_s32(base, offset, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vldrhq_gather_shifted_offset_z_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i16.v8i16.v8i1(i16* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], i32 16, i32 1, i32 1, <8 x i1> [[TMP1]])
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vldrhq_gather_shifted_offset_z_u16(const uint16_t *base, uint16x8_t offset, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vldrhq_gather_shifted_offset_z(base, offset, p);
+#else /* POLYMORPHIC */
+ return vldrhq_gather_shifted_offset_z_u16(base, offset, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vldrhq_gather_shifted_offset_z_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i16.v4i32.v4i1(i16* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 16, i32 1, i32 1, <4 x i1> [[TMP1]])
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vldrhq_gather_shifted_offset_z_u32(const uint16_t *base, uint32x4_t offset, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vldrhq_gather_shifted_offset_z(base, offset, p);
+#else /* POLYMORPHIC */
+ return vldrhq_gather_shifted_offset_z_u32(base, offset, p);
+#endif /* POLYMORPHIC */
+}
+
+// Tests for vldrwq gather-base variants (vector-of-addresses plus immediate
+// offset). The immediate arguments below are written in hex in the C source
+// and appear in decimal in the CHECK lines (e.g. 0xc == 12). The _wb forms
+// also check the write-back: the updated address vector is the second element
+// of the returned struct and is stored back through the pointer. These base
+// intrinsics have no polymorphic form, hence no POLYMORPHIC #ifdef here.
+// CHECK-LABEL: @test_vldrwq_gather_base_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.vldr.gather.base.v4f32.v4i32(<4 x i32> [[ADDR:%.*]], i32 12)
+// CHECK-NEXT: ret <4 x float> [[TMP0]]
+//
+float32x4_t test_vldrwq_gather_base_f32(uint32x4_t addr)
+{
+ return vldrwq_gather_base_f32(addr, 0xc);
+}
+
+// CHECK-LABEL: @test_vldrwq_gather_base_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.base.v4i32.v4i32(<4 x i32> [[ADDR:%.*]], i32 400)
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+int32x4_t test_vldrwq_gather_base_s32(uint32x4_t addr)
+{
+ return vldrwq_gather_base_s32(addr, 0x190);
+}
+
+// CHECK-LABEL: @test_vldrwq_gather_base_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.base.v4i32.v4i32(<4 x i32> [[ADDR:%.*]], i32 284)
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+uint32x4_t test_vldrwq_gather_base_u32(uint32x4_t addr)
+{
+ return vldrwq_gather_base_u32(addr, 0x11c);
+}
+
+// CHECK-LABEL: @test_vldrwq_gather_base_wb_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* [[ADDR:%.*]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = call { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4f32.v4i32(<4 x i32> [[TMP0]], i32 64)
+// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <4 x float>, <4 x i32> } [[TMP1]], 1
+// CHECK-NEXT: store <4 x i32> [[TMP2]], <4 x i32>* [[ADDR]], align 8
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x float>, <4 x i32> } [[TMP1]], 0
+// CHECK-NEXT: ret <4 x float> [[TMP3]]
+//
+float32x4_t test_vldrwq_gather_base_wb_f32(uint32x4_t *addr)
+{
+ return vldrwq_gather_base_wb_f32(addr, 0x40);
+}
+
+// CHECK-LABEL: @test_vldrwq_gather_base_wb_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* [[ADDR:%.*]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4i32.v4i32(<4 x i32> [[TMP0]], i32 80)
+// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[TMP1]], 1
+// CHECK-NEXT: store <4 x i32> [[TMP2]], <4 x i32>* [[ADDR]], align 8
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[TMP1]], 0
+// CHECK-NEXT: ret <4 x i32> [[TMP3]]
+//
+int32x4_t test_vldrwq_gather_base_wb_s32(uint32x4_t *addr)
+{
+ return vldrwq_gather_base_wb_s32(addr, 0x50);
+}
+
+// CHECK-LABEL: @test_vldrwq_gather_base_wb_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* [[ADDR:%.*]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4i32.v4i32(<4 x i32> [[TMP0]], i32 480)
+// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[TMP1]], 1
+// CHECK-NEXT: store <4 x i32> [[TMP2]], <4 x i32>* [[ADDR]], align 8
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[TMP1]], 0
+// CHECK-NEXT: ret <4 x i32> [[TMP3]]
+//
+uint32x4_t test_vldrwq_gather_base_wb_u32(uint32x4_t *addr)
+{
+ return vldrwq_gather_base_wb_u32(addr, 0x1e0);
+}
+
+// CHECK-LABEL: @test_vldrwq_gather_base_wb_z_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* [[ADDR:%.*]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = call { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4f32.v4i32.v4i1(<4 x i32> [[TMP0]], i32 352, <4 x i1> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x float>, <4 x i32> } [[TMP3]], 1
+// CHECK-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* [[ADDR]], align 8
+// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x float>, <4 x i32> } [[TMP3]], 0
+// CHECK-NEXT: ret <4 x float> [[TMP5]]
+//
+float32x4_t test_vldrwq_gather_base_wb_z_f32(uint32x4_t *addr, mve_pred16_t p)
+{
+ return vldrwq_gather_base_wb_z_f32(addr, 0x160, p);
+}
+
+// CHECK-LABEL: @test_vldrwq_gather_base_wb_z_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* [[ADDR:%.*]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32> [[TMP0]], i32 276, <4 x i1> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[TMP3]], 1
+// CHECK-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* [[ADDR]], align 8
+// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[TMP3]], 0
+// CHECK-NEXT: ret <4 x i32> [[TMP5]]
+//
+int32x4_t test_vldrwq_gather_base_wb_z_s32(uint32x4_t *addr, mve_pred16_t p)
+{
+ return vldrwq_gather_base_wb_z_s32(addr, 0x114, p);
+}
+
+// CHECK-LABEL: @test_vldrwq_gather_base_wb_z_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* [[ADDR:%.*]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32> [[TMP0]], i32 88, <4 x i1> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[TMP3]], 1
+// CHECK-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* [[ADDR]], align 8
+// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[TMP3]], 0
+// CHECK-NEXT: ret <4 x i32> [[TMP5]]
+//
+uint32x4_t test_vldrwq_gather_base_wb_z_u32(uint32x4_t *addr, mve_pred16_t p)
+{
+ return vldrwq_gather_base_wb_z_u32(addr, 0x58, p);
+}
+
+// CHECK-LABEL: @test_vldrwq_gather_base_z_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vldr.gather.base.predicated.v4f32.v4i32.v4i1(<4 x i32> [[ADDR:%.*]], i32 300, <4 x i1> [[TMP1]])
+// CHECK-NEXT: ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vldrwq_gather_base_z_f32(uint32x4_t addr, mve_pred16_t p)
+{
+ return vldrwq_gather_base_z_f32(addr, 0x12c, p);
+}
+
+// CHECK-LABEL: @test_vldrwq_gather_base_z_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.base.predicated.v4i32.v4i32.v4i1(<4 x i32> [[ADDR:%.*]], i32 440, <4 x i1> [[TMP1]])
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vldrwq_gather_base_z_s32(uint32x4_t addr, mve_pred16_t p)
+{
+ return vldrwq_gather_base_z_s32(addr, 0x1b8, p);
+}
+
+// CHECK-LABEL: @test_vldrwq_gather_base_z_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.base.predicated.v4i32.v4i32.v4i1(<4 x i32> [[ADDR:%.*]], i32 300, <4 x i1> [[TMP1]])
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vldrwq_gather_base_z_u32(uint32x4_t addr, mve_pred16_t p)
+{
+ return vldrwq_gather_base_z_u32(addr, 0x12c, p);
+}
+
+// Tests for vldrwq gather-with-offset (32-bit elements, unscaled offsets:
+// element size 32, shift 0 in the intrinsic arguments per the CHECK lines).
+// CHECK-LABEL: @test_vldrwq_gather_offset_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.vldr.gather.offset.v4f32.p0f32.v4i32(float* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 32, i32 0, i32 0)
+// CHECK-NEXT: ret <4 x float> [[TMP0]]
+//
+float32x4_t test_vldrwq_gather_offset_f32(const float32_t *base, uint32x4_t offset)
+{
+#ifdef POLYMORPHIC
+ return vldrwq_gather_offset(base, offset);
+#else /* POLYMORPHIC */
+ return vldrwq_gather_offset_f32(base, offset);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vldrwq_gather_offset_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i32.v4i32(i32* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 32, i32 0, i32 0)
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+int32x4_t test_vldrwq_gather_offset_s32(const int32_t *base, uint32x4_t offset)
+{
+#ifdef POLYMORPHIC
+ return vldrwq_gather_offset(base, offset);
+#else /* POLYMORPHIC */
+ return vldrwq_gather_offset_s32(base, offset);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vldrwq_gather_offset_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i32.v4i32(i32* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 32, i32 0, i32 1)
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+uint32x4_t test_vldrwq_gather_offset_u32(const uint32_t *base, uint32x4_t offset)
+{
+#ifdef POLYMORPHIC
+ return vldrwq_gather_offset(base, offset);
+#else /* POLYMORPHIC */
+ return vldrwq_gather_offset_u32(base, offset);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vldrwq_gather_offset_z_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vldr.gather.offset.predicated.v4f32.p0f32.v4i32.v4i1(float* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 32, i32 0, i32 0, <4 x i1> [[TMP1]])
+// CHECK-NEXT: ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vldrwq_gather_offset_z_f32(const float32_t *base, uint32x4_t offset, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vldrwq_gather_offset_z(base, offset, p);
+#else /* POLYMORPHIC */
+ return vldrwq_gather_offset_z_f32(base, offset, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vldrwq_gather_offset_z_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i32.v4i32.v4i1(i32* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 32, i32 0, i32 0, <4 x i1> [[TMP1]])
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vldrwq_gather_offset_z_s32(const int32_t *base, uint32x4_t offset, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vldrwq_gather_offset_z(base, offset, p);
+#else /* POLYMORPHIC */
+ return vldrwq_gather_offset_z_s32(base, offset, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vldrwq_gather_offset_z_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i32.v4i32.v4i1(i32* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 32, i32 0, i32 1, <4 x i1> [[TMP1]])
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vldrwq_gather_offset_z_u32(const uint32_t *base, uint32x4_t offset, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vldrwq_gather_offset_z(base, offset, p);
+#else /* POLYMORPHIC */
+ return vldrwq_gather_offset_z_u32(base, offset, p);
+#endif /* POLYMORPHIC */
+}
+
+// Tests for vldrwq shifted-offset gathers: same intrinsic as above but with
+// the shift argument set to 2 (offsets scaled by 4-byte elements) in the
+// CHECK lines.
+// CHECK-LABEL: @test_vldrwq_gather_shifted_offset_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.vldr.gather.offset.v4f32.p0f32.v4i32(float* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 32, i32 2, i32 0)
+// CHECK-NEXT: ret <4 x float> [[TMP0]]
+//
+float32x4_t test_vldrwq_gather_shifted_offset_f32(const float32_t *base, uint32x4_t offset)
+{
+#ifdef POLYMORPHIC
+ return vldrwq_gather_shifted_offset(base, offset);
+#else /* POLYMORPHIC */
+ return vldrwq_gather_shifted_offset_f32(base, offset);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vldrwq_gather_shifted_offset_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i32.v4i32(i32* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 32, i32 2, i32 0)
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+int32x4_t test_vldrwq_gather_shifted_offset_s32(const int32_t *base, uint32x4_t offset)
+{
+#ifdef POLYMORPHIC
+ return vldrwq_gather_shifted_offset(base, offset);
+#else /* POLYMORPHIC */
+ return vldrwq_gather_shifted_offset_s32(base, offset);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vldrwq_gather_shifted_offset_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i32.v4i32(i32* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 32, i32 2, i32 1)
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+uint32x4_t test_vldrwq_gather_shifted_offset_u32(const uint32_t *base, uint32x4_t offset)
+{
+#ifdef POLYMORPHIC
+ return vldrwq_gather_shifted_offset(base, offset);
+#else /* POLYMORPHIC */
+ return vldrwq_gather_shifted_offset_u32(base, offset);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vldrwq_gather_shifted_offset_z_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vldr.gather.offset.predicated.v4f32.p0f32.v4i32.v4i1(float* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 32, i32 2, i32 0, <4 x i1> [[TMP1]])
+// CHECK-NEXT: ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vldrwq_gather_shifted_offset_z_f32(const float32_t *base, uint32x4_t offset, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vldrwq_gather_shifted_offset_z(base, offset, p);
+#else /* POLYMORPHIC */
+ return vldrwq_gather_shifted_offset_z_f32(base, offset, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vldrwq_gather_shifted_offset_z_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i32.v4i32.v4i1(i32* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 32, i32 2, i32 0, <4 x i1> [[TMP1]])
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vldrwq_gather_shifted_offset_z_s32(const int32_t *base, uint32x4_t offset, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vldrwq_gather_shifted_offset_z(base, offset, p);
+#else /* POLYMORPHIC */
+ return vldrwq_gather_shifted_offset_z_s32(base, offset, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vldrwq_gather_shifted_offset_z_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i32.v4i32.v4i1(i32* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 32, i32 2, i32 1, <4 x i1> [[TMP1]])
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vldrwq_gather_shifted_offset_z_u32(const uint32_t *base, uint32x4_t offset, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vldrwq_gather_shifted_offset_z(base, offset, p);
+#else /* POLYMORPHIC */
+ return vldrwq_gather_shifted_offset_z_u32(base, offset, p);
+#endif /* POLYMORPHIC */
+}
+
+// Tests for vstrbq scatter stores (byte element size 8, shift 0 per the
+// CHECK lines); _p variants convert the mve_pred16_t to a predicate vector
+// and call the .predicated intrinsic.
+// CHECK-LABEL: @test_vstrbq_scatter_offset_p_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v8i16.v8i16.v8i1(i8* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], <8 x i16> [[VALUE:%.*]], i32 8, i32 0, <8 x i1> [[TMP1]])
+// CHECK-NEXT: ret void
+//
+void test_vstrbq_scatter_offset_p_s16(int8_t *base, uint16x8_t offset, int16x8_t value, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ vstrbq_scatter_offset_p(base, offset, value, p);
+#else /* POLYMORPHIC */
+ vstrbq_scatter_offset_p_s16(base, offset, value, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vstrbq_scatter_offset_p_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v4i32.v4i32.v4i1(i8* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x i32> [[VALUE:%.*]], i32 8, i32 0, <4 x i1> [[TMP1]])
+// CHECK-NEXT: ret void
+//
+void test_vstrbq_scatter_offset_p_s32(int8_t *base, uint32x4_t offset, int32x4_t value, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ vstrbq_scatter_offset_p(base, offset, value, p);
+#else /* POLYMORPHIC */
+ vstrbq_scatter_offset_p_s32(base, offset, value, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vstrbq_scatter_offset_p_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v16i8.v16i8.v16i1(i8* [[BASE:%.*]], <16 x i8> [[OFFSET:%.*]], <16 x i8> [[VALUE:%.*]], i32 8, i32 0, <16 x i1> [[TMP1]])
+// CHECK-NEXT: ret void
+//
+void test_vstrbq_scatter_offset_p_s8(int8_t *base, uint8x16_t offset, int8x16_t value, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ vstrbq_scatter_offset_p(base, offset, value, p);
+#else /* POLYMORPHIC */
+ vstrbq_scatter_offset_p_s8(base, offset, value, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vstrbq_scatter_offset_p_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v8i16.v8i16.v8i1(i8* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], <8 x i16> [[VALUE:%.*]], i32 8, i32 0, <8 x i1> [[TMP1]])
+// CHECK-NEXT: ret void
+//
+void test_vstrbq_scatter_offset_p_u16(uint8_t *base, uint16x8_t offset, uint16x8_t value, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ vstrbq_scatter_offset_p(base, offset, value, p);
+#else /* POLYMORPHIC */
+ vstrbq_scatter_offset_p_u16(base, offset, value, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vstrbq_scatter_offset_p_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v4i32.v4i32.v4i1(i8* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x i32> [[VALUE:%.*]], i32 8, i32 0, <4 x i1> [[TMP1]])
+// CHECK-NEXT: ret void
+//
+void test_vstrbq_scatter_offset_p_u32(uint8_t *base, uint32x4_t offset, uint32x4_t value, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ vstrbq_scatter_offset_p(base, offset, value, p);
+#else /* POLYMORPHIC */
+ vstrbq_scatter_offset_p_u32(base, offset, value, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vstrbq_scatter_offset_p_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v16i8.v16i8.v16i1(i8* [[BASE:%.*]], <16 x i8> [[OFFSET:%.*]], <16 x i8> [[VALUE:%.*]], i32 8, i32 0, <16 x i1> [[TMP1]])
+// CHECK-NEXT: ret void
+//
+void test_vstrbq_scatter_offset_p_u8(uint8_t *base, uint8x16_t offset, uint8x16_t value, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ vstrbq_scatter_offset_p(base, offset, value, p);
+#else /* POLYMORPHIC */
+ vstrbq_scatter_offset_p_u8(base, offset, value, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vstrbq_scatter_offset_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.p0i8.v8i16.v8i16(i8* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], <8 x i16> [[VALUE:%.*]], i32 8, i32 0)
+// CHECK-NEXT: ret void
+//
+void test_vstrbq_scatter_offset_s16(int8_t *base, uint16x8_t offset, int16x8_t value)
+{
+#ifdef POLYMORPHIC
+ vstrbq_scatter_offset(base, offset, value);
+#else /* POLYMORPHIC */
+ vstrbq_scatter_offset_s16(base, offset, value);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vstrbq_scatter_offset_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.p0i8.v4i32.v4i32(i8* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x i32> [[VALUE:%.*]], i32 8, i32 0)
+// CHECK-NEXT: ret void
+//
+void test_vstrbq_scatter_offset_s32(int8_t *base, uint32x4_t offset, int32x4_t value)
+{
+#ifdef POLYMORPHIC
+ vstrbq_scatter_offset(base, offset, value);
+#else /* POLYMORPHIC */
+ vstrbq_scatter_offset_s32(base, offset, value);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vstrbq_scatter_offset_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.p0i8.v16i8.v16i8(i8* [[BASE:%.*]], <16 x i8> [[OFFSET:%.*]], <16 x i8> [[VALUE:%.*]], i32 8, i32 0)
+// CHECK-NEXT: ret void
+//
+void test_vstrbq_scatter_offset_s8(int8_t *base, uint8x16_t offset, int8x16_t value)
+{
+#ifdef POLYMORPHIC
+ vstrbq_scatter_offset(base, offset, value);
+#else /* POLYMORPHIC */
+ vstrbq_scatter_offset_s8(base, offset, value);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vstrbq_scatter_offset_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.p0i8.v8i16.v8i16(i8* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], <8 x i16> [[VALUE:%.*]], i32 8, i32 0)
+// CHECK-NEXT: ret void
+//
+void test_vstrbq_scatter_offset_u16(uint8_t *base, uint16x8_t offset, uint16x8_t value)
+{
+#ifdef POLYMORPHIC
+ vstrbq_scatter_offset(base, offset, value);
+#else /* POLYMORPHIC */
+ vstrbq_scatter_offset_u16(base, offset, value);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vstrbq_scatter_offset_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.p0i8.v4i32.v4i32(i8* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x i32> [[VALUE:%.*]], i32 8, i32 0)
+// CHECK-NEXT: ret void
+//
+void test_vstrbq_scatter_offset_u32(uint8_t *base, uint32x4_t offset, uint32x4_t value)
+{
+#ifdef POLYMORPHIC
+ vstrbq_scatter_offset(base, offset, value);
+#else /* POLYMORPHIC */
+ vstrbq_scatter_offset_u32(base, offset, value);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vstrbq_scatter_offset_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.p0i8.v16i8.v16i8(i8* [[BASE:%.*]], <16 x i8> [[OFFSET:%.*]], <16 x i8> [[VALUE:%.*]], i32 8, i32 0)
+// CHECK-NEXT: ret void
+//
+void test_vstrbq_scatter_offset_u8(uint8_t *base, uint8x16_t offset, uint8x16_t value)
+{
+#ifdef POLYMORPHIC
+ vstrbq_scatter_offset(base, offset, value);
+#else /* POLYMORPHIC */
+ vstrbq_scatter_offset_u8(base, offset, value);
+#endif /* POLYMORPHIC */
+}
+
+// vstrdq_scatter_base family: 64-bit scatter stores to a vector of base
+// addresses plus a constant immediate offset. The immediates (e.g. 0x378 =
+// 888) must be multiples of 8 within the encodable 7-bit-scaled range.
+// Predicated (_p) variants convert the i16 mve_pred16_t into <4 x i1> --
+// note MVE uses a 4-lane predicate even for v2i64 data.
+// NOTE(review): CHECK lines appear machine-generated; regenerate, don't edit.
+// CHECK-LABEL: @test_vstrdq_scatter_base_p_s64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    call void @llvm.arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1(<2 x i64> [[ADDR:%.*]], i32 888, <2 x i64> [[VALUE:%.*]], <4 x i1> [[TMP1]])
+// CHECK-NEXT:    ret void
+//
+void test_vstrdq_scatter_base_p_s64(uint64x2_t addr, int64x2_t value, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+  vstrdq_scatter_base_p(addr, 0x378, value, p);
+#else /* POLYMORPHIC */
+  vstrdq_scatter_base_p_s64(addr, 0x378, value, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vstrdq_scatter_base_p_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    call void @llvm.arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1(<2 x i64> [[ADDR:%.*]], i32 264, <2 x i64> [[VALUE:%.*]], <4 x i1> [[TMP1]])
+// CHECK-NEXT:    ret void
+//
+void test_vstrdq_scatter_base_p_u64(uint64x2_t addr, uint64x2_t value, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+  vstrdq_scatter_base_p(addr, 0x108, value, p);
+#else /* POLYMORPHIC */
+  vstrdq_scatter_base_p_u64(addr, 0x108, value, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vstrdq_scatter_base_s64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    call void @llvm.arm.mve.vstr.scatter.base.v2i64.v2i64(<2 x i64> [[ADDR:%.*]], i32 408, <2 x i64> [[VALUE:%.*]])
+// CHECK-NEXT:    ret void
+//
+void test_vstrdq_scatter_base_s64(uint64x2_t addr, int64x2_t value)
+{
+#ifdef POLYMORPHIC
+  vstrdq_scatter_base(addr, 0x198, value);
+#else /* POLYMORPHIC */
+  vstrdq_scatter_base_s64(addr, 0x198, value);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vstrdq_scatter_base_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    call void @llvm.arm.mve.vstr.scatter.base.v2i64.v2i64(<2 x i64> [[ADDR:%.*]], i32 472, <2 x i64> [[VALUE:%.*]])
+// CHECK-NEXT:    ret void
+//
+void test_vstrdq_scatter_base_u64(uint64x2_t addr, uint64x2_t value)
+{
+#ifdef POLYMORPHIC
+  vstrdq_scatter_base(addr, 0x1d8, value);
+#else /* POLYMORPHIC */
+  vstrdq_scatter_base_u64(addr, 0x1d8, value);
+#endif /* POLYMORPHIC */
+}
+
+// vstrdq_scatter_base_wb family: write-back variants. The address vector is
+// passed by pointer; the IR loads it, calls the ..wb.. intrinsic (which
+// returns the updated base vector), and stores the result back -- the CHECK
+// lines pin that load/call/store sequence and the align 8 of the accesses.
+// NOTE(review): CHECK lines appear machine-generated; regenerate, don't edit.
+// CHECK-LABEL: @test_vstrdq_scatter_base_wb_p_s64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i64>, <2 x i64>* [[ADDR:%.*]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64> [[TMP0]], i32 248, <2 x i64> [[VALUE:%.*]], <4 x i1> [[TMP2]])
+// CHECK-NEXT:    store <2 x i64> [[TMP3]], <2 x i64>* [[ADDR]], align 8
+// CHECK-NEXT:    ret void
+//
+void test_vstrdq_scatter_base_wb_p_s64(uint64x2_t *addr, int64x2_t value, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+  vstrdq_scatter_base_wb_p(addr, 0xf8, value, p);
+#else /* POLYMORPHIC */
+  vstrdq_scatter_base_wb_p_s64(addr, 0xf8, value, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vstrdq_scatter_base_wb_p_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i64>, <2 x i64>* [[ADDR:%.*]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64> [[TMP0]], i32 136, <2 x i64> [[VALUE:%.*]], <4 x i1> [[TMP2]])
+// CHECK-NEXT:    store <2 x i64> [[TMP3]], <2 x i64>* [[ADDR]], align 8
+// CHECK-NEXT:    ret void
+//
+void test_vstrdq_scatter_base_wb_p_u64(uint64x2_t *addr, uint64x2_t value, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+  vstrdq_scatter_base_wb_p(addr, 0x88, value, p);
+#else /* POLYMORPHIC */
+  vstrdq_scatter_base_wb_p_u64(addr, 0x88, value, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vstrdq_scatter_base_wb_s64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i64>, <2 x i64>* [[ADDR:%.*]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.v2i64.v2i64(<2 x i64> [[TMP0]], i32 208, <2 x i64> [[VALUE:%.*]])
+// CHECK-NEXT:    store <2 x i64> [[TMP1]], <2 x i64>* [[ADDR]], align 8
+// CHECK-NEXT:    ret void
+//
+void test_vstrdq_scatter_base_wb_s64(uint64x2_t *addr, int64x2_t value)
+{
+#ifdef POLYMORPHIC
+  vstrdq_scatter_base_wb(addr, 0xd0, value);
+#else /* POLYMORPHIC */
+  vstrdq_scatter_base_wb_s64(addr, 0xd0, value);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vstrdq_scatter_base_wb_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i64>, <2 x i64>* [[ADDR:%.*]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.v2i64.v2i64(<2 x i64> [[TMP0]], i32 168, <2 x i64> [[VALUE:%.*]])
+// CHECK-NEXT:    store <2 x i64> [[TMP1]], <2 x i64>* [[ADDR]], align 8
+// CHECK-NEXT:    ret void
+//
+void test_vstrdq_scatter_base_wb_u64(uint64x2_t *addr, uint64x2_t value)
+{
+#ifdef POLYMORPHIC
+  vstrdq_scatter_base_wb(addr, 0xa8, value);
+#else /* POLYMORPHIC */
+  vstrdq_scatter_base_wb_u64(addr, 0xa8, value);
+#endif /* POLYMORPHIC */
+}
+
+// vstrdq_scatter_offset family: 64-bit scatter stores to base + per-lane
+// offset, unscaled (trailing i32 0 = shift of 0; i32 64 = element width in
+// bits). Signed/unsigned variants lower to identical IR.
+// NOTE(review): CHECK lines appear machine-generated; regenerate, don't edit.
+// CHECK-LABEL: @test_vstrdq_scatter_offset_p_s64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1(i64* [[BASE:%.*]], <2 x i64> [[OFFSET:%.*]], <2 x i64> [[VALUE:%.*]], i32 64, i32 0, <4 x i1> [[TMP1]])
+// CHECK-NEXT:    ret void
+//
+void test_vstrdq_scatter_offset_p_s64(int64_t *base, uint64x2_t offset, int64x2_t value, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+  vstrdq_scatter_offset_p(base, offset, value, p);
+#else /* POLYMORPHIC */
+  vstrdq_scatter_offset_p_s64(base, offset, value, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vstrdq_scatter_offset_p_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1(i64* [[BASE:%.*]], <2 x i64> [[OFFSET:%.*]], <2 x i64> [[VALUE:%.*]], i32 64, i32 0, <4 x i1> [[TMP1]])
+// CHECK-NEXT:    ret void
+//
+void test_vstrdq_scatter_offset_p_u64(uint64_t *base, uint64x2_t offset, uint64x2_t value, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+  vstrdq_scatter_offset_p(base, offset, value, p);
+#else /* POLYMORPHIC */
+  vstrdq_scatter_offset_p_u64(base, offset, value, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vstrdq_scatter_offset_s64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    call void @llvm.arm.mve.vstr.scatter.offset.p0i64.v2i64.v2i64(i64* [[BASE:%.*]], <2 x i64> [[OFFSET:%.*]], <2 x i64> [[VALUE:%.*]], i32 64, i32 0)
+// CHECK-NEXT:    ret void
+//
+void test_vstrdq_scatter_offset_s64(int64_t *base, uint64x2_t offset, int64x2_t value)
+{
+#ifdef POLYMORPHIC
+  vstrdq_scatter_offset(base, offset, value);
+#else /* POLYMORPHIC */
+  vstrdq_scatter_offset_s64(base, offset, value);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vstrdq_scatter_offset_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    call void @llvm.arm.mve.vstr.scatter.offset.p0i64.v2i64.v2i64(i64* [[BASE:%.*]], <2 x i64> [[OFFSET:%.*]], <2 x i64> [[VALUE:%.*]], i32 64, i32 0)
+// CHECK-NEXT:    ret void
+//
+void test_vstrdq_scatter_offset_u64(uint64_t *base, uint64x2_t offset, uint64x2_t value)
+{
+#ifdef POLYMORPHIC
+  vstrdq_scatter_offset(base, offset, value);
+#else /* POLYMORPHIC */
+  vstrdq_scatter_offset_u64(base, offset, value);
+#endif /* POLYMORPHIC */
+}
+
+// vstrdq_scatter_shifted_offset family: same as vstrdq_scatter_offset but the
+// per-lane offset is left-shifted by 3 (i.e. scaled by the 8-byte element
+// size) -- the only IR difference is the trailing shift operand i32 3.
+// NOTE(review): CHECK lines appear machine-generated; regenerate, don't edit.
+// CHECK-LABEL: @test_vstrdq_scatter_shifted_offset_p_s64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1(i64* [[BASE:%.*]], <2 x i64> [[OFFSET:%.*]], <2 x i64> [[VALUE:%.*]], i32 64, i32 3, <4 x i1> [[TMP1]])
+// CHECK-NEXT:    ret void
+//
+void test_vstrdq_scatter_shifted_offset_p_s64(int64_t *base, uint64x2_t offset, int64x2_t value, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+  vstrdq_scatter_shifted_offset_p(base, offset, value, p);
+#else /* POLYMORPHIC */
+  vstrdq_scatter_shifted_offset_p_s64(base, offset, value, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vstrdq_scatter_shifted_offset_p_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1(i64* [[BASE:%.*]], <2 x i64> [[OFFSET:%.*]], <2 x i64> [[VALUE:%.*]], i32 64, i32 3, <4 x i1> [[TMP1]])
+// CHECK-NEXT:    ret void
+//
+void test_vstrdq_scatter_shifted_offset_p_u64(uint64_t *base, uint64x2_t offset, uint64x2_t value, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+  vstrdq_scatter_shifted_offset_p(base, offset, value, p);
+#else /* POLYMORPHIC */
+  vstrdq_scatter_shifted_offset_p_u64(base, offset, value, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vstrdq_scatter_shifted_offset_s64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    call void @llvm.arm.mve.vstr.scatter.offset.p0i64.v2i64.v2i64(i64* [[BASE:%.*]], <2 x i64> [[OFFSET:%.*]], <2 x i64> [[VALUE:%.*]], i32 64, i32 3)
+// CHECK-NEXT:    ret void
+//
+void test_vstrdq_scatter_shifted_offset_s64(int64_t *base, uint64x2_t offset, int64x2_t value)
+{
+#ifdef POLYMORPHIC
+  vstrdq_scatter_shifted_offset(base, offset, value);
+#else /* POLYMORPHIC */
+  vstrdq_scatter_shifted_offset_s64(base, offset, value);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vstrdq_scatter_shifted_offset_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    call void @llvm.arm.mve.vstr.scatter.offset.p0i64.v2i64.v2i64(i64* [[BASE:%.*]], <2 x i64> [[OFFSET:%.*]], <2 x i64> [[VALUE:%.*]], i32 64, i32 3)
+// CHECK-NEXT:    ret void
+//
+void test_vstrdq_scatter_shifted_offset_u64(uint64_t *base, uint64x2_t offset, uint64x2_t value)
+{
+#ifdef POLYMORPHIC
+  vstrdq_scatter_shifted_offset(base, offset, value);
+#else /* POLYMORPHIC */
+  vstrdq_scatter_shifted_offset_u64(base, offset, value);
+#endif /* POLYMORPHIC */
+}
+
+// vstrhq_scatter_offset family: halfword scatter stores (i32 16 = stored
+// element width, i32 0 = unshifted offsets). Covers f16 values, 8-lane
+// s16/u16, and 4-lane s32/u32 (stores the low half of each 32-bit lane);
+// predicated variants select <8 x i1> or <4 x i1> to match the lane count.
+// NOTE(review): CHECK lines appear machine-generated; regenerate, don't edit.
+// CHECK-LABEL: @test_vstrhq_scatter_offset_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    call void @llvm.arm.mve.vstr.scatter.offset.p0f16.v8i16.v8f16(half* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], <8 x half> [[VALUE:%.*]], i32 16, i32 0)
+// CHECK-NEXT:    ret void
+//
+void test_vstrhq_scatter_offset_f16(float16_t *base, uint16x8_t offset, float16x8_t value)
+{
+#ifdef POLYMORPHIC
+  vstrhq_scatter_offset(base, offset, value);
+#else /* POLYMORPHIC */
+  vstrhq_scatter_offset_f16(base, offset, value);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vstrhq_scatter_offset_p_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0f16.v8i16.v8f16.v8i1(half* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], <8 x half> [[VALUE:%.*]], i32 16, i32 0, <8 x i1> [[TMP1]])
+// CHECK-NEXT:    ret void
+//
+void test_vstrhq_scatter_offset_p_f16(float16_t *base, uint16x8_t offset, float16x8_t value, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+  vstrhq_scatter_offset_p(base, offset, value, p);
+#else /* POLYMORPHIC */
+  vstrhq_scatter_offset_p_f16(base, offset, value, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vstrhq_scatter_offset_p_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v8i16.v8i16.v8i1(i16* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], <8 x i16> [[VALUE:%.*]], i32 16, i32 0, <8 x i1> [[TMP1]])
+// CHECK-NEXT:    ret void
+//
+void test_vstrhq_scatter_offset_p_s16(int16_t *base, uint16x8_t offset, int16x8_t value, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+  vstrhq_scatter_offset_p(base, offset, value, p);
+#else /* POLYMORPHIC */
+  vstrhq_scatter_offset_p_s16(base, offset, value, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vstrhq_scatter_offset_p_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v4i32.v4i32.v4i1(i16* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x i32> [[VALUE:%.*]], i32 16, i32 0, <4 x i1> [[TMP1]])
+// CHECK-NEXT:    ret void
+//
+void test_vstrhq_scatter_offset_p_s32(int16_t *base, uint32x4_t offset, int32x4_t value, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+  vstrhq_scatter_offset_p(base, offset, value, p);
+#else /* POLYMORPHIC */
+  vstrhq_scatter_offset_p_s32(base, offset, value, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vstrhq_scatter_offset_p_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v8i16.v8i16.v8i1(i16* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], <8 x i16> [[VALUE:%.*]], i32 16, i32 0, <8 x i1> [[TMP1]])
+// CHECK-NEXT:    ret void
+//
+void test_vstrhq_scatter_offset_p_u16(uint16_t *base, uint16x8_t offset, uint16x8_t value, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+  vstrhq_scatter_offset_p(base, offset, value, p);
+#else /* POLYMORPHIC */
+  vstrhq_scatter_offset_p_u16(base, offset, value, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vstrhq_scatter_offset_p_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v4i32.v4i32.v4i1(i16* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x i32> [[VALUE:%.*]], i32 16, i32 0, <4 x i1> [[TMP1]])
+// CHECK-NEXT:    ret void
+//
+void test_vstrhq_scatter_offset_p_u32(uint16_t *base, uint32x4_t offset, uint32x4_t value, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+  vstrhq_scatter_offset_p(base, offset, value, p);
+#else /* POLYMORPHIC */
+  vstrhq_scatter_offset_p_u32(base, offset, value, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vstrhq_scatter_offset_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v8i16.v8i16(i16* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], <8 x i16> [[VALUE:%.*]], i32 16, i32 0)
+// CHECK-NEXT:    ret void
+//
+void test_vstrhq_scatter_offset_s16(int16_t *base, uint16x8_t offset, int16x8_t value)
+{
+#ifdef POLYMORPHIC
+  vstrhq_scatter_offset(base, offset, value);
+#else /* POLYMORPHIC */
+  vstrhq_scatter_offset_s16(base, offset, value);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vstrhq_scatter_offset_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v4i32.v4i32(i16* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x i32> [[VALUE:%.*]], i32 16, i32 0)
+// CHECK-NEXT:    ret void
+//
+void test_vstrhq_scatter_offset_s32(int16_t *base, uint32x4_t offset, int32x4_t value)
+{
+#ifdef POLYMORPHIC
+  vstrhq_scatter_offset(base, offset, value);
+#else /* POLYMORPHIC */
+  vstrhq_scatter_offset_s32(base, offset, value);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vstrhq_scatter_offset_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v8i16.v8i16(i16* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], <8 x i16> [[VALUE:%.*]], i32 16, i32 0)
+// CHECK-NEXT:    ret void
+//
+void test_vstrhq_scatter_offset_u16(uint16_t *base, uint16x8_t offset, uint16x8_t value)
+{
+#ifdef POLYMORPHIC
+  vstrhq_scatter_offset(base, offset, value);
+#else /* POLYMORPHIC */
+  vstrhq_scatter_offset_u16(base, offset, value);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vstrhq_scatter_offset_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v4i32.v4i32(i16* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x i32> [[VALUE:%.*]], i32 16, i32 0)
+// CHECK-NEXT:    ret void
+//
+void test_vstrhq_scatter_offset_u32(uint16_t *base, uint32x4_t offset, uint32x4_t value)
+{
+#ifdef POLYMORPHIC
+  vstrhq_scatter_offset(base, offset, value);
+#else /* POLYMORPHIC */
+  vstrhq_scatter_offset_u32(base, offset, value);
+#endif /* POLYMORPHIC */
+}
+
+// vstrhq_scatter_shifted_offset family: halfword scatter stores with the
+// per-lane offset scaled by 2 (trailing shift operand i32 1); otherwise
+// identical in shape to the unshifted vstrhq_scatter_offset tests above.
+// NOTE(review): CHECK lines appear machine-generated; regenerate, don't edit.
+// CHECK-LABEL: @test_vstrhq_scatter_shifted_offset_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    call void @llvm.arm.mve.vstr.scatter.offset.p0f16.v8i16.v8f16(half* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], <8 x half> [[VALUE:%.*]], i32 16, i32 1)
+// CHECK-NEXT:    ret void
+//
+void test_vstrhq_scatter_shifted_offset_f16(float16_t *base, uint16x8_t offset, float16x8_t value)
+{
+#ifdef POLYMORPHIC
+  vstrhq_scatter_shifted_offset(base, offset, value);
+#else /* POLYMORPHIC */
+  vstrhq_scatter_shifted_offset_f16(base, offset, value);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vstrhq_scatter_shifted_offset_p_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0f16.v8i16.v8f16.v8i1(half* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], <8 x half> [[VALUE:%.*]], i32 16, i32 1, <8 x i1> [[TMP1]])
+// CHECK-NEXT:    ret void
+//
+void test_vstrhq_scatter_shifted_offset_p_f16(float16_t *base, uint16x8_t offset, float16x8_t value, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+  vstrhq_scatter_shifted_offset_p(base, offset, value, p);
+#else /* POLYMORPHIC */
+  vstrhq_scatter_shifted_offset_p_f16(base, offset, value, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vstrhq_scatter_shifted_offset_p_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v8i16.v8i16.v8i1(i16* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], <8 x i16> [[VALUE:%.*]], i32 16, i32 1, <8 x i1> [[TMP1]])
+// CHECK-NEXT:    ret void
+//
+void test_vstrhq_scatter_shifted_offset_p_s16(int16_t *base, uint16x8_t offset, int16x8_t value, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+  vstrhq_scatter_shifted_offset_p(base, offset, value, p);
+#else /* POLYMORPHIC */
+  vstrhq_scatter_shifted_offset_p_s16(base, offset, value, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vstrhq_scatter_shifted_offset_p_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v4i32.v4i32.v4i1(i16* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x i32> [[VALUE:%.*]], i32 16, i32 1, <4 x i1> [[TMP1]])
+// CHECK-NEXT:    ret void
+//
+void test_vstrhq_scatter_shifted_offset_p_s32(int16_t *base, uint32x4_t offset, int32x4_t value, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+  vstrhq_scatter_shifted_offset_p(base, offset, value, p);
+#else /* POLYMORPHIC */
+  vstrhq_scatter_shifted_offset_p_s32(base, offset, value, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vstrhq_scatter_shifted_offset_p_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v8i16.v8i16.v8i1(i16* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], <8 x i16> [[VALUE:%.*]], i32 16, i32 1, <8 x i1> [[TMP1]])
+// CHECK-NEXT:    ret void
+//
+void test_vstrhq_scatter_shifted_offset_p_u16(uint16_t *base, uint16x8_t offset, uint16x8_t value, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+  vstrhq_scatter_shifted_offset_p(base, offset, value, p);
+#else /* POLYMORPHIC */
+  vstrhq_scatter_shifted_offset_p_u16(base, offset, value, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vstrhq_scatter_shifted_offset_p_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v4i32.v4i32.v4i1(i16* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x i32> [[VALUE:%.*]], i32 16, i32 1, <4 x i1> [[TMP1]])
+// CHECK-NEXT:    ret void
+//
+void test_vstrhq_scatter_shifted_offset_p_u32(uint16_t *base, uint32x4_t offset, uint32x4_t value, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+  vstrhq_scatter_shifted_offset_p(base, offset, value, p);
+#else /* POLYMORPHIC */
+  vstrhq_scatter_shifted_offset_p_u32(base, offset, value, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vstrhq_scatter_shifted_offset_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v8i16.v8i16(i16* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], <8 x i16> [[VALUE:%.*]], i32 16, i32 1)
+// CHECK-NEXT:    ret void
+//
+void test_vstrhq_scatter_shifted_offset_s16(int16_t *base, uint16x8_t offset, int16x8_t value)
+{
+#ifdef POLYMORPHIC
+  vstrhq_scatter_shifted_offset(base, offset, value);
+#else /* POLYMORPHIC */
+  vstrhq_scatter_shifted_offset_s16(base, offset, value);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vstrhq_scatter_shifted_offset_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v4i32.v4i32(i16* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x i32> [[VALUE:%.*]], i32 16, i32 1)
+// CHECK-NEXT:    ret void
+//
+void test_vstrhq_scatter_shifted_offset_s32(int16_t *base, uint32x4_t offset, int32x4_t value)
+{
+#ifdef POLYMORPHIC
+  vstrhq_scatter_shifted_offset(base, offset, value);
+#else /* POLYMORPHIC */
+  vstrhq_scatter_shifted_offset_s32(base, offset, value);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vstrhq_scatter_shifted_offset_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v8i16.v8i16(i16* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], <8 x i16> [[VALUE:%.*]], i32 16, i32 1)
+// CHECK-NEXT:    ret void
+//
+void test_vstrhq_scatter_shifted_offset_u16(uint16_t *base, uint16x8_t offset, uint16x8_t value)
+{
+#ifdef POLYMORPHIC
+  vstrhq_scatter_shifted_offset(base, offset, value);
+#else /* POLYMORPHIC */
+  vstrhq_scatter_shifted_offset_u16(base, offset, value);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vstrhq_scatter_shifted_offset_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v4i32.v4i32(i16* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x i32> [[VALUE:%.*]], i32 16, i32 1)
+// CHECK-NEXT:    ret void
+//
+void test_vstrhq_scatter_shifted_offset_u32(uint16_t *base, uint32x4_t offset, uint32x4_t value)
+{
+#ifdef POLYMORPHIC
+  vstrhq_scatter_shifted_offset(base, offset, value);
+#else /* POLYMORPHIC */
+  vstrhq_scatter_shifted_offset_u32(base, offset, value);
+#endif /* POLYMORPHIC */
+}
+
+// vstrwq_scatter_base family: 32-bit scatter stores (f32/s32/u32) to a
+// vector of base addresses plus a constant immediate. Immediates (e.g.
+// 0x17c = 380) must be multiples of 4 in the encodable range; predicated
+// variants use a <4 x i1> predicate matching the 4 lanes.
+// NOTE(review): CHECK lines appear machine-generated; regenerate, don't edit.
+// CHECK-LABEL: @test_vstrwq_scatter_base_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    call void @llvm.arm.mve.vstr.scatter.base.v4i32.v4f32(<4 x i32> [[ADDR:%.*]], i32 380, <4 x float> [[VALUE:%.*]])
+// CHECK-NEXT:    ret void
+//
+void test_vstrwq_scatter_base_f32(uint32x4_t addr, float32x4_t value)
+{
+#ifdef POLYMORPHIC
+  vstrwq_scatter_base(addr, 0x17c, value);
+#else /* POLYMORPHIC */
+  vstrwq_scatter_base_f32(addr, 0x17c, value);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vstrwq_scatter_base_p_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    call void @llvm.arm.mve.vstr.scatter.base.predicated.v4i32.v4f32.v4i1(<4 x i32> [[ADDR:%.*]], i32 400, <4 x float> [[VALUE:%.*]], <4 x i1> [[TMP1]])
+// CHECK-NEXT:    ret void
+//
+void test_vstrwq_scatter_base_p_f32(uint32x4_t addr, float32x4_t value, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+  vstrwq_scatter_base_p(addr, 0x190, value, p);
+#else /* POLYMORPHIC */
+  vstrwq_scatter_base_p_f32(addr, 0x190, value, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vstrwq_scatter_base_p_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    call void @llvm.arm.mve.vstr.scatter.base.predicated.v4i32.v4i32.v4i1(<4 x i32> [[ADDR:%.*]], i32 48, <4 x i32> [[VALUE:%.*]], <4 x i1> [[TMP1]])
+// CHECK-NEXT:    ret void
+//
+void test_vstrwq_scatter_base_p_s32(uint32x4_t addr, int32x4_t value, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+  vstrwq_scatter_base_p(addr, 0x30, value, p);
+#else /* POLYMORPHIC */
+  vstrwq_scatter_base_p_s32(addr, 0x30, value, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vstrwq_scatter_base_p_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    call void @llvm.arm.mve.vstr.scatter.base.predicated.v4i32.v4i32.v4i1(<4 x i32> [[ADDR:%.*]], i32 376, <4 x i32> [[VALUE:%.*]], <4 x i1> [[TMP1]])
+// CHECK-NEXT:    ret void
+//
+void test_vstrwq_scatter_base_p_u32(uint32x4_t addr, uint32x4_t value, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+  vstrwq_scatter_base_p(addr, 0x178, value, p);
+#else /* POLYMORPHIC */
+  vstrwq_scatter_base_p_u32(addr, 0x178, value, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vstrwq_scatter_base_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    call void @llvm.arm.mve.vstr.scatter.base.v4i32.v4i32(<4 x i32> [[ADDR:%.*]], i32 156, <4 x i32> [[VALUE:%.*]])
+// CHECK-NEXT:    ret void
+//
+void test_vstrwq_scatter_base_s32(uint32x4_t addr, int32x4_t value)
+{
+#ifdef POLYMORPHIC
+  vstrwq_scatter_base(addr, 0x9c, value);
+#else /* POLYMORPHIC */
+  vstrwq_scatter_base_s32(addr, 0x9c, value);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vstrwq_scatter_base_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    call void @llvm.arm.mve.vstr.scatter.base.v4i32.v4i32(<4 x i32> [[ADDR:%.*]], i32 212, <4 x i32> [[VALUE:%.*]])
+// CHECK-NEXT:    ret void
+//
+void test_vstrwq_scatter_base_u32(uint32x4_t addr, uint32x4_t value)
+{
+#ifdef POLYMORPHIC
+  vstrwq_scatter_base(addr, 0xd4, value);
+#else /* POLYMORPHIC */
+  vstrwq_scatter_base_u32(addr, 0xd4, value);
+#endif /* POLYMORPHIC */
+}
+
+// vstrwq_scatter_base_wb family: write-back variants -- the base-address
+// vector is loaded from *addr, the ..wb.. intrinsic returns the updated
+// bases, and they are stored back (load/call/store sequence, align 8).
+// NOTE(review): CHECK lines appear machine-generated; regenerate, don't edit.
+// CHECK-LABEL: @test_vstrwq_scatter_base_wb_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* [[ADDR:%.*]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.v4i32.v4f32(<4 x i32> [[TMP0]], i32 412, <4 x float> [[VALUE:%.*]])
+// CHECK-NEXT:    store <4 x i32> [[TMP1]], <4 x i32>* [[ADDR]], align 8
+// CHECK-NEXT:    ret void
+//
+void test_vstrwq_scatter_base_wb_f32(uint32x4_t *addr, float32x4_t value)
+{
+#ifdef POLYMORPHIC
+  vstrwq_scatter_base_wb(addr, 0x19c, value);
+#else /* POLYMORPHIC */
+  vstrwq_scatter_base_wb_f32(addr, 0x19c, value);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vstrwq_scatter_base_wb_p_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* [[ADDR:%.*]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v4i32.v4f32.v4i1(<4 x i32> [[TMP0]], i32 236, <4 x float> [[VALUE:%.*]], <4 x i1> [[TMP2]])
+// CHECK-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* [[ADDR]], align 8
+// CHECK-NEXT:    ret void
+//
+void test_vstrwq_scatter_base_wb_p_f32(uint32x4_t *addr, float32x4_t value, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+  vstrwq_scatter_base_wb_p(addr, 0xec, value, p);
+#else /* POLYMORPHIC */
+  vstrwq_scatter_base_wb_p_f32(addr, 0xec, value, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vstrwq_scatter_base_wb_p_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* [[ADDR:%.*]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32> [[TMP0]], i32 328, <4 x i32> [[VALUE:%.*]], <4 x i1> [[TMP2]])
+// CHECK-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* [[ADDR]], align 8
+// CHECK-NEXT:    ret void
+//
+void test_vstrwq_scatter_base_wb_p_s32(uint32x4_t *addr, int32x4_t value, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+  vstrwq_scatter_base_wb_p(addr, 0x148, value, p);
+#else /* POLYMORPHIC */
+  vstrwq_scatter_base_wb_p_s32(addr, 0x148, value, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vstrwq_scatter_base_wb_p_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* [[ADDR:%.*]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32> [[TMP0]], i32 412, <4 x i32> [[VALUE:%.*]], <4 x i1> [[TMP2]])
+// CHECK-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* [[ADDR]], align 8
+// CHECK-NEXT:    ret void
+//
+void test_vstrwq_scatter_base_wb_p_u32(uint32x4_t *addr, uint32x4_t value, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+  vstrwq_scatter_base_wb_p(addr, 0x19c, value, p);
+#else /* POLYMORPHIC */
+  vstrwq_scatter_base_wb_p_u32(addr, 0x19c, value, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vstrwq_scatter_base_wb_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* [[ADDR:%.*]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.v4i32.v4i32(<4 x i32> [[TMP0]], i32 152, <4 x i32> [[VALUE:%.*]])
+// CHECK-NEXT:    store <4 x i32> [[TMP1]], <4 x i32>* [[ADDR]], align 8
+// CHECK-NEXT:    ret void
+//
+void test_vstrwq_scatter_base_wb_s32(uint32x4_t *addr, int32x4_t value)
+{
+#ifdef POLYMORPHIC
+  vstrwq_scatter_base_wb(addr, 0x98, value);
+#else /* POLYMORPHIC */
+  vstrwq_scatter_base_wb_s32(addr, 0x98, value);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vstrwq_scatter_base_wb_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* [[ADDR:%.*]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.v4i32.v4i32(<4 x i32> [[TMP0]], i32 64, <4 x i32> [[VALUE:%.*]])
+// CHECK-NEXT:    store <4 x i32> [[TMP1]], <4 x i32>* [[ADDR]], align 8
+// CHECK-NEXT:    ret void
+//
+void test_vstrwq_scatter_base_wb_u32(uint32x4_t *addr, uint32x4_t value)
+{
+#ifdef POLYMORPHIC
+  vstrwq_scatter_base_wb(addr, 0x40, value);
+#else /* POLYMORPHIC */
+  vstrwq_scatter_base_wb_u32(addr, 0x40, value);
+#endif /* POLYMORPHIC */
+}
+
+// vstrwq_scatter_offset family: 32-bit scatter stores to base + per-lane
+// unscaled offset (i32 32 = element width, i32 0 = no shift); f32 and s32
+// variants shown here, each verified in both polymorphic and suffixed form.
+// NOTE(review): CHECK lines appear machine-generated; regenerate, don't edit.
+// CHECK-LABEL: @test_vstrwq_scatter_offset_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    call void @llvm.arm.mve.vstr.scatter.offset.p0f32.v4i32.v4f32(float* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x float> [[VALUE:%.*]], i32 32, i32 0)
+// CHECK-NEXT:    ret void
+//
+void test_vstrwq_scatter_offset_f32(float32_t *base, uint32x4_t offset, float32x4_t value)
+{
+#ifdef POLYMORPHIC
+  vstrwq_scatter_offset(base, offset, value);
+#else /* POLYMORPHIC */
+  vstrwq_scatter_offset_f32(base, offset, value);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vstrwq_scatter_offset_p_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0f32.v4i32.v4f32.v4i1(float* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x float> [[VALUE:%.*]], i32 32, i32 0, <4 x i1> [[TMP1]])
+// CHECK-NEXT:    ret void
+//
+void test_vstrwq_scatter_offset_p_f32(float32_t *base, uint32x4_t offset, float32x4_t value, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+  vstrwq_scatter_offset_p(base, offset, value, p);
+#else /* POLYMORPHIC */
+  vstrwq_scatter_offset_p_f32(base, offset, value, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vstrwq_scatter_offset_p_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i32.v4i32.v4i32.v4i1(i32* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x i32> [[VALUE:%.*]], i32 32, i32 0, <4 x i1> [[TMP1]])
+// CHECK-NEXT:    ret void
+//
+void test_vstrwq_scatter_offset_p_s32(int32_t *base, uint32x4_t offset, int32x4_t value, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+  vstrwq_scatter_offset_p(base, offset, value, p);
+#else /* POLYMORPHIC */
+  vstrwq_scatter_offset_p_s32(base, offset, value, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vstrwq_scatter_offset_p_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i32.v4i32.v4i32.v4i1(i32* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x i32> [[VALUE:%.*]], i32 32, i32 0, <4 x i1> [[TMP1]])
+// CHECK-NEXT: ret void
+//
+void test_vstrwq_scatter_offset_p_u32(uint32_t *base, uint32x4_t offset, uint32x4_t value, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ vstrwq_scatter_offset_p(base, offset, value, p);
+#else /* POLYMORPHIC */
+ vstrwq_scatter_offset_p_u32(base, offset, value, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vstrwq_scatter_offset_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.p0i32.v4i32.v4i32(i32* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x i32> [[VALUE:%.*]], i32 32, i32 0)
+// CHECK-NEXT: ret void
+//
+void test_vstrwq_scatter_offset_s32(int32_t *base, uint32x4_t offset, int32x4_t value)
+{
+#ifdef POLYMORPHIC
+ vstrwq_scatter_offset(base, offset, value);
+#else /* POLYMORPHIC */
+ vstrwq_scatter_offset_s32(base, offset, value);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vstrwq_scatter_offset_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.p0i32.v4i32.v4i32(i32* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x i32> [[VALUE:%.*]], i32 32, i32 0)
+// CHECK-NEXT: ret void
+//
+void test_vstrwq_scatter_offset_u32(uint32_t *base, uint32x4_t offset, uint32x4_t value)
+{
+#ifdef POLYMORPHIC
+ vstrwq_scatter_offset(base, offset, value);
+#else /* POLYMORPHIC */
+ vstrwq_scatter_offset_u32(base, offset, value);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vstrwq_scatter_shifted_offset_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.p0f32.v4i32.v4f32(float* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x float> [[VALUE:%.*]], i32 32, i32 2)
+// CHECK-NEXT: ret void
+//
+void test_vstrwq_scatter_shifted_offset_f32(float32_t *base, uint32x4_t offset, float32x4_t value)
+{
+#ifdef POLYMORPHIC
+ vstrwq_scatter_shifted_offset(base, offset, value);
+#else /* POLYMORPHIC */
+ vstrwq_scatter_shifted_offset_f32(base, offset, value);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vstrwq_scatter_shifted_offset_p_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0f32.v4i32.v4f32.v4i1(float* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x float> [[VALUE:%.*]], i32 32, i32 2, <4 x i1> [[TMP1]])
+// CHECK-NEXT: ret void
+//
+void test_vstrwq_scatter_shifted_offset_p_f32(float32_t *base, uint32x4_t offset, float32x4_t value, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ vstrwq_scatter_shifted_offset_p(base, offset, value, p);
+#else /* POLYMORPHIC */
+ vstrwq_scatter_shifted_offset_p_f32(base, offset, value, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vstrwq_scatter_shifted_offset_p_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i32.v4i32.v4i32.v4i1(i32* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x i32> [[VALUE:%.*]], i32 32, i32 2, <4 x i1> [[TMP1]])
+// CHECK-NEXT: ret void
+//
+void test_vstrwq_scatter_shifted_offset_p_s32(int32_t *base, uint32x4_t offset, int32x4_t value, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ vstrwq_scatter_shifted_offset_p(base, offset, value, p);
+#else /* POLYMORPHIC */
+ vstrwq_scatter_shifted_offset_p_s32(base, offset, value, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vstrwq_scatter_shifted_offset_p_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i32.v4i32.v4i32.v4i1(i32* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x i32> [[VALUE:%.*]], i32 32, i32 2, <4 x i1> [[TMP1]])
+// CHECK-NEXT: ret void
+//
+void test_vstrwq_scatter_shifted_offset_p_u32(uint32_t *base, uint32x4_t offset, uint32x4_t value, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ vstrwq_scatter_shifted_offset_p(base, offset, value, p);
+#else /* POLYMORPHIC */
+ vstrwq_scatter_shifted_offset_p_u32(base, offset, value, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vstrwq_scatter_shifted_offset_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.p0i32.v4i32.v4i32(i32* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x i32> [[VALUE:%.*]], i32 32, i32 2)
+// CHECK-NEXT: ret void
+//
+void test_vstrwq_scatter_shifted_offset_s32(int32_t *base, uint32x4_t offset, int32x4_t value)
+{
+#ifdef POLYMORPHIC
+ vstrwq_scatter_shifted_offset(base, offset, value);
+#else /* POLYMORPHIC */
+ vstrwq_scatter_shifted_offset_s32(base, offset, value);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vstrwq_scatter_shifted_offset_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.p0i32.v4i32.v4i32(i32* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x i32> [[VALUE:%.*]], i32 32, i32 2)
+// CHECK-NEXT: ret void
+//
+void test_vstrwq_scatter_shifted_offset_u32(uint32_t *base, uint32x4_t offset, uint32x4_t value)
+{
+#ifdef POLYMORPHIC
+ vstrwq_scatter_shifted_offset(base, offset, value);
+#else /* POLYMORPHIC */
+ vstrwq_scatter_shifted_offset_u32(base, offset, value);
+#endif /* POLYMORPHIC */
+}
+
diff --git a/clang/test/Sema/arm-mve-immediates.c b/clang/test/Sema/arm-mve-immediates.c
new file mode 100644
index 00000000000..cdf68b8a949
--- /dev/null
+++ b/clang/test/Sema/arm-mve-immediates.c
@@ -0,0 +1,56 @@
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -verify -fsyntax-only %s
+
+#include <arm_mve.h>
+
+void test_load_offsets(uint32x4_t addr32, uint64x2_t addr64)
+{
+ // Offsets that should be a multiple of 8 times 0,1,...,127
+ vldrdq_gather_base_s64(addr64, 0);
+ vldrdq_gather_base_s64(addr64, 8);
+ vldrdq_gather_base_s64(addr64, 2*8);
+ vldrdq_gather_base_s64(addr64, 125*8);
+ vldrdq_gather_base_s64(addr64, 126*8);
+ vldrdq_gather_base_s64(addr64, 127*8);
+ vldrdq_gather_base_s64(addr64, -8); // expected-error {{argument value -8 is outside the valid range [0, 1016]}}
+ vldrdq_gather_base_s64(addr64, 128*8); // expected-error {{argument value 1024 is outside the valid range [0, 1016]}}
+ vldrdq_gather_base_s64(addr64, 4); // expected-error {{argument should be a multiple of 8}}
+ vldrdq_gather_base_s64(addr64, 1); // expected-error {{argument should be a multiple of 8}}
+
+ // Offsets that should be a multiple of 4 times 0,1,...,127
+ vldrwq_gather_base_s32(addr32, 0);
+ vldrwq_gather_base_s32(addr32, 4);
+ vldrwq_gather_base_s32(addr32, 2*4);
+ vldrwq_gather_base_s32(addr32, 125*4);
+ vldrwq_gather_base_s32(addr32, 126*4);
+ vldrwq_gather_base_s32(addr32, 127*4);
+ vldrwq_gather_base_s32(addr32, -4); // expected-error {{argument value -4 is outside the valid range [0, 508]}}
+ vldrwq_gather_base_s32(addr32, 128*4); // expected-error {{argument value 512 is outside the valid range [0, 508]}}
+ vldrwq_gather_base_s32(addr32, 2); // expected-error {{argument should be a multiple of 4}}
+ vldrwq_gather_base_s32(addr32, 1); // expected-error {{argument should be a multiple of 4}}
+
+ // Show that the polymorphic store intrinsics get the right set of
+ // error checks after overload resolution. These ones expand to the
+ // 8-byte granular versions...
+ vstrdq_scatter_base(addr64, 0, addr64);
+ vstrdq_scatter_base(addr64, 8, addr64);
+ vstrdq_scatter_base(addr64, 2*8, addr64);
+ vstrdq_scatter_base(addr64, 125*8, addr64);
+ vstrdq_scatter_base(addr64, 126*8, addr64);
+ vstrdq_scatter_base(addr64, 127*8, addr64);
+ vstrdq_scatter_base(addr64, -8, addr64); // expected-error {{argument value -8 is outside the valid range [0, 1016]}}
+ vstrdq_scatter_base(addr64, 128*8, addr64); // expected-error {{argument value 1024 is outside the valid range [0, 1016]}}
+ vstrdq_scatter_base(addr64, 4, addr64); // expected-error {{argument should be a multiple of 8}}
+ vstrdq_scatter_base(addr64, 1, addr64); // expected-error {{argument should be a multiple of 8}}
+
+ /// ... and these ones to the 4-byte.
+ vstrwq_scatter_base(addr32, 0, addr32);
+ vstrwq_scatter_base(addr32, 4, addr32);
+ vstrwq_scatter_base(addr32, 2*4, addr32);
+ vstrwq_scatter_base(addr32, 125*4, addr32);
+ vstrwq_scatter_base(addr32, 126*4, addr32);
+ vstrwq_scatter_base(addr32, 127*4, addr32);
+ vstrwq_scatter_base(addr32, -4, addr32); // expected-error {{argument value -4 is outside the valid range [0, 508]}}
+ vstrwq_scatter_base(addr32, 128*4, addr32); // expected-error {{argument value 512 is outside the valid range [0, 508]}}
+ vstrwq_scatter_base(addr32, 2, addr32); // expected-error {{argument should be a multiple of 4}}
+ vstrwq_scatter_base(addr32, 1, addr32); // expected-error {{argument should be a multiple of 4}}
+}
diff --git a/clang/utils/TableGen/MveEmitter.cpp b/clang/utils/TableGen/MveEmitter.cpp
index ddec171d671..aa3b475ea7b 100644
--- a/clang/utils/TableGen/MveEmitter.cpp
+++ b/clang/utils/TableGen/MveEmitter.cpp
@@ -204,6 +204,9 @@ public:
Name = "const " + Name;
return Name + " *";
}
+ std::string llvmName() const override {
+ return "llvm::PointerType::getUnqual(" + Pointee->llvmName() + ")";
+ }
static bool classof(const Type *T) {
return T->typeKind() == TypeKind::Pointer;
@@ -512,6 +515,11 @@ public:
void setVarname(const StringRef s) { VarName = s; }
bool varnameUsed() const { return VarNameUsed; }
+ // Emit code to generate this result as a Value *.
+ virtual std::string asValue() {
+ return varname();
+ }
+
// Code generation happens in multiple passes. This method tracks whether a
// Result has yet been visited in a given pass, without the need for a
// tedious loop in between passes that goes through and resets a 'visited'
@@ -547,6 +555,12 @@ public:
std::string typeName() const override {
return AddressType ? "Address" : Result::typeName();
}
+ // Emit code to generate this result as a Value *.
+ std::string asValue() override {
+ if (AddressType)
+ return "(" + varname() + ".getPointer())";
+ return Result::asValue();
+ }
};
// Result subclass for an integer literal appearing in Tablegen. This may need
@@ -665,7 +679,7 @@ public:
OS << "), llvm::SmallVector<Value *, " << Args.size() << "> {";
const char *Sep = "";
for (auto Arg : Args) {
- OS << Sep << Arg->varname();
+ OS << Sep << Arg->asValue();
Sep = ", ";
}
OS << "})";
@@ -974,17 +988,15 @@ const Type *MveEmitter::getType(DagInit *D, const Type *Param) {
return getPointerType(Pointee, Op->getValueAsBit("const"));
}
- if (Op->isSubClassOf("CTO_Sign")) {
- const ScalarType *ST = cast<ScalarType>(getType(D->getArg(0), Param));
- ScalarTypeKind NewKind = Op->getValueAsBit("signed")
- ? ScalarTypeKind::SignedInt
- : ScalarTypeKind::UnsignedInt;
+ if (Op->getName() == "CTO_CopyKind") {
+ const ScalarType *STSize = cast<ScalarType>(getType(D->getArg(0), Param));
+ const ScalarType *STKind = cast<ScalarType>(getType(D->getArg(1), Param));
for (const auto &kv : ScalarTypes) {
const ScalarType *RT = kv.second.get();
- if (RT->kind() == NewKind && RT->sizeInBits() == ST->sizeInBits())
+ if (RT->kind() == STKind->kind() && RT->sizeInBits() == STSize->sizeInBits())
return RT;
}
- PrintFatalError("Cannot change sign of this type");
+ PrintFatalError("Cannot find a type to satisfy CopyKind");
}
PrintFatalError("Bad operator in type dag expression");
@@ -1025,6 +1037,18 @@ Result::Ptr MveEmitter::getCodeForDag(DagInit *D, const Result::Scope &Scope,
}
}
PrintFatalError("Unsupported type cast");
+ } else if (Op->getName() == "unsignedflag") {
+ if (D->getNumArgs() != 1)
+ PrintFatalError("unsignedflag should have exactly one argument");
+ Record *TypeRec = cast<DefInit>(D->getArg(0))->getDef();
+ if (!TypeRec->isSubClassOf("Type"))
+ PrintFatalError("unsignedflag's argument should be a type");
+ if (const auto *ST = dyn_cast<ScalarType>(getType(TypeRec, Param))) {
+ return std::make_shared<IntLiteralResult>(
+ getScalarType("u32"), ST->kind() == ScalarTypeKind::UnsignedInt);
+ } else {
+ PrintFatalError("unsignedflag's argument should be a scalar type");
+ }
} else {
std::vector<Result::Ptr> Args;
for (unsigned i = 0, e = D->getNumArgs(); i < e; ++i)
OpenPOWER on IntegriCloud