summaryrefslogtreecommitdiffstats
path: root/clang
diff options
context:
space:
mode:
authorSimon Tatham <simon.tatham@arm.com>2019-12-09 15:43:50 +0000
committerSimon Tatham <simon.tatham@arm.com>2019-12-09 15:44:09 +0000
commitd97b3e3e65cd77a81b39732af84a1a4229e95091 (patch)
tree554a5ee5dcb7652298b1f702cb01b687b009b3b3 /clang
parentcaabb713ea157f8c449c8d3eb00410bbef734a22 (diff)
downloadbcm5719-llvm-d97b3e3e65cd77a81b39732af84a1a4229e95091.tar.gz
bcm5719-llvm-d97b3e3e65cd77a81b39732af84a1a4229e95091.zip
[ARM][MVE] Add intrinsics for immediate shifts.
Summary: This adds the family of `vshlq_n` and `vshrq_n` ACLE intrinsics, which shift every lane of a vector left or right by a compile-time immediate. They mostly work by expanding to the IR `shl`, `lshr` and `ashr` operations, with their second operand being a vector splat of the immediate. There's a fiddly special case, though. ACLE specifies that the immediate in `vshrq_n` can take values up to //and including// the bit size of the vector lane. But LLVM IR thinks that shifting right by the full size of the lane is UB, and feels free to replace the `lshr` with an `undef` half way through the optimization pipeline. Hence, to keep this legal in source code, I have to detect it at codegen time. Logical (unsigned) right shifts by the element size are handled by simply emitting the zero vector; arithmetic ones are converted into a shift of one bit less, which will always give the same output. In order to do that check, I also had to enhance the tablegen MveEmitter so that it can cope with converting a builtin function's operand into a bare integer to pass to a code-generating subfunction. Previously the only bare integers it knew how to handle were flags generated from within `arm_mve.td`. Reviewers: dmgreen, miyuki, MarkMurrayARM, ostannard Reviewed By: MarkMurrayARM Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D71065
Diffstat (limited to 'clang')
-rw-r--r--clang/include/clang/Basic/arm_mve.td27
-rw-r--r--clang/include/clang/Basic/arm_mve_defs.td8
-rw-r--r--clang/lib/CodeGen/CGBuiltin.cpp29
-rw-r--r--clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm.c722
-rw-r--r--clang/utils/TableGen/MveEmitter.cpp83
5 files changed, 836 insertions, 33 deletions
diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td
index 19852702c1b..cc4b6d9e823 100644
--- a/clang/include/clang/Basic/arm_mve.td
+++ b/clang/include/clang/Basic/arm_mve.td
@@ -522,6 +522,33 @@ defm vstrhq: scatter_offset_both<!listconcat(T.All16, T.Int32), u16, 1>;
defm vstrwq: scatter_offset_both<T.All32, u32, 2>;
defm vstrdq: scatter_offset_both<T.Int64, u64, 3>;
+// Defines the two predicated forms of an immediate vector shift: _m_n, which
+// takes an explicit $inactive vector, and _x_n, which passes (undef Vector) in
+// that position instead. Both call the IR intrinsic named by predIntrName with
+// the operands ($v, $sh), followed by any entries of unsignedFlag, followed by
+// ($pred, <inactive operand>).
+multiclass PredicatedImmediateVectorShift<
+ Immediate immtype, string predIntrName, list<dag> unsignedFlag = []> {
+ foreach predIntr = [IRInt<predIntrName, [Vector, Predicate]>] in {
+ def _m_n: Intrinsic<Vector, (args Vector:$inactive, Vector:$v,
+ immtype:$sh, Predicate:$pred),
+ !con((predIntr $v, $sh), !dag(predIntr, unsignedFlag, ?),
+ (predIntr $pred, $inactive))>;
+ def _x_n: Intrinsic<Vector, (args Vector:$v, immtype:$sh,
+ Predicate:$pred),
+ !con((predIntr $v, $sh), !dag(predIntr, unsignedFlag, ?),
+ (predIntr $pred, (undef Vector)))>;
+ }
+}
+
+// Immediate vector shifts over the T.Int parameter types. vshlq_n accepts an
+// imm_0toNm1 shift count and expands to a plain shl by a splat of the
+// immediate; vshrq_n accepts an imm_1toN shift count and goes through immshr,
+// which is also given (unsignedflag Scalar). The predicated variants of both
+// come from PredicatedImmediateVectorShift.
+let params = T.Int in {
+ def vshlq_n: Intrinsic<Vector, (args Vector:$v, imm_0toNm1:$sh),
+ (shl $v, (splat (Scalar $sh)))>;
+ defm vshlq: PredicatedImmediateVectorShift<imm_0toNm1, "shl_imm_predicated">;
+
+ let pnt = PNT_NType in {
+ def vshrq_n: Intrinsic<Vector, (args Vector:$v, imm_1toN:$sh),
+ (immshr $v, $sh, (unsignedflag Scalar))>;
+ defm vshrq: PredicatedImmediateVectorShift<imm_1toN, "shr_imm_predicated",
+ [(unsignedflag Scalar)]>;
+ }
+}
+
// Base class for the scalar shift intrinsics.
class ScalarShift<Type argtype, dag shiftCountArg, dag shiftCodeGen>:
Intrinsic<argtype, !con((args argtype:$value), shiftCountArg), shiftCodeGen> {
diff --git a/clang/include/clang/Basic/arm_mve_defs.td b/clang/include/clang/Basic/arm_mve_defs.td
index d837a1d33d0..5aa10f250ed 100644
--- a/clang/include/clang/Basic/arm_mve_defs.td
+++ b/clang/include/clang/Basic/arm_mve_defs.td
@@ -66,6 +66,10 @@ def xor: IRBuilder<"CreateXor">;
def sub: IRBuilder<"CreateSub">;
def shl: IRBuilder<"CreateShl">;
def lshr: IRBuilder<"CreateLShr">;
+// Right shift of a vector by an immediate, emitted through the
+// MVEImmediateShr helper function. Arguments 1 and 2 (the shift count and the
+// unsigned flag in the uses in arm_mve.td) are declared as "unsigned" and
+// "bool" integer parameters of that helper.
+def immshr: CGHelperFn<"MVEImmediateShr"> {
+ let special_params = [IRBuilderIntParam<1, "unsigned">,
+ IRBuilderIntParam<2, "bool">];
+}
def fadd: IRBuilder<"CreateFAdd">;
def fmul: IRBuilder<"CreateFMul">;
def fsub: IRBuilder<"CreateFSub">;
@@ -308,8 +312,8 @@ def imm_simd_vmvn : Immediate<u32, IB_UEltValue> {
//
// imm_0toNm1 is the same but with the range offset by 1, i.e. 0 to N-1
// inclusive.
-def imm_1toN : Immediate<u32, IB_EltBit<1>>;
-def imm_0toNm1 : Immediate<u32, IB_EltBit<0>>;
+def imm_1toN : Immediate<sint, IB_EltBit<1>>;
+def imm_0toNm1 : Immediate<sint, IB_EltBit<0>>;
// imm_lane has to be the index of a vector lane in the main vector type, i.e
// it can range from 0 to (128 / size of scalar)-1 inclusive. (e.g. vgetq_lane)
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index b5b0c3e61d4..94d10a1aedf 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -6801,6 +6801,14 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
}
}
+/// Evaluate the builtin-call argument \p E as an integer constant expression
+/// and return its value converted to \p Integer.
+///
+/// The argument is expected to have been verified as constant already (by
+/// Sema); that expectation is only checked by an assertion.
+template<typename Integer>
+static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context) {
+  llvm::APSInt IntVal;
+  bool IsConst = E->isIntegerConstantExpr(IntVal, Context);
+  assert(IsConst && "Sema should have checked this was a constant");
+  // IsConst is otherwise read only by the assert above; mark it as used so
+  // that builds with assertions disabled do not warn about an unused variable.
+  (void)IsConst;
+  return IntVal.getExtValue();
+}
+
static llvm::Value *SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V,
llvm::Type *T, bool Unsigned) {
// Helper function called by Tablegen-constructed ARM MVE builtin codegen,
@@ -6808,6 +6816,27 @@ static llvm::Value *SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V,
return Unsigned ? Builder.CreateZExt(V, T) : Builder.CreateSExt(V, T);
}
+static llvm::Value *MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V,
+                                    uint32_t Shift, bool Unsigned) {
+  // Integer right shift of every lane of V by the immediate Shift, logical
+  // when Unsigned is set and arithmetic otherwise.
+  //
+  // MVE permits a shift count equal to the lane width, but an IR shift by
+  // that amount is not well defined, so that one case is rewritten into
+  // something that is.
+  const unsigned EltBits =
+      V->getType()->getVectorElementType()->getPrimitiveSizeInBits();
+  const bool FullWidth = (Shift == EltBits);
+
+  if (Unsigned) {
+    // A logical shift by the whole lane width leaves nothing behind: the
+    // result is simply the zero vector.
+    if (FullWidth)
+      return llvm::Constant::getNullValue(V->getType());
+    return Builder.CreateLShr(V, Shift);
+  }
+
+  // An arithmetic shift by the whole lane width gives the same result as a
+  // shift by one bit fewer, so use that count instead.
+  if (FullWidth)
+    --Shift;
+  return Builder.CreateAShr(V, Shift);
+}
+
static llvm::Value *ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V) {
// MVE-specific helper function for a vector splat, which infers the element
// count of the output vector by knowing that MVE vectors are all 128 bits
diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm.c b/clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm.c
new file mode 100644
index 00000000000..200273c0365
--- /dev/null
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm.c
@@ -0,0 +1,722 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+
+#include <arm_mve.h>
+
+// CHECK-LABEL: @test_vshlq_n_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = shl <16 x i8> [[A:%.*]], <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
+int8x16_t test_vshlq_n_s8(int8x16_t a)
+{
+#ifdef POLYMORPHIC
+ return vshlq_n(a, 5);
+#else /* POLYMORPHIC */
+ return vshlq_n_s8(a, 5);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshlq_n_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = shl <8 x i16> [[A:%.*]], <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+int16x8_t test_vshlq_n_s16(int16x8_t a)
+{
+#ifdef POLYMORPHIC
+ return vshlq_n(a, 5);
+#else /* POLYMORPHIC */
+ return vshlq_n_s16(a, 5);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshlq_n_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = shl <4 x i32> [[A:%.*]], <i32 18, i32 18, i32 18, i32 18>
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+int32x4_t test_vshlq_n_s32(int32x4_t a)
+{
+#ifdef POLYMORPHIC
+ return vshlq_n(a, 18);
+#else /* POLYMORPHIC */
+ return vshlq_n_s32(a, 18);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshlq_n_s8_trivial(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = shl <16 x i8> [[A:%.*]], zeroinitializer
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
+int8x16_t test_vshlq_n_s8_trivial(int8x16_t a)
+{
+#ifdef POLYMORPHIC
+ return vshlq_n(a, 0);
+#else /* POLYMORPHIC */
+ return vshlq_n_s8(a, 0);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshlq_n_s16_trivial(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = shl <8 x i16> [[A:%.*]], zeroinitializer
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+int16x8_t test_vshlq_n_s16_trivial(int16x8_t a)
+{
+#ifdef POLYMORPHIC
+ return vshlq_n(a, 0);
+#else /* POLYMORPHIC */
+ return vshlq_n_s16(a, 0);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshlq_n_s32_trivial(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = shl <4 x i32> [[A:%.*]], zeroinitializer
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+int32x4_t test_vshlq_n_s32_trivial(int32x4_t a)
+{
+#ifdef POLYMORPHIC
+ return vshlq_n(a, 0);
+#else /* POLYMORPHIC */
+ return vshlq_n_s32(a, 0);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshlq_n_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = shl <16 x i8> [[A:%.*]], <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
+uint8x16_t test_vshlq_n_u8(uint8x16_t a)
+{
+#ifdef POLYMORPHIC
+ return vshlq_n(a, 3);
+#else /* POLYMORPHIC */
+ return vshlq_n_u8(a, 3);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshlq_n_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = shl <8 x i16> [[A:%.*]], <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11>
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+uint16x8_t test_vshlq_n_u16(uint16x8_t a)
+{
+#ifdef POLYMORPHIC
+ return vshlq_n(a, 11);
+#else /* POLYMORPHIC */
+ return vshlq_n_u16(a, 11);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshlq_n_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = shl <4 x i32> [[A:%.*]], <i32 7, i32 7, i32 7, i32 7>
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+uint32x4_t test_vshlq_n_u32(uint32x4_t a)
+{
+#ifdef POLYMORPHIC
+ return vshlq_n(a, 7);
+#else /* POLYMORPHIC */
+ return vshlq_n_u32(a, 7);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshlq_n_u8_trivial(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = shl <16 x i8> [[A:%.*]], zeroinitializer
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
+uint8x16_t test_vshlq_n_u8_trivial(uint8x16_t a)
+{
+#ifdef POLYMORPHIC
+ return vshlq_n(a, 0);
+#else /* POLYMORPHIC */
+ return vshlq_n_u8(a, 0);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshlq_n_u16_trivial(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = shl <8 x i16> [[A:%.*]], zeroinitializer
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+uint16x8_t test_vshlq_n_u16_trivial(uint16x8_t a)
+{
+#ifdef POLYMORPHIC
+ return vshlq_n(a, 0);
+#else /* POLYMORPHIC */
+ return vshlq_n_u16(a, 0);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshlq_n_u32_trivial(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = shl <4 x i32> [[A:%.*]], zeroinitializer
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+uint32x4_t test_vshlq_n_u32_trivial(uint32x4_t a)
+{
+#ifdef POLYMORPHIC
+ return vshlq_n(a, 0);
+#else /* POLYMORPHIC */
+ return vshlq_n_u32(a, 0);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshrq_n_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = ashr <16 x i8> [[A:%.*]], <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
+int8x16_t test_vshrq_n_s8(int8x16_t a)
+{
+#ifdef POLYMORPHIC
+ return vshrq(a, 4);
+#else /* POLYMORPHIC */
+ return vshrq_n_s8(a, 4);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshrq_n_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = ashr <8 x i16> [[A:%.*]], <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+int16x8_t test_vshrq_n_s16(int16x8_t a)
+{
+#ifdef POLYMORPHIC
+ return vshrq(a, 10);
+#else /* POLYMORPHIC */
+ return vshrq_n_s16(a, 10);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshrq_n_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = ashr <4 x i32> [[A:%.*]], <i32 19, i32 19, i32 19, i32 19>
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+int32x4_t test_vshrq_n_s32(int32x4_t a)
+{
+#ifdef POLYMORPHIC
+ return vshrq(a, 19);
+#else /* POLYMORPHIC */
+ return vshrq_n_s32(a, 19);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshrq_n_s8_trivial(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = ashr <16 x i8> [[A:%.*]], <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
+int8x16_t test_vshrq_n_s8_trivial(int8x16_t a)
+{
+#ifdef POLYMORPHIC
+ return vshrq(a, 8);
+#else /* POLYMORPHIC */
+ return vshrq_n_s8(a, 8);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshrq_n_s16_trivial(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = ashr <8 x i16> [[A:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+int16x8_t test_vshrq_n_s16_trivial(int16x8_t a)
+{
+#ifdef POLYMORPHIC
+ return vshrq(a, 16);
+#else /* POLYMORPHIC */
+ return vshrq_n_s16(a, 16);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshrq_n_s32_trivial(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = ashr <4 x i32> [[A:%.*]], <i32 31, i32 31, i32 31, i32 31>
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+int32x4_t test_vshrq_n_s32_trivial(int32x4_t a)
+{
+#ifdef POLYMORPHIC
+ return vshrq(a, 32);
+#else /* POLYMORPHIC */
+ return vshrq_n_s32(a, 32);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshrq_n_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = lshr <16 x i8> [[A:%.*]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
+uint8x16_t test_vshrq_n_u8(uint8x16_t a)
+{
+#ifdef POLYMORPHIC
+ return vshrq(a, 1);
+#else /* POLYMORPHIC */
+ return vshrq_n_u8(a, 1);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshrq_n_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = lshr <8 x i16> [[A:%.*]], <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+uint16x8_t test_vshrq_n_u16(uint16x8_t a)
+{
+#ifdef POLYMORPHIC
+ return vshrq(a, 10);
+#else /* POLYMORPHIC */
+ return vshrq_n_u16(a, 10);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshrq_n_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = lshr <4 x i32> [[A:%.*]], <i32 10, i32 10, i32 10, i32 10>
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+uint32x4_t test_vshrq_n_u32(uint32x4_t a)
+{
+#ifdef POLYMORPHIC
+ return vshrq(a, 10);
+#else /* POLYMORPHIC */
+ return vshrq_n_u32(a, 10);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshrq_n_u8_trivial(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret <16 x i8> zeroinitializer
+//
+uint8x16_t test_vshrq_n_u8_trivial(uint8x16_t a)
+{
+#ifdef POLYMORPHIC
+ return vshrq(a, 8);
+#else /* POLYMORPHIC */
+ return vshrq_n_u8(a, 8);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshrq_n_u16_trivial(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret <8 x i16> zeroinitializer
+//
+uint16x8_t test_vshrq_n_u16_trivial(uint16x8_t a)
+{
+#ifdef POLYMORPHIC
+ return vshrq(a, 16);
+#else /* POLYMORPHIC */
+ return vshrq_n_u16(a, 16);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshrq_n_u32_trivial(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret <4 x i32> zeroinitializer
+//
+uint32x4_t test_vshrq_n_u32_trivial(uint32x4_t a)
+{
+#ifdef POLYMORPHIC
+ return vshrq(a, 32);
+#else /* POLYMORPHIC */
+ return vshrq_n_u32(a, 32);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshlq_m_n_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.shl.imm.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 6, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vshlq_m_n_s8(int8x16_t inactive, int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vshlq_m_n(inactive, a, 6, p);
+#else /* POLYMORPHIC */
+ return vshlq_m_n_s8(inactive, a, 6, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshlq_m_n_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.shl.imm.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 13, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vshlq_m_n_s16(int16x8_t inactive, int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vshlq_m_n(inactive, a, 13, p);
+#else /* POLYMORPHIC */
+ return vshlq_m_n_s16(inactive, a, 13, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshlq_m_n_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.shl.imm.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vshlq_m_n_s32(int32x4_t inactive, int32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vshlq_m_n(inactive, a, 0, p);
+#else /* POLYMORPHIC */
+ return vshlq_m_n_s32(inactive, a, 0, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshlq_m_n_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.shl.imm.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 3, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+uint8x16_t test_vshlq_m_n_u8(uint8x16_t inactive, uint8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vshlq_m_n(inactive, a, 3, p);
+#else /* POLYMORPHIC */
+ return vshlq_m_n_u8(inactive, a, 3, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshlq_m_n_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.shl.imm.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 1, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vshlq_m_n_u16(uint16x8_t inactive, uint16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vshlq_m_n(inactive, a, 1, p);
+#else /* POLYMORPHIC */
+ return vshlq_m_n_u16(inactive, a, 1, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshlq_m_n_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.shl.imm.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 24, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vshlq_m_n_u32(uint32x4_t inactive, uint32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vshlq_m_n(inactive, a, 24, p);
+#else /* POLYMORPHIC */
+ return vshlq_m_n_u32(inactive, a, 24, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshrq_m_n_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.shr.imm.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 2, i32 0, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vshrq_m_n_s8(int8x16_t inactive, int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vshrq_m(inactive, a, 2, p);
+#else /* POLYMORPHIC */
+ return vshrq_m_n_s8(inactive, a, 2, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshrq_m_n_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 3, i32 0, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vshrq_m_n_s16(int16x8_t inactive, int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vshrq_m(inactive, a, 3, p);
+#else /* POLYMORPHIC */
+ return vshrq_m_n_s16(inactive, a, 3, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshrq_m_n_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.shr.imm.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 13, i32 0, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vshrq_m_n_s32(int32x4_t inactive, int32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vshrq_m(inactive, a, 13, p);
+#else /* POLYMORPHIC */
+ return vshrq_m_n_s32(inactive, a, 13, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshrq_m_n_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.shr.imm.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 4, i32 1, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+uint8x16_t test_vshrq_m_n_u8(uint8x16_t inactive, uint8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vshrq_m(inactive, a, 4, p);
+#else /* POLYMORPHIC */
+ return vshrq_m_n_u8(inactive, a, 4, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshrq_m_n_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 14, i32 1, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vshrq_m_n_u16(uint16x8_t inactive, uint16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vshrq_m(inactive, a, 14, p);
+#else /* POLYMORPHIC */
+ return vshrq_m_n_u16(inactive, a, 14, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshrq_m_n_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.shr.imm.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 21, i32 1, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vshrq_m_n_u32(uint32x4_t inactive, uint32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vshrq_m(inactive, a, 21, p);
+#else /* POLYMORPHIC */
+ return vshrq_m_n_u32(inactive, a, 21, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshlq_x_n_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.shl.imm.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 1, <16 x i1> [[TMP1]], <16 x i8> undef)
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vshlq_x_n_s8(int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vshlq_x_n(a, 1, p);
+#else /* POLYMORPHIC */
+ return vshlq_x_n_s8(a, 1, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshlq_x_n_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.shl.imm.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 15, <8 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vshlq_x_n_s16(int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vshlq_x_n(a, 15, p);
+#else /* POLYMORPHIC */
+ return vshlq_x_n_s16(a, 15, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshlq_x_n_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.shl.imm.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 13, <4 x i1> [[TMP1]], <4 x i32> undef)
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vshlq_x_n_s32(int32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vshlq_x_n(a, 13, p);
+#else /* POLYMORPHIC */
+ return vshlq_x_n_s32(a, 13, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshlq_x_n_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.shl.imm.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 4, <16 x i1> [[TMP1]], <16 x i8> undef)
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+uint8x16_t test_vshlq_x_n_u8(uint8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vshlq_x_n(a, 4, p);
+#else /* POLYMORPHIC */
+ return vshlq_x_n_u8(a, 4, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshlq_x_n_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.shl.imm.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 10, <8 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vshlq_x_n_u16(uint16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vshlq_x_n(a, 10, p);
+#else /* POLYMORPHIC */
+ return vshlq_x_n_u16(a, 10, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshlq_x_n_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.shl.imm.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 30, <4 x i1> [[TMP1]], <4 x i32> undef)
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vshlq_x_n_u32(uint32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vshlq_x_n(a, 30, p);
+#else /* POLYMORPHIC */
+ return vshlq_x_n_u32(a, 30, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshrq_x_n_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.shr.imm.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 4, i32 0, <16 x i1> [[TMP1]], <16 x i8> undef)
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vshrq_x_n_s8(int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vshrq_x(a, 4, p);
+#else /* POLYMORPHIC */
+ return vshrq_x_n_s8(a, 4, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshrq_x_n_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 10, i32 0, <8 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vshrq_x_n_s16(int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vshrq_x(a, 10, p);
+#else /* POLYMORPHIC */
+ return vshrq_x_n_s16(a, 10, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshrq_x_n_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.shr.imm.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 7, i32 0, <4 x i1> [[TMP1]], <4 x i32> undef)
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vshrq_x_n_s32(int32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vshrq_x(a, 7, p);
+#else /* POLYMORPHIC */
+ return vshrq_x_n_s32(a, 7, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshrq_x_n_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.shr.imm.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 7, i32 1, <16 x i1> [[TMP1]], <16 x i8> undef)
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+uint8x16_t test_vshrq_x_n_u8(uint8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vshrq_x(a, 7, p);
+#else /* POLYMORPHIC */
+ return vshrq_x_n_u8(a, 7, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshrq_x_n_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 7, i32 1, <8 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vshrq_x_n_u16(uint16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vshrq_x(a, 7, p);
+#else /* POLYMORPHIC */
+ return vshrq_x_n_u16(a, 7, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshrq_x_n_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.shr.imm.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 6, i32 1, <4 x i1> [[TMP1]], <4 x i32> undef)
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vshrq_x_n_u32(uint32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vshrq_x(a, 6, p);
+#else /* POLYMORPHIC */
+ return vshrq_x_n_u32(a, 6, p);
+#endif /* POLYMORPHIC */
+}
diff --git a/clang/utils/TableGen/MveEmitter.cpp b/clang/utils/TableGen/MveEmitter.cpp
index 422188a5f3d..37bf3220182 100644
--- a/clang/utils/TableGen/MveEmitter.cpp
+++ b/clang/utils/TableGen/MveEmitter.cpp
@@ -469,6 +469,10 @@ public:
virtual void genCode(raw_ostream &OS, CodeGenParamAllocator &) const = 0;
virtual bool hasIntegerConstantValue() const { return false; }
virtual uint32_t integerConstantValue() const { return 0; }
+ virtual bool hasIntegerValue() const { return false; } // default: this Result cannot be read as a bare integer
+ virtual std::string getIntegerValue(const std::string &) { // NOTE(review): not const, unlike hasIntegerValue()
+ llvm_unreachable("non-working Result::getIntegerValue called"); // callers are expected to test hasIntegerValue() first
+ }
virtual std::string typeName() const { return "Value *"; }
// Mostly, when a code-generation operation has a dependency on prior
@@ -543,8 +547,9 @@ class BuiltinArgResult : public Result {
public:
unsigned ArgNum;
bool AddressType;
- BuiltinArgResult(unsigned ArgNum, bool AddressType)
- : ArgNum(ArgNum), AddressType(AddressType) {}
+ bool Immediate; // reported by hasIntegerValue(): the argument may also be read as a bare integer
+ BuiltinArgResult(unsigned ArgNum, bool AddressType, bool Immediate)
+ : ArgNum(ArgNum), AddressType(AddressType), Immediate(Immediate) {}
void genCode(raw_ostream &OS, CodeGenParamAllocator &) const override {
OS << (AddressType ? "EmitPointerWithAlignment" : "EmitScalarExpr")
<< "(E->getArg(" << ArgNum << "))";
@@ -558,6 +563,11 @@ public:
return "(" + varname() + ".getPointer())";
return Result::asValue();
}
+ // A builtin argument can be read as a bare integer only when it was
+ // constructed with the Immediate flag set.
+ bool hasIntegerValue() const override { return Immediate; }
+ // Returns the source text of a call to GetIntegerConstantValue<IntType> on
+ // builtin argument number ArgNum, for insertion into the generated code.
+ // Marked 'override' (rather than a bare 'virtual') so that the compiler
+ // rejects any future signature drift from Result::getIntegerValue.
+ std::string getIntegerValue(const std::string &IntType) override {
+ return "GetIntegerConstantValue<" + IntType + ">(E->getArg(" +
+ utostr(ArgNum) + "), getContext())";
+ }
};
// Result subclass for an integer literal appearing in Tablegen. This may need
@@ -632,27 +642,34 @@ public:
StringRef CallPrefix;
std::vector<Ptr> Args;
std::set<unsigned> AddressArgs;
- std::map<unsigned, std::string> IntConstantArgs;
+ std::map<unsigned, std::string> IntegerArgs;
IRBuilderResult(StringRef CallPrefix, std::vector<Ptr> Args,
std::set<unsigned> AddressArgs,
- std::map<unsigned, std::string> IntConstantArgs)
- : CallPrefix(CallPrefix), Args(Args), AddressArgs(AddressArgs),
- IntConstantArgs(IntConstantArgs) {}
+ std::map<unsigned, std::string> IntegerArgs)
+ : CallPrefix(CallPrefix), Args(Args), AddressArgs(AddressArgs),
+ IntegerArgs(IntegerArgs) {}
void genCode(raw_ostream &OS,
CodeGenParamAllocator &ParamAlloc) const override {
OS << CallPrefix;
const char *Sep = "";
for (unsigned i = 0, e = Args.size(); i < e; ++i) {
Ptr Arg = Args[i];
- auto it = IntConstantArgs.find(i);
- if (it != IntConstantArgs.end()) {
- assert(Arg->hasIntegerConstantValue());
- OS << Sep << "static_cast<" << it->second << ">("
- << ParamAlloc.allocParam("unsigned",
- utostr(Arg->integerConstantValue()))
- << ")";
+ auto it = IntegerArgs.find(i);
+
+ OS << Sep;
+ Sep = ", ";
+
+ if (it != IntegerArgs.end()) {
+ if (Arg->hasIntegerConstantValue())
+ OS << "static_cast<" << it->second << ">("
+ << ParamAlloc.allocParam(it->second,
+ utostr(Arg->integerConstantValue()))
+ << ")";
+ else if (Arg->hasIntegerValue())
+ OS << ParamAlloc.allocParam(it->second,
+ Arg->getIntegerValue(it->second));
} else {
- OS << Sep << Arg->varname();
+ OS << Arg->varname();
}
Sep = ", ";
}
@@ -661,7 +678,8 @@ public:
void morePrerequisites(std::vector<Ptr> &output) const override { // appends the arguments that must be generated before this call
for (unsigned i = 0, e = Args.size(); i < e; ++i) {
Ptr Arg = Args[i];
- if (IntConstantArgs.find(i) != IntConstantArgs.end())
+ if (IntegerArgs.find(i) != IntegerArgs.end() &&
+ Arg->hasIntegerConstantValue()) // literal integer args are written directly by genCode, never via varname()
continue;
output.push_back(Arg); // every other argument, including non-literal integer args, is a prerequisite
}
@@ -980,8 +998,8 @@ public:
const Type *Param);
Result::Ptr getCodeForDagArg(DagInit *D, unsigned ArgNum,
const Result::Scope &Scope, const Type *Param);
- Result::Ptr getCodeForArg(unsigned ArgNum, const Type *ArgType,
- bool Promote);
+ Result::Ptr getCodeForArg(unsigned ArgNum, const Type *ArgType, bool Promote,
+ bool Immediate);
// Constructor and top-level functions.
@@ -1144,17 +1162,17 @@ Result::Ptr MveEmitter::getCodeForDag(DagInit *D, const Result::Scope &Scope,
Args.push_back(getCodeForDagArg(D, i, Scope, Param));
if (Op->isSubClassOf("IRBuilderBase")) {
std::set<unsigned> AddressArgs;
- std::map<unsigned, std::string> IntConstantArgs;
+ std::map<unsigned, std::string> IntegerArgs;
for (Record *sp : Op->getValueAsListOfDefs("special_params")) {
unsigned Index = sp->getValueAsInt("index");
if (sp->isSubClassOf("IRBuilderAddrParam")) {
AddressArgs.insert(Index);
} else if (sp->isSubClassOf("IRBuilderIntParam")) {
- IntConstantArgs[Index] = sp->getValueAsString("type");
+ IntegerArgs[Index] = sp->getValueAsString("type");
}
}
- return std::make_shared<IRBuilderResult>(
- Op->getValueAsString("prefix"), Args, AddressArgs, IntConstantArgs);
+ return std::make_shared<IRBuilderResult>(Op->getValueAsString("prefix"),
+ Args, AddressArgs, IntegerArgs);
} else if (Op->isSubClassOf("IRIntBase")) {
std::vector<const Type *> ParamTypes;
for (Record *RParam : Op->getValueAsListOfDefs("params"))
@@ -1204,9 +1222,9 @@ Result::Ptr MveEmitter::getCodeForDagArg(DagInit *D, unsigned ArgNum,
}
Result::Ptr MveEmitter::getCodeForArg(unsigned ArgNum, const Type *ArgType,
- bool Promote) {
- Result::Ptr V =
- std::make_shared<BuiltinArgResult>(ArgNum, isa<PointerType>(ArgType));
+ bool Promote, bool Immediate) {
+ Result::Ptr V = std::make_shared<BuiltinArgResult>(
+ ArgNum, isa<PointerType>(ArgType), Immediate);
if (Promote) {
if (const auto *ST = dyn_cast<ScalarType>(ArgType)) {
@@ -1279,17 +1297,14 @@ ACLEIntrinsic::ACLEIntrinsic(MveEmitter &ME, Record *R, const Type *Param)
const Type *ArgType = ME.getType(TypeInit, Param);
ArgTypes.push_back(ArgType);
- // The argument will usually have a name in the arguments dag, which goes
- // into the variable-name scope that the code gen will refer to.
- StringRef ArgName = ArgsDag->getArgNameStr(i);
- if (!ArgName.empty())
- Scope[ArgName] = ME.getCodeForArg(i, ArgType, Promote);
-
// If the argument is a subclass of Immediate, record the details about
// what values it can take, for Sema checking.
+ bool Immediate = false;
if (auto TypeDI = dyn_cast<DefInit>(TypeInit)) {
Record *TypeRec = TypeDI->getDef();
if (TypeRec->isSubClassOf("Immediate")) {
+ Immediate = true;
+
Record *Bounds = TypeRec->getValueAsDef("bounds");
ImmediateArg &IA = ImmediateArgs[i];
if (Bounds->isSubClassOf("IB_ConstRange")) {
@@ -1303,7 +1318,7 @@ ACLEIntrinsic::ACLEIntrinsic(MveEmitter &ME, Record *R, const Type *Param)
IA.boundsType = ImmediateArg::BoundsType::ExplicitRange;
IA.i1 = 0;
IA.i2 = 128 / Param->sizeInBits() - 1;
- } else if (Bounds->getName() == "IB_EltBit") {
+ } else if (Bounds->isSubClassOf("IB_EltBit")) {
IA.boundsType = ImmediateArg::BoundsType::ExplicitRange;
IA.i1 = Bounds->getValueAsInt("base");
IA.i2 = IA.i1 + Param->sizeInBits() - 1;
@@ -1320,6 +1335,12 @@ ACLEIntrinsic::ACLEIntrinsic(MveEmitter &ME, Record *R, const Type *Param)
}
}
}
+
+ // The argument usually has a name in the arguments dag; bind it in the scope the code gen
+ // refers to. Done after the Immediate detection above so the flag can be passed along.
+ StringRef ArgName = ArgsDag->getArgNameStr(i);
+ if (!ArgName.empty())
+ Scope[ArgName] = ME.getCodeForArg(i, ArgType, Promote, Immediate);
}
// Finally, go through the codegen dag and translate it into a Result object
OpenPOWER on IntegriCloud