-rw-r--r--  clang/include/clang/Basic/arm_mve.td                           41
-rw-r--r--  clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm.c      915
-rw-r--r--  llvm/include/llvm/IR/IntrinsicsARM.td                          21
-rw-r--r--  llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp                        13
-rw-r--r--  llvm/lib/Target/ARM/ARMInstrInfo.td                            10
-rw-r--r--  llvm/lib/Target/ARM/ARMInstrMVE.td                            232
-rw-r--r--  llvm/test/CodeGen/Thumb2/mve-intrinsics/vector-shift-imm.ll  1078
7 files changed, 2236 insertions, 74 deletions
diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td
index 8bb567c573a..9d9c067ade1 100644
--- a/clang/include/clang/Basic/arm_mve.td
+++ b/clang/include/clang/Basic/arm_mve.td
@@ -606,6 +606,47 @@ let params = T.Int in {
}
}
+let params = T.Int in {
+ def vqshlq_n: Intrinsic<Vector, (args Vector:$v, imm_0toNm1:$sh),
+ (IRInt<"vqshl_imm", [Vector]> $v, $sh, (unsignedflag Scalar))>;
+ def vqshlq_m_n: Intrinsic<Vector, (args Vector:$inactive, Vector:$v,
+ imm_0toNm1:$sh, Predicate:$pred),
+ (IRInt<"vqshl_imm_predicated", [Vector, Predicate]>
+ $v, $sh, (unsignedflag Scalar), $pred, $inactive)>;
+
+ let pnt = PNT_NType in {
+ def vrshrq_n: Intrinsic<Vector, (args Vector:$v, imm_1toN:$sh),
+ (IRInt<"vrshr_imm", [Vector]> $v, $sh, (unsignedflag Scalar))>;
+ defm vrshrq: IntrinsicMX<Vector, (args Vector:$v, imm_1toN:$sh,
+ Predicate:$pred),
+ (IRInt<"vrshr_imm_predicated", [Vector, Predicate]>
+ $v, $sh, (unsignedflag Scalar), $pred, $inactive), "_n">;
+ }
+}
+
+let params = T.Signed, pnt = PNT_NType in {
+ def vqshluq_n: Intrinsic<UVector, (args Vector:$v, imm_0toNm1:$sh),
+ (IRInt<"vqshlu_imm", [Vector]> $v, $sh)>;
+ def vqshluq_m_n: Intrinsic<UVector, (args UVector:$inactive, Vector:$v,
+ imm_0toNm1:$sh, Predicate:$pred),
+ (IRInt<"vqshlu_imm_predicated", [Vector, Predicate]>
+ $v, $sh, $pred, $inactive)>;
+}
+
+multiclass vshll_imm<int top> {
+ let params = !listconcat(T.Int8, T.Int16), pnt = PNT_NType in {
+ def _n: Intrinsic<DblVector, (args Vector:$v, imm_1toN:$sh),
+ (IRInt<"vshll_imm", [DblVector, Vector]>
+ $v, $sh, (unsignedflag Scalar), top)>;
+ defm "": IntrinsicMX<DblVector, (args Vector:$v, imm_1toN:$sh,
+ Predicate:$pred),
+ (IRInt<"vshll_imm_predicated", [DblVector, Vector, Predicate]>
+ $v, $sh, (unsignedflag Scalar), top, $pred, $inactive), "_n">;
+ }
+}
+defm vshllbq : vshll_imm<0>;
+defm vshlltq : vshll_imm<1>;
+
// Base class for the scalar shift intrinsics.
class ScalarShift<Type argtype, dag shiftCountArg, dag shiftCodeGen>:
Intrinsic<argtype, !con((args argtype:$value), shiftCountArg), shiftCodeGen> {
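
These immediate saturating shifts clamp to the lane type's range instead of
wrapping. As a rough per-lane sketch of what vqshlq_n models for a signed
8-bit lane (illustration only, not part of the patch; the helper name is
invented):

#include <stdint.h>

/* Saturating left shift of one signed 8-bit lane: compute in a wider
   type, then clamp to int8_t's range instead of letting it wrap. */
static int8_t sat_shl_s8(int8_t x, unsigned sh) /* sh in 0..7 */
{
    int32_t wide = (int32_t)x << sh;
    if (wide > INT8_MAX) return INT8_MAX;
    if (wide < INT8_MIN) return INT8_MIN;
    return (int8_t)wide;
}

The (unsignedflag Scalar) operand in the IR intrinsic is what lets one
intrinsic cover both this signed clamp and the corresponding unsigned one.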
diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm.c b/clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm.c
index 200273c0365..2128d0801c6 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm.c
@@ -720,3 +720,918 @@ uint32x4_t test_vshrq_x_n_u32(uint32x4_t a, mve_pred16_t p)
return vshrq_x_n_u32(a, 6, p);
#endif /* POLYMORPHIC */
}
+
+// CHECK-LABEL: @test_vqshlq_n_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vqshl.imm.v16i8(<16 x i8> [[A:%.*]], i32 3, i32 0)
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
+int8x16_t test_vqshlq_n_s8(int8x16_t a)
+{
+#ifdef POLYMORPHIC
+ return vqshlq_n(a, 3);
+#else /* POLYMORPHIC */
+ return vqshlq_n_s8(a, 3);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqshlq_n_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vqshl.imm.v8i16(<8 x i16> [[A:%.*]], i32 4, i32 0)
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+int16x8_t test_vqshlq_n_s16(int16x8_t a)
+{
+#ifdef POLYMORPHIC
+ return vqshlq_n(a, 4);
+#else /* POLYMORPHIC */
+ return vqshlq_n_s16(a, 4);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqshlq_n_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vqshl.imm.v4i32(<4 x i32> [[A:%.*]], i32 4, i32 0)
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+int32x4_t test_vqshlq_n_s32(int32x4_t a)
+{
+#ifdef POLYMORPHIC
+ return vqshlq_n(a, 4);
+#else /* POLYMORPHIC */
+ return vqshlq_n_s32(a, 4);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqshlq_n_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vqshl.imm.v16i8(<16 x i8> [[A:%.*]], i32 0, i32 1)
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
+uint8x16_t test_vqshlq_n_u8(uint8x16_t a)
+{
+#ifdef POLYMORPHIC
+ return vqshlq_n(a, 0);
+#else /* POLYMORPHIC */
+ return vqshlq_n_u8(a, 0);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqshlq_n_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vqshl.imm.v8i16(<8 x i16> [[A:%.*]], i32 13, i32 1)
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+uint16x8_t test_vqshlq_n_u16(uint16x8_t a)
+{
+#ifdef POLYMORPHIC
+ return vqshlq_n(a, 13);
+#else /* POLYMORPHIC */
+ return vqshlq_n_u16(a, 13);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqshlq_n_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vqshl.imm.v4i32(<4 x i32> [[A:%.*]], i32 6, i32 1)
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+uint32x4_t test_vqshlq_n_u32(uint32x4_t a)
+{
+#ifdef POLYMORPHIC
+ return vqshlq_n(a, 6);
+#else /* POLYMORPHIC */
+ return vqshlq_n_u32(a, 6);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqshluq_n_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vqshlu.imm.v16i8(<16 x i8> [[A:%.*]], i32 5)
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
+uint8x16_t test_vqshluq_n_s8(int8x16_t a)
+{
+#ifdef POLYMORPHIC
+ return vqshluq(a, 5);
+#else /* POLYMORPHIC */
+ return vqshluq_n_s8(a, 5);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqshluq_n_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vqshlu.imm.v8i16(<8 x i16> [[A:%.*]], i32 5)
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+uint16x8_t test_vqshluq_n_s16(int16x8_t a)
+{
+#ifdef POLYMORPHIC
+ return vqshluq(a, 5);
+#else /* POLYMORPHIC */
+ return vqshluq_n_s16(a, 5);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqshluq_n_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vqshlu.imm.v4i32(<4 x i32> [[A:%.*]], i32 4)
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+uint32x4_t test_vqshluq_n_s32(int32x4_t a)
+{
+#ifdef POLYMORPHIC
+ return vqshluq(a, 4);
+#else /* POLYMORPHIC */
+ return vqshluq_n_s32(a, 4);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrshrq_n_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vrshr.imm.v16i8(<16 x i8> [[A:%.*]], i32 4, i32 0)
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
+int8x16_t test_vrshrq_n_s8(int8x16_t a)
+{
+#ifdef POLYMORPHIC
+ return vrshrq(a, 4);
+#else /* POLYMORPHIC */
+ return vrshrq_n_s8(a, 4);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrshrq_n_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vrshr.imm.v8i16(<8 x i16> [[A:%.*]], i32 12, i32 0)
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+int16x8_t test_vrshrq_n_s16(int16x8_t a)
+{
+#ifdef POLYMORPHIC
+ return vrshrq(a, 12);
+#else /* POLYMORPHIC */
+ return vrshrq_n_s16(a, 12);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrshrq_n_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vrshr.imm.v4i32(<4 x i32> [[A:%.*]], i32 30, i32 0)
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+int32x4_t test_vrshrq_n_s32(int32x4_t a)
+{
+#ifdef POLYMORPHIC
+ return vrshrq(a, 30);
+#else /* POLYMORPHIC */
+ return vrshrq_n_s32(a, 30);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrshrq_n_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vrshr.imm.v16i8(<16 x i8> [[A:%.*]], i32 1, i32 1)
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
+uint8x16_t test_vrshrq_n_u8(uint8x16_t a)
+{
+#ifdef POLYMORPHIC
+ return vrshrq(a, 1);
+#else /* POLYMORPHIC */
+ return vrshrq_n_u8(a, 1);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrshrq_n_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vrshr.imm.v8i16(<8 x i16> [[A:%.*]], i32 15, i32 1)
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+uint16x8_t test_vrshrq_n_u16(uint16x8_t a)
+{
+#ifdef POLYMORPHIC
+ return vrshrq(a, 15);
+#else /* POLYMORPHIC */
+ return vrshrq_n_u16(a, 15);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrshrq_n_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vrshr.imm.v4i32(<4 x i32> [[A:%.*]], i32 20, i32 1)
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+uint32x4_t test_vrshrq_n_u32(uint32x4_t a)
+{
+#ifdef POLYMORPHIC
+ return vrshrq(a, 20);
+#else /* POLYMORPHIC */
+ return vrshrq_n_u32(a, 20);
+#endif /* POLYMORPHIC */
+}
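
vrshrq is a rounding shift right: the rounding constant 2^(sh-1) is added
before the shift, so results round to nearest rather than truncating.
A per-lane sketch for a signed 8-bit lane (illustration only; the helper
name is invented and it assumes the usual arithmetic behaviour of >> on
signed values):

#include <stdint.h>

/* Rounding shift right of one signed 8-bit lane: add 1 << (sh - 1)
   before the arithmetic shift. */
static int8_t rshr_s8(int8_t x, unsigned sh) /* sh in 1..8 */
{
    int32_t rounded = (int32_t)x + (1 << (sh - 1));
    return (int8_t)(rounded >> sh);
}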
+
+// CHECK-LABEL: @test_vqshlq_m_n_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vqshl.imm.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 6, i32 0, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vqshlq_m_n_s8(int8x16_t inactive, int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vqshlq_m_n(inactive, a, 6, p);
+#else /* POLYMORPHIC */
+ return vqshlq_m_n_s8(inactive, a, 6, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqshlq_m_n_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vqshl.imm.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 13, i32 0, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vqshlq_m_n_s16(int16x8_t inactive, int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vqshlq_m_n(inactive, a, 13, p);
+#else /* POLYMORPHIC */
+ return vqshlq_m_n_s16(inactive, a, 13, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqshlq_m_n_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vqshl.imm.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 14, i32 0, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vqshlq_m_n_s32(int32x4_t inactive, int32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vqshlq_m_n(inactive, a, 14, p);
+#else /* POLYMORPHIC */
+ return vqshlq_m_n_s32(inactive, a, 14, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqshlq_m_n_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vqshl.imm.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 4, i32 1, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+uint8x16_t test_vqshlq_m_n_u8(uint8x16_t inactive, uint8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vqshlq_m_n(inactive, a, 4, p);
+#else /* POLYMORPHIC */
+ return vqshlq_m_n_u8(inactive, a, 4, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqshlq_m_n_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vqshl.imm.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 9, i32 1, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vqshlq_m_n_u16(uint16x8_t inactive, uint16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vqshlq_m_n(inactive, a, 9, p);
+#else /* POLYMORPHIC */
+ return vqshlq_m_n_u16(inactive, a, 9, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqshlq_m_n_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vqshl.imm.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 25, i32 1, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vqshlq_m_n_u32(uint32x4_t inactive, uint32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vqshlq_m_n(inactive, a, 25, p);
+#else /* POLYMORPHIC */
+ return vqshlq_m_n_u32(inactive, a, 25, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqshluq_m_n_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vqshlu.imm.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 2, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+uint8x16_t test_vqshluq_m_n_s8(uint8x16_t inactive, int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vqshluq_m(inactive, a, 2, p);
+#else /* POLYMORPHIC */
+ return vqshluq_m_n_s8(inactive, a, 2, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqshluq_m_n_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vqshlu.imm.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 12, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vqshluq_m_n_s16(uint16x8_t inactive, int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vqshluq_m(inactive, a, 12, p);
+#else /* POLYMORPHIC */
+ return vqshluq_m_n_s16(inactive, a, 12, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqshluq_m_n_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vqshlu.imm.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 24, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vqshluq_m_n_s32(uint32x4_t inactive, int32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vqshluq_m(inactive, a, 24, p);
+#else /* POLYMORPHIC */
+ return vqshluq_m_n_s32(inactive, a, 24, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrshrq_m_n_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vrshr.imm.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 2, i32 0, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vrshrq_m_n_s8(int8x16_t inactive, int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrshrq_m(inactive, a, 2, p);
+#else /* POLYMORPHIC */
+ return vrshrq_m_n_s8(inactive, a, 2, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrshrq_m_n_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vrshr.imm.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 11, i32 0, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vrshrq_m_n_s16(int16x8_t inactive, int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrshrq_m(inactive, a, 11, p);
+#else /* POLYMORPHIC */
+ return vrshrq_m_n_s16(inactive, a, 11, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrshrq_m_n_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vrshr.imm.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 24, i32 0, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vrshrq_m_n_s32(int32x4_t inactive, int32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrshrq_m(inactive, a, 24, p);
+#else /* POLYMORPHIC */
+ return vrshrq_m_n_s32(inactive, a, 24, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrshrq_m_n_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vrshr.imm.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 7, i32 1, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+uint8x16_t test_vrshrq_m_n_u8(uint8x16_t inactive, uint8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrshrq_m(inactive, a, 7, p);
+#else /* POLYMORPHIC */
+ return vrshrq_m_n_u8(inactive, a, 7, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrshrq_m_n_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vrshr.imm.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 4, i32 1, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vrshrq_m_n_u16(uint16x8_t inactive, uint16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrshrq_m(inactive, a, 4, p);
+#else /* POLYMORPHIC */
+ return vrshrq_m_n_u16(inactive, a, 4, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrshrq_m_n_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vrshr.imm.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 27, i32 1, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vrshrq_m_n_u32(uint32x4_t inactive, uint32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrshrq_m(inactive, a, 27, p);
+#else /* POLYMORPHIC */
+ return vrshrq_m_n_u32(inactive, a, 27, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrshrq_x_n_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vrshr.imm.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 3, i32 0, <16 x i1> [[TMP1]], <16 x i8> undef)
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vrshrq_x_n_s8(int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrshrq_x(a, 3, p);
+#else /* POLYMORPHIC */
+ return vrshrq_x_n_s8(a, 3, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrshrq_x_n_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vrshr.imm.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 12, i32 0, <8 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vrshrq_x_n_s16(int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrshrq_x(a, 12, p);
+#else /* POLYMORPHIC */
+ return vrshrq_x_n_s16(a, 12, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrshrq_x_n_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vrshr.imm.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 20, i32 0, <4 x i1> [[TMP1]], <4 x i32> undef)
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vrshrq_x_n_s32(int32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrshrq_x(a, 20, p);
+#else /* POLYMORPHIC */
+ return vrshrq_x_n_s32(a, 20, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrshrq_x_n_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vrshr.imm.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 1, i32 1, <16 x i1> [[TMP1]], <16 x i8> undef)
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+uint8x16_t test_vrshrq_x_n_u8(uint8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrshrq_x(a, 1, p);
+#else /* POLYMORPHIC */
+ return vrshrq_x_n_u8(a, 1, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrshrq_x_n_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vrshr.imm.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 13, i32 1, <8 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vrshrq_x_n_u16(uint16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrshrq_x(a, 13, p);
+#else /* POLYMORPHIC */
+ return vrshrq_x_n_u16(a, 13, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrshrq_x_n_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vrshr.imm.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 6, i32 1, <4 x i1> [[TMP1]], <4 x i32> undef)
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vrshrq_x_n_u32(uint32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrshrq_x(a, 6, p);
+#else /* POLYMORPHIC */
+ return vrshrq_x_n_u32(a, 6, p);
+#endif /* POLYMORPHIC */
+}
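
The _m (merging) tests above supply a real inactive vector, whereas these
_x (don't-care) variants pass undef for it, so predicated-off lanes end up
unspecified. A one-lane model of the merge (sketch only, invented helper):

#include <stdint.h>

/* One lane of a predicated operation: the predicate bit selects between
   the computed result and the inactive source. In the _x forms the
   inactive value is a don't-care, which the IR expresses as undef. */
static int8_t predicated_lane(int predbit, int8_t computed, int8_t inactive)
{
    return predbit ? computed : inactive;
}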
+
+// CHECK-LABEL: @test_vshllbq_n_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vshll.imm.v8i16.v16i8(<16 x i8> [[A:%.*]], i32 2, i32 0, i32 0)
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+int16x8_t test_vshllbq_n_s8(int8x16_t a)
+{
+#ifdef POLYMORPHIC
+ return vshllbq(a, 2);
+#else /* POLYMORPHIC */
+ return vshllbq_n_s8(a, 2);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshllbq_n_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vshll.imm.v4i32.v8i16(<8 x i16> [[A:%.*]], i32 13, i32 0, i32 0)
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+int32x4_t test_vshllbq_n_s16(int16x8_t a)
+{
+#ifdef POLYMORPHIC
+ return vshllbq(a, 13);
+#else /* POLYMORPHIC */
+ return vshllbq_n_s16(a, 13);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshllbq_n_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vshll.imm.v8i16.v16i8(<16 x i8> [[A:%.*]], i32 5, i32 1, i32 0)
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+uint16x8_t test_vshllbq_n_u8(uint8x16_t a)
+{
+#ifdef POLYMORPHIC
+ return vshllbq(a, 5);
+#else /* POLYMORPHIC */
+ return vshllbq_n_u8(a, 5);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshllbq_n_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vshll.imm.v4i32.v8i16(<8 x i16> [[A:%.*]], i32 6, i32 1, i32 0)
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+uint32x4_t test_vshllbq_n_u16(uint16x8_t a)
+{
+#ifdef POLYMORPHIC
+ return vshllbq(a, 6);
+#else /* POLYMORPHIC */
+ return vshllbq_n_u16(a, 6);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshlltq_n_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vshll.imm.v8i16.v16i8(<16 x i8> [[A:%.*]], i32 7, i32 0, i32 1)
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+int16x8_t test_vshlltq_n_s8(int8x16_t a)
+{
+#ifdef POLYMORPHIC
+ return vshlltq(a, 7);
+#else /* POLYMORPHIC */
+ return vshlltq_n_s8(a, 7);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshlltq_n_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vshll.imm.v4i32.v8i16(<8 x i16> [[A:%.*]], i32 2, i32 0, i32 1)
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+int32x4_t test_vshlltq_n_s16(int16x8_t a)
+{
+#ifdef POLYMORPHIC
+ return vshlltq(a, 2);
+#else /* POLYMORPHIC */
+ return vshlltq_n_s16(a, 2);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshlltq_n_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vshll.imm.v8i16.v16i8(<16 x i8> [[A:%.*]], i32 7, i32 1, i32 1)
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+uint16x8_t test_vshlltq_n_u8(uint8x16_t a)
+{
+#ifdef POLYMORPHIC
+ return vshlltq(a, 7);
+#else /* POLYMORPHIC */
+ return vshlltq_n_u8(a, 7);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshlltq_n_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vshll.imm.v4i32.v8i16(<8 x i16> [[A:%.*]], i32 14, i32 1, i32 1)
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+uint32x4_t test_vshlltq_n_u16(uint16x8_t a)
+{
+#ifdef POLYMORPHIC
+ return vshlltq(a, 14);
+#else /* POLYMORPHIC */
+ return vshlltq_n_u16(a, 14);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshllbq_m_n_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 6, i32 0, i32 0, <16 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vshllbq_m_n_s8(int16x8_t inactive, int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vshllbq_m(inactive, a, 6, p);
+#else /* POLYMORPHIC */
+ return vshllbq_m_n_s8(inactive, a, 6, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshllbq_m_n_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 10, i32 0, i32 0, <8 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vshllbq_m_n_s16(int32x4_t inactive, int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vshllbq_m(inactive, a, 10, p);
+#else /* POLYMORPHIC */
+ return vshllbq_m_n_s16(inactive, a, 10, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshllbq_m_n_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 3, i32 1, i32 0, <16 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vshllbq_m_n_u8(uint16x8_t inactive, uint8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vshllbq_m(inactive, a, 3, p);
+#else /* POLYMORPHIC */
+ return vshllbq_m_n_u8(inactive, a, 3, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshllbq_m_n_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 14, i32 1, i32 0, <8 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vshllbq_m_n_u16(uint32x4_t inactive, uint16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vshllbq_m(inactive, a, 14, p);
+#else /* POLYMORPHIC */
+ return vshllbq_m_n_u16(inactive, a, 14, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshlltq_m_n_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 4, i32 0, i32 1, <16 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vshlltq_m_n_s8(int16x8_t inactive, int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vshlltq_m(inactive, a, 4, p);
+#else /* POLYMORPHIC */
+ return vshlltq_m_n_s8(inactive, a, 4, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshlltq_m_n_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 12, i32 0, i32 1, <8 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vshlltq_m_n_s16(int32x4_t inactive, int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vshlltq_m(inactive, a, 12, p);
+#else /* POLYMORPHIC */
+ return vshlltq_m_n_s16(inactive, a, 12, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshlltq_m_n_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 2, i32 1, i32 1, <16 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vshlltq_m_n_u8(uint16x8_t inactive, uint8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vshlltq_m(inactive, a, 2, p);
+#else /* POLYMORPHIC */
+ return vshlltq_m_n_u8(inactive, a, 2, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshlltq_m_n_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 9, i32 1, i32 1, <8 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vshlltq_m_n_u16(uint32x4_t inactive, uint16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vshlltq_m(inactive, a, 9, p);
+#else /* POLYMORPHIC */
+ return vshlltq_m_n_u16(inactive, a, 9, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshllbq_x_n_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 1, i32 0, i32 0, <16 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vshllbq_x_n_s8(int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vshllbq_x(a, 1, p);
+#else /* POLYMORPHIC */
+ return vshllbq_x_n_s8(a, 1, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshllbq_x_n_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 10, i32 0, i32 0, <8 x i1> [[TMP1]], <4 x i32> undef)
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vshllbq_x_n_s16(int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vshllbq_x(a, 10, p);
+#else /* POLYMORPHIC */
+ return vshllbq_x_n_s16(a, 10, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshllbq_x_n_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 6, i32 1, i32 0, <16 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vshllbq_x_n_u8(uint8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vshllbq_x(a, 6, p);
+#else /* POLYMORPHIC */
+ return vshllbq_x_n_u8(a, 6, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshllbq_x_n_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 10, i32 1, i32 0, <8 x i1> [[TMP1]], <4 x i32> undef)
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vshllbq_x_n_u16(uint16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vshllbq_x(a, 10, p);
+#else /* POLYMORPHIC */
+ return vshllbq_x_n_u16(a, 10, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshlltq_x_n_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 2, i32 0, i32 1, <16 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vshlltq_x_n_s8(int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vshlltq_x(a, 2, p);
+#else /* POLYMORPHIC */
+ return vshlltq_x_n_s8(a, 2, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshlltq_x_n_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 6, i32 0, i32 1, <8 x i1> [[TMP1]], <4 x i32> undef)
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vshlltq_x_n_s16(int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vshlltq_x(a, 6, p);
+#else /* POLYMORPHIC */
+ return vshlltq_x_n_s16(a, 6, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshlltq_x_n_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 5, i32 1, i32 1, <16 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vshlltq_x_n_u8(uint8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vshlltq_x(a, 5, p);
+#else /* POLYMORPHIC */
+ return vshlltq_x_n_u8(a, 5, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshlltq_x_n_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 3, i32 1, i32 1, <8 x i1> [[TMP1]], <4 x i32> undef)
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vshlltq_x_n_u16(uint16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vshlltq_x(a, 3, p);
+#else /* POLYMORPHIC */
+ return vshlltq_x_n_u16(a, 3, p);
+#endif /* POLYMORPHIC */
+}
+
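Each of these tests is compiled twice, once with POLYMORPHIC defined and
once without, so the overloaded spelling and the explicitly suffixed one
are checked against the same expected IR. For instance (sketch, same shape
as the tests above; the function name is invented):

#include <arm_mve.h>

/* The polymorphic spelling and the explicit one lower identically. */
int8x16_t both_spellings(int8x16_t a)
{
    return vqshlq_n(a, 3); /* same IR as vqshlq_n_s8(a, 3) */
}
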
diff --git a/llvm/include/llvm/IR/IntrinsicsARM.td b/llvm/include/llvm/IR/IntrinsicsARM.td
index 8995f969e2e..a10620612e2 100644
--- a/llvm/include/llvm/IR/IntrinsicsARM.td
+++ b/llvm/include/llvm/IR/IntrinsicsARM.td
@@ -876,10 +876,19 @@ defm int_arm_mve_maxv: IntrinsicSignSuffix<[llvm_i32_ty],
[llvm_i32_ty, llvm_anyvector_ty], [IntrNoMem]>;
multiclass MVEPredicated<list<LLVMType> rets, list<LLVMType> params,
- LLVMType pred, list<IntrinsicProperty> props = []> {
+ LLVMType pred = llvm_anyvector_ty,
+ list<IntrinsicProperty> props = []> {
def "": Intrinsic<rets, params, props>;
def _predicated: Intrinsic<rets, params # [pred], props>;
}
+multiclass MVEPredicatedM<list<LLVMType> rets, list<LLVMType> params,
+ LLVMType pred = llvm_anyvector_ty,
+ list<IntrinsicProperty> props = [IntrNoMem]> {
+ def "": Intrinsic<rets, params, props>;
+ def _predicated: Intrinsic<rets, params # [pred,
+ !if(!eq(!cast<string>(rets[0]), "llvm_anyvector_ty"),
+ LLVMMatchType<0>, rets[0])], props>;
+}
defm int_arm_mve_vcvt_narrow: MVEPredicated<[llvm_v8f16_ty],
[llvm_v8f16_ty, llvm_v4f32_ty, llvm_i32_ty], llvm_v4i1_ty, [IntrNoMem]>;
@@ -921,6 +930,16 @@ def int_arm_mve_shr_imm_predicated: Intrinsic<[llvm_anyvector_ty],
llvm_anyvector_ty, LLVMMatchType<0>],
[IntrNoMem]>;
+defm int_arm_mve_vqshl_imm: MVEPredicatedM<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, llvm_i32_ty /*shiftcount*/, llvm_i32_ty /*unsigned*/]>;
+defm int_arm_mve_vrshr_imm: MVEPredicatedM<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, llvm_i32_ty /*shiftcount*/, llvm_i32_ty /*unsigned*/]>;
+defm int_arm_mve_vqshlu_imm: MVEPredicatedM<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, llvm_i32_ty /*shiftcount*/]>;
+defm int_arm_mve_vshll_imm: MVEPredicatedM<[llvm_anyvector_ty],
+ [llvm_anyvector_ty, llvm_i32_ty /*shiftcount*/, llvm_i32_ty /*unsigned*/,
+ llvm_i32_ty /*top-half*/]>;
+
// MVE scalar shifts.
class ARM_MVE_qrshift_single<list<LLVMType> value,
list<LLVMType> saturate = []> :
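
In MVEPredicatedM, the !if chooses the type of the trailing inactive
operand so that it always matches the intrinsic's return type. That
matters for the widening vshll intrinsics, whose return type differs from
the input type: at the C level the merge source has the widened element
type. A small usage sketch (it mirrors test_vshllbq_m_n_u8 from the clang
tests above; the function name here is invented):

#include <arm_mve.h>

/* The merge source of a widening shift has the widened type:
   inactive is uint16x8_t even though the shifted input is uint8x16_t. */
uint16x8_t widen_merge(uint16x8_t inactive, uint8x16_t a, mve_pred16_t p)
{
    return vshllbq_m_n_u8(inactive, a, 3, p);
}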
diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index cf3fd62e493..6dd56b35d0a 100644
--- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -159,6 +159,9 @@ public:
SDValue &OffReg, SDValue &ShImm);
bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);
+ template<int Min, int Max>
+ bool SelectImmediateInRange(SDValue N, SDValue &OffImm);
+
inline bool is_so_imm(unsigned Imm) const {
return ARM_AM::getSOImmVal(Imm) != -1;
}
@@ -1383,6 +1386,16 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
return false;
}
+template <int Min, int Max>
+bool ARMDAGToDAGISel::SelectImmediateInRange(SDValue N, SDValue &OffImm) {
+ int Val;
+ if (isScaledConstantInRange(N, 1, Min, Max, Val)) {
+ OffImm = CurDAG->getTargetConstant(Val, SDLoc(N), MVT::i32);
+ return true;
+ }
+ return false;
+}
+
bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
SDValue &Base,
SDValue &OffReg, SDValue &ShImm) {
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td
index 155e0efff1a..1cab1747ff4 100644
--- a/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -935,7 +935,10 @@ def MVEShiftImm1_7AsmOperand: ImmAsmOperand<1,7> {
// encodings allow.
let DiagnosticString = "operand must be an immediate in the range [1,8]";
}
-def mve_shift_imm1_7 : Operand<i32> {
+def mve_shift_imm1_7 : Operand<i32>,
+ // SelectImmediateInRange / isScaledConstantInRange uses a
+ // half-open interval, so the parameters <1,8> mean 1-7 inclusive
+ ComplexPattern<i32, 1, "SelectImmediateInRange<1,8>", [], []> {
let ParserMatchClass = MVEShiftImm1_7AsmOperand;
let EncoderMethod = "getMVEShiftImmOpValue";
}
@@ -948,7 +951,10 @@ def MVEShiftImm1_15AsmOperand: ImmAsmOperand<1,15> {
// encodings allow.
let DiagnosticString = "operand must be an immediate in the range [1,16]";
}
-def mve_shift_imm1_15 : Operand<i32> {
+def mve_shift_imm1_15 : Operand<i32>,
+ // SelectImmediateInRange / isScaledConstantInRange uses a
+ // half-open interval, so the parameters <1,16> mean 1-15 inclusive
+ ComplexPattern<i32, 1, "SelectImmediateInRange<1,16>", [], []> {
let ParserMatchClass = MVEShiftImm1_15AsmOperand;
let EncoderMethod = "getMVEShiftImmOpValue";
}
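
As the comments in both hunks note, SelectImmediateInRange treats its
template parameters as a half-open interval: <1,8> accepts shift counts
1-7 and <1,16> accepts 1-15, matching what the narrower VSHLL encodings
allow. A standalone restatement of the check (sketch only; the real code
goes through isScaledConstantInRange):

#include <stdbool.h>

/* Half-open interval: min inclusive, max exclusive, so (val, 1, 8)
   accepts 1..7 and (val, 1, 16) accepts 1..15. */
static bool immInRange(int val, int min, int max)
{
    return val >= min && val < max;
}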
diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
index a1a8614e4a7..21f0d5e8679 100644
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -2315,8 +2315,8 @@ let Predicates = [HasMVEInt] in {
class MVE_VSHLL_imm<string iname, string suffix, bit U, bit th,
- dag immops, list<dag> pattern=[]>
- : MVE_shift_imm<(outs MQPR:$Qd), !con((ins MQPR:$Qm), immops),
+ Operand immtype, list<dag> pattern=[]>
+ : MVE_shift_imm<(outs MQPR:$Qd), (ins MQPR:$Qm, immtype:$imm),
iname, suffix, "$Qd, $Qm, $imm", vpred_r, "", pattern> {
let Inst{28} = U;
let Inst{25-23} = 0b101;
@@ -2325,6 +2325,9 @@ class MVE_VSHLL_imm<string iname, string suffix, bit U, bit th,
let Inst{11-6} = 0b111101;
let Inst{4} = 0b0;
let Inst{0} = 0b0;
+
+ // For the MVE_VSHLL_patterns multiclass to refer to
+ Operand immediateType = immtype;
}
// The immediate VSHLL instructions accept shift counts from 1 up to
@@ -2333,7 +2336,7 @@ class MVE_VSHLL_imm<string iname, string suffix, bit U, bit th,
class MVE_VSHLL_imm8<string iname, string suffix,
bit U, bit th, list<dag> pattern=[]>
- : MVE_VSHLL_imm<iname, suffix, U, th, (ins mve_shift_imm1_7:$imm), pattern> {
+ : MVE_VSHLL_imm<iname, suffix, U, th, mve_shift_imm1_7, pattern> {
bits<3> imm;
let Inst{20-19} = 0b01;
let Inst{18-16} = imm;
@@ -2341,7 +2344,7 @@ class MVE_VSHLL_imm8<string iname, string suffix,
class MVE_VSHLL_imm16<string iname, string suffix,
bit U, bit th, list<dag> pattern=[]>
- : MVE_VSHLL_imm<iname, suffix, U, th, (ins mve_shift_imm1_15:$imm), pattern> {
+ : MVE_VSHLL_imm<iname, suffix, U, th, mve_shift_imm1_15, pattern> {
bits<4> imm;
let Inst{20} = 0b1;
let Inst{19-16} = imm;
@@ -2385,6 +2388,45 @@ defm MVE_VSHLL_lws16 : MVE_VSHLL_lw<"vshll", "s16", 0b01, 0b0, "$Qd, $Qm, #16">;
defm MVE_VSHLL_lwu8 : MVE_VSHLL_lw<"vshll", "u8", 0b00, 0b1, "$Qd, $Qm, #8">;
defm MVE_VSHLL_lwu16 : MVE_VSHLL_lw<"vshll", "u16", 0b01, 0b1, "$Qd, $Qm, #16">;
+multiclass MVE_VSHLL_patterns<MVEVectorVTInfo VTI, int top> {
+ // A succession of local variable definitions, via singleton
+ // foreach, to make the actual patterns legible
+ foreach suffix = [!strconcat(VTI.Suffix, !if(top, "th", "bh"))] in
+ foreach inst_imm = [!cast<MVE_VSHLL_imm>("MVE_VSHLL_imm" # suffix)] in
+ foreach inst_lw = [!cast<MVE_VSHLL_by_lane_width>("MVE_VSHLL_lw" # suffix)] in
+ foreach unpred_int = [int_arm_mve_vshll_imm] in
+ foreach pred_int = [int_arm_mve_vshll_imm_predicated] in
+ foreach imm = [inst_imm.immediateType] in {
+
+ def : Pat<(VTI.DblVec (unpred_int (VTI.Vec MQPR:$src), imm:$imm,
+ (i32 VTI.Unsigned), (i32 top))),
+ (VTI.DblVec (inst_imm (VTI.Vec MQPR:$src), imm:$imm))>;
+ def : Pat<(VTI.DblVec (unpred_int (VTI.Vec MQPR:$src), (i32 VTI.LaneBits),
+ (i32 VTI.Unsigned), (i32 top))),
+ (VTI.DblVec (inst_lw (VTI.Vec MQPR:$src)))>;
+
+ def : Pat<(VTI.DblVec (pred_int (VTI.Vec MQPR:$src), imm:$imm,
+ (i32 VTI.Unsigned), (i32 top),
+ (VTI.Pred VCCR:$mask),
+ (VTI.DblVec MQPR:$inactive))),
+ (VTI.DblVec (inst_imm (VTI.Vec MQPR:$src), imm:$imm,
+ ARMVCCThen, (VTI.Pred VCCR:$mask),
+ (VTI.DblVec MQPR:$inactive)))>;
+ def : Pat<(VTI.DblVec (pred_int (VTI.Vec MQPR:$src), (i32 VTI.LaneBits),
+ (i32 VTI.Unsigned), (i32 top),
+ (VTI.Pred VCCR:$mask),
+ (VTI.DblVec MQPR:$inactive))),
+ (VTI.DblVec (inst_lw (VTI.Vec MQPR:$src), ARMVCCThen,
+ (VTI.Pred VCCR:$mask),
+ (VTI.DblVec MQPR:$inactive)))>;
+
+ }
+}
+
+foreach VTI = [MVE_v16s8, MVE_v8s16, MVE_v16u8, MVE_v8u16] in
+ foreach top = [0, 1] in
+ defm : MVE_VSHLL_patterns<VTI, top>;
+
class MVE_VxSHRN<string iname, string suffix, bit bit_12, bit bit_28,
dag immops, list<dag> pattern=[]>
: MVE_shift_imm<(outs MQPR:$Qd), !con((ins MQPR:$QdSrc, MQPR:$Qm), immops),
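
Each VSHLL form gets two patterns because a shift count equal to the lane
width has its own encoding (the MVE_VSHLL_lw definitions above, printed as
"#8" or "#16"); the second Pat of each pair matches (i32 VTI.LaneBits) and
selects that form. A C-level illustration in the style of the clang tests
(this particular case is not among the patch's tests):

#include <arm_mve.h>

/* A shift by the full lane width (8 for u8 lanes) should select the
   by-lane-width encoding, i.e. vshll.u8 q0, q0, #8. */
uint16x8_t widen_by_lane_width(uint8x16_t a)
{
    return vshllbq_n_u8(a, 8);
}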
@@ -2606,6 +2648,13 @@ class MVE_shift_with_imm<string iname, string suffix, dag oops, dag iops,
let Inst{3-1} = Qm{2-0};
let Inst{0} = 0b0;
let validForTailPredication = 1;
+
+ // For the MVE_shift_imm_patterns multiclass to refer to
+ MVEVectorVTInfo VTI;
+ Operand immediateType;
+ Intrinsic unpred_int;
+ Intrinsic pred_int;
+ dag unsignedFlag = (?);
}
class MVE_VSxI_imm<string iname, string suffix, bit bit_8, dag imm>
@@ -2645,50 +2694,49 @@ def MVE_VSLIimm32 : MVE_VSxI_imm<"vsli", "32", 0b1,(ins imm0_31:$imm)> {
let Inst{21} = 0b1;
}
-class MVE_VQSHL_imm<string suffix, dag imm>
- : MVE_shift_with_imm<"vqshl", suffix, (outs MQPR:$Qd),
- !con((ins MQPR:$Qm), imm), "$Qd, $Qm, $imm",
+class MVE_VQSHL_imm<MVEVectorVTInfo VTI_, Operand immType>
+ : MVE_shift_with_imm<"vqshl", VTI_.Suffix, (outs MQPR:$Qd),
+ (ins MQPR:$Qm, immType:$imm), "$Qd, $Qm, $imm",
vpred_r, ""> {
bits<6> imm;
+ let Inst{28} = VTI_.Unsigned;
let Inst{25-24} = 0b11;
let Inst{21-16} = imm;
let Inst{10-8} = 0b111;
-}
-
-def MVE_VQSHLimms8 : MVE_VQSHL_imm<"s8", (ins imm0_7:$imm)> {
- let Inst{28} = 0b0;
- let Inst{21-19} = 0b001;
-}
-
-def MVE_VQSHLimmu8 : MVE_VQSHL_imm<"u8", (ins imm0_7:$imm)> {
- let Inst{28} = 0b1;
- let Inst{21-19} = 0b001;
-}
-
-def MVE_VQSHLimms16 : MVE_VQSHL_imm<"s16", (ins imm0_15:$imm)> {
- let Inst{28} = 0b0;
- let Inst{21-20} = 0b01;
-}
-def MVE_VQSHLimmu16 : MVE_VQSHL_imm<"u16", (ins imm0_15:$imm)> {
- let Inst{28} = 0b1;
- let Inst{21-20} = 0b01;
-}
-
-def MVE_VQSHLimms32 : MVE_VQSHL_imm<"s32", (ins imm0_31:$imm)> {
- let Inst{28} = 0b0;
- let Inst{21} = 0b1;
+ let VTI = VTI_;
+ let immediateType = immType;
+ let unsignedFlag = (? (i32 VTI.Unsigned));
}
-def MVE_VQSHLimmu32 : MVE_VQSHL_imm<"u32", (ins imm0_31:$imm)> {
- let Inst{28} = 0b1;
- let Inst{21} = 0b1;
+let unpred_int = int_arm_mve_vqshl_imm,
+ pred_int = int_arm_mve_vqshl_imm_predicated in {
+ def MVE_VQSHLimms8 : MVE_VQSHL_imm<MVE_v16s8, imm0_7> {
+ let Inst{21-19} = 0b001;
+ }
+ def MVE_VQSHLimmu8 : MVE_VQSHL_imm<MVE_v16u8, imm0_7> {
+ let Inst{21-19} = 0b001;
+ }
+
+ def MVE_VQSHLimms16 : MVE_VQSHL_imm<MVE_v8s16, imm0_15> {
+ let Inst{21-20} = 0b01;
+ }
+ def MVE_VQSHLimmu16 : MVE_VQSHL_imm<MVE_v8u16, imm0_15> {
+ let Inst{21-20} = 0b01;
+ }
+
+ def MVE_VQSHLimms32 : MVE_VQSHL_imm<MVE_v4s32, imm0_31> {
+ let Inst{21} = 0b1;
+ }
+ def MVE_VQSHLimmu32 : MVE_VQSHL_imm<MVE_v4u32, imm0_31> {
+ let Inst{21} = 0b1;
+ }
}
-class MVE_VQSHLU_imm<string suffix, dag imm>
- : MVE_shift_with_imm<"vqshlu", suffix, (outs MQPR:$Qd),
- !con((ins MQPR:$Qm), imm), "$Qd, $Qm, $imm",
+class MVE_VQSHLU_imm<MVEVectorVTInfo VTI_, Operand immType>
+ : MVE_shift_with_imm<"vqshlu", VTI_.Suffix, (outs MQPR:$Qd),
+ (ins MQPR:$Qm, immType:$imm), "$Qd, $Qm, $imm",
vpred_r, ""> {
bits<6> imm;
@@ -2696,61 +2744,103 @@ class MVE_VQSHLU_imm<string suffix, dag imm>
let Inst{25-24} = 0b11;
let Inst{21-16} = imm;
let Inst{10-8} = 0b110;
-}
-def MVE_VQSHLU_imms8 : MVE_VQSHLU_imm<"s8", (ins imm0_7:$imm)> {
- let Inst{21-19} = 0b001;
+ let VTI = VTI_;
+ let immediateType = immType;
}
-def MVE_VQSHLU_imms16 : MVE_VQSHLU_imm<"s16", (ins imm0_15:$imm)> {
- let Inst{21-20} = 0b01;
-}
+let unpred_int = int_arm_mve_vqshlu_imm,
+ pred_int = int_arm_mve_vqshlu_imm_predicated in {
+ def MVE_VQSHLU_imms8 : MVE_VQSHLU_imm<MVE_v16s8, imm0_7> {
+ let Inst{21-19} = 0b001;
+ }
-def MVE_VQSHLU_imms32 : MVE_VQSHLU_imm<"s32", (ins imm0_31:$imm)> {
- let Inst{21} = 0b1;
+ def MVE_VQSHLU_imms16 : MVE_VQSHLU_imm<MVE_v8s16, imm0_15> {
+ let Inst{21-20} = 0b01;
+ }
+
+ def MVE_VQSHLU_imms32 : MVE_VQSHLU_imm<MVE_v4s32, imm0_31> {
+ let Inst{21} = 0b1;
+ }
}
-class MVE_VRSHR_imm<string suffix, dag imm>
- : MVE_shift_with_imm<"vrshr", suffix, (outs MQPR:$Qd),
- !con((ins MQPR:$Qm), imm), "$Qd, $Qm, $imm",
+class MVE_VRSHR_imm<MVEVectorVTInfo VTI_, Operand immType>
+ : MVE_shift_with_imm<"vrshr", VTI_.Suffix, (outs MQPR:$Qd),
+ (ins MQPR:$Qm, immType:$imm), "$Qd, $Qm, $imm",
vpred_r, ""> {
bits<6> imm;
+ let Inst{28} = VTI_.Unsigned;
let Inst{25-24} = 0b11;
let Inst{21-16} = imm;
let Inst{10-8} = 0b010;
-}
-def MVE_VRSHR_imms8 : MVE_VRSHR_imm<"s8", (ins shr_imm8:$imm)> {
- let Inst{28} = 0b0;
- let Inst{21-19} = 0b001;
+ let VTI = VTI_;
+ let immediateType = immType;
+ let unsignedFlag = (? (i32 VTI.Unsigned));
}
-def MVE_VRSHR_immu8 : MVE_VRSHR_imm<"u8", (ins shr_imm8:$imm)> {
- let Inst{28} = 0b1;
- let Inst{21-19} = 0b001;
-}
+let unpred_int = int_arm_mve_vrshr_imm,
+ pred_int = int_arm_mve_vrshr_imm_predicated in {
+ def MVE_VRSHR_imms8 : MVE_VRSHR_imm<MVE_v16s8, shr_imm8> {
+ let Inst{21-19} = 0b001;
+ }
-def MVE_VRSHR_imms16 : MVE_VRSHR_imm<"s16", (ins shr_imm16:$imm)> {
- let Inst{28} = 0b0;
- let Inst{21-20} = 0b01;
-}
+ def MVE_VRSHR_immu8 : MVE_VRSHR_imm<MVE_v16u8, shr_imm8> {
+ let Inst{21-19} = 0b001;
+ }
-def MVE_VRSHR_immu16 : MVE_VRSHR_imm<"u16", (ins shr_imm16:$imm)> {
- let Inst{28} = 0b1;
- let Inst{21-20} = 0b01;
-}
+ def MVE_VRSHR_imms16 : MVE_VRSHR_imm<MVE_v8s16, shr_imm16> {
+ let Inst{21-20} = 0b01;
+ }
-def MVE_VRSHR_imms32 : MVE_VRSHR_imm<"s32", (ins shr_imm32:$imm)> {
- let Inst{28} = 0b0;
- let Inst{21} = 0b1;
-}
+ def MVE_VRSHR_immu16 : MVE_VRSHR_imm<MVE_v8u16, shr_imm16> {
+ let Inst{21-20} = 0b01;
+ }
-def MVE_VRSHR_immu32 : MVE_VRSHR_imm<"u32", (ins shr_imm32:$imm)> {
- let Inst{28} = 0b1;
- let Inst{21} = 0b1;
+ def MVE_VRSHR_imms32 : MVE_VRSHR_imm<MVE_v4s32, shr_imm32> {
+ let Inst{21} = 0b1;
+ }
+
+ def MVE_VRSHR_immu32 : MVE_VRSHR_imm<MVE_v4u32, shr_imm32> {
+ let Inst{21} = 0b1;
+ }
}
+multiclass MVE_shift_imm_patterns<MVE_shift_with_imm inst> {
+ def : Pat<(inst.VTI.Vec !con((inst.unpred_int (inst.VTI.Vec MQPR:$src),
+ inst.immediateType:$imm),
+ inst.unsignedFlag)),
+ (inst.VTI.Vec (inst (inst.VTI.Vec MQPR:$src),
+ inst.immediateType:$imm))>;
+
+ def : Pat<(inst.VTI.Vec !con((inst.pred_int (inst.VTI.Vec MQPR:$src),
+ inst.immediateType:$imm),
+ inst.unsignedFlag,
+ (? (inst.VTI.Pred VCCR:$mask),
+ (inst.VTI.Vec MQPR:$inactive)))),
+ (inst.VTI.Vec (inst (inst.VTI.Vec MQPR:$src),
+ inst.immediateType:$imm,
+ ARMVCCThen, (inst.VTI.Pred VCCR:$mask),
+ (inst.VTI.Vec MQPR:$inactive)))>;
+}
+
+defm : MVE_shift_imm_patterns<MVE_VQSHLimms8>;
+defm : MVE_shift_imm_patterns<MVE_VQSHLimmu8>;
+defm : MVE_shift_imm_patterns<MVE_VQSHLimms16>;
+defm : MVE_shift_imm_patterns<MVE_VQSHLimmu16>;
+defm : MVE_shift_imm_patterns<MVE_VQSHLimms32>;
+defm : MVE_shift_imm_patterns<MVE_VQSHLimmu32>;
+defm : MVE_shift_imm_patterns<MVE_VQSHLU_imms8>;
+defm : MVE_shift_imm_patterns<MVE_VQSHLU_imms16>;
+defm : MVE_shift_imm_patterns<MVE_VQSHLU_imms32>;
+defm : MVE_shift_imm_patterns<MVE_VRSHR_imms8>;
+defm : MVE_shift_imm_patterns<MVE_VRSHR_immu8>;
+defm : MVE_shift_imm_patterns<MVE_VRSHR_imms16>;
+defm : MVE_shift_imm_patterns<MVE_VRSHR_immu16>;
+defm : MVE_shift_imm_patterns<MVE_VRSHR_imms32>;
+defm : MVE_shift_imm_patterns<MVE_VRSHR_immu32>;
+
class MVE_VSHR_imm<string suffix, dag imm>
: MVE_shift_with_imm<"vshr", suffix, (outs MQPR:$Qd),
!con((ins MQPR:$Qm), imm), "$Qd, $Qm, $imm",
diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vector-shift-imm.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vector-shift-imm.ll
index 86228ef94b3..dd4980f99b9 100644
--- a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vector-shift-imm.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vector-shift-imm.ll
@@ -385,6 +385,1058 @@ entry:
ret <4 x i32> %2
}
+define arm_aapcs_vfpcc <16 x i8> @test_vqshlq_n_s8(<16 x i8> %a) {
+; CHECK-LABEL: test_vqshlq_n_s8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vqshl.s8 q0, q0, #3
+; CHECK-NEXT: bx lr
+entry:
+ %0 = tail call <16 x i8> @llvm.arm.mve.vqshl.imm.v16i8(<16 x i8> %a, i32 3, i32 0)
+ ret <16 x i8> %0
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vqshlq_n_s16(<8 x i16> %a) {
+; CHECK-LABEL: test_vqshlq_n_s16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vqshl.s16 q0, q0, #4
+; CHECK-NEXT: bx lr
+entry:
+ %0 = tail call <8 x i16> @llvm.arm.mve.vqshl.imm.v8i16(<8 x i16> %a, i32 4, i32 0)
+ ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vqshlq_n_s32(<4 x i32> %a) {
+; CHECK-LABEL: test_vqshlq_n_s32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vqshl.s32 q0, q0, #4
+; CHECK-NEXT: bx lr
+entry:
+ %0 = tail call <4 x i32> @llvm.arm.mve.vqshl.imm.v4i32(<4 x i32> %a, i32 4, i32 0)
+ ret <4 x i32> %0
+}
+
+define arm_aapcs_vfpcc <16 x i8> @test_vqshlq_n_u8(<16 x i8> %a) {
+; CHECK-LABEL: test_vqshlq_n_u8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vqshl.u8 q0, q0, #0
+; CHECK-NEXT: bx lr
+entry:
+ %0 = tail call <16 x i8> @llvm.arm.mve.vqshl.imm.v16i8(<16 x i8> %a, i32 0, i32 1)
+ ret <16 x i8> %0
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vqshlq_n_u16(<8 x i16> %a) {
+; CHECK-LABEL: test_vqshlq_n_u16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vqshl.u16 q0, q0, #13
+; CHECK-NEXT: bx lr
+entry:
+ %0 = tail call <8 x i16> @llvm.arm.mve.vqshl.imm.v8i16(<8 x i16> %a, i32 13, i32 1)
+ ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vqshlq_n_u32(<4 x i32> %a) {
+; CHECK-LABEL: test_vqshlq_n_u32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vqshl.u32 q0, q0, #6
+; CHECK-NEXT: bx lr
+entry:
+ %0 = tail call <4 x i32> @llvm.arm.mve.vqshl.imm.v4i32(<4 x i32> %a, i32 6, i32 1)
+ ret <4 x i32> %0
+}
+
+define arm_aapcs_vfpcc <16 x i8> @test_vqshluq_n_s8(<16 x i8> %a) {
+; CHECK-LABEL: test_vqshluq_n_s8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vqshlu.s8 q0, q0, #5
+; CHECK-NEXT: bx lr
+entry:
+ %0 = tail call <16 x i8> @llvm.arm.mve.vqshlu.imm.v16i8(<16 x i8> %a, i32 5)
+ ret <16 x i8> %0
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vqshluq_n_s16(<8 x i16> %a) {
+; CHECK-LABEL: test_vqshluq_n_s16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vqshlu.s16 q0, q0, #5
+; CHECK-NEXT: bx lr
+entry:
+ %0 = tail call <8 x i16> @llvm.arm.mve.vqshlu.imm.v8i16(<8 x i16> %a, i32 5)
+ ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vqshluq_n_s32(<4 x i32> %a) {
+; CHECK-LABEL: test_vqshluq_n_s32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vqshlu.s32 q0, q0, #4
+; CHECK-NEXT: bx lr
+entry:
+ %0 = tail call <4 x i32> @llvm.arm.mve.vqshlu.imm.v4i32(<4 x i32> %a, i32 4)
+ ret <4 x i32> %0
+}
+
+define arm_aapcs_vfpcc <16 x i8> @test_vrshrq_n_s8(<16 x i8> %a) {
+; CHECK-LABEL: test_vrshrq_n_s8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vrshr.s8 q0, q0, #4
+; CHECK-NEXT: bx lr
+entry:
+ %0 = tail call <16 x i8> @llvm.arm.mve.vrshr.imm.v16i8(<16 x i8> %a, i32 4, i32 0)
+ ret <16 x i8> %0
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vrshrq_n_s16(<8 x i16> %a) {
+; CHECK-LABEL: test_vrshrq_n_s16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vrshr.s16 q0, q0, #12
+; CHECK-NEXT: bx lr
+entry:
+ %0 = tail call <8 x i16> @llvm.arm.mve.vrshr.imm.v8i16(<8 x i16> %a, i32 12, i32 0)
+ ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vrshrq_n_s32(<4 x i32> %a) {
+; CHECK-LABEL: test_vrshrq_n_s32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vrshr.s32 q0, q0, #30
+; CHECK-NEXT: bx lr
+entry:
+ %0 = tail call <4 x i32> @llvm.arm.mve.vrshr.imm.v4i32(<4 x i32> %a, i32 30, i32 0)
+ ret <4 x i32> %0
+}
+
+define arm_aapcs_vfpcc <16 x i8> @test_vrshrq_n_u8(<16 x i8> %a) {
+; CHECK-LABEL: test_vrshrq_n_u8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vrshr.u8 q0, q0, #1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = tail call <16 x i8> @llvm.arm.mve.vrshr.imm.v16i8(<16 x i8> %a, i32 1, i32 1)
+ ret <16 x i8> %0
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vrshrq_n_u16(<8 x i16> %a) {
+; CHECK-LABEL: test_vrshrq_n_u16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vrshr.u16 q0, q0, #15
+; CHECK-NEXT: bx lr
+entry:
+ %0 = tail call <8 x i16> @llvm.arm.mve.vrshr.imm.v8i16(<8 x i16> %a, i32 15, i32 1)
+ ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vrshrq_n_u32(<4 x i32> %a) {
+; CHECK-LABEL: test_vrshrq_n_u32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vrshr.u32 q0, q0, #20
+; CHECK-NEXT: bx lr
+entry:
+ %0 = tail call <4 x i32> @llvm.arm.mve.vrshr.imm.v4i32(<4 x i32> %a, i32 20, i32 1)
+ ret <4 x i32> %0
+}
+
+define arm_aapcs_vfpcc <16 x i8> @test_vqshlq_m_n_s8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vqshlq_m_n_s8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vqshlt.s8 q0, q1, #6
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+ %2 = tail call <16 x i8> @llvm.arm.mve.vqshl.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 6, i32 0, <16 x i1> %1, <16 x i8> %inactive)
+ ret <16 x i8> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vqshlq_m_n_s16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vqshlq_m_n_s16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vqshlt.s16 q0, q1, #13
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vqshl.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 13, i32 0, <8 x i1> %1, <8 x i16> %inactive)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vqshlq_m_n_s32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vqshlq_m_n_s32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vqshlt.s32 q0, q1, #14
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vqshl.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 14, i32 0, <4 x i1> %1, <4 x i32> %inactive)
+ ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <16 x i8> @test_vqshlq_m_n_u8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vqshlq_m_n_u8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vqshlt.u8 q0, q1, #4
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+ %2 = tail call <16 x i8> @llvm.arm.mve.vqshl.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 4, i32 1, <16 x i1> %1, <16 x i8> %inactive)
+ ret <16 x i8> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vqshlq_m_n_u16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vqshlq_m_n_u16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vqshlt.u16 q0, q1, #9
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vqshl.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 9, i32 1, <8 x i1> %1, <8 x i16> %inactive)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vqshlq_m_n_u32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vqshlq_m_n_u32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vqshlt.u32 q0, q1, #25
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vqshl.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 25, i32 1, <4 x i1> %1, <4 x i32> %inactive)
+ ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <16 x i8> @test_vqshluq_m_n_s8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vqshluq_m_n_s8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vqshlut.s8 q0, q1, #2
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+ %2 = tail call <16 x i8> @llvm.arm.mve.vqshlu.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 2, <16 x i1> %1, <16 x i8> %inactive)
+ ret <16 x i8> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vqshluq_m_n_s16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vqshluq_m_n_s16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vqshlut.s16 q0, q1, #12
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vqshlu.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 12, <8 x i1> %1, <8 x i16> %inactive)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vqshluq_m_n_s32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vqshluq_m_n_s32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vqshlut.s32 q0, q1, #24
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vqshlu.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 24, <4 x i1> %1, <4 x i32> %inactive)
+ ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <16 x i8> @test_vrshrq_m_n_s8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrshrq_m_n_s8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vrshrt.s8 q0, q1, #2
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+ %2 = tail call <16 x i8> @llvm.arm.mve.vrshr.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 2, i32 0, <16 x i1> %1, <16 x i8> %inactive)
+ ret <16 x i8> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vrshrq_m_n_s16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrshrq_m_n_s16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vrshrt.s16 q0, q1, #11
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vrshr.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 11, i32 0, <8 x i1> %1, <8 x i16> %inactive)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vrshrq_m_n_s32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrshrq_m_n_s32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vrshrt.s32 q0, q1, #24
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vrshr.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 24, i32 0, <4 x i1> %1, <4 x i32> %inactive)
+ ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <16 x i8> @test_vrshrq_m_n_u8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrshrq_m_n_u8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vrshrt.u8 q0, q1, #7
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+ %2 = tail call <16 x i8> @llvm.arm.mve.vrshr.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 7, i32 1, <16 x i1> %1, <16 x i8> %inactive)
+ ret <16 x i8> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vrshrq_m_n_u16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrshrq_m_n_u16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vrshrt.u16 q0, q1, #4
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vrshr.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 4, i32 1, <8 x i1> %1, <8 x i16> %inactive)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vrshrq_m_n_u32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrshrq_m_n_u32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vrshrt.u32 q0, q1, #27
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vrshr.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 27, i32 1, <4 x i1> %1, <4 x i32> %inactive)
+ ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <16 x i8> @test_vrshrq_x_n_s8(<16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrshrq_x_n_s8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vrshrt.s8 q0, q0, #3
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+ %2 = tail call <16 x i8> @llvm.arm.mve.vrshr.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 3, i32 0, <16 x i1> %1, <16 x i8> undef)
+ ret <16 x i8> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vrshrq_x_n_s16(<8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrshrq_x_n_s16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vrshrt.s16 q0, q0, #12
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vrshr.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 12, i32 0, <8 x i1> %1, <8 x i16> undef)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vrshrq_x_n_s32(<4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrshrq_x_n_s32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vrshrt.s32 q0, q0, #20
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vrshr.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 20, i32 0, <4 x i1> %1, <4 x i32> undef)
+ ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <16 x i8> @test_vrshrq_x_n_u8(<16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrshrq_x_n_u8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vrshrt.u8 q0, q0, #1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+ %2 = tail call <16 x i8> @llvm.arm.mve.vrshr.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 1, i32 1, <16 x i1> %1, <16 x i8> undef)
+ ret <16 x i8> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vrshrq_x_n_u16(<8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrshrq_x_n_u16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vrshrt.u16 q0, q0, #13
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vrshr.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 13, i32 1, <8 x i1> %1, <8 x i16> undef)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vrshrq_x_n_u32(<4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrshrq_x_n_u32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vrshrt.u32 q0, q0, #6
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vrshr.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 6, i32 1, <4 x i1> %1, <4 x i32> undef)
+ ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vshllbq_n_s8(<16 x i8> %a) {
+; CHECK-LABEL: test_vshllbq_n_s8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vshllb.s8 q0, q0, #2
+; CHECK-NEXT: bx lr
+entry:
+ %0 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.v8i16.v16i8(<16 x i8> %a, i32 2, i32 0, i32 0)
+ ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vshllbq_n_s8_lanewidth(<16 x i8> %a) {
+; CHECK-LABEL: test_vshllbq_n_s8_lanewidth:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vshllb.s8 q0, q0, #8
+; CHECK-NEXT: bx lr
+entry:
+ %0 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.v8i16.v16i8(<16 x i8> %a, i32 8, i32 0, i32 0)
+ ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vshllbq_n_s16(<8 x i16> %a) {
+; CHECK-LABEL: test_vshllbq_n_s16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vshllb.s16 q0, q0, #13
+; CHECK-NEXT: bx lr
+entry:
+ %0 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.v4i32.v8i16(<8 x i16> %a, i32 13, i32 0, i32 0)
+ ret <4 x i32> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vshllbq_n_s16_lanewidth(<8 x i16> %a) {
+; CHECK-LABEL: test_vshllbq_n_s16_lanewidth:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vshllb.s16 q0, q0, #16
+; CHECK-NEXT: bx lr
+entry:
+ %0 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.v4i32.v8i16(<8 x i16> %a, i32 16, i32 0, i32 0)
+ ret <4 x i32> %0
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vshllbq_n_u8(<16 x i8> %a) {
+; CHECK-LABEL: test_vshllbq_n_u8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vshllb.u8 q0, q0, #5
+; CHECK-NEXT: bx lr
+entry:
+ %0 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.v8i16.v16i8(<16 x i8> %a, i32 5, i32 1, i32 0)
+ ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vshllbq_n_u8_lanewidth(<16 x i8> %a) {
+; CHECK-LABEL: test_vshllbq_n_u8_lanewidth:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vshllb.u8 q0, q0, #8
+; CHECK-NEXT: bx lr
+entry:
+ %0 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.v8i16.v16i8(<16 x i8> %a, i32 8, i32 1, i32 0)
+ ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vshllbq_n_u16(<8 x i16> %a) {
+; CHECK-LABEL: test_vshllbq_n_u16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vshllb.u16 q0, q0, #6
+; CHECK-NEXT: bx lr
+entry:
+ %0 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.v4i32.v8i16(<8 x i16> %a, i32 6, i32 1, i32 0)
+ ret <4 x i32> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vshllbq_n_u16_lanewidth(<8 x i16> %a) {
+; CHECK-LABEL: test_vshllbq_n_u16_lanewidth:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vshllb.u16 q0, q0, #16
+; CHECK-NEXT: bx lr
+entry:
+ %0 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.v4i32.v8i16(<8 x i16> %a, i32 16, i32 1, i32 0)
+ ret <4 x i32> %0
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vshlltq_n_s8(<16 x i8> %a) {
+; CHECK-LABEL: test_vshlltq_n_s8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vshllt.s8 q0, q0, #7
+; CHECK-NEXT: bx lr
+entry:
+ %0 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.v8i16.v16i8(<16 x i8> %a, i32 7, i32 0, i32 1)
+ ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vshlltq_n_s8_lanewidth(<16 x i8> %a) {
+; CHECK-LABEL: test_vshlltq_n_s8_lanewidth:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vshllt.s8 q0, q0, #8
+; CHECK-NEXT: bx lr
+entry:
+ %0 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.v8i16.v16i8(<16 x i8> %a, i32 8, i32 0, i32 1)
+ ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vshlltq_n_s16(<8 x i16> %a) {
+; CHECK-LABEL: test_vshlltq_n_s16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vshllt.s16 q0, q0, #2
+; CHECK-NEXT: bx lr
+entry:
+ %0 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.v4i32.v8i16(<8 x i16> %a, i32 2, i32 0, i32 1)
+ ret <4 x i32> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vshlltq_n_s16_lanewidth(<8 x i16> %a) {
+; CHECK-LABEL: test_vshlltq_n_s16_lanewidth:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vshllt.s16 q0, q0, #16
+; CHECK-NEXT: bx lr
+entry:
+ %0 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.v4i32.v8i16(<8 x i16> %a, i32 16, i32 0, i32 1)
+ ret <4 x i32> %0
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vshlltq_n_u8(<16 x i8> %a) {
+; CHECK-LABEL: test_vshlltq_n_u8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vshllt.u8 q0, q0, #7
+; CHECK-NEXT: bx lr
+entry:
+ %0 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.v8i16.v16i8(<16 x i8> %a, i32 7, i32 1, i32 1)
+ ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vshlltq_n_u8_lanewidth(<16 x i8> %a) {
+; CHECK-LABEL: test_vshlltq_n_u8_lanewidth:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vshllt.u8 q0, q0, #8
+; CHECK-NEXT: bx lr
+entry:
+ %0 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.v8i16.v16i8(<16 x i8> %a, i32 8, i32 1, i32 1)
+ ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vshlltq_n_u16(<8 x i16> %a) {
+; CHECK-LABEL: test_vshlltq_n_u16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vshllt.u16 q0, q0, #14
+; CHECK-NEXT: bx lr
+entry:
+ %0 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.v4i32.v8i16(<8 x i16> %a, i32 14, i32 1, i32 1)
+ ret <4 x i32> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vshlltq_n_u16_lanewidth(<8 x i16> %a) {
+; CHECK-LABEL: test_vshlltq_n_u16_lanewidth:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vshllt.u16 q0, q0, #16
+; CHECK-NEXT: bx lr
+entry:
+ %0 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.v4i32.v8i16(<8 x i16> %a, i32 16, i32 1, i32 1)
+ ret <4 x i32> %0
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vshllbq_m_n_s8(<8 x i16> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vshllbq_m_n_s8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vshllbt.s8 q0, q1, #6
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> %a, i32 6, i32 0, i32 0, <16 x i1> %1, <8 x i16> %inactive)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vshllbq_m_n_s8_lanewidth(<8 x i16> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vshllbq_m_n_s8_lanewidth:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vshllbt.s8 q0, q1, #8
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> %a, i32 8, i32 0, i32 0, <16 x i1> %1, <8 x i16> %inactive)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vshllbq_m_n_s16(<4 x i32> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vshllbq_m_n_s16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vshllbt.s16 q0, q1, #10
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> %a, i32 10, i32 0, i32 0, <8 x i1> %1, <4 x i32> %inactive)
+ ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vshllbq_m_n_s16_lanewidth(<4 x i32> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vshllbq_m_n_s16_lanewidth:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vshllbt.s16 q0, q1, #16
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> %a, i32 16, i32 0, i32 0, <8 x i1> %1, <4 x i32> %inactive)
+ ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vshllbq_m_n_u8(<8 x i16> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vshllbq_m_n_u8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vshllbt.u8 q0, q1, #3
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> %a, i32 3, i32 1, i32 0, <16 x i1> %1, <8 x i16> %inactive)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vshllbq_m_n_u8_lanewidth(<8 x i16> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vshllbq_m_n_u8_lanewidth:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vshllbt.u8 q0, q1, #8
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> %a, i32 8, i32 1, i32 0, <16 x i1> %1, <8 x i16> %inactive)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vshllbq_m_n_u16(<4 x i32> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vshllbq_m_n_u16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vshllbt.u16 q0, q1, #14
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> %a, i32 14, i32 1, i32 0, <8 x i1> %1, <4 x i32> %inactive)
+ ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vshllbq_m_n_u16_lanewidth(<4 x i32> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vshllbq_m_n_u16_lanewidth:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vshllbt.u16 q0, q1, #16
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> %a, i32 16, i32 1, i32 0, <8 x i1> %1, <4 x i32> %inactive)
+ ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vshlltq_m_n_s8(<8 x i16> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vshlltq_m_n_s8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vshlltt.s8 q0, q1, #4
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> %a, i32 4, i32 0, i32 1, <16 x i1> %1, <8 x i16> %inactive)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vshlltq_m_n_s8_lanewidth(<8 x i16> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vshlltq_m_n_s8_lanewidth:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vshlltt.s8 q0, q1, #8
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> %a, i32 8, i32 0, i32 1, <16 x i1> %1, <8 x i16> %inactive)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vshlltq_m_n_s16(<4 x i32> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vshlltq_m_n_s16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vshlltt.s16 q0, q1, #12
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> %a, i32 12, i32 0, i32 1, <8 x i1> %1, <4 x i32> %inactive)
+ ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vshlltq_m_n_s16_lanewidth(<4 x i32> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vshlltq_m_n_s16_lanewidth:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vshlltt.s16 q0, q1, #16
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> %a, i32 16, i32 0, i32 1, <8 x i1> %1, <4 x i32> %inactive)
+ ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vshlltq_m_n_u8(<8 x i16> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vshlltq_m_n_u8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vshlltt.u8 q0, q1, #2
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> %a, i32 2, i32 1, i32 1, <16 x i1> %1, <8 x i16> %inactive)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vshlltq_m_n_u8_lanewidth(<8 x i16> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vshlltq_m_n_u8_lanewidth:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vshlltt.u8 q0, q1, #8
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> %a, i32 8, i32 1, i32 1, <16 x i1> %1, <8 x i16> %inactive)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vshlltq_m_n_u16(<4 x i32> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vshlltq_m_n_u16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vshlltt.u16 q0, q1, #9
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> %a, i32 9, i32 1, i32 1, <8 x i1> %1, <4 x i32> %inactive)
+ ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vshlltq_m_n_u16_lanewidth(<4 x i32> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vshlltq_m_n_u16_lanewidth:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vshlltt.u16 q0, q1, #16
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> %a, i32 16, i32 1, i32 1, <8 x i1> %1, <4 x i32> %inactive)
+ ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vshllbq_x_n_s8(<16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vshllbq_x_n_s8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vshllbt.s8 q0, q0, #1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> %a, i32 1, i32 0, i32 0, <16 x i1> %1, <8 x i16> undef)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vshllbq_x_n_s8_lanewidth(<16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vshllbq_x_n_s8_lanewidth:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vshllbt.s8 q0, q0, #8
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> %a, i32 8, i32 0, i32 0, <16 x i1> %1, <8 x i16> undef)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vshllbq_x_n_s16(<8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vshllbq_x_n_s16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vshllbt.s16 q0, q0, #10
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> %a, i32 10, i32 0, i32 0, <8 x i1> %1, <4 x i32> undef)
+ ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vshllbq_x_n_s16_lanewidth(<8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vshllbq_x_n_s16_lanewidth:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vshllbt.s16 q0, q0, #16
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> %a, i32 16, i32 0, i32 0, <8 x i1> %1, <4 x i32> undef)
+ ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vshllbq_x_n_u8(<16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vshllbq_x_n_u8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vshllbt.u8 q0, q0, #6
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> %a, i32 6, i32 1, i32 0, <16 x i1> %1, <8 x i16> undef)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vshllbq_x_n_u8_lanewidth(<16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vshllbq_x_n_u8_lanewidth:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vshllbt.u8 q0, q0, #8
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> %a, i32 8, i32 1, i32 0, <16 x i1> %1, <8 x i16> undef)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vshllbq_x_n_u16(<8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vshllbq_x_n_u16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vshllbt.u16 q0, q0, #10
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> %a, i32 10, i32 1, i32 0, <8 x i1> %1, <4 x i32> undef)
+ ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vshllbq_x_n_u16_lanewidth(<8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vshllbq_x_n_u16_lanewidth:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vshllbt.u16 q0, q0, #16
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> %a, i32 16, i32 1, i32 0, <8 x i1> %1, <4 x i32> undef)
+ ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vshlltq_x_n_s8(<16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vshlltq_x_n_s8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vshlltt.s8 q0, q0, #2
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> %a, i32 2, i32 0, i32 1, <16 x i1> %1, <8 x i16> undef)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vshlltq_x_n_s8_lanewidth(<16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vshlltq_x_n_s8_lanewidth:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vshlltt.s8 q0, q0, #8
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> %a, i32 8, i32 0, i32 1, <16 x i1> %1, <8 x i16> undef)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vshlltq_x_n_s16(<8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vshlltq_x_n_s16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vshlltt.s16 q0, q0, #6
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> %a, i32 6, i32 0, i32 1, <8 x i1> %1, <4 x i32> undef)
+ ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vshlltq_x_n_s16_lanewidth(<8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vshlltq_x_n_s16_lanewidth:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vshlltt.s16 q0, q0, #16
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> %a, i32 16, i32 0, i32 1, <8 x i1> %1, <4 x i32> undef)
+ ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vshlltq_x_n_u8(<16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vshlltq_x_n_u8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vshlltt.u8 q0, q0, #5
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> %a, i32 5, i32 1, i32 1, <16 x i1> %1, <8 x i16> undef)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vshlltq_x_n_u8_lanewidth(<16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vshlltq_x_n_u8_lanewidth:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vshlltt.u8 q0, q0, #8
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> %a, i32 8, i32 1, i32 1, <16 x i1> %1, <8 x i16> undef)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vshlltq_x_n_u16(<8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vshlltq_x_n_u16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vshlltt.u16 q0, q0, #3
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> %a, i32 3, i32 1, i32 1, <8 x i1> %1, <4 x i32> undef)
+ ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vshlltq_x_n_u16_lanewidth(<8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vshlltq_x_n_u16_lanewidth:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vshlltt.u16 q0, q0, #16
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> %a, i32 16, i32 1, i32 1, <8 x i1> %1, <4 x i32> undef)
+ ret <4 x i32> %2
+}
+
declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)
declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
@@ -396,3 +1448,29 @@ declare <4 x i32> @llvm.arm.mve.shl.imm.predicated.v4i32.v4i1(<4 x i32>, i32, <4
declare <16 x i8> @llvm.arm.mve.shr.imm.predicated.v16i8.v16i1(<16 x i8>, i32, i32, <16 x i1>, <16 x i8>)
declare <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16>, i32, i32, <8 x i1>, <8 x i16>)
declare <4 x i32> @llvm.arm.mve.shr.imm.predicated.v4i32.v4i1(<4 x i32>, i32, i32, <4 x i1>, <4 x i32>)
+
+declare <16 x i8> @llvm.arm.mve.vqshl.imm.v16i8(<16 x i8>, i32, i32)
+declare <8 x i16> @llvm.arm.mve.vqshl.imm.v8i16(<8 x i16>, i32, i32)
+declare <4 x i32> @llvm.arm.mve.vqshl.imm.v4i32(<4 x i32>, i32, i32)
+declare <16 x i8> @llvm.arm.mve.vqshl.imm.predicated.v16i8.v16i1(<16 x i8>, i32, i32, <16 x i1>, <16 x i8>)
+declare <8 x i16> @llvm.arm.mve.vqshl.imm.predicated.v8i16.v8i1(<8 x i16>, i32, i32, <8 x i1>, <8 x i16>)
+declare <4 x i32> @llvm.arm.mve.vqshl.imm.predicated.v4i32.v4i1(<4 x i32>, i32, i32, <4 x i1>, <4 x i32>)
+
+declare <16 x i8> @llvm.arm.mve.vqshlu.imm.v16i8(<16 x i8>, i32)
+declare <8 x i16> @llvm.arm.mve.vqshlu.imm.v8i16(<8 x i16>, i32)
+declare <4 x i32> @llvm.arm.mve.vqshlu.imm.v4i32(<4 x i32>, i32)
+declare <16 x i8> @llvm.arm.mve.vqshlu.imm.predicated.v16i8.v16i1(<16 x i8>, i32, <16 x i1>, <16 x i8>)
+declare <8 x i16> @llvm.arm.mve.vqshlu.imm.predicated.v8i16.v8i1(<8 x i16>, i32, <8 x i1>, <8 x i16>)
+declare <4 x i32> @llvm.arm.mve.vqshlu.imm.predicated.v4i32.v4i1(<4 x i32>, i32, <4 x i1>, <4 x i32>)
+
+declare <16 x i8> @llvm.arm.mve.vrshr.imm.v16i8(<16 x i8>, i32, i32)
+declare <8 x i16> @llvm.arm.mve.vrshr.imm.v8i16(<8 x i16>, i32, i32)
+declare <4 x i32> @llvm.arm.mve.vrshr.imm.v4i32(<4 x i32>, i32, i32)
+declare <16 x i8> @llvm.arm.mve.vrshr.imm.predicated.v16i8.v16i1(<16 x i8>, i32, i32, <16 x i1>, <16 x i8>)
+declare <8 x i16> @llvm.arm.mve.vrshr.imm.predicated.v8i16.v8i1(<8 x i16>, i32, i32, <8 x i1>, <8 x i16>)
+declare <4 x i32> @llvm.arm.mve.vrshr.imm.predicated.v4i32.v4i1(<4 x i32>, i32, i32, <4 x i1>, <4 x i32>)
+
+declare <8 x i16> @llvm.arm.mve.vshll.imm.v8i16.v16i8(<16 x i8>, i32, i32, i32)
+declare <4 x i32> @llvm.arm.mve.vshll.imm.v4i32.v8i16(<8 x i16>, i32, i32, i32)
+declare <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8>, i32, i32, i32, <16 x i1>, <8 x i16>)
+declare <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16>, i32, i32, i32, <8 x i1>, <4 x i32>)