diff options
author | Simon Tatham <simon.tatham@arm.com> | 2019-12-09 15:43:50 +0000 |
---|---|---|
committer | Simon Tatham <simon.tatham@arm.com> | 2019-12-09 15:44:09 +0000 |
commit | d97b3e3e65cd77a81b39732af84a1a4229e95091 (patch) | |
tree | 554a5ee5dcb7652298b1f702cb01b687b009b3b3 /clang | |
parent | caabb713ea157f8c449c8d3eb00410bbef734a22 (diff) | |
download | bcm5719-llvm-d97b3e3e65cd77a81b39732af84a1a4229e95091.tar.gz bcm5719-llvm-d97b3e3e65cd77a81b39732af84a1a4229e95091.zip |
[ARM][MVE] Add intrinsics for immediate shifts.
Summary:
This adds the family of `vshlq_n` and `vshrq_n` ACLE intrinsics, which
shift every lane of a vector left or right by a compile-time
immediate. They mostly work by expanding to the IR `shl`, `lshr` and
`ashr` operations, with their second operand being a vector splat of
the immediate.
There's a fiddly special case, though. ACLE specifies that the
immediate in `vshrq_n` can take values up to //and including// the bit
size of the vector lane. But LLVM IR thinks that shifting right by the
full size of the lane is UB, and feels free to replace the `lshr` with
an `undef` halfway through the optimization pipeline. Hence, to keep
this legal in source code, I have to detect it at codegen time.
Logical (unsigned) right shifts by the element size are handled by
simply emitting the zero vector; arithmetic ones are converted into a
shift of one bit less, which will always give the same output.
In order to do that check, I also had to enhance the tablegen
MveEmitter so that it can cope with converting a builtin function's
operand into a bare integer to pass to a code-generating subfunction.
Previously the only bare integers it knew how to handle were flags
generated from within `arm_mve.td`.
Reviewers: dmgreen, miyuki, MarkMurrayARM, ostannard
Reviewed By: MarkMurrayARM
Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits
Tags: #clang, #llvm
Differential Revision: https://reviews.llvm.org/D71065
Diffstat (limited to 'clang')
-rw-r--r-- | clang/include/clang/Basic/arm_mve.td | 27 | ||||
-rw-r--r-- | clang/include/clang/Basic/arm_mve_defs.td | 8 | ||||
-rw-r--r-- | clang/lib/CodeGen/CGBuiltin.cpp | 29 | ||||
-rw-r--r-- | clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm.c | 722 | ||||
-rw-r--r-- | clang/utils/TableGen/MveEmitter.cpp | 83 |
5 files changed, 836 insertions(+), 33 deletions(-)
diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td index 19852702c1b..cc4b6d9e823 100644 --- a/clang/include/clang/Basic/arm_mve.td +++ b/clang/include/clang/Basic/arm_mve.td @@ -522,6 +522,33 @@ defm vstrhq: scatter_offset_both<!listconcat(T.All16, T.Int32), u16, 1>; defm vstrwq: scatter_offset_both<T.All32, u32, 2>; defm vstrdq: scatter_offset_both<T.Int64, u64, 3>; +multiclass PredicatedImmediateVectorShift< + Immediate immtype, string predIntrName, list<dag> unsignedFlag = []> { + foreach predIntr = [IRInt<predIntrName, [Vector, Predicate]>] in { + def _m_n: Intrinsic<Vector, (args Vector:$inactive, Vector:$v, + immtype:$sh, Predicate:$pred), + !con((predIntr $v, $sh), !dag(predIntr, unsignedFlag, ?), + (predIntr $pred, $inactive))>; + def _x_n: Intrinsic<Vector, (args Vector:$v, immtype:$sh, + Predicate:$pred), + !con((predIntr $v, $sh), !dag(predIntr, unsignedFlag, ?), + (predIntr $pred, (undef Vector)))>; + } +} + +let params = T.Int in { + def vshlq_n: Intrinsic<Vector, (args Vector:$v, imm_0toNm1:$sh), + (shl $v, (splat (Scalar $sh)))>; + defm vshlq: PredicatedImmediateVectorShift<imm_0toNm1, "shl_imm_predicated">; + + let pnt = PNT_NType in { + def vshrq_n: Intrinsic<Vector, (args Vector:$v, imm_1toN:$sh), + (immshr $v, $sh, (unsignedflag Scalar))>; + defm vshrq: PredicatedImmediateVectorShift<imm_1toN, "shr_imm_predicated", + [(unsignedflag Scalar)]>; + } +} + // Base class for the scalar shift intrinsics. 
class ScalarShift<Type argtype, dag shiftCountArg, dag shiftCodeGen>: Intrinsic<argtype, !con((args argtype:$value), shiftCountArg), shiftCodeGen> { diff --git a/clang/include/clang/Basic/arm_mve_defs.td b/clang/include/clang/Basic/arm_mve_defs.td index d837a1d33d0..5aa10f250ed 100644 --- a/clang/include/clang/Basic/arm_mve_defs.td +++ b/clang/include/clang/Basic/arm_mve_defs.td @@ -66,6 +66,10 @@ def xor: IRBuilder<"CreateXor">; def sub: IRBuilder<"CreateSub">; def shl: IRBuilder<"CreateShl">; def lshr: IRBuilder<"CreateLShr">; +def immshr: CGHelperFn<"MVEImmediateShr"> { + let special_params = [IRBuilderIntParam<1, "unsigned">, + IRBuilderIntParam<2, "bool">]; +} def fadd: IRBuilder<"CreateFAdd">; def fmul: IRBuilder<"CreateFMul">; def fsub: IRBuilder<"CreateFSub">; @@ -308,8 +312,8 @@ def imm_simd_vmvn : Immediate<u32, IB_UEltValue> { // // imm_0toNm1 is the same but with the range offset by 1, i.e. 0 to N-1 // inclusive. -def imm_1toN : Immediate<u32, IB_EltBit<1>>; -def imm_0toNm1 : Immediate<u32, IB_EltBit<0>>; +def imm_1toN : Immediate<sint, IB_EltBit<1>>; +def imm_0toNm1 : Immediate<sint, IB_EltBit<0>>; // imm_lane has to be the index of a vector lane in the main vector type, i.e // it can range from 0 to (128 / size of scalar)-1 inclusive. (e.g. 
vgetq_lane) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index b5b0c3e61d4..94d10a1aedf 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -6801,6 +6801,14 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, } } +template<typename Integer> +static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context) { + llvm::APSInt IntVal; + bool IsConst = E->isIntegerConstantExpr(IntVal, Context); + assert(IsConst && "Sema should have checked this was a constant"); + return IntVal.getExtValue(); +} + static llvm::Value *SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V, llvm::Type *T, bool Unsigned) { // Helper function called by Tablegen-constructed ARM MVE builtin codegen, @@ -6808,6 +6816,27 @@ static llvm::Value *SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V, return Unsigned ? Builder.CreateZExt(V, T) : Builder.CreateSExt(V, T); } +static llvm::Value *MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V, + uint32_t Shift, bool Unsigned) { + // MVE helper function for integer shift right. This must handle signed vs + // unsigned, and also deal specially with the case where the shift count is + // equal to the lane size. In LLVM IR, an LShr with that parameter would be + // undefined behavior, but in MVE it's legal, so we must convert it to code + // that is not undefined in IR. + unsigned LaneBits = + V->getType()->getVectorElementType()->getPrimitiveSizeInBits(); + if (Shift == LaneBits) { + // An unsigned shift of the full lane size always generates zero, so we can + // simply emit a zero vector. A signed shift of the full lane size does the + // same thing as shifting by one bit fewer. + if (Unsigned) + return llvm::Constant::getNullValue(V->getType()); + else + --Shift; + } + return Unsigned ? 
Builder.CreateLShr(V, Shift) : Builder.CreateAShr(V, Shift); +} + static llvm::Value *ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V) { // MVE-specific helper function for a vector splat, which infers the element // count of the output vector by knowing that MVE vectors are all 128 bits diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm.c b/clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm.c new file mode 100644 index 00000000000..200273c0365 --- /dev/null +++ b/clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm.c @@ -0,0 +1,722 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s + +#include <arm_mve.h> + +// CHECK-LABEL: @test_vshlq_n_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = shl <16 x i8> [[A:%.*]], <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5> +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +int8x16_t test_vshlq_n_s8(int8x16_t a) +{ +#ifdef POLYMORPHIC + return vshlq_n(a, 5); +#else /* POLYMORPHIC */ + return vshlq_n_s8(a, 5); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshlq_n_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = shl <8 x i16> [[A:%.*]], <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5> +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +int16x8_t test_vshlq_n_s16(int16x8_t a) +{ +#ifdef POLYMORPHIC + return vshlq_n(a, 5); +#else /* POLYMORPHIC */ + return vshlq_n_s16(a, 5); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshlq_n_s32( +// CHECK-NEXT: entry: +// 
CHECK-NEXT: [[TMP0:%.*]] = shl <4 x i32> [[A:%.*]], <i32 18, i32 18, i32 18, i32 18> +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +int32x4_t test_vshlq_n_s32(int32x4_t a) +{ +#ifdef POLYMORPHIC + return vshlq_n(a, 18); +#else /* POLYMORPHIC */ + return vshlq_n_s32(a, 18); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshlq_n_s8_trivial( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = shl <16 x i8> [[A:%.*]], zeroinitializer +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +int8x16_t test_vshlq_n_s8_trivial(int8x16_t a) +{ +#ifdef POLYMORPHIC + return vshlq_n(a, 0); +#else /* POLYMORPHIC */ + return vshlq_n_s8(a, 0); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshlq_n_s16_trivial( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = shl <8 x i16> [[A:%.*]], zeroinitializer +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +int16x8_t test_vshlq_n_s16_trivial(int16x8_t a) +{ +#ifdef POLYMORPHIC + return vshlq_n(a, 0); +#else /* POLYMORPHIC */ + return vshlq_n_s16(a, 0); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshlq_n_s32_trivial( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = shl <4 x i32> [[A:%.*]], zeroinitializer +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +int32x4_t test_vshlq_n_s32_trivial(int32x4_t a) +{ +#ifdef POLYMORPHIC + return vshlq_n(a, 0); +#else /* POLYMORPHIC */ + return vshlq_n_s32(a, 0); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshlq_n_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = shl <16 x i8> [[A:%.*]], <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +uint8x16_t test_vshlq_n_u8(uint8x16_t a) +{ +#ifdef POLYMORPHIC + return vshlq_n(a, 3); +#else /* POLYMORPHIC */ + return vshlq_n_u8(a, 3); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshlq_n_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = shl <8 x i16> [[A:%.*]], <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11> +// 
CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +uint16x8_t test_vshlq_n_u16(uint16x8_t a) +{ +#ifdef POLYMORPHIC + return vshlq_n(a, 11); +#else /* POLYMORPHIC */ + return vshlq_n_u16(a, 11); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshlq_n_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = shl <4 x i32> [[A:%.*]], <i32 7, i32 7, i32 7, i32 7> +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +uint32x4_t test_vshlq_n_u32(uint32x4_t a) +{ +#ifdef POLYMORPHIC + return vshlq_n(a, 7); +#else /* POLYMORPHIC */ + return vshlq_n_u32(a, 7); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshlq_n_u8_trivial( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = shl <16 x i8> [[A:%.*]], zeroinitializer +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +uint8x16_t test_vshlq_n_u8_trivial(uint8x16_t a) +{ +#ifdef POLYMORPHIC + return vshlq_n(a, 0); +#else /* POLYMORPHIC */ + return vshlq_n_u8(a, 0); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshlq_n_u16_trivial( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = shl <8 x i16> [[A:%.*]], zeroinitializer +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +uint16x8_t test_vshlq_n_u16_trivial(uint16x8_t a) +{ +#ifdef POLYMORPHIC + return vshlq_n(a, 0); +#else /* POLYMORPHIC */ + return vshlq_n_u16(a, 0); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshlq_n_u32_trivial( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = shl <4 x i32> [[A:%.*]], zeroinitializer +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +uint32x4_t test_vshlq_n_u32_trivial(uint32x4_t a) +{ +#ifdef POLYMORPHIC + return vshlq_n(a, 0); +#else /* POLYMORPHIC */ + return vshlq_n_u32(a, 0); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshrq_n_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = ashr <16 x i8> [[A:%.*]], <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4> +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +int8x16_t test_vshrq_n_s8(int8x16_t a) +{ +#ifdef POLYMORPHIC + return 
vshrq(a, 4); +#else /* POLYMORPHIC */ + return vshrq_n_s8(a, 4); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshrq_n_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = ashr <8 x i16> [[A:%.*]], <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10> +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +int16x8_t test_vshrq_n_s16(int16x8_t a) +{ +#ifdef POLYMORPHIC + return vshrq(a, 10); +#else /* POLYMORPHIC */ + return vshrq_n_s16(a, 10); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshrq_n_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = ashr <4 x i32> [[A:%.*]], <i32 19, i32 19, i32 19, i32 19> +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +int32x4_t test_vshrq_n_s32(int32x4_t a) +{ +#ifdef POLYMORPHIC + return vshrq(a, 19); +#else /* POLYMORPHIC */ + return vshrq_n_s32(a, 19); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshrq_n_s8_trivial( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = ashr <16 x i8> [[A:%.*]], <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7> +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +int8x16_t test_vshrq_n_s8_trivial(int8x16_t a) +{ +#ifdef POLYMORPHIC + return vshrq(a, 8); +#else /* POLYMORPHIC */ + return vshrq_n_s8(a, 8); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshrq_n_s16_trivial( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = ashr <8 x i16> [[A:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15> +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +int16x8_t test_vshrq_n_s16_trivial(int16x8_t a) +{ +#ifdef POLYMORPHIC + return vshrq(a, 16); +#else /* POLYMORPHIC */ + return vshrq_n_s16(a, 16); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshrq_n_s32_trivial( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = ashr <4 x i32> [[A:%.*]], <i32 31, i32 31, i32 31, i32 31> +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +int32x4_t test_vshrq_n_s32_trivial(int32x4_t a) +{ +#ifdef POLYMORPHIC + return vshrq(a, 
32); +#else /* POLYMORPHIC */ + return vshrq_n_s32(a, 32); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshrq_n_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = lshr <16 x i8> [[A:%.*]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +uint8x16_t test_vshrq_n_u8(uint8x16_t a) +{ +#ifdef POLYMORPHIC + return vshrq(a, 1); +#else /* POLYMORPHIC */ + return vshrq_n_u8(a, 1); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshrq_n_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = lshr <8 x i16> [[A:%.*]], <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10> +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +uint16x8_t test_vshrq_n_u16(uint16x8_t a) +{ +#ifdef POLYMORPHIC + return vshrq(a, 10); +#else /* POLYMORPHIC */ + return vshrq_n_u16(a, 10); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshrq_n_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = lshr <4 x i32> [[A:%.*]], <i32 10, i32 10, i32 10, i32 10> +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +uint32x4_t test_vshrq_n_u32(uint32x4_t a) +{ +#ifdef POLYMORPHIC + return vshrq(a, 10); +#else /* POLYMORPHIC */ + return vshrq_n_u32(a, 10); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshrq_n_u8_trivial( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret <16 x i8> zeroinitializer +// +uint8x16_t test_vshrq_n_u8_trivial(uint8x16_t a) +{ +#ifdef POLYMORPHIC + return vshrq(a, 8); +#else /* POLYMORPHIC */ + return vshrq_n_u8(a, 8); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshrq_n_u16_trivial( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret <8 x i16> zeroinitializer +// +uint16x8_t test_vshrq_n_u16_trivial(uint16x8_t a) +{ +#ifdef POLYMORPHIC + return vshrq(a, 16); +#else /* POLYMORPHIC */ + return vshrq_n_u16(a, 16); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshrq_n_u32_trivial( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret <4 x i32> zeroinitializer +// 
+uint32x4_t test_vshrq_n_u32_trivial(uint32x4_t a) +{ +#ifdef POLYMORPHIC + return vshrq(a, 32); +#else /* POLYMORPHIC */ + return vshrq_n_u32(a, 32); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshlq_m_n_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.shl.imm.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 6, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +int8x16_t test_vshlq_m_n_s8(int8x16_t inactive, int8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vshlq_m_n(inactive, a, 6, p); +#else /* POLYMORPHIC */ + return vshlq_m_n_s8(inactive, a, 6, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshlq_m_n_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.shl.imm.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 13, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +int16x8_t test_vshlq_m_n_s16(int16x8_t inactive, int16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vshlq_m_n(inactive, a, 13, p); +#else /* POLYMORPHIC */ + return vshlq_m_n_s16(inactive, a, 13, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshlq_m_n_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.shl.imm.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +int32x4_t test_vshlq_m_n_s32(int32x4_t inactive, int32x4_t a, mve_pred16_t p) +{ +#ifdef 
POLYMORPHIC + return vshlq_m_n(inactive, a, 0, p); +#else /* POLYMORPHIC */ + return vshlq_m_n_s32(inactive, a, 0, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshlq_m_n_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.shl.imm.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 3, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +uint8x16_t test_vshlq_m_n_u8(uint8x16_t inactive, uint8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vshlq_m_n(inactive, a, 3, p); +#else /* POLYMORPHIC */ + return vshlq_m_n_u8(inactive, a, 3, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshlq_m_n_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.shl.imm.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 1, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +uint16x8_t test_vshlq_m_n_u16(uint16x8_t inactive, uint16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vshlq_m_n(inactive, a, 1, p); +#else /* POLYMORPHIC */ + return vshlq_m_n_u16(inactive, a, 1, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshlq_m_n_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.shl.imm.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 24, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +uint32x4_t test_vshlq_m_n_u32(uint32x4_t inactive, uint32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return 
vshlq_m_n(inactive, a, 24, p); +#else /* POLYMORPHIC */ + return vshlq_m_n_u32(inactive, a, 24, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshrq_m_n_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.shr.imm.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 2, i32 0, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +int8x16_t test_vshrq_m_n_s8(int8x16_t inactive, int8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vshrq_m(inactive, a, 2, p); +#else /* POLYMORPHIC */ + return vshrq_m_n_s8(inactive, a, 2, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshrq_m_n_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 3, i32 0, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +int16x8_t test_vshrq_m_n_s16(int16x8_t inactive, int16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vshrq_m(inactive, a, 3, p); +#else /* POLYMORPHIC */ + return vshrq_m_n_s16(inactive, a, 3, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshrq_m_n_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.shr.imm.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 13, i32 0, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +int32x4_t test_vshrq_m_n_s32(int32x4_t inactive, int32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vshrq_m(inactive, 
a, 13, p); +#else /* POLYMORPHIC */ + return vshrq_m_n_s32(inactive, a, 13, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshrq_m_n_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.shr.imm.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 4, i32 1, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +uint8x16_t test_vshrq_m_n_u8(uint8x16_t inactive, uint8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vshrq_m(inactive, a, 4, p); +#else /* POLYMORPHIC */ + return vshrq_m_n_u8(inactive, a, 4, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshrq_m_n_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 14, i32 1, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +uint16x8_t test_vshrq_m_n_u16(uint16x8_t inactive, uint16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vshrq_m(inactive, a, 14, p); +#else /* POLYMORPHIC */ + return vshrq_m_n_u16(inactive, a, 14, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshrq_m_n_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.shr.imm.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 21, i32 1, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +uint32x4_t test_vshrq_m_n_u32(uint32x4_t inactive, uint32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vshrq_m(inactive, a, 21, 
p); +#else /* POLYMORPHIC */ + return vshrq_m_n_u32(inactive, a, 21, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshlq_x_n_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.shl.imm.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 1, <16 x i1> [[TMP1]], <16 x i8> undef) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +int8x16_t test_vshlq_x_n_s8(int8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vshlq_x_n(a, 1, p); +#else /* POLYMORPHIC */ + return vshlq_x_n_s8(a, 1, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshlq_x_n_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.shl.imm.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 15, <8 x i1> [[TMP1]], <8 x i16> undef) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +int16x8_t test_vshlq_x_n_s16(int16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vshlq_x_n(a, 15, p); +#else /* POLYMORPHIC */ + return vshlq_x_n_s16(a, 15, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshlq_x_n_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.shl.imm.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 13, <4 x i1> [[TMP1]], <4 x i32> undef) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +int32x4_t test_vshlq_x_n_s32(int32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vshlq_x_n(a, 13, p); +#else /* POLYMORPHIC */ + return vshlq_x_n_s32(a, 13, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshlq_x_n_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: 
[[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.shl.imm.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 4, <16 x i1> [[TMP1]], <16 x i8> undef) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +uint8x16_t test_vshlq_x_n_u8(uint8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vshlq_x_n(a, 4, p); +#else /* POLYMORPHIC */ + return vshlq_x_n_u8(a, 4, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshlq_x_n_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.shl.imm.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 10, <8 x i1> [[TMP1]], <8 x i16> undef) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +uint16x8_t test_vshlq_x_n_u16(uint16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vshlq_x_n(a, 10, p); +#else /* POLYMORPHIC */ + return vshlq_x_n_u16(a, 10, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshlq_x_n_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.shl.imm.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 30, <4 x i1> [[TMP1]], <4 x i32> undef) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +uint32x4_t test_vshlq_x_n_u32(uint32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vshlq_x_n(a, 30, p); +#else /* POLYMORPHIC */ + return vshlq_x_n_u32(a, 30, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshrq_x_n_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> 
@llvm.arm.mve.shr.imm.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 4, i32 0, <16 x i1> [[TMP1]], <16 x i8> undef) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +int8x16_t test_vshrq_x_n_s8(int8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vshrq_x(a, 4, p); +#else /* POLYMORPHIC */ + return vshrq_x_n_s8(a, 4, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshrq_x_n_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 10, i32 0, <8 x i1> [[TMP1]], <8 x i16> undef) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +int16x8_t test_vshrq_x_n_s16(int16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vshrq_x(a, 10, p); +#else /* POLYMORPHIC */ + return vshrq_x_n_s16(a, 10, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshrq_x_n_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.shr.imm.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 7, i32 0, <4 x i1> [[TMP1]], <4 x i32> undef) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +int32x4_t test_vshrq_x_n_s32(int32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vshrq_x(a, 7, p); +#else /* POLYMORPHIC */ + return vshrq_x_n_s32(a, 7, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshrq_x_n_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.shr.imm.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 7, i32 1, <16 x i1> [[TMP1]], <16 x i8> undef) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +uint8x16_t 
test_vshrq_x_n_u8(uint8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vshrq_x(a, 7, p); +#else /* POLYMORPHIC */ + return vshrq_x_n_u8(a, 7, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshrq_x_n_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 7, i32 1, <8 x i1> [[TMP1]], <8 x i16> undef) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +uint16x8_t test_vshrq_x_n_u16(uint16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vshrq_x(a, 7, p); +#else /* POLYMORPHIC */ + return vshrq_x_n_u16(a, 7, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshrq_x_n_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.shr.imm.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 6, i32 1, <4 x i1> [[TMP1]], <4 x i32> undef) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +uint32x4_t test_vshrq_x_n_u32(uint32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vshrq_x(a, 6, p); +#else /* POLYMORPHIC */ + return vshrq_x_n_u32(a, 6, p); +#endif /* POLYMORPHIC */ +} diff --git a/clang/utils/TableGen/MveEmitter.cpp b/clang/utils/TableGen/MveEmitter.cpp index 422188a5f3d..37bf3220182 100644 --- a/clang/utils/TableGen/MveEmitter.cpp +++ b/clang/utils/TableGen/MveEmitter.cpp @@ -469,6 +469,10 @@ public: virtual void genCode(raw_ostream &OS, CodeGenParamAllocator &) const = 0; virtual bool hasIntegerConstantValue() const { return false; } virtual uint32_t integerConstantValue() const { return 0; } + virtual bool hasIntegerValue() const { return false; } + virtual std::string getIntegerValue(const std::string &) { + llvm_unreachable("non-working 
Result::getIntegerValue called"); + } virtual std::string typeName() const { return "Value *"; } // Mostly, when a code-generation operation has a dependency on prior @@ -543,8 +547,9 @@ class BuiltinArgResult : public Result { public: unsigned ArgNum; bool AddressType; - BuiltinArgResult(unsigned ArgNum, bool AddressType) - : ArgNum(ArgNum), AddressType(AddressType) {} + bool Immediate; + BuiltinArgResult(unsigned ArgNum, bool AddressType, bool Immediate) + : ArgNum(ArgNum), AddressType(AddressType), Immediate(Immediate) {} void genCode(raw_ostream &OS, CodeGenParamAllocator &) const override { OS << (AddressType ? "EmitPointerWithAlignment" : "EmitScalarExpr") << "(E->getArg(" << ArgNum << "))"; @@ -558,6 +563,11 @@ public: return "(" + varname() + ".getPointer())"; return Result::asValue(); } + bool hasIntegerValue() const override { return Immediate; } + virtual std::string getIntegerValue(const std::string &IntType) { + return "GetIntegerConstantValue<" + IntType + ">(E->getArg(" + + utostr(ArgNum) + "), getContext())"; + } }; // Result subclass for an integer literal appearing in Tablegen. 
This may need @@ -632,27 +642,34 @@ public: StringRef CallPrefix; std::vector<Ptr> Args; std::set<unsigned> AddressArgs; - std::map<unsigned, std::string> IntConstantArgs; + std::map<unsigned, std::string> IntegerArgs; IRBuilderResult(StringRef CallPrefix, std::vector<Ptr> Args, std::set<unsigned> AddressArgs, - std::map<unsigned, std::string> IntConstantArgs) - : CallPrefix(CallPrefix), Args(Args), AddressArgs(AddressArgs), - IntConstantArgs(IntConstantArgs) {} + std::map<unsigned, std::string> IntegerArgs) + : CallPrefix(CallPrefix), Args(Args), AddressArgs(AddressArgs), + IntegerArgs(IntegerArgs) {} void genCode(raw_ostream &OS, CodeGenParamAllocator &ParamAlloc) const override { OS << CallPrefix; const char *Sep = ""; for (unsigned i = 0, e = Args.size(); i < e; ++i) { Ptr Arg = Args[i]; - auto it = IntConstantArgs.find(i); - if (it != IntConstantArgs.end()) { - assert(Arg->hasIntegerConstantValue()); - OS << Sep << "static_cast<" << it->second << ">(" - << ParamAlloc.allocParam("unsigned", - utostr(Arg->integerConstantValue())) - << ")"; + auto it = IntegerArgs.find(i); + + OS << Sep; + Sep = ", "; + + if (it != IntegerArgs.end()) { + if (Arg->hasIntegerConstantValue()) + OS << "static_cast<" << it->second << ">(" + << ParamAlloc.allocParam(it->second, + utostr(Arg->integerConstantValue())) + << ")"; + else if (Arg->hasIntegerValue()) + OS << ParamAlloc.allocParam(it->second, + Arg->getIntegerValue(it->second)); } else { - OS << Sep << Arg->varname(); + OS << Arg->varname(); } Sep = ", "; } @@ -661,7 +678,8 @@ public: void morePrerequisites(std::vector<Ptr> &output) const override { for (unsigned i = 0, e = Args.size(); i < e; ++i) { Ptr Arg = Args[i]; - if (IntConstantArgs.find(i) != IntConstantArgs.end()) + if (IntegerArgs.find(i) != IntegerArgs.end() && + Arg->hasIntegerConstantValue()) continue; output.push_back(Arg); } @@ -980,8 +998,8 @@ public: const Type *Param); Result::Ptr getCodeForDagArg(DagInit *D, unsigned ArgNum, const Result::Scope &Scope, 
const Type *Param); - Result::Ptr getCodeForArg(unsigned ArgNum, const Type *ArgType, - bool Promote); + Result::Ptr getCodeForArg(unsigned ArgNum, const Type *ArgType, bool Promote, + bool Immediate); // Constructor and top-level functions. @@ -1144,17 +1162,17 @@ Result::Ptr MveEmitter::getCodeForDag(DagInit *D, const Result::Scope &Scope, Args.push_back(getCodeForDagArg(D, i, Scope, Param)); if (Op->isSubClassOf("IRBuilderBase")) { std::set<unsigned> AddressArgs; - std::map<unsigned, std::string> IntConstantArgs; + std::map<unsigned, std::string> IntegerArgs; for (Record *sp : Op->getValueAsListOfDefs("special_params")) { unsigned Index = sp->getValueAsInt("index"); if (sp->isSubClassOf("IRBuilderAddrParam")) { AddressArgs.insert(Index); } else if (sp->isSubClassOf("IRBuilderIntParam")) { - IntConstantArgs[Index] = sp->getValueAsString("type"); + IntegerArgs[Index] = sp->getValueAsString("type"); } } - return std::make_shared<IRBuilderResult>( - Op->getValueAsString("prefix"), Args, AddressArgs, IntConstantArgs); + return std::make_shared<IRBuilderResult>(Op->getValueAsString("prefix"), + Args, AddressArgs, IntegerArgs); } else if (Op->isSubClassOf("IRIntBase")) { std::vector<const Type *> ParamTypes; for (Record *RParam : Op->getValueAsListOfDefs("params")) @@ -1204,9 +1222,9 @@ Result::Ptr MveEmitter::getCodeForDagArg(DagInit *D, unsigned ArgNum, } Result::Ptr MveEmitter::getCodeForArg(unsigned ArgNum, const Type *ArgType, - bool Promote) { - Result::Ptr V = - std::make_shared<BuiltinArgResult>(ArgNum, isa<PointerType>(ArgType)); + bool Promote, bool Immediate) { + Result::Ptr V = std::make_shared<BuiltinArgResult>( + ArgNum, isa<PointerType>(ArgType), Immediate); if (Promote) { if (const auto *ST = dyn_cast<ScalarType>(ArgType)) { @@ -1279,17 +1297,14 @@ ACLEIntrinsic::ACLEIntrinsic(MveEmitter &ME, Record *R, const Type *Param) const Type *ArgType = ME.getType(TypeInit, Param); ArgTypes.push_back(ArgType); - // The argument will usually have a name in the 
arguments dag, which goes - // into the variable-name scope that the code gen will refer to. - StringRef ArgName = ArgsDag->getArgNameStr(i); - if (!ArgName.empty()) - Scope[ArgName] = ME.getCodeForArg(i, ArgType, Promote); - // If the argument is a subclass of Immediate, record the details about // what values it can take, for Sema checking. + bool Immediate = false; if (auto TypeDI = dyn_cast<DefInit>(TypeInit)) { Record *TypeRec = TypeDI->getDef(); if (TypeRec->isSubClassOf("Immediate")) { + Immediate = true; + Record *Bounds = TypeRec->getValueAsDef("bounds"); ImmediateArg &IA = ImmediateArgs[i]; if (Bounds->isSubClassOf("IB_ConstRange")) { @@ -1303,7 +1318,7 @@ ACLEIntrinsic::ACLEIntrinsic(MveEmitter &ME, Record *R, const Type *Param) IA.boundsType = ImmediateArg::BoundsType::ExplicitRange; IA.i1 = 0; IA.i2 = 128 / Param->sizeInBits() - 1; - } else if (Bounds->getName() == "IB_EltBit") { + } else if (Bounds->isSubClassOf("IB_EltBit")) { IA.boundsType = ImmediateArg::BoundsType::ExplicitRange; IA.i1 = Bounds->getValueAsInt("base"); IA.i2 = IA.i1 + Param->sizeInBits() - 1; @@ -1320,6 +1335,12 @@ ACLEIntrinsic::ACLEIntrinsic(MveEmitter &ME, Record *R, const Type *Param) } } } + + // The argument will usually have a name in the arguments dag, which goes + // into the variable-name scope that the code gen will refer to. + StringRef ArgName = ArgsDag->getArgNameStr(i); + if (!ArgName.empty()) + Scope[ArgName] = ME.getCodeForArg(i, ArgType, Promote, Immediate); } // Finally, go through the codegen dag and translate it into a Result object |