diff options
| author | Simon Tatham <simon.tatham@arm.com> | 2019-12-02 16:17:59 +0000 |
|---|---|---|
| committer | Simon Tatham <simon.tatham@arm.com> | 2019-12-02 16:20:30 +0000 |
| commit | d173fb5d2854a1ce42bcc34832db5039b2c60e69 (patch) | |
| tree | 68655e854c5774fcad96180adac1007a95064f19 /clang | |
| parent | 48cce077efcc3c3637aac0143b3c2c9d1cf7ab8b (diff) | |
| download | bcm5719-llvm-d173fb5d2854a1ce42bcc34832db5039b2c60e69.tar.gz bcm5719-llvm-d173fb5d2854a1ce42bcc34832db5039b2c60e69.zip | |
[ARM,MVE] Add intrinsics to deal with predicates.
Summary:
This commit adds the `vpselq` intrinsics, which take an MVE predicate
word and select lanes from two vectors; the `vctp` intrinsics, which
create a tail predicate word suitable for processing the first m
elements of a vector (e.g. in the last iteration of a loop); and
`vpnot`, which simply complements a predicate word and is just
syntactic sugar for the `~` operator.
The `vctp` ACLE intrinsics are lowered to the IR intrinsics we've
already added (and which D70592 just reorganized). I've filled in the
missing isel rule for VCTP64, and added another set of rules to
generate the predicated forms.
I needed one small tweak in MveEmitter to allow the `unpromoted` type
modifier to apply to predicates as well as integers, so that `vpnot`
doesn't pointlessly convert its input integer to an `<n x i1>` before
complementing it.
Reviewers: ostannard, MarkMurrayARM, dmgreen
Reviewed By: dmgreen
Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits
Tags: #clang, #llvm
Differential Revision: https://reviews.llvm.org/D70485
Diffstat (limited to 'clang')
| -rw-r--r-- | clang/include/clang/Basic/arm_mve.td | 26 | ||||
| -rw-r--r-- | clang/test/CodeGen/arm-mve-intrinsics/predicates.c | 290 | ||||
| -rw-r--r-- | clang/utils/TableGen/MveEmitter.cpp | 16 |
3 files changed, 325 insertions, 7 deletions
diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td index 90cccb12472..ed925a20072 100644 --- a/clang/include/clang/Basic/arm_mve.td +++ b/clang/include/clang/Basic/arm_mve.td @@ -214,6 +214,32 @@ let params = T.Float in { (IRIntBase<"maxnum", [Vector]> $a, $b)>; } +def vpselq: Intrinsic<Vector, (args Vector:$t, Vector:$f, Predicate:$pred), + (select $pred, $t, $f)> { let params = T.Usual; } +def vpselq_64: Intrinsic< + Vector, (args Vector:$t, Vector:$f, PredOf<u32>:$pred), + (bitcast (select $pred, (bitcast $t, VecOf<u32>), + (bitcast $f, VecOf<u32>)), Vector)>, + NameOverride<"vpselq"> { let params = T.All64; } + +let params = [Void], pnt = PNT_None in { + + multiclass vctp<Type pred, string intname> { + def "": Intrinsic<pred, (args u32:$val), + (u16 (IRInt<"pred_v2i", [pred]> (IRIntBase<intname> $val)))>; + def _m: Intrinsic<pred, (args u32:$val, pred:$inpred), + (u16 (IRInt<"pred_v2i", [pred]> (and $inpred, + (IRIntBase<intname> $val))))>; + } + defm vctp8q: vctp<PredOf<u8>, "arm_mve_vctp8">; + defm vctp16q: vctp<PredOf<u16>, "arm_mve_vctp16">; + defm vctp32q: vctp<PredOf<u32>, "arm_mve_vctp32">; + defm vctp64q: vctp<PredOf<u64>, "arm_mve_vctp64">; + + def vpnot: Intrinsic<PredOf<u8>, (args unpromoted<PredOf<u8>>:$pred), + (xor $pred, (u16 65535))>; + +} multiclass contiguous_load<string mnemonic, PrimitiveType memtype, list<Type> same_size, list<Type> wider> { diff --git a/clang/test/CodeGen/arm-mve-intrinsics/predicates.c b/clang/test/CodeGen/arm-mve-intrinsics/predicates.c new file mode 100644 index 00000000000..5761849d094 --- /dev/null +++ b/clang/test/CodeGen/arm-mve-intrinsics/predicates.c @@ -0,0 +1,290 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg -sroa -early-cse | FileCheck %s +// 
RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg -sroa -early-cse | FileCheck %s + +#include <arm_mve.h> + +// CHECK-LABEL: @test_vctp16q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <8 x i1> @llvm.arm.mve.vctp16(i32 [[A:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 +// CHECK-NEXT: ret i16 [[TMP2]] +// +mve_pred16_t test_vctp16q(uint32_t a) +{ + return vctp16q(a); +} + +// CHECK-LABEL: @test_vctp16q_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.vctp16(i32 [[A:%.*]]) +// CHECK-NEXT: [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP2]] +// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP3]]) +// CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16 +// CHECK-NEXT: ret i16 [[TMP5]] +// +mve_pred16_t test_vctp16q_m(uint32_t a, mve_pred16_t p) +{ + return vctp16q_m(a, p); +} + +// CHECK-LABEL: @test_vctp32q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i1> @llvm.arm.mve.vctp32(i32 [[A:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 +// CHECK-NEXT: ret i16 [[TMP2]] +// +mve_pred16_t test_vctp32q(uint32_t a) +{ + return vctp32q(a); +} + +// CHECK-LABEL: @test_vctp32q_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.vctp32(i32 [[A:%.*]]) +// CHECK-NEXT: [[TMP3:%.*]] = and <4 x i1> [[TMP1]], [[TMP2]] 
+// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP3]]) +// CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16 +// CHECK-NEXT: ret i16 [[TMP5]] +// +mve_pred16_t test_vctp32q_m(uint32_t a, mve_pred16_t p) +{ + return vctp32q_m(a, p); +} + +// CHECK-LABEL: @test_vctp64q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i1> @llvm.arm.mve.vctp64(i32 [[A:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 +// CHECK-NEXT: ret i16 [[TMP2]] +// +mve_pred16_t test_vctp64q(uint32_t a) +{ + return vctp64q(a); +} + +// CHECK-LABEL: @test_vctp64q_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.vctp64(i32 [[A:%.*]]) +// CHECK-NEXT: [[TMP3:%.*]] = and <4 x i1> [[TMP1]], [[TMP2]] +// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP3]]) +// CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16 +// CHECK-NEXT: ret i16 [[TMP5]] +// +mve_pred16_t test_vctp64q_m(uint32_t a, mve_pred16_t p) +{ + return vctp64q_m(a, p); +} + +// CHECK-LABEL: @test_vctp8q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <16 x i1> @llvm.arm.mve.vctp8(i32 [[A:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 +// CHECK-NEXT: ret i16 [[TMP2]] +// +mve_pred16_t test_vctp8q(uint32_t a) +{ + return vctp8q(a); +} + +// CHECK-LABEL: @test_vctp8q_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.vctp8(i32 [[A:%.*]]) +// CHECK-NEXT: [[TMP3:%.*]] = and <16 x i1> [[TMP1]], 
[[TMP2]] +// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP3]]) +// CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16 +// CHECK-NEXT: ret i16 [[TMP5]] +// +mve_pred16_t test_vctp8q_m(uint32_t a, mve_pred16_t p) +{ + return vctp8q_m(a, p); +} + +// CHECK-LABEL: @test_vpnot( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = xor i16 [[A:%.*]], -1 +// CHECK-NEXT: ret i16 [[TMP0]] +// +mve_pred16_t test_vpnot(mve_pred16_t a) +{ + return vpnot(a); +} + +// CHECK-LABEL: @test_vpselq_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x half> [[A:%.*]], <8 x half> [[B:%.*]] +// CHECK-NEXT: ret <8 x half> [[TMP2]] +// +float16x8_t test_vpselq_f16(float16x8_t a, float16x8_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vpselq(a, b, p); +#else /* POLYMORPHIC */ + return vpselq_f16(a, b, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vpselq_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[A:%.*]], <4 x float> [[B:%.*]] +// CHECK-NEXT: ret <4 x float> [[TMP2]] +// +float32x4_t test_vpselq_f32(float32x4_t a, float32x4_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vpselq(a, b, p); +#else /* POLYMORPHIC */ + return vpselq_f32(a, b, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vpselq_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]] +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +int16x8_t test_vpselq_s16(int16x8_t a, int16x8_t 
b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vpselq(a, b, p); +#else /* POLYMORPHIC */ + return vpselq_s16(a, b, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vpselq_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]] +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +int32x4_t test_vpselq_s32(int32x4_t a, int32x4_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vpselq(a, b, p); +#else /* POLYMORPHIC */ + return vpselq_s32(a, b, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vpselq_s64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[A:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[B:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]] +// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <2 x i64> +// CHECK-NEXT: ret <2 x i64> [[TMP5]] +// +int64x2_t test_vpselq_s64(int64x2_t a, int64x2_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vpselq(a, b, p); +#else /* POLYMORPHIC */ + return vpselq_s64(a, b, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vpselq_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = select <16 x i1> [[TMP1]], <16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]] +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +int8x16_t test_vpselq_s8(int8x16_t a, int8x16_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vpselq(a, b, p); +#else /* POLYMORPHIC */ + return vpselq_s8(a, b, p); +#endif /* POLYMORPHIC 
*/ +} + +// CHECK-LABEL: @test_vpselq_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]] +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +uint16x8_t test_vpselq_u16(uint16x8_t a, uint16x8_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vpselq(a, b, p); +#else /* POLYMORPHIC */ + return vpselq_u16(a, b, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vpselq_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]] +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +uint32x4_t test_vpselq_u32(uint32x4_t a, uint32x4_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vpselq(a, b, p); +#else /* POLYMORPHIC */ + return vpselq_u32(a, b, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vpselq_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[A:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[B:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]] +// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <2 x i64> +// CHECK-NEXT: ret <2 x i64> [[TMP5]] +// +uint64x2_t test_vpselq_u64(uint64x2_t a, uint64x2_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vpselq(a, b, p); +#else /* POLYMORPHIC */ + return vpselq_u64(a, b, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vpselq_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// 
CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = select <16 x i1> [[TMP1]], <16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]] +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +uint8x16_t test_vpselq_u8(uint8x16_t a, uint8x16_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vpselq(a, b, p); +#else /* POLYMORPHIC */ + return vpselq_u8(a, b, p); +#endif /* POLYMORPHIC */ +} + diff --git a/clang/utils/TableGen/MveEmitter.cpp b/clang/utils/TableGen/MveEmitter.cpp index 1ca3b5a3f22..422188a5f3d 100644 --- a/clang/utils/TableGen/MveEmitter.cpp +++ b/clang/utils/TableGen/MveEmitter.cpp @@ -1208,14 +1208,16 @@ Result::Ptr MveEmitter::getCodeForArg(unsigned ArgNum, const Type *ArgType, Result::Ptr V = std::make_shared<BuiltinArgResult>(ArgNum, isa<PointerType>(ArgType)); - if (const auto *ST = dyn_cast<ScalarType>(ArgType)) { - if (Promote && ST->isInteger() && ST->sizeInBits() < 32) + if (Promote) { + if (const auto *ST = dyn_cast<ScalarType>(ArgType)) { + if (ST->isInteger() && ST->sizeInBits() < 32) + V = std::make_shared<IntCastResult>(getScalarType("u32"), V); + } else if (const auto *PT = dyn_cast<PredicateType>(ArgType)) { V = std::make_shared<IntCastResult>(getScalarType("u32"), V); - } else if (const auto *PT = dyn_cast<PredicateType>(ArgType)) { - V = std::make_shared<IntCastResult>(getScalarType("u32"), V); - V = std::make_shared<IRIntrinsicResult>("arm_mve_pred_i2v", - std::vector<const Type *>{PT}, - std::vector<Result::Ptr>{V}); + V = std::make_shared<IRIntrinsicResult>("arm_mve_pred_i2v", + std::vector<const Type *>{PT}, + std::vector<Result::Ptr>{V}); + } } return V; |

