[clang,ARM] Initial ACLE intrinsics for MVE.

This commit sets up the infrastructure for auto-generating <arm_mve.h> and doing clang-side code generation for the builtins it relies on, and demonstrates that it works by implementing a representative sample of the ACLE intrinsics, more or less matching the ones introduced in LLVM IR by D67158,D68699,D68700. Like NEON, that header file will provide a set of vector types like uint16x8_t and C functions with names like vaddq_u32(). Unlike NEON, the ACLE spec for <arm_mve.h> includes a polymorphism system, so that you can write plain vaddq() and disambiguate by the vector types you pass to it. Unlike the corresponding NEON code, I've arranged to make every user- facing ACLE intrinsic into a clang builtin, and implement all the code generation inside clang. So <arm_mve.h> itself contains nothing but typedefs and function declarations, with the latter all using the new `__attribute__((__clang_builtin))` system to arrange that the user- facing function names correspond to the right internal BuiltinIDs. So the new MveEmitter tablegen system specifies the full sequence of IRBuilder operations that each user-facing ACLE intrinsic should translate into. Where possible, the ACLE intrinsics map to standard IR operations such as vector-typed `add` and `fadd`; where no standard representation exists, I call down to the sample IR intrinsics introduced in an earlier commit. Doing it like this means that you get the polymorphism for free just by using __attribute__((overloadable)): the clang overload resolution decides which function declaration is the relevant one, and _then_ its BuiltinID is looked up, so by the time we're doing code generation, that's all been resolved by the standard system. It also means that you get really nice error messages if the user passes the wrong combination of types: clang will show the declarations from the header file and explain why each one doesn't match. (The obvious alternative approach would be to have wrapper functions in <arm_mve.h> which pass their arguments to the underlying builtins. But that doesn't work in the case where one of the arguments has to be a constant integer: the wrapper function can't pass the constantness through. So you'd have to do that case using a macro instead, and then use C11 `_Generic` to handle the polymorphism. Then you have to add horrible workarounds because `_Generic` requires even the untaken branches to type-check successfully, and //then// if the user gets the types wrong, the error message is totally unreadable!) Reviewers: dmgreen, miyuki, ostannard Subscribers: mgorny, javed.absar, kristof.beyls, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D67161
author: Simon Tatham <simon.tatham@arm.com> 2019-09-02 15:50:50 +0100
committer: Simon Tatham <simon.tatham@arm.com> 2019-10-24 16:33:13 +0100
commit: 08074cc96557dcb7ec91d7cd84c412414fa9a516 (patch)
tree: 11371e0a7950d1d4b5dc0e2d4476e1ba91b4672d /clang/test
parent: 7c11da0cfd3396150c13657a2217f2044f314734 (diff)
download: bcm5719-llvm-08074cc96557dcb7ec91d7cd84c412414fa9a516.tar.gz
bcm5719-llvm-08074cc96557dcb7ec91d7cd84c412414fa9a516.zip
7 files changed, 448 insertions, 0 deletions
diff --git a/clang/test/CodeGen/arm-mve-intrinsics/scalar-shifts.c b/clang/test/CodeGen/arm-mve-intrinsics/scalar-shifts.c
new file mode 100644
index 00000000000..d7c4d5e85ae
--- /dev/null
+++ b/clang/test/CodeGen/arm-mve-intrinsics/scalar-shifts.c
@@ -0,0 +1,23 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang --target=arm-arm-none-eabi -march=armv8.1m.main+mve.fp -mfloat-abi=hard -O0 -Xclang -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+
+#include <arm_mve.h>
+
+// CHECK-LABEL: @test_urshrl(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[VALUE:%.*]], 32
+// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
+// CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[VALUE]] to i32
+// CHECK-NEXT:    [[TMP3:%.*]] = call { i32, i32 } @llvm.arm.mve.urshrl(i32 [[TMP2]], i32 [[TMP1]], i32 6)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP3]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
+// CHECK-NEXT:    [[TMP6:%.*]] = shl i64 [[TMP5]], 32
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { i32, i32 } [[TMP3]], 0
+// CHECK-NEXT:    [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
+// CHECK-NEXT:    [[TMP9:%.*]] = or i64 [[TMP6]], [[TMP8]]
+// CHECK-NEXT:    ret i64 [[TMP9]]
+//
+uint64_t test_urshrl(uint64_t value)
+{
+    return urshrl(value, 6);
+}
diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vadc.c b/clang/test/CodeGen/arm-mve-intrinsics/vadc.c
new file mode 100644
index 00000000000..bd6bdc53d08
--- /dev/null
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vadc.c
@@ -0,0 +1,89 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang --target=arm-arm-none-eabi -march=armv8.1m.main+mve.fp -mfloat-abi=hard -O0 -Xclang -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang --target=arm-arm-none-eabi -march=armv8.1m.main+mve.fp -mfloat-abi=hard -DPOLYMORPHIC -O0 -Xclang -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+
+#include <arm_mve.h>
+
+// CHECK-LABEL: @test_vadciq_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vadc.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP2:%.*]] = lshr i32 [[TMP1]], 29
+// CHECK-NEXT:    [[TMP3:%.*]] = and i32 1, [[TMP2]]
+// CHECK-NEXT:    store i32 [[TMP3]], i32* [[CARRY_OUT:%.*]], align 4
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP0]], 0
+// CHECK-NEXT:    ret <4 x i32> [[TMP4]]
+//
+int32x4_t test_vadciq_s32(int32x4_t a, int32x4_t b, unsigned *carry_out)
+{
+#ifdef POLYMORPHIC
+    return vadciq(a, b, carry_out);
+#else /* POLYMORPHIC */
+    return vadciq_s32(a, b, carry_out);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vadcq_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[CARRY:%.*]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = shl i32 [[TMP0]], 29
+// CHECK-NEXT:    [[TMP2:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vadc.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP3]], 29
+// CHECK-NEXT:    [[TMP5:%.*]] = and i32 1, [[TMP4]]
+// CHECK-NEXT:    store i32 [[TMP5]], i32* [[CARRY]], align 4
+// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 0
+// CHECK-NEXT:    ret <4 x i32> [[TMP6]]
+//
+uint32x4_t test_vadcq_u32(uint32x4_t a, uint32x4_t b, unsigned *carry)
+{
+#ifdef POLYMORPHIC
+    return vadcq(a, b, carry);
+#else /* POLYMORPHIC */
+    return vadcq_u32(a, b, carry);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vadciq_m_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vadc.predicated.v4i32.v4i1(<4 x i32> [[INACTIVE:%.*]], <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0, <4 x i1> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP3]], 29
+// CHECK-NEXT:    [[TMP5:%.*]] = and i32 1, [[TMP4]]
+// CHECK-NEXT:    store i32 [[TMP5]], i32* [[CARRY_OUT:%.*]], align 4
+// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 0
+// CHECK-NEXT:    ret <4 x i32> [[TMP6]]
+//
+uint32x4_t test_vadciq_m_u32(uint32x4_t inactive, uint32x4_t a, uint32x4_t b, unsigned *carry_out, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vadciq_m(inactive, a, b, carry_out, p);
+#else /* POLYMORPHIC */
+    return vadciq_m_u32(inactive, a, b, carry_out, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vadcq_m_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[CARRY:%.*]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = shl i32 [[TMP0]], 29
+// CHECK-NEXT:    [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP2]])
+// CHECK-NEXT:    [[TMP4:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vadc.predicated.v4i32.v4i1(<4 x i32> [[INACTIVE:%.*]], <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 [[TMP1]], <4 x i1> [[TMP3]])
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP4]], 1
+// CHECK-NEXT:    [[TMP6:%.*]] = lshr i32 [[TMP5]], 29
+// CHECK-NEXT:    [[TMP7:%.*]] = and i32 1, [[TMP6]]
+// CHECK-NEXT:    store i32 [[TMP7]], i32* [[CARRY]], align 4
+// CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP4]], 0
+// CHECK-NEXT:    ret <4 x i32> [[TMP8]]
+//
+int32x4_t test_vadcq_m_s32(int32x4_t inactive, int32x4_t a, int32x4_t b, unsigned *carry, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vadcq_m(inactive, a, b, carry, p);
+#else /* POLYMORPHIC */
+    return vadcq_m_s32(inactive, a, b, carry, p);
+#endif /* POLYMORPHIC */
+}
diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vaddq.c b/clang/test/CodeGen/arm-mve-intrinsics/vaddq.c
new file mode 100644
index 00000000000..30923ee1a2b
--- /dev/null
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vaddq.c
@@ -0,0 +1,65 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang --target=arm-arm-none-eabi -march=armv8.1m.main+mve.fp -mfloat-abi=hard -O0 -Xclang -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang --target=arm-arm-none-eabi -march=armv8.1m.main+mve.fp -mfloat-abi=hard -DPOLYMORPHIC -O0 -Xclang -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+
+#include <arm_mve.h>
+
+// CHECK-LABEL: @test_vaddq_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = add <4 x i32> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+uint32x4_t test_vaddq_u32(uint32x4_t a, uint32x4_t b)
+{
+#ifdef POLYMORPHIC
+    return vaddq(a, b);
+#else /* POLYMORPHIC */
+    return vaddq_u32(a, b);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vsubq_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = fsub <8 x half> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    ret <8 x half> [[TMP0]]
+//
+float16x8_t test_vsubq_f16(float16x8_t a, float16x8_t b)
+{
+#ifdef POLYMORPHIC
+    return vsubq(a, b);
+#else /* POLYMORPHIC */
+    return vsubq_f16(a, b);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vaddq_m_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.add.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vaddq_m_s8(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vaddq_m(inactive, a, b, p);
+#else /* POLYMORPHIC */
+    return vaddq_m_s8(inactive, a, b, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vsubq_m_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.sub.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vsubq_m_f32(float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vsubq_m(inactive, a, b, p);
+#else /* POLYMORPHIC */
+    return vsubq_m_f32(inactive, a, b, p);
+#endif /* POLYMORPHIC */
+}
diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vcvt.c b/clang/test/CodeGen/arm-mve-intrinsics/vcvt.c
new file mode 100644
index 00000000000..ab1b0180eee
--- /dev/null
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vcvt.c
@@ -0,0 +1,26 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang --target=arm-arm-none-eabi -march=armv8.1m.main+mve.fp -mfloat-abi=hard -O0 -Xclang -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+
+#include <arm_mve.h>
+
+// CHECK-LABEL: @test_vcvttq_f16_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.narrow(<8 x half> [[A:%.*]], <4 x float> [[B:%.*]], i32 1)
+// CHECK-NEXT:    ret <8 x half> [[TMP0]]
+//
+float16x8_t test_vcvttq_f16_f32(float16x8_t a, float32x4_t b)
+{
+    return vcvttq_f16_f32(a, b);
+}
+
+// CHECK-LABEL: @test_vcvttq_m_f16_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.narrow.predicated(<8 x half> [[A:%.*]], <4 x float> [[B:%.*]], i32 1, <4 x i1> [[TMP1]])
+// CHECK-NEXT:    ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vcvttq_m_f16_f32(float16x8_t a, float32x4_t b, mve_pred16_t p)
+{
+    return vcvttq_m_f16_f32(a, b, p);
+}
diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vld24.c b/clang/test/CodeGen/arm-mve-intrinsics/vld24.c
new file mode 100644
index 00000000000..2adf6db9883
--- /dev/null
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vld24.c
@@ -0,0 +1,100 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang --target=arm-arm-none-eabi -march=armv8.1m.main+mve.fp -mfloat-abi=hard -O0 -Xclang -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg -sroa -early-cse | FileCheck %s
+// RUN: %clang --target=arm-arm-none-eabi -march=armv8.1m.main+mve.fp -mfloat-abi=hard -DPOLYMORPHIC -O0 -Xclang -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg -sroa -early-cse | FileCheck %s
+
+#include <arm_mve.h>
+
+// CHECK-LABEL: @test_vld2q_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call { <8 x half>, <8 x half> } @llvm.arm.mve.vld2q.v8f16.p0f16(half* [[ADDR:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <8 x half>, <8 x half> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = insertvalue [[STRUCT_FLOAT16X8X2_T:%.*]] undef, <8 x half> [[TMP1]], 0, 0
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <8 x half>, <8 x half> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = insertvalue [[STRUCT_FLOAT16X8X2_T]] %2, <8 x half> [[TMP3]], 0, 1
+// CHECK-NEXT:    ret [[STRUCT_FLOAT16X8X2_T]] %4
+//
+float16x8x2_t test_vld2q_f16(const float16_t *addr)
+{
+#ifdef POLYMORPHIC
+    return vld2q(addr);
+#else /* POLYMORPHIC */
+    return vld2q_f16(addr);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vld4q_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.mve.vld4q.v16i8.p0i8(i8* [[ADDR:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = insertvalue [[STRUCT_UINT8X16X4_T:%.*]] undef, <16 x i8> [[TMP1]], 0, 0
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = insertvalue [[STRUCT_UINT8X16X4_T]] %2, <16 x i8> [[TMP3]], 0, 1
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP0]], 2
+// CHECK-NEXT:    [[TMP6:%.*]] = insertvalue [[STRUCT_UINT8X16X4_T]] %4, <16 x i8> [[TMP5]], 0, 2
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP0]], 3
+// CHECK-NEXT:    [[TMP8:%.*]] = insertvalue [[STRUCT_UINT8X16X4_T]] %6, <16 x i8> [[TMP7]], 0, 3
+// CHECK-NEXT:    ret [[STRUCT_UINT8X16X4_T]] %8
+//
+uint8x16x4_t test_vld4q_u8(const uint8_t *addr)
+{
+#ifdef POLYMORPHIC
+    return vld4q(addr);
+#else /* POLYMORPHIC */
+    return vld4q_u8(addr);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vst2q_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VALUE_COERCE_FCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_UINT32X4X2_T:%.*]] %value.coerce, 0, 0
+// CHECK-NEXT:    [[VALUE_COERCE_FCA_0_1_EXTRACT:%.*]] = extractvalue [[STRUCT_UINT32X4X2_T]] %value.coerce, 0, 1
+// CHECK-NEXT:    call void @llvm.arm.mve.vst2q.p0i32.v4i32(i32* [[ADDR:%.*]], <4 x i32> [[VALUE_COERCE_FCA_0_0_EXTRACT]], <4 x i32> [[VALUE_COERCE_FCA_0_1_EXTRACT]], i32 0)
+// CHECK-NEXT:    call void @llvm.arm.mve.vst2q.p0i32.v4i32(i32* [[ADDR]], <4 x i32> [[VALUE_COERCE_FCA_0_0_EXTRACT]], <4 x i32> [[VALUE_COERCE_FCA_0_1_EXTRACT]], i32 1)
+// CHECK-NEXT:    ret void
+//
+void test_vst2q_u32(uint32_t *addr, uint32x4x2_t value)
+{
+#ifdef POLYMORPHIC
+    vst2q(addr, value);
+#else /* POLYMORPHIC */
+    vst2q_u32(addr, value);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vst4q_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VALUE_COERCE_FCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_INT8X16X4_T:%.*]] %value.coerce, 0, 0
+// CHECK-NEXT:    [[VALUE_COERCE_FCA_0_1_EXTRACT:%.*]] = extractvalue [[STRUCT_INT8X16X4_T]] %value.coerce, 0, 1
+// CHECK-NEXT:    [[VALUE_COERCE_FCA_0_2_EXTRACT:%.*]] = extractvalue [[STRUCT_INT8X16X4_T]] %value.coerce, 0, 2
+// CHECK-NEXT:    [[VALUE_COERCE_FCA_0_3_EXTRACT:%.*]] = extractvalue [[STRUCT_INT8X16X4_T]] %value.coerce, 0, 3
+// CHECK-NEXT:    call void @llvm.arm.mve.vst4q.p0i8.v16i8(i8* [[ADDR:%.*]], <16 x i8> [[VALUE_COERCE_FCA_0_0_EXTRACT]], <16 x i8> [[VALUE_COERCE_FCA_0_1_EXTRACT]], <16 x i8> [[VALUE_COERCE_FCA_0_2_EXTRACT]], <16 x i8> [[VALUE_COERCE_FCA_0_3_EXTRACT]], i32 0)
+// CHECK-NEXT:    call void @llvm.arm.mve.vst4q.p0i8.v16i8(i8* [[ADDR]], <16 x i8> [[VALUE_COERCE_FCA_0_0_EXTRACT]], <16 x i8> [[VALUE_COERCE_FCA_0_1_EXTRACT]], <16 x i8> [[VALUE_COERCE_FCA_0_2_EXTRACT]], <16 x i8> [[VALUE_COERCE_FCA_0_3_EXTRACT]], i32 1)
+// CHECK-NEXT:    call void @llvm.arm.mve.vst4q.p0i8.v16i8(i8* [[ADDR]], <16 x i8> [[VALUE_COERCE_FCA_0_0_EXTRACT]], <16 x i8> [[VALUE_COERCE_FCA_0_1_EXTRACT]], <16 x i8> [[VALUE_COERCE_FCA_0_2_EXTRACT]], <16 x i8> [[VALUE_COERCE_FCA_0_3_EXTRACT]], i32 2)
+// CHECK-NEXT:    call void @llvm.arm.mve.vst4q.p0i8.v16i8(i8* [[ADDR]], <16 x i8> [[VALUE_COERCE_FCA_0_0_EXTRACT]], <16 x i8> [[VALUE_COERCE_FCA_0_1_EXTRACT]], <16 x i8> [[VALUE_COERCE_FCA_0_2_EXTRACT]], <16 x i8> [[VALUE_COERCE_FCA_0_3_EXTRACT]], i32 3)
+// CHECK-NEXT:    ret void
+//
+void test_vst4q_s8(int8_t *addr, int8x16x4_t value)
+{
+#ifdef POLYMORPHIC
+    vst4q(addr, value);
+#else /* POLYMORPHIC */
+    vst4q_s8(addr, value);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vst2q_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VALUE_COERCE_FCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_FLOAT16X8X2_T:%.*]] %value.coerce, 0, 0
+// CHECK-NEXT:    [[VALUE_COERCE_FCA_0_1_EXTRACT:%.*]] = extractvalue [[STRUCT_FLOAT16X8X2_T]] %value.coerce, 0, 1
+// CHECK-NEXT:    call void @llvm.arm.mve.vst2q.p0f16.v8f16(half* [[ADDR:%.*]], <8 x half> [[VALUE_COERCE_FCA_0_0_EXTRACT]], <8 x half> [[VALUE_COERCE_FCA_0_1_EXTRACT]], i32 0)
+// CHECK-NEXT:    call void @llvm.arm.mve.vst2q.p0f16.v8f16(half* [[ADDR]], <8 x half> [[VALUE_COERCE_FCA_0_0_EXTRACT]], <8 x half> [[VALUE_COERCE_FCA_0_1_EXTRACT]], i32 1)
+// CHECK-NEXT:    ret void
+//
+void test_vst2q_f16(float16_t *addr, float16x8x2_t value)
+{
+#ifdef POLYMORPHIC
+    vst2q(addr, value);
+#else /* POLYMORPHIC */
+    vst2q_f16(addr, value);
+#endif /* POLYMORPHIC */
+}
diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vldr.c b/clang/test/CodeGen/arm-mve-intrinsics/vldr.c
new file mode 100644
index 00000000000..ab253fe5ee7
--- /dev/null
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vldr.c
@@ -0,0 +1,48 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang --target=arm-arm-none-eabi -march=armv8.1m.main+mve.fp -mfloat-abi=hard -O0 -Xclang -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+
+#include <arm_mve.h>
+
+// CHECK-LABEL: @test_vldrwq_gather_base_wb_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* [[ADDR:%.*]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4i32.v4i32(<4 x i32> [[TMP0]], i32 80)
+// CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[TMP1]], 1
+// CHECK-NEXT:    store <4 x i32> [[TMP2]], <4 x i32>* [[ADDR]], align 8
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[TMP1]], 0
+// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+//
+int32x4_t test_vldrwq_gather_base_wb_s32(uint32x4_t *addr)
+{
+    return vldrwq_gather_base_wb_s32(addr, 0x50);
+}
+
+// CHECK-LABEL: @test_vldrwq_gather_base_wb_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* [[ADDR:%.*]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = call { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4f32.v4i32(<4 x i32> [[TMP0]], i32 64)
+// CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <4 x float>, <4 x i32> } [[TMP1]], 1
+// CHECK-NEXT:    store <4 x i32> [[TMP2]], <4 x i32>* [[ADDR]], align 8
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <4 x float>, <4 x i32> } [[TMP1]], 0
+// CHECK-NEXT:    ret <4 x float> [[TMP3]]
+//
+float32x4_t test_vldrwq_gather_base_wb_f32(uint32x4_t *addr)
+{
+    return vldrwq_gather_base_wb_f32(addr, 0x40);
+}
+
+// CHECK-LABEL: @test_vldrdq_gather_base_wb_z_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i64>, <2 x i64>* [[ADDR:%.*]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64> [[TMP0]], i32 656, <4 x i1> [[TMP2]])
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[TMP3]], 1
+// CHECK-NEXT:    store <2 x i64> [[TMP4]], <2 x i64>* [[ADDR]], align 8
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[TMP3]], 0
+// CHECK-NEXT:    ret <2 x i64> [[TMP5]]
+//
+uint64x2_t test_vldrdq_gather_base_wb_z_u64(uint64x2_t *addr, mve_pred16_t p)
+{
+    return vldrdq_gather_base_wb_z_u64(addr, 0x290, p);
+}
diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vminvq.c b/clang/test/CodeGen/arm-mve-intrinsics/vminvq.c
new file mode 100644
index 00000000000..1187c2ddba9
--- /dev/null
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vminvq.c
@@ -0,0 +1,97 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang --target=arm-arm-none-eabi -march=armv8.1m.main+mve.fp -mfloat-abi=hard -O0 -Xclang -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang --target=arm-arm-none-eabi -march=armv8.1m.main+mve.fp -mfloat-abi=hard -DPOLYMORPHIC -O0 -Xclang -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+
+#include <arm_mve.h>
+
+// CHECK-LABEL: @test_vminvq_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i8 [[A:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.minv.s.v16i8(i32 [[TMP0]], <16 x i8> [[B:%.*]])
+// CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
+// CHECK-NEXT:    ret i8 [[TMP2]]
+//
+int8_t test_vminvq_s8(int8_t a, int8x16_t b)
+{
+#ifdef POLYMORPHIC
+    return vminvq(a, b);
+#else /* POLYMORPHIC */
+    return vminvq_s8(a, b);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vminvq_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[A:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.minv.s.v8i16(i32 [[TMP0]], <8 x i16> [[B:%.*]])
+// CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NEXT:    ret i16 [[TMP2]]
+//
+int16_t test_vminvq_s16(int16_t a, int16x8_t b)
+{
+#ifdef POLYMORPHIC
+    return vminvq(a, b);
+#else /* POLYMORPHIC */
+    return vminvq_s16(a, b);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vminvq_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.arm.mve.minv.s.v4i32(i32 [[A:%.*]], <4 x i32> [[B:%.*]])
+// CHECK-NEXT:    ret i32 [[TMP0]]
+//
+int32_t test_vminvq_s32(int32_t a, int32x4_t b)
+{
+#ifdef POLYMORPHIC
+    return vminvq(a, b);
+#else /* POLYMORPHIC */
+    return vminvq_s32(a, b);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vminvq_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i8 [[A:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.minv.u.v16i8(i32 [[TMP0]], <16 x i8> [[B:%.*]])
+// CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
+// CHECK-NEXT:    ret i8 [[TMP2]]
+//
+uint8_t test_vminvq_u8(uint8_t a, uint8x16_t b)
+{
+#ifdef POLYMORPHIC
+    return vminvq(a, b);
+#else /* POLYMORPHIC */
+    return vminvq_u8(a, b);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vminvq_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[A:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.minv.u.v8i16(i32 [[TMP0]], <8 x i16> [[B:%.*]])
+// CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NEXT:    ret i16 [[TMP2]]
+//
+uint16_t test_vminvq_u16(uint16_t a, uint16x8_t b)
+{
+#ifdef POLYMORPHIC
+    return vminvq(a, b);
+#else /* POLYMORPHIC */
+    return vminvq_u16(a, b);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vminvq_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.arm.mve.minv.u.v4i32(i32 [[A:%.*]], <4 x i32> [[B:%.*]])
+// CHECK-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t test_vminvq_u32(uint32_t a, uint32x4_t b)
+{
+#ifdef POLYMORPHIC
+    return vminvq(a, b);
+#else /* POLYMORPHIC */
+    return vminvq_u32(a, b);
+#endif /* POLYMORPHIC */
+}
author	Simon Tatham <simon.tatham@arm.com>	2019-09-02 15:50:50 +0100
committer	Simon Tatham <simon.tatham@arm.com>	2019-10-24 16:33:13 +0100
commit	08074cc96557dcb7ec91d7cd84c412414fa9a516 (patch)
tree	11371e0a7950d1d4b5dc0e2d4476e1ba91b4672d /clang/test
parent	7c11da0cfd3396150c13657a2217f2044f314734 (diff)
download	bcm5719-llvm-08074cc96557dcb7ec91d7cd84c412414fa9a516.tar.gz bcm5719-llvm-08074cc96557dcb7ec91d7cd84c412414fa9a516.zip