summaryrefslogtreecommitdiffstats
path: root/clang
diff options
context:
space:
mode:
Diffstat (limited to 'clang')
-rw-r--r--clang/include/clang/Basic/arm_neon.td18
-rw-r--r--clang/lib/Basic/Targets/AArch64.cpp1
-rw-r--r--clang/lib/Basic/Targets/ARM.cpp12
-rw-r--r--clang/lib/Basic/Targets/ARM.h3
-rw-r--r--clang/lib/CodeGen/CGBuiltin.cpp8
-rw-r--r--clang/test/CodeGen/aarch64-neon-vcadd.c63
-rw-r--r--clang/test/CodeGen/arm-neon-vcadd.c51
7 files changed, 155 insertions, 1 deletions
diff --git a/clang/include/clang/Basic/arm_neon.td b/clang/include/clang/Basic/arm_neon.td
index b5e395c8103..a4dc21b6431 100644
--- a/clang/include/clang/Basic/arm_neon.td
+++ b/clang/include/clang/Basic/arm_neon.td
@@ -1673,3 +1673,21 @@ let ArchGuard = "defined(__ARM_FEATURE_FP16FML) && defined(__aarch64__)" in {
def VFMLAL_LANEQ_HIGH : SOpInst<"vfmlal_laneq_high", "(F>)(F>)F(FQ)I", "hQh", OP_FMLAL_LN_Hi>;
def VFMLSL_LANEQ_HIGH : SOpInst<"vfmlsl_laneq_high", "(F>)(F>)F(FQ)I", "hQh", OP_FMLSL_LN_Hi>;
}
+
+// v8.3-A Vector complex addition intrinsics
+let ArchGuard = "defined(__ARM_FEATURE_COMPLEX) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)" in {
+ def VCADD_ROT90_FP16 : SInst<"vcadd_rot90", "...", "h">;
+ def VCADD_ROT270_FP16 : SInst<"vcadd_rot270", "...", "h">;
+ def VCADDQ_ROT90_FP16 : SInst<"vcaddq_rot90", "QQQ", "h">;
+ def VCADDQ_ROT270_FP16 : SInst<"vcaddq_rot270", "QQQ", "h">;
+}
+let ArchGuard = "defined(__ARM_FEATURE_COMPLEX)" in {
+ def VCADD_ROT90 : SInst<"vcadd_rot90", "...", "f">;
+ def VCADD_ROT270 : SInst<"vcadd_rot270", "...", "f">;
+ def VCADDQ_ROT90 : SInst<"vcaddq_rot90", "QQQ", "f">;
+ def VCADDQ_ROT270 : SInst<"vcaddq_rot270", "QQQ", "f">;
+}
+let ArchGuard = "defined(__ARM_FEATURE_COMPLEX) && defined(__aarch64__)" in {
+ def VCADDQ_ROT90_FP64 : SInst<"vcaddq_rot90", "QQQ", "d">;
+ def VCADDQ_ROT270_FP64 : SInst<"vcaddq_rot270", "QQQ", "d">;
+} \ No newline at end of file
diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp
index 5214f7c30ee..cba3e3ada7e 100644
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -158,6 +158,7 @@ void AArch64TargetInfo::getTargetDefinesARMV82A(const LangOptions &Opts,
void AArch64TargetInfo::getTargetDefinesARMV83A(const LangOptions &Opts,
MacroBuilder &Builder) const {
+ Builder.defineMacro("__ARM_FEATURE_COMPLEX", "1");
Builder.defineMacro("__ARM_FEATURE_JCVT", "1");
// Also include the Armv8.2 defines
getTargetDefinesARMV82A(Opts, Builder);
diff --git a/clang/lib/Basic/Targets/ARM.cpp b/clang/lib/Basic/Targets/ARM.cpp
index 437a77afdc9..be088e81cff 100644
--- a/clang/lib/Basic/Targets/ARM.cpp
+++ b/clang/lib/Basic/Targets/ARM.cpp
@@ -580,6 +580,13 @@ void ARMTargetInfo::getTargetDefinesARMV82A(const LangOptions &Opts,
getTargetDefinesARMV81A(Opts, Builder);
}
+void ARMTargetInfo::getTargetDefinesARMV83A(const LangOptions &Opts,
+ MacroBuilder &Builder) const {
+ // Also include the ARMv8.2-A defines
+ Builder.defineMacro("__ARM_FEATURE_COMPLEX", "1");
+ getTargetDefinesARMV82A(Opts, Builder);
+}
+
void ARMTargetInfo::getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const {
// Target identification.
@@ -809,6 +816,11 @@ void ARMTargetInfo::getTargetDefines(const LangOptions &Opts,
case llvm::ARM::ArchKind::ARMV8_2A:
getTargetDefinesARMV82A(Opts, Builder);
break;
+ case llvm::ARM::ArchKind::ARMV8_3A:
+ case llvm::ARM::ArchKind::ARMV8_4A:
+ case llvm::ARM::ArchKind::ARMV8_5A:
+ getTargetDefinesARMV83A(Opts, Builder);
+ break;
}
}
diff --git a/clang/lib/Basic/Targets/ARM.h b/clang/lib/Basic/Targets/ARM.h
index ce87a626593..9696a440458 100644
--- a/clang/lib/Basic/Targets/ARM.h
+++ b/clang/lib/Basic/Targets/ARM.h
@@ -148,9 +148,10 @@ public:
void getTargetDefinesARMV81A(const LangOptions &Opts,
MacroBuilder &Builder) const;
-
void getTargetDefinesARMV82A(const LangOptions &Opts,
MacroBuilder &Builder) const;
+ void getTargetDefinesARMV83A(const LangOptions &Opts,
+ MacroBuilder &Builder) const;
void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const override;
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 26044f53e49..68706d78cd1 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -4454,6 +4454,10 @@ static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = {
NEONMAP1(vaesmcq_v, arm_neon_aesmc, 0),
NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
+ NEONMAP1(vcadd_rot270_v, arm_neon_vcadd_rot270, Add1ArgType),
+ NEONMAP1(vcadd_rot90_v, arm_neon_vcadd_rot90, Add1ArgType),
+ NEONMAP1(vcaddq_rot270_v, arm_neon_vcadd_rot270, Add1ArgType),
+ NEONMAP1(vcaddq_rot90_v, arm_neon_vcadd_rot90, Add1ArgType),
NEONMAP1(vcage_v, arm_neon_vacge, 0),
NEONMAP1(vcageq_v, arm_neon_vacge, 0),
NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
@@ -4727,6 +4731,10 @@ static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0),
NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0),
NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0),
+ NEONMAP1(vcadd_rot270_v, aarch64_neon_vcadd_rot270, Add1ArgType),
+ NEONMAP1(vcadd_rot90_v, aarch64_neon_vcadd_rot90, Add1ArgType),
+ NEONMAP1(vcaddq_rot270_v, aarch64_neon_vcadd_rot270, Add1ArgType),
+ NEONMAP1(vcaddq_rot90_v, aarch64_neon_vcadd_rot90, Add1ArgType),
NEONMAP1(vcage_v, aarch64_neon_facge, 0),
NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
diff --git a/clang/test/CodeGen/aarch64-neon-vcadd.c b/clang/test/CodeGen/aarch64-neon-vcadd.c
new file mode 100644
index 00000000000..6f1b3dcd401
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-neon-vcadd.c
@@ -0,0 +1,63 @@
+// RUN: %clang -target aarch64-arm-none-eabi -march=armv8.3-a+fp16 %s -S -emit-llvm -o - | FileCheck %s
+
+#include <arm_neon.h>
+
+void foo16x4_rot90(float16x4_t a, float16x4_t b)
+{
+// CHECK: call <4 x half> @llvm.aarch64.neon.vcadd.rot90.v4f16
+ float16x4_t result = vcadd_rot90_f16(a, b);
+}
+
+void foo32x2_rot90(float32x2_t a, float32x2_t b)
+{
+// CHECK: call <2 x float> @llvm.aarch64.neon.vcadd.rot90.v2f32
+ float32x2_t result = vcadd_rot90_f32(a, b);
+}
+
+void foo16x8_rot90(float16x8_t a, float16x8_t b)
+{
+// CHECK: call <8 x half> @llvm.aarch64.neon.vcadd.rot90.v8f16
+ float16x8_t result = vcaddq_rot90_f16(a, b);
+}
+
+void foo32x4_rot90(float32x4_t a, float32x4_t b)
+{
+// CHECK: call <4 x float> @llvm.aarch64.neon.vcadd.rot90.v4f32
+ float32x4_t result = vcaddq_rot90_f32(a, b);
+}
+
+void foo64x2_rot90(float64x2_t a, float64x2_t b)
+{
+// CHECK: call <2 x double> @llvm.aarch64.neon.vcadd.rot90.v2f64
+ float64x2_t result = vcaddq_rot90_f64(a, b);
+}
+
+void foo16x4_rot270(float16x4_t a, float16x4_t b)
+{
+// CHECK: call <4 x half> @llvm.aarch64.neon.vcadd.rot270.v4f16
+ float16x4_t result = vcadd_rot270_f16(a, b);
+}
+
+void foo32x2_rot270(float32x2_t a, float32x2_t b)
+{
+// CHECK: call <2 x float> @llvm.aarch64.neon.vcadd.rot270.v2f32
+ float32x2_t result = vcadd_rot270_f32(a, b);
+}
+
+void foo16x8_rot270(float16x8_t a, float16x8_t b)
+{
+// CHECK: call <8 x half> @llvm.aarch64.neon.vcadd.rot270.v8f16
+ float16x8_t result = vcaddq_rot270_f16(a, b);
+}
+
+void foo32x4_rot270(float32x4_t a, float32x4_t b)
+{
+// CHECK: call <4 x float> @llvm.aarch64.neon.vcadd.rot270.v4f32
+ float32x4_t result = vcaddq_rot270_f32(a, b);
+}
+
+void foo64x2_rot270(float64x2_t a, float64x2_t b)
+{
+// CHECK: call <2 x double> @llvm.aarch64.neon.vcadd.rot270.v2f64
+ float64x2_t result = vcaddq_rot270_f64(a, b);
+}
diff --git a/clang/test/CodeGen/arm-neon-vcadd.c b/clang/test/CodeGen/arm-neon-vcadd.c
new file mode 100644
index 00000000000..4b9cf34c486
--- /dev/null
+++ b/clang/test/CodeGen/arm-neon-vcadd.c
@@ -0,0 +1,51 @@
+// RUN: %clang -target arm-arm-none-eabi -march=armv8.3-a+fp16 %s -S -emit-llvm -o - | opt -S -sroa | FileCheck %s
+
+#include <arm_neon.h>
+
+void foo16x4_rot90(float16x4_t a, float16x4_t b)
+{
+// CHECK: call <4 x half> @llvm.arm.neon.vcadd.rot90.v4f16
+ float16x4_t result = vcadd_rot90_f16(a, b);
+}
+
+void foo32x2_rot90(float32x2_t a, float32x2_t b)
+{
+// CHECK: call <2 x float> @llvm.arm.neon.vcadd.rot90.v2f32
+ float32x2_t result = vcadd_rot90_f32(a, b);
+}
+
+void foo16x8_rot90(float16x8_t a, float16x8_t b)
+{
+// CHECK: call <8 x half> @llvm.arm.neon.vcadd.rot90.v8f16
+ float16x8_t result = vcaddq_rot90_f16(a, b);
+}
+
+void foo32x4_rot90(float32x4_t a, float32x4_t b)
+{
+// CHECK: call <4 x float> @llvm.arm.neon.vcadd.rot90.v4f32
+ float32x4_t result = vcaddq_rot90_f32(a, b);
+}
+
+void foo16x4_rot270(float16x4_t a, float16x4_t b)
+{
+// CHECK: call <4 x half> @llvm.arm.neon.vcadd.rot270.v4f16
+ float16x4_t result = vcadd_rot270_f16(a, b);
+}
+
+void foo32x2_rot270(float32x2_t a, float32x2_t b)
+{
+// CHECK: call <2 x float> @llvm.arm.neon.vcadd.rot270.v2f32
+ float32x2_t result = vcadd_rot270_f32(a, b);
+}
+
+void foo16x8_rot270(float16x8_t a, float16x8_t b)
+{
+// CHECK: call <8 x half> @llvm.arm.neon.vcadd.rot270.v8f16
+ float16x8_t result = vcaddq_rot270_f16(a, b);
+}
+
+void foo32x4_rot270(float32x4_t a, float32x4_t b)
+{
+// CHECK: call <4 x float> @llvm.arm.neon.vcadd.rot270.v4f32
+ float32x4_t result = vcaddq_rot270_f32(a, b);
+}
OpenPOWER on IntegriCloud