diff options
author | Abderrazek Zaafrani <a.zaafrani@samsung.com> | 2017-06-20 18:54:57 +0000 |
---|---|---|
committer | Abderrazek Zaafrani <a.zaafrani@samsung.com> | 2017-06-20 18:54:57 +0000 |
commit | f10ca93f34d3c88dfed98bfef28f45c8088d4ace (patch) | |
tree | b877b3482bb5c0995d7d9bfc27da001f4987ca47 /clang/lib | |
parent | 297b6eb20df59d7c5d593b6f04fd4ea9d0e77e29 (diff) | |
download | bcm5719-llvm-f10ca93f34d3c88dfed98bfef28f45c8088d4ace.tar.gz bcm5719-llvm-f10ca93f34d3c88dfed98bfef28f45c8088d4ace.zip |
[AArch64] Add ARMv8.2-A FP16 vector intrinsics
Differential Revision: https://reviews.llvm.org/D34161
llvm-svn: 305820
Diffstat (limited to 'clang/lib')
-rw-r--r-- | clang/lib/Basic/Targets.cpp | 10 | ||||
-rw-r--r-- | clang/lib/CodeGen/CGBuiltin.cpp | 183 | ||||
-rw-r--r-- | clang/lib/CodeGen/CodeGenModule.cpp | 1 | ||||
-rw-r--r-- | clang/lib/CodeGen/CodeGenTypeCache.h | 2 |
4 files changed, 189 insertions, 7 deletions
diff --git a/clang/lib/Basic/Targets.cpp b/clang/lib/Basic/Targets.cpp index a3b8330707b..e23a93e8ced 100644 --- a/clang/lib/Basic/Targets.cpp +++ b/clang/lib/Basic/Targets.cpp @@ -6172,6 +6172,8 @@ class AArch64TargetInfo : public TargetInfo { unsigned Crypto; unsigned Unaligned; unsigned V8_1A; + unsigned V8_2A; + unsigned HasFullFP16; static const Builtin::Info BuiltinInfo[]; @@ -6303,6 +6305,8 @@ public: if (V8_1A) Builder.defineMacro("__ARM_FEATURE_QRDMX", "1"); + if (V8_2A && FPU == NeonMode && HasFullFP16) + Builder.defineMacro("__ARM_FEATURE_FP16_VECTOR_ARITHMETIC", "1"); // All of the __sync_(bool|val)_compare_and_swap_(1|2|4|8) builtins work. Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1"); @@ -6330,6 +6334,8 @@ public: Crypto = 0; Unaligned = 1; V8_1A = 0; + V8_2A = 0; + HasFullFP16 = 0; for (const auto &Feature : Features) { if (Feature == "+neon") @@ -6342,6 +6348,10 @@ public: Unaligned = 0; if (Feature == "+v8.1a") V8_1A = 1; + if (Feature == "+v8.2a") + V8_2A = 1; + if (Feature == "+fullfp16") + HasFullFP16 = 1; } setDataLayout(); diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 8f0c22d1f7e..a6451b7fc3c 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -2956,8 +2956,9 @@ static llvm::VectorType *GetNeonType(CodeGenFunction *CGF, return llvm::VectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad)); case NeonTypeFlags::Int16: case NeonTypeFlags::Poly16: - case NeonTypeFlags::Float16: return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad)); + case NeonTypeFlags::Float16: + return llvm::VectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad)); case NeonTypeFlags::Int32: return llvm::VectorType::get(CGF->Int32Ty, V1Ty ? 
1 : (2 << IsQuad)); case NeonTypeFlags::Int64: @@ -2980,6 +2981,8 @@ static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF, NeonTypeFlags IntTypeFlags) { int IsQuad = IntTypeFlags.isQuad(); switch (IntTypeFlags.getEltType()) { + case NeonTypeFlags::Int16: + return llvm::VectorType::get(CGF->HalfTy, (4 << IsQuad)); case NeonTypeFlags::Int32: return llvm::VectorType::get(CGF->FloatTy, (2 << IsQuad)); case NeonTypeFlags::Int64: @@ -3127,55 +3130,80 @@ static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = { NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0), NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0), NEONMAP0(vcvt_f32_v), + NEONMAP2(vcvt_n_f16_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0), NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0), + NEONMAP1(vcvt_n_s16_v, arm_neon_vcvtfp2fxs, 0), NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0), NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0), + NEONMAP1(vcvt_n_u16_v, arm_neon_vcvtfp2fxu, 0), NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0), NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0), + NEONMAP0(vcvt_s16_v), NEONMAP0(vcvt_s32_v), NEONMAP0(vcvt_s64_v), + NEONMAP0(vcvt_u16_v), NEONMAP0(vcvt_u32_v), NEONMAP0(vcvt_u64_v), + NEONMAP1(vcvta_s16_v, arm_neon_vcvtas, 0), NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0), NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0), NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0), NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0), + NEONMAP1(vcvtaq_s16_v, arm_neon_vcvtas, 0), NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0), NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0), + NEONMAP1(vcvtaq_u16_v, arm_neon_vcvtau, 0), NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0), NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0), + NEONMAP1(vcvtm_s16_v, arm_neon_vcvtms, 0), NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0), NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0), + NEONMAP1(vcvtm_u16_v, arm_neon_vcvtmu, 0), NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0), NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0), + NEONMAP1(vcvtmq_s16_v, arm_neon_vcvtms, 0), 
NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0), NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0), + NEONMAP1(vcvtmq_u16_v, arm_neon_vcvtmu, 0), NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0), NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0), + NEONMAP1(vcvtn_s16_v, arm_neon_vcvtns, 0), NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0), NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0), + NEONMAP1(vcvtn_u16_v, arm_neon_vcvtnu, 0), NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0), NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0), + NEONMAP1(vcvtnq_s16_v, arm_neon_vcvtns, 0), NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0), NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0), + NEONMAP1(vcvtnq_u16_v, arm_neon_vcvtnu, 0), NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0), NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0), + NEONMAP1(vcvtp_s16_v, arm_neon_vcvtps, 0), NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0), NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0), + NEONMAP1(vcvtp_u16_v, arm_neon_vcvtpu, 0), NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0), NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0), + NEONMAP1(vcvtpq_s16_v, arm_neon_vcvtps, 0), NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0), NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0), + NEONMAP1(vcvtpq_u16_v, arm_neon_vcvtpu, 0), NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0), NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0), NEONMAP0(vcvtq_f32_v), + NEONMAP2(vcvtq_n_f16_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0), NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0), + NEONMAP1(vcvtq_n_s16_v, arm_neon_vcvtfp2fxs, 0), NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0), NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0), + NEONMAP1(vcvtq_n_u16_v, arm_neon_vcvtfp2fxu, 0), NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0), NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0), + NEONMAP0(vcvtq_s16_v), NEONMAP0(vcvtq_s32_v), NEONMAP0(vcvtq_s64_v), + NEONMAP0(vcvtq_u16_v), NEONMAP0(vcvtq_u32_v), NEONMAP0(vcvtq_u64_v), NEONMAP0(vext_v), @@ -3338,19 +3366,27 @@ static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = { 
NEONMAP1(vcnt_v, ctpop, Add1ArgType), NEONMAP1(vcntq_v, ctpop, Add1ArgType), NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0), + NEONMAP0(vcvt_f16_v), NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0), NEONMAP0(vcvt_f32_v), + NEONMAP2(vcvt_n_f16_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), + NEONMAP1(vcvt_n_s16_v, aarch64_neon_vcvtfp2fxs, 0), NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0), NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0), + NEONMAP1(vcvt_n_u16_v, aarch64_neon_vcvtfp2fxu, 0), NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0), NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0), + NEONMAP0(vcvtq_f16_v), NEONMAP0(vcvtq_f32_v), + NEONMAP2(vcvtq_n_f16_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), + NEONMAP1(vcvtq_n_s16_v, aarch64_neon_vcvtfp2fxs, 0), NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0), NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0), + NEONMAP1(vcvtq_n_u16_v, aarch64_neon_vcvtfp2fxu, 0), NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0), NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0), NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType), @@ -3819,9 +3855,20 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( case NEON::BI__builtin_neon_vcageq_v: case NEON::BI__builtin_neon_vcagt_v: case NEON::BI__builtin_neon_vcagtq_v: { - llvm::Type *VecFlt = llvm::VectorType::get( - VTy->getScalarSizeInBits() == 32 ? 
FloatTy : DoubleTy, - VTy->getNumElements()); + llvm::Type *Ty; + switch (VTy->getScalarSizeInBits()) { + default: llvm_unreachable("unexpected type"); + case 32: + Ty = FloatTy; + break; + case 64: + Ty = DoubleTy; + break; + case 16: + Ty = HalfTy; + break; + } + llvm::Type *VecFlt = llvm::VectorType::get(Ty, VTy->getNumElements()); llvm::Type *Tys[] = { VTy, VecFlt }; Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys); return EmitNeonCall(F, Ops, NameHint); @@ -3838,8 +3885,16 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad)); return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt") : Builder.CreateSIToFP(Ops[0], Ty, "vcvt"); + case NEON::BI__builtin_neon_vcvt_f16_v: + case NEON::BI__builtin_neon_vcvtq_f16_v: + Ops[0] = Builder.CreateBitCast(Ops[0], Ty); + Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad)); + return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt") + : Builder.CreateSIToFP(Ops[0], Ty, "vcvt"); + case NEON::BI__builtin_neon_vcvt_n_f16_v: case NEON::BI__builtin_neon_vcvt_n_f32_v: case NEON::BI__builtin_neon_vcvt_n_f64_v: + case NEON::BI__builtin_neon_vcvtq_n_f16_v: case NEON::BI__builtin_neon_vcvtq_n_f32_v: case NEON::BI__builtin_neon_vcvtq_n_f64_v: { llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty }; @@ -3847,11 +3902,15 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( Function *F = CGM.getIntrinsic(Int, Tys); return EmitNeonCall(F, Ops, "vcvt_n"); } + case NEON::BI__builtin_neon_vcvt_n_s16_v: case NEON::BI__builtin_neon_vcvt_n_s32_v: + case NEON::BI__builtin_neon_vcvt_n_u16_v: case NEON::BI__builtin_neon_vcvt_n_u32_v: case NEON::BI__builtin_neon_vcvt_n_s64_v: case NEON::BI__builtin_neon_vcvt_n_u64_v: + case NEON::BI__builtin_neon_vcvtq_n_s16_v: case NEON::BI__builtin_neon_vcvtq_n_s32_v: + case NEON::BI__builtin_neon_vcvtq_n_u16_v: case NEON::BI__builtin_neon_vcvtq_n_u32_v: case NEON::BI__builtin_neon_vcvtq_n_s64_v: case 
NEON::BI__builtin_neon_vcvtq_n_u64_v: { @@ -3863,44 +3922,63 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( case NEON::BI__builtin_neon_vcvt_u32_v: case NEON::BI__builtin_neon_vcvt_s64_v: case NEON::BI__builtin_neon_vcvt_u64_v: + case NEON::BI__builtin_neon_vcvt_s16_v: + case NEON::BI__builtin_neon_vcvt_u16_v: case NEON::BI__builtin_neon_vcvtq_s32_v: case NEON::BI__builtin_neon_vcvtq_u32_v: case NEON::BI__builtin_neon_vcvtq_s64_v: - case NEON::BI__builtin_neon_vcvtq_u64_v: { + case NEON::BI__builtin_neon_vcvtq_u64_v: + case NEON::BI__builtin_neon_vcvtq_s16_v: + case NEON::BI__builtin_neon_vcvtq_u16_v: { Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type)); return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt") : Builder.CreateFPToSI(Ops[0], Ty, "vcvt"); } + case NEON::BI__builtin_neon_vcvta_s16_v: case NEON::BI__builtin_neon_vcvta_s32_v: case NEON::BI__builtin_neon_vcvta_s64_v: case NEON::BI__builtin_neon_vcvta_u32_v: case NEON::BI__builtin_neon_vcvta_u64_v: + case NEON::BI__builtin_neon_vcvtaq_s16_v: case NEON::BI__builtin_neon_vcvtaq_s32_v: case NEON::BI__builtin_neon_vcvtaq_s64_v: + case NEON::BI__builtin_neon_vcvtaq_u16_v: case NEON::BI__builtin_neon_vcvtaq_u32_v: case NEON::BI__builtin_neon_vcvtaq_u64_v: + case NEON::BI__builtin_neon_vcvtn_s16_v: case NEON::BI__builtin_neon_vcvtn_s32_v: case NEON::BI__builtin_neon_vcvtn_s64_v: + case NEON::BI__builtin_neon_vcvtn_u16_v: case NEON::BI__builtin_neon_vcvtn_u32_v: case NEON::BI__builtin_neon_vcvtn_u64_v: + case NEON::BI__builtin_neon_vcvtnq_s16_v: case NEON::BI__builtin_neon_vcvtnq_s32_v: case NEON::BI__builtin_neon_vcvtnq_s64_v: + case NEON::BI__builtin_neon_vcvtnq_u16_v: case NEON::BI__builtin_neon_vcvtnq_u32_v: case NEON::BI__builtin_neon_vcvtnq_u64_v: + case NEON::BI__builtin_neon_vcvtp_s16_v: case NEON::BI__builtin_neon_vcvtp_s32_v: case NEON::BI__builtin_neon_vcvtp_s64_v: + case NEON::BI__builtin_neon_vcvtp_u16_v: case NEON::BI__builtin_neon_vcvtp_u32_v: case 
NEON::BI__builtin_neon_vcvtp_u64_v: + case NEON::BI__builtin_neon_vcvtpq_s16_v: case NEON::BI__builtin_neon_vcvtpq_s32_v: case NEON::BI__builtin_neon_vcvtpq_s64_v: + case NEON::BI__builtin_neon_vcvtpq_u16_v: case NEON::BI__builtin_neon_vcvtpq_u32_v: case NEON::BI__builtin_neon_vcvtpq_u64_v: + case NEON::BI__builtin_neon_vcvtm_s16_v: case NEON::BI__builtin_neon_vcvtm_s32_v: case NEON::BI__builtin_neon_vcvtm_s64_v: + case NEON::BI__builtin_neon_vcvtm_u16_v: case NEON::BI__builtin_neon_vcvtm_u32_v: case NEON::BI__builtin_neon_vcvtm_u64_v: + case NEON::BI__builtin_neon_vcvtmq_s16_v: case NEON::BI__builtin_neon_vcvtmq_s32_v: case NEON::BI__builtin_neon_vcvtmq_s64_v: + case NEON::BI__builtin_neon_vcvtmq_u16_v: case NEON::BI__builtin_neon_vcvtmq_u32_v: case NEON::BI__builtin_neon_vcvtmq_u64_v: { llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; @@ -6110,7 +6188,9 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3])); return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]}); } + case NEON::BI__builtin_neon_vfmah_lane_f16: case NEON::BI__builtin_neon_vfmas_lane_f32: + case NEON::BI__builtin_neon_vfmah_laneq_f16: case NEON::BI__builtin_neon_vfmas_laneq_f32: case NEON::BI__builtin_neon_vfmad_lane_f64: case NEON::BI__builtin_neon_vfmad_laneq_f64: { @@ -6285,18 +6365,25 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vcvt_u32_v: case NEON::BI__builtin_neon_vcvt_s64_v: case NEON::BI__builtin_neon_vcvt_u64_v: + case NEON::BI__builtin_neon_vcvt_s16_v: + case NEON::BI__builtin_neon_vcvt_u16_v: case NEON::BI__builtin_neon_vcvtq_s32_v: case NEON::BI__builtin_neon_vcvtq_u32_v: case NEON::BI__builtin_neon_vcvtq_s64_v: - case NEON::BI__builtin_neon_vcvtq_u64_v: { + case NEON::BI__builtin_neon_vcvtq_u64_v: + case NEON::BI__builtin_neon_vcvtq_s16_v: + case NEON::BI__builtin_neon_vcvtq_u16_v: { Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, 
Type)); if (usgn) return Builder.CreateFPToUI(Ops[0], Ty); return Builder.CreateFPToSI(Ops[0], Ty); } + case NEON::BI__builtin_neon_vcvta_s16_v: case NEON::BI__builtin_neon_vcvta_s32_v: + case NEON::BI__builtin_neon_vcvtaq_s16_v: case NEON::BI__builtin_neon_vcvtaq_s32_v: case NEON::BI__builtin_neon_vcvta_u32_v: + case NEON::BI__builtin_neon_vcvtaq_u16_v: case NEON::BI__builtin_neon_vcvtaq_u32_v: case NEON::BI__builtin_neon_vcvta_s64_v: case NEON::BI__builtin_neon_vcvtaq_s64_v: @@ -6306,9 +6393,13 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta"); } + case NEON::BI__builtin_neon_vcvtm_s16_v: case NEON::BI__builtin_neon_vcvtm_s32_v: + case NEON::BI__builtin_neon_vcvtmq_s16_v: case NEON::BI__builtin_neon_vcvtmq_s32_v: + case NEON::BI__builtin_neon_vcvtm_u16_v: case NEON::BI__builtin_neon_vcvtm_u32_v: + case NEON::BI__builtin_neon_vcvtmq_u16_v: case NEON::BI__builtin_neon_vcvtmq_u32_v: case NEON::BI__builtin_neon_vcvtm_s64_v: case NEON::BI__builtin_neon_vcvtmq_s64_v: @@ -6318,9 +6409,13 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm"); } + case NEON::BI__builtin_neon_vcvtn_s16_v: case NEON::BI__builtin_neon_vcvtn_s32_v: + case NEON::BI__builtin_neon_vcvtnq_s16_v: case NEON::BI__builtin_neon_vcvtnq_s32_v: + case NEON::BI__builtin_neon_vcvtn_u16_v: case NEON::BI__builtin_neon_vcvtn_u32_v: + case NEON::BI__builtin_neon_vcvtnq_u16_v: case NEON::BI__builtin_neon_vcvtnq_u32_v: case NEON::BI__builtin_neon_vcvtn_s64_v: case NEON::BI__builtin_neon_vcvtnq_s64_v: @@ -6330,9 +6425,13 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn"); } + case 
NEON::BI__builtin_neon_vcvtp_s16_v: case NEON::BI__builtin_neon_vcvtp_s32_v: + case NEON::BI__builtin_neon_vcvtpq_s16_v: case NEON::BI__builtin_neon_vcvtpq_s32_v: + case NEON::BI__builtin_neon_vcvtp_u16_v: case NEON::BI__builtin_neon_vcvtp_u32_v: + case NEON::BI__builtin_neon_vcvtpq_u16_v: case NEON::BI__builtin_neon_vcvtpq_u32_v: case NEON::BI__builtin_neon_vcvtp_s64_v: case NEON::BI__builtin_neon_vcvtpq_s64_v: @@ -6505,6 +6604,24 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); return Builder.CreateTrunc(Ops[0], Int16Ty); } + case NEON::BI__builtin_neon_vmaxv_f16: { + Int = Intrinsic::aarch64_neon_fmaxv; + Ty = HalfTy; + VTy = llvm::VectorType::get(HalfTy, 4); + llvm::Type *Tys[2] = { Ty, VTy }; + Ops.push_back(EmitScalarExpr(E->getArg(0))); + Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); + return Builder.CreateTrunc(Ops[0], HalfTy); + } + case NEON::BI__builtin_neon_vmaxvq_f16: { + Int = Intrinsic::aarch64_neon_fmaxv; + Ty = HalfTy; + VTy = llvm::VectorType::get(HalfTy, 8); + llvm::Type *Tys[2] = { Ty, VTy }; + Ops.push_back(EmitScalarExpr(E->getArg(0))); + Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); + return Builder.CreateTrunc(Ops[0], HalfTy); + } case NEON::BI__builtin_neon_vminv_u8: { Int = Intrinsic::aarch64_neon_uminv; Ty = Int32Ty; @@ -6577,6 +6694,60 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); return Builder.CreateTrunc(Ops[0], Int16Ty); } + case NEON::BI__builtin_neon_vminv_f16: { + Int = Intrinsic::aarch64_neon_fminv; + Ty = HalfTy; + VTy = llvm::VectorType::get(HalfTy, 4); + llvm::Type *Tys[2] = { Ty, VTy }; + Ops.push_back(EmitScalarExpr(E->getArg(0))); + Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); + return Builder.CreateTrunc(Ops[0], HalfTy); + } + case NEON::BI__builtin_neon_vminvq_f16: { + Int = 
Intrinsic::aarch64_neon_fminv; + Ty = HalfTy; + VTy = llvm::VectorType::get(HalfTy, 8); + llvm::Type *Tys[2] = { Ty, VTy }; + Ops.push_back(EmitScalarExpr(E->getArg(0))); + Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); + return Builder.CreateTrunc(Ops[0], HalfTy); + } + case NEON::BI__builtin_neon_vmaxnmv_f16: { + Int = Intrinsic::aarch64_neon_fmaxnmv; + Ty = HalfTy; + VTy = llvm::VectorType::get(HalfTy, 4); + llvm::Type *Tys[2] = { Ty, VTy }; + Ops.push_back(EmitScalarExpr(E->getArg(0))); + Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv"); + return Builder.CreateTrunc(Ops[0], HalfTy); + } + case NEON::BI__builtin_neon_vmaxnmvq_f16: { + Int = Intrinsic::aarch64_neon_fmaxnmv; + Ty = HalfTy; + VTy = llvm::VectorType::get(HalfTy, 8); + llvm::Type *Tys[2] = { Ty, VTy }; + Ops.push_back(EmitScalarExpr(E->getArg(0))); + Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv"); + return Builder.CreateTrunc(Ops[0], HalfTy); + } + case NEON::BI__builtin_neon_vminnmv_f16: { + Int = Intrinsic::aarch64_neon_fminnmv; + Ty = HalfTy; + VTy = llvm::VectorType::get(HalfTy, 4); + llvm::Type *Tys[2] = { Ty, VTy }; + Ops.push_back(EmitScalarExpr(E->getArg(0))); + Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv"); + return Builder.CreateTrunc(Ops[0], HalfTy); + } + case NEON::BI__builtin_neon_vminnmvq_f16: { + Int = Intrinsic::aarch64_neon_fminnmv; + Ty = HalfTy; + VTy = llvm::VectorType::get(HalfTy, 8); + llvm::Type *Tys[2] = { Ty, VTy }; + Ops.push_back(EmitScalarExpr(E->getArg(0))); + Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv"); + return Builder.CreateTrunc(Ops[0], HalfTy); + } case NEON::BI__builtin_neon_vmul_n_f64: { Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy); diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 20d945fe50d..5319ccec163 100644 --- 
a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -98,6 +98,7 @@ CodeGenModule::CodeGenModule(ASTContext &C, const HeaderSearchOptions &HSO, Int16Ty = llvm::Type::getInt16Ty(LLVMContext); Int32Ty = llvm::Type::getInt32Ty(LLVMContext); Int64Ty = llvm::Type::getInt64Ty(LLVMContext); + HalfTy = llvm::Type::getHalfTy(LLVMContext); FloatTy = llvm::Type::getFloatTy(LLVMContext); DoubleTy = llvm::Type::getDoubleTy(LLVMContext); PointerWidthInBits = C.getTargetInfo().getPointerWidth(0); diff --git a/clang/lib/CodeGen/CodeGenTypeCache.h b/clang/lib/CodeGen/CodeGenTypeCache.h index 450eab48a3b..6910d36733d 100644 --- a/clang/lib/CodeGen/CodeGenTypeCache.h +++ b/clang/lib/CodeGen/CodeGenTypeCache.h @@ -36,7 +36,7 @@ struct CodeGenTypeCache { /// i8, i16, i32, and i64 llvm::IntegerType *Int8Ty, *Int16Ty, *Int32Ty, *Int64Ty; /// float, double - llvm::Type *FloatTy, *DoubleTy; + llvm::Type *HalfTy, *FloatTy, *DoubleTy; /// int llvm::IntegerType *IntTy; |