From 25e8a6754e3f4c447ddfe5b742c01c16cb050b67 Mon Sep 17 00:00:00 2001
From: Tim Northover
Date: Sat, 24 May 2014 12:51:25 +0000
Subject: AArch64/ARM64: update Clang after AArch64 removal.

A few (mostly CodeGen) parts of Clang were tightly coupled to the
AArch64 backend. Now that it's gone, they will not even compile.

I've also deduplicated RUN lines in many of the AArch64 tests. This
might improve "make check-all" time noticeably: some of those NEON
tests were monsters.

llvm-svn: 209578
---
 clang/lib/CodeGen/CGBuiltin.cpp | 1307 +--------------------------------------
 1 file changed, 2 insertions(+), 1305 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 9d692d8e1e2..585db1778bf 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -1637,14 +1637,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
 Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
                                               const CallExpr *E) {
   switch (getTarget().getTriple().getArch()) {
-  case llvm::Triple::aarch64:
-  case llvm::Triple::aarch64_be:
-    return EmitAArch64BuiltinExpr(BuiltinID, E);
   case llvm::Triple::arm:
   case llvm::Triple::armeb:
   case llvm::Triple::thumb:
   case llvm::Triple::thumbeb:
     return EmitARMBuiltinExpr(BuiltinID, E);
+  case llvm::Triple::aarch64:
+  case llvm::Triple::aarch64_be:
   case llvm::Triple::arm64:
   case llvm::Triple::arm64_be:
     return EmitARM64BuiltinExpr(BuiltinID, E);
@@ -1883,354 +1882,6 @@ enum {
   Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
   #NameBase, TypeModifier }
 
-static const NeonIntrinsicInfo AArch64SISDIntrinsicInfo[] = {
-  NEONMAP1(vabdd_f64, aarch64_neon_vabd, AddRetType),
-  NEONMAP1(vabds_f32, aarch64_neon_vabd, AddRetType),
-  NEONMAP1(vabsd_s64, aarch64_neon_vabs, 0),
-  NEONMAP1(vaddd_s64, aarch64_neon_vaddds, 0),
-  NEONMAP1(vaddd_u64, aarch64_neon_vadddu, 0),
-  NEONMAP1(vaddlv_s16, aarch64_neon_saddlv, VectorRet | Add1ArgType),
-  NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, VectorRet | Add1ArgType),
-  NEONMAP1(vaddlv_s8, aarch64_neon_saddlv, VectorRet | Add1ArgType),
-  NEONMAP1(vaddlv_u16, aarch64_neon_uaddlv, VectorRet | Add1ArgType),
-  NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, VectorRet | Add1ArgType),
-  NEONMAP1(vaddlv_u8, aarch64_neon_uaddlv, VectorRet | Add1ArgType),
-  NEONMAP1(vaddlvq_s16, aarch64_neon_saddlv, VectorRet | Add1ArgType),
-  NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, VectorRet | Add1ArgType),
-  NEONMAP1(vaddlvq_s8, aarch64_neon_saddlv, VectorRet | Add1ArgType),
-  NEONMAP1(vaddlvq_u16, aarch64_neon_uaddlv, VectorRet | Add1ArgType),
-  NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, VectorRet | Add1ArgType),
-  NEONMAP1(vaddlvq_u8, aarch64_neon_uaddlv, VectorRet | Add1ArgType),
-  NEONMAP1(vaddv_f32, aarch64_neon_vpfadd, AddRetType | Add1ArgType),
-  NEONMAP1(vaddv_s16, aarch64_neon_vaddv, VectorRet | Add1ArgType),
-  NEONMAP1(vaddv_s32, aarch64_neon_vaddv, VectorRet | Add1ArgType),
-  NEONMAP1(vaddv_s8, aarch64_neon_vaddv, VectorRet | Add1ArgType),
-  NEONMAP1(vaddv_u16, aarch64_neon_vaddv, VectorRet | Add1ArgType),
-  NEONMAP1(vaddv_u32, aarch64_neon_vaddv, VectorRet | Add1ArgType),
-  NEONMAP1(vaddv_u8, aarch64_neon_vaddv, VectorRet | Add1ArgType),
-  NEONMAP1(vaddvq_f32, aarch64_neon_vpfadd, AddRetType | Add1ArgType),
-  NEONMAP1(vaddvq_f64, aarch64_neon_vpfadd, AddRetType | Add1ArgType),
-  NEONMAP1(vaddvq_s16, aarch64_neon_vaddv, VectorRet | Add1ArgType),
-  NEONMAP1(vaddvq_s32, aarch64_neon_vaddv, VectorRet | Add1ArgType),
-  NEONMAP1(vaddvq_s64,
aarch64_neon_vaddv, VectorRet | Add1ArgType), - NEONMAP1(vaddvq_s8, aarch64_neon_vaddv, VectorRet | Add1ArgType), - NEONMAP1(vaddvq_u16, aarch64_neon_vaddv, VectorRet | Add1ArgType), - NEONMAP1(vaddvq_u32, aarch64_neon_vaddv, VectorRet | Add1ArgType), - NEONMAP1(vaddvq_u64, aarch64_neon_vaddv, VectorRet | Add1ArgType), - NEONMAP1(vaddvq_u8, aarch64_neon_vaddv, VectorRet | Add1ArgType), - NEONMAP1(vcaged_f64, aarch64_neon_fcage, VectorRet | Add2ArgTypes), - NEONMAP1(vcages_f32, aarch64_neon_fcage, VectorRet | Add2ArgTypes), - NEONMAP1(vcagtd_f64, aarch64_neon_fcagt, VectorRet | Add2ArgTypes), - NEONMAP1(vcagts_f32, aarch64_neon_fcagt, VectorRet | Add2ArgTypes), - NEONMAP1(vcaled_f64, aarch64_neon_fcage, VectorRet | Add2ArgTypes), - NEONMAP1(vcales_f32, aarch64_neon_fcage, VectorRet | Add2ArgTypes), - NEONMAP1(vcaltd_f64, aarch64_neon_fcagt, VectorRet | Add2ArgTypes), - NEONMAP1(vcalts_f32, aarch64_neon_fcagt, VectorRet | Add2ArgTypes), - NEONMAP1(vceqd_f64, aarch64_neon_fceq, VectorRet | Add2ArgTypes), - NEONMAP1(vceqd_s64, aarch64_neon_vceq, VectorRetGetArgs01), - NEONMAP1(vceqd_u64, aarch64_neon_vceq, VectorRetGetArgs01), - NEONMAP1(vceqs_f32, aarch64_neon_fceq, VectorRet | Add2ArgTypes), - NEONMAP1(vceqzd_f64, aarch64_neon_fceq, FpCmpzModifiers), - NEONMAP1(vceqzd_s64, aarch64_neon_vceq, VectorRetGetArgs01), - NEONMAP1(vceqzd_u64, aarch64_neon_vceq, VectorRetGetArgs01), - NEONMAP1(vceqzs_f32, aarch64_neon_fceq, FpCmpzModifiers), - NEONMAP1(vcged_f64, aarch64_neon_fcge, VectorRet | Add2ArgTypes), - NEONMAP1(vcged_s64, aarch64_neon_vcge, VectorRetGetArgs01), - NEONMAP1(vcged_u64, aarch64_neon_vchs, VectorRetGetArgs01), - NEONMAP1(vcges_f32, aarch64_neon_fcge, VectorRet | Add2ArgTypes), - NEONMAP1(vcgezd_f64, aarch64_neon_fcge, FpCmpzModifiers), - NEONMAP1(vcgezd_s64, aarch64_neon_vcge, VectorRetGetArgs01), - NEONMAP1(vcgezs_f32, aarch64_neon_fcge, FpCmpzModifiers), - NEONMAP1(vcgtd_f64, aarch64_neon_fcgt, VectorRet | Add2ArgTypes), - NEONMAP1(vcgtd_s64, aarch64_neon_vcgt, VectorRetGetArgs01), - NEONMAP1(vcgtd_u64, aarch64_neon_vchi, VectorRetGetArgs01), - NEONMAP1(vcgts_f32, aarch64_neon_fcgt, VectorRet | Add2ArgTypes), - NEONMAP1(vcgtzd_f64, aarch64_neon_fcgt, FpCmpzModifiers), - NEONMAP1(vcgtzd_s64, aarch64_neon_vcgt, VectorRetGetArgs01), - NEONMAP1(vcgtzs_f32, aarch64_neon_fcgt, FpCmpzModifiers), - NEONMAP1(vcled_f64, aarch64_neon_fcge, VectorRet | Add2ArgTypes), - NEONMAP1(vcled_s64, aarch64_neon_vcge, VectorRetGetArgs01), - NEONMAP1(vcled_u64, aarch64_neon_vchs, VectorRetGetArgs01), - NEONMAP1(vcles_f32, aarch64_neon_fcge, VectorRet | Add2ArgTypes), - NEONMAP1(vclezd_f64, aarch64_neon_fclez, FpCmpzModifiers), - NEONMAP1(vclezd_s64, aarch64_neon_vclez, VectorRetGetArgs01), - NEONMAP1(vclezs_f32, aarch64_neon_fclez, FpCmpzModifiers), - NEONMAP1(vcltd_f64, aarch64_neon_fcgt, VectorRet | Add2ArgTypes), - NEONMAP1(vcltd_s64, aarch64_neon_vcgt, VectorRetGetArgs01), - NEONMAP1(vcltd_u64, aarch64_neon_vchi, VectorRetGetArgs01), - NEONMAP1(vclts_f32, aarch64_neon_fcgt, VectorRet | Add2ArgTypes), - NEONMAP1(vcltzd_f64, aarch64_neon_fcltz, FpCmpzModifiers), - NEONMAP1(vcltzd_s64, aarch64_neon_vcltz, VectorRetGetArgs01), - NEONMAP1(vcltzs_f32, aarch64_neon_fcltz, FpCmpzModifiers), - NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, VectorRet | Add1ArgType), - NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, VectorRet | Add1ArgType), - NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, VectorRet | Add1ArgType), - NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, VectorRet | Add1ArgType), - NEONMAP1(vcvtd_f64_s64, 
aarch64_neon_vcvtint2fps, AddRetType | Vectorize1ArgType), - NEONMAP1(vcvtd_f64_u64, aarch64_neon_vcvtint2fpu, AddRetType | Vectorize1ArgType), - NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp_n, AddRetType | Vectorize1ArgType), - NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp_n, AddRetType | Vectorize1ArgType), - NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs_n, VectorRet | Add1ArgType), - NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu_n, VectorRet | Add1ArgType), - NEONMAP1(vcvtd_s64_f64, aarch64_neon_fcvtzs, VectorRet | Add1ArgType), - NEONMAP1(vcvtd_u64_f64, aarch64_neon_fcvtzu, VectorRet | Add1ArgType), - NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, VectorRet | Add1ArgType), - NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, VectorRet | Add1ArgType), - NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, VectorRet | Add1ArgType), - NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, VectorRet | Add1ArgType), - NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, VectorRet | Add1ArgType), - NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, VectorRet | Add1ArgType), - NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, VectorRet | Add1ArgType), - NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, VectorRet | Add1ArgType), - NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, VectorRet | Add1ArgType), - NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, VectorRet | Add1ArgType), - NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, VectorRet | Add1ArgType), - NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, VectorRet | Add1ArgType), - NEONMAP1(vcvts_f32_s32, aarch64_neon_vcvtint2fps, AddRetType | Vectorize1ArgType), - NEONMAP1(vcvts_f32_u32, aarch64_neon_vcvtint2fpu, AddRetType | Vectorize1ArgType), - NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp_n, AddRetType | Vectorize1ArgType), - NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp_n, AddRetType | Vectorize1ArgType), - NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs_n, VectorRet | Add1ArgType), - NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu_n, VectorRet | Add1ArgType), - NEONMAP1(vcvts_s32_f32, aarch64_neon_fcvtzs, VectorRet | Add1ArgType), - NEONMAP1(vcvts_u32_f32, aarch64_neon_fcvtzu, VectorRet | Add1ArgType), - NEONMAP1(vcvtxd_f32_f64, aarch64_neon_fcvtxn, 0), - NEONMAP0(vdupb_lane_i8), - NEONMAP0(vdupb_laneq_i8), - NEONMAP0(vdupd_lane_f64), - NEONMAP0(vdupd_lane_i64), - NEONMAP0(vdupd_laneq_f64), - NEONMAP0(vdupd_laneq_i64), - NEONMAP0(vduph_lane_i16), - NEONMAP0(vduph_laneq_i16), - NEONMAP0(vdups_lane_f32), - NEONMAP0(vdups_lane_i32), - NEONMAP0(vdups_laneq_f32), - NEONMAP0(vdups_laneq_i32), - NEONMAP0(vfmad_lane_f64), - NEONMAP0(vfmad_laneq_f64), - NEONMAP0(vfmas_lane_f32), - NEONMAP0(vfmas_laneq_f32), - NEONMAP0(vget_lane_f32), - NEONMAP0(vget_lane_f64), - NEONMAP0(vget_lane_i16), - NEONMAP0(vget_lane_i32), - NEONMAP0(vget_lane_i64), - NEONMAP0(vget_lane_i8), - NEONMAP0(vgetq_lane_f32), - NEONMAP0(vgetq_lane_f64), - NEONMAP0(vgetq_lane_i16), - NEONMAP0(vgetq_lane_i32), - NEONMAP0(vgetq_lane_i64), - NEONMAP0(vgetq_lane_i8), - NEONMAP1(vmaxnmv_f32, aarch64_neon_vpfmaxnm, AddRetType | Add1ArgType), - NEONMAP1(vmaxnmvq_f32, aarch64_neon_vmaxnmv, 0), - NEONMAP1(vmaxnmvq_f64, aarch64_neon_vpfmaxnm, AddRetType | Add1ArgType), - NEONMAP1(vmaxv_f32, aarch64_neon_vpmax, AddRetType | Add1ArgType), - NEONMAP1(vmaxv_s16, aarch64_neon_smaxv, VectorRet | Add1ArgType), - NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, VectorRet | Add1ArgType), - NEONMAP1(vmaxv_s8, aarch64_neon_smaxv, VectorRet | Add1ArgType), - NEONMAP1(vmaxv_u16, aarch64_neon_umaxv, VectorRet | Add1ArgType), - 
NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, VectorRet | Add1ArgType), - NEONMAP1(vmaxv_u8, aarch64_neon_umaxv, VectorRet | Add1ArgType), - NEONMAP1(vmaxvq_f32, aarch64_neon_vmaxv, 0), - NEONMAP1(vmaxvq_f64, aarch64_neon_vpmax, AddRetType | Add1ArgType), - NEONMAP1(vmaxvq_s16, aarch64_neon_smaxv, VectorRet | Add1ArgType), - NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, VectorRet | Add1ArgType), - NEONMAP1(vmaxvq_s8, aarch64_neon_smaxv, VectorRet | Add1ArgType), - NEONMAP1(vmaxvq_u16, aarch64_neon_umaxv, VectorRet | Add1ArgType), - NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, VectorRet | Add1ArgType), - NEONMAP1(vmaxvq_u8, aarch64_neon_umaxv, VectorRet | Add1ArgType), - NEONMAP1(vminnmv_f32, aarch64_neon_vpfminnm, AddRetType | Add1ArgType), - NEONMAP1(vminnmvq_f32, aarch64_neon_vminnmv, 0), - NEONMAP1(vminnmvq_f64, aarch64_neon_vpfminnm, AddRetType | Add1ArgType), - NEONMAP1(vminv_f32, aarch64_neon_vpmin, AddRetType | Add1ArgType), - NEONMAP1(vminv_s16, aarch64_neon_sminv, VectorRet | Add1ArgType), - NEONMAP1(vminv_s32, aarch64_neon_sminv, VectorRet | Add1ArgType), - NEONMAP1(vminv_s8, aarch64_neon_sminv, VectorRet | Add1ArgType), - NEONMAP1(vminv_u16, aarch64_neon_uminv, VectorRet | Add1ArgType), - NEONMAP1(vminv_u32, aarch64_neon_uminv, VectorRet | Add1ArgType), - NEONMAP1(vminv_u8, aarch64_neon_uminv, VectorRet | Add1ArgType), - NEONMAP1(vminvq_f32, aarch64_neon_vminv, 0), - NEONMAP1(vminvq_f64, aarch64_neon_vpmin, AddRetType | Add1ArgType), - NEONMAP1(vminvq_s16, aarch64_neon_sminv, VectorRet | Add1ArgType), - NEONMAP1(vminvq_s32, aarch64_neon_sminv, VectorRet | Add1ArgType), - NEONMAP1(vminvq_s8, aarch64_neon_sminv, VectorRet | Add1ArgType), - NEONMAP1(vminvq_u16, aarch64_neon_uminv, VectorRet | Add1ArgType), - NEONMAP1(vminvq_u32, aarch64_neon_uminv, VectorRet | Add1ArgType), - NEONMAP1(vminvq_u8, aarch64_neon_uminv, VectorRet | Add1ArgType), - NEONMAP0(vmul_n_f64), - NEONMAP1(vmull_p64, aarch64_neon_vmull_p64, 0), - NEONMAP0(vmulxd_f64), - NEONMAP0(vmulxs_f32), - NEONMAP1(vnegd_s64, aarch64_neon_vneg, 0), - NEONMAP1(vpaddd_f64, aarch64_neon_vpfadd, AddRetType | Add1ArgType), - NEONMAP1(vpaddd_s64, aarch64_neon_vpadd, 0), - NEONMAP1(vpaddd_u64, aarch64_neon_vpadd, 0), - NEONMAP1(vpadds_f32, aarch64_neon_vpfadd, AddRetType | Add1ArgType), - NEONMAP1(vpmaxnmqd_f64, aarch64_neon_vpfmaxnm, AddRetType | Add1ArgType), - NEONMAP1(vpmaxnms_f32, aarch64_neon_vpfmaxnm, AddRetType | Add1ArgType), - NEONMAP1(vpmaxqd_f64, aarch64_neon_vpmax, AddRetType | Add1ArgType), - NEONMAP1(vpmaxs_f32, aarch64_neon_vpmax, AddRetType | Add1ArgType), - NEONMAP1(vpminnmqd_f64, aarch64_neon_vpfminnm, AddRetType | Add1ArgType), - NEONMAP1(vpminnms_f32, aarch64_neon_vpfminnm, AddRetType | Add1ArgType), - NEONMAP1(vpminqd_f64, aarch64_neon_vpmin, AddRetType | Add1ArgType), - NEONMAP1(vpmins_f32, aarch64_neon_vpmin, AddRetType | Add1ArgType), - NEONMAP1(vqabsb_s8, arm_neon_vqabs, VectorRet), - NEONMAP1(vqabsd_s64, arm_neon_vqabs, VectorRet), - NEONMAP1(vqabsh_s16, arm_neon_vqabs, VectorRet), - NEONMAP1(vqabss_s32, arm_neon_vqabs, VectorRet), - NEONMAP1(vqaddb_s8, arm_neon_vqadds, VectorRet), - NEONMAP1(vqaddb_u8, arm_neon_vqaddu, VectorRet), - NEONMAP1(vqaddd_s64, arm_neon_vqadds, VectorRet), - NEONMAP1(vqaddd_u64, arm_neon_vqaddu, VectorRet), - NEONMAP1(vqaddh_s16, arm_neon_vqadds, VectorRet), - NEONMAP1(vqaddh_u16, arm_neon_vqaddu, VectorRet), - NEONMAP1(vqadds_s32, arm_neon_vqadds, VectorRet), - NEONMAP1(vqadds_u32, arm_neon_vqaddu, VectorRet), - NEONMAP0(vqdmlalh_lane_s16), - NEONMAP0(vqdmlalh_laneq_s16), - 
NEONMAP1(vqdmlalh_s16, aarch64_neon_vqdmlal, VectorRet), - NEONMAP0(vqdmlals_lane_s32), - NEONMAP0(vqdmlals_laneq_s32), - NEONMAP1(vqdmlals_s32, aarch64_neon_vqdmlal, VectorRet), - NEONMAP0(vqdmlslh_lane_s16), - NEONMAP0(vqdmlslh_laneq_s16), - NEONMAP1(vqdmlslh_s16, aarch64_neon_vqdmlsl, VectorRet), - NEONMAP0(vqdmlsls_lane_s32), - NEONMAP0(vqdmlsls_laneq_s32), - NEONMAP1(vqdmlsls_s32, aarch64_neon_vqdmlsl, VectorRet), - NEONMAP1(vqdmulhh_s16, arm_neon_vqdmulh, VectorRet), - NEONMAP1(vqdmulhs_s32, arm_neon_vqdmulh, VectorRet), - NEONMAP1(vqdmullh_s16, arm_neon_vqdmull, VectorRet), - NEONMAP1(vqdmulls_s32, arm_neon_vqdmull, VectorRet), - NEONMAP1(vqmovnd_s64, arm_neon_vqmovns, VectorRet), - NEONMAP1(vqmovnd_u64, arm_neon_vqmovnu, VectorRet), - NEONMAP1(vqmovnh_s16, arm_neon_vqmovns, VectorRet), - NEONMAP1(vqmovnh_u16, arm_neon_vqmovnu, VectorRet), - NEONMAP1(vqmovns_s32, arm_neon_vqmovns, VectorRet), - NEONMAP1(vqmovns_u32, arm_neon_vqmovnu, VectorRet), - NEONMAP1(vqmovund_s64, arm_neon_vqmovnsu, VectorRet), - NEONMAP1(vqmovunh_s16, arm_neon_vqmovnsu, VectorRet), - NEONMAP1(vqmovuns_s32, arm_neon_vqmovnsu, VectorRet), - NEONMAP1(vqnegb_s8, arm_neon_vqneg, VectorRet), - NEONMAP1(vqnegd_s64, arm_neon_vqneg, VectorRet), - NEONMAP1(vqnegh_s16, arm_neon_vqneg, VectorRet), - NEONMAP1(vqnegs_s32, arm_neon_vqneg, VectorRet), - NEONMAP1(vqrdmulhh_s16, arm_neon_vqrdmulh, VectorRet), - NEONMAP1(vqrdmulhs_s32, arm_neon_vqrdmulh, VectorRet), - NEONMAP1(vqrshlb_s8, aarch64_neon_vqrshls, VectorRet), - NEONMAP1(vqrshlb_u8, aarch64_neon_vqrshlu, VectorRet), - NEONMAP1(vqrshld_s64, aarch64_neon_vqrshls, VectorRet), - NEONMAP1(vqrshld_u64, aarch64_neon_vqrshlu, VectorRet), - NEONMAP1(vqrshlh_s16, aarch64_neon_vqrshls, VectorRet), - NEONMAP1(vqrshlh_u16, aarch64_neon_vqrshlu, VectorRet), - NEONMAP1(vqrshls_s32, aarch64_neon_vqrshls, VectorRet), - NEONMAP1(vqrshls_u32, aarch64_neon_vqrshlu, VectorRet), - NEONMAP1(vqrshrnd_n_s64, aarch64_neon_vsqrshrn, VectorRet), - NEONMAP1(vqrshrnd_n_u64, aarch64_neon_vuqrshrn, VectorRet), - NEONMAP1(vqrshrnh_n_s16, aarch64_neon_vsqrshrn, VectorRet), - NEONMAP1(vqrshrnh_n_u16, aarch64_neon_vuqrshrn, VectorRet), - NEONMAP1(vqrshrns_n_s32, aarch64_neon_vsqrshrn, VectorRet), - NEONMAP1(vqrshrns_n_u32, aarch64_neon_vuqrshrn, VectorRet), - NEONMAP1(vqrshrund_n_s64, aarch64_neon_vsqrshrun, VectorRet), - NEONMAP1(vqrshrunh_n_s16, aarch64_neon_vsqrshrun, VectorRet), - NEONMAP1(vqrshruns_n_s32, aarch64_neon_vsqrshrun, VectorRet), - NEONMAP1(vqshlb_n_s8, aarch64_neon_vqshls_n, VectorRet), - NEONMAP1(vqshlb_n_u8, aarch64_neon_vqshlu_n, VectorRet), - NEONMAP1(vqshlb_s8, aarch64_neon_vqshls, VectorRet), - NEONMAP1(vqshlb_u8, aarch64_neon_vqshlu, VectorRet), - NEONMAP1(vqshld_n_s64, aarch64_neon_vqshls_n, VectorRet), - NEONMAP1(vqshld_n_u64, aarch64_neon_vqshlu_n, VectorRet), - NEONMAP1(vqshld_s64, aarch64_neon_vqshls, VectorRet), - NEONMAP1(vqshld_u64, aarch64_neon_vqshlu, VectorRet), - NEONMAP1(vqshlh_n_s16, aarch64_neon_vqshls_n, VectorRet), - NEONMAP1(vqshlh_n_u16, aarch64_neon_vqshlu_n, VectorRet), - NEONMAP1(vqshlh_s16, aarch64_neon_vqshls, VectorRet), - NEONMAP1(vqshlh_u16, aarch64_neon_vqshlu, VectorRet), - NEONMAP1(vqshls_n_s32, aarch64_neon_vqshls_n, VectorRet), - NEONMAP1(vqshls_n_u32, aarch64_neon_vqshlu_n, VectorRet), - NEONMAP1(vqshls_s32, aarch64_neon_vqshls, VectorRet), - NEONMAP1(vqshls_u32, aarch64_neon_vqshlu, VectorRet), - NEONMAP1(vqshlub_n_s8, aarch64_neon_vsqshlu, VectorRet), - NEONMAP1(vqshlud_n_s64, aarch64_neon_vsqshlu, VectorRet), - NEONMAP1(vqshluh_n_s16, 
aarch64_neon_vsqshlu, VectorRet), - NEONMAP1(vqshlus_n_s32, aarch64_neon_vsqshlu, VectorRet), - NEONMAP1(vqshrnd_n_s64, aarch64_neon_vsqshrn, VectorRet), - NEONMAP1(vqshrnd_n_u64, aarch64_neon_vuqshrn, VectorRet), - NEONMAP1(vqshrnh_n_s16, aarch64_neon_vsqshrn, VectorRet), - NEONMAP1(vqshrnh_n_u16, aarch64_neon_vuqshrn, VectorRet), - NEONMAP1(vqshrns_n_s32, aarch64_neon_vsqshrn, VectorRet), - NEONMAP1(vqshrns_n_u32, aarch64_neon_vuqshrn, VectorRet), - NEONMAP1(vqshrund_n_s64, aarch64_neon_vsqshrun, VectorRet), - NEONMAP1(vqshrunh_n_s16, aarch64_neon_vsqshrun, VectorRet), - NEONMAP1(vqshruns_n_s32, aarch64_neon_vsqshrun, VectorRet), - NEONMAP1(vqsubb_s8, arm_neon_vqsubs, VectorRet), - NEONMAP1(vqsubb_u8, arm_neon_vqsubu, VectorRet), - NEONMAP1(vqsubd_s64, arm_neon_vqsubs, VectorRet), - NEONMAP1(vqsubd_u64, arm_neon_vqsubu, VectorRet), - NEONMAP1(vqsubh_s16, arm_neon_vqsubs, VectorRet), - NEONMAP1(vqsubh_u16, arm_neon_vqsubu, VectorRet), - NEONMAP1(vqsubs_s32, arm_neon_vqsubs, VectorRet), - NEONMAP1(vqsubs_u32, arm_neon_vqsubu, VectorRet), - NEONMAP1(vrecped_f64, aarch64_neon_vrecpe, AddRetType), - NEONMAP1(vrecpes_f32, aarch64_neon_vrecpe, AddRetType), - NEONMAP1(vrecpsd_f64, aarch64_neon_vrecps, AddRetType), - NEONMAP1(vrecpss_f32, aarch64_neon_vrecps, AddRetType), - NEONMAP1(vrecpxd_f64, aarch64_neon_vrecpx, AddRetType), - NEONMAP1(vrecpxs_f32, aarch64_neon_vrecpx, AddRetType), - NEONMAP1(vrshld_s64, aarch64_neon_vrshlds, 0), - NEONMAP1(vrshld_u64, aarch64_neon_vrshldu, 0), - NEONMAP1(vrshrd_n_s64, aarch64_neon_vsrshr, VectorRet), - NEONMAP1(vrshrd_n_u64, aarch64_neon_vurshr, VectorRet), - NEONMAP1(vrsqrted_f64, aarch64_neon_vrsqrte, AddRetType), - NEONMAP1(vrsqrtes_f32, aarch64_neon_vrsqrte, AddRetType), - NEONMAP1(vrsqrtsd_f64, aarch64_neon_vrsqrts, AddRetType), - NEONMAP1(vrsqrtss_f32, aarch64_neon_vrsqrts, AddRetType), - NEONMAP1(vrsrad_n_s64, aarch64_neon_vrsrads_n, 0), - NEONMAP1(vrsrad_n_u64, aarch64_neon_vrsradu_n, 0), - NEONMAP0(vset_lane_f32), - NEONMAP0(vset_lane_f64), - NEONMAP0(vset_lane_i16), - NEONMAP0(vset_lane_i32), - NEONMAP0(vset_lane_i64), - NEONMAP0(vset_lane_i8), - NEONMAP0(vsetq_lane_f32), - NEONMAP0(vsetq_lane_f64), - NEONMAP0(vsetq_lane_i16), - NEONMAP0(vsetq_lane_i32), - NEONMAP0(vsetq_lane_i64), - NEONMAP0(vsetq_lane_i8), - NEONMAP1(vsha1cq_u32, arm_neon_sha1c, 0), - NEONMAP1(vsha1h_u32, arm_neon_sha1h, 0), - NEONMAP1(vsha1mq_u32, arm_neon_sha1m, 0), - NEONMAP1(vsha1pq_u32, arm_neon_sha1p, 0), - NEONMAP1(vshld_n_s64, aarch64_neon_vshld_n, 0), - NEONMAP1(vshld_n_u64, aarch64_neon_vshld_n, 0), - NEONMAP1(vshld_s64, aarch64_neon_vshlds, 0), - NEONMAP1(vshld_u64, aarch64_neon_vshldu, 0), - NEONMAP1(vshrd_n_s64, aarch64_neon_vshrds_n, 0), - NEONMAP1(vshrd_n_u64, aarch64_neon_vshrdu_n, 0), - NEONMAP1(vslid_n_s64, aarch64_neon_vsli, VectorRet), - NEONMAP1(vslid_n_u64, aarch64_neon_vsli, VectorRet), - NEONMAP1(vsqaddb_u8, aarch64_neon_vsqadd, VectorRet), - NEONMAP1(vsqaddd_u64, aarch64_neon_vsqadd, VectorRet), - NEONMAP1(vsqaddh_u16, aarch64_neon_vsqadd, VectorRet), - NEONMAP1(vsqadds_u32, aarch64_neon_vsqadd, VectorRet), - NEONMAP1(vsrad_n_s64, aarch64_neon_vsrads_n, 0), - NEONMAP1(vsrad_n_u64, aarch64_neon_vsradu_n, 0), - NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, VectorRet), - NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, VectorRet), - NEONMAP1(vsubd_s64, aarch64_neon_vsubds, 0), - NEONMAP1(vsubd_u64, aarch64_neon_vsubdu, 0), - NEONMAP1(vtstd_s64, aarch64_neon_vtstd, VectorRetGetArgs01), - NEONMAP1(vtstd_u64, aarch64_neon_vtstd, VectorRetGetArgs01), - NEONMAP1(vuqaddb_s8, 
aarch64_neon_vuqadd, VectorRet), - NEONMAP1(vuqaddd_s64, aarch64_neon_vuqadd, VectorRet), - NEONMAP1(vuqaddh_s16, aarch64_neon_vuqadd, VectorRet), - NEONMAP1(vuqadds_s32, aarch64_neon_vuqadd, VectorRet) -}; - static NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = { NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts), NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts), @@ -2739,7 +2390,6 @@ static NeonIntrinsicInfo ARM64SISDIntrinsicMap[] = { #undef NEONMAP2 static bool NEONSIMDIntrinsicsProvenSorted = false; -static bool AArch64SISDIntrinsicInfoProvenSorted = false; static bool ARM64SIMDIntrinsicsProvenSorted = false; static bool ARM64SISDIntrinsicsProvenSorted = false; @@ -2869,169 +2519,6 @@ static Value *EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF, return CGF.Builder.CreateBitCast(Result, ResultType, s); } -static Value *EmitAArch64ScalarBuiltinExpr(CodeGenFunction &CGF, - const NeonIntrinsicInfo &SISDInfo, - const CallExpr *E) { - unsigned BuiltinID = SISDInfo.BuiltinID; - unsigned int Int = SISDInfo.LLVMIntrinsic; - const char *s = SISDInfo.NameHint; - - SmallVector Ops; - for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) { - Ops.push_back(CGF.EmitScalarExpr(E->getArg(i))); - } - - // AArch64 scalar builtins are not overloaded, they do not have an extra - // argument that specifies the vector type, need to handle each case. - switch (BuiltinID) { - default: break; - case NEON::BI__builtin_neon_vdups_lane_f32: - case NEON::BI__builtin_neon_vdupd_lane_f64: - case NEON::BI__builtin_neon_vdups_laneq_f32: - case NEON::BI__builtin_neon_vdupd_laneq_f64: { - return CGF.Builder.CreateExtractElement(Ops[0], Ops[1], "vdup_lane"); - } - case NEON::BI__builtin_neon_vdupb_lane_i8: - case NEON::BI__builtin_neon_vduph_lane_i16: - case NEON::BI__builtin_neon_vdups_lane_i32: - case NEON::BI__builtin_neon_vdupd_lane_i64: - case NEON::BI__builtin_neon_vdupb_laneq_i8: - case NEON::BI__builtin_neon_vduph_laneq_i16: - case NEON::BI__builtin_neon_vdups_laneq_i32: - case NEON::BI__builtin_neon_vdupd_laneq_i64: { - // The backend treats Neon scalar types as v1ix types - // So we want to dup lane from any vector to v1ix vector - // with shufflevector - s = "vdup_lane"; - Value* SV = llvm::ConstantVector::getSplat(1, cast(Ops[1])); - Value *Result = CGF.Builder.CreateShuffleVector(Ops[0], Ops[0], SV, s); - llvm::Type *Ty = CGF.ConvertType(E->getCallReturnType()); - // AArch64 intrinsic one-element vector type cast to - // scalar type expected by the builtin - return CGF.Builder.CreateBitCast(Result, Ty, s); - } - case NEON::BI__builtin_neon_vqdmlalh_lane_s16 : - case NEON::BI__builtin_neon_vqdmlalh_laneq_s16 : - case NEON::BI__builtin_neon_vqdmlals_lane_s32 : - case NEON::BI__builtin_neon_vqdmlals_laneq_s32 : - case NEON::BI__builtin_neon_vqdmlslh_lane_s16 : - case NEON::BI__builtin_neon_vqdmlslh_laneq_s16 : - case NEON::BI__builtin_neon_vqdmlsls_lane_s32 : - case NEON::BI__builtin_neon_vqdmlsls_laneq_s32 : { - Int = Intrinsic::arm_neon_vqadds; - if (BuiltinID == NEON::BI__builtin_neon_vqdmlslh_lane_s16 || - BuiltinID == NEON::BI__builtin_neon_vqdmlslh_laneq_s16 || - BuiltinID == NEON::BI__builtin_neon_vqdmlsls_lane_s32 || - BuiltinID == NEON::BI__builtin_neon_vqdmlsls_laneq_s32) { - Int = Intrinsic::arm_neon_vqsubs; - } - // create vqdmull call with b * c[i] - llvm::Type *Ty = CGF.ConvertType(E->getArg(1)->getType()); - llvm::VectorType *OpVTy = llvm::VectorType::get(Ty, 1); - Ty = CGF.ConvertType(E->getArg(0)->getType()); - llvm::VectorType *ResVTy = 
llvm::VectorType::get(Ty, 1); - Value *F = CGF.CGM.getIntrinsic(Intrinsic::arm_neon_vqdmull, ResVTy); - Value *V = UndefValue::get(OpVTy); - llvm::Constant *CI = ConstantInt::get(CGF.Int32Ty, 0); - SmallVector MulOps; - MulOps.push_back(Ops[1]); - MulOps.push_back(Ops[2]); - MulOps[0] = CGF.Builder.CreateInsertElement(V, MulOps[0], CI); - MulOps[1] = CGF.Builder.CreateExtractElement(MulOps[1], Ops[3], "extract"); - MulOps[1] = CGF.Builder.CreateInsertElement(V, MulOps[1], CI); - Value *MulRes = CGF.Builder.CreateCall2(F, MulOps[0], MulOps[1]); - // create vqadds call with a +/- vqdmull result - F = CGF.CGM.getIntrinsic(Int, ResVTy); - SmallVector AddOps; - AddOps.push_back(Ops[0]); - AddOps.push_back(MulRes); - V = UndefValue::get(ResVTy); - AddOps[0] = CGF.Builder.CreateInsertElement(V, AddOps[0], CI); - Value *AddRes = CGF.Builder.CreateCall2(F, AddOps[0], AddOps[1]); - return CGF.Builder.CreateBitCast(AddRes, Ty); - } - case NEON::BI__builtin_neon_vfmas_lane_f32: - case NEON::BI__builtin_neon_vfmas_laneq_f32: - case NEON::BI__builtin_neon_vfmad_lane_f64: - case NEON::BI__builtin_neon_vfmad_laneq_f64: { - llvm::Type *Ty = CGF.ConvertType(E->getCallReturnType()); - Value *F = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty); - Ops[2] = CGF.Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); - return CGF.Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]); - } - // Scalar Floating-point Multiply Extended - case NEON::BI__builtin_neon_vmulxs_f32: - case NEON::BI__builtin_neon_vmulxd_f64: { - Int = Intrinsic::aarch64_neon_vmulx; - llvm::Type *Ty = CGF.ConvertType(E->getCallReturnType()); - return CGF.EmitNeonCall(CGF.CGM.getIntrinsic(Int, Ty), Ops, "vmulx"); - } - case NEON::BI__builtin_neon_vmul_n_f64: { - // v1f64 vmul_n_f64 should be mapped to Neon scalar mul lane - llvm::Type *VTy = GetNeonType(&CGF, - NeonTypeFlags(NeonTypeFlags::Float64, false, false)); - Ops[0] = CGF.Builder.CreateBitCast(Ops[0], VTy); - llvm::Value *Idx = llvm::ConstantInt::get(CGF.Int32Ty, 0); - Ops[0] = CGF.Builder.CreateExtractElement(Ops[0], Idx, "extract"); - Value *Result = CGF.Builder.CreateFMul(Ops[0], Ops[1]); - return CGF.Builder.CreateBitCast(Result, VTy); - } - case NEON::BI__builtin_neon_vget_lane_i8: - case NEON::BI__builtin_neon_vget_lane_i16: - case NEON::BI__builtin_neon_vget_lane_i32: - case NEON::BI__builtin_neon_vget_lane_i64: - case NEON::BI__builtin_neon_vget_lane_f32: - case NEON::BI__builtin_neon_vget_lane_f64: - case NEON::BI__builtin_neon_vgetq_lane_i8: - case NEON::BI__builtin_neon_vgetq_lane_i16: - case NEON::BI__builtin_neon_vgetq_lane_i32: - case NEON::BI__builtin_neon_vgetq_lane_i64: - case NEON::BI__builtin_neon_vgetq_lane_f32: - case NEON::BI__builtin_neon_vgetq_lane_f64: - return CGF.EmitARMBuiltinExpr(NEON::BI__builtin_neon_vget_lane_i8, E); - case NEON::BI__builtin_neon_vset_lane_i8: - case NEON::BI__builtin_neon_vset_lane_i16: - case NEON::BI__builtin_neon_vset_lane_i32: - case NEON::BI__builtin_neon_vset_lane_i64: - case NEON::BI__builtin_neon_vset_lane_f32: - case NEON::BI__builtin_neon_vset_lane_f64: - case NEON::BI__builtin_neon_vsetq_lane_i8: - case NEON::BI__builtin_neon_vsetq_lane_i16: - case NEON::BI__builtin_neon_vsetq_lane_i32: - case NEON::BI__builtin_neon_vsetq_lane_i64: - case NEON::BI__builtin_neon_vsetq_lane_f32: - case NEON::BI__builtin_neon_vsetq_lane_f64: - return CGF.EmitARMBuiltinExpr(NEON::BI__builtin_neon_vset_lane_i8, E); - - case NEON::BI__builtin_neon_vceqzd_s64: - case NEON::BI__builtin_neon_vceqzd_u64: - case NEON::BI__builtin_neon_vcgezd_s64: - case 
NEON::BI__builtin_neon_vcgtzd_s64: - case NEON::BI__builtin_neon_vclezd_s64: - case NEON::BI__builtin_neon_vcltzd_s64: - // Add implicit zero operand. - Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType())); - break; - case NEON::BI__builtin_neon_vceqzs_f32: - case NEON::BI__builtin_neon_vceqzd_f64: - case NEON::BI__builtin_neon_vcgezs_f32: - case NEON::BI__builtin_neon_vcgezd_f64: - case NEON::BI__builtin_neon_vcgtzs_f32: - case NEON::BI__builtin_neon_vcgtzd_f64: - case NEON::BI__builtin_neon_vclezs_f32: - case NEON::BI__builtin_neon_vclezd_f64: - case NEON::BI__builtin_neon_vcltzs_f32: - case NEON::BI__builtin_neon_vcltzd_f64: - // Add implicit zero operand. - Ops.push_back(llvm::Constant::getNullValue(CGF.FloatTy)); - break; - } - - // It didn't need any handling specific to the AArch64 backend, so defer to - // common code. - return EmitCommonNeonSISDBuiltinExpr(CGF, SISDInfo, Ops, E); -} - Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic, const char *NameHint, unsigned Modifier, const CallExpr *E, @@ -3534,796 +3021,6 @@ static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef Ops, return CGF.EmitNeonCall(TblF, TblOps, Name); } -static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, - unsigned BuiltinID, - const CallExpr *E) { - unsigned int Int = 0; - const char *s = nullptr; - - switch (BuiltinID) { - default: - return nullptr; - case NEON::BI__builtin_neon_vtbl1_v: - case NEON::BI__builtin_neon_vqtbl1_v: - case NEON::BI__builtin_neon_vqtbl1q_v: - case NEON::BI__builtin_neon_vtbl2_v: - case NEON::BI__builtin_neon_vqtbl2_v: - case NEON::BI__builtin_neon_vqtbl2q_v: - case NEON::BI__builtin_neon_vtbl3_v: - case NEON::BI__builtin_neon_vqtbl3_v: - case NEON::BI__builtin_neon_vqtbl3q_v: - case NEON::BI__builtin_neon_vtbl4_v: - case NEON::BI__builtin_neon_vqtbl4_v: - case NEON::BI__builtin_neon_vqtbl4q_v: - case NEON::BI__builtin_neon_vtbx1_v: - case NEON::BI__builtin_neon_vqtbx1_v: - case NEON::BI__builtin_neon_vqtbx1q_v: - case NEON::BI__builtin_neon_vtbx2_v: - case NEON::BI__builtin_neon_vqtbx2_v: - case NEON::BI__builtin_neon_vqtbx2q_v: - case NEON::BI__builtin_neon_vtbx3_v: - case NEON::BI__builtin_neon_vqtbx3_v: - case NEON::BI__builtin_neon_vqtbx3q_v: - case NEON::BI__builtin_neon_vtbx4_v: - case NEON::BI__builtin_neon_vqtbx4_v: - case NEON::BI__builtin_neon_vqtbx4q_v: - break; - } - - assert(E->getNumArgs() >= 3); - - // Get the last argument, which specifies the vector type. - llvm::APSInt Result; - const Expr *Arg = E->getArg(E->getNumArgs() - 1); - if (!Arg->isIntegerConstantExpr(Result, CGF.getContext())) - return nullptr; - - // Determine the type of this overloaded NEON intrinsic. - NeonTypeFlags Type(Result.getZExtValue()); - llvm::VectorType *VTy = GetNeonType(&CGF, Type); - llvm::Type *Ty = VTy; - if (!Ty) - return nullptr; - - SmallVector Ops; - for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) { - Ops.push_back(CGF.EmitScalarExpr(E->getArg(i))); - } - - unsigned nElts = VTy->getNumElements(); - - // AArch64 scalar builtins are not overloaded, they do not have an extra - // argument that specifies the vector type, need to handle each case. 
- SmallVector TblOps; - switch (BuiltinID) { - case NEON::BI__builtin_neon_vtbl1_v: { - TblOps.push_back(Ops[0]); - return packTBLDVectorList(CGF, TblOps, nullptr, Ops[1], Ty, - Intrinsic::aarch64_neon_vtbl1, "vtbl1"); - } - case NEON::BI__builtin_neon_vtbl2_v: { - TblOps.push_back(Ops[0]); - TblOps.push_back(Ops[1]); - return packTBLDVectorList(CGF, TblOps, nullptr, Ops[2], Ty, - Intrinsic::aarch64_neon_vtbl1, "vtbl1"); - } - case NEON::BI__builtin_neon_vtbl3_v: { - TblOps.push_back(Ops[0]); - TblOps.push_back(Ops[1]); - TblOps.push_back(Ops[2]); - return packTBLDVectorList(CGF, TblOps, nullptr, Ops[3], Ty, - Intrinsic::aarch64_neon_vtbl2, "vtbl2"); - } - case NEON::BI__builtin_neon_vtbl4_v: { - TblOps.push_back(Ops[0]); - TblOps.push_back(Ops[1]); - TblOps.push_back(Ops[2]); - TblOps.push_back(Ops[3]); - return packTBLDVectorList(CGF, TblOps, nullptr, Ops[4], Ty, - Intrinsic::aarch64_neon_vtbl2, "vtbl2"); - } - case NEON::BI__builtin_neon_vtbx1_v: { - TblOps.push_back(Ops[1]); - Value *TblRes = packTBLDVectorList(CGF, TblOps, nullptr, Ops[2], Ty, - Intrinsic::aarch64_neon_vtbl1, "vtbl1"); - - llvm::Constant *Eight = ConstantInt::get(VTy->getElementType(), 8); - Value* EightV = llvm::ConstantVector::getSplat(nElts, Eight); - Value *CmpRes = CGF.Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV); - CmpRes = CGF.Builder.CreateSExt(CmpRes, Ty); - - SmallVector BslOps; - BslOps.push_back(CmpRes); - BslOps.push_back(Ops[0]); - BslOps.push_back(TblRes); - Function *BslF = CGF.CGM.getIntrinsic(Intrinsic::arm_neon_vbsl, Ty); - return CGF.EmitNeonCall(BslF, BslOps, "vbsl"); - } - case NEON::BI__builtin_neon_vtbx2_v: { - TblOps.push_back(Ops[1]); - TblOps.push_back(Ops[2]); - return packTBLDVectorList(CGF, TblOps, Ops[0], Ops[3], Ty, - Intrinsic::aarch64_neon_vtbx1, "vtbx1"); - } - case NEON::BI__builtin_neon_vtbx3_v: { - TblOps.push_back(Ops[1]); - TblOps.push_back(Ops[2]); - TblOps.push_back(Ops[3]); - Value *TblRes = packTBLDVectorList(CGF, TblOps, nullptr, Ops[4], Ty, - Intrinsic::aarch64_neon_vtbl2, "vtbl2"); - - llvm::Constant *TwentyFour = ConstantInt::get(VTy->getElementType(), 24); - Value* TwentyFourV = llvm::ConstantVector::getSplat(nElts, TwentyFour); - Value *CmpRes = CGF.Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4], - TwentyFourV); - CmpRes = CGF.Builder.CreateSExt(CmpRes, Ty); - - SmallVector BslOps; - BslOps.push_back(CmpRes); - BslOps.push_back(Ops[0]); - BslOps.push_back(TblRes); - Function *BslF = CGF.CGM.getIntrinsic(Intrinsic::arm_neon_vbsl, Ty); - return CGF.EmitNeonCall(BslF, BslOps, "vbsl"); - } - case NEON::BI__builtin_neon_vtbx4_v: { - TblOps.push_back(Ops[1]); - TblOps.push_back(Ops[2]); - TblOps.push_back(Ops[3]); - TblOps.push_back(Ops[4]); - return packTBLDVectorList(CGF, TblOps, Ops[0], Ops[5], Ty, - Intrinsic::aarch64_neon_vtbx2, "vtbx2"); - } - case NEON::BI__builtin_neon_vqtbl1_v: - case NEON::BI__builtin_neon_vqtbl1q_v: - Int = Intrinsic::aarch64_neon_vtbl1; s = "vtbl1"; break; - case NEON::BI__builtin_neon_vqtbl2_v: - case NEON::BI__builtin_neon_vqtbl2q_v: { - Int = Intrinsic::aarch64_neon_vtbl2; s = "vtbl2"; break; - case NEON::BI__builtin_neon_vqtbl3_v: - case NEON::BI__builtin_neon_vqtbl3q_v: - Int = Intrinsic::aarch64_neon_vtbl3; s = "vtbl3"; break; - case NEON::BI__builtin_neon_vqtbl4_v: - case NEON::BI__builtin_neon_vqtbl4q_v: - Int = Intrinsic::aarch64_neon_vtbl4; s = "vtbl4"; break; - case NEON::BI__builtin_neon_vqtbx1_v: - case NEON::BI__builtin_neon_vqtbx1q_v: - Int = Intrinsic::aarch64_neon_vtbx1; s = "vtbx1"; break; - case 
NEON::BI__builtin_neon_vqtbx2_v: - case NEON::BI__builtin_neon_vqtbx2q_v: - Int = Intrinsic::aarch64_neon_vtbx2; s = "vtbx2"; break; - case NEON::BI__builtin_neon_vqtbx3_v: - case NEON::BI__builtin_neon_vqtbx3q_v: - Int = Intrinsic::aarch64_neon_vtbx3; s = "vtbx3"; break; - case NEON::BI__builtin_neon_vqtbx4_v: - case NEON::BI__builtin_neon_vqtbx4q_v: - Int = Intrinsic::aarch64_neon_vtbx4; s = "vtbx4"; break; - } - } - - if (!Int) - return nullptr; - - Function *F = CGF.CGM.getIntrinsic(Int, Ty); - return CGF.EmitNeonCall(F, Ops, s); -} - -Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, - const CallExpr *E) { - - // Process AArch64 scalar builtins - llvm::ArrayRef SISDInfo(AArch64SISDIntrinsicInfo); - const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap( - SISDInfo, BuiltinID, AArch64SISDIntrinsicInfoProvenSorted); - - if (Builtin) { - Value *Result = EmitAArch64ScalarBuiltinExpr(*this, *Builtin, E); - assert(Result && "SISD intrinsic should have been handled"); - return Result; - } - - // Process AArch64 table lookup builtins - if (Value *Result = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E)) - return Result; - - if (BuiltinID == AArch64::BI__clear_cache) { - assert(E->getNumArgs() == 2 && - "Variadic __clear_cache slipped through on AArch64"); - - const FunctionDecl *FD = E->getDirectCallee(); - SmallVector Ops; - for (unsigned i = 0; i < E->getNumArgs(); i++) - Ops.push_back(EmitScalarExpr(E->getArg(i))); - llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType()); - llvm::FunctionType *FTy = cast(Ty); - StringRef Name = FD->getName(); - return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops); - } - - SmallVector Ops; - llvm::Value *Align = nullptr; // Alignment for load/store - - if (BuiltinID == NEON::BI__builtin_neon_vldrq_p128) { - Value *Op = EmitScalarExpr(E->getArg(0)); - unsigned addressSpace = - cast(Op->getType())->getAddressSpace(); - llvm::Type *Ty = llvm::Type::getFP128PtrTy(getLLVMContext(), addressSpace); - Op = Builder.CreateBitCast(Op, Ty); - Op = Builder.CreateLoad(Op); - Ty = llvm::Type::getIntNTy(getLLVMContext(), 128); - return Builder.CreateBitCast(Op, Ty); - } - if (BuiltinID == NEON::BI__builtin_neon_vstrq_p128) { - Value *Op0 = EmitScalarExpr(E->getArg(0)); - unsigned addressSpace = - cast(Op0->getType())->getAddressSpace(); - llvm::Type *PTy = llvm::Type::getFP128PtrTy(getLLVMContext(), addressSpace); - Op0 = Builder.CreateBitCast(Op0, PTy); - Value *Op1 = EmitScalarExpr(E->getArg(1)); - llvm::Type *Ty = llvm::Type::getFP128Ty(getLLVMContext()); - Op1 = Builder.CreateBitCast(Op1, Ty); - return Builder.CreateStore(Op1, Op0); - } - for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) { - if (i == 0) { - switch (BuiltinID) { - case NEON::BI__builtin_neon_vld1_v: - case NEON::BI__builtin_neon_vld1q_v: - case NEON::BI__builtin_neon_vst1_v: - case NEON::BI__builtin_neon_vst1q_v: - case NEON::BI__builtin_neon_vst2_v: - case NEON::BI__builtin_neon_vst2q_v: - case NEON::BI__builtin_neon_vst3_v: - case NEON::BI__builtin_neon_vst3q_v: - case NEON::BI__builtin_neon_vst4_v: - case NEON::BI__builtin_neon_vst4q_v: - case NEON::BI__builtin_neon_vst1_x2_v: - case NEON::BI__builtin_neon_vst1q_x2_v: - case NEON::BI__builtin_neon_vst1_x3_v: - case NEON::BI__builtin_neon_vst1q_x3_v: - case NEON::BI__builtin_neon_vst1_x4_v: - case NEON::BI__builtin_neon_vst1q_x4_v: - // Handle ld1/st1 lane in this function a little different from ARM. 
- case NEON::BI__builtin_neon_vld1_lane_v: - case NEON::BI__builtin_neon_vld1q_lane_v: - case NEON::BI__builtin_neon_vst1_lane_v: - case NEON::BI__builtin_neon_vst1q_lane_v: - case NEON::BI__builtin_neon_vst2_lane_v: - case NEON::BI__builtin_neon_vst2q_lane_v: - case NEON::BI__builtin_neon_vst3_lane_v: - case NEON::BI__builtin_neon_vst3q_lane_v: - case NEON::BI__builtin_neon_vst4_lane_v: - case NEON::BI__builtin_neon_vst4q_lane_v: - case NEON::BI__builtin_neon_vld1_dup_v: - case NEON::BI__builtin_neon_vld1q_dup_v: - // Get the alignment for the argument in addition to the value; - // we'll use it later. - std::pair Src = - EmitPointerWithAlignment(E->getArg(0)); - Ops.push_back(Src.first); - Align = Builder.getInt32(Src.second); - continue; - } - } - if (i == 1) { - switch (BuiltinID) { - case NEON::BI__builtin_neon_vld2_v: - case NEON::BI__builtin_neon_vld2q_v: - case NEON::BI__builtin_neon_vld3_v: - case NEON::BI__builtin_neon_vld3q_v: - case NEON::BI__builtin_neon_vld4_v: - case NEON::BI__builtin_neon_vld4q_v: - case NEON::BI__builtin_neon_vld1_x2_v: - case NEON::BI__builtin_neon_vld1q_x2_v: - case NEON::BI__builtin_neon_vld1_x3_v: - case NEON::BI__builtin_neon_vld1q_x3_v: - case NEON::BI__builtin_neon_vld1_x4_v: - case NEON::BI__builtin_neon_vld1q_x4_v: - // Handle ld1/st1 dup lane in this function a little different from ARM. - case NEON::BI__builtin_neon_vld2_dup_v: - case NEON::BI__builtin_neon_vld2q_dup_v: - case NEON::BI__builtin_neon_vld3_dup_v: - case NEON::BI__builtin_neon_vld3q_dup_v: - case NEON::BI__builtin_neon_vld4_dup_v: - case NEON::BI__builtin_neon_vld4q_dup_v: - case NEON::BI__builtin_neon_vld2_lane_v: - case NEON::BI__builtin_neon_vld2q_lane_v: - case NEON::BI__builtin_neon_vld3_lane_v: - case NEON::BI__builtin_neon_vld3q_lane_v: - case NEON::BI__builtin_neon_vld4_lane_v: - case NEON::BI__builtin_neon_vld4q_lane_v: - // Get the alignment for the argument in addition to the value; - // we'll use it later. - std::pair Src = - EmitPointerWithAlignment(E->getArg(1)); - Ops.push_back(Src.first); - Align = Builder.getInt32(Src.second); - continue; - } - } - Ops.push_back(EmitScalarExpr(E->getArg(i))); - } - - // Get the last argument, which specifies the vector type. - llvm::APSInt Result; - const Expr *Arg = E->getArg(E->getNumArgs() - 1); - if (!Arg->isIntegerConstantExpr(Result, getContext())) - return nullptr; - - // Determine the type of this overloaded NEON intrinsic. - NeonTypeFlags Type(Result.getZExtValue()); - bool usgn = Type.isUnsigned(); - bool quad = Type.isQuad(); - - llvm::VectorType *VTy = GetNeonType(this, Type); - llvm::Type *Ty = VTy; - if (!Ty) - return nullptr; - - // Many NEON builtins have identical semantics and uses in ARM and - // AArch64. Emit these in a single function. - llvm::ArrayRef IntrinsicMap(ARMSIMDIntrinsicMap); - Builtin = findNeonIntrinsicInMap(IntrinsicMap, BuiltinID, - NEONSIMDIntrinsicsProvenSorted); - if (Builtin) - return EmitCommonNeonBuiltinExpr( - Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic, - Builtin->NameHint, Builtin->TypeModifier, E, Ops, Align); - - unsigned Int; - switch (BuiltinID) { - default: - return nullptr; - - // AArch64 builtins mapping to legacy ARM v7 builtins. - // FIXME: the mapped builtins listed correspond to what has been tested - // in aarch64-neon-intrinsics.c so far. - - // Shift by immediate - case NEON::BI__builtin_neon_vrshr_n_v: - case NEON::BI__builtin_neon_vrshrq_n_v: - Int = usgn ? 
Intrinsic::aarch64_neon_vurshr - : Intrinsic::aarch64_neon_vsrshr; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n"); - case NEON::BI__builtin_neon_vsra_n_v: - if (VTy->getElementType()->isIntegerTy(64)) { - Int = usgn ? Intrinsic::aarch64_neon_vsradu_n - : Intrinsic::aarch64_neon_vsrads_n; - return EmitNeonCall(CGM.getIntrinsic(Int), Ops, "vsra_n"); - } - return EmitARMBuiltinExpr(NEON::BI__builtin_neon_vsra_n_v, E); - case NEON::BI__builtin_neon_vsraq_n_v: - return EmitARMBuiltinExpr(NEON::BI__builtin_neon_vsraq_n_v, E); - case NEON::BI__builtin_neon_vrsra_n_v: - if (VTy->getElementType()->isIntegerTy(64)) { - Int = usgn ? Intrinsic::aarch64_neon_vrsradu_n - : Intrinsic::aarch64_neon_vrsrads_n; - return EmitNeonCall(CGM.getIntrinsic(Int), Ops, "vrsra_n"); - } - // fall through - case NEON::BI__builtin_neon_vrsraq_n_v: { - Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - Ops[1] = Builder.CreateBitCast(Ops[1], Ty); - Int = usgn ? Intrinsic::aarch64_neon_vurshr - : Intrinsic::aarch64_neon_vsrshr; - Ops[1] = Builder.CreateCall2(CGM.getIntrinsic(Int, Ty), Ops[1], Ops[2]); - return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n"); - } - case NEON::BI__builtin_neon_vqshlu_n_v: - case NEON::BI__builtin_neon_vqshluq_n_v: - Int = Intrinsic::aarch64_neon_vsqshlu; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n"); - case NEON::BI__builtin_neon_vsri_n_v: - case NEON::BI__builtin_neon_vsriq_n_v: - Int = Intrinsic::aarch64_neon_vsri; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsri_n"); - case NEON::BI__builtin_neon_vsli_n_v: - case NEON::BI__builtin_neon_vsliq_n_v: - Int = Intrinsic::aarch64_neon_vsli; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsli_n"); - case NEON::BI__builtin_neon_vqshrun_n_v: - Int = Intrinsic::aarch64_neon_vsqshrun; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n"); - case NEON::BI__builtin_neon_vrshrn_n_v: - Int = Intrinsic::aarch64_neon_vrshrn; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n"); - case NEON::BI__builtin_neon_vqrshrun_n_v: - Int = Intrinsic::aarch64_neon_vsqrshrun; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n"); - case NEON::BI__builtin_neon_vqshrn_n_v: - Int = usgn ? Intrinsic::aarch64_neon_vuqshrn - : Intrinsic::aarch64_neon_vsqshrn; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n"); - case NEON::BI__builtin_neon_vqrshrn_n_v: - Int = usgn ? Intrinsic::aarch64_neon_vuqrshrn - : Intrinsic::aarch64_neon_vsqrshrn; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n"); - - // Convert - case NEON::BI__builtin_neon_vcvt_n_f64_v: - case NEON::BI__builtin_neon_vcvtq_n_f64_v: { - llvm::Type *FloatTy = - GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad)); - llvm::Type *Tys[2] = { FloatTy, Ty }; - Int = usgn ? 
Intrinsic::arm_neon_vcvtfxu2fp - : Intrinsic::arm_neon_vcvtfxs2fp; - Function *F = CGM.getIntrinsic(Int, Tys); - return EmitNeonCall(F, Ops, "vcvt_n"); - } - - // Load/Store - case NEON::BI__builtin_neon_vld1_x2_v: - case NEON::BI__builtin_neon_vld1q_x2_v: - case NEON::BI__builtin_neon_vld1_x3_v: - case NEON::BI__builtin_neon_vld1q_x3_v: - case NEON::BI__builtin_neon_vld1_x4_v: - case NEON::BI__builtin_neon_vld1q_x4_v: { - unsigned Int; - switch (BuiltinID) { - case NEON::BI__builtin_neon_vld1_x2_v: - case NEON::BI__builtin_neon_vld1q_x2_v: - Int = Intrinsic::aarch64_neon_vld1x2; - break; - case NEON::BI__builtin_neon_vld1_x3_v: - case NEON::BI__builtin_neon_vld1q_x3_v: - Int = Intrinsic::aarch64_neon_vld1x3; - break; - case NEON::BI__builtin_neon_vld1_x4_v: - case NEON::BI__builtin_neon_vld1q_x4_v: - Int = Intrinsic::aarch64_neon_vld1x4; - break; - } - Function *F = CGM.getIntrinsic(Int, Ty); - Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld1xN"); - Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); - Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - return Builder.CreateStore(Ops[1], Ops[0]); - } - case NEON::BI__builtin_neon_vst1_x2_v: - case NEON::BI__builtin_neon_vst1q_x2_v: - case NEON::BI__builtin_neon_vst1_x3_v: - case NEON::BI__builtin_neon_vst1q_x3_v: - case NEON::BI__builtin_neon_vst1_x4_v: - case NEON::BI__builtin_neon_vst1q_x4_v: { - Ops.push_back(Align); - unsigned Int; - switch (BuiltinID) { - case NEON::BI__builtin_neon_vst1_x2_v: - case NEON::BI__builtin_neon_vst1q_x2_v: - Int = Intrinsic::aarch64_neon_vst1x2; - break; - case NEON::BI__builtin_neon_vst1_x3_v: - case NEON::BI__builtin_neon_vst1q_x3_v: - Int = Intrinsic::aarch64_neon_vst1x3; - break; - case NEON::BI__builtin_neon_vst1_x4_v: - case NEON::BI__builtin_neon_vst1q_x4_v: - Int = Intrinsic::aarch64_neon_vst1x4; - break; - } - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, ""); - } - case NEON::BI__builtin_neon_vld1_lane_v: - case NEON::BI__builtin_neon_vld1q_lane_v: { - Ops[1] = Builder.CreateBitCast(Ops[1], Ty); - Ty = llvm::PointerType::getUnqual(VTy->getElementType()); - Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - LoadInst *Ld = Builder.CreateLoad(Ops[0]); - Ld->setAlignment(cast(Align)->getZExtValue()); - return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane"); - } - case NEON::BI__builtin_neon_vst1_lane_v: - case NEON::BI__builtin_neon_vst1q_lane_v: { - Ops[1] = Builder.CreateBitCast(Ops[1], Ty); - Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]); - Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); - StoreInst *St = - Builder.CreateStore(Ops[1], Builder.CreateBitCast(Ops[0], Ty)); - St->setAlignment(cast(Align)->getZExtValue()); - return St; - } - case NEON::BI__builtin_neon_vld2_dup_v: - case NEON::BI__builtin_neon_vld2q_dup_v: - case NEON::BI__builtin_neon_vld3_dup_v: - case NEON::BI__builtin_neon_vld3q_dup_v: - case NEON::BI__builtin_neon_vld4_dup_v: - case NEON::BI__builtin_neon_vld4q_dup_v: { - // Handle 64-bit x 1 elements as a special-case. There is no "dup" needed. 
- if (VTy->getElementType()->getPrimitiveSizeInBits() == 64 && - VTy->getNumElements() == 1) { - switch (BuiltinID) { - case NEON::BI__builtin_neon_vld2_dup_v: - Int = Intrinsic::arm_neon_vld2; - break; - case NEON::BI__builtin_neon_vld3_dup_v: - Int = Intrinsic::arm_neon_vld3; - break; - case NEON::BI__builtin_neon_vld4_dup_v: - Int = Intrinsic::arm_neon_vld4; - break; - default: - llvm_unreachable("unknown vld_dup intrinsic?"); - } - Function *F = CGM.getIntrinsic(Int, Ty); - Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld_dup"); - Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); - Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - return Builder.CreateStore(Ops[1], Ops[0]); - } - switch (BuiltinID) { - case NEON::BI__builtin_neon_vld2_dup_v: - case NEON::BI__builtin_neon_vld2q_dup_v: - Int = Intrinsic::arm_neon_vld2lane; - break; - case NEON::BI__builtin_neon_vld3_dup_v: - case NEON::BI__builtin_neon_vld3q_dup_v: - Int = Intrinsic::arm_neon_vld3lane; - break; - case NEON::BI__builtin_neon_vld4_dup_v: - case NEON::BI__builtin_neon_vld4q_dup_v: - Int = Intrinsic::arm_neon_vld4lane; - break; - } - Function *F = CGM.getIntrinsic(Int, Ty); - llvm::StructType *STy = cast(F->getReturnType()); - - SmallVector Args; - Args.push_back(Ops[1]); - Args.append(STy->getNumElements(), UndefValue::get(Ty)); - - llvm::Constant *CI = ConstantInt::get(Int32Ty, 0); - Args.push_back(CI); - Args.push_back(Align); - - Ops[1] = Builder.CreateCall(F, Args, "vld_dup"); - // splat lane 0 to all elts in each vector of the result. - for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { - Value *Val = Builder.CreateExtractValue(Ops[1], i); - Value *Elt = Builder.CreateBitCast(Val, Ty); - Elt = EmitNeonSplat(Elt, CI); - Elt = Builder.CreateBitCast(Elt, Val->getType()); - Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i); - } - Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); - Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - return Builder.CreateStore(Ops[1], Ops[0]); - } - - case NEON::BI__builtin_neon_vmul_lane_v: - case NEON::BI__builtin_neon_vmul_laneq_v: { - // v1f64 vmul_lane should be mapped to Neon scalar mul lane - bool Quad = false; - if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v) - Quad = true; - Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); - llvm::Type *VTy = GetNeonType(this, - NeonTypeFlags(NeonTypeFlags::Float64, false, Quad)); - Ops[1] = Builder.CreateBitCast(Ops[1], VTy); - Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract"); - Value *Result = Builder.CreateFMul(Ops[0], Ops[1]); - return Builder.CreateBitCast(Result, Ty); - } - - // AArch64-only builtins - case NEON::BI__builtin_neon_vfmaq_laneq_v: { - Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); - Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - Ops[1] = Builder.CreateBitCast(Ops[1], Ty); - - Ops[2] = Builder.CreateBitCast(Ops[2], Ty); - Ops[2] = EmitNeonSplat(Ops[2], cast(Ops[3])); - return Builder.CreateCall3(F, Ops[2], Ops[1], Ops[0]); - } - case NEON::BI__builtin_neon_vfmaq_lane_v: { - Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); - Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - Ops[1] = Builder.CreateBitCast(Ops[1], Ty); - - llvm::VectorType *VTy = cast(Ty); - llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(), - VTy->getNumElements() / 2); - Ops[2] = Builder.CreateBitCast(Ops[2], STy); - Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), - cast(Ops[3])); - Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane"); - - return Builder.CreateCall3(F, Ops[2], Ops[1], 
Ops[0]); - } - case NEON::BI__builtin_neon_vfma_lane_v: { - llvm::VectorType *VTy = cast(Ty); - // v1f64 fma should be mapped to Neon scalar f64 fma - if (VTy && VTy->getElementType() == DoubleTy) { - Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); - Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy); - llvm::Type *VTy = GetNeonType(this, - NeonTypeFlags(NeonTypeFlags::Float64, false, false)); - Ops[2] = Builder.CreateBitCast(Ops[2], VTy); - Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); - Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy); - Value *Result = Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]); - return Builder.CreateBitCast(Result, Ty); - } - Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); - Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - Ops[1] = Builder.CreateBitCast(Ops[1], Ty); - - Ops[2] = Builder.CreateBitCast(Ops[2], Ty); - Ops[2] = EmitNeonSplat(Ops[2], cast(Ops[3])); - return Builder.CreateCall3(F, Ops[2], Ops[1], Ops[0]); - } - case NEON::BI__builtin_neon_vfma_laneq_v: { - llvm::VectorType *VTy = cast(Ty); - // v1f64 fma should be mapped to Neon scalar f64 fma - if (VTy && VTy->getElementType() == DoubleTy) { - Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); - Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy); - llvm::Type *VTy = GetNeonType(this, - NeonTypeFlags(NeonTypeFlags::Float64, false, true)); - Ops[2] = Builder.CreateBitCast(Ops[2], VTy); - Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); - Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy); - Value *Result = Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]); - return Builder.CreateBitCast(Result, Ty); - } - Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); - Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - Ops[1] = Builder.CreateBitCast(Ops[1], Ty); - - llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(), - VTy->getNumElements() * 2); - Ops[2] = Builder.CreateBitCast(Ops[2], STy); - Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), - cast(Ops[3])); - Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane"); - - return Builder.CreateCall3(F, Ops[2], Ops[1], Ops[0]); - } - case NEON::BI__builtin_neon_vfms_v: - case NEON::BI__builtin_neon_vfmsq_v: { - Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); - Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - Ops[1] = Builder.CreateBitCast(Ops[1], Ty); - Ops[1] = Builder.CreateFNeg(Ops[1]); - Ops[2] = Builder.CreateBitCast(Ops[2], Ty); - - // LLVM's fma intrinsic puts the accumulator in the last position, but the - // AArch64 intrinsic has it first. - return Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]); - } - case NEON::BI__builtin_neon_vmaxnm_v: - case NEON::BI__builtin_neon_vmaxnmq_v: { - Int = Intrinsic::aarch64_neon_vmaxnm; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm"); - } - case NEON::BI__builtin_neon_vminnm_v: - case NEON::BI__builtin_neon_vminnmq_v: { - Int = Intrinsic::aarch64_neon_vminnm; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm"); - } - case NEON::BI__builtin_neon_vpmaxnm_v: - case NEON::BI__builtin_neon_vpmaxnmq_v: { - Int = Intrinsic::aarch64_neon_vpmaxnm; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm"); - } - case NEON::BI__builtin_neon_vpminnm_v: - case NEON::BI__builtin_neon_vpminnmq_v: { - Int = Intrinsic::aarch64_neon_vpminnm; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm"); - } - case NEON::BI__builtin_neon_vpmaxq_v: { - Int = usgn ? 
Intrinsic::arm_neon_vpmaxu : Intrinsic::arm_neon_vpmaxs; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax"); - } - case NEON::BI__builtin_neon_vpminq_v: { - Int = usgn ? Intrinsic::arm_neon_vpminu : Intrinsic::arm_neon_vpmins; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin"); - } - case NEON::BI__builtin_neon_vmulx_v: - case NEON::BI__builtin_neon_vmulxq_v: { - Int = Intrinsic::aarch64_neon_vmulx; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx"); - } - case NEON::BI__builtin_neon_vsqadd_v: - case NEON::BI__builtin_neon_vsqaddq_v: { - Int = Intrinsic::aarch64_neon_usqadd; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd"); - } - case NEON::BI__builtin_neon_vuqadd_v: - case NEON::BI__builtin_neon_vuqaddq_v: { - Int = Intrinsic::aarch64_neon_suqadd; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd"); - } - case NEON::BI__builtin_neon_vrbit_v: - case NEON::BI__builtin_neon_vrbitq_v: - Int = Intrinsic::aarch64_neon_rbit; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit"); - case NEON::BI__builtin_neon_vcvt_f32_f64: { - NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true); - Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag)); - return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt"); - } - case NEON::BI__builtin_neon_vcvtx_f32_v: { - llvm::Type *EltTy = FloatTy; - llvm::Type *ResTy = llvm::VectorType::get(EltTy, 2); - llvm::Type *Tys[2] = { ResTy, Ty }; - Int = Intrinsic::aarch64_neon_vcvtxn; - return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtx_f32_f64"); - } - case NEON::BI__builtin_neon_vcvt_f64_f32: { - llvm::Type *OpTy = - GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, false)); - Ops[0] = Builder.CreateBitCast(Ops[0], OpTy); - return Builder.CreateFPExt(Ops[0], Ty, "vcvt"); - } - case NEON::BI__builtin_neon_vcvt_f64_v: - case NEON::BI__builtin_neon_vcvtq_f64_v: { - Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad)); - return usgn ? 
Builder.CreateUIToFP(Ops[0], Ty, "vcvt") - : Builder.CreateSIToFP(Ops[0], Ty, "vcvt"); - } - case NEON::BI__builtin_neon_vrndn_v: - case NEON::BI__builtin_neon_vrndnq_v: { - Int = Intrinsic::aarch64_neon_frintn; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn"); - } - case NEON::BI__builtin_neon_vrnda_v: - case NEON::BI__builtin_neon_vrndaq_v: { - Int = Intrinsic::round; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda"); - } - case NEON::BI__builtin_neon_vrndp_v: - case NEON::BI__builtin_neon_vrndpq_v: { - Int = Intrinsic::ceil; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp"); - } - case NEON::BI__builtin_neon_vrndm_v: - case NEON::BI__builtin_neon_vrndmq_v: { - Int = Intrinsic::floor; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm"); - } - case NEON::BI__builtin_neon_vrndx_v: - case NEON::BI__builtin_neon_vrndxq_v: { - Int = Intrinsic::rint; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx"); - } - case NEON::BI__builtin_neon_vrnd_v: - case NEON::BI__builtin_neon_vrndq_v: { - Int = Intrinsic::trunc; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd"); - } - case NEON::BI__builtin_neon_vrndi_v: - case NEON::BI__builtin_neon_vrndiq_v: { - Int = Intrinsic::nearbyint; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndi"); - } - case NEON::BI__builtin_neon_vsqrt_v: - case NEON::BI__builtin_neon_vsqrtq_v: { - Int = Intrinsic::sqrt; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt"); - } - case NEON::BI__builtin_neon_vceqz_v: - case NEON::BI__builtin_neon_vceqzq_v: - return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ, - ICmpInst::ICMP_EQ, "vceqz"); - case NEON::BI__builtin_neon_vcgez_v: - case NEON::BI__builtin_neon_vcgezq_v: - return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE, - ICmpInst::ICMP_SGE, "vcgez"); - case NEON::BI__builtin_neon_vclez_v: - case NEON::BI__builtin_neon_vclezq_v: - return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE, - ICmpInst::ICMP_SLE, "vclez"); - case NEON::BI__builtin_neon_vcgtz_v: - case NEON::BI__builtin_neon_vcgtzq_v: - return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT, - ICmpInst::ICMP_SGT, "vcgtz"); - case NEON::BI__builtin_neon_vcltz_v: - case NEON::BI__builtin_neon_vcltzq_v: - return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT, - ICmpInst::ICMP_SLT, "vcltz"); - } -} - Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { unsigned HintID = static_cast(-1); -- cgit v1.2.3
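
For readers skimming the patch, the first hunk is the one that changes behaviour: builtin emission for the aarch64/aarch64_be triples now shares the ARM64 path instead of calling the deleted EmitAArch64BuiltinExpr. Below is a minimal, self-contained C++ sketch of that dispatch, not the actual Clang code; the Arch enum and the string results are illustrative stand-ins for llvm::Triple::ArchType and the real emitter calls.

```cpp
// Sketch of the post-patch target-builtin dispatch. Stand-in types only:
// the real code switches on llvm::Triple::ArchType inside
// CodeGenFunction::EmitTargetBuiltinExpr and returns llvm::Value*.
#include <cstdio>

enum class Arch { arm, armeb, thumb, thumbeb, aarch64, aarch64_be, arm64, arm64_be, other };

const char *emitTargetBuiltin(Arch A) {
  switch (A) {
  case Arch::arm:
  case Arch::armeb:
  case Arch::thumb:
  case Arch::thumbeb:
    return "EmitARMBuiltinExpr";      // 32-bit ARM path, unchanged by this patch
  case Arch::aarch64:                 // previously: EmitAArch64BuiltinExpr
  case Arch::aarch64_be:
  case Arch::arm64:
  case Arch::arm64_be:
    return "EmitARM64BuiltinExpr";    // single 64-bit path after the backend removal
  default:
    return "no target builtin emitter";
  }
}

int main() {
  std::printf("%s\n", emitTargetBuiltin(Arch::aarch64)); // prints EmitARM64BuiltinExpr
}
```

As a rough sanity check (assuming a clang built from this revision), compiling a file that includes arm_neon.h with `clang -target aarch64-linux-gnu -S -emit-llvm` should now exercise EmitARM64BuiltinExpr for any NEON builtins it contains.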
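The bulk of the deletion is the AArch64SISDIntrinsicInfo table and its one-time sorted check (AArch64SISDIntrinsicInfoProvenSorted); the equivalent ARM and ARM64 tables survive. The sketch below illustrates the lookup pattern those tables rely on, simplified and with hypothetical signatures rather than Clang's exact findNeonIntrinsicInMap declaration (for example, the AltLLVMIntrinsic field is omitted and pointers replace ArrayRef).

```cpp
// Simplified sketch of the sorted intrinsic-map lookup used by the NEONMAP
// tables in CGBuiltin.cpp. Field set and signatures are illustrative.
#include <algorithm>
#include <cassert>

struct NeonIntrinsicInfo {
  unsigned BuiltinID;      // NEON::BI__builtin_neon_* value
  unsigned LLVMIntrinsic;  // Intrinsic::* to emit
  const char *NameHint;    // name given to the emitted call
  unsigned TypeModifier;   // flags such as VectorRet | Add1ArgType

  bool operator<(unsigned RHSBuiltinID) const { return BuiltinID < RHSBuiltinID; }
};

static const NeonIntrinsicInfo *
findNeonIntrinsicInMap(const NeonIntrinsicInfo *Begin, const NeonIntrinsicInfo *End,
                       unsigned BuiltinID, bool &MapProvenSorted) {
  // Verify the sortedness invariant once per table (only in +Asserts builds).
  if (!MapProvenSorted) {
    assert(std::is_sorted(Begin, End,
                          [](const NeonIntrinsicInfo &A,
                             const NeonIntrinsicInfo &B) {
                            return A.BuiltinID < B.BuiltinID;
                          }) &&
           "NeonIntrinsicInfo table must stay sorted by BuiltinID");
    MapProvenSorted = true;
  }
  // Binary search by builtin ID; a miss means the caller handles the builtin itself.
  const NeonIntrinsicInfo *It = std::lower_bound(Begin, End, BuiltinID);
  if (It != End && It->BuiltinID == BuiltinID)
    return It;
  return nullptr;
}

int main() {
  // Tiny stand-in table; the IDs are made up for the demo, not real NEON builtin IDs.
  static const NeonIntrinsicInfo Map[] = {
      {10, 100, "vabd", 0},
      {20, 200, "vaddv", 0},
      {30, 300, "vmaxv", 0},
  };
  bool Sorted = false;
  const NeonIntrinsicInfo *Hit = findNeonIntrinsicInMap(Map, Map + 3, 20, Sorted);
  return Hit ? 0 : 1;  // Hit->NameHint is "vaddv"
}
```

The sorted-by-BuiltinID invariant is what makes the binary search valid; the assert documents it for anyone adding entries to one of the surviving tables.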