| author | Tim Northover <tnorthover@apple.com> | 2014-05-24 12:51:25 +0000 |
|---|---|---|
| committer | Tim Northover <tnorthover@apple.com> | 2014-05-24 12:51:25 +0000 |
| commit | 25e8a6754e3f4c447ddfe5b742c01c16cb050b67 (patch) | |
| tree | 021b38deaa4234437746f2f6cee753c8dc59a2ce /clang/lib/CodeGen/CGBuiltin.cpp | |
| parent | 3b0846e8f76899815159389be96d7184ad015a8a (diff) | |
AArch64/ARM64: update Clang after AArch64 removal.
A few (mostly CodeGen) parts of Clang were tightly coupled to the
AArch64 backend. Now that it's gone, they will not even compile.
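For context, the functional part of the change is the builtin dispatch in EmitTargetBuiltinExpr: the aarch64/aarch64_be triples now fall through to the ARM64 lowering instead of the removed AArch64 one. A minimal excerpt of how that switch reads after this patch, taken from the first hunk below (the surrounding function and the other target arches are elided):

```cpp
// CGBuiltin.cpp after this change: aarch64 and aarch64_be share the
// ARM64 builtin lowering rather than calling the removed
// EmitAArch64BuiltinExpr.
Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
                                              const CallExpr *E) {
  switch (getTarget().getTriple().getArch()) {
  case llvm::Triple::arm:
  case llvm::Triple::armeb:
  case llvm::Triple::thumb:
  case llvm::Triple::thumbeb:
    return EmitARMBuiltinExpr(BuiltinID, E);
  case llvm::Triple::aarch64:
  case llvm::Triple::aarch64_be:
  case llvm::Triple::arm64:
  case llvm::Triple::arm64_be:
    return EmitARM64BuiltinExpr(BuiltinID, E);
  default:
    return nullptr; // other targets (x86, PPC, ...) elided here
  }
}
```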
I've also deduplicated RUN lines in many of the AArch64 tests. This
might improve "make check-all" time noticeably: some of those NEON
tests were monsters.
llvm-svn: 209578
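To illustrate the RUN-line deduplication mentioned above, here is a hypothetical test header (not taken from this commit): where a NEON test previously carried near-identical RUN lines for the aarch64 and arm64 triples feeding the same FileCheck patterns, it can now keep a single one.

```cpp
// Hypothetical before: two RUN lines driving the same CHECK patterns.
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -emit-llvm -o - %s | FileCheck %s

// Hypothetical after: a single RUN line covers the remaining backend.
// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -emit-llvm -o - %s | FileCheck %s
```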
Diffstat (limited to 'clang/lib/CodeGen/CGBuiltin.cpp')
-rw-r--r-- | clang/lib/CodeGen/CGBuiltin.cpp | 1307 |
1 file changed, 2 insertions(+), 1305 deletions(-)
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 9d692d8e1e2..585db1778bf 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -1637,14 +1637,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { switch (getTarget().getTriple().getArch()) { - case llvm::Triple::aarch64: - case llvm::Triple::aarch64_be: - return EmitAArch64BuiltinExpr(BuiltinID, E); case llvm::Triple::arm: case llvm::Triple::armeb: case llvm::Triple::thumb: case llvm::Triple::thumbeb: return EmitARMBuiltinExpr(BuiltinID, E); + case llvm::Triple::aarch64: + case llvm::Triple::aarch64_be: case llvm::Triple::arm64: case llvm::Triple::arm64_be: return EmitARM64BuiltinExpr(BuiltinID, E); @@ -1883,354 +1882,6 @@ enum { Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \ #NameBase, TypeModifier } -static const NeonIntrinsicInfo AArch64SISDIntrinsicInfo[] = { - NEONMAP1(vabdd_f64, aarch64_neon_vabd, AddRetType), - NEONMAP1(vabds_f32, aarch64_neon_vabd, AddRetType), - NEONMAP1(vabsd_s64, aarch64_neon_vabs, 0), - NEONMAP1(vaddd_s64, aarch64_neon_vaddds, 0), - NEONMAP1(vaddd_u64, aarch64_neon_vadddu, 0), - NEONMAP1(vaddlv_s16, aarch64_neon_saddlv, VectorRet | Add1ArgType), - NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, VectorRet | Add1ArgType), - NEONMAP1(vaddlv_s8, aarch64_neon_saddlv, VectorRet | Add1ArgType), - NEONMAP1(vaddlv_u16, aarch64_neon_uaddlv, VectorRet | Add1ArgType), - NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, VectorRet | Add1ArgType), - NEONMAP1(vaddlv_u8, aarch64_neon_uaddlv, VectorRet | Add1ArgType), - NEONMAP1(vaddlvq_s16, aarch64_neon_saddlv, VectorRet | Add1ArgType), - NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, VectorRet | Add1ArgType), - NEONMAP1(vaddlvq_s8, aarch64_neon_saddlv, VectorRet | Add1ArgType), - NEONMAP1(vaddlvq_u16, aarch64_neon_uaddlv, VectorRet | Add1ArgType), - NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, VectorRet | Add1ArgType), - NEONMAP1(vaddlvq_u8, aarch64_neon_uaddlv, VectorRet | Add1ArgType), - NEONMAP1(vaddv_f32, aarch64_neon_vpfadd, AddRetType | Add1ArgType), - NEONMAP1(vaddv_s16, aarch64_neon_vaddv, VectorRet | Add1ArgType), - NEONMAP1(vaddv_s32, aarch64_neon_vaddv, VectorRet | Add1ArgType), - NEONMAP1(vaddv_s8, aarch64_neon_vaddv, VectorRet | Add1ArgType), - NEONMAP1(vaddv_u16, aarch64_neon_vaddv, VectorRet | Add1ArgType), - NEONMAP1(vaddv_u32, aarch64_neon_vaddv, VectorRet | Add1ArgType), - NEONMAP1(vaddv_u8, aarch64_neon_vaddv, VectorRet | Add1ArgType), - NEONMAP1(vaddvq_f32, aarch64_neon_vpfadd, AddRetType | Add1ArgType), - NEONMAP1(vaddvq_f64, aarch64_neon_vpfadd, AddRetType | Add1ArgType), - NEONMAP1(vaddvq_s16, aarch64_neon_vaddv, VectorRet | Add1ArgType), - NEONMAP1(vaddvq_s32, aarch64_neon_vaddv, VectorRet | Add1ArgType), - NEONMAP1(vaddvq_s64, aarch64_neon_vaddv, VectorRet | Add1ArgType), - NEONMAP1(vaddvq_s8, aarch64_neon_vaddv, VectorRet | Add1ArgType), - NEONMAP1(vaddvq_u16, aarch64_neon_vaddv, VectorRet | Add1ArgType), - NEONMAP1(vaddvq_u32, aarch64_neon_vaddv, VectorRet | Add1ArgType), - NEONMAP1(vaddvq_u64, aarch64_neon_vaddv, VectorRet | Add1ArgType), - NEONMAP1(vaddvq_u8, aarch64_neon_vaddv, VectorRet | Add1ArgType), - NEONMAP1(vcaged_f64, aarch64_neon_fcage, VectorRet | Add2ArgTypes), - NEONMAP1(vcages_f32, aarch64_neon_fcage, VectorRet | Add2ArgTypes), - NEONMAP1(vcagtd_f64, aarch64_neon_fcagt, VectorRet | Add2ArgTypes), - NEONMAP1(vcagts_f32, aarch64_neon_fcagt, VectorRet | Add2ArgTypes), - 
NEONMAP1(vcaled_f64, aarch64_neon_fcage, VectorRet | Add2ArgTypes), - NEONMAP1(vcales_f32, aarch64_neon_fcage, VectorRet | Add2ArgTypes), - NEONMAP1(vcaltd_f64, aarch64_neon_fcagt, VectorRet | Add2ArgTypes), - NEONMAP1(vcalts_f32, aarch64_neon_fcagt, VectorRet | Add2ArgTypes), - NEONMAP1(vceqd_f64, aarch64_neon_fceq, VectorRet | Add2ArgTypes), - NEONMAP1(vceqd_s64, aarch64_neon_vceq, VectorRetGetArgs01), - NEONMAP1(vceqd_u64, aarch64_neon_vceq, VectorRetGetArgs01), - NEONMAP1(vceqs_f32, aarch64_neon_fceq, VectorRet | Add2ArgTypes), - NEONMAP1(vceqzd_f64, aarch64_neon_fceq, FpCmpzModifiers), - NEONMAP1(vceqzd_s64, aarch64_neon_vceq, VectorRetGetArgs01), - NEONMAP1(vceqzd_u64, aarch64_neon_vceq, VectorRetGetArgs01), - NEONMAP1(vceqzs_f32, aarch64_neon_fceq, FpCmpzModifiers), - NEONMAP1(vcged_f64, aarch64_neon_fcge, VectorRet | Add2ArgTypes), - NEONMAP1(vcged_s64, aarch64_neon_vcge, VectorRetGetArgs01), - NEONMAP1(vcged_u64, aarch64_neon_vchs, VectorRetGetArgs01), - NEONMAP1(vcges_f32, aarch64_neon_fcge, VectorRet | Add2ArgTypes), - NEONMAP1(vcgezd_f64, aarch64_neon_fcge, FpCmpzModifiers), - NEONMAP1(vcgezd_s64, aarch64_neon_vcge, VectorRetGetArgs01), - NEONMAP1(vcgezs_f32, aarch64_neon_fcge, FpCmpzModifiers), - NEONMAP1(vcgtd_f64, aarch64_neon_fcgt, VectorRet | Add2ArgTypes), - NEONMAP1(vcgtd_s64, aarch64_neon_vcgt, VectorRetGetArgs01), - NEONMAP1(vcgtd_u64, aarch64_neon_vchi, VectorRetGetArgs01), - NEONMAP1(vcgts_f32, aarch64_neon_fcgt, VectorRet | Add2ArgTypes), - NEONMAP1(vcgtzd_f64, aarch64_neon_fcgt, FpCmpzModifiers), - NEONMAP1(vcgtzd_s64, aarch64_neon_vcgt, VectorRetGetArgs01), - NEONMAP1(vcgtzs_f32, aarch64_neon_fcgt, FpCmpzModifiers), - NEONMAP1(vcled_f64, aarch64_neon_fcge, VectorRet | Add2ArgTypes), - NEONMAP1(vcled_s64, aarch64_neon_vcge, VectorRetGetArgs01), - NEONMAP1(vcled_u64, aarch64_neon_vchs, VectorRetGetArgs01), - NEONMAP1(vcles_f32, aarch64_neon_fcge, VectorRet | Add2ArgTypes), - NEONMAP1(vclezd_f64, aarch64_neon_fclez, FpCmpzModifiers), - NEONMAP1(vclezd_s64, aarch64_neon_vclez, VectorRetGetArgs01), - NEONMAP1(vclezs_f32, aarch64_neon_fclez, FpCmpzModifiers), - NEONMAP1(vcltd_f64, aarch64_neon_fcgt, VectorRet | Add2ArgTypes), - NEONMAP1(vcltd_s64, aarch64_neon_vcgt, VectorRetGetArgs01), - NEONMAP1(vcltd_u64, aarch64_neon_vchi, VectorRetGetArgs01), - NEONMAP1(vclts_f32, aarch64_neon_fcgt, VectorRet | Add2ArgTypes), - NEONMAP1(vcltzd_f64, aarch64_neon_fcltz, FpCmpzModifiers), - NEONMAP1(vcltzd_s64, aarch64_neon_vcltz, VectorRetGetArgs01), - NEONMAP1(vcltzs_f32, aarch64_neon_fcltz, FpCmpzModifiers), - NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, VectorRet | Add1ArgType), - NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, VectorRet | Add1ArgType), - NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, VectorRet | Add1ArgType), - NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, VectorRet | Add1ArgType), - NEONMAP1(vcvtd_f64_s64, aarch64_neon_vcvtint2fps, AddRetType | Vectorize1ArgType), - NEONMAP1(vcvtd_f64_u64, aarch64_neon_vcvtint2fpu, AddRetType | Vectorize1ArgType), - NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp_n, AddRetType | Vectorize1ArgType), - NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp_n, AddRetType | Vectorize1ArgType), - NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs_n, VectorRet | Add1ArgType), - NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu_n, VectorRet | Add1ArgType), - NEONMAP1(vcvtd_s64_f64, aarch64_neon_fcvtzs, VectorRet | Add1ArgType), - NEONMAP1(vcvtd_u64_f64, aarch64_neon_fcvtzu, VectorRet | Add1ArgType), - NEONMAP1(vcvtmd_s64_f64, 
aarch64_neon_fcvtms, VectorRet | Add1ArgType), - NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, VectorRet | Add1ArgType), - NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, VectorRet | Add1ArgType), - NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, VectorRet | Add1ArgType), - NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, VectorRet | Add1ArgType), - NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, VectorRet | Add1ArgType), - NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, VectorRet | Add1ArgType), - NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, VectorRet | Add1ArgType), - NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, VectorRet | Add1ArgType), - NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, VectorRet | Add1ArgType), - NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, VectorRet | Add1ArgType), - NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, VectorRet | Add1ArgType), - NEONMAP1(vcvts_f32_s32, aarch64_neon_vcvtint2fps, AddRetType | Vectorize1ArgType), - NEONMAP1(vcvts_f32_u32, aarch64_neon_vcvtint2fpu, AddRetType | Vectorize1ArgType), - NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp_n, AddRetType | Vectorize1ArgType), - NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp_n, AddRetType | Vectorize1ArgType), - NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs_n, VectorRet | Add1ArgType), - NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu_n, VectorRet | Add1ArgType), - NEONMAP1(vcvts_s32_f32, aarch64_neon_fcvtzs, VectorRet | Add1ArgType), - NEONMAP1(vcvts_u32_f32, aarch64_neon_fcvtzu, VectorRet | Add1ArgType), - NEONMAP1(vcvtxd_f32_f64, aarch64_neon_fcvtxn, 0), - NEONMAP0(vdupb_lane_i8), - NEONMAP0(vdupb_laneq_i8), - NEONMAP0(vdupd_lane_f64), - NEONMAP0(vdupd_lane_i64), - NEONMAP0(vdupd_laneq_f64), - NEONMAP0(vdupd_laneq_i64), - NEONMAP0(vduph_lane_i16), - NEONMAP0(vduph_laneq_i16), - NEONMAP0(vdups_lane_f32), - NEONMAP0(vdups_lane_i32), - NEONMAP0(vdups_laneq_f32), - NEONMAP0(vdups_laneq_i32), - NEONMAP0(vfmad_lane_f64), - NEONMAP0(vfmad_laneq_f64), - NEONMAP0(vfmas_lane_f32), - NEONMAP0(vfmas_laneq_f32), - NEONMAP0(vget_lane_f32), - NEONMAP0(vget_lane_f64), - NEONMAP0(vget_lane_i16), - NEONMAP0(vget_lane_i32), - NEONMAP0(vget_lane_i64), - NEONMAP0(vget_lane_i8), - NEONMAP0(vgetq_lane_f32), - NEONMAP0(vgetq_lane_f64), - NEONMAP0(vgetq_lane_i16), - NEONMAP0(vgetq_lane_i32), - NEONMAP0(vgetq_lane_i64), - NEONMAP0(vgetq_lane_i8), - NEONMAP1(vmaxnmv_f32, aarch64_neon_vpfmaxnm, AddRetType | Add1ArgType), - NEONMAP1(vmaxnmvq_f32, aarch64_neon_vmaxnmv, 0), - NEONMAP1(vmaxnmvq_f64, aarch64_neon_vpfmaxnm, AddRetType | Add1ArgType), - NEONMAP1(vmaxv_f32, aarch64_neon_vpmax, AddRetType | Add1ArgType), - NEONMAP1(vmaxv_s16, aarch64_neon_smaxv, VectorRet | Add1ArgType), - NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, VectorRet | Add1ArgType), - NEONMAP1(vmaxv_s8, aarch64_neon_smaxv, VectorRet | Add1ArgType), - NEONMAP1(vmaxv_u16, aarch64_neon_umaxv, VectorRet | Add1ArgType), - NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, VectorRet | Add1ArgType), - NEONMAP1(vmaxv_u8, aarch64_neon_umaxv, VectorRet | Add1ArgType), - NEONMAP1(vmaxvq_f32, aarch64_neon_vmaxv, 0), - NEONMAP1(vmaxvq_f64, aarch64_neon_vpmax, AddRetType | Add1ArgType), - NEONMAP1(vmaxvq_s16, aarch64_neon_smaxv, VectorRet | Add1ArgType), - NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, VectorRet | Add1ArgType), - NEONMAP1(vmaxvq_s8, aarch64_neon_smaxv, VectorRet | Add1ArgType), - NEONMAP1(vmaxvq_u16, aarch64_neon_umaxv, VectorRet | Add1ArgType), - NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, VectorRet | Add1ArgType), - NEONMAP1(vmaxvq_u8, aarch64_neon_umaxv, VectorRet | Add1ArgType), - 
NEONMAP1(vminnmv_f32, aarch64_neon_vpfminnm, AddRetType | Add1ArgType), - NEONMAP1(vminnmvq_f32, aarch64_neon_vminnmv, 0), - NEONMAP1(vminnmvq_f64, aarch64_neon_vpfminnm, AddRetType | Add1ArgType), - NEONMAP1(vminv_f32, aarch64_neon_vpmin, AddRetType | Add1ArgType), - NEONMAP1(vminv_s16, aarch64_neon_sminv, VectorRet | Add1ArgType), - NEONMAP1(vminv_s32, aarch64_neon_sminv, VectorRet | Add1ArgType), - NEONMAP1(vminv_s8, aarch64_neon_sminv, VectorRet | Add1ArgType), - NEONMAP1(vminv_u16, aarch64_neon_uminv, VectorRet | Add1ArgType), - NEONMAP1(vminv_u32, aarch64_neon_uminv, VectorRet | Add1ArgType), - NEONMAP1(vminv_u8, aarch64_neon_uminv, VectorRet | Add1ArgType), - NEONMAP1(vminvq_f32, aarch64_neon_vminv, 0), - NEONMAP1(vminvq_f64, aarch64_neon_vpmin, AddRetType | Add1ArgType), - NEONMAP1(vminvq_s16, aarch64_neon_sminv, VectorRet | Add1ArgType), - NEONMAP1(vminvq_s32, aarch64_neon_sminv, VectorRet | Add1ArgType), - NEONMAP1(vminvq_s8, aarch64_neon_sminv, VectorRet | Add1ArgType), - NEONMAP1(vminvq_u16, aarch64_neon_uminv, VectorRet | Add1ArgType), - NEONMAP1(vminvq_u32, aarch64_neon_uminv, VectorRet | Add1ArgType), - NEONMAP1(vminvq_u8, aarch64_neon_uminv, VectorRet | Add1ArgType), - NEONMAP0(vmul_n_f64), - NEONMAP1(vmull_p64, aarch64_neon_vmull_p64, 0), - NEONMAP0(vmulxd_f64), - NEONMAP0(vmulxs_f32), - NEONMAP1(vnegd_s64, aarch64_neon_vneg, 0), - NEONMAP1(vpaddd_f64, aarch64_neon_vpfadd, AddRetType | Add1ArgType), - NEONMAP1(vpaddd_s64, aarch64_neon_vpadd, 0), - NEONMAP1(vpaddd_u64, aarch64_neon_vpadd, 0), - NEONMAP1(vpadds_f32, aarch64_neon_vpfadd, AddRetType | Add1ArgType), - NEONMAP1(vpmaxnmqd_f64, aarch64_neon_vpfmaxnm, AddRetType | Add1ArgType), - NEONMAP1(vpmaxnms_f32, aarch64_neon_vpfmaxnm, AddRetType | Add1ArgType), - NEONMAP1(vpmaxqd_f64, aarch64_neon_vpmax, AddRetType | Add1ArgType), - NEONMAP1(vpmaxs_f32, aarch64_neon_vpmax, AddRetType | Add1ArgType), - NEONMAP1(vpminnmqd_f64, aarch64_neon_vpfminnm, AddRetType | Add1ArgType), - NEONMAP1(vpminnms_f32, aarch64_neon_vpfminnm, AddRetType | Add1ArgType), - NEONMAP1(vpminqd_f64, aarch64_neon_vpmin, AddRetType | Add1ArgType), - NEONMAP1(vpmins_f32, aarch64_neon_vpmin, AddRetType | Add1ArgType), - NEONMAP1(vqabsb_s8, arm_neon_vqabs, VectorRet), - NEONMAP1(vqabsd_s64, arm_neon_vqabs, VectorRet), - NEONMAP1(vqabsh_s16, arm_neon_vqabs, VectorRet), - NEONMAP1(vqabss_s32, arm_neon_vqabs, VectorRet), - NEONMAP1(vqaddb_s8, arm_neon_vqadds, VectorRet), - NEONMAP1(vqaddb_u8, arm_neon_vqaddu, VectorRet), - NEONMAP1(vqaddd_s64, arm_neon_vqadds, VectorRet), - NEONMAP1(vqaddd_u64, arm_neon_vqaddu, VectorRet), - NEONMAP1(vqaddh_s16, arm_neon_vqadds, VectorRet), - NEONMAP1(vqaddh_u16, arm_neon_vqaddu, VectorRet), - NEONMAP1(vqadds_s32, arm_neon_vqadds, VectorRet), - NEONMAP1(vqadds_u32, arm_neon_vqaddu, VectorRet), - NEONMAP0(vqdmlalh_lane_s16), - NEONMAP0(vqdmlalh_laneq_s16), - NEONMAP1(vqdmlalh_s16, aarch64_neon_vqdmlal, VectorRet), - NEONMAP0(vqdmlals_lane_s32), - NEONMAP0(vqdmlals_laneq_s32), - NEONMAP1(vqdmlals_s32, aarch64_neon_vqdmlal, VectorRet), - NEONMAP0(vqdmlslh_lane_s16), - NEONMAP0(vqdmlslh_laneq_s16), - NEONMAP1(vqdmlslh_s16, aarch64_neon_vqdmlsl, VectorRet), - NEONMAP0(vqdmlsls_lane_s32), - NEONMAP0(vqdmlsls_laneq_s32), - NEONMAP1(vqdmlsls_s32, aarch64_neon_vqdmlsl, VectorRet), - NEONMAP1(vqdmulhh_s16, arm_neon_vqdmulh, VectorRet), - NEONMAP1(vqdmulhs_s32, arm_neon_vqdmulh, VectorRet), - NEONMAP1(vqdmullh_s16, arm_neon_vqdmull, VectorRet), - NEONMAP1(vqdmulls_s32, arm_neon_vqdmull, VectorRet), - NEONMAP1(vqmovnd_s64, 
arm_neon_vqmovns, VectorRet), - NEONMAP1(vqmovnd_u64, arm_neon_vqmovnu, VectorRet), - NEONMAP1(vqmovnh_s16, arm_neon_vqmovns, VectorRet), - NEONMAP1(vqmovnh_u16, arm_neon_vqmovnu, VectorRet), - NEONMAP1(vqmovns_s32, arm_neon_vqmovns, VectorRet), - NEONMAP1(vqmovns_u32, arm_neon_vqmovnu, VectorRet), - NEONMAP1(vqmovund_s64, arm_neon_vqmovnsu, VectorRet), - NEONMAP1(vqmovunh_s16, arm_neon_vqmovnsu, VectorRet), - NEONMAP1(vqmovuns_s32, arm_neon_vqmovnsu, VectorRet), - NEONMAP1(vqnegb_s8, arm_neon_vqneg, VectorRet), - NEONMAP1(vqnegd_s64, arm_neon_vqneg, VectorRet), - NEONMAP1(vqnegh_s16, arm_neon_vqneg, VectorRet), - NEONMAP1(vqnegs_s32, arm_neon_vqneg, VectorRet), - NEONMAP1(vqrdmulhh_s16, arm_neon_vqrdmulh, VectorRet), - NEONMAP1(vqrdmulhs_s32, arm_neon_vqrdmulh, VectorRet), - NEONMAP1(vqrshlb_s8, aarch64_neon_vqrshls, VectorRet), - NEONMAP1(vqrshlb_u8, aarch64_neon_vqrshlu, VectorRet), - NEONMAP1(vqrshld_s64, aarch64_neon_vqrshls, VectorRet), - NEONMAP1(vqrshld_u64, aarch64_neon_vqrshlu, VectorRet), - NEONMAP1(vqrshlh_s16, aarch64_neon_vqrshls, VectorRet), - NEONMAP1(vqrshlh_u16, aarch64_neon_vqrshlu, VectorRet), - NEONMAP1(vqrshls_s32, aarch64_neon_vqrshls, VectorRet), - NEONMAP1(vqrshls_u32, aarch64_neon_vqrshlu, VectorRet), - NEONMAP1(vqrshrnd_n_s64, aarch64_neon_vsqrshrn, VectorRet), - NEONMAP1(vqrshrnd_n_u64, aarch64_neon_vuqrshrn, VectorRet), - NEONMAP1(vqrshrnh_n_s16, aarch64_neon_vsqrshrn, VectorRet), - NEONMAP1(vqrshrnh_n_u16, aarch64_neon_vuqrshrn, VectorRet), - NEONMAP1(vqrshrns_n_s32, aarch64_neon_vsqrshrn, VectorRet), - NEONMAP1(vqrshrns_n_u32, aarch64_neon_vuqrshrn, VectorRet), - NEONMAP1(vqrshrund_n_s64, aarch64_neon_vsqrshrun, VectorRet), - NEONMAP1(vqrshrunh_n_s16, aarch64_neon_vsqrshrun, VectorRet), - NEONMAP1(vqrshruns_n_s32, aarch64_neon_vsqrshrun, VectorRet), - NEONMAP1(vqshlb_n_s8, aarch64_neon_vqshls_n, VectorRet), - NEONMAP1(vqshlb_n_u8, aarch64_neon_vqshlu_n, VectorRet), - NEONMAP1(vqshlb_s8, aarch64_neon_vqshls, VectorRet), - NEONMAP1(vqshlb_u8, aarch64_neon_vqshlu, VectorRet), - NEONMAP1(vqshld_n_s64, aarch64_neon_vqshls_n, VectorRet), - NEONMAP1(vqshld_n_u64, aarch64_neon_vqshlu_n, VectorRet), - NEONMAP1(vqshld_s64, aarch64_neon_vqshls, VectorRet), - NEONMAP1(vqshld_u64, aarch64_neon_vqshlu, VectorRet), - NEONMAP1(vqshlh_n_s16, aarch64_neon_vqshls_n, VectorRet), - NEONMAP1(vqshlh_n_u16, aarch64_neon_vqshlu_n, VectorRet), - NEONMAP1(vqshlh_s16, aarch64_neon_vqshls, VectorRet), - NEONMAP1(vqshlh_u16, aarch64_neon_vqshlu, VectorRet), - NEONMAP1(vqshls_n_s32, aarch64_neon_vqshls_n, VectorRet), - NEONMAP1(vqshls_n_u32, aarch64_neon_vqshlu_n, VectorRet), - NEONMAP1(vqshls_s32, aarch64_neon_vqshls, VectorRet), - NEONMAP1(vqshls_u32, aarch64_neon_vqshlu, VectorRet), - NEONMAP1(vqshlub_n_s8, aarch64_neon_vsqshlu, VectorRet), - NEONMAP1(vqshlud_n_s64, aarch64_neon_vsqshlu, VectorRet), - NEONMAP1(vqshluh_n_s16, aarch64_neon_vsqshlu, VectorRet), - NEONMAP1(vqshlus_n_s32, aarch64_neon_vsqshlu, VectorRet), - NEONMAP1(vqshrnd_n_s64, aarch64_neon_vsqshrn, VectorRet), - NEONMAP1(vqshrnd_n_u64, aarch64_neon_vuqshrn, VectorRet), - NEONMAP1(vqshrnh_n_s16, aarch64_neon_vsqshrn, VectorRet), - NEONMAP1(vqshrnh_n_u16, aarch64_neon_vuqshrn, VectorRet), - NEONMAP1(vqshrns_n_s32, aarch64_neon_vsqshrn, VectorRet), - NEONMAP1(vqshrns_n_u32, aarch64_neon_vuqshrn, VectorRet), - NEONMAP1(vqshrund_n_s64, aarch64_neon_vsqshrun, VectorRet), - NEONMAP1(vqshrunh_n_s16, aarch64_neon_vsqshrun, VectorRet), - NEONMAP1(vqshruns_n_s32, aarch64_neon_vsqshrun, VectorRet), - NEONMAP1(vqsubb_s8, 
arm_neon_vqsubs, VectorRet), - NEONMAP1(vqsubb_u8, arm_neon_vqsubu, VectorRet), - NEONMAP1(vqsubd_s64, arm_neon_vqsubs, VectorRet), - NEONMAP1(vqsubd_u64, arm_neon_vqsubu, VectorRet), - NEONMAP1(vqsubh_s16, arm_neon_vqsubs, VectorRet), - NEONMAP1(vqsubh_u16, arm_neon_vqsubu, VectorRet), - NEONMAP1(vqsubs_s32, arm_neon_vqsubs, VectorRet), - NEONMAP1(vqsubs_u32, arm_neon_vqsubu, VectorRet), - NEONMAP1(vrecped_f64, aarch64_neon_vrecpe, AddRetType), - NEONMAP1(vrecpes_f32, aarch64_neon_vrecpe, AddRetType), - NEONMAP1(vrecpsd_f64, aarch64_neon_vrecps, AddRetType), - NEONMAP1(vrecpss_f32, aarch64_neon_vrecps, AddRetType), - NEONMAP1(vrecpxd_f64, aarch64_neon_vrecpx, AddRetType), - NEONMAP1(vrecpxs_f32, aarch64_neon_vrecpx, AddRetType), - NEONMAP1(vrshld_s64, aarch64_neon_vrshlds, 0), - NEONMAP1(vrshld_u64, aarch64_neon_vrshldu, 0), - NEONMAP1(vrshrd_n_s64, aarch64_neon_vsrshr, VectorRet), - NEONMAP1(vrshrd_n_u64, aarch64_neon_vurshr, VectorRet), - NEONMAP1(vrsqrted_f64, aarch64_neon_vrsqrte, AddRetType), - NEONMAP1(vrsqrtes_f32, aarch64_neon_vrsqrte, AddRetType), - NEONMAP1(vrsqrtsd_f64, aarch64_neon_vrsqrts, AddRetType), - NEONMAP1(vrsqrtss_f32, aarch64_neon_vrsqrts, AddRetType), - NEONMAP1(vrsrad_n_s64, aarch64_neon_vrsrads_n, 0), - NEONMAP1(vrsrad_n_u64, aarch64_neon_vrsradu_n, 0), - NEONMAP0(vset_lane_f32), - NEONMAP0(vset_lane_f64), - NEONMAP0(vset_lane_i16), - NEONMAP0(vset_lane_i32), - NEONMAP0(vset_lane_i64), - NEONMAP0(vset_lane_i8), - NEONMAP0(vsetq_lane_f32), - NEONMAP0(vsetq_lane_f64), - NEONMAP0(vsetq_lane_i16), - NEONMAP0(vsetq_lane_i32), - NEONMAP0(vsetq_lane_i64), - NEONMAP0(vsetq_lane_i8), - NEONMAP1(vsha1cq_u32, arm_neon_sha1c, 0), - NEONMAP1(vsha1h_u32, arm_neon_sha1h, 0), - NEONMAP1(vsha1mq_u32, arm_neon_sha1m, 0), - NEONMAP1(vsha1pq_u32, arm_neon_sha1p, 0), - NEONMAP1(vshld_n_s64, aarch64_neon_vshld_n, 0), - NEONMAP1(vshld_n_u64, aarch64_neon_vshld_n, 0), - NEONMAP1(vshld_s64, aarch64_neon_vshlds, 0), - NEONMAP1(vshld_u64, aarch64_neon_vshldu, 0), - NEONMAP1(vshrd_n_s64, aarch64_neon_vshrds_n, 0), - NEONMAP1(vshrd_n_u64, aarch64_neon_vshrdu_n, 0), - NEONMAP1(vslid_n_s64, aarch64_neon_vsli, VectorRet), - NEONMAP1(vslid_n_u64, aarch64_neon_vsli, VectorRet), - NEONMAP1(vsqaddb_u8, aarch64_neon_vsqadd, VectorRet), - NEONMAP1(vsqaddd_u64, aarch64_neon_vsqadd, VectorRet), - NEONMAP1(vsqaddh_u16, aarch64_neon_vsqadd, VectorRet), - NEONMAP1(vsqadds_u32, aarch64_neon_vsqadd, VectorRet), - NEONMAP1(vsrad_n_s64, aarch64_neon_vsrads_n, 0), - NEONMAP1(vsrad_n_u64, aarch64_neon_vsradu_n, 0), - NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, VectorRet), - NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, VectorRet), - NEONMAP1(vsubd_s64, aarch64_neon_vsubds, 0), - NEONMAP1(vsubd_u64, aarch64_neon_vsubdu, 0), - NEONMAP1(vtstd_s64, aarch64_neon_vtstd, VectorRetGetArgs01), - NEONMAP1(vtstd_u64, aarch64_neon_vtstd, VectorRetGetArgs01), - NEONMAP1(vuqaddb_s8, aarch64_neon_vuqadd, VectorRet), - NEONMAP1(vuqaddd_s64, aarch64_neon_vuqadd, VectorRet), - NEONMAP1(vuqaddh_s16, aarch64_neon_vuqadd, VectorRet), - NEONMAP1(vuqadds_s32, aarch64_neon_vuqadd, VectorRet) -}; - static NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = { NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts), NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts), @@ -2739,7 +2390,6 @@ static NeonIntrinsicInfo ARM64SISDIntrinsicMap[] = { #undef NEONMAP2 static bool NEONSIMDIntrinsicsProvenSorted = false; -static bool AArch64SISDIntrinsicInfoProvenSorted = false; static bool ARM64SIMDIntrinsicsProvenSorted 
= false; static bool ARM64SISDIntrinsicsProvenSorted = false; @@ -2869,169 +2519,6 @@ static Value *EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF, return CGF.Builder.CreateBitCast(Result, ResultType, s); } -static Value *EmitAArch64ScalarBuiltinExpr(CodeGenFunction &CGF, - const NeonIntrinsicInfo &SISDInfo, - const CallExpr *E) { - unsigned BuiltinID = SISDInfo.BuiltinID; - unsigned int Int = SISDInfo.LLVMIntrinsic; - const char *s = SISDInfo.NameHint; - - SmallVector<Value *, 4> Ops; - for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) { - Ops.push_back(CGF.EmitScalarExpr(E->getArg(i))); - } - - // AArch64 scalar builtins are not overloaded, they do not have an extra - // argument that specifies the vector type, need to handle each case. - switch (BuiltinID) { - default: break; - case NEON::BI__builtin_neon_vdups_lane_f32: - case NEON::BI__builtin_neon_vdupd_lane_f64: - case NEON::BI__builtin_neon_vdups_laneq_f32: - case NEON::BI__builtin_neon_vdupd_laneq_f64: { - return CGF.Builder.CreateExtractElement(Ops[0], Ops[1], "vdup_lane"); - } - case NEON::BI__builtin_neon_vdupb_lane_i8: - case NEON::BI__builtin_neon_vduph_lane_i16: - case NEON::BI__builtin_neon_vdups_lane_i32: - case NEON::BI__builtin_neon_vdupd_lane_i64: - case NEON::BI__builtin_neon_vdupb_laneq_i8: - case NEON::BI__builtin_neon_vduph_laneq_i16: - case NEON::BI__builtin_neon_vdups_laneq_i32: - case NEON::BI__builtin_neon_vdupd_laneq_i64: { - // The backend treats Neon scalar types as v1ix types - // So we want to dup lane from any vector to v1ix vector - // with shufflevector - s = "vdup_lane"; - Value* SV = llvm::ConstantVector::getSplat(1, cast<ConstantInt>(Ops[1])); - Value *Result = CGF.Builder.CreateShuffleVector(Ops[0], Ops[0], SV, s); - llvm::Type *Ty = CGF.ConvertType(E->getCallReturnType()); - // AArch64 intrinsic one-element vector type cast to - // scalar type expected by the builtin - return CGF.Builder.CreateBitCast(Result, Ty, s); - } - case NEON::BI__builtin_neon_vqdmlalh_lane_s16 : - case NEON::BI__builtin_neon_vqdmlalh_laneq_s16 : - case NEON::BI__builtin_neon_vqdmlals_lane_s32 : - case NEON::BI__builtin_neon_vqdmlals_laneq_s32 : - case NEON::BI__builtin_neon_vqdmlslh_lane_s16 : - case NEON::BI__builtin_neon_vqdmlslh_laneq_s16 : - case NEON::BI__builtin_neon_vqdmlsls_lane_s32 : - case NEON::BI__builtin_neon_vqdmlsls_laneq_s32 : { - Int = Intrinsic::arm_neon_vqadds; - if (BuiltinID == NEON::BI__builtin_neon_vqdmlslh_lane_s16 || - BuiltinID == NEON::BI__builtin_neon_vqdmlslh_laneq_s16 || - BuiltinID == NEON::BI__builtin_neon_vqdmlsls_lane_s32 || - BuiltinID == NEON::BI__builtin_neon_vqdmlsls_laneq_s32) { - Int = Intrinsic::arm_neon_vqsubs; - } - // create vqdmull call with b * c[i] - llvm::Type *Ty = CGF.ConvertType(E->getArg(1)->getType()); - llvm::VectorType *OpVTy = llvm::VectorType::get(Ty, 1); - Ty = CGF.ConvertType(E->getArg(0)->getType()); - llvm::VectorType *ResVTy = llvm::VectorType::get(Ty, 1); - Value *F = CGF.CGM.getIntrinsic(Intrinsic::arm_neon_vqdmull, ResVTy); - Value *V = UndefValue::get(OpVTy); - llvm::Constant *CI = ConstantInt::get(CGF.Int32Ty, 0); - SmallVector<Value *, 2> MulOps; - MulOps.push_back(Ops[1]); - MulOps.push_back(Ops[2]); - MulOps[0] = CGF.Builder.CreateInsertElement(V, MulOps[0], CI); - MulOps[1] = CGF.Builder.CreateExtractElement(MulOps[1], Ops[3], "extract"); - MulOps[1] = CGF.Builder.CreateInsertElement(V, MulOps[1], CI); - Value *MulRes = CGF.Builder.CreateCall2(F, MulOps[0], MulOps[1]); - // create vqadds call with a +/- vqdmull result - F = 
CGF.CGM.getIntrinsic(Int, ResVTy); - SmallVector<Value *, 2> AddOps; - AddOps.push_back(Ops[0]); - AddOps.push_back(MulRes); - V = UndefValue::get(ResVTy); - AddOps[0] = CGF.Builder.CreateInsertElement(V, AddOps[0], CI); - Value *AddRes = CGF.Builder.CreateCall2(F, AddOps[0], AddOps[1]); - return CGF.Builder.CreateBitCast(AddRes, Ty); - } - case NEON::BI__builtin_neon_vfmas_lane_f32: - case NEON::BI__builtin_neon_vfmas_laneq_f32: - case NEON::BI__builtin_neon_vfmad_lane_f64: - case NEON::BI__builtin_neon_vfmad_laneq_f64: { - llvm::Type *Ty = CGF.ConvertType(E->getCallReturnType()); - Value *F = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty); - Ops[2] = CGF.Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); - return CGF.Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]); - } - // Scalar Floating-point Multiply Extended - case NEON::BI__builtin_neon_vmulxs_f32: - case NEON::BI__builtin_neon_vmulxd_f64: { - Int = Intrinsic::aarch64_neon_vmulx; - llvm::Type *Ty = CGF.ConvertType(E->getCallReturnType()); - return CGF.EmitNeonCall(CGF.CGM.getIntrinsic(Int, Ty), Ops, "vmulx"); - } - case NEON::BI__builtin_neon_vmul_n_f64: { - // v1f64 vmul_n_f64 should be mapped to Neon scalar mul lane - llvm::Type *VTy = GetNeonType(&CGF, - NeonTypeFlags(NeonTypeFlags::Float64, false, false)); - Ops[0] = CGF.Builder.CreateBitCast(Ops[0], VTy); - llvm::Value *Idx = llvm::ConstantInt::get(CGF.Int32Ty, 0); - Ops[0] = CGF.Builder.CreateExtractElement(Ops[0], Idx, "extract"); - Value *Result = CGF.Builder.CreateFMul(Ops[0], Ops[1]); - return CGF.Builder.CreateBitCast(Result, VTy); - } - case NEON::BI__builtin_neon_vget_lane_i8: - case NEON::BI__builtin_neon_vget_lane_i16: - case NEON::BI__builtin_neon_vget_lane_i32: - case NEON::BI__builtin_neon_vget_lane_i64: - case NEON::BI__builtin_neon_vget_lane_f32: - case NEON::BI__builtin_neon_vget_lane_f64: - case NEON::BI__builtin_neon_vgetq_lane_i8: - case NEON::BI__builtin_neon_vgetq_lane_i16: - case NEON::BI__builtin_neon_vgetq_lane_i32: - case NEON::BI__builtin_neon_vgetq_lane_i64: - case NEON::BI__builtin_neon_vgetq_lane_f32: - case NEON::BI__builtin_neon_vgetq_lane_f64: - return CGF.EmitARMBuiltinExpr(NEON::BI__builtin_neon_vget_lane_i8, E); - case NEON::BI__builtin_neon_vset_lane_i8: - case NEON::BI__builtin_neon_vset_lane_i16: - case NEON::BI__builtin_neon_vset_lane_i32: - case NEON::BI__builtin_neon_vset_lane_i64: - case NEON::BI__builtin_neon_vset_lane_f32: - case NEON::BI__builtin_neon_vset_lane_f64: - case NEON::BI__builtin_neon_vsetq_lane_i8: - case NEON::BI__builtin_neon_vsetq_lane_i16: - case NEON::BI__builtin_neon_vsetq_lane_i32: - case NEON::BI__builtin_neon_vsetq_lane_i64: - case NEON::BI__builtin_neon_vsetq_lane_f32: - case NEON::BI__builtin_neon_vsetq_lane_f64: - return CGF.EmitARMBuiltinExpr(NEON::BI__builtin_neon_vset_lane_i8, E); - - case NEON::BI__builtin_neon_vceqzd_s64: - case NEON::BI__builtin_neon_vceqzd_u64: - case NEON::BI__builtin_neon_vcgezd_s64: - case NEON::BI__builtin_neon_vcgtzd_s64: - case NEON::BI__builtin_neon_vclezd_s64: - case NEON::BI__builtin_neon_vcltzd_s64: - // Add implicit zero operand. 
- Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType())); - break; - case NEON::BI__builtin_neon_vceqzs_f32: - case NEON::BI__builtin_neon_vceqzd_f64: - case NEON::BI__builtin_neon_vcgezs_f32: - case NEON::BI__builtin_neon_vcgezd_f64: - case NEON::BI__builtin_neon_vcgtzs_f32: - case NEON::BI__builtin_neon_vcgtzd_f64: - case NEON::BI__builtin_neon_vclezs_f32: - case NEON::BI__builtin_neon_vclezd_f64: - case NEON::BI__builtin_neon_vcltzs_f32: - case NEON::BI__builtin_neon_vcltzd_f64: - // Add implicit zero operand. - Ops.push_back(llvm::Constant::getNullValue(CGF.FloatTy)); - break; - } - - // It didn't need any handling specific to the AArch64 backend, so defer to - // common code. - return EmitCommonNeonSISDBuiltinExpr(CGF, SISDInfo, Ops, E); -} - Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic, const char *NameHint, unsigned Modifier, const CallExpr *E, @@ -3534,796 +3021,6 @@ static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops, return CGF.EmitNeonCall(TblF, TblOps, Name); } -static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, - unsigned BuiltinID, - const CallExpr *E) { - unsigned int Int = 0; - const char *s = nullptr; - - switch (BuiltinID) { - default: - return nullptr; - case NEON::BI__builtin_neon_vtbl1_v: - case NEON::BI__builtin_neon_vqtbl1_v: - case NEON::BI__builtin_neon_vqtbl1q_v: - case NEON::BI__builtin_neon_vtbl2_v: - case NEON::BI__builtin_neon_vqtbl2_v: - case NEON::BI__builtin_neon_vqtbl2q_v: - case NEON::BI__builtin_neon_vtbl3_v: - case NEON::BI__builtin_neon_vqtbl3_v: - case NEON::BI__builtin_neon_vqtbl3q_v: - case NEON::BI__builtin_neon_vtbl4_v: - case NEON::BI__builtin_neon_vqtbl4_v: - case NEON::BI__builtin_neon_vqtbl4q_v: - case NEON::BI__builtin_neon_vtbx1_v: - case NEON::BI__builtin_neon_vqtbx1_v: - case NEON::BI__builtin_neon_vqtbx1q_v: - case NEON::BI__builtin_neon_vtbx2_v: - case NEON::BI__builtin_neon_vqtbx2_v: - case NEON::BI__builtin_neon_vqtbx2q_v: - case NEON::BI__builtin_neon_vtbx3_v: - case NEON::BI__builtin_neon_vqtbx3_v: - case NEON::BI__builtin_neon_vqtbx3q_v: - case NEON::BI__builtin_neon_vtbx4_v: - case NEON::BI__builtin_neon_vqtbx4_v: - case NEON::BI__builtin_neon_vqtbx4q_v: - break; - } - - assert(E->getNumArgs() >= 3); - - // Get the last argument, which specifies the vector type. - llvm::APSInt Result; - const Expr *Arg = E->getArg(E->getNumArgs() - 1); - if (!Arg->isIntegerConstantExpr(Result, CGF.getContext())) - return nullptr; - - // Determine the type of this overloaded NEON intrinsic. - NeonTypeFlags Type(Result.getZExtValue()); - llvm::VectorType *VTy = GetNeonType(&CGF, Type); - llvm::Type *Ty = VTy; - if (!Ty) - return nullptr; - - SmallVector<Value *, 4> Ops; - for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) { - Ops.push_back(CGF.EmitScalarExpr(E->getArg(i))); - } - - unsigned nElts = VTy->getNumElements(); - - // AArch64 scalar builtins are not overloaded, they do not have an extra - // argument that specifies the vector type, need to handle each case. 
- SmallVector<Value *, 2> TblOps; - switch (BuiltinID) { - case NEON::BI__builtin_neon_vtbl1_v: { - TblOps.push_back(Ops[0]); - return packTBLDVectorList(CGF, TblOps, nullptr, Ops[1], Ty, - Intrinsic::aarch64_neon_vtbl1, "vtbl1"); - } - case NEON::BI__builtin_neon_vtbl2_v: { - TblOps.push_back(Ops[0]); - TblOps.push_back(Ops[1]); - return packTBLDVectorList(CGF, TblOps, nullptr, Ops[2], Ty, - Intrinsic::aarch64_neon_vtbl1, "vtbl1"); - } - case NEON::BI__builtin_neon_vtbl3_v: { - TblOps.push_back(Ops[0]); - TblOps.push_back(Ops[1]); - TblOps.push_back(Ops[2]); - return packTBLDVectorList(CGF, TblOps, nullptr, Ops[3], Ty, - Intrinsic::aarch64_neon_vtbl2, "vtbl2"); - } - case NEON::BI__builtin_neon_vtbl4_v: { - TblOps.push_back(Ops[0]); - TblOps.push_back(Ops[1]); - TblOps.push_back(Ops[2]); - TblOps.push_back(Ops[3]); - return packTBLDVectorList(CGF, TblOps, nullptr, Ops[4], Ty, - Intrinsic::aarch64_neon_vtbl2, "vtbl2"); - } - case NEON::BI__builtin_neon_vtbx1_v: { - TblOps.push_back(Ops[1]); - Value *TblRes = packTBLDVectorList(CGF, TblOps, nullptr, Ops[2], Ty, - Intrinsic::aarch64_neon_vtbl1, "vtbl1"); - - llvm::Constant *Eight = ConstantInt::get(VTy->getElementType(), 8); - Value* EightV = llvm::ConstantVector::getSplat(nElts, Eight); - Value *CmpRes = CGF.Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV); - CmpRes = CGF.Builder.CreateSExt(CmpRes, Ty); - - SmallVector<Value *, 4> BslOps; - BslOps.push_back(CmpRes); - BslOps.push_back(Ops[0]); - BslOps.push_back(TblRes); - Function *BslF = CGF.CGM.getIntrinsic(Intrinsic::arm_neon_vbsl, Ty); - return CGF.EmitNeonCall(BslF, BslOps, "vbsl"); - } - case NEON::BI__builtin_neon_vtbx2_v: { - TblOps.push_back(Ops[1]); - TblOps.push_back(Ops[2]); - return packTBLDVectorList(CGF, TblOps, Ops[0], Ops[3], Ty, - Intrinsic::aarch64_neon_vtbx1, "vtbx1"); - } - case NEON::BI__builtin_neon_vtbx3_v: { - TblOps.push_back(Ops[1]); - TblOps.push_back(Ops[2]); - TblOps.push_back(Ops[3]); - Value *TblRes = packTBLDVectorList(CGF, TblOps, nullptr, Ops[4], Ty, - Intrinsic::aarch64_neon_vtbl2, "vtbl2"); - - llvm::Constant *TwentyFour = ConstantInt::get(VTy->getElementType(), 24); - Value* TwentyFourV = llvm::ConstantVector::getSplat(nElts, TwentyFour); - Value *CmpRes = CGF.Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4], - TwentyFourV); - CmpRes = CGF.Builder.CreateSExt(CmpRes, Ty); - - SmallVector<Value *, 4> BslOps; - BslOps.push_back(CmpRes); - BslOps.push_back(Ops[0]); - BslOps.push_back(TblRes); - Function *BslF = CGF.CGM.getIntrinsic(Intrinsic::arm_neon_vbsl, Ty); - return CGF.EmitNeonCall(BslF, BslOps, "vbsl"); - } - case NEON::BI__builtin_neon_vtbx4_v: { - TblOps.push_back(Ops[1]); - TblOps.push_back(Ops[2]); - TblOps.push_back(Ops[3]); - TblOps.push_back(Ops[4]); - return packTBLDVectorList(CGF, TblOps, Ops[0], Ops[5], Ty, - Intrinsic::aarch64_neon_vtbx2, "vtbx2"); - } - case NEON::BI__builtin_neon_vqtbl1_v: - case NEON::BI__builtin_neon_vqtbl1q_v: - Int = Intrinsic::aarch64_neon_vtbl1; s = "vtbl1"; break; - case NEON::BI__builtin_neon_vqtbl2_v: - case NEON::BI__builtin_neon_vqtbl2q_v: { - Int = Intrinsic::aarch64_neon_vtbl2; s = "vtbl2"; break; - case NEON::BI__builtin_neon_vqtbl3_v: - case NEON::BI__builtin_neon_vqtbl3q_v: - Int = Intrinsic::aarch64_neon_vtbl3; s = "vtbl3"; break; - case NEON::BI__builtin_neon_vqtbl4_v: - case NEON::BI__builtin_neon_vqtbl4q_v: - Int = Intrinsic::aarch64_neon_vtbl4; s = "vtbl4"; break; - case NEON::BI__builtin_neon_vqtbx1_v: - case NEON::BI__builtin_neon_vqtbx1q_v: - Int = Intrinsic::aarch64_neon_vtbx1; s = "vtbx1"; 
break; - case NEON::BI__builtin_neon_vqtbx2_v: - case NEON::BI__builtin_neon_vqtbx2q_v: - Int = Intrinsic::aarch64_neon_vtbx2; s = "vtbx2"; break; - case NEON::BI__builtin_neon_vqtbx3_v: - case NEON::BI__builtin_neon_vqtbx3q_v: - Int = Intrinsic::aarch64_neon_vtbx3; s = "vtbx3"; break; - case NEON::BI__builtin_neon_vqtbx4_v: - case NEON::BI__builtin_neon_vqtbx4q_v: - Int = Intrinsic::aarch64_neon_vtbx4; s = "vtbx4"; break; - } - } - - if (!Int) - return nullptr; - - Function *F = CGF.CGM.getIntrinsic(Int, Ty); - return CGF.EmitNeonCall(F, Ops, s); -} - -Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, - const CallExpr *E) { - - // Process AArch64 scalar builtins - llvm::ArrayRef<NeonIntrinsicInfo> SISDInfo(AArch64SISDIntrinsicInfo); - const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap( - SISDInfo, BuiltinID, AArch64SISDIntrinsicInfoProvenSorted); - - if (Builtin) { - Value *Result = EmitAArch64ScalarBuiltinExpr(*this, *Builtin, E); - assert(Result && "SISD intrinsic should have been handled"); - return Result; - } - - // Process AArch64 table lookup builtins - if (Value *Result = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E)) - return Result; - - if (BuiltinID == AArch64::BI__clear_cache) { - assert(E->getNumArgs() == 2 && - "Variadic __clear_cache slipped through on AArch64"); - - const FunctionDecl *FD = E->getDirectCallee(); - SmallVector<Value *, 2> Ops; - for (unsigned i = 0; i < E->getNumArgs(); i++) - Ops.push_back(EmitScalarExpr(E->getArg(i))); - llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType()); - llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty); - StringRef Name = FD->getName(); - return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops); - } - - SmallVector<Value *, 4> Ops; - llvm::Value *Align = nullptr; // Alignment for load/store - - if (BuiltinID == NEON::BI__builtin_neon_vldrq_p128) { - Value *Op = EmitScalarExpr(E->getArg(0)); - unsigned addressSpace = - cast<llvm::PointerType>(Op->getType())->getAddressSpace(); - llvm::Type *Ty = llvm::Type::getFP128PtrTy(getLLVMContext(), addressSpace); - Op = Builder.CreateBitCast(Op, Ty); - Op = Builder.CreateLoad(Op); - Ty = llvm::Type::getIntNTy(getLLVMContext(), 128); - return Builder.CreateBitCast(Op, Ty); - } - if (BuiltinID == NEON::BI__builtin_neon_vstrq_p128) { - Value *Op0 = EmitScalarExpr(E->getArg(0)); - unsigned addressSpace = - cast<llvm::PointerType>(Op0->getType())->getAddressSpace(); - llvm::Type *PTy = llvm::Type::getFP128PtrTy(getLLVMContext(), addressSpace); - Op0 = Builder.CreateBitCast(Op0, PTy); - Value *Op1 = EmitScalarExpr(E->getArg(1)); - llvm::Type *Ty = llvm::Type::getFP128Ty(getLLVMContext()); - Op1 = Builder.CreateBitCast(Op1, Ty); - return Builder.CreateStore(Op1, Op0); - } - for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) { - if (i == 0) { - switch (BuiltinID) { - case NEON::BI__builtin_neon_vld1_v: - case NEON::BI__builtin_neon_vld1q_v: - case NEON::BI__builtin_neon_vst1_v: - case NEON::BI__builtin_neon_vst1q_v: - case NEON::BI__builtin_neon_vst2_v: - case NEON::BI__builtin_neon_vst2q_v: - case NEON::BI__builtin_neon_vst3_v: - case NEON::BI__builtin_neon_vst3q_v: - case NEON::BI__builtin_neon_vst4_v: - case NEON::BI__builtin_neon_vst4q_v: - case NEON::BI__builtin_neon_vst1_x2_v: - case NEON::BI__builtin_neon_vst1q_x2_v: - case NEON::BI__builtin_neon_vst1_x3_v: - case NEON::BI__builtin_neon_vst1q_x3_v: - case NEON::BI__builtin_neon_vst1_x4_v: - case NEON::BI__builtin_neon_vst1q_x4_v: - // Handle ld1/st1 lane in this function a little 
different from ARM. - case NEON::BI__builtin_neon_vld1_lane_v: - case NEON::BI__builtin_neon_vld1q_lane_v: - case NEON::BI__builtin_neon_vst1_lane_v: - case NEON::BI__builtin_neon_vst1q_lane_v: - case NEON::BI__builtin_neon_vst2_lane_v: - case NEON::BI__builtin_neon_vst2q_lane_v: - case NEON::BI__builtin_neon_vst3_lane_v: - case NEON::BI__builtin_neon_vst3q_lane_v: - case NEON::BI__builtin_neon_vst4_lane_v: - case NEON::BI__builtin_neon_vst4q_lane_v: - case NEON::BI__builtin_neon_vld1_dup_v: - case NEON::BI__builtin_neon_vld1q_dup_v: - // Get the alignment for the argument in addition to the value; - // we'll use it later. - std::pair<llvm::Value *, unsigned> Src = - EmitPointerWithAlignment(E->getArg(0)); - Ops.push_back(Src.first); - Align = Builder.getInt32(Src.second); - continue; - } - } - if (i == 1) { - switch (BuiltinID) { - case NEON::BI__builtin_neon_vld2_v: - case NEON::BI__builtin_neon_vld2q_v: - case NEON::BI__builtin_neon_vld3_v: - case NEON::BI__builtin_neon_vld3q_v: - case NEON::BI__builtin_neon_vld4_v: - case NEON::BI__builtin_neon_vld4q_v: - case NEON::BI__builtin_neon_vld1_x2_v: - case NEON::BI__builtin_neon_vld1q_x2_v: - case NEON::BI__builtin_neon_vld1_x3_v: - case NEON::BI__builtin_neon_vld1q_x3_v: - case NEON::BI__builtin_neon_vld1_x4_v: - case NEON::BI__builtin_neon_vld1q_x4_v: - // Handle ld1/st1 dup lane in this function a little different from ARM. - case NEON::BI__builtin_neon_vld2_dup_v: - case NEON::BI__builtin_neon_vld2q_dup_v: - case NEON::BI__builtin_neon_vld3_dup_v: - case NEON::BI__builtin_neon_vld3q_dup_v: - case NEON::BI__builtin_neon_vld4_dup_v: - case NEON::BI__builtin_neon_vld4q_dup_v: - case NEON::BI__builtin_neon_vld2_lane_v: - case NEON::BI__builtin_neon_vld2q_lane_v: - case NEON::BI__builtin_neon_vld3_lane_v: - case NEON::BI__builtin_neon_vld3q_lane_v: - case NEON::BI__builtin_neon_vld4_lane_v: - case NEON::BI__builtin_neon_vld4q_lane_v: - // Get the alignment for the argument in addition to the value; - // we'll use it later. - std::pair<llvm::Value *, unsigned> Src = - EmitPointerWithAlignment(E->getArg(1)); - Ops.push_back(Src.first); - Align = Builder.getInt32(Src.second); - continue; - } - } - Ops.push_back(EmitScalarExpr(E->getArg(i))); - } - - // Get the last argument, which specifies the vector type. - llvm::APSInt Result; - const Expr *Arg = E->getArg(E->getNumArgs() - 1); - if (!Arg->isIntegerConstantExpr(Result, getContext())) - return nullptr; - - // Determine the type of this overloaded NEON intrinsic. - NeonTypeFlags Type(Result.getZExtValue()); - bool usgn = Type.isUnsigned(); - bool quad = Type.isQuad(); - - llvm::VectorType *VTy = GetNeonType(this, Type); - llvm::Type *Ty = VTy; - if (!Ty) - return nullptr; - - // Many NEON builtins have identical semantics and uses in ARM and - // AArch64. Emit these in a single function. - llvm::ArrayRef<NeonIntrinsicInfo> IntrinsicMap(ARMSIMDIntrinsicMap); - Builtin = findNeonIntrinsicInMap(IntrinsicMap, BuiltinID, - NEONSIMDIntrinsicsProvenSorted); - if (Builtin) - return EmitCommonNeonBuiltinExpr( - Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic, - Builtin->NameHint, Builtin->TypeModifier, E, Ops, Align); - - unsigned Int; - switch (BuiltinID) { - default: - return nullptr; - - // AArch64 builtins mapping to legacy ARM v7 builtins. - // FIXME: the mapped builtins listed correspond to what has been tested - // in aarch64-neon-intrinsics.c so far. - - // Shift by immediate - case NEON::BI__builtin_neon_vrshr_n_v: - case NEON::BI__builtin_neon_vrshrq_n_v: - Int = usgn ? 
Intrinsic::aarch64_neon_vurshr - : Intrinsic::aarch64_neon_vsrshr; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n"); - case NEON::BI__builtin_neon_vsra_n_v: - if (VTy->getElementType()->isIntegerTy(64)) { - Int = usgn ? Intrinsic::aarch64_neon_vsradu_n - : Intrinsic::aarch64_neon_vsrads_n; - return EmitNeonCall(CGM.getIntrinsic(Int), Ops, "vsra_n"); - } - return EmitARMBuiltinExpr(NEON::BI__builtin_neon_vsra_n_v, E); - case NEON::BI__builtin_neon_vsraq_n_v: - return EmitARMBuiltinExpr(NEON::BI__builtin_neon_vsraq_n_v, E); - case NEON::BI__builtin_neon_vrsra_n_v: - if (VTy->getElementType()->isIntegerTy(64)) { - Int = usgn ? Intrinsic::aarch64_neon_vrsradu_n - : Intrinsic::aarch64_neon_vrsrads_n; - return EmitNeonCall(CGM.getIntrinsic(Int), Ops, "vrsra_n"); - } - // fall through - case NEON::BI__builtin_neon_vrsraq_n_v: { - Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - Ops[1] = Builder.CreateBitCast(Ops[1], Ty); - Int = usgn ? Intrinsic::aarch64_neon_vurshr - : Intrinsic::aarch64_neon_vsrshr; - Ops[1] = Builder.CreateCall2(CGM.getIntrinsic(Int, Ty), Ops[1], Ops[2]); - return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n"); - } - case NEON::BI__builtin_neon_vqshlu_n_v: - case NEON::BI__builtin_neon_vqshluq_n_v: - Int = Intrinsic::aarch64_neon_vsqshlu; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n"); - case NEON::BI__builtin_neon_vsri_n_v: - case NEON::BI__builtin_neon_vsriq_n_v: - Int = Intrinsic::aarch64_neon_vsri; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsri_n"); - case NEON::BI__builtin_neon_vsli_n_v: - case NEON::BI__builtin_neon_vsliq_n_v: - Int = Intrinsic::aarch64_neon_vsli; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsli_n"); - case NEON::BI__builtin_neon_vqshrun_n_v: - Int = Intrinsic::aarch64_neon_vsqshrun; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n"); - case NEON::BI__builtin_neon_vrshrn_n_v: - Int = Intrinsic::aarch64_neon_vrshrn; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n"); - case NEON::BI__builtin_neon_vqrshrun_n_v: - Int = Intrinsic::aarch64_neon_vsqrshrun; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n"); - case NEON::BI__builtin_neon_vqshrn_n_v: - Int = usgn ? Intrinsic::aarch64_neon_vuqshrn - : Intrinsic::aarch64_neon_vsqshrn; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n"); - case NEON::BI__builtin_neon_vqrshrn_n_v: - Int = usgn ? Intrinsic::aarch64_neon_vuqrshrn - : Intrinsic::aarch64_neon_vsqrshrn; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n"); - - // Convert - case NEON::BI__builtin_neon_vcvt_n_f64_v: - case NEON::BI__builtin_neon_vcvtq_n_f64_v: { - llvm::Type *FloatTy = - GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad)); - llvm::Type *Tys[2] = { FloatTy, Ty }; - Int = usgn ? 
Intrinsic::arm_neon_vcvtfxu2fp - : Intrinsic::arm_neon_vcvtfxs2fp; - Function *F = CGM.getIntrinsic(Int, Tys); - return EmitNeonCall(F, Ops, "vcvt_n"); - } - - // Load/Store - case NEON::BI__builtin_neon_vld1_x2_v: - case NEON::BI__builtin_neon_vld1q_x2_v: - case NEON::BI__builtin_neon_vld1_x3_v: - case NEON::BI__builtin_neon_vld1q_x3_v: - case NEON::BI__builtin_neon_vld1_x4_v: - case NEON::BI__builtin_neon_vld1q_x4_v: { - unsigned Int; - switch (BuiltinID) { - case NEON::BI__builtin_neon_vld1_x2_v: - case NEON::BI__builtin_neon_vld1q_x2_v: - Int = Intrinsic::aarch64_neon_vld1x2; - break; - case NEON::BI__builtin_neon_vld1_x3_v: - case NEON::BI__builtin_neon_vld1q_x3_v: - Int = Intrinsic::aarch64_neon_vld1x3; - break; - case NEON::BI__builtin_neon_vld1_x4_v: - case NEON::BI__builtin_neon_vld1q_x4_v: - Int = Intrinsic::aarch64_neon_vld1x4; - break; - } - Function *F = CGM.getIntrinsic(Int, Ty); - Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld1xN"); - Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); - Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - return Builder.CreateStore(Ops[1], Ops[0]); - } - case NEON::BI__builtin_neon_vst1_x2_v: - case NEON::BI__builtin_neon_vst1q_x2_v: - case NEON::BI__builtin_neon_vst1_x3_v: - case NEON::BI__builtin_neon_vst1q_x3_v: - case NEON::BI__builtin_neon_vst1_x4_v: - case NEON::BI__builtin_neon_vst1q_x4_v: { - Ops.push_back(Align); - unsigned Int; - switch (BuiltinID) { - case NEON::BI__builtin_neon_vst1_x2_v: - case NEON::BI__builtin_neon_vst1q_x2_v: - Int = Intrinsic::aarch64_neon_vst1x2; - break; - case NEON::BI__builtin_neon_vst1_x3_v: - case NEON::BI__builtin_neon_vst1q_x3_v: - Int = Intrinsic::aarch64_neon_vst1x3; - break; - case NEON::BI__builtin_neon_vst1_x4_v: - case NEON::BI__builtin_neon_vst1q_x4_v: - Int = Intrinsic::aarch64_neon_vst1x4; - break; - } - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, ""); - } - case NEON::BI__builtin_neon_vld1_lane_v: - case NEON::BI__builtin_neon_vld1q_lane_v: { - Ops[1] = Builder.CreateBitCast(Ops[1], Ty); - Ty = llvm::PointerType::getUnqual(VTy->getElementType()); - Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - LoadInst *Ld = Builder.CreateLoad(Ops[0]); - Ld->setAlignment(cast<ConstantInt>(Align)->getZExtValue()); - return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane"); - } - case NEON::BI__builtin_neon_vst1_lane_v: - case NEON::BI__builtin_neon_vst1q_lane_v: { - Ops[1] = Builder.CreateBitCast(Ops[1], Ty); - Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]); - Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); - StoreInst *St = - Builder.CreateStore(Ops[1], Builder.CreateBitCast(Ops[0], Ty)); - St->setAlignment(cast<ConstantInt>(Align)->getZExtValue()); - return St; - } - case NEON::BI__builtin_neon_vld2_dup_v: - case NEON::BI__builtin_neon_vld2q_dup_v: - case NEON::BI__builtin_neon_vld3_dup_v: - case NEON::BI__builtin_neon_vld3q_dup_v: - case NEON::BI__builtin_neon_vld4_dup_v: - case NEON::BI__builtin_neon_vld4q_dup_v: { - // Handle 64-bit x 1 elements as a special-case. There is no "dup" needed. 
- if (VTy->getElementType()->getPrimitiveSizeInBits() == 64 && - VTy->getNumElements() == 1) { - switch (BuiltinID) { - case NEON::BI__builtin_neon_vld2_dup_v: - Int = Intrinsic::arm_neon_vld2; - break; - case NEON::BI__builtin_neon_vld3_dup_v: - Int = Intrinsic::arm_neon_vld3; - break; - case NEON::BI__builtin_neon_vld4_dup_v: - Int = Intrinsic::arm_neon_vld4; - break; - default: - llvm_unreachable("unknown vld_dup intrinsic?"); - } - Function *F = CGM.getIntrinsic(Int, Ty); - Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld_dup"); - Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); - Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - return Builder.CreateStore(Ops[1], Ops[0]); - } - switch (BuiltinID) { - case NEON::BI__builtin_neon_vld2_dup_v: - case NEON::BI__builtin_neon_vld2q_dup_v: - Int = Intrinsic::arm_neon_vld2lane; - break; - case NEON::BI__builtin_neon_vld3_dup_v: - case NEON::BI__builtin_neon_vld3q_dup_v: - Int = Intrinsic::arm_neon_vld3lane; - break; - case NEON::BI__builtin_neon_vld4_dup_v: - case NEON::BI__builtin_neon_vld4q_dup_v: - Int = Intrinsic::arm_neon_vld4lane; - break; - } - Function *F = CGM.getIntrinsic(Int, Ty); - llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType()); - - SmallVector<Value *, 6> Args; - Args.push_back(Ops[1]); - Args.append(STy->getNumElements(), UndefValue::get(Ty)); - - llvm::Constant *CI = ConstantInt::get(Int32Ty, 0); - Args.push_back(CI); - Args.push_back(Align); - - Ops[1] = Builder.CreateCall(F, Args, "vld_dup"); - // splat lane 0 to all elts in each vector of the result. - for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { - Value *Val = Builder.CreateExtractValue(Ops[1], i); - Value *Elt = Builder.CreateBitCast(Val, Ty); - Elt = EmitNeonSplat(Elt, CI); - Elt = Builder.CreateBitCast(Elt, Val->getType()); - Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i); - } - Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); - Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - return Builder.CreateStore(Ops[1], Ops[0]); - } - - case NEON::BI__builtin_neon_vmul_lane_v: - case NEON::BI__builtin_neon_vmul_laneq_v: { - // v1f64 vmul_lane should be mapped to Neon scalar mul lane - bool Quad = false; - if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v) - Quad = true; - Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); - llvm::Type *VTy = GetNeonType(this, - NeonTypeFlags(NeonTypeFlags::Float64, false, Quad)); - Ops[1] = Builder.CreateBitCast(Ops[1], VTy); - Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract"); - Value *Result = Builder.CreateFMul(Ops[0], Ops[1]); - return Builder.CreateBitCast(Result, Ty); - } - - // AArch64-only builtins - case NEON::BI__builtin_neon_vfmaq_laneq_v: { - Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); - Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - Ops[1] = Builder.CreateBitCast(Ops[1], Ty); - - Ops[2] = Builder.CreateBitCast(Ops[2], Ty); - Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3])); - return Builder.CreateCall3(F, Ops[2], Ops[1], Ops[0]); - } - case NEON::BI__builtin_neon_vfmaq_lane_v: { - Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); - Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - Ops[1] = Builder.CreateBitCast(Ops[1], Ty); - - llvm::VectorType *VTy = cast<llvm::VectorType>(Ty); - llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(), - VTy->getNumElements() / 2); - Ops[2] = Builder.CreateBitCast(Ops[2], STy); - Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), - cast<ConstantInt>(Ops[3])); - Ops[2] = Builder.CreateShuffleVector(Ops[2], 
Ops[2], SV, "lane"); - - return Builder.CreateCall3(F, Ops[2], Ops[1], Ops[0]); - } - case NEON::BI__builtin_neon_vfma_lane_v: { - llvm::VectorType *VTy = cast<llvm::VectorType>(Ty); - // v1f64 fma should be mapped to Neon scalar f64 fma - if (VTy && VTy->getElementType() == DoubleTy) { - Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); - Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy); - llvm::Type *VTy = GetNeonType(this, - NeonTypeFlags(NeonTypeFlags::Float64, false, false)); - Ops[2] = Builder.CreateBitCast(Ops[2], VTy); - Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); - Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy); - Value *Result = Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]); - return Builder.CreateBitCast(Result, Ty); - } - Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); - Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - Ops[1] = Builder.CreateBitCast(Ops[1], Ty); - - Ops[2] = Builder.CreateBitCast(Ops[2], Ty); - Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3])); - return Builder.CreateCall3(F, Ops[2], Ops[1], Ops[0]); - } - case NEON::BI__builtin_neon_vfma_laneq_v: { - llvm::VectorType *VTy = cast<llvm::VectorType>(Ty); - // v1f64 fma should be mapped to Neon scalar f64 fma - if (VTy && VTy->getElementType() == DoubleTy) { - Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); - Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy); - llvm::Type *VTy = GetNeonType(this, - NeonTypeFlags(NeonTypeFlags::Float64, false, true)); - Ops[2] = Builder.CreateBitCast(Ops[2], VTy); - Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); - Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy); - Value *Result = Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]); - return Builder.CreateBitCast(Result, Ty); - } - Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); - Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - Ops[1] = Builder.CreateBitCast(Ops[1], Ty); - - llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(), - VTy->getNumElements() * 2); - Ops[2] = Builder.CreateBitCast(Ops[2], STy); - Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), - cast<ConstantInt>(Ops[3])); - Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane"); - - return Builder.CreateCall3(F, Ops[2], Ops[1], Ops[0]); - } - case NEON::BI__builtin_neon_vfms_v: - case NEON::BI__builtin_neon_vfmsq_v: { - Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); - Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - Ops[1] = Builder.CreateBitCast(Ops[1], Ty); - Ops[1] = Builder.CreateFNeg(Ops[1]); - Ops[2] = Builder.CreateBitCast(Ops[2], Ty); - - // LLVM's fma intrinsic puts the accumulator in the last position, but the - // AArch64 intrinsic has it first. 
- return Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]); - } - case NEON::BI__builtin_neon_vmaxnm_v: - case NEON::BI__builtin_neon_vmaxnmq_v: { - Int = Intrinsic::aarch64_neon_vmaxnm; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm"); - } - case NEON::BI__builtin_neon_vminnm_v: - case NEON::BI__builtin_neon_vminnmq_v: { - Int = Intrinsic::aarch64_neon_vminnm; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm"); - } - case NEON::BI__builtin_neon_vpmaxnm_v: - case NEON::BI__builtin_neon_vpmaxnmq_v: { - Int = Intrinsic::aarch64_neon_vpmaxnm; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm"); - } - case NEON::BI__builtin_neon_vpminnm_v: - case NEON::BI__builtin_neon_vpminnmq_v: { - Int = Intrinsic::aarch64_neon_vpminnm; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm"); - } - case NEON::BI__builtin_neon_vpmaxq_v: { - Int = usgn ? Intrinsic::arm_neon_vpmaxu : Intrinsic::arm_neon_vpmaxs; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax"); - } - case NEON::BI__builtin_neon_vpminq_v: { - Int = usgn ? Intrinsic::arm_neon_vpminu : Intrinsic::arm_neon_vpmins; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin"); - } - case NEON::BI__builtin_neon_vmulx_v: - case NEON::BI__builtin_neon_vmulxq_v: { - Int = Intrinsic::aarch64_neon_vmulx; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx"); - } - case NEON::BI__builtin_neon_vsqadd_v: - case NEON::BI__builtin_neon_vsqaddq_v: { - Int = Intrinsic::aarch64_neon_usqadd; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd"); - } - case NEON::BI__builtin_neon_vuqadd_v: - case NEON::BI__builtin_neon_vuqaddq_v: { - Int = Intrinsic::aarch64_neon_suqadd; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd"); - } - case NEON::BI__builtin_neon_vrbit_v: - case NEON::BI__builtin_neon_vrbitq_v: - Int = Intrinsic::aarch64_neon_rbit; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit"); - case NEON::BI__builtin_neon_vcvt_f32_f64: { - NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true); - Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag)); - return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt"); - } - case NEON::BI__builtin_neon_vcvtx_f32_v: { - llvm::Type *EltTy = FloatTy; - llvm::Type *ResTy = llvm::VectorType::get(EltTy, 2); - llvm::Type *Tys[2] = { ResTy, Ty }; - Int = Intrinsic::aarch64_neon_vcvtxn; - return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtx_f32_f64"); - } - case NEON::BI__builtin_neon_vcvt_f64_f32: { - llvm::Type *OpTy = - GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, false)); - Ops[0] = Builder.CreateBitCast(Ops[0], OpTy); - return Builder.CreateFPExt(Ops[0], Ty, "vcvt"); - } - case NEON::BI__builtin_neon_vcvt_f64_v: - case NEON::BI__builtin_neon_vcvtq_f64_v: { - Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad)); - return usgn ? 
Builder.CreateUIToFP(Ops[0], Ty, "vcvt") - : Builder.CreateSIToFP(Ops[0], Ty, "vcvt"); - } - case NEON::BI__builtin_neon_vrndn_v: - case NEON::BI__builtin_neon_vrndnq_v: { - Int = Intrinsic::aarch64_neon_frintn; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn"); - } - case NEON::BI__builtin_neon_vrnda_v: - case NEON::BI__builtin_neon_vrndaq_v: { - Int = Intrinsic::round; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda"); - } - case NEON::BI__builtin_neon_vrndp_v: - case NEON::BI__builtin_neon_vrndpq_v: { - Int = Intrinsic::ceil; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp"); - } - case NEON::BI__builtin_neon_vrndm_v: - case NEON::BI__builtin_neon_vrndmq_v: { - Int = Intrinsic::floor; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm"); - } - case NEON::BI__builtin_neon_vrndx_v: - case NEON::BI__builtin_neon_vrndxq_v: { - Int = Intrinsic::rint; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx"); - } - case NEON::BI__builtin_neon_vrnd_v: - case NEON::BI__builtin_neon_vrndq_v: { - Int = Intrinsic::trunc; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd"); - } - case NEON::BI__builtin_neon_vrndi_v: - case NEON::BI__builtin_neon_vrndiq_v: { - Int = Intrinsic::nearbyint; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndi"); - } - case NEON::BI__builtin_neon_vsqrt_v: - case NEON::BI__builtin_neon_vsqrtq_v: { - Int = Intrinsic::sqrt; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt"); - } - case NEON::BI__builtin_neon_vceqz_v: - case NEON::BI__builtin_neon_vceqzq_v: - return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ, - ICmpInst::ICMP_EQ, "vceqz"); - case NEON::BI__builtin_neon_vcgez_v: - case NEON::BI__builtin_neon_vcgezq_v: - return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE, - ICmpInst::ICMP_SGE, "vcgez"); - case NEON::BI__builtin_neon_vclez_v: - case NEON::BI__builtin_neon_vclezq_v: - return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE, - ICmpInst::ICMP_SLE, "vclez"); - case NEON::BI__builtin_neon_vcgtz_v: - case NEON::BI__builtin_neon_vcgtzq_v: - return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT, - ICmpInst::ICMP_SGT, "vcgtz"); - case NEON::BI__builtin_neon_vcltz_v: - case NEON::BI__builtin_neon_vcltzq_v: - return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT, - ICmpInst::ICMP_SLT, "vcltz"); - } -} - Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { unsigned HintID = static_cast<unsigned>(-1); |