diff options
| author | Hao Liu <Hao.Liu@arm.com> | 2013-09-04 09:29:13 +0000 |
|---|---|---|
| committer | Hao Liu <Hao.Liu@arm.com> | 2013-09-04 09:29:13 +0000 |
| commit | b1852eed38288964367b13830dfc455a95902c0e (patch) | |
| tree | 0a079bea9f05c53f8c1ad43991aac841cc7ea875 /clang/lib/CodeGen | |
| parent | d4aede098f9042513b881ec83aefd863e949ea3e (diff) | |
| download | bcm5719-llvm-b1852eed38288964367b13830dfc455a95902c0e.tar.gz bcm5719-llvm-b1852eed38288964367b13830dfc455a95902c0e.zip | |
Implement AArch64 NEON instructions in AdvSIMD (shift). About 24 shift instructions:
sshr,ushr,ssra,usra,srshr,urshr,srsra,ursra,sri,shl,sli,sqshlu,sqshl,uqshl,shrn,sqrshrun,sqshrun,sqshrn,uqshrn,rshrn,sqrshrn,uqrshrn,sshll,ushll
and 4 convert instructions:
scvtf,ucvtf,fcvtzs,fcvtzu
llvm-svn: 189926
Diffstat (limited to 'clang/lib/CodeGen')
| -rw-r--r-- | clang/lib/CodeGen/CGBuiltin.cpp | 147 |
1 files changed, 110 insertions, 37 deletions
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index febde9a322a..6bf5d6f54af 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -1620,37 +1620,6 @@ static llvm::VectorType *GetNeonType(CodeGenFunction *CGF, llvm_unreachable("Invalid NeonTypeFlags element type!"); } -static Value *EmitExtendedSHL(CodeGenFunction &CGF, - SmallVectorImpl<Value*> &Ops, - llvm::VectorType *VTy, bool usgn, bool isHigh) { - CGBuilderTy Builder = CGF.Builder; - if (isHigh){ - unsigned NumElts = VTy->getNumElements(); - unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits(); - llvm::Type *EltTy = - llvm::IntegerType::get(VTy->getContext(), EltBits / 2); - // The source operand type has twice as many elements of half the size. - llvm::Type *SrcTy = llvm::VectorType::get(EltTy, NumElts * 2); - SmallVector<Constant*, 8> Indices; - for (unsigned i = 0; i != NumElts; i++) - Indices.push_back(Builder.getInt32(i + NumElts)); - Value *SV = llvm::ConstantVector::get(Indices); - Value *Undef = llvm::UndefValue::get(SrcTy); - Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy); - Ops[0] = Builder.CreateShuffleVector(Ops[0], Undef, SV); - } else { - llvm::Type *SrcTy = llvm::VectorType::getTruncatedElementVectorType(VTy); - Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy); - } - - if (usgn) - Ops[0] = Builder.CreateZExt(Ops[0], VTy); - else - Ops[0] = Builder.CreateSExt(Ops[0], VTy); - Ops[1] = CGF.EmitNeonShiftVector(Ops[1], VTy, false); - return Builder.CreateShl(Ops[0], Ops[1], "vshl_n"); -} - Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) { unsigned nElts = cast<llvm::VectorType>(V->getType())->getNumElements(); Value* SV = llvm::ConstantVector::getSplat(nElts, C); @@ -1893,18 +1862,122 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqrdmulh_v, E); case AArch64::BI__builtin_neon_vqrdmulhq_v: return 
EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqrdmulhq_v, E); + + // Shift by immediate + case AArch64::BI__builtin_neon_vshr_n_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vshr_n_v, E); + case AArch64::BI__builtin_neon_vshrq_n_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vshrq_n_v, E); + case AArch64::BI__builtin_neon_vrshr_n_v: + case AArch64::BI__builtin_neon_vrshrq_n_v: + Int = usgn ? Intrinsic::aarch64_neon_vurshr + : Intrinsic::aarch64_neon_vsrshr; + return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n"); + case AArch64::BI__builtin_neon_vsra_n_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vsra_n_v, E); + case AArch64::BI__builtin_neon_vsraq_n_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vsraq_n_v, E); + case AArch64::BI__builtin_neon_vrsra_n_v: + case AArch64::BI__builtin_neon_vrsraq_n_v: { + Ops[0] = Builder.CreateBitCast(Ops[0], Ty); + Ops[1] = Builder.CreateBitCast(Ops[1], Ty); + Int = usgn ? Intrinsic::aarch64_neon_vurshr + : Intrinsic::aarch64_neon_vsrshr; + Ops[1] = Builder.CreateCall2(CGM.getIntrinsic(Int, Ty), Ops[1], Ops[2]); + return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n"); + } case AArch64::BI__builtin_neon_vshl_n_v: return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vshl_n_v, E); case AArch64::BI__builtin_neon_vshlq_n_v: return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vshlq_n_v, E); + case AArch64::BI__builtin_neon_vqshl_n_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqshl_n_v, E); + case AArch64::BI__builtin_neon_vqshlq_n_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqshlq_n_v, E); + case AArch64::BI__builtin_neon_vqshlu_n_v: + case AArch64::BI__builtin_neon_vqshluq_n_v: + Int = Intrinsic::aarch64_neon_vsqshlu; + return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n"); + case AArch64::BI__builtin_neon_vsri_n_v: + case AArch64::BI__builtin_neon_vsriq_n_v: + Int = Intrinsic::aarch64_neon_vsri; + return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsri_n"); + case 
AArch64::BI__builtin_neon_vsli_n_v: + case AArch64::BI__builtin_neon_vsliq_n_v: + Int = Intrinsic::aarch64_neon_vsli; + return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsli_n"); + case AArch64::BI__builtin_neon_vshll_n_v: { + llvm::Type *SrcTy = llvm::VectorType::getTruncatedElementVectorType(VTy); + Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy); + if (usgn) + Ops[0] = Builder.CreateZExt(Ops[0], VTy); + else + Ops[0] = Builder.CreateSExt(Ops[0], VTy); + Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false); + return Builder.CreateShl(Ops[0], Ops[1], "vshll_n"); + } + case AArch64::BI__builtin_neon_vshrn_n_v: { + llvm::Type *SrcTy = llvm::VectorType::getExtendedElementVectorType(VTy); + Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy); + Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false); + if (usgn) + Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]); + else + Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]); + return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n"); + } + case AArch64::BI__builtin_neon_vqshrun_n_v: + Int = Intrinsic::aarch64_neon_vsqshrun; + return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n"); + case AArch64::BI__builtin_neon_vrshrn_n_v: + Int = Intrinsic::aarch64_neon_vrshrn; + return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n"); + case AArch64::BI__builtin_neon_vqrshrun_n_v: + Int = Intrinsic::aarch64_neon_vsqrshrun; + return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n"); + case AArch64::BI__builtin_neon_vqshrn_n_v: + Int = usgn ? Intrinsic::aarch64_neon_vuqshrn + : Intrinsic::aarch64_neon_vsqshrn; + return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n"); + case AArch64::BI__builtin_neon_vqrshrn_n_v: + Int = usgn ? 
Intrinsic::aarch64_neon_vuqrshrn + : Intrinsic::aarch64_neon_vsqrshrn; + return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n"); + + // Convert case AArch64::BI__builtin_neon_vmovl_v: return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmovl_v, E); - case AArch64::BI__builtin_neon_vshll_n_v: - return EmitExtendedSHL(*this, Ops, VTy, usgn, false); - case AArch64::BI__builtin_neon_vmovl_high_v: - Ops.push_back(ConstantInt::get(Int32Ty, 0)); - case AArch64::BI__builtin_neon_vshll_high_n_v: - return EmitExtendedSHL(*this, Ops, VTy, usgn, true); + case AArch64::BI__builtin_neon_vcvt_n_f32_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvt_n_f32_v, E); + case AArch64::BI__builtin_neon_vcvtq_n_f32_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvtq_n_f32_v, E); + case AArch64::BI__builtin_neon_vcvtq_n_f64_v: { + llvm::Type *FloatTy = + GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, true)); + llvm::Type *Tys[2] = { FloatTy, Ty }; + Int = usgn ? Intrinsic::arm_neon_vcvtfxu2fp + : Intrinsic::arm_neon_vcvtfxs2fp; + Function *F = CGM.getIntrinsic(Int, Tys); + return EmitNeonCall(F, Ops, "vcvt_n"); + } + case AArch64::BI__builtin_neon_vcvt_n_s32_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvt_n_s32_v, E); + case AArch64::BI__builtin_neon_vcvtq_n_s32_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvtq_n_s32_v, E); + case AArch64::BI__builtin_neon_vcvt_n_u32_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvt_n_u32_v, E); + case AArch64::BI__builtin_neon_vcvtq_n_u32_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvtq_n_u32_v, E); + case AArch64::BI__builtin_neon_vcvtq_n_s64_v: + case AArch64::BI__builtin_neon_vcvtq_n_u64_v: { + llvm::Type *FloatTy = + GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, true)); + llvm::Type *Tys[2] = { Ty, FloatTy }; + Int = usgn ? 
Intrinsic::arm_neon_vcvtfp2fxu + : Intrinsic::arm_neon_vcvtfp2fxs; + Function *F = CGM.getIntrinsic(Int, Tys); + return EmitNeonCall(F, Ops, "vcvt_n"); + } // AArch64-only builtins case AArch64::BI__builtin_neon_vfms_v: |

