diff options
Diffstat (limited to 'clang/lib')
-rw-r--r-- | clang/lib/CodeGen/CGBuiltin.cpp | 79 |
1 files changed, 16 insertions, 63 deletions
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 09edfa7568f..309528404fa 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -4081,16 +4081,22 @@ static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = { NEONMAP1(vld1q_x2_v, arm_neon_vld1x2, 0), NEONMAP1(vld1q_x3_v, arm_neon_vld1x3, 0), NEONMAP1(vld1q_x4_v, arm_neon_vld1x4, 0), + NEONMAP1(vld2_dup_v, arm_neon_vld2dup, 0), NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0), NEONMAP1(vld2_v, arm_neon_vld2, 0), + NEONMAP1(vld2q_dup_v, arm_neon_vld2dup, 0), NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0), NEONMAP1(vld2q_v, arm_neon_vld2, 0), + NEONMAP1(vld3_dup_v, arm_neon_vld3dup, 0), NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0), NEONMAP1(vld3_v, arm_neon_vld3, 0), + NEONMAP1(vld3q_dup_v, arm_neon_vld3dup, 0), NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0), NEONMAP1(vld3q_v, arm_neon_vld3, 0), + NEONMAP1(vld4_dup_v, arm_neon_vld4dup, 0), NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0), NEONMAP1(vld4_v, arm_neon_vld4, 0), + NEONMAP1(vld4q_dup_v, arm_neon_vld4dup, 0), NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0), NEONMAP1(vld4q_v, arm_neon_vld4, 0), NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts), @@ -4980,7 +4986,13 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( case NEON::BI__builtin_neon_vld3_v: case NEON::BI__builtin_neon_vld3q_v: case NEON::BI__builtin_neon_vld4_v: - case NEON::BI__builtin_neon_vld4q_v: { + case NEON::BI__builtin_neon_vld4q_v: + case NEON::BI__builtin_neon_vld2_dup_v: + case NEON::BI__builtin_neon_vld2q_dup_v: + case NEON::BI__builtin_neon_vld3_dup_v: + case NEON::BI__builtin_neon_vld3q_dup_v: + case NEON::BI__builtin_neon_vld4_dup_v: + case NEON::BI__builtin_neon_vld4q_dup_v: { llvm::Type *Tys[] = {Ty, Int8PtrTy}; Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys); Value *Align = getAlignmentValue32(PtrOp1); @@ -5866,8 +5878,11 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vld4_lane_v: case NEON::BI__builtin_neon_vld4q_lane_v: case NEON::BI__builtin_neon_vld2_dup_v: + case NEON::BI__builtin_neon_vld2q_dup_v: case NEON::BI__builtin_neon_vld3_dup_v: + case NEON::BI__builtin_neon_vld3q_dup_v: case NEON::BI__builtin_neon_vld4_dup_v: + case NEON::BI__builtin_neon_vld4q_dup_v: // Get the alignment for the argument in addition to the value; // we'll use it later. PtrOp1 = EmitPointerWithAlignment(E->getArg(1)); @@ -6044,68 +6059,6 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, Value *Ld = Builder.CreateLoad(PtrOp0); return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane"); } - case NEON::BI__builtin_neon_vld2_dup_v: - case NEON::BI__builtin_neon_vld3_dup_v: - case NEON::BI__builtin_neon_vld4_dup_v: { - // Handle 64-bit elements as a special-case. There is no "dup" needed. - if (VTy->getElementType()->getPrimitiveSizeInBits() == 64) { - switch (BuiltinID) { - case NEON::BI__builtin_neon_vld2_dup_v: - Int = Intrinsic::arm_neon_vld2; - break; - case NEON::BI__builtin_neon_vld3_dup_v: - Int = Intrinsic::arm_neon_vld3; - break; - case NEON::BI__builtin_neon_vld4_dup_v: - Int = Intrinsic::arm_neon_vld4; - break; - default: llvm_unreachable("unknown vld_dup intrinsic?"); - } - llvm::Type *Tys[] = {Ty, Int8PtrTy}; - Function *F = CGM.getIntrinsic(Int, Tys); - llvm::Value *Align = getAlignmentValue32(PtrOp1); - Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, "vld_dup"); - Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); - Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); - } - switch (BuiltinID) { - case NEON::BI__builtin_neon_vld2_dup_v: - Int = Intrinsic::arm_neon_vld2lane; - break; - case NEON::BI__builtin_neon_vld3_dup_v: - Int = Intrinsic::arm_neon_vld3lane; - break; - case NEON::BI__builtin_neon_vld4_dup_v: - Int = Intrinsic::arm_neon_vld4lane; - break; - default: llvm_unreachable("unknown vld_dup intrinsic?"); - } - llvm::Type *Tys[] = {Ty, Int8PtrTy}; - Function *F = CGM.getIntrinsic(Int, Tys); - llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType()); - - SmallVector<Value*, 6> Args; - Args.push_back(Ops[1]); - Args.append(STy->getNumElements(), UndefValue::get(Ty)); - - llvm::Constant *CI = ConstantInt::get(Int32Ty, 0); - Args.push_back(CI); - Args.push_back(getAlignmentValue32(PtrOp1)); - - Ops[1] = Builder.CreateCall(F, Args, "vld_dup"); - // splat lane 0 to all elts in each vector of the result. - for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { - Value *Val = Builder.CreateExtractValue(Ops[1], i); - Value *Elt = Builder.CreateBitCast(Val, Ty); - Elt = EmitNeonSplat(Elt, CI); - Elt = Builder.CreateBitCast(Elt, Val->getType()); - Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i); - } - Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); - Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); - } case NEON::BI__builtin_neon_vqrshrn_n_v: Int = usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns; |