diff options
Diffstat (limited to 'clang')
-rw-r--r-- | clang/lib/CodeGen/TargetInfo.cpp | 152 | ||||
-rw-r--r-- | clang/test/CodeGen/arm-aapcs-vfp.c | 10 | ||||
-rw-r--r-- | clang/test/CodeGen/arm-homogenous.c | 10 | ||||
-rw-r--r-- | clang/test/CodeGen/arm64-aapcs-arguments.c | 6 | ||||
-rw-r--r-- | clang/test/CodeGen/arm64-arguments.c | 15 | ||||
-rw-r--r-- | clang/test/CodeGenCXX/homogeneous-aggregates.cpp | 39 |
6 files changed, 57 insertions, 175 deletions
diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp index 6e2c83e1fc4..cf7050757e8 100644 --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -3813,9 +3813,7 @@ private: bool isDarwinPCS() const { return Kind == DarwinPCS; } ABIArgInfo classifyReturnType(QualType RetTy) const; - ABIArgInfo classifyArgumentType(QualType RetTy, unsigned &AllocatedVFP, - bool &IsHA, unsigned &AllocatedGPR, - bool &IsSmallAggr, bool IsNamedArg) const; + ABIArgInfo classifyArgumentType(QualType RetTy) const; bool isHomogeneousAggregateBaseType(QualType Ty) const override; bool isHomogeneousAggregateSmallEnough(const Type *Ty, uint64_t Members) const override; @@ -3823,68 +3821,11 @@ private: bool isIllegalVectorType(QualType Ty) const; void computeInfo(CGFunctionInfo &FI) const override { - // To correctly handle Homogeneous Aggregate, we need to keep track of the - // number of SIMD and Floating-point registers allocated so far. - // If the argument is an HFA or an HVA and there are sufficient unallocated - // SIMD and Floating-point registers, then the argument is allocated to SIMD - // and Floating-point Registers (with one register per member of the HFA or - // HVA). Otherwise, the NSRN is set to 8. - unsigned AllocatedVFP = 0; - - // To correctly handle small aggregates, we need to keep track of the number - // of GPRs allocated so far. If the small aggregate can't all fit into - // registers, it will be on stack. We don't allow the aggregate to be - // partially in registers. - unsigned AllocatedGPR = 0; - - // Find the number of named arguments. Variadic arguments get special - // treatment with the Darwin ABI. - unsigned NumRequiredArgs = FI.getNumRequiredArgs(); - if (!getCXXABI().classifyReturnType(FI)) FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); - unsigned ArgNo = 0; - for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end(); - it != ie; ++it, ++ArgNo) { - unsigned PreAllocation = AllocatedVFP, PreGPR = AllocatedGPR; - bool IsHA = false, IsSmallAggr = false; - const unsigned NumVFPs = 8; - const unsigned NumGPRs = 8; - bool IsNamedArg = ArgNo < NumRequiredArgs; - it->info = classifyArgumentType(it->type, AllocatedVFP, IsHA, - AllocatedGPR, IsSmallAggr, IsNamedArg); - - // Under AAPCS the 64-bit stack slot alignment means we can't pass HAs - // as sequences of floats since they'll get "holes" inserted as - // padding by the back end. - if (IsHA && AllocatedVFP > NumVFPs && !isDarwinPCS() && - getContext().getTypeAlign(it->type) < 64) { - uint32_t NumStackSlots = getContext().getTypeSize(it->type); - NumStackSlots = llvm::RoundUpToAlignment(NumStackSlots, 64) / 64; - - llvm::Type *CoerceTy = llvm::ArrayType::get( - llvm::Type::getDoubleTy(getVMContext()), NumStackSlots); - it->info = ABIArgInfo::getDirect(CoerceTy); - } - // If we do not have enough VFP registers for the HA, any VFP registers - // that are unallocated are marked as unavailable. To achieve this, we add - // padding of (NumVFPs - PreAllocation) floats. - if (IsHA && AllocatedVFP > NumVFPs && PreAllocation < NumVFPs) { - llvm::Type *PaddingTy = llvm::ArrayType::get( - llvm::Type::getFloatTy(getVMContext()), NumVFPs - PreAllocation); - it->info.setPaddingType(PaddingTy); - } - - // If we do not have enough GPRs for the small aggregate, any GPR regs - // that are unallocated are marked as unavailable. - if (IsSmallAggr && AllocatedGPR > NumGPRs && PreGPR < NumGPRs) { - llvm::Type *PaddingTy = llvm::ArrayType::get( - llvm::Type::getInt32Ty(getVMContext()), NumGPRs - PreGPR); - it->info = - ABIArgInfo::getDirect(it->info.getCoerceToType(), 0, PaddingTy); - } - } + for (auto &it : FI.arguments()) + it.info = classifyArgumentType(it.type); } llvm::Value *EmitDarwinVAArg(llvm::Value *VAListAddr, QualType Ty, @@ -3915,12 +3856,7 @@ public: }; } -ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty, - unsigned &AllocatedVFP, - bool &IsHA, - unsigned &AllocatedGPR, - bool &IsSmallAggr, - bool IsNamedArg) const { +ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty) const { Ty = useFirstFieldIfTransparentUnion(Ty); // Handle illegal vector types here. @@ -3928,48 +3864,26 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty, uint64_t Size = getContext().getTypeSize(Ty); if (Size <= 32) { llvm::Type *ResType = llvm::Type::getInt32Ty(getVMContext()); - AllocatedGPR++; return ABIArgInfo::getDirect(ResType); } if (Size == 64) { llvm::Type *ResType = llvm::VectorType::get(llvm::Type::getInt32Ty(getVMContext()), 2); - AllocatedVFP++; return ABIArgInfo::getDirect(ResType); } if (Size == 128) { llvm::Type *ResType = llvm::VectorType::get(llvm::Type::getInt32Ty(getVMContext()), 4); - AllocatedVFP++; return ABIArgInfo::getDirect(ResType); } - AllocatedGPR++; return ABIArgInfo::getIndirect(0, /*ByVal=*/false); } - if (Ty->isVectorType()) - // Size of a legal vector should be either 64 or 128. - AllocatedVFP++; - if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) { - if (BT->getKind() == BuiltinType::Half || - BT->getKind() == BuiltinType::Float || - BT->getKind() == BuiltinType::Double || - BT->getKind() == BuiltinType::LongDouble) - AllocatedVFP++; - } if (!isAggregateTypeForABI(Ty)) { // Treat an enum type as its underlying type. if (const EnumType *EnumTy = Ty->getAs<EnumType>()) Ty = EnumTy->getDecl()->getIntegerType(); - if (!Ty->isFloatingType() && !Ty->isVectorType()) { - unsigned Alignment = getContext().getTypeAlign(Ty); - if (!isDarwinPCS() && Alignment > 64) - AllocatedGPR = llvm::RoundUpToAlignment(AllocatedGPR, Alignment / 64); - - int RegsNeeded = getContext().getTypeSize(Ty) > 64 ? 2 : 1; - AllocatedGPR += RegsNeeded; - } return (Ty->isPromotableIntegerType() && isDarwinPCS() ? ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); @@ -3978,9 +3892,8 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty, // Structures with either a non-trivial destructor or a non-trivial // copy constructor are always indirect. if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) { - AllocatedGPR++; return ABIArgInfo::getIndirect(0, /*ByVal=*/RAA == - CGCXXABI::RAA_DirectInMemory); + CGCXXABI::RAA_DirectInMemory); } // Empty records are always ignored on Darwin, but actually passed in C++ mode @@ -3989,7 +3902,6 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty, if (!getContext().getLangOpts().CPlusPlus || isDarwinPCS()) return ABIArgInfo::getIgnore(); - ++AllocatedGPR; return ABIArgInfo::getDirect(llvm::Type::getInt8Ty(getVMContext())); } @@ -3997,28 +3909,16 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty, const Type *Base = nullptr; uint64_t Members = 0; if (isHomogeneousAggregate(Ty, Base, Members)) { - IsHA = true; - if (!IsNamedArg && isDarwinPCS()) { - // With the Darwin ABI, variadic arguments are always passed on the stack - // and should not be expanded. Treat variadic HFAs as arrays of doubles. - uint64_t Size = getContext().getTypeSize(Ty); - llvm::Type *BaseTy = llvm::Type::getDoubleTy(getVMContext()); - return ABIArgInfo::getDirect(llvm::ArrayType::get(BaseTy, Size / 64)); - } - AllocatedVFP += Members; - return ABIArgInfo::getExpand(); + return ABIArgInfo::getDirect( + llvm::ArrayType::get(CGT.ConvertType(QualType(Base, 0)), Members)); } // Aggregates <= 16 bytes are passed directly in registers or on the stack. uint64_t Size = getContext().getTypeSize(Ty); if (Size <= 128) { unsigned Alignment = getContext().getTypeAlign(Ty); - if (!isDarwinPCS() && Alignment > 64) - AllocatedGPR = llvm::RoundUpToAlignment(AllocatedGPR, Alignment / 64); - Size = 64 * ((Size + 63) / 64); // round up to multiple of 8 bytes - AllocatedGPR += Size / 64; - IsSmallAggr = true; + // We use a pair of i64 for 16-byte aggregate with 8-byte alignment. // For aggregates with 16-byte alignment, we use i128. if (Alignment < 128 && Size == 128) { @@ -4028,7 +3928,6 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty, return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Size)); } - AllocatedGPR++; return ABIArgInfo::getIndirect(0, /*ByVal=*/false); } @@ -4104,14 +4003,25 @@ bool AArch64ABIInfo::isHomogeneousAggregateSmallEnough(const Type *Base, return Members <= 4; } -llvm::Value *AArch64ABIInfo::EmitAAPCSVAArg(llvm::Value *VAListAddr, QualType Ty, - CodeGenFunction &CGF) const { - unsigned AllocatedGPR = 0, AllocatedVFP = 0; - bool IsHA = false, IsSmallAggr = false; - ABIArgInfo AI = classifyArgumentType(Ty, AllocatedVFP, IsHA, AllocatedGPR, - IsSmallAggr, false /*IsNamedArg*/); +llvm::Value *AArch64ABIInfo::EmitAAPCSVAArg(llvm::Value *VAListAddr, + QualType Ty, + CodeGenFunction &CGF) const { + ABIArgInfo AI = classifyArgumentType(Ty); bool IsIndirect = AI.isIndirect(); + llvm::Type *BaseTy = CGF.ConvertType(Ty); + if (IsIndirect) + BaseTy = llvm::PointerType::getUnqual(BaseTy); + else if (AI.getCoerceToType()) + BaseTy = AI.getCoerceToType(); + + unsigned NumRegs = 1; + if (llvm::ArrayType *ArrTy = dyn_cast<llvm::ArrayType>(BaseTy)) { + BaseTy = ArrTy->getElementType(); + NumRegs = ArrTy->getNumElements(); + } + bool IsFPR = BaseTy->isFloatingPointTy() || BaseTy->isVectorTy(); + // The AArch64 va_list type and handling is specified in the Procedure Call // Standard, section B.4: // @@ -4131,21 +4041,19 @@ llvm::Value *AArch64ABIInfo::EmitAAPCSVAArg(llvm::Value *VAListAddr, QualType Ty llvm::Value *reg_offs_p = nullptr, *reg_offs = nullptr; int reg_top_index; - int RegSize; - if (AllocatedGPR) { - assert(!AllocatedVFP && "Arguments never split between int & VFP regs"); + int RegSize = IsIndirect ? 8 : getContext().getTypeSize(Ty) / 8; + if (!IsFPR) { // 3 is the field number of __gr_offs reg_offs_p = CGF.Builder.CreateStructGEP(VAListAddr, 3, "gr_offs_p"); reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "gr_offs"); reg_top_index = 1; // field number for __gr_top - RegSize = 8 * AllocatedGPR; + RegSize = llvm::RoundUpToAlignment(RegSize, 8); } else { - assert(!AllocatedGPR && "Argument must go in VFP or int regs"); // 4 is the field number of __vr_offs. reg_offs_p = CGF.Builder.CreateStructGEP(VAListAddr, 4, "vr_offs_p"); reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "vr_offs"); reg_top_index = 2; // field number for __vr_top - RegSize = 16 * AllocatedVFP; + RegSize = 16 * NumRegs; } //======================================= @@ -4169,7 +4077,7 @@ llvm::Value *AArch64ABIInfo::EmitAAPCSVAArg(llvm::Value *VAListAddr, QualType Ty // Integer arguments may need to correct register alignment (for example a // "struct { __int128 a; };" gets passed in x_2N, x_{2N+1}). In this case we // align __gr_offs to calculate the potential address. - if (AllocatedGPR && !IsIndirect && Ctx.getTypeAlign(Ty) > 64) { + if (!IsFPR && !IsIndirect && Ctx.getTypeAlign(Ty) > 64) { int Align = Ctx.getTypeAlign(Ty) / 8; reg_offs = CGF.Builder.CreateAdd( diff --git a/clang/test/CodeGen/arm-aapcs-vfp.c b/clang/test/CodeGen/arm-aapcs-vfp.c index da1e675aba8..7ef7c4e52ed 100644 --- a/clang/test/CodeGen/arm-aapcs-vfp.c +++ b/clang/test/CodeGen/arm-aapcs-vfp.c @@ -29,7 +29,7 @@ struct homogeneous_struct { float f4; }; // CHECK: define arm_aapcs_vfpcc %struct.homogeneous_struct @test_struct(%struct.homogeneous_struct %{{.*}}) -// CHECK64: define %struct.homogeneous_struct @test_struct(float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}) +// CHECK64: define %struct.homogeneous_struct @test_struct([4 x float] %{{.*}}) extern struct homogeneous_struct struct_callee(struct homogeneous_struct); struct homogeneous_struct test_struct(struct homogeneous_struct arg) { return struct_callee(arg); @@ -44,7 +44,7 @@ struct nested_array { double d[4]; }; // CHECK: define arm_aapcs_vfpcc void @test_array(%struct.nested_array %{{.*}}) -// CHECK64: define void @test_array(double %{{.*}}, double %{{.*}}, double %{{.*}}, double %{{.*}}) +// CHECK64: define void @test_array([4 x double] %{{.*}}) extern void array_callee(struct nested_array); void test_array(struct nested_array arg) { array_callee(arg); @@ -52,7 +52,7 @@ void test_array(struct nested_array arg) { extern void complex_callee(__complex__ double); // CHECK: define arm_aapcs_vfpcc void @test_complex({ double, double } %{{.*}}) -// CHECK64: define void @test_complex(double %{{.*}}, double %{{.*}}) +// CHECK64: define void @test_complex([2 x double] %cd.coerce) void test_complex(__complex__ double cd) { complex_callee(cd); } @@ -98,7 +98,7 @@ void test_hetero(struct heterogeneous_struct arg) { // Neon multi-vector types are homogeneous aggregates. // CHECK: define arm_aapcs_vfpcc <16 x i8> @f0(%struct.int8x16x4_t %{{.*}}) -// CHECK64: define <16 x i8> @f0(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}) +// CHECK64: define <16 x i8> @f0([4 x <16 x i8>] %{{.*}}) int8x16_t f0(int8x16x4_t v4) { return vaddq_s8(v4.val[0], v4.val[3]); } @@ -112,7 +112,7 @@ struct neon_struct { int16x4_t v4; }; // CHECK: define arm_aapcs_vfpcc void @test_neon(%struct.neon_struct %{{.*}}) -// CHECK64: define void @test_neon(<8 x i8> %{{.*}}, <8 x i8> %{{.*}}, <2 x i32> %{{.*}}, <4 x i16> %{{.*}}) +// CHECK64: define void @test_neon([4 x <8 x i8>] %{{.*}}) extern void neon_callee(struct neon_struct); void test_neon(struct neon_struct arg) { neon_callee(arg); diff --git a/clang/test/CodeGen/arm-homogenous.c b/clang/test/CodeGen/arm-homogenous.c index 2ab6c105a59..3426d995cae 100644 --- a/clang/test/CodeGen/arm-homogenous.c +++ b/clang/test/CodeGen/arm-homogenous.c @@ -5,7 +5,7 @@ // RUN: -ffreestanding -emit-llvm -w -o - %s | FileCheck -check-prefix=CHECK64 %s // RUN: %clang_cc1 -triple arm64-linux-gnu -ffreestanding -emit-llvm -w -o - %s \ -// RUN: | FileCheck --check-prefix=CHECK64-AAPCS %s +// RUN: | FileCheck --check-prefix=CHECK64 %s typedef long long int64_t; typedef unsigned int uint32_t; @@ -176,9 +176,7 @@ void test_struct_of_four_doubles(void) { // CHECK: test_struct_of_four_doubles // CHECK: call arm_aapcs_vfpcc void @takes_struct_of_four_doubles(double {{.*}}, %struct.struct_of_four_doubles {{.*}}, %struct.struct_of_four_doubles {{.*}}, double {{.*}}) // CHECK64: test_struct_of_four_doubles -// CHECK64: call void @takes_struct_of_four_doubles(double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, [3 x float] undef, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}) -// CHECK64-AAPCS: test_struct_of_four_doubles -// CHECK64-AAPCS: call void @takes_struct_of_four_doubles(double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, [3 x float] undef, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}) +// CHECK64: call void @takes_struct_of_four_doubles(double {{.*}}, [4 x double] {{.*}}, [4 x double] {{.*}}, double {{.*}}) takes_struct_of_four_doubles(3.0, g_s4d, g_s4d, 4.0); } @@ -212,9 +210,7 @@ void test_struct_of_vecs(void) { // CHECK: test_struct_of_vecs // CHECK: call arm_aapcs_vfpcc void @takes_struct_of_vecs(double {{.*}}, %struct.struct_of_vecs {{.*}}, %struct.struct_of_vecs {{.*}}, double {{.*}}) // CHECK64: test_struct_of_vecs -// CHECK64: call void @takes_struct_of_vecs(double {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, [3 x float] undef, <8 x i8> {{.*}}, <4 x i16> {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, double {{.*}}) -// CHECK64-AAPCS: test_struct_of_vecs -// CHECK64-AAPCS: call void @takes_struct_of_vecs(double {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, [3 x float] undef, <8 x i8> {{.*}}, <4 x i16> {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, double {{.*}}) +// CHECK64: call void @takes_struct_of_vecs(double {{.*}}, [4 x <8 x i8>] {{.*}}, [4 x <8 x i8>] {{.*}}, double {{.*}}) takes_struct_of_vecs(3.0, g_vec, g_vec, 4.0); } diff --git a/clang/test/CodeGen/arm64-aapcs-arguments.c b/clang/test/CodeGen/arm64-aapcs-arguments.c index 38ac522de5d..ab302d40704 100644 --- a/clang/test/CodeGen/arm64-aapcs-arguments.c +++ b/clang/test/CodeGen/arm64-aapcs-arguments.c @@ -17,7 +17,7 @@ void test2(int x0, Small x2_x3, int x4, Small x6_x7, int sp, Small sp16) { // stack in order to avoid holes. Make sure we get all of them, and not just the // first: -// CHECK: void @test3(float %s0_s3.0, float %s0_s3.1, float %s0_s3.2, float %s0_s3.3, float %s4, [3 x float], [2 x double] %sp.coerce, [2 x double] %sp16.coerce) +// CHECK: void @test3([4 x float] %s0_s3.coerce, float %s4, [4 x float] %sp.coerce, [4 x float] %sp16.coerce) typedef struct { float arr[4]; } HFA; void test3(HFA s0_s3, float s4, HFA sp, HFA sp16) { } @@ -28,7 +28,7 @@ void test3(HFA s0_s3, float s4, HFA sp, HFA sp16) { // fp128] or something, but leaving them as-is retains more information for // users to debug. -// CHECK: void @test4(<16 x i8> %v0_v2.0, <16 x i8> %v0_v2.1, <16 x i8> %v0_v2.2, <16 x i8> %v3_v5.0, <16 x i8> %v3_v5.1, <16 x i8> %v3_v5.2, [2 x float], <16 x i8> %sp.0, <16 x i8> %sp.1, <16 x i8> %sp.2, double %sp48, <16 x i8> %sp64.0, <16 x i8> %sp64.1, <16 x i8> %sp64.2) +// CHECK: void @test4([3 x <16 x i8>] %v0_v2.coerce, [3 x <16 x i8>] %v3_v5.coerce, [3 x <16 x i8>] %sp.coerce, double %sp48, [3 x <16 x i8>] %sp64.coerce) typedef __attribute__((neon_vector_type(16))) signed char int8x16_t; typedef struct { int8x16_t arr[3]; } BigHFA; void test4(BigHFA v0_v2, BigHFA v3_v5, BigHFA sp, double sp48, BigHFA sp64) { @@ -46,6 +46,6 @@ unsigned char test5(unsigned char a, signed short b) { __fp16 test_half(__fp16 A) { } // __fp16 is a base type for homogeneous floating-point aggregates for AArch64 (but not 32-bit ARM). -// CHECK: define %struct.HFA_half @test_half_hfa(half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}}) +// CHECK: define %struct.HFA_half @test_half_hfa([4 x half] %{{.*}}) struct HFA_half { __fp16 a[4]; }; struct HFA_half test_half_hfa(struct HFA_half A) { } diff --git a/clang/test/CodeGen/arm64-arguments.c b/clang/test/CodeGen/arm64-arguments.c index b2de08dbe68..ae1ff98800a 100644 --- a/clang/test/CodeGen/arm64-arguments.c +++ b/clang/test/CodeGen/arm64-arguments.c @@ -123,8 +123,7 @@ void f31(struct s31 s) { } struct s32 { double x; }; void f32(struct s32 s) { } -// Expand Homogeneous Aggregate. -// CHECK: @f32(double %{{.*}}) +// CHECK: @f32([1 x double] %{{.*}}) // A composite type larger than 16 bytes should be passed indirectly. struct s33 { char buf[32*32]; }; @@ -197,7 +196,7 @@ typedef struct s35 s35_with_align; typedef __attribute__((neon_vector_type(4))) float float32x4_t; float32x4_t f35(int i, s35_with_align s1, s35_with_align s2) { -// CHECK: define <4 x float> @f35(i32 %i, float %s1.0, float %s1.1, float %s1.2, float %s1.3, float %s2.0, float %s2.1, float %s2.2, float %s2.3) +// CHECK: define <4 x float> @f35(i32 %i, [4 x float] %s1.coerce, [4 x float] %s2.coerce) // CHECK: %s1 = alloca %struct.s35, align 16 // CHECK: %s2 = alloca %struct.s35, align 16 // CHECK: %[[a:.*]] = bitcast %struct.s35* %s1 to <4 x float>* @@ -598,24 +597,24 @@ int caller43_stack() { __attribute__ ((noinline)) int f40_split(int i, int i2, int i3, int i4, int i5, int i6, int i7, s40_no_align s1, s40_no_align s2) { -// CHECK: define i32 @f40_split(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, [1 x i32], [2 x i64] %s1.coerce, [2 x i64] %s2.coerce) +// CHECK: define i32 @f40_split(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce) return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + s1.s + s2.s; } int caller40_split() { // CHECK: define i32 @caller40_split() -// CHECK: call i32 @f40_split(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, [1 x i32] undef, [2 x i64] %{{.*}} [2 x i64] %{{.*}}) +// CHECK: call i32 @f40_split(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, [2 x i64] %{{.*}} [2 x i64] %{{.*}}) return f40_split(1, 2, 3, 4, 5, 6, 7, g40, g40_2); } __attribute__ ((noinline)) int f41_split(int i, int i2, int i3, int i4, int i5, int i6, int i7, s41_with_align s1, s41_with_align s2) { -// CHECK: define i32 @f41_split(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, [1 x i32], i128 %s1.coerce, i128 %s2.coerce) +// CHECK: define i32 @f41_split(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i128 %s1.coerce, i128 %s2.coerce) return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + s1.s + s2.s; } int caller41_split() { // CHECK: define i32 @caller41_split() -// CHECK: call i32 @f41_split(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, [1 x i32] undef, i128 %{{.*}}, i128 %{{.*}}) +// CHECK: call i32 @f41_split(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i128 %{{.*}}, i128 %{{.*}}) return f41_split(1, 2, 3, 4, 5, 6, 7, g41, g41_2); } @@ -642,7 +641,7 @@ float test_hfa(int n, ...) { float test_hfa_call(struct HFA *a) { // CHECK-LABEL: define float @test_hfa_call(%struct.HFA* %a) -// CHECK: call float (i32, ...)* @test_hfa(i32 1, [2 x double] {{.*}}) +// CHECK: call float (i32, ...)* @test_hfa(i32 1, [4 x float] {{.*}}) test_hfa(1, *a); } diff --git a/clang/test/CodeGenCXX/homogeneous-aggregates.cpp b/clang/test/CodeGenCXX/homogeneous-aggregates.cpp index 4800aacbfe5..77c6b3a527b 100644 --- a/clang/test/CodeGenCXX/homogeneous-aggregates.cpp +++ b/clang/test/CodeGenCXX/homogeneous-aggregates.cpp @@ -46,7 +46,7 @@ D1 CC func_D1(D1 x) { return x; } // PPC: define [3 x double] @_Z7func_D22D2([3 x double] %x.coerce) // ARM32: define arm_aapcs_vfpcc %struct.D2 @_Z7func_D22D2(%struct.D2 %x.coerce) -// ARM64: define %struct.D2 @_Z7func_D22D2(double %x.0, double %x.1, double %x.2) +// ARM64: define %struct.D2 @_Z7func_D22D2([3 x double] %x.coerce) // X64: define x86_vectorcallcc %struct.D2 @"\01_Z7func_D22D2@@24"(double %x.0, double %x.1, double %x.2) D2 CC func_D2(D2 x) { return x; } @@ -57,7 +57,7 @@ D3 CC func_D3(D3 x) { return x; } // PPC: define [4 x double] @_Z7func_D42D4([4 x double] %x.coerce) // ARM32: define arm_aapcs_vfpcc %struct.D4 @_Z7func_D42D4(%struct.D4 %x.coerce) -// ARM64: define %struct.D4 @_Z7func_D42D4(double %x.0, double %x.1, double %x.2, double %x.3) +// ARM64: define %struct.D4 @_Z7func_D42D4([4 x double] %x.coerce) D4 CC func_D4(D4 x) { return x; } D5 CC func_D5(D5 x) { return x; } @@ -67,17 +67,9 @@ D5 CC func_D5(D5 x) { return x; } // The C++ multiple inheritance expansion case is a little more complicated, so // do some extra checking. // -// ARM64-LABEL: define %struct.D5 @_Z7func_D52D5(double %x.0, double %x.1, double %x.2) -// ARM64: bitcast %struct.D5* %{{.*}} to %struct.I1* -// ARM64: bitcast %struct.I1* %{{.*}} to %struct.Base2* -// ARM64: getelementptr inbounds %struct.Base2* %{{.*}}, i32 0, i32 0 -// ARM64: store double %x.0, double* -// ARM64: getelementptr inbounds i8* %{{.*}}, i64 8 -// ARM64: getelementptr inbounds %struct.Base2* %{{.*}}, i32 0, i32 0 -// ARM64: store double %x.1, double* -// ARM64: getelementptr inbounds i8* %{{.*}}, i64 16 -// ARM64: getelementptr inbounds %struct.Base2* %{{.*}}, i32 0, i32 0 -// ARM64: store double %x.2, double* +// ARM64-LABEL: define %struct.D5 @_Z7func_D52D5([3 x double] %x.coerce) +// ARM64: bitcast %struct.D5* %{{.*}} to [3 x double]* +// ARM64: store [3 x double] %x.coerce, [3 x double]* void call_D5(D5 *p) { func_D5(*p); @@ -86,21 +78,8 @@ void call_D5(D5 *p) { // Check the call site. // // ARM64-LABEL: define void @_Z7call_D5P2D5(%struct.D5* %p) -// ARM64: bitcast %struct.D5* %{{.*}} to %struct.I1* -// ARM64: bitcast %struct.I1* %{{.*}} to %struct.Base2* -// ARM64: getelementptr inbounds %struct.Base2* %{{.*}}, i32 0, i32 0 -// ARM64: load double* -// ARM64: getelementptr inbounds i8* %{{.*}}, i64 8 -// ARM64: bitcast i8* %{{.*}} to %struct.I2* -// ARM64: bitcast %struct.I2* %{{.*}} to %struct.Base2* -// ARM64: getelementptr inbounds %struct.Base2* %{{.*}}, i32 0, i32 0 -// ARM64: load double* -// ARM64: getelementptr inbounds i8* %{{.*}}, i64 16 -// ARM64: bitcast i8* %{{.*}} to %struct.I3* -// ARM64: bitcast %struct.I3* %{{.*}} to %struct.Base2* -// ARM64: getelementptr inbounds %struct.Base2* %{{.*}}, i32 0, i32 0 -// ARM64: load double* -// ARM64: call %struct.D5 @_Z7func_D52D5(double %{{.*}}, double %{{.*}}, double %{{.*}}) +// ARM64: load [3 x double]* +// ARM64: call %struct.D5 @_Z7func_D52D5([3 x double] %{{.*}}) struct Empty { }; struct Float1 { float x; }; @@ -108,7 +87,7 @@ struct Float2 { float y; }; struct HVAWithEmptyBase : Float1, Empty, Float2 { float z; }; // PPC: define void @_Z15with_empty_base16HVAWithEmptyBase([3 x float] %a.coerce) -// ARM64: define void @_Z15with_empty_base16HVAWithEmptyBase(float %a.0, float %a.1, float %a.2) +// ARM64: define void @_Z15with_empty_base16HVAWithEmptyBase([3 x float] %a.coerce) // ARM32: define arm_aapcs_vfpcc void @_Z15with_empty_base16HVAWithEmptyBase(%struct.HVAWithEmptyBase %a.coerce) void CC with_empty_base(HVAWithEmptyBase a) {} @@ -121,7 +100,7 @@ struct HVAWithEmptyBitField : Float1, Float2 { }; // PPC: define void @_Z19with_empty_bitfield20HVAWithEmptyBitField([3 x float] %a.coerce) -// ARM64: define void @_Z19with_empty_bitfield20HVAWithEmptyBitField(float %a.0, float %a.1, float %a.2) +// ARM64: define void @_Z19with_empty_bitfield20HVAWithEmptyBitField([3 x float] %a.coerce) // ARM32: define arm_aapcs_vfpcc void @_Z19with_empty_bitfield20HVAWithEmptyBitField(%struct.HVAWithEmptyBitField %a.coerce) // X64: define x86_vectorcallcc void @"\01_Z19with_empty_bitfield20HVAWithEmptyBitField@@16"(float %a.0, float %a.1, float %a.2) void CC with_empty_bitfield(HVAWithEmptyBitField a) {} |