6 files changed, 57 insertions, 175 deletions
diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
index 6e2c83e1fc4..cf7050757e8 100644
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -3813,9 +3813,7 @@ private:
   bool isDarwinPCS() const { return Kind == DarwinPCS; }
 
   ABIArgInfo classifyReturnType(QualType RetTy) const;
-  ABIArgInfo classifyArgumentType(QualType RetTy, unsigned &AllocatedVFP,
-                                  bool &IsHA, unsigned &AllocatedGPR,
-                                  bool &IsSmallAggr, bool IsNamedArg) const;
+  ABIArgInfo classifyArgumentType(QualType RetTy) const;
   bool isHomogeneousAggregateBaseType(QualType Ty) const override;
   bool isHomogeneousAggregateSmallEnough(const Type *Ty,
                                          uint64_t Members) const override;
@@ -3823,68 +3821,11 @@ private:
   bool isIllegalVectorType(QualType Ty) const;
 
   void computeInfo(CGFunctionInfo &FI) const override {
-    // To correctly handle Homogeneous Aggregate, we need to keep track of the
-    // number of SIMD and Floating-point registers allocated so far.
-    // If the argument is an HFA or an HVA and there are sufficient unallocated
-    // SIMD and Floating-point registers, then the argument is allocated to SIMD
-    // and Floating-point Registers (with one register per member of the HFA or
-    // HVA). Otherwise, the NSRN is set to 8.
-    unsigned AllocatedVFP = 0;
-
-    // To correctly handle small aggregates, we need to keep track of the number
-    // of GPRs allocated so far. If the small aggregate can't all fit into
-    // registers, it will be on stack. We don't allow the aggregate to be
-    // partially in registers.
-    unsigned AllocatedGPR = 0;
-
-    // Find the number of named arguments. Variadic arguments get special
-    // treatment with the Darwin ABI.
-    unsigned NumRequiredArgs = FI.getNumRequiredArgs();
-
     if (!getCXXABI().classifyReturnType(FI))
       FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
-    unsigned ArgNo = 0;
-    for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end();
-         it != ie; ++it, ++ArgNo) {
-      unsigned PreAllocation = AllocatedVFP, PreGPR = AllocatedGPR;
-      bool IsHA = false, IsSmallAggr = false;
-      const unsigned NumVFPs = 8;
-      const unsigned NumGPRs = 8;
-      bool IsNamedArg = ArgNo < NumRequiredArgs;
-      it->info = classifyArgumentType(it->type, AllocatedVFP, IsHA,
-                                      AllocatedGPR, IsSmallAggr, IsNamedArg);
-
-      // Under AAPCS the 64-bit stack slot alignment means we can't pass HAs
-      // as sequences of floats since they'll get "holes" inserted as
-      // padding by the back end.
-      if (IsHA && AllocatedVFP > NumVFPs && !isDarwinPCS() &&
-          getContext().getTypeAlign(it->type) < 64) {
-        uint32_t NumStackSlots = getContext().getTypeSize(it->type);
-        NumStackSlots = llvm::RoundUpToAlignment(NumStackSlots, 64) / 64;
-
-        llvm::Type *CoerceTy = llvm::ArrayType::get(
-            llvm::Type::getDoubleTy(getVMContext()), NumStackSlots);
-        it->info = ABIArgInfo::getDirect(CoerceTy);
-      }
 
-      // If we do not have enough VFP registers for the HA, any VFP registers
-      // that are unallocated are marked as unavailable. To achieve this, we add
-      // padding of (NumVFPs - PreAllocation) floats.
-      if (IsHA && AllocatedVFP > NumVFPs && PreAllocation < NumVFPs) {
-        llvm::Type *PaddingTy = llvm::ArrayType::get(
-            llvm::Type::getFloatTy(getVMContext()), NumVFPs - PreAllocation);
-        it->info.setPaddingType(PaddingTy);
-      }
-
-      // If we do not have enough GPRs for the small aggregate, any GPR regs
-      // that are unallocated are marked as unavailable.
-      if (IsSmallAggr && AllocatedGPR > NumGPRs && PreGPR < NumGPRs) {
-        llvm::Type *PaddingTy = llvm::ArrayType::get(
-            llvm::Type::getInt32Ty(getVMContext()), NumGPRs - PreGPR);
-        it->info =
-            ABIArgInfo::getDirect(it->info.getCoerceToType(), 0, PaddingTy);
-      }
-    }
+    for (auto &it : FI.arguments())
+      it.info = classifyArgumentType(it.type);
   }
 
   llvm::Value *EmitDarwinVAArg(llvm::Value *VAListAddr, QualType Ty,
@@ -3915,12 +3856,7 @@ public:
 };
 }
 
-ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty,
-                                                unsigned &AllocatedVFP,
-                                                bool &IsHA,
-                                                unsigned &AllocatedGPR,
-                                                bool &IsSmallAggr,
-                                                bool IsNamedArg) const {
+ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty) const {
   Ty = useFirstFieldIfTransparentUnion(Ty);
 
   // Handle illegal vector types here.
@@ -3928,48 +3864,26 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty,
     uint64_t Size = getContext().getTypeSize(Ty);
     if (Size <= 32) {
       llvm::Type *ResType = llvm::Type::getInt32Ty(getVMContext());
-      AllocatedGPR++;
       return ABIArgInfo::getDirect(ResType);
     }
     if (Size == 64) {
       llvm::Type *ResType =
           llvm::VectorType::get(llvm::Type::getInt32Ty(getVMContext()), 2);
-      AllocatedVFP++;
       return ABIArgInfo::getDirect(ResType);
     }
     if (Size == 128) {
       llvm::Type *ResType =
           llvm::VectorType::get(llvm::Type::getInt32Ty(getVMContext()), 4);
-      AllocatedVFP++;
       return ABIArgInfo::getDirect(ResType);
     }
-    AllocatedGPR++;
     return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
   }
-  if (Ty->isVectorType())
-    // Size of a legal vector should be either 64 or 128.
-    AllocatedVFP++;
-  if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
-    if (BT->getKind() == BuiltinType::Half ||
-        BT->getKind() == BuiltinType::Float ||
-        BT->getKind() == BuiltinType::Double ||
-        BT->getKind() == BuiltinType::LongDouble)
-      AllocatedVFP++;
-  }
 
   if (!isAggregateTypeForABI(Ty)) {
     // Treat an enum type as its underlying type.
     if (const EnumType *EnumTy = Ty->getAs<EnumType>())
       Ty = EnumTy->getDecl()->getIntegerType();
 
-    if (!Ty->isFloatingType() && !Ty->isVectorType()) {
-      unsigned Alignment = getContext().getTypeAlign(Ty);
-      if (!isDarwinPCS() && Alignment > 64)
-        AllocatedGPR = llvm::RoundUpToAlignment(AllocatedGPR, Alignment / 64);
-
-      int RegsNeeded = getContext().getTypeSize(Ty) > 64 ? 2 : 1;
-      AllocatedGPR += RegsNeeded;
-    }
     return (Ty->isPromotableIntegerType() && isDarwinPCS()
                 ? ABIArgInfo::getExtend()
                 : ABIArgInfo::getDirect());
@@ -3978,9 +3892,8 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty,
   // Structures with either a non-trivial destructor or a non-trivial
   // copy constructor are always indirect.
   if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) {
-    AllocatedGPR++;
     return ABIArgInfo::getIndirect(0, /*ByVal=*/RAA ==
-                                          CGCXXABI::RAA_DirectInMemory);
+                                   CGCXXABI::RAA_DirectInMemory);
   }
 
   // Empty records are always ignored on Darwin, but actually passed in C++ mode
@@ -3989,7 +3902,6 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty,
     if (!getContext().getLangOpts().CPlusPlus || isDarwinPCS())
       return ABIArgInfo::getIgnore();
 
-    ++AllocatedGPR;
     return ABIArgInfo::getDirect(llvm::Type::getInt8Ty(getVMContext()));
   }
 
@@ -3997,28 +3909,16 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty,
   const Type *Base = nullptr;
   uint64_t Members = 0;
   if (isHomogeneousAggregate(Ty, Base, Members)) {
-    IsHA = true;
-    if (!IsNamedArg && isDarwinPCS()) {
-      // With the Darwin ABI, variadic arguments are always passed on the stack
-      // and should not be expanded. Treat variadic HFAs as arrays of doubles.
-      uint64_t Size = getContext().getTypeSize(Ty);
-      llvm::Type *BaseTy = llvm::Type::getDoubleTy(getVMContext());
-      return ABIArgInfo::getDirect(llvm::ArrayType::get(BaseTy, Size / 64));
-    }
-    AllocatedVFP += Members;
-    return ABIArgInfo::getExpand();
+    return ABIArgInfo::getDirect(
+        llvm::ArrayType::get(CGT.ConvertType(QualType(Base, 0)), Members));
   }
 
   // Aggregates <= 16 bytes are passed directly in registers or on the stack.
   uint64_t Size = getContext().getTypeSize(Ty);
   if (Size <= 128) {
     unsigned Alignment = getContext().getTypeAlign(Ty);
-    if (!isDarwinPCS() && Alignment > 64)
-      AllocatedGPR = llvm::RoundUpToAlignment(AllocatedGPR, Alignment / 64);
-
     Size = 64 * ((Size + 63) / 64); // round up to multiple of 8 bytes
-    AllocatedGPR += Size / 64;
-    IsSmallAggr = true;
+
     // We use a pair of i64 for 16-byte aggregate with 8-byte alignment.
     // For aggregates with 16-byte alignment, we use i128.
     if (Alignment < 128 && Size == 128) {
@@ -4028,7 +3928,6 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty,
     return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Size));
   }
 
-  AllocatedGPR++;
   return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
 }
 
@@ -4104,14 +4003,25 @@ bool AArch64ABIInfo::isHomogeneousAggregateSmallEnough(const Type *Base,
   return Members <= 4;
 }
 
-llvm::Value *AArch64ABIInfo::EmitAAPCSVAArg(llvm::Value *VAListAddr, QualType Ty,
-                                          CodeGenFunction &CGF) const {
-  unsigned AllocatedGPR = 0, AllocatedVFP = 0;
-  bool IsHA = false, IsSmallAggr = false;
-  ABIArgInfo AI = classifyArgumentType(Ty, AllocatedVFP, IsHA, AllocatedGPR,
-                                       IsSmallAggr, false /*IsNamedArg*/);
+llvm::Value *AArch64ABIInfo::EmitAAPCSVAArg(llvm::Value *VAListAddr,
+                                            QualType Ty,
+                                            CodeGenFunction &CGF) const {
+  ABIArgInfo AI = classifyArgumentType(Ty);
   bool IsIndirect = AI.isIndirect();
 
+  llvm::Type *BaseTy = CGF.ConvertType(Ty);
+  if (IsIndirect)
+    BaseTy = llvm::PointerType::getUnqual(BaseTy);
+  else if (AI.getCoerceToType())
+    BaseTy = AI.getCoerceToType();
+
+  unsigned NumRegs = 1;
+  if (llvm::ArrayType *ArrTy = dyn_cast<llvm::ArrayType>(BaseTy)) {
+    BaseTy = ArrTy->getElementType();
+    NumRegs = ArrTy->getNumElements();
+  }
+  bool IsFPR = BaseTy->isFloatingPointTy() || BaseTy->isVectorTy();
+
   // The AArch64 va_list type and handling is specified in the Procedure Call
   // Standard, section B.4:
   //
@@ -4131,21 +4041,19 @@ llvm::Value *AArch64ABIInfo::EmitAAPCSVAArg(llvm::Value *VAListAddr, QualType Ty
 
   llvm::Value *reg_offs_p = nullptr, *reg_offs = nullptr;
   int reg_top_index;
-  int RegSize;
-  if (AllocatedGPR) {
-    assert(!AllocatedVFP && "Arguments never split between int & VFP regs");
+  int RegSize = IsIndirect ? 8 : getContext().getTypeSize(Ty) / 8;
+  if (!IsFPR) {
     // 3 is the field number of __gr_offs
     reg_offs_p = CGF.Builder.CreateStructGEP(VAListAddr, 3, "gr_offs_p");
     reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "gr_offs");
     reg_top_index = 1; // field number for __gr_top
-    RegSize = 8 * AllocatedGPR;
+    RegSize = llvm::RoundUpToAlignment(RegSize, 8);
   } else {
-    assert(!AllocatedGPR && "Argument must go in VFP or int regs");
     // 4 is the field number of __vr_offs.
     reg_offs_p = CGF.Builder.CreateStructGEP(VAListAddr, 4, "vr_offs_p");
     reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "vr_offs");
     reg_top_index = 2; // field number for __vr_top
-    RegSize = 16 * AllocatedVFP;
+    RegSize = 16 * NumRegs;
   }
 
   //=======================================
@@ -4169,7 +4077,7 @@ llvm::Value *AArch64ABIInfo::EmitAAPCSVAArg(llvm::Value *VAListAddr, QualType Ty
   // Integer arguments may need to correct register alignment (for example a
   // "struct { __int128 a; };" gets passed in x_2N, x_{2N+1}). In this case we
   // align __gr_offs to calculate the potential address.
-  if (AllocatedGPR && !IsIndirect && Ctx.getTypeAlign(Ty) > 64) {
+  if (!IsFPR && !IsIndirect && Ctx.getTypeAlign(Ty) > 64) {
     int Align = Ctx.getTypeAlign(Ty) / 8;
 
     reg_offs = CGF.Builder.CreateAdd(
diff --git a/clang/test/CodeGen/arm-aapcs-vfp.c b/clang/test/CodeGen/arm-aapcs-vfp.c
index da1e675aba8..7ef7c4e52ed 100644
--- a/clang/test/CodeGen/arm-aapcs-vfp.c
+++ b/clang/test/CodeGen/arm-aapcs-vfp.c
@@ -29,7 +29,7 @@ struct homogeneous_struct {
   float f4;
 };
 // CHECK: define arm_aapcs_vfpcc %struct.homogeneous_struct @test_struct(%struct.homogeneous_struct %{{.*}})
-// CHECK64: define %struct.homogeneous_struct @test_struct(float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}})
+// CHECK64: define %struct.homogeneous_struct @test_struct([4 x float] %{{.*}})
 extern struct homogeneous_struct struct_callee(struct homogeneous_struct);
 struct homogeneous_struct test_struct(struct homogeneous_struct arg) {
   return struct_callee(arg);
@@ -44,7 +44,7 @@ struct nested_array {
   double d[4];
 };
 // CHECK: define arm_aapcs_vfpcc void @test_array(%struct.nested_array %{{.*}})
-// CHECK64: define void @test_array(double %{{.*}}, double %{{.*}}, double %{{.*}}, double %{{.*}})
+// CHECK64: define void @test_array([4 x double] %{{.*}})
 extern void array_callee(struct nested_array);
 void test_array(struct nested_array arg) {
   array_callee(arg);
@@ -52,7 +52,7 @@ void test_array(struct nested_array arg) {
 
 extern void complex_callee(__complex__ double);
 // CHECK: define arm_aapcs_vfpcc void @test_complex({ double, double } %{{.*}})
-// CHECK64: define void @test_complex(double %{{.*}}, double %{{.*}})
+// CHECK64: define void @test_complex([2 x double] %cd.coerce)
 void test_complex(__complex__ double cd) {
   complex_callee(cd);
 }
@@ -98,7 +98,7 @@ void test_hetero(struct heterogeneous_struct arg) {
 
 // Neon multi-vector types are homogeneous aggregates.
 // CHECK: define arm_aapcs_vfpcc <16 x i8> @f0(%struct.int8x16x4_t %{{.*}})
-// CHECK64: define <16 x i8> @f0(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+// CHECK64: define <16 x i8> @f0([4 x <16 x i8>] %{{.*}})
 int8x16_t f0(int8x16x4_t v4) {
   return vaddq_s8(v4.val[0], v4.val[3]);
 }
@@ -112,7 +112,7 @@ struct neon_struct {
   int16x4_t v4;
 };
 // CHECK: define arm_aapcs_vfpcc void @test_neon(%struct.neon_struct %{{.*}})
-// CHECK64: define void @test_neon(<8 x i8> %{{.*}}, <8 x i8> %{{.*}}, <2 x i32> %{{.*}}, <4 x i16> %{{.*}})
+// CHECK64: define void @test_neon([4 x <8 x i8>] %{{.*}})
 extern void neon_callee(struct neon_struct);
 void test_neon(struct neon_struct arg) {
   neon_callee(arg);
diff --git a/clang/test/CodeGen/arm-homogenous.c b/clang/test/CodeGen/arm-homogenous.c
index 2ab6c105a59..3426d995cae 100644
--- a/clang/test/CodeGen/arm-homogenous.c
+++ b/clang/test/CodeGen/arm-homogenous.c
@@ -5,7 +5,7 @@
 // RUN:  -ffreestanding -emit-llvm -w -o - %s | FileCheck -check-prefix=CHECK64 %s
 
 // RUN: %clang_cc1 -triple arm64-linux-gnu -ffreestanding -emit-llvm -w -o - %s \
-// RUN:   | FileCheck --check-prefix=CHECK64-AAPCS %s
+// RUN:   | FileCheck --check-prefix=CHECK64 %s
 typedef long long int64_t;
 typedef unsigned int uint32_t;
 
@@ -176,9 +176,7 @@ void test_struct_of_four_doubles(void) {
 // CHECK: test_struct_of_four_doubles
 // CHECK: call arm_aapcs_vfpcc void @takes_struct_of_four_doubles(double {{.*}}, %struct.struct_of_four_doubles {{.*}}, %struct.struct_of_four_doubles {{.*}}, double {{.*}})
 // CHECK64: test_struct_of_four_doubles
-// CHECK64: call void @takes_struct_of_four_doubles(double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, [3 x float] undef, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}})
-// CHECK64-AAPCS: test_struct_of_four_doubles
-// CHECK64-AAPCS: call void @takes_struct_of_four_doubles(double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, [3 x float] undef, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}})
+// CHECK64: call void @takes_struct_of_four_doubles(double {{.*}}, [4 x double] {{.*}}, [4 x double] {{.*}}, double {{.*}})
   takes_struct_of_four_doubles(3.0, g_s4d, g_s4d, 4.0);
 }
 
@@ -212,9 +210,7 @@ void test_struct_of_vecs(void) {
 // CHECK: test_struct_of_vecs
 // CHECK: call arm_aapcs_vfpcc void @takes_struct_of_vecs(double {{.*}}, %struct.struct_of_vecs {{.*}}, %struct.struct_of_vecs {{.*}}, double {{.*}})
 // CHECK64: test_struct_of_vecs
-// CHECK64: call void @takes_struct_of_vecs(double {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, [3 x float] undef, <8 x i8> {{.*}}, <4 x i16> {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, double {{.*}})
-// CHECK64-AAPCS: test_struct_of_vecs
-// CHECK64-AAPCS: call void @takes_struct_of_vecs(double {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, [3 x float] undef, <8 x i8> {{.*}}, <4 x i16> {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, double {{.*}})
+// CHECK64: call void @takes_struct_of_vecs(double {{.*}}, [4 x <8 x i8>] {{.*}}, [4 x <8 x i8>] {{.*}}, double {{.*}})
   takes_struct_of_vecs(3.0, g_vec, g_vec, 4.0);
 }
 
diff --git a/clang/test/CodeGen/arm64-aapcs-arguments.c b/clang/test/CodeGen/arm64-aapcs-arguments.c
index 38ac522de5d..ab302d40704 100644
--- a/clang/test/CodeGen/arm64-aapcs-arguments.c
+++ b/clang/test/CodeGen/arm64-aapcs-arguments.c
@@ -17,7 +17,7 @@ void test2(int x0, Small x2_x3, int x4, Small x6_x7, int sp, Small sp16) {
 // stack in order to avoid holes. Make sure we get all of them, and not just the
 // first:
 
-// CHECK: void @test3(float %s0_s3.0, float %s0_s3.1, float %s0_s3.2, float %s0_s3.3, float %s4, [3 x float], [2 x double] %sp.coerce, [2 x double] %sp16.coerce)
+// CHECK: void @test3([4 x float] %s0_s3.coerce, float %s4, [4 x float] %sp.coerce, [4 x float] %sp16.coerce)
 typedef struct { float arr[4]; } HFA;
 void test3(HFA s0_s3, float s4, HFA sp, HFA sp16) {
 }
@@ -28,7 +28,7 @@ void test3(HFA s0_s3, float s4, HFA sp, HFA sp16) {
 // fp128] or something, but leaving them as-is retains more information for
 // users to debug.
 
-//  CHECK: void @test4(<16 x i8> %v0_v2.0, <16 x i8> %v0_v2.1, <16 x i8> %v0_v2.2, <16 x i8> %v3_v5.0, <16 x i8> %v3_v5.1, <16 x i8> %v3_v5.2, [2 x float], <16 x i8> %sp.0, <16 x i8> %sp.1, <16 x i8> %sp.2, double %sp48, <16 x i8> %sp64.0, <16 x i8> %sp64.1, <16 x i8> %sp64.2)
+//  CHECK: void @test4([3 x <16 x i8>] %v0_v2.coerce, [3 x <16 x i8>] %v3_v5.coerce, [3 x <16 x i8>] %sp.coerce, double %sp48, [3 x <16 x i8>] %sp64.coerce)
 typedef __attribute__((neon_vector_type(16))) signed char int8x16_t;
 typedef struct { int8x16_t arr[3]; } BigHFA;
 void test4(BigHFA v0_v2, BigHFA v3_v5, BigHFA sp, double sp48, BigHFA sp64) {
@@ -46,6 +46,6 @@ unsigned char test5(unsigned char a, signed short b) {
 __fp16 test_half(__fp16 A) { }
 
 // __fp16 is a base type for homogeneous floating-point aggregates for AArch64 (but not 32-bit ARM).
-// CHECK: define %struct.HFA_half @test_half_hfa(half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}})
+// CHECK: define %struct.HFA_half @test_half_hfa([4 x half] %{{.*}})
 struct HFA_half { __fp16 a[4]; };
 struct HFA_half test_half_hfa(struct HFA_half A) { }
diff --git a/clang/test/CodeGen/arm64-arguments.c b/clang/test/CodeGen/arm64-arguments.c
index b2de08dbe68..ae1ff98800a 100644
--- a/clang/test/CodeGen/arm64-arguments.c
+++ b/clang/test/CodeGen/arm64-arguments.c
@@ -123,8 +123,7 @@ void f31(struct s31 s) { }
 
 struct s32 { double x; };
 void f32(struct s32 s) { }
-// Expand Homogeneous Aggregate.
-// CHECK: @f32(double %{{.*}})
+// CHECK: @f32([1 x double] %{{.*}})
 
 // A composite type larger than 16 bytes should be passed indirectly.
 struct s33 { char buf[32*32]; };
@@ -197,7 +196,7 @@ typedef struct s35 s35_with_align;
 
 typedef __attribute__((neon_vector_type(4))) float float32x4_t;
 float32x4_t f35(int i, s35_with_align s1, s35_with_align s2) {
-// CHECK: define <4 x float> @f35(i32 %i, float %s1.0, float %s1.1, float %s1.2, float %s1.3, float %s2.0, float %s2.1, float %s2.2, float %s2.3)
+// CHECK: define <4 x float> @f35(i32 %i, [4 x float] %s1.coerce, [4 x float] %s2.coerce)
 // CHECK: %s1 = alloca %struct.s35, align 16
 // CHECK: %s2 = alloca %struct.s35, align 16
 // CHECK: %[[a:.*]] = bitcast %struct.s35* %s1 to <4 x float>*
@@ -598,24 +597,24 @@ int caller43_stack() {
 __attribute__ ((noinline))
 int f40_split(int i, int i2, int i3, int i4, int i5, int i6, int i7,
               s40_no_align s1, s40_no_align s2) {
-// CHECK: define i32 @f40_split(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, [1 x i32], [2 x i64] %s1.coerce, [2 x i64] %s2.coerce)
+// CHECK: define i32 @f40_split(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce)
   return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + s1.s + s2.s;
 }
 int caller40_split() {
 // CHECK: define i32 @caller40_split()
-// CHECK: call i32 @f40_split(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, [1 x i32] undef, [2 x i64] %{{.*}} [2 x i64] %{{.*}})
+// CHECK: call i32 @f40_split(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, [2 x i64] %{{.*}} [2 x i64] %{{.*}})
   return f40_split(1, 2, 3, 4, 5, 6, 7, g40, g40_2);
 }
 
 __attribute__ ((noinline))
 int f41_split(int i, int i2, int i3, int i4, int i5, int i6, int i7,
               s41_with_align s1, s41_with_align s2) {
-// CHECK: define i32 @f41_split(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, [1 x i32], i128 %s1.coerce, i128 %s2.coerce)
+// CHECK: define i32 @f41_split(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i128 %s1.coerce, i128 %s2.coerce)
   return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + s1.s + s2.s;
 }
 int caller41_split() {
 // CHECK: define i32 @caller41_split()
-// CHECK: call i32 @f41_split(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, [1 x i32] undef, i128 %{{.*}}, i128 %{{.*}})
+// CHECK: call i32 @f41_split(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i128 %{{.*}}, i128 %{{.*}})
   return f41_split(1, 2, 3, 4, 5, 6, 7, g41, g41_2);
 }
 
@@ -642,7 +641,7 @@ float test_hfa(int n, ...) {
 
 float test_hfa_call(struct HFA *a) {
 // CHECK-LABEL: define float @test_hfa_call(%struct.HFA* %a)
-// CHECK: call float (i32, ...)* @test_hfa(i32 1, [2 x double] {{.*}})
+// CHECK: call float (i32, ...)* @test_hfa(i32 1, [4 x float] {{.*}})
   test_hfa(1, *a);
 }
 
diff --git a/clang/test/CodeGenCXX/homogeneous-aggregates.cpp b/clang/test/CodeGenCXX/homogeneous-aggregates.cpp
index 4800aacbfe5..77c6b3a527b 100644
--- a/clang/test/CodeGenCXX/homogeneous-aggregates.cpp
+++ b/clang/test/CodeGenCXX/homogeneous-aggregates.cpp
@@ -46,7 +46,7 @@ D1 CC func_D1(D1 x) { return x; }
 
 // PPC: define [3 x double] @_Z7func_D22D2([3 x double] %x.coerce)
 // ARM32: define arm_aapcs_vfpcc %struct.D2 @_Z7func_D22D2(%struct.D2 %x.coerce)
-// ARM64: define %struct.D2 @_Z7func_D22D2(double %x.0, double %x.1, double %x.2)
+// ARM64: define %struct.D2 @_Z7func_D22D2([3 x double] %x.coerce)
 // X64: define x86_vectorcallcc %struct.D2 @"\01_Z7func_D22D2@@24"(double %x.0, double %x.1, double %x.2)
 D2 CC func_D2(D2 x) { return x; }
 
@@ -57,7 +57,7 @@ D3 CC func_D3(D3 x) { return x; }
 
 // PPC: define [4 x double] @_Z7func_D42D4([4 x double] %x.coerce)
 // ARM32: define arm_aapcs_vfpcc %struct.D4 @_Z7func_D42D4(%struct.D4 %x.coerce)
-// ARM64: define %struct.D4 @_Z7func_D42D4(double %x.0, double %x.1, double %x.2, double %x.3)
+// ARM64: define %struct.D4 @_Z7func_D42D4([4 x double] %x.coerce)
 D4 CC func_D4(D4 x) { return x; }
 
 D5 CC func_D5(D5 x) { return x; }
@@ -67,17 +67,9 @@ D5 CC func_D5(D5 x) { return x; }
 // The C++ multiple inheritance expansion case is a little more complicated, so
 // do some extra checking.
 //
-// ARM64-LABEL: define %struct.D5 @_Z7func_D52D5(double %x.0, double %x.1, double %x.2)
-// ARM64: bitcast %struct.D5* %{{.*}} to %struct.I1*
-// ARM64: bitcast %struct.I1* %{{.*}} to %struct.Base2*
-// ARM64: getelementptr inbounds %struct.Base2* %{{.*}}, i32 0, i32 0
-// ARM64: store double %x.0, double*
-// ARM64: getelementptr inbounds i8* %{{.*}}, i64 8
-// ARM64: getelementptr inbounds %struct.Base2* %{{.*}}, i32 0, i32 0
-// ARM64: store double %x.1, double*
-// ARM64: getelementptr inbounds i8* %{{.*}}, i64 16
-// ARM64: getelementptr inbounds %struct.Base2* %{{.*}}, i32 0, i32 0
-// ARM64: store double %x.2, double*
+// ARM64-LABEL: define %struct.D5 @_Z7func_D52D5([3 x double] %x.coerce)
+// ARM64: bitcast %struct.D5* %{{.*}} to [3 x double]*
+// ARM64: store [3 x double] %x.coerce, [3 x double]*
 
 void call_D5(D5 *p) {
   func_D5(*p);
@@ -86,21 +78,8 @@ void call_D5(D5 *p) {
 // Check the call site.
 //
 // ARM64-LABEL: define void @_Z7call_D5P2D5(%struct.D5* %p)
-// ARM64: bitcast %struct.D5* %{{.*}} to %struct.I1*
-// ARM64: bitcast %struct.I1* %{{.*}} to %struct.Base2*
-// ARM64: getelementptr inbounds %struct.Base2* %{{.*}}, i32 0, i32 0
-// ARM64: load double*
-// ARM64: getelementptr inbounds i8* %{{.*}}, i64 8
-// ARM64: bitcast i8* %{{.*}} to %struct.I2*
-// ARM64: bitcast %struct.I2* %{{.*}} to %struct.Base2*
-// ARM64: getelementptr inbounds %struct.Base2* %{{.*}}, i32 0, i32 0
-// ARM64: load double*
-// ARM64: getelementptr inbounds i8* %{{.*}}, i64 16
-// ARM64: bitcast i8* %{{.*}} to %struct.I3*
-// ARM64: bitcast %struct.I3* %{{.*}} to %struct.Base2*
-// ARM64: getelementptr inbounds %struct.Base2* %{{.*}}, i32 0, i32 0
-// ARM64: load double*
-// ARM64: call %struct.D5 @_Z7func_D52D5(double %{{.*}}, double %{{.*}}, double %{{.*}})
+// ARM64: load [3 x double]*
+// ARM64: call %struct.D5 @_Z7func_D52D5([3 x double] %{{.*}})
 
 struct Empty { };
 struct Float1 { float x; };
@@ -108,7 +87,7 @@ struct Float2 { float y; };
 struct HVAWithEmptyBase : Float1, Empty, Float2 { float z; };
 
 // PPC: define void @_Z15with_empty_base16HVAWithEmptyBase([3 x float] %a.coerce)
-// ARM64: define void @_Z15with_empty_base16HVAWithEmptyBase(float %a.0, float %a.1, float %a.2)
+// ARM64: define void @_Z15with_empty_base16HVAWithEmptyBase([3 x float] %a.coerce)
 // ARM32: define arm_aapcs_vfpcc void @_Z15with_empty_base16HVAWithEmptyBase(%struct.HVAWithEmptyBase %a.coerce)
 void CC with_empty_base(HVAWithEmptyBase a) {}
 
@@ -121,7 +100,7 @@ struct HVAWithEmptyBitField : Float1, Float2 {
 };
 
 // PPC: define void @_Z19with_empty_bitfield20HVAWithEmptyBitField([3 x float] %a.coerce)
-// ARM64: define void @_Z19with_empty_bitfield20HVAWithEmptyBitField(float %a.0, float %a.1, float %a.2)
+// ARM64: define void @_Z19with_empty_bitfield20HVAWithEmptyBitField([3 x float] %a.coerce)
 // ARM32: define arm_aapcs_vfpcc void @_Z19with_empty_bitfield20HVAWithEmptyBitField(%struct.HVAWithEmptyBitField %a.coerce)
 // X64: define x86_vectorcallcc void @"\01_Z19with_empty_bitfield20HVAWithEmptyBitField@@16"(float %a.0, float %a.1, float %a.2)
 void CC with_empty_bitfield(HVAWithEmptyBitField a) {}