summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Target/ARM/ARMCallingConv.h11
-rw-r--r--llvm/test/CodeGen/ARM/aggregate-padding.ll16
2 files changed, 22 insertions, 5 deletions
diff --git a/llvm/lib/Target/ARM/ARMCallingConv.h b/llvm/lib/Target/ARM/ARMCallingConv.h
index 63bf48abb7a..543165de38d 100644
--- a/llvm/lib/Target/ARM/ARMCallingConv.h
+++ b/llvm/lib/Target/ARM/ARMCallingConv.h
@@ -269,14 +269,15 @@ static bool CC_ARM_AAPCS_Custom_Aggregate(unsigned &ValNo, MVT &ValVT,
for (auto Reg : RegList)
State.AllocateReg(Reg);
+ // After the first item has been allocated, the rest are packed as tightly as
+ // possible. (E.g. an incoming i64 would have starting Align of 8, but we'll
+ // be allocating a bunch of i32 slots).
+ unsigned RestAlign = std::min(Align, Size);
+
for (auto &It : PendingMembers) {
It.convertToMem(State.AllocateStack(Size, Align));
State.addLoc(It);
-
- // After the first item has been allocated, the rest are packed as tightly
- // as possible. (E.g. an incoming i64 would have starting Align of 8, but
- // we'll be allocating a bunch of i32 slots).
- Align = Size;
+ Align = RestAlign;
}
// All pending members have now been allocated
diff --git a/llvm/test/CodeGen/ARM/aggregate-padding.ll b/llvm/test/CodeGen/ARM/aggregate-padding.ll
index bc46a9cdf91..ae7ab90fcd2 100644
--- a/llvm/test/CodeGen/ARM/aggregate-padding.ll
+++ b/llvm/test/CodeGen/ARM/aggregate-padding.ll
@@ -99,3 +99,19 @@ define i16 @test_i16_forced_stack([8 x double], double, i32, i32, [3 x i16] %arg
%sum = add i16 %val0, %val2
ret i16 %sum
}
+
+; [2 x <4 x i32>] should be aligned only on a 64-bit boundary and contiguous.
+; None of the two <4 x i32> elements should introduce any padding to 128 bits.
+define i32 @test_4xi32_64bit_aligned_and_contiguous([8 x double], float, [2 x <4 x i32>] %arg) nounwind {
+; CHECK-LABEL: test_4xi32_64bit_aligned_and_contiguous:
+; CHECK-DAG: ldr [[VAL0_0:r[0-9]+]], [sp, #8]
+; CHECK-DAG: ldr [[VAL1_0:r[0-9]+]], [sp, #24]
+; CHECK: add r0, [[VAL0_0]], [[VAL1_0]]
+
+ %val0 = extractvalue [2 x <4 x i32>] %arg, 0
+ %val0_0 = extractelement <4 x i32> %val0, i32 0
+ %val1 = extractvalue [2 x <4 x i32>] %arg, 1
+ %val1_0 = extractelement <4 x i32> %val1, i32 0
+ %sum = add i32 %val0_0, %val1_0
+ ret i32 %sum
+}
OpenPOWER on IntegriCloud