diff options
| -rw-r--r-- | llvm/lib/Target/ARM/ARMCallingConv.h | 11 | ||||
| -rw-r--r-- | llvm/test/CodeGen/ARM/aggregate-padding.ll | 16 | 
2 files changed, 22 insertions, 5 deletions
| diff --git a/llvm/lib/Target/ARM/ARMCallingConv.h b/llvm/lib/Target/ARM/ARMCallingConv.h index 63bf48abb7a..543165de38d 100644 --- a/llvm/lib/Target/ARM/ARMCallingConv.h +++ b/llvm/lib/Target/ARM/ARMCallingConv.h @@ -269,14 +269,15 @@ static bool CC_ARM_AAPCS_Custom_Aggregate(unsigned &ValNo, MVT &ValVT,    for (auto Reg : RegList)      State.AllocateReg(Reg); +  // After the first item has been allocated, the rest are packed as tightly as +  // possible. (E.g. an incoming i64 would have starting Align of 8, but we'll +  // be allocating a bunch of i32 slots). +  unsigned RestAlign = std::min(Align, Size); +    for (auto &It : PendingMembers) {      It.convertToMem(State.AllocateStack(Size, Align));      State.addLoc(It); - -    // After the first item has been allocated, the rest are packed as tightly -    // as possible. (E.g. an incoming i64 would have starting Align of 8, but -    // we'll be allocating a bunch of i32 slots). -    Align = Size; +    Align = RestAlign;    }    // All pending members have now been allocated diff --git a/llvm/test/CodeGen/ARM/aggregate-padding.ll b/llvm/test/CodeGen/ARM/aggregate-padding.ll index bc46a9cdf91..ae7ab90fcd2 100644 --- a/llvm/test/CodeGen/ARM/aggregate-padding.ll +++ b/llvm/test/CodeGen/ARM/aggregate-padding.ll @@ -99,3 +99,19 @@ define i16 @test_i16_forced_stack([8 x double], double, i32, i32, [3 x i16] %arg    %sum = add i16 %val0, %val2    ret i16 %sum  } + +; [2 x <4 x i32>] should be aligned only on a 64-bit boundary and contiguous. +; None of the two <4 x i32> elements should introduce any padding to 128 bits. +define i32 @test_4xi32_64bit_aligned_and_contiguous([8 x double], float, [2 x <4 x i32>] %arg) nounwind { +; CHECK-LABEL: test_4xi32_64bit_aligned_and_contiguous: +; CHECK-DAG: ldr [[VAL0_0:r[0-9]+]], [sp, #8] +; CHECK-DAG: ldr [[VAL1_0:r[0-9]+]], [sp, #24] +; CHECK: add r0, [[VAL0_0]], [[VAL1_0]] + +  %val0 = extractvalue [2 x <4 x i32>] %arg, 0 +  %val0_0 = extractelement <4 x i32> %val0, i32 0 +  %val1 = extractvalue [2 x <4 x i32>] %arg, 1 +  %val1_0 = extractelement <4 x i32> %val1, i32 0 +  %sum = add i32 %val0_0, %val1_0 +  ret i32 %sum +} | 

