-rw-r--r--  llvm/include/llvm/CodeGen/CallingConvLower.h         |   9
-rw-r--r--  llvm/include/llvm/IR/DataLayout.h                    |   2
-rw-r--r--  llvm/lib/Target/AArch64/AArch64CallingConvention.h   | 136
-rw-r--r--  llvm/lib/Target/AArch64/AArch64CallingConvention.td  |   6
-rw-r--r--  llvm/lib/Target/AArch64/AArch64FastISel.cpp          |   1
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp      |   6
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelLowering.h        |   4
-rw-r--r--  llvm/lib/Target/ARM/ARMCallingConv.h                 |   8
-rw-r--r--  llvm/test/CodeGen/AArch64/argument-blocks.ll         |  92
-rw-r--r--  llvm/test/CodeGen/AArch64/arm64-variadic-aapcs.ll    |   2
10 files changed, 257 insertions(+), 9 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/CallingConvLower.h b/llvm/include/llvm/CodeGen/CallingConvLower.h
index 0b2ccc6ee6d..903a3954960 100644
--- a/llvm/include/llvm/CodeGen/CallingConvLower.h
+++ b/llvm/include/llvm/CodeGen/CallingConvLower.h
@@ -345,8 +345,13 @@ public:
/// AllocateRegBlock - Attempt to allocate a block of RegsRequired consecutive
/// registers. If this is not possible, return zero. Otherwise, return the first
/// register of the block that was allocated, marking the entire block as allocated.
- unsigned AllocateRegBlock(const uint16_t *Regs, unsigned NumRegs, unsigned RegsRequired) {
- for (unsigned StartIdx = 0; StartIdx <= NumRegs - RegsRequired; ++StartIdx) {
+ unsigned AllocateRegBlock(ArrayRef<const uint16_t> Regs,
+ unsigned RegsRequired) {
+ if (RegsRequired > Regs.size())
+ return 0;
+
+ for (unsigned StartIdx = 0; StartIdx <= Regs.size() - RegsRequired;
+ ++StartIdx) {
bool BlockAvailable = true;
// Check for already-allocated regs in this block
for (unsigned BlockIdx = 0; BlockIdx < RegsRequired; ++BlockIdx) {
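For reference, a minimal sketch of how the whole method reads after this change, reconstructed around the context lines above (the hunk only shows the new signature and bounds check). The isAllocated/MarkAllocated calls are pre-existing CCState helpers assumed here, not part of this patch:

unsigned AllocateRegBlock(ArrayRef<const uint16_t> Regs, unsigned RegsRequired) {
  if (RegsRequired > Regs.size())
    return 0;

  for (unsigned StartIdx = 0; StartIdx <= Regs.size() - RegsRequired;
       ++StartIdx) {
    bool BlockAvailable = true;
    // Check for already-allocated regs in this block.
    for (unsigned BlockIdx = 0; BlockIdx < RegsRequired; ++BlockIdx) {
      if (isAllocated(Regs[StartIdx + BlockIdx])) { // assumed CCState helper
        BlockAvailable = false;
        break;
      }
    }
    if (BlockAvailable) {
      // Mark the whole block as allocated and return its first register.
      for (unsigned BlockIdx = 0; BlockIdx < RegsRequired; ++BlockIdx)
        MarkAllocated(Regs[StartIdx + BlockIdx]);   // assumed CCState helper
      return Regs[StartIdx];
    }
  }
  // No contiguous block of the required size was free.
  return 0;
}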
diff --git a/llvm/include/llvm/IR/DataLayout.h b/llvm/include/llvm/IR/DataLayout.h
index 4580a4f56a0..a9e75955ce7 100644
--- a/llvm/include/llvm/IR/DataLayout.h
+++ b/llvm/include/llvm/IR/DataLayout.h
@@ -228,6 +228,8 @@ public:
return (StackNaturalAlign != 0) && (Align > StackNaturalAlign);
}
+ unsigned getStackAlignment() const { return StackNaturalAlign; }
+
bool hasMicrosoftFastStdCallMangling() const {
return ManglingMode == MM_WINCOFF;
}
diff --git a/llvm/lib/Target/AArch64/AArch64CallingConvention.h b/llvm/lib/Target/AArch64/AArch64CallingConvention.h
new file mode 100644
index 00000000000..b2be99e5419
--- /dev/null
+++ b/llvm/lib/Target/AArch64/AArch64CallingConvention.h
@@ -0,0 +1,136 @@
+//=== AArch64CallingConv.h - Custom Calling Convention Routines -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the custom routines for the AArch64 Calling Convention
+// that aren't done by tablegen.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64CALLINGCONVENTION_H
+#define LLVM_LIB_TARGET_AARCH64_AARCH64CALLINGCONVENTION_H
+
+#include "AArch64.h"
+#include "AArch64InstrInfo.h"
+#include "AArch64Subtarget.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+namespace {
+using namespace llvm;
+
+static const uint16_t XRegList[] = {AArch64::X0, AArch64::X1, AArch64::X2,
+ AArch64::X3, AArch64::X4, AArch64::X5,
+ AArch64::X6, AArch64::X7};
+static const uint16_t SRegList[] = {AArch64::S0, AArch64::S1, AArch64::S2,
+ AArch64::S3, AArch64::S4, AArch64::S5,
+ AArch64::S6, AArch64::S7};
+static const uint16_t DRegList[] = {AArch64::D0, AArch64::D1, AArch64::D2,
+ AArch64::D3, AArch64::D4, AArch64::D5,
+ AArch64::D6, AArch64::D7};
+static const uint16_t QRegList[] = {AArch64::Q0, AArch64::Q1, AArch64::Q2,
+ AArch64::Q3, AArch64::Q4, AArch64::Q5,
+ AArch64::Q6, AArch64::Q7};
+
+static bool finishStackBlock(SmallVectorImpl<CCValAssign> &PendingMembers,
+ MVT LocVT, ISD::ArgFlagsTy &ArgFlags,
+ CCState &State, unsigned SlotAlign) {
+ unsigned Size = LocVT.getSizeInBits() / 8;
+ unsigned StackAlign = State.getMachineFunction()
+ .getSubtarget()
+ .getDataLayout()
+ ->getStackAlignment();
+ unsigned Align = std::min(ArgFlags.getOrigAlign(), StackAlign);
+
+ for (auto &It : PendingMembers) {
+ It.convertToMem(State.AllocateStack(Size, std::max(Align, SlotAlign)));
+ State.addLoc(It);
+ SlotAlign = 1;
+ }
+
+ // All pending members have now been allocated
+ PendingMembers.clear();
+ return true;
+}
+
+/// The Darwin variadic PCS places anonymous arguments in 8-byte stack slots. An
+/// [N x Ty] type must still be contiguous in memory though.
+static bool CC_AArch64_Custom_Stack_Block(
+ unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+ SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
+
+ // Add the argument to the list to be allocated once we know the size of the
+ // block.
+ PendingMembers.push_back(
+ CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
+
+ if (!ArgFlags.isInConsecutiveRegsLast())
+ return true;
+
+ return finishStackBlock(PendingMembers, LocVT, ArgFlags, State, 8);
+}
+
+/// Given an [N x Ty] block, it should be passed in a consecutive sequence of
+/// registers. If no such sequence is available, mark the rest of the registers
+/// of that type as used and place the argument on the stack.
+static bool CC_AArch64_Custom_Block(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+ // Try to allocate a contiguous block of registers, each of the correct
+ // size to hold one member.
+ ArrayRef<const uint16_t> RegList;
+ if (LocVT.SimpleTy == MVT::i64)
+ RegList = XRegList;
+ else if (LocVT.SimpleTy == MVT::f32)
+ RegList = SRegList;
+ else if (LocVT.SimpleTy == MVT::f64)
+ RegList = DRegList;
+ else if (LocVT.SimpleTy == MVT::v2f64)
+ RegList = QRegList;
+ else {
+ // Not an array we want to split up after all.
+ return false;
+ }
+
+ SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
+
+ // Add the argument to the list to be allocated once we know the size of the
+ // block.
+ PendingMembers.push_back(
+ CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
+
+ if (!ArgFlags.isInConsecutiveRegsLast())
+ return true;
+
+ unsigned RegResult = State.AllocateRegBlock(RegList, PendingMembers.size());
+ if (RegResult) {
+ for (auto &It : PendingMembers) {
+ It.convertToReg(RegResult);
+ State.addLoc(It);
+ ++RegResult;
+ }
+ PendingMembers.clear();
+ return true;
+ }
+
+ // Mark all regs in the class as unavailable
+ for (auto Reg : RegList)
+ State.AllocateReg(Reg);
+
+ const AArch64Subtarget &Subtarget = static_cast<const AArch64Subtarget &>(
+ State.getMachineFunction().getSubtarget());
+ unsigned SlotAlign = Subtarget.isTargetDarwin() ? 1 : 8;
+
+ return finishStackBlock(PendingMembers, LocVT, ArgFlags, State, SlotAlign);
+}
+
+}
+
+#endif
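The two custom routines above are only reached for values whose argument flags mark them as members of a consecutive-register block (see the CCIfConsecutiveRegs entries added to the .td file below). As an illustration only, assuming a fully constructed CCState, the call pattern for a [2 x double] argument would look roughly like this; the helper name is hypothetical:

// Hypothetical driver: each array element arrives as a separate f64 whose
// flags mark it as part of one block; the last element also carries
// InConsecutiveRegsLast, which flushes the pending list into D0/D1 (or, if no
// contiguous pair of D registers is free, into one contiguous stack block).
static void allocateDoublePair(CCState &State) {
  ISD::ArgFlagsTy Flags;
  Flags.setInConsecutiveRegs();

  unsigned ValNo = 0;
  MVT ValVT = MVT::f64, LocVT = MVT::f64;
  CCValAssign::LocInfo LocInfo = CCValAssign::Full;
  CC_AArch64_Custom_Block(ValNo, ValVT, LocVT, LocInfo, Flags, State); // pended

  ValNo = 1;
  Flags.setInConsecutiveRegsLast();
  CC_AArch64_Custom_Block(ValNo, ValVT, LocVT, LocInfo, Flags, State); // flushed
}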
diff --git a/llvm/lib/Target/AArch64/AArch64CallingConvention.td b/llvm/lib/Target/AArch64/AArch64CallingConvention.td
index 9e707e4083c..1a8040275ca 100644
--- a/llvm/lib/Target/AArch64/AArch64CallingConvention.td
+++ b/llvm/lib/Target/AArch64/AArch64CallingConvention.td
@@ -40,6 +40,8 @@ def CC_AArch64_AAPCS : CallingConv<[
// slot is 64-bit.
CCIfByVal<CCPassByVal<8, 8>>,
+ CCIfConsecutiveRegs<CCCustom<"CC_AArch64_Custom_Block">>,
+
// Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers,
// up to eight each of GPR and FPR.
CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
@@ -119,6 +121,8 @@ def CC_AArch64_DarwinPCS : CallingConv<[
// slot is 64-bit.
CCIfByVal<CCPassByVal<8, 8>>,
+ CCIfConsecutiveRegs<CCCustom<"CC_AArch64_Custom_Block">>,
+
// Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers,
// up to eight each of GPR and FPR.
CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
@@ -159,6 +163,8 @@ def CC_AArch64_DarwinPCS_VarArg : CallingConv<[
CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
CCIfType<[v2f64, v4f32, f128], CCBitConvertToType<v2i64>>,
+ CCIfConsecutiveRegs<CCCustom<"CC_AArch64_Custom_Stack_Block">>,
+
// Handle all scalar types as either i64 or f64.
CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
CCIfType<[f16, f32], CCPromoteToType<f64>>,
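For each CCIfConsecutiveRegs entry above, the TableGen backend emits a guard in the generated calling-convention function along these lines (a sketch of the generated shape, not verbatim tblgen output):

// Inside the generated CC_AArch64_AAPCS / CC_AArch64_DarwinPCS functions:
// the custom routine returns true once it has fully assigned the value, at
// which point the outer function reports success (false) for this argument.
if (ArgFlags.isInConsecutiveRegs()) {
  if (CC_AArch64_Custom_Block(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
    return false;
}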
diff --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
index fb0326bf0bf..d942ace427b 100644
--- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
@@ -14,6 +14,7 @@
//===----------------------------------------------------------------------===//
#include "AArch64.h"
+#include "AArch64CallingConvention.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 70cf7599a39..6da468ed6b1 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "AArch64ISelLowering.h"
+#include "AArch64CallingConvention.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64PerfectShuffle.h"
#include "AArch64Subtarget.h"
@@ -8842,3 +8843,8 @@ Value *AArch64TargetLowering::emitStoreConditional(IRBuilder<> &Builder,
Val, Stxr->getFunctionType()->getParamType(0)),
Addr);
}
+
+bool AArch64TargetLowering::functionArgumentNeedsConsecutiveRegisters(
+ Type *Ty, CallingConv::ID CallConv, bool isVarArg) const {
+ return Ty->isArrayTy();
+}
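When this hook returns true, the generic argument lowering splits the array and tags every resulting piece before the calling-convention function runs; those flags are what the CCIfConsecutiveRegs entries test. A rough sketch of that behaviour, modelled on the existing ARM homogeneous-aggregate support (the helper below is hypothetical, not part of this patch):

// Tag each piece of a split-up array argument as part of one consecutive
// register block; the final piece also gets the end-of-block marker so the
// custom CC routine knows when to allocate the whole pending group.
static void tagRegBlockFlags(SmallVectorImpl<ISD::ArgFlagsTy> &FlagsVec,
                             bool NeedsRegBlock) {
  if (!NeedsRegBlock)
    return;
  unsigned NumValues = FlagsVec.size();
  for (unsigned Value = 0; Value != NumValues; ++Value) {
    FlagsVec[Value].setInConsecutiveRegs();
    if (Value == NumValues - 1)
      FlagsVec[Value].setInConsecutiveRegsLast();
  }
}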
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index c76f6a865dc..458489bd6c6 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -473,6 +473,10 @@ private:
void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const override;
+
+ bool functionArgumentNeedsConsecutiveRegisters(Type *Ty,
+ CallingConv::ID CallConv,
+ bool isVarArg) const;
};
namespace AArch64 {
diff --git a/llvm/lib/Target/ARM/ARMCallingConv.h b/llvm/lib/Target/ARM/ARMCallingConv.h
index bd07236d0ea..4567a2a1afd 100644
--- a/llvm/lib/Target/ARM/ARMCallingConv.h
+++ b/llvm/lib/Target/ARM/ARMCallingConv.h
@@ -194,20 +194,16 @@ static bool CC_ARM_AAPCS_Custom_HA(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
// Try to allocate a contiguous block of registers, each of the correct
// size to hold one member.
- const uint16_t *RegList;
- unsigned NumRegs;
+ ArrayRef<const uint16_t> RegList;
switch (LocVT.SimpleTy) {
case MVT::f32:
RegList = SRegList;
- NumRegs = 16;
break;
case MVT::f64:
RegList = DRegList;
- NumRegs = 8;
break;
case MVT::v2f64:
RegList = QRegList;
- NumRegs = 4;
break;
default:
llvm_unreachable("Unexpected member type for HA");
@@ -215,7 +211,7 @@ static bool CC_ARM_AAPCS_Custom_HA(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
}
unsigned RegResult =
- State.AllocateRegBlock(RegList, NumRegs, PendingHAMembers.size());
+ State.AllocateRegBlock(RegList, PendingHAMembers.size());
if (RegResult) {
for (SmallVectorImpl<CCValAssign>::iterator It = PendingHAMembers.begin();
diff --git a/llvm/test/CodeGen/AArch64/argument-blocks.ll b/llvm/test/CodeGen/AArch64/argument-blocks.ll
new file mode 100644
index 00000000000..cc65541a644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/argument-blocks.ll
@@ -0,0 +1,92 @@
+; RUN: llc -mtriple=aarch64-apple-ios7.0 -o - %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-DARWINPCS
+; RUN: llc -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AAPCS
+
+declare void @callee(...)
+
+define float @test_hfa_regs(float, [2 x float] %in) {
+; CHECK-LABEL: test_hfa_regs:
+; CHECK: fadd s0, s1, s2
+
+ %lhs = extractvalue [2 x float] %in, 0
+ %rhs = extractvalue [2 x float] %in, 1
+ %sum = fadd float %lhs, %rhs
+ ret float %sum
+}
+
+; Check that the array gets allocated to a contiguous block on the stack (rather
+; than the default of 2 8-byte slots).
+define float @test_hfa_block([7 x float], [2 x float] %in) {
+; CHECK-LABEL: test_hfa_block:
+; CHECK: ldp [[LHS:s[0-9]+]], [[RHS:s[0-9]+]], [sp]
+; CHECK: fadd s0, [[LHS]], [[RHS]]
+
+ %lhs = extractvalue [2 x float] %in, 0
+ %rhs = extractvalue [2 x float] %in, 1
+ %sum = fadd float %lhs, %rhs
+ ret float %sum
+}
+
+; Check that an HFA prevents backfilling of FP registers (i.e. %rhs must go on
+; the stack rather than in s7).
+define float @test_hfa_block_consume([7 x float], [2 x float] %in, float %rhs) {
+; CHECK-LABEL: test_hfa_block_consume:
+; CHECK-DAG: ldr [[LHS:s[0-9]+]], [sp]
+; CHECK-DAG: ldr [[RHS:s[0-9]+]], [sp, #8]
+; CHECK: fadd s0, [[LHS]], [[RHS]]
+
+ %lhs = extractvalue [2 x float] %in, 0
+ %sum = fadd float %lhs, %rhs
+ ret float %sum
+}
+
+define float @test_hfa_stackalign([8 x float], [1 x float], [2 x float] %in) {
+; CHECK-LABEL: test_hfa_stackalign:
+; CHECK-AAPCS: ldp [[LHS:s[0-9]+]], [[RHS:s[0-9]+]], [sp, #8]
+; CHECK-DARWINPCS: ldp [[LHS:s[0-9]+]], [[RHS:s[0-9]+]], [sp, #4]
+; CHECK: fadd s0, [[LHS]], [[RHS]]
+ %lhs = extractvalue [2 x float] %in, 0
+ %rhs = extractvalue [2 x float] %in, 1
+ %sum = fadd float %lhs, %rhs
+ ret float %sum
+}
+
+; An HFA that ends up on the stack should not have any effect on where
+; integer-based arguments go.
+define i64 @test_hfa_ignores_gprs([7 x float], [2 x float] %in, i64, i64 %res) {
+; CHECK-LABEL: test_hfa_ignores_gprs:
+; CHECK: mov x0, x1
+ ret i64 %res
+}
+
+; [2 x float] should not be promoted to double by the Darwin varargs handling,
+; but should go in an 8-byte aligned slot.
+define void @test_varargs_stackalign() {
+; CHECK-LABEL: test_varargs_stackalign:
+; CHECK-DARWINPCS: stp {{w[0-9]+}}, {{w[0-9]+}}, [sp, #16]
+
+ call void(...)* @callee([3 x float] undef, [2 x float] [float 1.0, float 2.0])
+ ret void
+}
+
+define i64 @test_smallstruct_block([7 x i64], [2 x i64] %in) {
+; CHECK-LABEL: test_smallstruct_block:
+; CHECK: ldp [[LHS:x[0-9]+]], [[RHS:x[0-9]+]], [sp]
+; CHECK: add x0, [[LHS]], [[RHS]]
+ %lhs = extractvalue [2 x i64] %in, 0
+ %rhs = extractvalue [2 x i64] %in, 1
+ %sum = add i64 %lhs, %rhs
+ ret i64 %sum
+}
+
+; Check that a small struct prevents backfilling of registers (i.e. %rhs
+; must go on the stack rather than in x7).
+define i64 @test_smallstruct_block_consume([7 x i64], [2 x i64] %in, i64 %rhs) {
+; CHECK-LABEL: test_smallstruct_block_consume:
+; CHECK-DAG: ldr [[LHS:x[0-9]+]], [sp]
+; CHECK-DAG: ldr [[RHS:x[0-9]+]], [sp, #16]
+; CHECK: add x0, [[LHS]], [[RHS]]
+
+ %lhs = extractvalue [2 x i64] %in, 0
+ %sum = add i64 %lhs, %rhs
+ ret i64 %sum
+}
diff --git a/llvm/test/CodeGen/AArch64/arm64-variadic-aapcs.ll b/llvm/test/CodeGen/AArch64/arm64-variadic-aapcs.ll
index 36a7bfd9252..8e41304ffad 100644
--- a/llvm/test/CodeGen/AArch64/arm64-variadic-aapcs.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-variadic-aapcs.ll
@@ -96,7 +96,7 @@ define void @test_nospare([8 x i64], [8 x float], ...) {
; If there are non-variadic arguments on the stack (here two i64s) then the
; __stack field should point just past them.
-define void @test_offsetstack([10 x i64], [3 x float], ...) {
+define void @test_offsetstack([8 x i64], [2 x i64], [3 x float], ...) {
; CHECK-LABEL: test_offsetstack:
; CHECK: sub sp, sp, #80
; CHECK: add [[STACK_TOP:x[0-9]+]], sp, #96