-rw-r--r--  llvm/include/llvm/CodeGen/CallingConvLower.h          |   9
-rw-r--r--  llvm/include/llvm/IR/DataLayout.h                     |   2
-rw-r--r--  llvm/lib/Target/AArch64/AArch64CallingConvention.h    | 136
-rw-r--r--  llvm/lib/Target/AArch64/AArch64CallingConvention.td   |   6
-rw-r--r--  llvm/lib/Target/AArch64/AArch64FastISel.cpp           |   1
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp       |   6
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelLowering.h         |   4
-rw-r--r--  llvm/lib/Target/ARM/ARMCallingConv.h                  |   8
-rw-r--r--  llvm/test/CodeGen/AArch64/argument-blocks.ll          |  92
-rw-r--r--  llvm/test/CodeGen/AArch64/arm64-variadic-aapcs.ll     |   2
10 files changed, 257 insertions, 9 deletions
diff --git a/llvm/include/llvm/CodeGen/CallingConvLower.h b/llvm/include/llvm/CodeGen/CallingConvLower.h
index 0b2ccc6ee6d..903a3954960 100644
--- a/llvm/include/llvm/CodeGen/CallingConvLower.h
+++ b/llvm/include/llvm/CodeGen/CallingConvLower.h
@@ -345,8 +345,13 @@ public:
   /// AllocateRegBlock - Attempt to allocate a block of RegsRequired consecutive
   /// registers. If this is not possible, return zero. Otherwise, return the first
   /// register of the block that were allocated, marking the entire block as allocated.
-  unsigned AllocateRegBlock(const uint16_t *Regs, unsigned NumRegs, unsigned RegsRequired) {
-    for (unsigned StartIdx = 0; StartIdx <= NumRegs - RegsRequired; ++StartIdx) {
+  unsigned AllocateRegBlock(ArrayRef<const uint16_t> Regs,
+                            unsigned RegsRequired) {
+    if (RegsRequired > Regs.size())
+      return 0;
+
+    for (unsigned StartIdx = 0; StartIdx <= Regs.size() - RegsRequired;
+         ++StartIdx) {
       bool BlockAvailable = true;
       // Check for already-allocated regs in this block
       for (unsigned BlockIdx = 0; BlockIdx < RegsRequired; ++BlockIdx) {
diff --git a/llvm/include/llvm/IR/DataLayout.h b/llvm/include/llvm/IR/DataLayout.h
index 4580a4f56a0..a9e75955ce7 100644
--- a/llvm/include/llvm/IR/DataLayout.h
+++ b/llvm/include/llvm/IR/DataLayout.h
@@ -228,6 +228,8 @@ public:
     return (StackNaturalAlign != 0) && (Align > StackNaturalAlign);
   }
 
+  unsigned getStackAlignment() const { return StackNaturalAlign; }
+
   bool hasMicrosoftFastStdCallMangling() const {
     return ManglingMode == MM_WINCOFF;
   }
diff --git a/llvm/lib/Target/AArch64/AArch64CallingConvention.h b/llvm/lib/Target/AArch64/AArch64CallingConvention.h
new file mode 100644
index 00000000000..b2be99e5419
--- /dev/null
+++ b/llvm/lib/Target/AArch64/AArch64CallingConvention.h
@@ -0,0 +1,136 @@
+//=== AArch64CallingConv.h - Custom Calling Convention Routines -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the custom routines for the AArch64 Calling Convention
+// that aren't done by tablegen.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64CALLINGCONVENTION_H
+#define LLVM_LIB_TARGET_AARCH64_AARCH64CALLINGCONVENTION_H
+
+#include "AArch64.h"
+#include "AArch64InstrInfo.h"
+#include "AArch64Subtarget.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+namespace {
+using namespace llvm;
+
+static const uint16_t XRegList[] = {AArch64::X0, AArch64::X1, AArch64::X2,
+                                    AArch64::X3, AArch64::X4, AArch64::X5,
+                                    AArch64::X6, AArch64::X7};
+static const uint16_t SRegList[] = {AArch64::S0, AArch64::S1, AArch64::S2,
+                                    AArch64::S3, AArch64::S4, AArch64::S5,
+                                    AArch64::S6, AArch64::S7};
+static const uint16_t DRegList[] = {AArch64::D0, AArch64::D1, AArch64::D2,
+                                    AArch64::D3, AArch64::D4, AArch64::D5,
+                                    AArch64::D6, AArch64::D7};
+static const uint16_t QRegList[] = {AArch64::Q0, AArch64::Q1, AArch64::Q2,
+                                    AArch64::Q3, AArch64::Q4, AArch64::Q5,
+                                    AArch64::Q6, AArch64::Q7};
+
+static bool finishStackBlock(SmallVectorImpl<CCValAssign> &PendingMembers,
+                             MVT LocVT, ISD::ArgFlagsTy &ArgFlags,
+                             CCState &State, unsigned SlotAlign) {
+  unsigned Size = LocVT.getSizeInBits() / 8;
+  unsigned StackAlign = State.getMachineFunction()
+                            .getSubtarget()
+                            .getDataLayout()
+                            ->getStackAlignment();
+  unsigned Align = std::min(ArgFlags.getOrigAlign(), StackAlign);
+
+  for (auto &It : PendingMembers) {
+    It.convertToMem(State.AllocateStack(Size, std::max(Align, SlotAlign)));
+    State.addLoc(It);
+    SlotAlign = 1;
+  }
+
+  // All pending members have now been allocated
+  PendingMembers.clear();
+  return true;
+}
+
+/// The Darwin variadic PCS places anonymous arguments in 8-byte stack slots. An
+/// [N x Ty] type must still be contiguous in memory though.
+static bool CC_AArch64_Custom_Stack_Block(
+    unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo,
+    ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+  SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
+
+  // Add the argument to the list to be allocated once we know the size of the
+  // block.
+  PendingMembers.push_back(
+      CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
+
+  if (!ArgFlags.isInConsecutiveRegsLast())
+    return true;
+
+  return finishStackBlock(PendingMembers, LocVT, ArgFlags, State, 8);
+}
+
+/// Given an [N x Ty] block, it should be passed in a consecutive sequence of
+/// registers. If no such sequence is available, mark the rest of the registers
+/// of that type as used and place the argument on the stack.
+static bool CC_AArch64_Custom_Block(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+                                    CCValAssign::LocInfo &LocInfo,
+                                    ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+  // Try to allocate a contiguous block of registers, each of the correct
+  // size to hold one member.
+  ArrayRef<const uint16_t> RegList;
+  if (LocVT.SimpleTy == MVT::i64)
+    RegList = XRegList;
+  else if (LocVT.SimpleTy == MVT::f32)
+    RegList = SRegList;
+  else if (LocVT.SimpleTy == MVT::f64)
+    RegList = DRegList;
+  else if (LocVT.SimpleTy == MVT::v2f64)
+    RegList = QRegList;
+  else {
+    // Not an array we want to split up after all.
+    return false;
+  }
+
+  SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
+
+  // Add the argument to the list to be allocated once we know the size of the
+  // block.
+  PendingMembers.push_back(
+      CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
+
+  if (!ArgFlags.isInConsecutiveRegsLast())
+    return true;
+
+  unsigned RegResult = State.AllocateRegBlock(RegList, PendingMembers.size());
+  if (RegResult) {
+    for (auto &It : PendingMembers) {
+      It.convertToReg(RegResult);
+      State.addLoc(It);
+      ++RegResult;
+    }
+    PendingMembers.clear();
+    return true;
+  }
+
+  // Mark all regs in the class as unavailable
+  for (auto Reg : RegList)
+    State.AllocateReg(Reg);
+
+  const AArch64Subtarget &Subtarget = static_cast<const AArch64Subtarget &>(
+      State.getMachineFunction().getSubtarget());
+  unsigned SlotAlign = Subtarget.isTargetDarwin() ? 1 : 8;
+
+  return finishStackBlock(PendingMembers, LocVT, ArgFlags, State, SlotAlign);
+}
+
+}
+
+#endif
diff --git a/llvm/lib/Target/AArch64/AArch64CallingConvention.td b/llvm/lib/Target/AArch64/AArch64CallingConvention.td
index 9e707e4083c..1a8040275ca 100644
--- a/llvm/lib/Target/AArch64/AArch64CallingConvention.td
+++ b/llvm/lib/Target/AArch64/AArch64CallingConvention.td
@@ -40,6 +40,8 @@ def CC_AArch64_AAPCS : CallingConv<[
   // slot is 64-bit.
   CCIfByVal<CCPassByVal<8, 8>>,
 
+  CCIfConsecutiveRegs<CCCustom<"CC_AArch64_Custom_Block">>,
+
   // Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers,
   // up to eight each of GPR and FPR.
   CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
@@ -119,6 +121,8 @@ def CC_AArch64_DarwinPCS : CallingConv<[
   // slot is 64-bit.
   CCIfByVal<CCPassByVal<8, 8>>,
 
+  CCIfConsecutiveRegs<CCCustom<"CC_AArch64_Custom_Block">>,
+
   // Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers,
   // up to eight each of GPR and FPR.
   CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
@@ -159,6 +163,8 @@ def CC_AArch64_DarwinPCS_VarArg : CallingConv<[
   CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
   CCIfType<[v2f64, v4f32, f128], CCBitConvertToType<v2i64>>,
 
+  CCIfConsecutiveRegs<CCCustom<"CC_AArch64_Custom_Stack_Block">>,
+
   // Handle all scalar types as either i64 or f64.
   CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
   CCIfType<[f16, f32], CCPromoteToType<f64>>,
diff --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
index fb0326bf0bf..d942ace427b 100644
--- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
@@ -14,6 +14,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "AArch64.h"
+#include "AArch64CallingConvention.h"
 #include "AArch64Subtarget.h"
 #include "AArch64TargetMachine.h"
 #include "MCTargetDesc/AArch64AddressingModes.h"
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 70cf7599a39..6da468ed6b1 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "AArch64ISelLowering.h"
+#include "AArch64CallingConvention.h"
 #include "AArch64MachineFunctionInfo.h"
 #include "AArch64PerfectShuffle.h"
 #include "AArch64Subtarget.h"
@@ -8842,3 +8843,8 @@ Value *AArch64TargetLowering::emitStoreConditional(IRBuilder<> &Builder,
                      Val, Stxr->getFunctionType()->getParamType(0)),
                  Addr);
 }
+
+bool AArch64TargetLowering::functionArgumentNeedsConsecutiveRegisters(
+    Type *Ty, CallingConv::ID CallConv, bool isVarArg) const {
+  return Ty->isArrayTy();
+}
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index c76f6a865dc..458489bd6c6 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -473,6 +473,10 @@ private:
 
   void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                           SelectionDAG &DAG) const override;
+
+  bool functionArgumentNeedsConsecutiveRegisters(Type *Ty,
+                                                 CallingConv::ID CallConv,
+                                                 bool isVarArg) const;
 };
 
 namespace AArch64 {
diff --git a/llvm/lib/Target/ARM/ARMCallingConv.h b/llvm/lib/Target/ARM/ARMCallingConv.h
index bd07236d0ea..4567a2a1afd 100644
--- a/llvm/lib/Target/ARM/ARMCallingConv.h
+++ b/llvm/lib/Target/ARM/ARMCallingConv.h
@@ -194,20 +194,16 @@ static bool CC_ARM_AAPCS_Custom_HA(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
 
   // Try to allocate a contiguous block of registers, each of the correct
   // size to hold one member.
-  const uint16_t *RegList;
-  unsigned NumRegs;
+  ArrayRef<const uint16_t> RegList;
   switch (LocVT.SimpleTy) {
   case MVT::f32:
     RegList = SRegList;
-    NumRegs = 16;
     break;
   case MVT::f64:
     RegList = DRegList;
-    NumRegs = 8;
     break;
   case MVT::v2f64:
     RegList = QRegList;
-    NumRegs = 4;
     break;
   default:
     llvm_unreachable("Unexpected member type for HA");
@@ -215,7 +211,7 @@ static bool CC_ARM_AAPCS_Custom_HA(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
   }
 
   unsigned RegResult =
-      State.AllocateRegBlock(RegList, NumRegs, PendingHAMembers.size());
+      State.AllocateRegBlock(RegList, PendingHAMembers.size());
 
   if (RegResult) {
     for (SmallVectorImpl<CCValAssign>::iterator It = PendingHAMembers.begin();
diff --git a/llvm/test/CodeGen/AArch64/argument-blocks.ll b/llvm/test/CodeGen/AArch64/argument-blocks.ll
new file mode 100644
index 00000000000..cc65541a644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/argument-blocks.ll
@@ -0,0 +1,92 @@
+; RUN: llc -mtriple=aarch64-apple-ios7.0 -o - %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-DARWINPCS
+; RUN: llc -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AAPCS
+
+declare void @callee(...)
+
+define float @test_hfa_regs(float, [2 x float] %in) {
+; CHECK-LABEL: test_hfa_regs:
+; CHECK: fadd s0, s1, s2
+
+  %lhs = extractvalue [2 x float] %in, 0
+  %rhs = extractvalue [2 x float] %in, 1
+  %sum = fadd float %lhs, %rhs
+  ret float %sum
+}
+
+; Check that the array gets allocated to a contiguous block on the stack (rather
+; than the default of 2 8-byte slots).
+define float @test_hfa_block([7 x float], [2 x float] %in) {
+; CHECK-LABEL: test_hfa_block:
+; CHECK: ldp [[LHS:s[0-9]+]], [[RHS:s[0-9]+]], [sp]
+; CHECK: fadd s0, [[LHS]], [[RHS]]
+
+  %lhs = extractvalue [2 x float] %in, 0
+  %rhs = extractvalue [2 x float] %in, 1
+  %sum = fadd float %lhs, %rhs
+  ret float %sum
+}
+
+; Check that an HFA prevents backfilling of VFP registers (i.e. %rhs must go on
+; the stack rather than in s7).
+define float @test_hfa_block_consume([7 x float], [2 x float] %in, float %rhs) {
+; CHECK-LABEL: test_hfa_block_consume:
+; CHECK-DAG: ldr [[LHS:s[0-9]+]], [sp]
+; CHECK-DAG: ldr [[RHS:s[0-9]+]], [sp, #8]
+; CHECK: fadd s0, [[LHS]], [[RHS]]
+
+  %lhs = extractvalue [2 x float] %in, 0
+  %sum = fadd float %lhs, %rhs
+  ret float %sum
+}
+
+define float @test_hfa_stackalign([8 x float], [1 x float], [2 x float] %in) {
+; CHECK-LABEL: test_hfa_stackalign:
+; CHECK-AAPCS: ldp [[LHS:s[0-9]+]], [[RHS:s[0-9]+]], [sp, #8]
+; CHECK-DARWINPCS: ldp [[LHS:s[0-9]+]], [[RHS:s[0-9]+]], [sp, #4]
+; CHECK: fadd s0, [[LHS]], [[RHS]]
+  %lhs = extractvalue [2 x float] %in, 0
+  %rhs = extractvalue [2 x float] %in, 1
+  %sum = fadd float %lhs, %rhs
+  ret float %sum
+}
+
+; An HFA that ends up on the stack should not have any effect on where
+; integer-based arguments go.
+define i64 @test_hfa_ignores_gprs([7 x float], [2 x float] %in, i64, i64 %res) {
+; CHECK-LABEL: test_hfa_ignores_gprs:
+; CHECK: mov x0, x1
+  ret i64 %res
+}
+
+; [2 x float] should not be promoted to double by the Darwin varargs handling,
+; but should go in an 8-byte aligned slot.
+define void @test_varargs_stackalign() {
+; CHECK-LABEL: test_varargs_stackalign:
+; CHECK-DARWINPCS: stp {{w[0-9]+}}, {{w[0-9]+}}, [sp, #16]
+
+  call void(...)* @callee([3 x float] undef, [2 x float] [float 1.0, float 2.0])
+  ret void
+}
+
+define i64 @test_smallstruct_block([7 x i64], [2 x i64] %in) {
+; CHECK-LABEL: test_smallstruct_block:
+; CHECK: ldp [[LHS:x[0-9]+]], [[RHS:x[0-9]+]], [sp]
+; CHECK: add x0, [[LHS]], [[RHS]]
+  %lhs = extractvalue [2 x i64] %in, 0
+  %rhs = extractvalue [2 x i64] %in, 1
+  %sum = add i64 %lhs, %rhs
+  ret i64 %sum
+}
+
+; Check that a small struct prevents backfilling of registers (i.e. %rhs
+; must go on the stack rather than in x7).
+define i64 @test_smallstruct_block_consume([7 x i64], [2 x i64] %in, i64 %rhs) {
+; CHECK-LABEL: test_smallstruct_block_consume:
+; CHECK-DAG: ldr [[LHS:x[0-9]+]], [sp]
+; CHECK-DAG: ldr [[RHS:x[0-9]+]], [sp, #16]
+; CHECK: add x0, [[LHS]], [[RHS]]
+
+  %lhs = extractvalue [2 x i64] %in, 0
+  %sum = add i64 %lhs, %rhs
+  ret i64 %sum
+}
diff --git a/llvm/test/CodeGen/AArch64/arm64-variadic-aapcs.ll b/llvm/test/CodeGen/AArch64/arm64-variadic-aapcs.ll
index 36a7bfd9252..8e41304ffad 100644
--- a/llvm/test/CodeGen/AArch64/arm64-variadic-aapcs.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-variadic-aapcs.ll
@@ -96,7 +96,7 @@ define void @test_nospare([8 x i64], [8 x float], ...) {
 
 ; If there are non-variadic arguments on the stack (here two i64s) then the
 ; __stack field should point just past them.
-define void @test_offsetstack([10 x i64], [3 x float], ...) {
+define void @test_offsetstack([8 x i64], [2 x i64], [3 x float], ...) {
; CHECK-LABEL: test_offsetstack:
; CHECK: sub sp, sp, #80
; CHECK: add [[STACK_TOP:x[0-9]+]], sp, #96
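
For reference, the following is a minimal standalone sketch of the first-fit block search that the updated CCState::AllocateRegBlock above performs. It is an illustration only, not LLVM's actual CCState: the UsedRegs vector and the isAllocated/markAllocated helpers stand in for CCState's internal register bookkeeping, and register number 0 is treated as "no register" for the failure return, mirroring the convention in the diff.

// Standalone sketch of consecutive-register block allocation (not LLVM's
// CCState). UsedRegs/isAllocated/markAllocated are illustrative stand-ins.
#include <cstdint>
#include <vector>

struct RegBlockAllocator {
  std::vector<bool> UsedRegs; // one flag per register number

  explicit RegBlockAllocator(unsigned NumRegs) : UsedRegs(NumRegs, false) {}

  bool isAllocated(uint16_t Reg) const { return UsedRegs[Reg]; }
  void markAllocated(uint16_t Reg) { UsedRegs[Reg] = true; }

  // Attempt to allocate RegsRequired consecutive entries of Regs. On success,
  // mark the whole block allocated and return its first register; return 0
  // (no register) if no such block exists.
  uint16_t allocateRegBlock(const std::vector<uint16_t> &Regs,
                            unsigned RegsRequired) {
    if (RegsRequired > Regs.size())
      return 0;

    for (unsigned StartIdx = 0; StartIdx <= Regs.size() - RegsRequired;
         ++StartIdx) {
      // Check for already-allocated registers in this candidate block.
      bool BlockAvailable = true;
      for (unsigned BlockIdx = 0; BlockIdx < RegsRequired; ++BlockIdx) {
        if (isAllocated(Regs[StartIdx + BlockIdx])) {
          BlockAvailable = false;
          break;
        }
      }
      if (BlockAvailable) {
        for (unsigned BlockIdx = 0; BlockIdx < RegsRequired; ++BlockIdx)
          markAllocated(Regs[StartIdx + BlockIdx]);
        return Regs[StartIdx];
      }
    }
    return 0; // no contiguous block of free registers found
  }
};

A caller in the style of CC_AArch64_Custom_Block would pass the register list for the member type (for example the eight X, S, D or Q argument registers, numbered here hypothetically from 1) and the number of block members, then fall back to marking the remaining registers used and placing the block on the stack when 0 is returned.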