summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/ARM
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/ARM')
-rw-r--r--llvm/lib/Target/ARM/ARMCallingConv.h90
-rw-r--r--llvm/lib/Target/ARM/ARMCallingConv.td3
-rw-r--r--llvm/lib/Target/ARM/ARMISelLowering.cpp141
-rw-r--r--llvm/lib/Target/ARM/ARMISelLowering.h8
4 files changed, 217 insertions, 25 deletions
diff --git a/llvm/lib/Target/ARM/ARMCallingConv.h b/llvm/lib/Target/ARM/ARMCallingConv.h
index 3ac2d60e253..8e0fd893528 100644
--- a/llvm/lib/Target/ARM/ARMCallingConv.h
+++ b/llvm/lib/Target/ARM/ARMCallingConv.h
@@ -160,6 +160,96 @@ static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
State);
}
+static const uint16_t SRegList[] = { ARM::S0, ARM::S1, ARM::S2, ARM::S3,
+ ARM::S4, ARM::S5, ARM::S6, ARM::S7,
+ ARM::S8, ARM::S9, ARM::S10, ARM::S11,
+ ARM::S12, ARM::S13, ARM::S14, ARM::S15 };
+static const uint16_t DRegList[] = { ARM::D0, ARM::D1, ARM::D2, ARM::D3,
+ ARM::D4, ARM::D5, ARM::D6, ARM::D7 };
+static const uint16_t QRegList[] = { ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3 };
+
+// Allocate part of an AAPCS HFA or HVA. We assume that each member of the HA
+// has InConsecutiveRegs set, and that the last member also has
+// InConsecutiveRegsLast set. We must process all members of the HA before
+// we can allocate it, as we need to know the total number of registers that
+// will be needed in order to (attempt to) allocate a contiguous block.
+static bool CC_ARM_AAPCS_Custom_HA(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+ SmallVectorImpl<CCValAssign> &PendingHAMembers = State.getPendingLocs();
+
+ // AAPCS HFAs must have 1-4 elements, all of the same type
+ assert(PendingHAMembers.size() < 4);
+ if (PendingHAMembers.size() > 0)
+ assert(PendingHAMembers[0].getLocVT() == LocVT);
+
+ // Add the argument to the list to be allocated once we know the size of the
+ // HA
+ PendingHAMembers.push_back(
+ CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
+
+ if (ArgFlags.isInConsecutiveRegsLast()) {
+ assert(PendingHAMembers.size() > 0 && PendingHAMembers.size() <= 4 &&
+ "Homogeneous aggregates must have between 1 and 4 members");
+
+ // Try to allocate a contiguous block of registers, each of the correct
+ // size to hold one member.
+ const uint16_t *RegList;
+ unsigned NumRegs;
+ switch (LocVT.SimpleTy) {
+ case MVT::f32:
+ RegList = SRegList;
+ NumRegs = 16;
+ break;
+ case MVT::f64:
+ RegList = DRegList;
+ NumRegs = 8;
+ break;
+ case MVT::v2f64:
+ RegList = QRegList;
+ NumRegs = 4;
+ break;
+ default:
+ llvm_unreachable("Unexpected member type for HA");
+ break;
+ }
+
+ unsigned RegResult =
+ State.AllocateRegBlock(RegList, NumRegs, PendingHAMembers.size());
+
+ if (RegResult) {
+ for (SmallVectorImpl<CCValAssign>::iterator It = PendingHAMembers.begin();
+ It != PendingHAMembers.end(); ++It) {
+ It->convertToReg(RegResult);
+ State.addLoc(*It);
+ ++RegResult;
+ }
+ PendingHAMembers.clear();
+ return true;
+ }
+
+ // Register allocation failed, fall back to the stack
+
+ // Mark all VFP regs as unavailable (AAPCS rule C.2.vfp)
+ for (unsigned regNo = 0; regNo < 16; ++regNo)
+ State.AllocateReg(SRegList[regNo]);
+
+ unsigned Size = LocVT.getSizeInBits() / 8;
+ unsigned Align = LocVT.SimpleTy == MVT::v2f64 ? 8 : Size;
+
+ for (auto It : PendingHAMembers) {
+ It.convertToMem(State.AllocateStack(Size, Align));
+ State.addLoc(It);
+ }
+
+ // All pending members have now been allocated
+ PendingHAMembers.clear();
+ }
+
+ // This will be allocated by the last member of the HA
+ return true;
+}
+
} // End llvm namespace
#endif
diff --git a/llvm/lib/Target/ARM/ARMCallingConv.td b/llvm/lib/Target/ARM/ARMCallingConv.td
index 7cffd824ee4..526089b01b6 100644
--- a/llvm/lib/Target/ARM/ARMCallingConv.td
+++ b/llvm/lib/Target/ARM/ARMCallingConv.td
@@ -174,6 +174,9 @@ def CC_ARM_AAPCS_VFP : CallingConv<[
CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+ // HFAs are passed in a contiguous block of registers, or on the stack
+ CCIfConsecutiveRegs<CCCustom<"CC_ARM_AAPCS_Custom_HA">>,
+
CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8,
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 925b4693fd5..e4832783fdf 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -43,6 +43,7 @@
#include "llvm/IR/Type.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
@@ -1211,40 +1212,58 @@ static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
#include "ARMGenCallingConv.inc"
-/// CCAssignFnForNode - Selects the correct CCAssignFn for a the
-/// given CallingConvention value.
-CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
- bool Return,
- bool isVarArg) const {
+/// getEffectiveCallingConv - Get the effective calling convention, taking into
+/// account presence of floating point hardware and calling convention
+/// limitations, such as support for variadic functions.
+CallingConv::ID
+ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
+ bool isVarArg) const {
switch (CC) {
default:
llvm_unreachable("Unsupported calling convention");
- case CallingConv::Fast:
- if (Subtarget->hasVFP2() && !isVarArg) {
- if (!Subtarget->isAAPCS_ABI())
- return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
- // For AAPCS ABI targets, just use VFP variant of the calling convention.
- return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
- }
- // Fallthrough
- case CallingConv::C: {
- // Use target triple & subtarget features to do actual dispatch.
+ case CallingConv::ARM_AAPCS:
+ case CallingConv::ARM_APCS:
+ case CallingConv::GHC:
+ return CC;
+ case CallingConv::ARM_AAPCS_VFP:
+ return isVarArg ? CallingConv::ARM_AAPCS : CallingConv::ARM_AAPCS_VFP;
+ case CallingConv::C:
if (!Subtarget->isAAPCS_ABI())
- return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
+ return CallingConv::ARM_APCS;
else if (Subtarget->hasVFP2() &&
getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
!isVarArg)
- return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
- return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
+ return CallingConv::ARM_AAPCS_VFP;
+ else
+ return CallingConv::ARM_AAPCS;
+ case CallingConv::Fast:
+ if (!Subtarget->isAAPCS_ABI()) {
+ if (Subtarget->hasVFP2() && !isVarArg)
+ return CallingConv::Fast;
+ return CallingConv::ARM_APCS;
+ } else if (Subtarget->hasVFP2() && !isVarArg)
+ return CallingConv::ARM_AAPCS_VFP;
+ else
+ return CallingConv::ARM_AAPCS;
}
- case CallingConv::ARM_AAPCS_VFP:
- if (!isVarArg)
- return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
- // Fallthrough
- case CallingConv::ARM_AAPCS:
- return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
+}
+
+/// CCAssignFnForNode - Selects the correct CCAssignFn for the given
+/// CallingConvention.
+CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
+ bool Return,
+ bool isVarArg) const {
+ switch (getEffectiveCallingConv(CC, isVarArg)) {
+ default:
+ llvm_unreachable("Unsupported calling convention");
case CallingConv::ARM_APCS:
return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
+ case CallingConv::ARM_AAPCS:
+ return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
+ case CallingConv::ARM_AAPCS_VFP:
+ return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
+ case CallingConv::Fast:
+ return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
case CallingConv::GHC:
return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
}
@@ -10628,3 +10647,77 @@ Value *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val,
Val, Strex->getFunctionType()->getParamType(0)),
Addr);
}
+
+enum HABaseType {
+ HA_UNKNOWN = 0,
+ HA_FLOAT,
+ HA_DOUBLE,
+ HA_VECT64,
+ HA_VECT128
+};
+
+static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base,
+ uint64_t &Members) {
+ if (const StructType *ST = dyn_cast<StructType>(Ty)) {
+ for (unsigned i = 0; i < ST->getNumElements(); ++i) {
+ uint64_t SubMembers = 0;
+ if (!isHomogeneousAggregate(ST->getElementType(i), Base, SubMembers))
+ return false;
+ Members += SubMembers;
+ }
+ } else if (const ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
+ uint64_t SubMembers = 0;
+ if (!isHomogeneousAggregate(AT->getElementType(), Base, SubMembers))
+ return false;
+ Members += SubMembers * AT->getNumElements();
+ } else if (Ty->isFloatTy()) {
+ if (Base != HA_UNKNOWN && Base != HA_FLOAT)
+ return false;
+ Members = 1;
+ Base = HA_FLOAT;
+ } else if (Ty->isDoubleTy()) {
+ if (Base != HA_UNKNOWN && Base != HA_DOUBLE)
+ return false;
+ Members = 1;
+ Base = HA_DOUBLE;
+ } else if (const VectorType *VT = dyn_cast<VectorType>(Ty)) {
+ Members = 1;
+ switch (Base) {
+ case HA_FLOAT:
+ case HA_DOUBLE:
+ return false;
+ case HA_VECT64:
+ return VT->getBitWidth() == 64;
+ case HA_VECT128:
+ return VT->getBitWidth() == 128;
+ case HA_UNKNOWN:
+ switch (VT->getBitWidth()) {
+ case 64:
+ Base = HA_VECT64;
+ return true;
+ case 128:
+ Base = HA_VECT128;
+ return true;
+ default:
+ return false;
+ }
+ }
+ }
+
+ return (Members > 0 && Members <= 4);
+}
+
+/// \brief Return true if a type is an AAPCS-VFP homogeneous aggregate.
+bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters(
+ Type *Ty, CallingConv::ID CallConv, bool isVarArg) const {
+ if (getEffectiveCallingConv(CallConv, isVarArg) ==
+ CallingConv::ARM_AAPCS_VFP) {
+ HABaseType Base = HA_UNKNOWN;
+ uint64_t Members = 0;
+ bool result = isHomogeneousAggregate(Ty, Base, Members);
+ DEBUG(dbgs() << "isHA: " << result << " "; Ty->dump(); dbgs() << "\n");
+ return result;
+ } else {
+ return false;
+ }
+}
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index cff7a9799a8..305d23cf58a 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -384,6 +384,11 @@ namespace llvm {
bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
Type *Ty) const override;
+ /// \brief Returns true if an argument of type Ty needs to be passed in a
+ /// contiguous block of registers in calling convention CallConv.
+ bool functionArgumentNeedsConsecutiveRegisters(
+ Type *Ty, CallingConv::ID CallConv, bool isVarArg) const override;
+
Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
AtomicOrdering Ord) const override;
Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val,
@@ -424,6 +429,8 @@ namespace llvm {
SDValue &Root, SelectionDAG &DAG,
SDLoc dl) const;
+ CallingConv::ID getEffectiveCallingConv(CallingConv::ID CC,
+ bool isVarArg) const;
CCAssignFn *CCAssignFnForNode(CallingConv::ID CC, bool Return,
bool isVarArg) const;
SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
@@ -577,7 +584,6 @@ namespace llvm {
OtherModImm
};
-
namespace ARM {
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo);
OpenPOWER on IntegriCloud