summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorTim Northover <tnorthover@apple.com>2014-05-27 10:43:38 +0000
committerTim Northover <tnorthover@apple.com>2014-05-27 10:43:38 +0000
commit4f1909f1da178001187b1bbbee237f73945b9477 (patch)
tree77464822558827c941dd36a6eb746a2e05bc7111 /llvm/lib
parent909b5c94c072516d980013073be9ca2cd8993f67 (diff)
downloadbcm5719-llvm-4f1909f1da178001187b1bbbee237f73945b9477.tar.gz
bcm5719-llvm-4f1909f1da178001187b1bbbee237f73945b9477.zip
ARM: teach AAPCS-VFP to deal with Cortex-M4.
Cortex-M4 only has single-precision floating point support, so any LLVM "double" type will have been split into 2 i32s by now. Fortunately, the consecutive-register framework turns out to be precisely what's needed to reconstruct the double and follow AAPCS-VFP correctly! rdar://problem/17012966 llvm-svn: 209650
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp19
-rw-r--r--llvm/lib/Target/ARM/ARMCallingConv.h17
-rw-r--r--llvm/lib/Target/ARM/ARMISelLowering.cpp17
3 files changed, 32 insertions, 21 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index c181046ba23..070e929fce7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -7176,11 +7176,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
}
if (Args[i].isNest)
Flags.setNest();
- if (NeedsRegBlock) {
+ if (NeedsRegBlock)
Flags.setInConsecutiveRegs();
- if (Value == NumValues - 1)
- Flags.setInConsecutiveRegsLast();
- }
Flags.setOrigAlign(OriginalAlignment);
MVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT);
@@ -7226,6 +7223,10 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
else if (j != 0)
MyFlags.Flags.setOrigAlign(1);
+ // Only mark the end at the last register of the last value.
+ if (NeedsRegBlock && Value == NumValues - 1 && j == NumParts - 1)
+ MyFlags.Flags.setInConsecutiveRegsLast();
+
CLI.Outs.push_back(MyFlags);
CLI.OutVals.push_back(Parts[j]);
}
@@ -7412,11 +7413,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
}
if (F.getAttributes().hasAttribute(Idx, Attribute::Nest))
Flags.setNest();
- if (NeedsRegBlock) {
+ if (NeedsRegBlock)
Flags.setInConsecutiveRegs();
- if (Value == NumValues - 1)
- Flags.setInConsecutiveRegsLast();
- }
Flags.setOrigAlign(OriginalAlignment);
MVT RegisterVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
@@ -7429,6 +7427,11 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// if it isn't first piece, alignment must be 1
else if (i > 0)
MyFlags.Flags.setOrigAlign(1);
+
+ // Only mark the end at the last register of the last value.
+ if (NeedsRegBlock && Value == NumValues - 1 && i == NumRegs - 1)
+ MyFlags.Flags.setInConsecutiveRegsLast();
+
Ins.push_back(MyFlags);
}
PartBase += VT.getStoreSize();
diff --git a/llvm/lib/Target/ARM/ARMCallingConv.h b/llvm/lib/Target/ARM/ARMCallingConv.h
index 8e0fd893528..dc41c1c14bb 100644
--- a/llvm/lib/Target/ARM/ARMCallingConv.h
+++ b/llvm/lib/Target/ARM/ARMCallingConv.h
@@ -177,9 +177,8 @@ static bool CC_ARM_AAPCS_Custom_HA(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags, CCState &State) {
SmallVectorImpl<CCValAssign> &PendingHAMembers = State.getPendingLocs();
-
// AAPCS HFAs must have 1-4 elements, all of the same type
- assert(PendingHAMembers.size() < 4);
+ assert(PendingHAMembers.size() < 8);
if (PendingHAMembers.size() > 0)
assert(PendingHAMembers[0].getLocVT() == LocVT);
@@ -189,7 +188,7 @@ static bool CC_ARM_AAPCS_Custom_HA(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
if (ArgFlags.isInConsecutiveRegsLast()) {
- assert(PendingHAMembers.size() > 0 && PendingHAMembers.size() <= 4 &&
+ assert(PendingHAMembers.size() > 0 && PendingHAMembers.size() <= 8 &&
"Homogeneous aggregates must have between 1 and 4 members");
// Try to allocate a contiguous block of registers, each of the correct
@@ -197,6 +196,7 @@ static bool CC_ARM_AAPCS_Custom_HA(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
const uint16_t *RegList;
unsigned NumRegs;
switch (LocVT.SimpleTy) {
+ case MVT::i32:
case MVT::f32:
RegList = SRegList;
NumRegs = 16;
@@ -235,11 +235,20 @@ static bool CC_ARM_AAPCS_Custom_HA(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
State.AllocateReg(SRegList[regNo]);
unsigned Size = LocVT.getSizeInBits() / 8;
- unsigned Align = LocVT.SimpleTy == MVT::v2f64 ? 8 : Size;
+ unsigned Align = Size;
+
+ if (LocVT.SimpleTy == MVT::v2f64 || LocVT.SimpleTy == MVT::i32) {
+ // Vectors are always aligned to 8 bytes. If we've seen an i32 here
+ // it's because it's been split from a larger type, also with align 8.
+ Align = 8;
+ }
for (auto It : PendingHAMembers) {
It.convertToMem(State.AllocateStack(Size, Align));
State.addLoc(It);
+
+ // Only the first member needs to be aligned.
+ Align = 1;
}
// All pending members have now been allocated
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 5beb752d3a4..00d07e84067 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -10778,14 +10778,13 @@ static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base,
/// \brief Return true if a type is an AAPCS-VFP homogeneous aggregate.
bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters(
Type *Ty, CallingConv::ID CallConv, bool isVarArg) const {
- if (getEffectiveCallingConv(CallConv, isVarArg) ==
- CallingConv::ARM_AAPCS_VFP) {
- HABaseType Base = HA_UNKNOWN;
- uint64_t Members = 0;
- bool result = isHomogeneousAggregate(Ty, Base, Members);
- DEBUG(dbgs() << "isHA: " << result << " "; Ty->dump(); dbgs() << "\n");
- return result;
- } else {
+ if (getEffectiveCallingConv(CallConv, isVarArg) !=
+ CallingConv::ARM_AAPCS_VFP)
return false;
- }
+
+ HABaseType Base = HA_UNKNOWN;
+ uint64_t Members = 0;
+ bool result = isHomogeneousAggregate(Ty, Base, Members);
+ DEBUG(dbgs() << "isHA: " << result << " "; Ty->dump(); dbgs() << "\n");
+ return result;
}
OpenPOWER on IntegriCloud