author     Sander de Smalen <sander.desmalen@arm.com>   2018-09-12 12:10:22 +0000
committer  Sander de Smalen <sander.desmalen@arm.com>   2018-09-12 12:10:22 +0000
commit     2d77e788f2fd571e2a525acb88f90c1eef94ff12 (patch)
tree       83217a336ce2c0449dea5adf19164b36fce661b3 /llvm/lib/Target
parent     3a8781cf6cab2413261dff6971046f4ef4a63cc4 (diff)
[AArch64] Implement aarch64_vector_pcs codegen support.
This patch adds codegen support for saving/restoring V8-V23 for functions specified with the aarch64_vector_pcs calling convention attribute, as added in patch D51477.

Reviewers: t.p.northover, gberry, thegameg, rengolin, javed.absar, MatzeB

Reviewed By: thegameg

Differential Revision: https://reviews.llvm.org/D51479

llvm-svn: 342049
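Note: a minimal caller/callee sketch (not part of this patch) of what the convention means in practice, assuming Clang's __attribute__((aarch64_vector_pcs)) spelling of the IR-level aarch64_vector_pcs convention added in D51477; the function names are invented for illustration. Because q8-q23 are callee-saved under this convention, the caller may keep vector values live in those registers across the call instead of spilling them.

    #include <arm_neon.h>

    // Hypothetical callee using the vector PCS: with this patch it must
    // save/restore any of q8-q23 it clobbers, in full 128-bit width
    // (CSR_AArch64_AAVPCS).
    __attribute__((aarch64_vector_pcs))
    float32x4_t scale(float32x4_t v, float32x4_t s) {
      return vmulq_f32(v, s);
    }

    float32x4_t accumulate(float32x4_t acc, float32x4_t v, float32x4_t s) {
      // q8-q23 survive the call, so 'acc' can stay in one of those registers
      // here rather than being spilled around the call.
      return vaddq_f32(acc, scale(v, s));
    }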
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--  llvm/lib/Target/AArch64/AArch64CallingConvention.td |   8
-rw-r--r--  llvm/lib/Target/AArch64/AArch64FrameLowering.cpp    | 119
-rw-r--r--  llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp     |   6
3 files changed, 92 insertions, 41 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64CallingConvention.td b/llvm/lib/Target/AArch64/AArch64CallingConvention.td
index 30492003df1..91fe3f237af 100644
--- a/llvm/lib/Target/AArch64/AArch64CallingConvention.td
+++ b/llvm/lib/Target/AArch64/AArch64CallingConvention.td
@@ -288,6 +288,12 @@ def CSR_AArch64_AAPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22,
D8, D9, D10, D11,
D12, D13, D14, D15)>;
+// AArch64 PCS for vector functions (VPCS)
+// must (additionally) preserve full Q8-Q23 registers
+def CSR_AArch64_AAVPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22,
+ X23, X24, X25, X26, X27, X28,
+ (sequence "Q%u", 8, 23))>;
+
// Constructors and destructors return 'this' in the iOS 64-bit C++ ABI; since
// 'this' and the pointer return value are both passed in X0 in these cases,
// this can be partially modelled by treating X0 as a callee-saved register;
@@ -362,5 +368,7 @@ def CSR_AArch64_AAPCS_SwiftError_SCS
: CalleeSavedRegs<(add CSR_AArch64_AAPCS_SwiftError, X18)>;
def CSR_AArch64_RT_MostRegs_SCS
: CalleeSavedRegs<(add CSR_AArch64_RT_MostRegs, X18)>;
+def CSR_AArch64_AAVPCS_SCS
+ : CalleeSavedRegs<(add CSR_AArch64_AAVPCS, X18)>;
def CSR_AArch64_AAPCS_SCS
: CalleeSavedRegs<(add CSR_AArch64_AAPCS, X18)>;
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 56e659056c4..40efcbe5278 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -461,12 +461,19 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
NewOpc = AArch64::STPDpre;
Scale = 8;
break;
+ case AArch64::STPQi:
+ NewOpc = AArch64::STPQpre;
+ Scale = 16;
+ break;
case AArch64::STRXui:
NewOpc = AArch64::STRXpre;
break;
case AArch64::STRDui:
NewOpc = AArch64::STRDpre;
break;
+ case AArch64::STRQui:
+ NewOpc = AArch64::STRQpre;
+ break;
case AArch64::LDPXi:
NewOpc = AArch64::LDPXpost;
Scale = 8;
@@ -475,12 +482,19 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
NewOpc = AArch64::LDPDpost;
Scale = 8;
break;
+ case AArch64::LDPQi:
+ NewOpc = AArch64::LDPQpost;
+ Scale = 16;
+ break;
case AArch64::LDRXui:
NewOpc = AArch64::LDRXpost;
break;
case AArch64::LDRDui:
NewOpc = AArch64::LDRDpost;
break;
+ case AArch64::LDRQui:
+ NewOpc = AArch64::LDRQpost;
+ break;
}
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
@@ -531,6 +545,12 @@ static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI,
case AArch64::LDRDui:
Scale = 8;
break;
+ case AArch64::STPQi:
+ case AArch64::STRQui:
+ case AArch64::LDPQi:
+ case AArch64::LDRQui:
+ Scale = 16;
+ break;
default:
llvm_unreachable("Unexpected callee-save save/restore opcode!");
}
@@ -541,7 +561,7 @@ static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI,
// Last operand is immediate offset that needs fixing.
MachineOperand &OffsetOpnd = MI.getOperand(OffsetIdx);
// All generated opcodes have scaled offsets.
- assert(LocalStackSize % 8 == 0);
+ assert(LocalStackSize % Scale == 0);
OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / Scale);
}
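Note: the two hunks above extend the scaled-immediate handling to the Q-register forms; a small standalone arithmetic sketch of the fixup this enables (plain C++, not LLVM code; the immediates and sizes are made up):

    #include <cassert>
    #include <cstdio>

    // Mirrors the formula in fixupCalleeSaveRestoreStackOffset: the LDP/STP
    // immediate is scaled by the access size, so folding the local area into
    // the SP adjustment bumps the immediate by LocalStackSize / Scale.
    static int fixedUpImm(int OldImm, int LocalStackSize, int Scale) {
      assert(LocalStackSize % Scale == 0 && "offset stays exactly representable");
      return OldImm + LocalStackSize / Scale;
    }

    int main() {
      // A 32-byte local area shifts an X-register pair (Scale 8) by 4 slots...
      std::printf("STPXi imm: %d\n", fixedUpImm(2, 32, 8));   // prints 6
      // ...but a Q-register pair (Scale 16) by only 2 slots.
      std::printf("STPQi imm: %d\n", fixedUpImm(2, 32, 16));  // prints 4
      return 0;
    }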
@@ -1208,7 +1228,7 @@ struct RegPairInfo {
unsigned Reg2 = AArch64::NoRegister;
int FrameIdx;
int Offset;
- enum RegType { GPR, FPR64 } Type;
+ enum RegType { GPR, FPR64, FPR128 } Type;
RegPairInfo() = default;
@@ -1246,6 +1266,8 @@ static void computeCalleeSaveRegisterPairs(
RPI.Type = RegPairInfo::GPR;
else if (AArch64::FPR64RegClass.contains(RPI.Reg1))
RPI.Type = RegPairInfo::FPR64;
+ else if (AArch64::FPR128RegClass.contains(RPI.Reg1))
+ RPI.Type = RegPairInfo::FPR128;
else
llvm_unreachable("Unsupported register class.");
@@ -1261,6 +1283,10 @@ static void computeCalleeSaveRegisterPairs(
if (AArch64::FPR64RegClass.contains(NextReg))
RPI.Reg2 = NextReg;
break;
+ case RegPairInfo::FPR128:
+ if (AArch64::FPR128RegClass.contains(NextReg))
+ RPI.Reg2 = NextReg;
+ break;
}
}
@@ -1294,17 +1320,21 @@ static void computeCalleeSaveRegisterPairs(
RPI.FrameIdx = CSI[i].getFrameIdx();
- if (Count * 8 != AFI->getCalleeSavedStackSize() && !RPI.isPaired()) {
- // Round up size of non-pair to pair size if we need to pad the
- // callee-save area to ensure 16-byte alignment.
- Offset -= 16;
+ int Scale = RPI.Type == RegPairInfo::FPR128 ? 16 : 8;
+ Offset -= RPI.isPaired() ? 2 * Scale : Scale;
+
+ // Round up size of non-pair to pair size if we need to pad the
+ // callee-save area to ensure 16-byte alignment.
+ if (AFI->hasCalleeSaveStackFreeSpace() &&
+ RPI.Type != RegPairInfo::FPR128 && !RPI.isPaired()) {
+ Offset -= 8;
+ assert(Offset % 16 == 0);
assert(MFI.getObjectAlignment(RPI.FrameIdx) <= 16);
MFI.setObjectAlignment(RPI.FrameIdx, 16);
- AFI->setCalleeSaveStackHasFreeSpace(true);
- } else
- Offset -= RPI.isPaired() ? 16 : 8;
- assert(Offset % 8 == 0);
- RPI.Offset = Offset / 8;
+ }
+
+ assert(Offset % Scale == 0);
+ RPI.Offset = Offset / Scale;
assert((RPI.Offset >= -64 && RPI.Offset <= 63) &&
"Offset out of bounds for LDP/STP immediate");
@@ -1370,6 +1400,11 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
Size = 8;
Align = 8;
break;
+ case RegPairInfo::FPR128:
+ StrOpc = RPI.isPaired() ? AArch64::STPQi : AArch64::STRQui;
+ Size = 16;
+ Align = 16;
+ break;
}
LLVM_DEBUG(dbgs() << "CSR spill: (" << printReg(Reg1, TRI);
if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI);
@@ -1441,6 +1476,11 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
Size = 8;
Align = 8;
break;
+ case RegPairInfo::FPR128:
+ LdrOpc = RPI.isPaired() ? AArch64::LDPQi : AArch64::LDRQui;
+ Size = 16;
+ Align = 16;
+ break;
}
LLVM_DEBUG(dbgs() << "CSR restore: (" << printReg(Reg1, TRI);
if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI);
@@ -1507,24 +1547,6 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
? RegInfo->getBaseRegister()
: (unsigned)AArch64::NoRegister;
- unsigned SpillEstimate = SavedRegs.count();
- for (unsigned i = 0; CSRegs[i]; ++i) {
- unsigned Reg = CSRegs[i];
- unsigned PairedReg = CSRegs[i ^ 1];
- if (Reg == BasePointerReg)
- SpillEstimate++;
- if (produceCompactUnwindFrame(MF) && !SavedRegs.test(PairedReg))
- SpillEstimate++;
- }
- SpillEstimate += 2; // Conservatively include FP+LR in the estimate
- unsigned StackEstimate = MFI.estimateStackSize(MF) + 8 * SpillEstimate;
-
- // The frame record needs to be created by saving the appropriate registers
- if (hasFP(MF) || windowsRequiresStackProbe(MF, StackEstimate)) {
- SavedRegs.set(AArch64::FP);
- SavedRegs.set(AArch64::LR);
- }
-
unsigned ExtraCSSpill = 0;
// Figure out which callee-saved registers to save/restore.
for (unsigned i = 0; CSRegs[i]; ++i) {
@@ -1548,7 +1570,8 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
// MachO's compact unwind format relies on all registers being stored in
// pairs.
// FIXME: the usual format is actually better if unwinding isn't needed.
- if (produceCompactUnwindFrame(MF) && !SavedRegs.test(PairedReg)) {
+ if (produceCompactUnwindFrame(MF) && PairedReg != AArch64::NoRegister &&
+ !SavedRegs.test(PairedReg)) {
SavedRegs.set(PairedReg);
if (AArch64::GPR64RegClass.contains(PairedReg) &&
!RegInfo->isReservedReg(MF, PairedReg))
@@ -1556,6 +1579,24 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
}
}
+ // Calculates the callee saved stack size.
+ unsigned CSStackSize = 0;
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ for (unsigned Reg : SavedRegs.set_bits())
+ CSStackSize += TRI->getRegSizeInBits(Reg, MRI) / 8;
+
+ // Save number of saved regs, so we can easily update CSStackSize later.
+ unsigned NumSavedRegs = SavedRegs.count();
+
+ // The frame record needs to be created by saving the appropriate registers
+ unsigned EstimatedStackSize = MFI.estimateStackSize(MF);
+ if (hasFP(MF) ||
+ windowsRequiresStackProbe(MF, EstimatedStackSize + CSStackSize + 16)) {
+ SavedRegs.set(AArch64::FP);
+ SavedRegs.set(AArch64::LR);
+ }
+
LLVM_DEBUG(dbgs() << "*** determineCalleeSaves\nUsed CSRs:";
for (unsigned Reg
: SavedRegs.set_bits()) dbgs()
@@ -1563,15 +1604,12 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
dbgs() << "\n";);
// If any callee-saved registers are used, the frame cannot be eliminated.
- unsigned NumRegsSpilled = SavedRegs.count();
- bool CanEliminateFrame = NumRegsSpilled == 0;
+ bool CanEliminateFrame = SavedRegs.count() == 0;
// The CSR spill slots have not been allocated yet, so estimateStackSize
// won't include them.
- unsigned CFSize = MFI.estimateStackSize(MF) + 8 * NumRegsSpilled;
- LLVM_DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n");
unsigned EstimatedStackSizeLimit = estimateRSStackSizeLimit(MF);
- bool BigStack = (CFSize > EstimatedStackSizeLimit);
+ bool BigStack = (EstimatedStackSize + CSStackSize) > EstimatedStackSizeLimit;
if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
AFI->setHasStackFrame(true);
@@ -1592,7 +1630,6 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
if (produceCompactUnwindFrame(MF))
SavedRegs.set(UnspilledCSGPRPaired);
ExtraCSSpill = UnspilledCSGPRPaired;
- NumRegsSpilled = SavedRegs.count();
}
// If we didn't find an extra callee-saved register to spill, create
@@ -1609,9 +1646,17 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
}
}
+ // Adding the size of additional 64bit GPR saves.
+ CSStackSize += 8 * (SavedRegs.count() - NumSavedRegs);
+ unsigned AlignedCSStackSize = alignTo(CSStackSize, 16);
+ LLVM_DEBUG(dbgs() << "Estimated stack frame size: "
+ << EstimatedStackSize + AlignedCSStackSize
+ << " bytes.\n");
+
// Round up to register pair alignment to avoid additional SP adjustment
// instructions.
- AFI->setCalleeSavedStackSize(alignTo(8 * NumRegsSpilled, 16));
+ AFI->setCalleeSavedStackSize(AlignedCSStackSize);
+ AFI->setCalleeSaveStackHasFreeSpace(AlignedCSStackSize != CSStackSize);
}
bool AArch64FrameLowering::enableStackSlotScavenging(
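Note: the reworked bookkeeping above replaces the flat "8 bytes per saved register" estimate; a standalone sketch of the new computation and of when the free-space flag gets set (plain integers and a hypothetical saved set, not the LLVM API):

    #include <cstdio>
    #include <vector>

    // Mirrors the new determineCalleeSaves arithmetic: sum the real per-register
    // sizes (TRI->getRegSizeInBits(Reg, MRI) / 8), add 8 bytes for each GPR
    // spilled afterwards, then round the total up to the 16-byte pair size.
    static unsigned alignTo16(unsigned N) { return (N + 15) & ~15u; }

    int main() {
      // FP, LR, X19 at 8 bytes each; Q8, Q9, Q10 at 16 bytes each under the VPCS.
      std::vector<unsigned> SavedRegBytes = {8, 8, 8, 16, 16, 16};
      unsigned CSStackSize = 0;
      for (unsigned Bytes : SavedRegBytes)
        CSStackSize += Bytes;                       // 72 bytes

      unsigned ExtraGPRSaves = 0;                   // no extra scavenging register here
      CSStackSize += 8 * ExtraGPRSaves;

      unsigned AlignedCSStackSize = alignTo16(CSStackSize);   // 80 bytes
      bool FreeSpace = AlignedCSStackSize != CSStackSize;     // true: 8 spare bytes
      std::printf("CSStackSize=%u aligned=%u freeSpace=%d\n",
                  CSStackSize, AlignedCSStackSize, FreeSpace);
      return 0;
    }

The 8 spare bytes reported in this example are what the padding branch in computeCalleeSaveRegisterPairs later consumes for a lone 8-byte save.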
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index ec1925e06f8..fdadcefc1f1 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -50,8 +50,7 @@ AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
if (MF->getFunction().getCallingConv() == CallingConv::AnyReg)
return CSR_AArch64_AllRegs_SaveList;
if (MF->getFunction().getCallingConv() == CallingConv::AArch64_VectorCall)
- // FIXME: default to AAPCS until we add full support.
- return CSR_AArch64_AAPCS_SaveList;
+ return CSR_AArch64_AAVPCS_SaveList;
if (MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS)
return MF->getInfo<AArch64FunctionInfo>()->isSplitCSR() ?
CSR_AArch64_CXX_TLS_Darwin_PE_SaveList :
@@ -102,8 +101,7 @@ AArch64RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
return SCS ? CSR_AArch64_CXX_TLS_Darwin_SCS_RegMask
: CSR_AArch64_CXX_TLS_Darwin_RegMask;
if (CC == CallingConv::AArch64_VectorCall)
- // FIXME: default to AAPCS until we add full support.
- return SCS ? CSR_AArch64_AAPCS_SCS_RegMask : CSR_AArch64_AAPCS_RegMask;
+ return SCS ? CSR_AArch64_AAVPCS_SCS_RegMask : CSR_AArch64_AAVPCS_RegMask;
if (MF.getSubtarget<AArch64Subtarget>().getTargetLowering()
->supportSwiftError() &&
MF.getFunction().getAttributes().hasAttrSomewhere(Attribute::SwiftError))