Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp |   2
-rw-r--r--  llvm/lib/Target/AArch64/AArch64FrameLowering.cpp     | 103
-rw-r--r--  llvm/lib/Target/AArch64/AArch64FrameLowering.h       |   5
-rw-r--r--  llvm/lib/Target/AArch64/AArch64InstrInfo.cpp          |  39
4 files changed, 108 insertions, 41 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index 9ce028cb961..082e17e44d0 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -674,7 +674,7 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
int BaseOffset = -AFI->getTaggedBasePointerOffset();
unsigned FrameReg;
StackOffset FrameRegOffset = TFI->resolveFrameOffsetReference(
- MF, BaseOffset, false /*isFixed*/, FrameReg,
+ MF, BaseOffset, false /*isFixed*/, false /*isSVE*/, FrameReg,
/*PreferFP=*/false,
/*ForSimm=*/true);
Register SrcReg = FrameReg;
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index c42c16bc1aa..addf3055ec5 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -1610,12 +1610,13 @@ StackOffset AArch64FrameLowering::resolveFrameIndexReference(
const auto &MFI = MF.getFrameInfo();
int ObjectOffset = MFI.getObjectOffset(FI);
bool isFixed = MFI.isFixedObjectIndex(FI);
- return resolveFrameOffsetReference(MF, ObjectOffset, isFixed, FrameReg,
+ bool isSVE = MFI.getStackID(FI) == TargetStackID::SVEVector;
+ return resolveFrameOffsetReference(MF, ObjectOffset, isFixed, isSVE, FrameReg,
PreferFP, ForSimm);
}
StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
- const MachineFunction &MF, int ObjectOffset, bool isFixed,
+ const MachineFunction &MF, int ObjectOffset, bool isFixed, bool isSVE,
unsigned &FrameReg, bool PreferFP, bool ForSimm) const {
const auto &MFI = MF.getFrameInfo();
const auto *RegInfo = static_cast<const AArch64RegisterInfo *>(
@@ -1629,16 +1630,17 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
!isFixed && ObjectOffset >= -((int)AFI->getCalleeSavedStackSize());
const StackOffset &SVEStackSize = getSVEStackSize(MF);
- if (SVEStackSize)
- llvm_unreachable("Accessing frame indices in presence of SVE "
- "not yet supported");
// Use frame pointer to reference fixed objects. Use it for locals if
// there are VLAs or a dynamically realigned SP (and thus the SP isn't
// reliable as a base). Make sure useFPForScavengingIndex() does the
// right thing for the emergency spill slot.
bool UseFP = false;
- if (AFI->hasStackFrame()) {
+ if (AFI->hasStackFrame() && !isSVE) {
+ // We shouldn't prefer using the FP when there is an SVE area
+ // in between the FP and the non-SVE locals/spills.
+ PreferFP &= !SVEStackSize;
+
// Note: Keeping the following as multiple 'if' statements rather than
// merging to a single expression for readability.
//
@@ -1666,8 +1668,10 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
bool CanUseBP = RegInfo->hasBasePointer(MF);
if (FPOffsetFits && CanUseBP) // Both are ok. Pick the best.
UseFP = PreferFP;
- else if (!CanUseBP) // Can't use BP. Forced to use FP.
+ else if (!CanUseBP) { // Can't use BP. Forced to use FP.
+ assert(!SVEStackSize && "Expected BP to be available");
UseFP = true;
+ }
// else we can use BP and FP, but the offset from FP won't fit.
// That will make us scavenge registers which we can probably avoid by
// using BP. If it won't fit for BP either, we'll scavenge anyway.
@@ -1697,9 +1701,36 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
"In the presence of dynamic stack pointer realignment, "
"non-argument/CSR objects cannot be accessed through the frame pointer");
+ if (isSVE) {
+ int64_t OffsetToSVEArea =
+ MFI.getStackSize() - AFI->getCalleeSavedStackSize();
+ StackOffset FPOffset = {ObjectOffset, MVT::nxv1i8};
+ StackOffset SPOffset = SVEStackSize +
+ StackOffset(ObjectOffset, MVT::nxv1i8) +
+ StackOffset(OffsetToSVEArea, MVT::i8);
+ // Always use the FP for SVE spills if available and beneficial.
+ if (hasFP(MF) &&
+ (SPOffset.getBytes() ||
+ FPOffset.getScalableBytes() < SPOffset.getScalableBytes() ||
+ RegInfo->needsStackRealignment(MF))) {
+ FrameReg = RegInfo->getFrameRegister(MF);
+ return FPOffset;
+ }
+
+ FrameReg = RegInfo->hasBasePointer(MF) ? RegInfo->getBaseRegister()
+ : (unsigned)AArch64::SP;
+ return SPOffset;
+ }
+
+ StackOffset ScalableOffset = {};
+ if (UseFP && !(isFixed || isCSR))
+ ScalableOffset = -SVEStackSize;
+ if (!UseFP && (isFixed || isCSR))
+ ScalableOffset = SVEStackSize;
+
if (UseFP) {
FrameReg = RegInfo->getFrameRegister(MF);
- return StackOffset(FPOffset, MVT::i8);
+ return StackOffset(FPOffset, MVT::i8) + ScalableOffset;
}
// Use the base pointer if we have one.
@@ -1716,7 +1747,7 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
Offset -= AFI->getLocalStackSize();
}
- return StackOffset(Offset, MVT::i8);
+ return StackOffset(Offset, MVT::i8) + ScalableOffset;
}
static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) {
@@ -2213,24 +2244,20 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
<< ' ' << printReg(Reg, RegInfo);
dbgs() << "\n";);
- bool HasSVEStackObjects = [&MFI]() {
- for (int I = MFI.getObjectIndexBegin(); I != 0; ++I)
- if (MFI.getStackID(I) == TargetStackID::SVEVector &&
- MFI.getObjectOffset(I) < 0)
- return true;
- // Note: We don't take allocatable stack objects into
- // account yet, because allocation for those is not yet
- // implemented.
- return false;
- }();
-
// If any callee-saved registers are used, the frame cannot be eliminated.
- bool CanEliminateFrame = (SavedRegs.count() == 0) && !HasSVEStackObjects;
+ unsigned MaxAlign = getStackAlignment();
+ int64_t SVEStackSize =
+ alignTo(determineSVEStackSize(MFI, MaxAlign), MaxAlign);
+ assert(MaxAlign <= 16 && "Cannot align scalable vectors more than 16 bytes");
+ bool CanEliminateFrame = (SavedRegs.count() == 0) && !SVEStackSize;
// The CSR spill slots have not been allocated yet, so estimateStackSize
// won't include them.
unsigned EstimatedStackSizeLimit = estimateRSStackSizeLimit(MF);
- bool BigStack = (EstimatedStackSize + CSStackSize) > EstimatedStackSizeLimit;
+
+ // Conservatively always assume BigStack when there are SVE spills.
+ bool BigStack = SVEStackSize ||
+ (EstimatedStackSize + CSStackSize) > EstimatedStackSizeLimit;
if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
AFI->setHasStackFrame(true);
@@ -2286,6 +2313,23 @@ bool AArch64FrameLowering::enableStackSlotScavenging(
return AFI->hasCalleeSaveStackFreeSpace();
}
+int64_t AArch64FrameLowering::determineSVEStackSize(MachineFrameInfo &MFI,
+ unsigned &MaxAlign) const {
+ // Process all fixed stack objects.
+ int64_t Offset = 0;
+ for (int I = MFI.getObjectIndexBegin(); I != 0; ++I)
+ if (MFI.getStackID(I) == TargetStackID::SVEVector) {
+ int64_t FixedOffset = -MFI.getObjectOffset(I);
+ if (FixedOffset > Offset)
+ Offset = FixedOffset;
+ }
+
+ // Note: We don't take allocatable stack objects into
+ // account yet, because allocation for those is not yet
+ // implemented.
+ return Offset;
+}
+
void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
MachineFunction &MF, RegScavenger *RS) const {
MachineFrameInfo &MFI = MF.getFrameInfo();
@@ -2293,22 +2337,11 @@ void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
assert(getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown &&
"Upwards growing stack unsupported");
- // Process all fixed stack SVE objects.
- int64_t Offset = 0;
- for (int I = MFI.getObjectIndexBegin(); I != 0; ++I) {
- unsigned StackID = MFI.getStackID(I);
- if (StackID == TargetStackID::SVEVector) {
- int64_t FixedOffset = -MFI.getObjectOffset(I);
- if (FixedOffset > Offset)
- Offset = FixedOffset;
- }
- }
-
unsigned MaxAlign = getStackAlignment();
- uint64_t SVEStackSize = alignTo(Offset, MaxAlign);
+ int64_t SVEStackSize = determineSVEStackSize(MFI, MaxAlign);
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
- AFI->setStackSizeSVE(SVEStackSize);
+ AFI->setStackSizeSVE(alignTo(SVEStackSize, MaxAlign));
assert(MaxAlign <= 16 && "Cannot align scalable vectors more than 16 bytes");
// If this function isn't doing Win64-style C++ EH, we don't need to do
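
Illustrative note (not part of the patch): the new SVE path in resolveFrameOffsetReference keeps a fixed byte component and a vector-length-scaled component of the offset separate. The sketch below is a minimal, self-contained model of that arithmetic; the struct and function names are hypothetical and are not the LLVM StackOffset API.

// Minimal model of a mixed offset: a fixed part in bytes and a part that is
// still to be multiplied by the SVE vector length at runtime.
#include <cstdint>

struct SimpleStackOffset {
  int64_t Bytes;          // fixed byte offset
  int64_t ScalableBytes;  // bytes still to be scaled by the vector length
};

static SimpleStackOffset add(SimpleStackOffset A, SimpleStackOffset B) {
  return {A.Bytes + B.Bytes, A.ScalableBytes + B.ScalableBytes};
}

// For an SVE stack object, the FP-relative offset is purely scalable, while
// the SP-relative offset also crosses the fixed-size area between the SP and
// the start of the SVE area (StackSize - CalleeSavedStackSize). This mirrors
// the FPOffset/SPOffset computation added to resolveFrameOffsetReference.
static SimpleStackOffset resolveSVEObjectOffset(int64_t ObjectOffset,
                                                int64_t SVEStackSize,
                                                int64_t StackSize,
                                                int64_t CalleeSavedStackSize,
                                                bool UseFP) {
  if (UseFP)
    return {0, ObjectOffset};
  return add({StackSize - CalleeSavedStackSize, 0},
             {0, SVEStackSize + ObjectOffset});
}
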
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
index 99d868a95a7..fb90cd85cb1 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
@@ -45,8 +45,8 @@ public:
bool ForSimm) const;
StackOffset resolveFrameOffsetReference(const MachineFunction &MF,
int ObjectOffset, bool isFixed,
- unsigned &FrameReg, bool PreferFP,
- bool ForSimm) const;
+ bool isSVE, unsigned &FrameReg,
+ bool PreferFP, bool ForSimm) const;
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
@@ -101,6 +101,7 @@ public:
private:
bool shouldCombineCSRLocalStackBump(MachineFunction &MF,
unsigned StackBumpBytes) const;
+ int64_t determineSVEStackSize(MachineFrameInfo &MF, unsigned &MaxAlign) const;
};
} // End llvm namespace
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 57782862967..895de9ac7b9 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -2198,6 +2198,18 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
MinOffset = -256;
MaxOffset = 255;
break;
+ case AArch64::LDR_PXI:
+ case AArch64::STR_PXI:
+ Scale = Width = 2;
+ MinOffset = -256;
+ MaxOffset = 255;
+ break;
+ case AArch64::LDR_ZXI:
+ case AArch64::STR_ZXI:
+ Scale = Width = 16;
+ MinOffset = -256;
+ MaxOffset = 255;
+ break;
case AArch64::ST2GOffset:
case AArch64::STZ2GOffset:
Scale = 16;
@@ -3340,6 +3352,18 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
return nullptr;
}
+static bool isSVEScaledImmInstruction(unsigned Opcode) {
+ switch (Opcode) {
+ case AArch64::LDR_ZXI:
+ case AArch64::STR_ZXI:
+ case AArch64::LDR_PXI:
+ case AArch64::STR_PXI:
+ return true;
+ default:
+ return false;
+ }
+}
+
int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
StackOffset &SOffset,
bool *OutUseUnscaledOp,
@@ -3383,9 +3407,13 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
// Construct the complete offset.
+ bool IsMulVL = isSVEScaledImmInstruction(MI.getOpcode());
+ int64_t Offset =
+ IsMulVL ? (SOffset.getScalableBytes()) : (SOffset.getBytes());
+
const MachineOperand &ImmOpnd =
MI.getOperand(AArch64InstrInfo::getLoadStoreImmIdx(MI.getOpcode()));
- int Offset = SOffset.getBytes() + ImmOpnd.getImm() * Scale;
+ Offset += ImmOpnd.getImm() * Scale;
// If the offset doesn't match the scale, we rewrite the instruction to
// use the unscaled instruction instead. Likewise, if we have a negative
@@ -3417,9 +3445,14 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
if (OutUnscaledOp && UnscaledOp)
*OutUnscaledOp = *UnscaledOp;
- SOffset = StackOffset(Offset, MVT::i8);
+ if (IsMulVL)
+ SOffset = StackOffset(Offset, MVT::nxv1i8) +
+ StackOffset(SOffset.getBytes(), MVT::i8);
+ else
+ SOffset = StackOffset(Offset, MVT::i8) +
+ StackOffset(SOffset.getScalableBytes(), MVT::nxv1i8);
return AArch64FrameOffsetCanUpdate |
- (Offset == 0 ? AArch64FrameOffsetIsLegal : 0);
+ (SOffset ? 0 : AArch64FrameOffsetIsLegal);
}
bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
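
Illustrative note (not part of the patch): for the VL-scaled SVE load/store opcodes handled above (LDR_ZXI/STR_ZXI, LDR_PXI/STR_PXI), isAArch64FrameOffsetLegal now folds the scalable component of the offset into the scaled immediate and passes the fixed component through unchanged, and the other way around for ordinary opcodes. The sketch below is a simplified, self-contained model of that selection; the helper name and the clamping are hypothetical simplifications, not the actual LLVM routine.

#include <algorithm>
#include <cstdint>

struct SimpleStackOffset {
  int64_t Bytes;          // fixed byte offset
  int64_t ScalableBytes;  // bytes still to be scaled by the vector length
};

// Pick which component feeds the scaled immediate (IsMulVL selects the
// scalable part), clamp it to the encodable range, and return the remainder
// together with the untouched other component.
static SimpleStackOffset foldIntoScaledImm(SimpleStackOffset SOffset,
                                           bool IsMulVL, int64_t Scale,
                                           int64_t MinImm, int64_t MaxImm,
                                           int64_t &NewImm) {
  int64_t Offset = IsMulVL ? SOffset.ScalableBytes : SOffset.Bytes;
  NewImm = std::clamp(Offset / Scale, MinImm, MaxImm);
  int64_t Remainder = Offset - NewImm * Scale;
  if (IsMulVL)
    return {SOffset.Bytes, Remainder};        // fixed part passes through
  return {Remainder, SOffset.ScalableBytes};  // scalable part passes through
}
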