summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp330
-rw-r--r--llvm/test/CodeGen/AArch64/arm64-abi-varargs.ll25
-rw-r--r--llvm/test/CodeGen/AArch64/arm64-abi_align.ll12
-rw-r--r--llvm/test/CodeGen/AArch64/arm64-variadic-aapcs.ll7
-rw-r--r--llvm/test/CodeGen/AArch64/machine-outliner-remarks.ll6
-rw-r--r--llvm/test/CodeGen/AArch64/machine-outliner.ll25
-rw-r--r--llvm/test/CodeGen/AArch64/stp-opt-with-renaming.mir471
7 files changed, 824 insertions, 52 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index a0c4a25bb5b..d60dc43d19b 100644
--- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -32,10 +32,12 @@
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/DebugCounter.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
+#include <functional>
#include <iterator>
#include <limits>
@@ -51,6 +53,9 @@ STATISTIC(NumUnscaledPairCreated,
STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted");
STATISTIC(NumLoadsFromStoresPromoted, "Number of loads from stores promoted");
+DEBUG_COUNTER(RegRenamingCounter, DEBUG_TYPE "-reg-renaming",
+ "Controls which pairs are considered for renaming");
+
// The LdStLimit limits how far we search for load/store pairs.
static cl::opt<unsigned> LdStLimit("aarch64-load-store-scan-limit",
cl::init(20), cl::Hidden);
@@ -76,6 +81,11 @@ using LdStPairFlags = struct LdStPairFlags {
// to be extended, 0 means I, and 1 means the returned iterator.
int SExtIdx = -1;
+ // If not none, RenameReg can be used to rename the result register of the
+ // first store in a pair. Currently this only works when merging stores
+ // forward.
+ Optional<MCPhysReg> RenameReg = None;
+
LdStPairFlags() = default;
void setMergeForward(bool V = true) { MergeForward = V; }
@@ -83,6 +93,10 @@ using LdStPairFlags = struct LdStPairFlags {
void setSExtIdx(int V) { SExtIdx = V; }
int getSExtIdx() const { return SExtIdx; }
+
+ void setRenameReg(MCPhysReg R) { RenameReg = R; }
+ void clearRenameReg() { RenameReg = None; }
+ Optional<MCPhysReg> getRenameReg() const { return RenameReg; }
};
struct AArch64LoadStoreOpt : public MachineFunctionPass {
@@ -99,6 +113,7 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
// Track which register units have been modified and used.
LiveRegUnits ModifiedRegUnits, UsedRegUnits;
+ LiveRegUnits DefinedInBB;
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AAResultsWrapperPass>();
@@ -599,8 +614,8 @@ static void getPrePostIndexedMemOpInfo(const MachineInstr &MI, int &Scale,
}
}
-static const MachineOperand &getLdStRegOp(const MachineInstr &MI,
- unsigned PairedRegOp = 0) {
+static MachineOperand &getLdStRegOp(MachineInstr &MI,
+ unsigned PairedRegOp = 0) {
assert(PairedRegOp < 2 && "Unexpected register operand idx.");
unsigned Idx = isPairedLdSt(MI) ? PairedRegOp : 0;
return MI.getOperand(Idx);
@@ -783,6 +798,44 @@ AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,
return NextI;
}
+// Walk backwards from MI (inclusive) towards the start of its basic block and
+// hand each instruction to Fn, together with a flag telling whether that
+// instruction has a def operand overlapping DefReg. The walk ends once the
+// first such defining instruction has been passed to Fn. Yields true only if
+// Fn accepted every visited instruction; yields false as soon as Fn rejects
+// one, or when Limit instructions were already visited and more remain.
+static bool forAllMIsUntilDef(MachineInstr &MI, MCPhysReg DefReg,
+                              const TargetRegisterInfo *TRI, unsigned Limit,
+                              std::function<bool(MachineInstr &, bool)> &Fn) {
+  // True for a register def operand that overlaps DefReg.
+  auto DefinesDefReg = [DefReg, TRI](MachineOperand &MOP) {
+    return MOP.isReg() && MOP.isDef() &&
+           TRI->regsOverlap(MOP.getReg(), DefReg);
+  };
+
+  MachineBasicBlock::reverse_iterator Cur = MI.getReverseIterator();
+  const MachineBasicBlock::reverse_iterator End = MI.getParent()->rend();
+  while (Cur != End) {
+    // Visiting budget used up before reaching a def or the block start.
+    if (Limit == 0)
+      return false;
+    --Limit;
+
+    const bool ReachedDef = any_of(Cur->operands(), DefinesDefReg);
+    if (!Fn(*Cur, ReachedDef))
+      return false;
+    if (ReachedDef)
+      return true;
+    ++Cur;
+  }
+  return true;
+}
+
+// Fold the operands of MI into Units: register operands flagged as kill are
+// removed first, then every register operand that is not a kill is added.
+// TRI is accepted but not consulted by this function.
+static void updateDefinedRegisters(MachineInstr &MI, LiveRegUnits &Units,
+                                   const TargetRegisterInfo *TRI) {
+  // First pass: drop the registers killed by MI.
+  for (const MachineOperand &Op : phys_regs_and_masks(MI)) {
+    if (!Op.isReg() || !Op.isKill())
+      continue;
+    Units.removeReg(Op.getReg());
+  }
+
+  // Second pass: record all remaining (non-kill) register operands.
+  for (const MachineOperand &Op : phys_regs_and_masks(MI)) {
+    if (!Op.isReg() || Op.isKill())
+      continue;
+    Units.addReg(Op.getReg());
+  }
+}
+
MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator Paired,
@@ -803,6 +856,70 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
int OffsetStride = IsUnscaled ? getMemScale(*I) : 1;
bool MergeForward = Flags.getMergeForward();
+
+ Optional<MCPhysReg> RenameReg = Flags.getRenameReg();
+ if (MergeForward && RenameReg) {
+ MCRegister RegToRename = getLdStRegOp(*I).getReg();
+ DefinedInBB.addReg(*RenameReg);
+
+ // Return the sub/super register for RenameReg, matching the size of
+ // OriginalReg.
+ auto GetMatchingSubReg = [this,
+ RenameReg](MCPhysReg OriginalReg) -> MCPhysReg {
+ for (MCPhysReg SubOrSuper : TRI->sub_and_superregs_inclusive(*RenameReg))
+ if (TRI->getMinimalPhysRegClass(OriginalReg) ==
+ TRI->getMinimalPhysRegClass(SubOrSuper))
+ return SubOrSuper;
+ llvm_unreachable("Should have found matching sub or super register!");
+ };
+
+    // Callback applied to each instruction visited on the way back to the
+    // definition of RegToRename: every operand selected below is switched to
+    // the register GetMatchingSubReg returns for it. IsDef tells whether the
+    // visited instruction is the defining one. Always returns true.
+    std::function<bool(MachineInstr &, bool)> UpdateMIs =
+        [this, RegToRename, GetMatchingSubReg](MachineInstr &MI, bool IsDef) {
+          if (IsDef) {
+            bool SeenDef = false;
+            for (auto &MOP : MI.operands()) {
+              // Rename the first explicit definition and all implicit
+              // definitions matching RegToRename.
+              if (MOP.isReg() &&
+                  (!SeenDef || (MOP.isDef() && MOP.isImplicit())) &&
+                  TRI->regsOverlap(MOP.getReg(), RegToRename)) {
+                assert((MOP.isImplicit() ||
+                        (MOP.isRenamable() && !MOP.isEarlyClobber())) &&
+                       "Need renamable operands");
+                MOP.setReg(GetMatchingSubReg(MOP.getReg()));
+                SeenDef = true;
+              }
+            }
+          } else {
+            for (auto &MOP : MI.operands()) {
+              if (MOP.isReg() && TRI->regsOverlap(MOP.getReg(), RegToRename)) {
+                // The condition is fully parenthesized so the message string
+                // is attached to the whole check (and not only to the
+                // right-hand side of the ||), matching the assert above and
+                // avoiding a -Wparentheses diagnostic.
+                assert((MOP.isImplicit() ||
+                        (MOP.isRenamable() && !MOP.isEarlyClobber())) &&
+                       "Need renamable operands");
+                MOP.setReg(GetMatchingSubReg(MOP.getReg()));
+              }
+            }
+          }
+          LLVM_DEBUG(dbgs() << "Renamed " << MI << "\n");
+          return true;
+        };
+ forAllMIsUntilDef(*I, RegToRename, TRI, LdStLimit, UpdateMIs);
+
+ // Make sure the register used for renaming is not used between the paired
+ // instructions. That would trash the content before the new paired
+ // instruction.
+ for (auto &MI :
+ iterator_range<MachineInstrBundleIterator<llvm::MachineInstr>>(
+ std::next(I), std::next(Paired)))
+ assert(all_of(MI.operands(),
+ [this, &RenameReg](const MachineOperand &MOP) {
+ return !MOP.isReg() ||
+ !TRI->regsOverlap(MOP.getReg(), *RenameReg);
+ }) &&
+ "Rename register used between paired instruction, trashing the "
+ "content");
+ }
+
// Insert our new paired instruction after whichever of the paired
// instructions MergeForward indicates.
MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I;
@@ -931,6 +1048,11 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
}
LLVM_DEBUG(dbgs() << "\n");
+ if (MergeForward)
+ for (const MachineOperand &MOP : phys_regs_and_masks(*I))
+ if (MOP.isReg() && MOP.isKill())
+ DefinedInBB.addReg(MOP.getReg());
+
// Erase the old instructions.
I->eraseFromParent();
Paired->eraseFromParent();
@@ -1207,6 +1329,144 @@ static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI,
// FIXME: Can we also match a mixed sext/zext unscaled/scaled pair?
}
+// Decide whether the register stored by FirstMI can be renamed in every
+// instruction from FirstMI back to the previous definition of that register
+// in the block. While walking, each visited instruction is accumulated into
+// UsedInBetween and the minimal register classes of the operands that would
+// need renaming are inserted into RequiredClasses. Returns false if FirstMI
+// is not a store, liveness is not tracked, the stored register is not killed
+// at FirstMI, some visited operand cannot be renamed, a frame-setup
+// instruction is encountered, or no definition is found within the scan
+// limit.
+static bool
+canRenameUpToDef(MachineInstr &FirstMI, LiveRegUnits &UsedInBetween,
+                 SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
+                 const TargetRegisterInfo *TRI) {
+  // Only stores are candidates.
+  if (!FirstMI.mayStore())
+    return false;
+
+  // Check if we can find an unused register which we can use to rename
+  // the register used by the first load/store.
+  MachineOperand &StoredOp = getLdStRegOp(FirstMI);
+  auto *RegClass = TRI->getMinimalPhysRegClass(StoredOp.getReg());
+  MachineFunction &MF = *FirstMI.getParent()->getParent();
+  if (!RegClass || !MF.getRegInfo().tracksLiveness())
+    return false;
+
+  auto RegToRename = StoredOp.getReg();
+
+  // An implicit kill operand overlapping RegToRename also counts as a kill.
+  auto IsImplicitKillOfReg = [TRI, RegToRename](const MachineOperand &MOP) {
+    return MOP.isReg() && MOP.isImplicit() && MOP.isKill() &&
+           TRI->regsOverlap(RegToRename, MOP.getReg());
+  };
+
+  // For now, we only rename if the store operand gets killed at the store.
+  const bool KilledAtStore =
+      StoredOp.isKill() || any_of(FirstMI.operands(), IsImplicitKillOfReg);
+  if (!KilledAtStore) {
+    LLVM_DEBUG(dbgs() << " Operand not killed at " << FirstMI << "\n");
+    return false;
+  }
+
+  // Implicit operands are always acceptable; explicit ones must be renamable
+  // and neither early-clobber nor tied.
+  auto IsRenamableOperand = [](const MachineOperand &MOP) {
+    if (MOP.isImplicit())
+      return true;
+    return MOP.isRenamable() && !MOP.isEarlyClobber() && !MOP.isTied();
+  };
+
+  bool FoundDef = false;
+
+  // For each instruction between FirstMI and the previous def for RegToRename,
+  // we
+  // * check if we can rename RegToRename in this instruction
+  // * collect the registers used and required register classes for RegToRename.
+  std::function<bool(MachineInstr &, bool)> CheckMIs = [&](MachineInstr &MI,
+                                                           bool IsDef) {
+    LLVM_DEBUG(dbgs() << "Checking " << MI << "\n");
+    // Currently we do not try to rename across frame-setup instructions.
+    if (MI.getFlag(MachineInstr::FrameSetup)) {
+      LLVM_DEBUG(dbgs() << " Cannot rename framesetup instructions currently ("
+                        << MI << ")\n");
+      return false;
+    }
+
+    UsedInBetween.accumulate(MI);
+
+    // Remember whether the instruction visited last was the definition.
+    FoundDef = IsDef;
+
+    // On the defining instruction only def operands are inspected; on all
+    // other instructions every operand overlapping RegToRename is inspected.
+    for (auto &MOP : MI.operands()) {
+      if (!MOP.isReg() || (IsDef && !MOP.isDef()) ||
+          !TRI->regsOverlap(MOP.getReg(), RegToRename))
+        continue;
+
+      if (!IsRenamableOperand(MOP)) {
+        LLVM_DEBUG(dbgs()
+                   << " Cannot rename " << MOP << " in " << MI << "\n");
+        return false;
+      }
+      RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
+    }
+    return true;
+  };
+
+  if (!forAllMIsUntilDef(FirstMI, RegToRename, TRI, LdStLimit, CheckMIs))
+    return false;
+
+  if (FoundDef)
+    return true;
+
+  LLVM_DEBUG(dbgs() << " Did not find definition for register in BB\n");
+  return false;
+}
+
+// Search the minimal register class of the register stored by FirstMI for a
+// physical register that can serve as rename target. A candidate qualifies if
+// it
+// * is available in DefinedInBB,
+// * is available in UsedInBetween,
+// * has no callee-saved sub- or super-register (itself included), and
+// * offers, for every class in RequiredClasses, a sub- or super-register
+//   (itself included) whose minimal register class is exactly that class.
+// The first qualifying register is added to DefinedInBB and returned; None is
+// returned when no register qualifies. MI is accepted but not consulted.
+static Optional<MCPhysReg> tryToFindRegisterToRename(
+    MachineInstr &FirstMI, MachineInstr &MI, LiveRegUnits &DefinedInBB,
+    LiveRegUnits &UsedInBetween,
+    SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
+    const TargetRegisterInfo *TRI) {
+  auto &MF = *FirstMI.getParent()->getParent();
+
+  // True if Reg, or any of its sub- or super-registers, is callee saved.
+  auto TouchesCalleeSaved = [&MF, TRI](MCPhysReg Reg) {
+    auto IsCalleeSaved = [&MF, TRI](MCPhysReg Related) {
+      return TRI->isCalleeSavedPhysReg(Related, MF);
+    };
+    return any_of(TRI->sub_and_superregs_inclusive(Reg), IsCalleeSaved);
+  };
+
+  // True if every required class is the minimal class of Reg or of one of its
+  // sub- or super-registers.
+  auto CoversRequiredClasses = [&RequiredClasses, TRI](MCPhysReg Reg) {
+    auto ClassIsCovered = [Reg, TRI](const TargetRegisterClass *Wanted) {
+      auto HasWantedClass = [Wanted, TRI](MCPhysReg Related) {
+        return Wanted == TRI->getMinimalPhysRegClass(Related);
+      };
+      return any_of(TRI->sub_and_superregs_inclusive(Reg), HasWantedClass);
+    };
+    return all_of(RequiredClasses, ClassIsCovered);
+  };
+
+  auto *RegClass = TRI->getMinimalPhysRegClass(getLdStRegOp(FirstMI).getReg());
+  for (const MCPhysReg &Candidate : *RegClass) {
+    if (!DefinedInBB.available(Candidate))
+      continue;
+    if (!UsedInBetween.available(Candidate))
+      continue;
+    if (TouchesCalleeSaved(Candidate))
+      continue;
+    if (!CoversRequiredClasses(Candidate))
+      continue;
+
+    // Reserve the register so that it is not handed out a second time.
+    DefinedInBB.addReg(Candidate);
+    LLVM_DEBUG(dbgs() << "Found rename register " << printReg(Candidate, TRI)
+                      << "\n");
+    return Optional<MCPhysReg>(Candidate);
+  }
+
+  LLVM_DEBUG(dbgs() << "No rename register found from "
+                    << TRI->getRegClassName(RegClass) << "\n");
+  return None;
+}
+
/// Scan the instructions looking for a load/store that can be combined with the
/// current instruction into a wider equivalent or a load/store pair.
MachineBasicBlock::iterator
@@ -1215,6 +1475,7 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
bool FindNarrowMerge) {
MachineBasicBlock::iterator E = I->getParent()->end();
MachineBasicBlock::iterator MBBI = I;
+ MachineBasicBlock::iterator MBBIWithRenameReg;
MachineInstr &FirstMI = *I;
++MBBI;
@@ -1226,6 +1487,13 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
int OffsetStride = IsUnscaled ? getMemScale(FirstMI) : 1;
bool IsPromotableZeroStore = isPromotableZeroStoreInst(FirstMI);
+ Optional<bool> MaybeCanRename = None;
+ SmallPtrSet<const TargetRegisterClass *, 5> RequiredClasses;
+ LiveRegUnits UsedInBetween;
+ UsedInBetween.init(*TRI);
+
+ Flags.clearRenameReg();
+
// Track which register units have been modified and used between the first
// insn (inclusive) and the second insn.
ModifiedRegUnits.clear();
@@ -1237,6 +1505,8 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) {
MachineInstr &MI = *MBBI;
+ UsedInBetween.accumulate(MI);
+
// Don't count transient instructions towards the search limit since there
// may be different numbers of them if e.g. debug information is present.
if (!MI.isTransient())
@@ -1329,7 +1599,9 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
!(MI.mayLoad() &&
!UsedRegUnits.available(getLdStRegOp(MI).getReg())) &&
!mayAlias(MI, MemInsns, AA)) {
+
Flags.setMergeForward(false);
+ Flags.clearRenameReg();
return MBBI;
}
@@ -1337,18 +1609,41 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
// between the two instructions and none of the instructions between the
// first and the second alias with the first, we can combine the first
// into the second.
- if (ModifiedRegUnits.available(getLdStRegOp(FirstMI).getReg()) &&
- !(MayLoad &&
+ if (!(MayLoad &&
!UsedRegUnits.available(getLdStRegOp(FirstMI).getReg())) &&
!mayAlias(FirstMI, MemInsns, AA)) {
- Flags.setMergeForward(true);
- return MBBI;
+
+ if (ModifiedRegUnits.available(getLdStRegOp(FirstMI).getReg())) {
+ Flags.setMergeForward(true);
+ Flags.clearRenameReg();
+ return MBBI;
+ }
+
+ if (DebugCounter::shouldExecute(RegRenamingCounter)) {
+ if (!MaybeCanRename)
+ MaybeCanRename = {canRenameUpToDef(FirstMI, UsedInBetween,
+ RequiredClasses, TRI)};
+
+ if (*MaybeCanRename) {
+ Optional<MCPhysReg> MaybeRenameReg = tryToFindRegisterToRename(
+ FirstMI, MI, DefinedInBB, UsedInBetween, RequiredClasses,
+ TRI);
+ if (MaybeRenameReg) {
+ Flags.setRenameReg(*MaybeRenameReg);
+ Flags.setMergeForward(true);
+ MBBIWithRenameReg = MBBI;
+ }
+ }
+ }
}
// Unable to combine these instructions due to interference in between.
// Keep looking.
}
}
+ if (Flags.getRenameReg())
+ return MBBIWithRenameReg;
+
// If the instruction wasn't a matching load or store. Stop searching if we
// encounter a call instruction that might modify memory.
if (MI.isCall())
@@ -1680,7 +1975,13 @@ bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
++NumUnscaledPairCreated;
// Keeping the iterator straight is a pain, so we let the merge routine tell
// us what the next instruction is after it's done mucking about.
+ auto Prev = std::prev(MBBI);
MBBI = mergePairedInsns(MBBI, Paired, Flags);
+ // Collect liveness info for instructions between Prev and the new position
+ // MBBI.
+ for (auto I = std::next(Prev); I != MBBI; I++)
+ updateDefinedRegisters(*I, DefinedInBB, TRI);
+
return true;
}
return false;
@@ -1742,6 +2043,7 @@ bool AArch64LoadStoreOpt::tryToMergeLdStUpdate
bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
bool EnableNarrowZeroStOpt) {
+
bool Modified = false;
// Four tranformations to do here:
// 1) Find loads that directly read from stores and promote them by
@@ -1786,8 +2088,17 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
// ldr x1, [x2, #8]
// ; becomes
// ldp x0, x1, [x2]
+
+ if (MBB.getParent()->getRegInfo().tracksLiveness()) {
+ DefinedInBB.clear();
+ DefinedInBB.addLiveIns(MBB);
+ }
+
for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
MBBI != E;) {
+ // Track currently live registers up to this point, to help with
+ // searching for a rename register on demand.
+ updateDefinedRegisters(*MBBI, DefinedInBB, TRI);
if (TII->isPairableLdStInst(*MBBI) && tryToPairLdStInst(MBBI))
Modified = true;
else
@@ -1825,11 +2136,14 @@ bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
// or store.
ModifiedRegUnits.init(*TRI);
UsedRegUnits.init(*TRI);
+ DefinedInBB.init(*TRI);
bool Modified = false;
bool enableNarrowZeroStOpt = !Subtarget->requiresStrictAlign();
- for (auto &MBB : Fn)
- Modified |= optimizeBlock(MBB, enableNarrowZeroStOpt);
+ for (auto &MBB : Fn) {
+ auto M = optimizeBlock(MBB, enableNarrowZeroStOpt);
+ Modified |= M;
+ }
return Modified;
}
diff --git a/llvm/test/CodeGen/AArch64/arm64-abi-varargs.ll b/llvm/test/CodeGen/AArch64/arm64-abi-varargs.ll
index ec3b51bd37a..7caa4c06d69 100644
--- a/llvm/test/CodeGen/AArch64/arm64-abi-varargs.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-abi-varargs.ll
@@ -14,10 +14,9 @@ define void @fn9(i32* %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7,
; CHECK-NEXT: stp w6, w5, [sp, #36]
; CHECK-NEXT: str w7, [sp, #32]
; CHECK-NEXT: str w8, [x0]
-; CHECK-NEXT: ldr w8, [sp, #72]
-; CHECK-NEXT: str w8, [sp, #20]
+; CHECK-NEXT: ldr w9, [sp, #72]
; CHECK-NEXT: ldr w8, [sp, #80]
-; CHECK-NEXT: str w8, [sp, #16]
+; CHECK-NEXT: stp w8, w9, [sp, #16]
; CHECK-NEXT: add x8, sp, #72 ; =72
; CHECK-NEXT: add x8, x8, #24 ; =24
; CHECK-NEXT: str x8, [sp, #24]
@@ -65,22 +64,18 @@ define i32 @main() nounwind ssp {
; CHECK: ; %bb.0:
; CHECK-NEXT: sub sp, sp, #96 ; =96
; CHECK-NEXT: stp x29, x30, [sp, #80] ; 16-byte Folded Spill
-; CHECK-NEXT: mov w8, #1
-; CHECK-NEXT: str w8, [sp, #76]
+; CHECK-NEXT: mov w9, #1
; CHECK-NEXT: mov w8, #2
-; CHECK-NEXT: str w8, [sp, #72]
-; CHECK-NEXT: mov w8, #3
-; CHECK-NEXT: str w8, [sp, #68]
+; CHECK-NEXT: stp w8, w9, [sp, #72]
+; CHECK-NEXT: mov w9, #3
; CHECK-NEXT: mov w8, #4
-; CHECK-NEXT: str w8, [sp, #64]
-; CHECK-NEXT: mov w8, #5
-; CHECK-NEXT: str w8, [sp, #60]
+; CHECK-NEXT: stp w8, w9, [sp, #64]
+; CHECK-NEXT: mov w9, #5
; CHECK-NEXT: mov w8, #6
-; CHECK-NEXT: str w8, [sp, #56]
-; CHECK-NEXT: mov w8, #7
-; CHECK-NEXT: str w8, [sp, #52]
+; CHECK-NEXT: stp w8, w9, [sp, #56]
+; CHECK-NEXT: mov w9, #7
; CHECK-NEXT: mov w8, #8
-; CHECK-NEXT: str w8, [sp, #48]
+; CHECK-NEXT: stp w8, w9, [sp, #48]
; CHECK-NEXT: mov w8, #9
; CHECK-NEXT: mov w9, #10
; CHECK-NEXT: stp w9, w8, [sp, #40]
diff --git a/llvm/test/CodeGen/AArch64/arm64-abi_align.ll b/llvm/test/CodeGen/AArch64/arm64-abi_align.ll
index 7db3ea76de0..189546a4553 100644
--- a/llvm/test/CodeGen/AArch64/arm64-abi_align.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-abi_align.ll
@@ -392,10 +392,8 @@ entry:
define i32 @caller43() #3 {
entry:
; CHECK-LABEL: caller43
-; CHECK-DAG: str {{q[0-9]+}}, [sp, #48]
-; CHECK-DAG: str {{q[0-9]+}}, [sp, #32]
-; CHECK-DAG: str {{q[0-9]+}}, [sp, #16]
-; CHECK-DAG: str {{q[0-9]+}}, [sp]
+; CHECK-DAG: stp q1, q0, [sp, #32]
+; CHECK-DAG: stp q1, q0, [sp]
; CHECK: add x1, sp, #32
; CHECK: mov x2, sp
; Space for s1 is allocated at sp+32
@@ -434,10 +432,8 @@ entry:
; CHECK-LABEL: caller43_stack
; CHECK: sub sp, sp, #112
; CHECK: add x29, sp, #96
-; CHECK-DAG: stur {{q[0-9]+}}, [x29, #-16]
-; CHECK-DAG: stur {{q[0-9]+}}, [x29, #-32]
-; CHECK-DAG: str {{q[0-9]+}}, [sp, #48]
-; CHECK-DAG: str {{q[0-9]+}}, [sp, #32]
+; CHECK-DAG: stp q1, q0, [x29, #-32]
+; CHECK-DAG: stp q1, q0, [sp, #32]
; Space for s1 is allocated at x29-32 = sp+64
; Space for s2 is allocated at sp+32
; CHECK: add x[[B:[0-9]+]], sp, #32
diff --git a/llvm/test/CodeGen/AArch64/arm64-variadic-aapcs.ll b/llvm/test/CodeGen/AArch64/arm64-variadic-aapcs.ll
index db87d7fae80..5f4c17d0cfb 100644
--- a/llvm/test/CodeGen/AArch64/arm64-variadic-aapcs.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-variadic-aapcs.ll
@@ -26,11 +26,11 @@ define void @test_simple(i32 %n, ...) {
; CHECK: add [[GR_TOPTMP:x[0-9]+]], sp, #[[GR_BASE]]
; CHECK: add [[GR_TOP:x[0-9]+]], [[GR_TOPTMP]], #56
-; CHECK: str [[GR_TOP]], [x[[VA_LIST]], #8]
+
; CHECK: mov [[VR_TOPTMP:x[0-9]+]], sp
; CHECK: add [[VR_TOP:x[0-9]+]], [[VR_TOPTMP]], #128
-; CHECK: str [[VR_TOP]], [x[[VA_LIST]], #16]
+; CHECK: stp [[GR_TOP]], [[VR_TOP]], [x[[VA_LIST]], #8]
; CHECK: mov [[GRVR:x[0-9]+]], #-56
; CHECK: movk [[GRVR]], #65408, lsl #32
@@ -62,11 +62,10 @@ define void @test_fewargs(i32 %n, i32 %n1, i32 %n2, float %m, ...) {
; CHECK: add [[GR_TOPTMP:x[0-9]+]], sp, #[[GR_BASE]]
; CHECK: add [[GR_TOP:x[0-9]+]], [[GR_TOPTMP]], #40
-; CHECK: str [[GR_TOP]], [x[[VA_LIST]], #8]
; CHECK: mov [[VR_TOPTMP:x[0-9]+]], sp
; CHECK: add [[VR_TOP:x[0-9]+]], [[VR_TOPTMP]], #112
-; CHECK: str [[VR_TOP]], [x[[VA_LIST]], #16]
+; CHECK: stp [[GR_TOP]], [[VR_TOP]], [x[[VA_LIST]], #8]
; CHECK: mov [[GRVR_OFFS:x[0-9]+]], #-40
; CHECK: movk [[GRVR_OFFS]], #65424, lsl #32
diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-remarks.ll b/llvm/test/CodeGen/AArch64/machine-outliner-remarks.ll
index 19351262b82..f188301579e 100644
--- a/llvm/test/CodeGen/AArch64/machine-outliner-remarks.ll
+++ b/llvm/test/CodeGen/AArch64/machine-outliner-remarks.ll
@@ -4,7 +4,7 @@
; CHECK-SAME: Bytes from outlining all occurrences (16) >=
; CHECK-SAME: Unoutlined instruction bytes (16)
; CHECK-SAME: (Also found at: <UNKNOWN LOCATION>)
-; CHECK: remark: <unknown>:0:0: Saved 48 bytes by outlining 14 instructions
+; CHECK: remark: <unknown>:0:0: Saved 36 bytes by outlining 11 instructions
; CHECK-SAME: from 2 locations. (Found at: <UNKNOWN LOCATION>,
; CHECK-SAME: <UNKNOWN LOCATION>)
; RUN: llc %s -enable-machine-outliner -mtriple=aarch64-unknown-unknown -o /dev/null -pass-remarks-missed=machine-outliner -pass-remarks-output=%t.yaml
@@ -38,10 +38,10 @@
; YAML-NEXT: Function: OUTLINED_FUNCTION_0
; YAML-NEXT: Args:
; YAML-NEXT: - String: 'Saved '
-; YAML-NEXT: - OutliningBenefit: '48'
+; YAML-NEXT: - OutliningBenefit: '36'
; YAML-NEXT: - String: ' bytes by '
; YAML-NEXT: - String: 'outlining '
-; YAML-NEXT: - Length: '14'
+; YAML-NEXT: - Length: '11'
; YAML-NEXT: - String: ' instructions '
; YAML-NEXT: - String: 'from '
; YAML-NEXT: - NumOccurrences: '2'
diff --git a/llvm/test/CodeGen/AArch64/machine-outliner.ll b/llvm/test/CodeGen/AArch64/machine-outliner.ll
index 15afdd43d11..13a12f76695 100644
--- a/llvm/test/CodeGen/AArch64/machine-outliner.ll
+++ b/llvm/test/CodeGen/AArch64/machine-outliner.ll
@@ -91,19 +91,16 @@ define void @dog() #0 {
; ODR: [[OUTLINED]]:
; CHECK: .p2align 2
; CHECK-NEXT: [[OUTLINED]]:
-; CHECK: mov w8, #1
-; CHECK-NEXT: str w8, [sp, #28]
-; CHECK-NEXT: mov w8, #2
-; CHECK-NEXT: str w8, [sp, #24]
-; CHECK-NEXT: mov w8, #3
-; CHECK-NEXT: str w8, [sp, #20]
-; CHECK-NEXT: mov w8, #4
-; CHECK-NEXT: str w8, [sp, #16]
-; CHECK-NEXT: mov w8, #5
-; CHECK-NEXT: str w8, [sp, #12]
-; CHECK-NEXT: mov w8, #6
-; CHECK-NEXT: str w8, [sp, #8]
-; CHECK-NEXT: add sp, sp, #32
-; CHECK-NEXT: ret
+; CHECK: mov w9, #1
+; CHECK-DAG: mov w8, #2
+; CHECK-DAG: stp w8, w9, [sp, #24]
+; CHECK-DAG: mov w9, #3
+; CHECK-DAG: mov w8, #4
+; CHECK-DAG: stp w8, w9, [sp, #16]
+; CHECK-DAG: mov w9, #5
+; CHECK-DAG: mov w8, #6
+; CHECK-DAG: stp w8, w9, [sp, #8]
+; CHECK-DAG: add sp, sp, #32
+; CHECK-DAG: ret
attributes #0 = { noredzone "target-cpu"="cyclone" "target-features"="+sse" }
diff --git a/llvm/test/CodeGen/AArch64/stp-opt-with-renaming.mir b/llvm/test/CodeGen/AArch64/stp-opt-with-renaming.mir
new file mode 100644
index 00000000000..018827772da
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/stp-opt-with-renaming.mir
@@ -0,0 +1,471 @@
+# RUN: llc -run-pass=aarch64-ldst-opt -mtriple=arm64-apple-iphoneos -verify-machineinstrs -o - %s | FileCheck %s
+
+---
+# CHECK-LABEL: name: test1
+# CHECK: bb.0:
+# CHECK-NEXT: liveins: $x0, $x1
+# CHECK: $x10, renamable $x8 = LDPXi renamable $x0, 0 :: (load 8)
+# CHECK-NEXT: renamable $x9 = LDRXui renamable $x0, 1 :: (load 8)
+# CHECK-NEXT: STRXui renamable $x9, renamable $x0, 100 :: (store 8, align 4)
+# CHECK-NEXT: renamable $x8 = ADDXrr $x8, $x8
+# CHECK-NEXT: STPXi renamable $x8, killed $x10, renamable $x0, 10 :: (store 8, align 4)
+# CHECK-NEXT: RET undef $lr
+
+name: test1
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x0' }
+ - { reg: '$x1' }
+ - { reg: '$x8' }
+frameInfo:
+ maxAlignment: 1
+ maxCallFrameSize: 0
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $x0, $x1
+ renamable $x9, renamable $x8 = LDPXi renamable $x0, 0 :: (load 8)
+ STRXui renamable killed $x9, renamable $x0, 11 :: (store 8, align 4)
+ renamable $x9 = LDRXui renamable $x0, 1 :: (load 8)
+ STRXui renamable $x9, renamable $x0, 100 :: (store 8, align 4)
+ renamable $x8 = ADDXrr $x8, $x8
+ STRXui renamable $x8, renamable $x0, 10 :: (store 8, align 4)
+ RET undef $lr
+
+...
+---
+# CHECK-LABEL: name: test2
+# CHECK-LABEL: bb.0:
+# CHECK-NEXT: liveins: $x0, $x9, $x1
+
+# CHECK: $x10, renamable $x8 = LDPXi renamable $x9, 0 :: (load 8)
+# CHECK-NEXT: renamable $x9 = LDRXui renamable $x0, 2 :: (load 8)
+# CHECK-NEXT: STRXui renamable $x9, renamable $x0, 100 :: (store 8, align 4)
+# CHECK-NEXT: renamable $x8 = ADDXrr $x8, $x8
+# CHECK-NEXT: STPXi renamable $x8, killed $x10, renamable $x0, 10 :: (store 8, align 4)
+# CHECK-NEXT: RET undef $lr
+
+name: test2
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x0' }
+ - { reg: '$x1' }
+ - { reg: '$x9' }
+frameInfo:
+ maxAlignment: 1
+ maxCallFrameSize: 0
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $x0, $x9, $x1
+ renamable $x9, renamable $x8 = LDPXi renamable $x9, 0 :: (load 8)
+ STRXui renamable killed $x9, renamable $x0, 11 :: (store 8, align 4)
+ renamable $x9 = LDRXui renamable $x0, 2 :: (load 8)
+ STRXui renamable $x9, renamable $x0, 100 :: (store 8, align 4)
+ renamable $x8 = ADDXrr $x8, $x8
+ STRXui renamable $x8, renamable $x0, 10 :: (store 8, align 4)
+ RET undef $lr
+
+...
+---
+# MOVK has a tied operand and we currently do not rename across tied defs.
+# CHECK-LABEL: bb.0:
+# CHECK-NEXT: liveins: $x0
+#
+# CHECK: renamable $x8 = MRS 58880
+# CHECK-NEXT: renamable $x8 = MOVZXi 15309, 0
+# CHECK-NEXT: renamable $x8 = MOVKXi renamable $x8, 26239, 16
+# CHECK-NEXT: STRXui renamable $x8, renamable $x0, 0, implicit killed $x8 :: (store 8)
+# CHECK-NEXT: renamable $x8 = MRS 55840
+# CHECK-NEXT: STRXui killed renamable $x8, killed renamable $x0, 1, implicit killed $x8 :: (store 8)
+# CHECK-NEXT: RET undef $lr
+#
+name: test3
+alignment: 2
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x0' }
+frameInfo:
+ maxCallFrameSize: 0
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $x0
+
+ renamable $x8 = MRS 58880
+ renamable $x8 = MOVZXi 15309, 0
+ renamable $x8 = MOVKXi renamable $x8, 26239, 16
+ STRXui renamable $x8, renamable $x0, 0, implicit killed $x8 :: (store 8)
+ renamable $x8 = MRS 55840
+ STRXui killed renamable $x8, renamable killed $x0, 1, implicit killed $x8 :: (store 8)
+ RET undef $lr
+
+...
+---
+# CHECK-LABEL: name: test4
+# CHECK-LABEL: bb.0:
+# CHECK-NEXT: liveins: $x0, $x1
+
+# CHECK: $x9 = MRS 58880
+# CHECK-NEXT: renamable $x8 = MRS 55840
+# CHECK-NEXT: STPXi $x9, killed renamable $x8, killed renamable $x0, 0 :: (store 4)
+# CHECK-NEXT: RET undef $lr
+
+name: test4
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x0' }
+ - { reg: '$x1' }
+ - { reg: '$x8' }
+frameInfo:
+ maxAlignment: 1
+ maxCallFrameSize: 0
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $x0, $x1
+
+ renamable $x8 = MRS 58880
+ STRXui renamable $x8, renamable $x0, 0, implicit killed $x8 :: (store 4)
+ renamable $x8 = MRS 55840
+ STRXui killed renamable $x8, renamable killed $x0, 1, implicit killed $x8 :: (store 4)
+ RET undef $lr
+
+...
+---
+# CHECK-LABEL: name: test5
+# CHECK-LABEL: bb.0:
+# CHECK-NEXT: liveins: $x0, $x1
+
+# CHECK: $x9 = MRS 58880
+# CHECK-NEXT: renamable $x8 = MRS 55840
+# CHECK-NEXT: STPWi $w9, killed renamable $w8, killed renamable $x0, 0 :: (store 4)
+# CHECK-NEXT: RET undef $lr
+
+name: test5
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x0' }
+ - { reg: '$x1' }
+ - { reg: '$x8' }
+frameInfo:
+ maxAlignment: 1
+ maxCallFrameSize: 0
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $x0, $x1
+
+ renamable $x8 = MRS 58880
+ STRWui renamable $w8, renamable $x0, 0, implicit killed $x8 :: (store 4)
+ renamable $x8 = MRS 55840
+ STRWui killed renamable $w8, renamable killed $x0, 1, implicit killed $x8 :: (store 4)
+ RET undef $lr
+
+...
+---
+# CHECK-LABEL: name: test6
+# CHECK-LABEL bb.0:
+# CHECK: liveins: $x0, $x1, $q3
+
+# CHECK: renamable $q9 = LDRQui $x0, 0 :: (load 16)
+# CHECK-NEXT: renamable $q9 = XTNv8i16 renamable $q9, killed renamable $q3
+# CHECK-NEXT: STRQui renamable $q9, renamable $x0, 11 :: (store 16, align 4)
+# CHECK-NEXT: renamable $q9 = FADDv2f64 renamable $q9, renamable $q9
+# CHECK-NEXT: STRQui renamable $q9, renamable $x0, 10 :: (store 16, align 4)
+# CHECK-NEXT: RET undef $lr
+
+# XTN has a tied use-def.
+name: test6
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x0' }
+ - { reg: '$x1' }
+ - { reg: '$x8' }
+ - { reg: '$q3' }
+frameInfo:
+ maxAlignment: 1
+ maxCallFrameSize: 0
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $x0, $x1, $q3
+ renamable $q9 = LDRQui $x0, 0 :: (load 16)
+ renamable $q9 = XTNv8i16 renamable $q9, killed renamable $q3
+ STRQui renamable $q9, renamable $x0, 11 :: (store 16, align 4)
+ renamable $q9 = FADDv2f64 renamable $q9, renamable $q9
+ STRQui renamable $q9, renamable $x0, 10 :: (store 16, align 4)
+ RET undef $lr
+
+...
+---
+# Currently we do not rename across frame-setup instructions.
+# CHECK-LABEL: name: test7
+# CHECK-LABEL: bb.0:
+# CHECK-NEXT: liveins: $x0, $x1
+
+# CHECK: $sp = frame-setup SUBXri $sp, 64, 0
+# CHECK-NEXT: renamable $x9 = frame-setup LDRXui renamable $x0, 0 :: (load 8)
+# CHECK-NEXT: STRXui renamable $x9, $x0, 10 :: (store 8, align 4)
+# CHECK-NEXT: renamable $x9 = LDRXui renamable $x0, 1 :: (load 8)
+# CHECK-NEXT: STRXui renamable $x9, $x0, 11 :: (store 8, align 4)
+# CHECK-NEXT: RET undef $lr
+#
+name: test7
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x0' }
+ - { reg: '$x1' }
+ - { reg: '$x8' }
+frameInfo:
+ stackSize: 64
+ maxAlignment: 16
+ adjustsStack: true
+ hasCalls: true
+ maxCallFrameSize: 0
+stack:
+ - { id: 0, type: spill-slot, offset: -48, size: 16, alignment: 16 }
+ - { id: 1, type: spill-slot, offset: -64, size: 16, alignment: 16 }
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $x0, $x1
+ $sp = frame-setup SUBXri $sp, 64, 0
+ renamable $x9 = frame-setup LDRXui renamable $x0, 0 :: (load 8)
+ STRXui renamable $x9, $x0, 10 :: (store 8, align 4)
+ renamable $x9 = LDRXui renamable $x0, 1 :: (load 8)
+ STRXui renamable $x9, $x0, 11 :: (store 8, align 4)
+ RET undef $lr
+...
+---
+# The register stored by the first STRWui ($w8) is renamed to $w9, together
+# with the implicit-def of its super-register ($x8 -> $x9), so that both
+# stores can be merged into a single STPWi.
+# CHECK-LABEL: name: test8
+# CHECK-LABEL: bb.0:
+# CHECK-NEXT: liveins: $x0, $x1
+
+# CHECK: renamable $x8 = MRS 58880
+# CHECK-NEXT: $w9 = ORRWrs $wzr, killed renamable $w8, 0, implicit-def $x9
+# CHECK-NEXT: renamable $x8 = MRS 55840
+# CHECK-NEXT: STPWi $w9, killed renamable $w8, killed renamable $x0, 0 :: (store 4)
+# CHECK-NEXT: RET undef $lr
+
+name: test8
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x0' }
+ - { reg: '$x1' }
+ - { reg: '$x8' }
+frameInfo:
+ maxAlignment: 1
+ maxCallFrameSize: 0
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $x0, $x1
+
+ renamable $x8 = MRS 58880
+ renamable $w8 = ORRWrs $wzr, killed renamable $w8, 0, implicit-def $x8
+ STRWui renamable $w8, renamable $x0, 0, implicit killed $x8 :: (store 4)
+ renamable $x8 = MRS 55840
+ STRWui killed renamable $w8, renamable killed $x0, 1, implicit killed $x8 :: (store 4)
+ RET undef $lr
+
+...
+---
+# The reg class returned for $q9 contains only the first 16 Q registers.
+# TODO: Can we check that all instructions that require renaming also support
+# the second 16 Q registers?
+# Expected result: no register is renamed and the two STRQui stay unpaired.
+# CHECK-LABEL: name: test9
+# CHECK-LABEL: bb.0:
+# CHECK: liveins: $x0, $x1, $q0, $q1, $q2, $q3, $q4, $q5, $q6, $q7
+
+# CHECK: renamable $q9 = LDRQui $x0, 0 :: (load 16)
+# CHECK-NEXT: STRQui killed renamable $q9, renamable $x0, 10 :: (store 16, align 4)
+# CHECK: renamable $q9 = LDRQui $x0, 1 :: (load 16)
+# CHECK-NEXT: STRQui renamable $q9, renamable $x0, 11 :: (store 16, align 4)
+# CHECK-NEXT: RET undef $lr
+
+name: test9
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x0' }
+ - { reg: '$x1' }
+ - { reg: '$x8' }
+ - { reg: '$q3' }
+frameInfo:
+ maxAlignment: 1
+ maxCallFrameSize: 0
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $x0, $x1, $q0, $q1, $q2, $q3, $q4, $q5, $q6, $q7
+ renamable $q9 = LDRQui $x0, 0 :: (load 16)
+ STRQui renamable killed $q9, renamable $x0, 10 :: (store 16, align 4)
+ renamable $q9 = LDRQui $x0, 1 :: (load 16)
+ STRQui renamable $q9, renamable $x0, 11 :: (store 16, align 4)
+ RET undef $lr
+
+...
+---
+# The livein $q7 is killed early, so we can re-use it for renaming.
+# The $q9 stored by the first STRQui of the pair is renamed to $q7 and the two
+# stores are merged into a single STPQi.
+# CHECK-LABEL: name: test10
+# CHECK-LABEL: bb.0:
+# CHECK: liveins: $x0, $x1, $q0, $q1, $q2, $q3, $q4, $q5, $q6, $q7
+
+# CHECK: renamable $q7 = FADDv2f64 renamable $q7, renamable $q7
+# CHECK-NEXT: STRQui killed renamable $q7, renamable $x0, 100 :: (store 16, align 4)
+# CHECK-NEXT: $q7 = LDRQui $x0, 0 :: (load 16)
+# CHECK-NEXT: renamable $q9 = LDRQui $x0, 1 :: (load 16)
+# CHECK-NEXT: STPQi killed renamable $q9, killed $q7, renamable $x0, 10 :: (store 16, align 4)
+# CHECK-NEXT: RET undef $lr
+
+name: test10
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x0' }
+ - { reg: '$x1' }
+ - { reg: '$x8' }
+ - { reg: '$q3' }
+frameInfo:
+ maxAlignment: 1
+ maxCallFrameSize: 0
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $x0, $x1, $q0, $q1, $q2, $q3, $q4, $q5, $q6, $q7
+ renamable $q7 = FADDv2f64 renamable $q7, renamable $q7
+ STRQui renamable killed $q7, renamable $x0, 100 :: (store 16, align 4)
+ renamable $q9 = LDRQui $x0, 0 :: (load 16)
+ STRQui renamable killed $q9, renamable $x0, 11 :: (store 16, align 4)
+ renamable $q9 = LDRQui $x0, 1 :: (load 16)
+ STRQui renamable killed $q9, renamable $x0, 10 :: (store 16, align 4)
+ RET undef $lr
+
+...
+---
+# Make sure we do not use any registers that are defined between paired candidates
+# ($x14 in this example)
+# In the expected output the $x9 stored by the first store of the pair is
+# renamed to $x15, not to $x14.
+# NOTE(review): the memory operands on the LDRWui/STRWui below say 8 bytes
+# although they access a W register (test8 uses "store 4" for STRWui) --
+# confirm this is intentional.
+# CHECK-LABEL: name: test11
+# CHECK: bb.0:
+# CHECK-NEXT: liveins: $x0, $x1, $x11, $x12, $x13
+
+# CHECK: renamable $w10 = LDRWui renamable $x0, 0 :: (load 8)
+# CHECK-NEXT: $x15, renamable $x8 = LDPXi renamable $x0, 1 :: (load 8)
+# CHECK-NEXT: renamable $x9 = LDRXui renamable $x0, 3 :: (load 8)
+# CHECK-NEXT: renamable $x14 = LDRXui renamable $x0, 5 :: (load 8)
+# CHECK-NEXT: STPXi renamable $x9, killed $x15, renamable $x0, 10 :: (store 8, align 4)
+# CHECK-NEXT: STRXui killed renamable $x14, renamable $x0, 200 :: (store 8, align 4)
+# CHECK-NEXT: renamable $w8 = ADDWrr $w10, $w10
+# CHECK-NEXT: STRWui renamable $w8, renamable $x0, 100 :: (store 8, align 4)
+# CHECK-NEXT: RET undef $lr
+#
+name: test11
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x0' }
+ - { reg: '$x1' }
+ - { reg: '$x8' }
+frameInfo:
+ maxAlignment: 1
+ maxCallFrameSize: 0
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $x0, $x1, $x11, $x12, $x13
+ renamable $w10 = LDRWui renamable $x0, 0 :: (load 8)
+ renamable $x9, renamable $x8 = LDPXi renamable $x0, 1 :: (load 8)
+ STRXui renamable killed $x9, renamable $x0, 11 :: (store 8, align 4)
+ renamable $x9 = LDRXui renamable $x0, 3 :: (load 8)
+ renamable $x14 = LDRXui renamable $x0, 5 :: (load 8)
+ STRXui renamable $x9, renamable $x0, 10 :: (store 8, align 4)
+ STRXui renamable killed $x14, renamable $x0, 200 :: (store 8, align 4)
+ renamable $w8 = ADDWrr $w10, $w10
+ STRWui renamable $w8, renamable $x0, 100 :: (store 8, align 4)
+ RET undef $lr
+
+...
+---
+# Check that we correctly deal with killed registers in stores that get merged forward,
+# which extends the live range of the first store operand.
+# In the expected output the stores to offsets 10/11 are merged after renaming
+# the first store's $x9 to $x11, while the stores to offsets 20/21 are merged
+# without any renaming.
+# CHECK-LABEL: name: test12
+# CHECK: bb.0:
+# CHECK-NEXT: liveins: $x0, $x1
+#
+# CHECK: renamable $x10 = LDRXui renamable $x0, 0 :: (load 8)
+# CHECK-NEXT: $x11, renamable $x8 = LDPXi renamable $x0, 3 :: (load 8)
+# CHECK-NEXT: renamable $x9 = LDRXui renamable $x0, 2 :: (load 8)
+# CHECK-NEXT: renamable $x8 = ADDXrr $x8, $x8
+# CHECK-NEXT: STPXi renamable $x8, killed $x11, renamable $x0, 10 :: (store 8, align 4)
+# CHECK-NEXT: STPXi killed renamable $x10, renamable $x9, renamable $x0, 20 :: (store 8, align 4)
+# CHECK-NEXT: RET undef $lr
+
+name: test12
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x0' }
+ - { reg: '$x1' }
+ - { reg: '$x8' }
+frameInfo:
+ maxAlignment: 1
+ maxCallFrameSize: 0
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $x0, $x1
+ renamable $x10 = LDRXui renamable $x0, 0 :: (load 8)
+ STRXui renamable killed $x10, renamable $x0, 20 :: (store 8, align 4)
+ renamable $x9, renamable $x8 = LDPXi renamable $x0, 3 :: (load 8)
+ STRXui renamable killed $x9, renamable $x0, 11 :: (store 8, align 4)
+ renamable $x9 = LDRXui renamable $x0, 2 :: (load 8)
+ renamable $x8 = ADDXrr $x8, $x8
+ STRXui renamable $x8, renamable $x0, 10 :: (store 8, align 4)
+ STRXui renamable $x9, renamable $x0, 21 :: (store 8, align 4)
+ RET undef $lr
+
+...
+---
+# Make sure we do not use any registers that are defined between the def to
+# rename and the first paired store ($x14 in this example).
+# In the expected output the $x9 defined by the LDPXi is renamed to $x15, not
+# to $x14.
+# CHECK-LABEL: name: test13
+# CHECK: bb.0:
+# CHECK-NEXT: liveins: $x0, $x1, $x10, $x11, $x12, $x13
+# CHECK: $x15, renamable $x8 = LDPXi renamable $x0, 0 :: (load 8)
+# CHECK-NEXT: renamable $x14 = LDRXui renamable $x0, 4 :: (load 8)
+# CHECK-NEXT: STRXui killed renamable $x14, renamable $x0, 100 :: (store 8, align 4)
+# CHECK-NEXT: renamable $x9 = LDRXui renamable $x0, 2 :: (load 8)
+# CHECK-NEXT: STPXi renamable $x9, killed $x15, renamable $x0, 10 :: (store 8, align 4)
+# CHECK-NEXT: RET undef $lr
+#
+name: test13
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x0' }
+ - { reg: '$x1' }
+ - { reg: '$x8' }
+frameInfo:
+ maxAlignment: 1
+ maxCallFrameSize: 0
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $x0, $x1, $x10, $x11, $x12, $x13
+ renamable $x9, renamable $x8 = LDPXi renamable $x0, 0 :: (load 8)
+ renamable $x14 = LDRXui renamable $x0, 4 :: (load 8)
+ STRXui renamable killed $x14, renamable $x0, 100 :: (store 8, align 4)
+ STRXui renamable killed $x9, renamable $x0, 11 :: (store 8, align 4)
+ renamable $x9 = LDRXui renamable $x0, 2 :: (load 8)
+ STRXui renamable $x9, renamable $x0, 10 :: (store 8)
+ RET undef $lr
+
+...
OpenPOWER on IntegriCloud