-rw-r--r--  llvm/lib/CodeGen/RegAllocFast.cpp                        | 167
-rw-r--r--  llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll    |  33
-rw-r--r--  llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll  | 342
-rw-r--r--  llvm/test/CodeGen/AMDGPU/spill-m0.ll                     |  24
-rw-r--r--  llvm/test/CodeGen/Mips/atomic.ll                         |  78
-rw-r--r--  llvm/test/CodeGen/Mips/atomic64.ll                       |   2
-rw-r--r--  llvm/test/CodeGen/Mips/atomicCmpSwapPW.ll                |   6
-rw-r--r--  llvm/test/CodeGen/X86/atomic32.ll                        |  28
-rw-r--r--  llvm/test/CodeGen/X86/avx-load-store.ll                  |  32
-rwxr-xr-x  llvm/test/CodeGen/X86/avx512-mask-zext-bugfix.ll         |  42
-rw-r--r--  llvm/test/CodeGen/X86/pr30430.ll                         |   4
-rw-r--r--  llvm/test/CodeGen/X86/pr32284.ll                         |   2
-rw-r--r--  llvm/test/CodeGen/X86/pr32345.ll                         |   2
-rw-r--r--  llvm/test/CodeGen/X86/pr34592.ll                         |   6
-rw-r--r--  llvm/test/CodeGen/X86/pr34653.ll                         |  30
15 files changed, 374 insertions(+), 424 deletions(-)
diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp
index e849bcec199..ea7f247214d 100644
--- a/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -149,10 +149,6 @@ namespace {
return false;
}
- /// This flag is set when LiveRegMap will be cleared completely after
- /// spilling all live registers. LiveRegMap entries should not be erased.
- bool isBulkSpilling = false;
-
enum : unsigned {
spillClean = 50,
spillDirty = 100,
@@ -186,9 +182,9 @@ namespace {
bool isLastUseOfLocalReg(const MachineOperand &MO) const;
void addKillFlag(const LiveReg &LRI);
- void killVirtReg(LiveRegMap::iterator LRI);
+ void killVirtReg(LiveReg &LR);
void killVirtReg(unsigned VirtReg);
- void spillVirtReg(MachineBasicBlock::iterator MI, LiveRegMap::iterator);
+ void spillVirtReg(MachineBasicBlock::iterator MI, LiveReg &LR);
void spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg);
void usePhysReg(MachineOperand &MO);
@@ -205,13 +201,11 @@ namespace {
return LiveVirtRegs.find(TargetRegisterInfo::virtReg2Index(VirtReg));
}
- LiveRegMap::iterator assignVirtToPhysReg(unsigned VirtReg, MCPhysReg PhysReg);
- LiveRegMap::iterator allocVirtReg(MachineInstr &MI, LiveRegMap::iterator,
- unsigned Hint);
- LiveRegMap::iterator defineVirtReg(MachineInstr &MI, unsigned OpNum,
- unsigned VirtReg, unsigned Hint);
- LiveRegMap::iterator reloadVirtReg(MachineInstr &MI, unsigned OpNum,
- unsigned VirtReg, unsigned Hint);
+ void allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint);
+ MCPhysReg defineVirtReg(MachineInstr &MI, unsigned OpNum, unsigned VirtReg,
+ unsigned Hint);
+ LiveReg &reloadVirtReg(MachineInstr &MI, unsigned OpNum, unsigned VirtReg,
+ unsigned Hint);
void spillAll(MachineBasicBlock::iterator MI);
bool setPhysReg(MachineInstr &MI, unsigned OpNum, MCPhysReg PhysReg);
@@ -330,14 +324,12 @@ void RegAllocFast::addKillFlag(const LiveReg &LR) {
}
/// Mark virtreg as no longer available.
-void RegAllocFast::killVirtReg(LiveRegMap::iterator LRI) {
- addKillFlag(*LRI);
- assert(PhysRegState[LRI->PhysReg] == LRI->VirtReg &&
+void RegAllocFast::killVirtReg(LiveReg &LR) {
+ addKillFlag(LR);
+ assert(PhysRegState[LR.PhysReg] == LR.VirtReg &&
"Broken RegState mapping");
- setPhysRegState(LRI->PhysReg, regFree);
- // Erase from LiveVirtRegs unless we're spilling in bulk.
- if (!isBulkSpilling)
- LiveVirtRegs.erase(LRI);
+ setPhysRegState(LR.PhysReg, regFree);
+ LR.PhysReg = 0;
}
/// Mark virtreg as no longer available.
@@ -345,8 +337,8 @@ void RegAllocFast::killVirtReg(unsigned VirtReg) {
assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
"killVirtReg needs a virtual register");
LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
- if (LRI != LiveVirtRegs.end())
- killVirtReg(LRI);
+ if (LRI != LiveVirtRegs.end() && LRI->PhysReg)
+ killVirtReg(*LRI);
}
/// This method spills the value specified by VirtReg into the corresponding
@@ -356,15 +348,14 @@ void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI,
assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
"Spilling a physical register is illegal!");
LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
- assert(LRI != LiveVirtRegs.end() && "Spilling unmapped virtual register");
- spillVirtReg(MI, LRI);
+ assert(LRI != LiveVirtRegs.end() && LRI->PhysReg &&
+ "Spilling unmapped virtual register");
+ spillVirtReg(MI, *LRI);
}
/// Do the actual work of spilling.
-void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI,
- LiveRegMap::iterator LRI) {
- LiveReg &LR = *LRI;
- assert(PhysRegState[LR.PhysReg] == LRI->VirtReg && "Broken RegState mapping");
+void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI, LiveReg &LR) {
+ assert(PhysRegState[LR.PhysReg] == LR.VirtReg && "Broken RegState mapping");
if (LR.Dirty) {
// If this physreg is used by the instruction, we want to kill it on the
@@ -372,25 +363,25 @@ void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI,
bool SpillKill = MachineBasicBlock::iterator(LR.LastUse) != MI;
LR.Dirty = false;
- spill(MI, LRI->VirtReg, LR.PhysReg, SpillKill);
+ spill(MI, LR.VirtReg, LR.PhysReg, SpillKill);
if (SpillKill)
LR.LastUse = nullptr; // Don't kill register again
}
- killVirtReg(LRI);
+ killVirtReg(LR);
}
/// Spill all dirty virtregs without killing them.
void RegAllocFast::spillAll(MachineBasicBlock::iterator MI) {
if (LiveVirtRegs.empty()) return;
- isBulkSpilling = true;
// The LiveRegMap is keyed by an unsigned (the virtreg number), so the order
// of spilling here is deterministic, if arbitrary.
- for (LiveRegMap::iterator I = LiveVirtRegs.begin(), E = LiveVirtRegs.end();
- I != E; ++I)
- spillVirtReg(MI, I);
+ for (LiveReg &LR : LiveVirtRegs) {
+ if (!LR.PhysReg)
+ continue;
+ spillVirtReg(MI, LR);
+ }
LiveVirtRegs.clear();
- isBulkSpilling = false;
}
/// Handle the direct use of a physical register. Check that the register is
@@ -519,9 +510,10 @@ unsigned RegAllocFast::calcSpillCost(MCPhysReg PhysReg) const {
<< printReg(PhysReg, TRI) << " is reserved already.\n");
return spillImpossible;
default: {
- LiveRegMap::const_iterator I = findLiveVirtReg(VirtReg);
- assert(I != LiveVirtRegs.end() && "Missing VirtReg entry");
- return I->Dirty ? spillDirty : spillClean;
+ LiveRegMap::const_iterator LRI = findLiveVirtReg(VirtReg);
+ assert(LRI != LiveVirtRegs.end() && LRI->PhysReg &&
+ "Missing VirtReg entry");
+ return LRI->Dirty ? spillDirty : spillClean;
}
}
@@ -539,9 +531,10 @@ unsigned RegAllocFast::calcSpillCost(MCPhysReg PhysReg) const {
case regReserved:
return spillImpossible;
default: {
- LiveRegMap::const_iterator I = findLiveVirtReg(VirtReg);
- assert(I != LiveVirtRegs.end() && "Missing VirtReg entry");
- Cost += I->Dirty ? spillDirty : spillClean;
+ LiveRegMap::const_iterator LRI = findLiveVirtReg(VirtReg);
+ assert(LRI != LiveVirtRegs.end() && LRI->PhysReg &&
+ "Missing VirtReg entry");
+ Cost += LRI->Dirty ? spillDirty : spillClean;
break;
}
}
@@ -562,18 +555,9 @@ void RegAllocFast::assignVirtToPhysReg(LiveReg &LR, MCPhysReg PhysReg) {
setPhysRegState(PhysReg, VirtReg);
}
-RegAllocFast::LiveRegMap::iterator
-RegAllocFast::assignVirtToPhysReg(unsigned VirtReg, MCPhysReg PhysReg) {
- LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
- assert(LRI != LiveVirtRegs.end() && "VirtReg disappeared");
- assignVirtToPhysReg(*LRI, PhysReg);
- return LRI;
-}
-
/// Allocates a physical register for VirtReg.
-RegAllocFast::LiveRegMap::iterator RegAllocFast::allocVirtReg(MachineInstr &MI,
- LiveRegMap::iterator LRI, unsigned Hint) {
- const unsigned VirtReg = LRI->VirtReg;
+void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint) {
+ const unsigned VirtReg = LR.VirtReg;
assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
"Can only allocate virtual registers");
@@ -590,9 +574,8 @@ RegAllocFast::LiveRegMap::iterator RegAllocFast::allocVirtReg(MachineInstr &MI,
if (Cost < spillDirty) {
if (Cost)
definePhysReg(MI, Hint, regFree);
- // definePhysReg may kill virtual registers and modify LiveVirtRegs.
- // That invalidates LRI, so run a new lookup for VirtReg.
- return assignVirtToPhysReg(VirtReg, Hint);
+ assignVirtToPhysReg(LR, Hint);
+ return;
}
}
@@ -600,8 +583,8 @@ RegAllocFast::LiveRegMap::iterator RegAllocFast::allocVirtReg(MachineInstr &MI,
ArrayRef<MCPhysReg> AllocationOrder = RegClassInfo.getOrder(&RC);
for (MCPhysReg PhysReg : AllocationOrder) {
if (PhysRegState[PhysReg] == regFree && !isRegUsedInInstr(PhysReg)) {
- assignVirtToPhysReg(*LRI, PhysReg);
- return LRI;
+ assignVirtToPhysReg(LR, PhysReg);
+ return;
}
}
@@ -616,8 +599,8 @@ RegAllocFast::LiveRegMap::iterator RegAllocFast::allocVirtReg(MachineInstr &MI,
LLVM_DEBUG(dbgs() << "Cost: " << Cost << " BestCost: " << BestCost << '\n');
// Cost is 0 when all aliases are already disabled.
if (Cost == 0) {
- assignVirtToPhysReg(*LRI, PhysReg);
- return LRI;
+ assignVirtToPhysReg(LR, PhysReg);
+ return;
}
if (Cost < BestCost) {
BestReg = PhysReg;
@@ -632,26 +615,23 @@ RegAllocFast::LiveRegMap::iterator RegAllocFast::allocVirtReg(MachineInstr &MI,
else
MI.emitError("ran out of registers during register allocation");
definePhysReg(MI, *AllocationOrder.begin(), regFree);
- return assignVirtToPhysReg(VirtReg, *AllocationOrder.begin());
+ assignVirtToPhysReg(LR, *AllocationOrder.begin());
+ return;
}
definePhysReg(MI, BestReg, regFree);
- // definePhysReg may kill virtual registers and modify LiveVirtRegs.
- // That invalidates LRI, so run a new lookup for VirtReg.
- return assignVirtToPhysReg(VirtReg, BestReg);
+ assignVirtToPhysReg(LR, BestReg);
}
/// Allocates a register for VirtReg and mark it as dirty.
-RegAllocFast::LiveRegMap::iterator RegAllocFast::defineVirtReg(MachineInstr &MI,
- unsigned OpNum,
- unsigned VirtReg,
- unsigned Hint) {
+MCPhysReg RegAllocFast::defineVirtReg(MachineInstr &MI, unsigned OpNum,
+ unsigned VirtReg, unsigned Hint) {
assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
"Not a virtual register");
LiveRegMap::iterator LRI;
bool New;
std::tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg));
- if (New) {
+ if (!LRI->PhysReg) {
// If there is no hint, peek at the only use of this register.
if ((!Hint || !TargetRegisterInfo::isPhysicalRegister(Hint)) &&
MRI->hasOneNonDBGUse(VirtReg)) {
@@ -660,7 +640,7 @@ RegAllocFast::LiveRegMap::iterator RegAllocFast::defineVirtReg(MachineInstr &MI,
if (UseMI.isCopyLike())
Hint = UseMI.getOperand(0).getReg();
}
- LRI = allocVirtReg(MI, LRI, Hint);
+ allocVirtReg(MI, *LRI, Hint);
} else if (LRI->LastUse) {
// Redefining a live register - kill at the last use, unless it is this
// instruction defining VirtReg multiple times.
@@ -672,22 +652,22 @@ RegAllocFast::LiveRegMap::iterator RegAllocFast::defineVirtReg(MachineInstr &MI,
LRI->LastOpNum = OpNum;
LRI->Dirty = true;
markRegUsedInInstr(LRI->PhysReg);
- return LRI;
+ return LRI->PhysReg;
}
/// Make sure VirtReg is available in a physreg and return it.
-RegAllocFast::LiveRegMap::iterator RegAllocFast::reloadVirtReg(MachineInstr &MI,
- unsigned OpNum,
- unsigned VirtReg,
- unsigned Hint) {
+RegAllocFast::LiveReg &RegAllocFast::reloadVirtReg(MachineInstr &MI,
+ unsigned OpNum,
+ unsigned VirtReg,
+ unsigned Hint) {
assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
"Not a virtual register");
LiveRegMap::iterator LRI;
bool New;
std::tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg));
MachineOperand &MO = MI.getOperand(OpNum);
- if (New) {
- LRI = allocVirtReg(MI, LRI, Hint);
+ if (!LRI->PhysReg) {
+ allocVirtReg(MI, *LRI, Hint);
reload(MI, VirtReg, LRI->PhysReg);
} else if (LRI->Dirty) {
if (isLastUseOfLocalReg(MO)) {
@@ -718,7 +698,7 @@ RegAllocFast::LiveRegMap::iterator RegAllocFast::reloadVirtReg(MachineInstr &MI,
LRI->LastUse = &MI;
LRI->LastOpNum = OpNum;
markRegUsedInInstr(LRI->PhysReg);
- return LRI;
+ return *LRI;
}
/// Changes operand OpNum in MI to refer to PhysReg, considering subregs. This
@@ -798,8 +778,8 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI,
LLVM_DEBUG(dbgs() << "Operand " << I << "(" << MO
<< ") is tied to operand " << MI.findTiedOperandIdx(I)
<< ".\n");
- LiveRegMap::iterator LRI = reloadVirtReg(MI, I, Reg, 0);
- MCPhysReg PhysReg = LRI->PhysReg;
+ LiveReg &LR = reloadVirtReg(MI, I, Reg, 0);
+ MCPhysReg PhysReg = LR.PhysReg;
setPhysReg(MI, I, PhysReg);
// Note: we don't update the def operand yet. That would cause the normal
// def-scan to attempt spilling.
@@ -807,8 +787,8 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI,
LLVM_DEBUG(dbgs() << "Partial redefine: " << MO << '\n');
// Reload the register, but don't assign to the operand just yet.
// That would confuse the later phys-def processing pass.
- LiveRegMap::iterator LRI = reloadVirtReg(MI, I, Reg, 0);
- PartialDefs.push_back(LRI->PhysReg);
+ LiveReg &LR = reloadVirtReg(MI, I, Reg, 0);
+ PartialDefs.push_back(LR.PhysReg);
}
}
@@ -821,8 +801,7 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI,
if (!MO.isEarlyClobber())
continue;
// Note: defineVirtReg may invalidate MO.
- LiveRegMap::iterator LRI = defineVirtReg(MI, I, Reg, 0);
- MCPhysReg PhysReg = LRI->PhysReg;
+ MCPhysReg PhysReg = defineVirtReg(MI, I, Reg, 0);
if (setPhysReg(MI, I, PhysReg))
VirtDead.push_back(Reg);
}
@@ -856,11 +835,12 @@ void RegAllocFast::dumpState() {
break;
default: {
dbgs() << '=' << printReg(PhysRegState[Reg]);
- LiveRegMap::iterator I = findLiveVirtReg(PhysRegState[Reg]);
- assert(I != LiveVirtRegs.end() && "Missing VirtReg entry");
- if (I->Dirty)
+ LiveRegMap::iterator LRI = findLiveVirtReg(PhysRegState[Reg]);
+ assert(LRI != LiveVirtRegs.end() && LRI->PhysReg &&
+ "Missing VirtReg entry");
+ if (LRI->Dirty)
dbgs() << "*";
- assert(I->PhysReg == Reg && "Bad inverse map");
+ assert(LRI->PhysReg == Reg && "Bad inverse map");
break;
}
}
@@ -869,6 +849,8 @@ void RegAllocFast::dumpState() {
// Check that LiveVirtRegs is the inverse.
for (LiveRegMap::iterator i = LiveVirtRegs.begin(),
e = LiveVirtRegs.end(); i != e; ++i) {
+ if (!i->PhysReg)
+ continue;
assert(TargetRegisterInfo::isVirtualRegister(i->VirtReg) &&
"Bad map key");
assert(TargetRegisterInfo::isPhysicalRegister(i->PhysReg) &&
@@ -916,7 +898,7 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) {
// See if this virtual register has already been allocated to a physical
// register or spilled to a stack slot.
LiveRegMap::iterator LRI = findLiveVirtReg(Reg);
- if (LRI != LiveVirtRegs.end())
+ if (LRI != LiveVirtRegs.end() && LRI->PhysReg)
setPhysReg(*DebugMI, 0, LRI->PhysReg);
else {
int SS = StackSlotForVirtReg[Reg];
@@ -1026,11 +1008,11 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) {
unsigned Reg = MO.getReg();
if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
if (MO.isUse()) {
- LiveRegMap::iterator LRI = reloadVirtReg(MI, I, Reg, CopyDstReg);
- MCPhysReg PhysReg = LRI->PhysReg;
+ LiveReg &LR = reloadVirtReg(MI, I, Reg, CopyDstReg);
+ MCPhysReg PhysReg = LR.PhysReg;
CopySrcReg = (CopySrcReg == Reg || CopySrcReg == PhysReg) ? PhysReg : 0;
if (setPhysReg(MI, I, PhysReg))
- killVirtReg(LRI);
+ killVirtReg(LR);
}
}
@@ -1074,8 +1056,7 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) {
definePhysReg(MI, Reg, MO.isDead() ? regFree : regReserved);
continue;
}
- LiveRegMap::iterator LRI = defineVirtReg(MI, I, Reg, CopySrcReg);
- MCPhysReg PhysReg = LRI->PhysReg;
+ MCPhysReg PhysReg = defineVirtReg(MI, I, Reg, CopySrcReg);
if (setPhysReg(MI, I, PhysReg)) {
VirtDead.push_back(Reg);
CopyDstReg = 0; // cancel coalescing;
diff --git a/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll b/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll
index d19072a6c4e..41ecdd403d7 100644
--- a/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll
+++ b/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll
@@ -21,18 +21,17 @@
; GCN: s_mov_b64 s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, exec
; GCN: s_and_b64 s{{\[}}[[ANDEXEC_LO:[0-9]+]]:[[ANDEXEC_HI:[0-9]+]]{{\]}}, s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}}, [[CMP0]]
+; Spill load
+; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s7 offset:[[LOAD0_OFFSET:[0-9]+]] ; 4-byte Folded Spill
+
; Spill saved exec
; VGPR: v_writelane_b32 [[SPILL_VGPR:v[0-9]+]], s[[SAVEEXEC_LO]], [[SAVEEXEC_LO_LANE:[0-9]+]]
; VGPR: v_writelane_b32 [[SPILL_VGPR]], s[[SAVEEXEC_HI]], [[SAVEEXEC_HI_LANE:[0-9]+]]
-
; VMEM: v_mov_b32_e32 v[[V_SAVEEXEC_LO:[0-9]+]], s[[SAVEEXEC_LO]]
-; VMEM: buffer_store_dword v[[V_SAVEEXEC_LO]], off, s[0:3], s7 offset:4 ; 4-byte Folded Spill
+; VMEM: buffer_store_dword v[[V_SAVEEXEC_LO]], off, s[0:3], s7 offset:20 ; 4-byte Folded Spill
; VMEM: v_mov_b32_e32 v[[V_SAVEEXEC_HI:[0-9]+]], s[[SAVEEXEC_HI]]
-; VMEM: buffer_store_dword v[[V_SAVEEXEC_HI]], off, s[0:3], s7 offset:8 ; 4-byte Folded Spill
-
-; Spill load
-; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s7 offset:[[LOAD0_OFFSET:[0-9]+]] ; 4-byte Folded Spill
+; VMEM: buffer_store_dword v[[V_SAVEEXEC_HI]], off, s[0:3], s7 offset:24 ; 4-byte Folded Spill
; GCN: s_mov_b64 exec, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}}
@@ -57,11 +56,11 @@
-; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_LO:[0-9]+]], off, s[0:3], s7 offset:4 ; 4-byte Folded Reload
+; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_LO:[0-9]+]], off, s[0:3], s7 offset:20 ; 4-byte Folded Reload
; VMEM: s_waitcnt vmcnt(0)
; VMEM: v_readfirstlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], v[[V_RELOAD_SAVEEXEC_LO]]
-; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_HI:[0-9]+]], off, s[0:3], s7 offset:8 ; 4-byte Folded Reload
+; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_HI:[0-9]+]], off, s[0:3], s7 offset:24 ; 4-byte Folded Reload
; VMEM: s_waitcnt vmcnt(0)
; VMEM: v_readfirstlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], v[[V_RELOAD_SAVEEXEC_HI]]
@@ -103,7 +102,7 @@ endif:
; GCN: s_and_b64 s{{\[}}[[ANDEXEC_LO:[0-9]+]]:[[ANDEXEC_HI:[0-9]+]]{{\]}}, s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, [[CMP0]]
; Spill load
-; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s7 offset:4 ; 4-byte Folded Spill
+; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s7 offset:[[LOAD0_OFFSET:[0-9]+]] ; 4-byte Folded Spill
; Spill saved exec
; VGPR: v_writelane_b32 [[SPILL_VGPR:v[0-9]+]], s[[SAVEEXEC_LO]], [[SAVEEXEC_LO_LANE:[0-9]+]]
@@ -111,9 +110,9 @@ endif:
; VMEM: v_mov_b32_e32 v[[V_SAVEEXEC_LO:[0-9]+]], s[[SAVEEXEC_LO]]
-; VMEM: buffer_store_dword v[[V_SAVEEXEC_LO]], off, s[0:3], s7 offset:20 ; 4-byte Folded Spill
+; VMEM: buffer_store_dword v[[V_SAVEEXEC_LO]], off, s[0:3], s7 offset:24 ; 4-byte Folded Spill
; VMEM: v_mov_b32_e32 v[[V_SAVEEXEC_HI:[0-9]+]], s[[SAVEEXEC_HI]]
-; VMEM: buffer_store_dword v[[V_SAVEEXEC_HI]], off, s[0:3], s7 offset:24 ; 4-byte Folded Spill
+; VMEM: buffer_store_dword v[[V_SAVEEXEC_HI]], off, s[0:3], s7 offset:28 ; 4-byte Folded Spill
; GCN: s_mov_b64 exec, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}}
@@ -122,7 +121,7 @@ endif:
; GCN: [[LOOP:BB[0-9]+_[0-9]+]]:
-; GCN: buffer_load_dword v[[VAL_LOOP_RELOAD:[0-9]+]], off, s[0:3], s7 offset:4 ; 4-byte Folded Reload
+; GCN: buffer_load_dword v[[VAL_LOOP_RELOAD:[0-9]+]], off, s[0:3], s7 offset:[[LOAD0_OFFSET]] ; 4-byte Folded Reload
; GCN: v_subrev_i32_e32 [[VAL_LOOP:v[0-9]+]], vcc, v{{[0-9]+}}, v[[VAL_LOOP_RELOAD]]
; GCN: v_cmp_ne_u32_e32 vcc,
; GCN: s_and_b64 vcc, exec, vcc
@@ -134,11 +133,11 @@ endif:
; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_LO_LANE]]
; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_HI_LANE]]
-; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_LO:[0-9]+]], off, s[0:3], s7 offset:20 ; 4-byte Folded Reload
+; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_LO:[0-9]+]], off, s[0:3], s7 offset:24 ; 4-byte Folded Reload
; VMEM: s_waitcnt vmcnt(0)
; VMEM: v_readfirstlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], v[[V_RELOAD_SAVEEXEC_LO]]
-; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_HI:[0-9]+]], off, s[0:3], s7 offset:24 ; 4-byte Folded Reload
+; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_HI:[0-9]+]], off, s[0:3], s7 offset:28 ; 4-byte Folded Reload
; VMEM: s_waitcnt vmcnt(0)
; VMEM: v_readfirstlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], v[[V_RELOAD_SAVEEXEC_HI]]
@@ -182,7 +181,7 @@ end:
; GCN: s_xor_b64 s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}}, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}}, s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}}
; Spill load
-; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s7 offset:4 ; 4-byte Folded Spill
+; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s7 offset:[[LOAD0_OFFSET:[0-9]+]] ; 4-byte Folded Spill
; Spill saved exec
; VGPR: v_writelane_b32 [[SPILL_VGPR:v[0-9]+]], s[[SAVEEXEC_LO]], [[SAVEEXEC_LO_LANE:[0-9]+]]
@@ -237,13 +236,13 @@ end:
; GCN: BB{{[0-9]+}}_2: ; %if
; GCN: ds_read_b32
-; GCN: buffer_load_dword v[[LOAD0_RELOAD:[0-9]+]], off, s[0:3], s7 offset:4 ; 4-byte Folded Reload
+; GCN: buffer_load_dword v[[LOAD0_RELOAD:[0-9]+]], off, s[0:3], s7 offset:[[LOAD0_OFFSET]] ; 4-byte Folded Reload
; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, v{{[0-9]+}}, v[[LOAD0_RELOAD]]
; GCN: buffer_store_dword [[ADD]], off, s[0:3], s7 offset:[[RESULT_OFFSET]] ; 4-byte Folded Spill
; GCN-NEXT: s_branch [[ENDIF:BB[0-9]+_[0-9]+]]
; GCN: [[ELSE]]: ; %else
-; GCN: buffer_load_dword v[[LOAD0_RELOAD:[0-9]+]], off, s[0:3], s7 offset:4 ; 4-byte Folded Reload
+; GCN: buffer_load_dword v[[LOAD0_RELOAD:[0-9]+]], off, s[0:3], s7 offset:[[LOAD0_OFFSET]] ; 4-byte Folded Reload
; GCN: v_subrev_i32_e32 [[SUB:v[0-9]+]], vcc, v{{[0-9]+}}, v[[LOAD0_RELOAD]]
; GCN: buffer_store_dword [[ADD]], off, s[0:3], s7 offset:[[FLOW_RESULT_OFFSET:[0-9]+]] ; 4-byte Folded Spill
; GCN-NEXT: s_branch [[FLOW]]
diff --git a/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll b/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll
index d31d636cc41..a38bacd97a6 100644
--- a/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll
+++ b/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll
@@ -82,95 +82,95 @@
; GCN-NEXT: v_writelane_b32 v0, s[[TMP_HI]], 47
; GCN: def s{{\[}}[[TMP_LO]]:[[TMP_HI]]{{\]}}
-; GCN: v_writelane_b32 v0, s[[TMP_LO]], 48
-; GCN-NEXT: v_writelane_b32 v0, s{{[0-9]+}}, 49
-; GCN-NEXT: v_writelane_b32 v0, s{{[0-9]+}}, 50
-; GCN-NEXT: v_writelane_b32 v0, s{{[0-9]+}}, 51
-; GCN-NEXT: v_writelane_b32 v0, s{{[0-9]+}}, 52
-; GCN-NEXT: v_writelane_b32 v0, s9, 53
-; GCN-NEXT: v_writelane_b32 v0, s10, 54
-; GCN-NEXT: v_writelane_b32 v0, s[[TMP_HI]], 55
-
-; GCN-NEXT: v_writelane_b32 v0, s84, 56
-; GCN-NEXT: v_writelane_b32 v0, s85, 57
-; GCN-NEXT: v_writelane_b32 v0, s86, 58
-; GCN-NEXT: v_writelane_b32 v0, s87, 59
-; GCN-NEXT: v_writelane_b32 v0, s88, 60
-; GCN-NEXT: v_writelane_b32 v0, s89, 61
-; GCN-NEXT: v_writelane_b32 v0, s90, 62
-; GCN-NEXT: v_writelane_b32 v0, s91, 63
-; GCN-NEXT: v_writelane_b32 v1, s12, 0
-; GCN-NEXT: v_writelane_b32 v1, s13, 1
-; GCN-NEXT: v_writelane_b32 v1, s14, 2
-; GCN-NEXT: v_writelane_b32 v1, s15, 3
-; GCN-NEXT: v_writelane_b32 v1, s16, 4
-; GCN-NEXT: v_writelane_b32 v1, s17, 5
-; GCN-NEXT: v_writelane_b32 v1, s18, 6
-; GCN-NEXT: v_writelane_b32 v1, s19, 7
-; GCN-NEXT: v_writelane_b32 v1, s20, 8
-; GCN-NEXT: v_writelane_b32 v1, s21, 9
-; GCN-NEXT: v_writelane_b32 v1, s22, 10
-; GCN-NEXT: v_writelane_b32 v1, s23, 11
-; GCN-NEXT: v_writelane_b32 v1, s24, 12
-; GCN-NEXT: v_writelane_b32 v1, s25, 13
-; GCN-NEXT: v_writelane_b32 v1, s26, 14
-; GCN-NEXT: v_writelane_b32 v1, s27, 15
-; GCN-NEXT: v_writelane_b32 v1, s28, 16
-; GCN-NEXT: v_writelane_b32 v1, s29, 17
-; GCN-NEXT: v_writelane_b32 v1, s30, 18
-; GCN-NEXT: v_writelane_b32 v1, s31, 19
-; GCN-NEXT: v_writelane_b32 v1, s32, 20
-; GCN-NEXT: v_writelane_b32 v1, s33, 21
-; GCN-NEXT: v_writelane_b32 v1, s34, 22
-; GCN-NEXT: v_writelane_b32 v1, s35, 23
-; GCN-NEXT: v_writelane_b32 v1, s36, 24
-; GCN-NEXT: v_writelane_b32 v1, s37, 25
-; GCN-NEXT: v_writelane_b32 v1, s38, 26
-; GCN-NEXT: v_writelane_b32 v1, s39, 27
-; GCN-NEXT: v_writelane_b32 v1, s40, 28
-; GCN-NEXT: v_writelane_b32 v1, s41, 29
-; GCN-NEXT: v_writelane_b32 v1, s42, 30
-; GCN-NEXT: v_writelane_b32 v1, s43, 31
-; GCN-NEXT: v_writelane_b32 v1, s44, 32
-; GCN-NEXT: v_writelane_b32 v1, s45, 33
-; GCN-NEXT: v_writelane_b32 v1, s46, 34
-; GCN-NEXT: v_writelane_b32 v1, s47, 35
-; GCN-NEXT: v_writelane_b32 v1, s48, 36
-; GCN-NEXT: v_writelane_b32 v1, s49, 37
-; GCN-NEXT: v_writelane_b32 v1, s50, 38
-; GCN-NEXT: v_writelane_b32 v1, s51, 39
-; GCN-NEXT: v_writelane_b32 v1, s52, 40
-; GCN-NEXT: v_writelane_b32 v1, s53, 41
-; GCN-NEXT: v_writelane_b32 v1, s54, 42
-; GCN-NEXT: v_writelane_b32 v1, s55, 43
-; GCN-NEXT: v_writelane_b32 v1, s56, 44
-; GCN-NEXT: v_writelane_b32 v1, s57, 45
-; GCN-NEXT: v_writelane_b32 v1, s58, 46
-; GCN-NEXT: v_writelane_b32 v1, s59, 47
-; GCN-NEXT: v_writelane_b32 v1, s60, 48
-; GCN-NEXT: v_writelane_b32 v1, s61, 49
-; GCN-NEXT: v_writelane_b32 v1, s62, 50
-; GCN-NEXT: v_writelane_b32 v1, s63, 51
-; GCN-NEXT: v_writelane_b32 v1, s64, 52
-; GCN-NEXT: v_writelane_b32 v1, s65, 53
-; GCN-NEXT: v_writelane_b32 v1, s66, 54
-; GCN-NEXT: v_writelane_b32 v1, s67, 55
-; GCN-NEXT: v_writelane_b32 v1, s68, 56
-; GCN-NEXT: v_writelane_b32 v1, s69, 57
-; GCN-NEXT: v_writelane_b32 v1, s70, 58
-; GCN-NEXT: v_writelane_b32 v1, s71, 59
-; GCN-NEXT: v_writelane_b32 v1, s72, 60
-; GCN-NEXT: v_writelane_b32 v1, s73, 61
-; GCN-NEXT: v_writelane_b32 v1, s74, 62
-; GCN-NEXT: v_writelane_b32 v1, s75, 63
-; GCN-NEXT: v_writelane_b32 v2, s76, 0
-; GCN-NEXT: v_writelane_b32 v2, s77, 1
-; GCN-NEXT: v_writelane_b32 v2, s78, 2
-; GCN-NEXT: v_writelane_b32 v2, s79, 3
-; GCN-NEXT: v_writelane_b32 v2, s80, 4
-; GCN-NEXT: v_writelane_b32 v2, s81, 5
-; GCN-NEXT: v_writelane_b32 v2, s82, 6
-; GCN-NEXT: v_writelane_b32 v2, s83, 7
+; GCN: v_writelane_b32 v0, s12, 48
+; GCN-NEXT: v_writelane_b32 v0, s13, 49
+; GCN-NEXT: v_writelane_b32 v0, s14, 50
+; GCN-NEXT: v_writelane_b32 v0, s15, 51
+; GCN-NEXT: v_writelane_b32 v0, s16, 52
+; GCN-NEXT: v_writelane_b32 v0, s17, 53
+; GCN-NEXT: v_writelane_b32 v0, s18, 54
+; GCN-NEXT: v_writelane_b32 v0, s19, 55
+
+; GCN-NEXT: v_writelane_b32 v0, s20, 56
+; GCN-NEXT: v_writelane_b32 v0, s21, 57
+; GCN-NEXT: v_writelane_b32 v0, s22, 58
+; GCN-NEXT: v_writelane_b32 v0, s23, 59
+; GCN-NEXT: v_writelane_b32 v0, s24, 60
+; GCN-NEXT: v_writelane_b32 v0, s25, 61
+; GCN-NEXT: v_writelane_b32 v0, s26, 62
+; GCN-NEXT: v_writelane_b32 v0, s27, 63
+; GCN-NEXT: v_writelane_b32 v1, s28, 0
+; GCN-NEXT: v_writelane_b32 v1, s29, 1
+; GCN-NEXT: v_writelane_b32 v1, s30, 2
+; GCN-NEXT: v_writelane_b32 v1, s31, 3
+; GCN-NEXT: v_writelane_b32 v1, s32, 4
+; GCN-NEXT: v_writelane_b32 v1, s33, 5
+; GCN-NEXT: v_writelane_b32 v1, s34, 6
+; GCN-NEXT: v_writelane_b32 v1, s35, 7
+; GCN-NEXT: v_writelane_b32 v1, s36, 8
+; GCN-NEXT: v_writelane_b32 v1, s37, 9
+; GCN-NEXT: v_writelane_b32 v1, s38, 10
+; GCN-NEXT: v_writelane_b32 v1, s39, 11
+; GCN-NEXT: v_writelane_b32 v1, s40, 12
+; GCN-NEXT: v_writelane_b32 v1, s41, 13
+; GCN-NEXT: v_writelane_b32 v1, s42, 14
+; GCN-NEXT: v_writelane_b32 v1, s43, 15
+; GCN-NEXT: v_writelane_b32 v1, s44, 16
+; GCN-NEXT: v_writelane_b32 v1, s45, 17
+; GCN-NEXT: v_writelane_b32 v1, s46, 18
+; GCN-NEXT: v_writelane_b32 v1, s47, 19
+; GCN-NEXT: v_writelane_b32 v1, s48, 20
+; GCN-NEXT: v_writelane_b32 v1, s49, 21
+; GCN-NEXT: v_writelane_b32 v1, s50, 22
+; GCN-NEXT: v_writelane_b32 v1, s51, 23
+; GCN-NEXT: v_writelane_b32 v1, s52, 24
+; GCN-NEXT: v_writelane_b32 v1, s53, 25
+; GCN-NEXT: v_writelane_b32 v1, s54, 26
+; GCN-NEXT: v_writelane_b32 v1, s55, 27
+; GCN-NEXT: v_writelane_b32 v1, s56, 28
+; GCN-NEXT: v_writelane_b32 v1, s57, 29
+; GCN-NEXT: v_writelane_b32 v1, s58, 30
+; GCN-NEXT: v_writelane_b32 v1, s59, 31
+; GCN-NEXT: v_writelane_b32 v1, s60, 32
+; GCN-NEXT: v_writelane_b32 v1, s61, 33
+; GCN-NEXT: v_writelane_b32 v1, s62, 34
+; GCN-NEXT: v_writelane_b32 v1, s63, 35
+; GCN-NEXT: v_writelane_b32 v1, s64, 36
+; GCN-NEXT: v_writelane_b32 v1, s65, 37
+; GCN-NEXT: v_writelane_b32 v1, s66, 38
+; GCN-NEXT: v_writelane_b32 v1, s67, 39
+; GCN-NEXT: v_writelane_b32 v1, s68, 40
+; GCN-NEXT: v_writelane_b32 v1, s69, 41
+; GCN-NEXT: v_writelane_b32 v1, s70, 42
+; GCN-NEXT: v_writelane_b32 v1, s71, 43
+; GCN-NEXT: v_writelane_b32 v1, s72, 44
+; GCN-NEXT: v_writelane_b32 v1, s73, 45
+; GCN-NEXT: v_writelane_b32 v1, s74, 46
+; GCN-NEXT: v_writelane_b32 v1, s75, 47
+; GCN-NEXT: v_writelane_b32 v1, s76, 48
+; GCN-NEXT: v_writelane_b32 v1, s77, 49
+; GCN-NEXT: v_writelane_b32 v1, s78, 50
+; GCN-NEXT: v_writelane_b32 v1, s79, 51
+; GCN-NEXT: v_writelane_b32 v1, s80, 52
+; GCN-NEXT: v_writelane_b32 v1, s81, 53
+; GCN-NEXT: v_writelane_b32 v1, s82, 54
+; GCN-NEXT: v_writelane_b32 v1, s83, 55
+; GCN-NEXT: v_writelane_b32 v1, s84, 56
+; GCN-NEXT: v_writelane_b32 v1, s85, 57
+; GCN-NEXT: v_writelane_b32 v1, s86, 58
+; GCN-NEXT: v_writelane_b32 v1, s87, 59
+; GCN-NEXT: v_writelane_b32 v1, s88, 60
+; GCN-NEXT: v_writelane_b32 v1, s89, 61
+; GCN-NEXT: v_writelane_b32 v1, s90, 62
+; GCN-NEXT: v_writelane_b32 v1, s91, 63
+; GCN-NEXT: v_writelane_b32 v2, s4, 0
+; GCN-NEXT: v_writelane_b32 v2, s5, 1
+; GCN-NEXT: v_writelane_b32 v2, s6, 2
+; GCN-NEXT: v_writelane_b32 v2, s7, 3
+; GCN-NEXT: v_writelane_b32 v2, s8, 4
+; GCN-NEXT: v_writelane_b32 v2, s9, 5
+; GCN-NEXT: v_writelane_b32 v2, s10, 6
+; GCN-NEXT: v_writelane_b32 v2, s11, 7
; GCN: s_cbranch_scc1
@@ -184,6 +184,25 @@
; GCN-NEXT: v_readlane_b32 s[[USE_TMP_HI:[0-9]+]], v0, 7
; GCN: ; use s{{\[}}[[USE_TMP_LO]]:[[USE_TMP_HI]]{{\]}}
+; GCN: v_readlane_b32 s[[USE_TMP_LO:[0-9]+]], v0, 48
+; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 49
+; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 50
+; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 51
+; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 52
+; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 53
+; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 54
+; GCN-NEXT: v_readlane_b32 s[[USE_TMP_HI:[0-9]+]], v0, 55
+; GCN: ; use s{{\[}}[[USE_TMP_LO]]:[[USE_TMP_HI]]{{\]}}
+
+; GCN: v_readlane_b32 s[[USE_TMP_LO:[0-9]+]], v0, 56
+; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 57
+; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 58
+; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 59
+; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 60
+; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 61
+; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 62
+; GCN-NEXT: v_readlane_b32 s[[USE_TMP_HI:[0-9]+]], v0, 63
+; GCN: ; use s{{\[}}[[USE_TMP_LO]]:[[USE_TMP_HI]]{{\]}}
; GCN: v_readlane_b32 s[[USE_TMP_LO]], v1, 0
; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 1
@@ -265,26 +284,6 @@
; GCN-NEXT: v_readlane_b32 s[[USE_TMP_HI]], v1, 63
; GCN: ; use s{{\[}}[[USE_TMP_LO]]:[[USE_TMP_HI]]{{\]}}
-; GCN: v_readlane_b32 s{{[0-9]+}}, v2, 0
-; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v2, 1
-; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v2, 2
-; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v2, 3
-; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v2, 4
-; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v2, 5
-; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v2, 6
-; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v2, 7
-; GCN: ; use s{{\[}}[[USE_TMP_LO]]:[[USE_TMP_HI]]{{\]}}
-
-; GCN: v_readlane_b32 s{{[0-9]+}}, v0, 56
-; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 57
-; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 58
-; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 59
-; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 60
-; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 61
-; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 62
-; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 63
-; GCN: ; use s{{\[}}[[USE_TMP_LO]]:[[USE_TMP_HI]]{{\]}}
-
; GCN: v_readlane_b32 s{{[0-9]+}}, v0, 8
; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 9
; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 10
@@ -335,14 +334,14 @@
; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 47
; GCN: ; use s{{\[}}[[USE_TMP_LO]]:[[USE_TMP_HI]]{{\]}}
-; GCN: v_readlane_b32 s{{[0-9]+}}, v0, 48
-; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 49
-; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 50
-; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 51
-; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 52
-; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 53
-; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 54
-; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 55
+; GCN: v_readlane_b32 s{{[0-9]+}}, v2, 0
+; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v2, 1
+; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v2, 2
+; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v2, 3
+; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v2, 4
+; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v2, 5
+; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v2, 6
+; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v2, 7
; GCN: ; use s{{\[}}[[USE_TMP_LO]]:[[USE_TMP_HI]]{{\]}}
define amdgpu_kernel void @spill_sgprs_to_multiple_vgprs(i32 addrspace(1)* %out, i32 %in) #0 {
%wide.sgpr0 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
@@ -396,39 +395,39 @@ ret:
; GCN: def s[4:19]
; GCN: def s[20:35]
-; GCN: v_writelane_b32 v0, s4, 50
-; GCN-NEXT: v_writelane_b32 v0, s5, 51
-; GCN-NEXT: v_writelane_b32 v0, s6, 52
-; GCN-NEXT: v_writelane_b32 v0, s7, 53
-; GCN-NEXT: v_writelane_b32 v0, s8, 54
-; GCN-NEXT: v_writelane_b32 v0, s9, 55
-; GCN-NEXT: v_writelane_b32 v0, s10, 56
-; GCN-NEXT: v_writelane_b32 v0, s11, 57
-; GCN-NEXT: v_writelane_b32 v0, s12, 58
-; GCN-NEXT: v_writelane_b32 v0, s13, 59
-; GCN-NEXT: v_writelane_b32 v0, s14, 60
-; GCN-NEXT: v_writelane_b32 v0, s15, 61
-; GCN-NEXT: v_writelane_b32 v0, s16, 62
-; GCN-NEXT: v_writelane_b32 v0, s17, 63
-; GCN-NEXT: v_writelane_b32 v1, s18, 0
-; GCN-NEXT: v_writelane_b32 v1, s19, 1
-
-; GCN: v_readlane_b32 s4, v0, 50
-; GCN-NEXT: v_readlane_b32 s5, v0, 51
-; GCN-NEXT: v_readlane_b32 s6, v0, 52
-; GCN-NEXT: v_readlane_b32 s7, v0, 53
-; GCN-NEXT: v_readlane_b32 s8, v0, 54
-; GCN-NEXT: v_readlane_b32 s9, v0, 55
-; GCN-NEXT: v_readlane_b32 s10, v0, 56
-; GCN-NEXT: v_readlane_b32 s11, v0, 57
-; GCN-NEXT: v_readlane_b32 s12, v0, 58
-; GCN-NEXT: v_readlane_b32 s13, v0, 59
-; GCN-NEXT: v_readlane_b32 s14, v0, 60
-; GCN-NEXT: v_readlane_b32 s15, v0, 61
-; GCN-NEXT: v_readlane_b32 s16, v0, 62
-; GCN-NEXT: v_readlane_b32 s17, v0, 63
-; GCN-NEXT: v_readlane_b32 s18, v1, 0
-; GCN-NEXT: v_readlane_b32 s19, v1, 1
+; GCN: v_writelane_b32 v0, s4, 48
+; GCN-NEXT: v_writelane_b32 v0, s5, 49
+; GCN-NEXT: v_writelane_b32 v0, s6, 50
+; GCN-NEXT: v_writelane_b32 v0, s7, 51
+; GCN-NEXT: v_writelane_b32 v0, s8, 52
+; GCN-NEXT: v_writelane_b32 v0, s9, 53
+; GCN-NEXT: v_writelane_b32 v0, s10, 54
+; GCN-NEXT: v_writelane_b32 v0, s11, 55
+; GCN-NEXT: v_writelane_b32 v0, s12, 56
+; GCN-NEXT: v_writelane_b32 v0, s13, 57
+; GCN-NEXT: v_writelane_b32 v0, s14, 58
+; GCN-NEXT: v_writelane_b32 v0, s15, 59
+; GCN-NEXT: v_writelane_b32 v0, s16, 60
+; GCN-NEXT: v_writelane_b32 v0, s17, 61
+; GCN-NEXT: v_writelane_b32 v0, s18, 62
+; GCN-NEXT: v_writelane_b32 v0, s19, 63
+
+; GCN: v_readlane_b32 s4, v0, 48
+; GCN-NEXT: v_readlane_b32 s5, v0, 49
+; GCN-NEXT: v_readlane_b32 s6, v0, 50
+; GCN-NEXT: v_readlane_b32 s7, v0, 51
+; GCN-NEXT: v_readlane_b32 s8, v0, 52
+; GCN-NEXT: v_readlane_b32 s9, v0, 53
+; GCN-NEXT: v_readlane_b32 s10, v0, 54
+; GCN-NEXT: v_readlane_b32 s11, v0, 55
+; GCN-NEXT: v_readlane_b32 s12, v0, 56
+; GCN-NEXT: v_readlane_b32 s13, v0, 57
+; GCN-NEXT: v_readlane_b32 s14, v0, 58
+; GCN-NEXT: v_readlane_b32 s15, v0, 59
+; GCN-NEXT: v_readlane_b32 s16, v0, 60
+; GCN-NEXT: v_readlane_b32 s17, v0, 61
+; GCN-NEXT: v_readlane_b32 s18, v0, 62
+; GCN-NEXT: v_readlane_b32 s19, v0, 63
define amdgpu_kernel void @split_sgpr_spill_2_vgprs(i32 addrspace(1)* %out, i32 %in) #1 {
%wide.sgpr0 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
%wide.sgpr1 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
@@ -493,8 +492,8 @@ ret:
; GCN-NEXT: v_writelane_b32 v23, s{{[[0-9]+}}, 31
; GCN: def s[0:1]
-; GCN: v_writelane_b32 v23, s0, 32
-; GCN-NEXT: v_writelane_b32 v23, s1, 33
+; GCN: v_writelane_b32 v23, s20, 32
+; GCN-NEXT: v_writelane_b32 v23, s21, 33
; GCN-NEXT: v_writelane_b32 v23, s{{[[0-9]+}}, 34
; GCN-NEXT: v_writelane_b32 v23, s{{[[0-9]+}}, 35
@@ -515,20 +514,6 @@ ret:
; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}
; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}
-; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}
-; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}
-; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}
-; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}
-; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}
-; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}
-; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}
-; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}
-; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}
-; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}
-; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}
-; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}
-; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}
-; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}
; GCN: s_cbranch_scc1
@@ -551,7 +536,9 @@ ret:
; GCN: ; use s{{\[}}[[USE_TMP_LO]]:[[USE_TMP_HI]]{{\]}}
-; GCN: v_readlane_b32 s[[USE_TMP_LO:[0-9]+]], v23, 34
+; GCN: v_readlane_b32 s[[USE_TMP_LO:[0-9]+]], v23, 32
+; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 33
+; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 34
; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 35
; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 36
; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 37
@@ -564,9 +551,7 @@ ret:
; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 44
; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 45
; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 46
-; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 47
-; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 48
-; GCN-NEXT: v_readlane_b32 s[[USE_TMP_HI:[0-9]+]], v23, 49
+; GCN-NEXT: v_readlane_b32 s[[USE_TMP_HI:[0-9]+]], v23, 47
; GCN: ; use s{{\[}}[[USE_TMP_LO]]:[[USE_TMP_HI]]{{\]}}
; GCN: v_readlane_b32 s[[USE_TMP_LO:[0-9]+]], v23, 16
@@ -589,23 +574,8 @@ ret:
; GCN: buffer_load_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}
; GCN: buffer_load_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}
-; GCN: buffer_load_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}
-; GCN: buffer_load_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}
-; GCN: buffer_load_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}
-; GCN: buffer_load_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}
-; GCN: buffer_load_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}
-; GCN: buffer_load_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}
-; GCN: buffer_load_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}
-; GCN: buffer_load_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}
-; GCN: buffer_load_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}
-; GCN: buffer_load_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}
-; GCN: buffer_load_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}
-; GCN: buffer_load_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}
-; GCN: buffer_load_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}
-; GCN: buffer_load_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}
-; GCN: v_readlane_b32 s0, v23, 32
-; GCN: v_readlane_b32 s1, v23, 33
+; GCN: v_readfirstlane_b32 s1, v0
; GCN: ;;#ASMSTART
; GCN: ; use s[0:1]
define amdgpu_kernel void @no_vgprs_last_sgpr_spill(i32 addrspace(1)* %out, i32 %in) #1 {
diff --git a/llvm/test/CodeGen/AMDGPU/spill-m0.ll b/llvm/test/CodeGen/AMDGPU/spill-m0.ll
index ab54f9096cf..509b7a2dd68 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-m0.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-m0.ll
@@ -13,29 +13,29 @@
; GCN-DAG: s_cmp_lg_u32
; TOVGPR-DAG: s_mov_b32 [[M0_COPY:s[0-9]+]], m0
-; TOVGPR: v_writelane_b32 [[SPILL_VREG:v[0-9]+]], [[M0_COPY]], 0
+; TOVGPR: v_writelane_b32 [[SPILL_VREG:v[0-9]+]], [[M0_COPY]], 2
; TOVMEM-DAG: s_mov_b32 [[M0_COPY:s[0-9]+]], m0
; TOVMEM-DAG: v_mov_b32_e32 [[SPILL_VREG:v[0-9]+]], [[M0_COPY]]
-; TOVMEM: buffer_store_dword [[SPILL_VREG]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:4 ; 4-byte Folded Spill
+; TOVMEM: buffer_store_dword [[SPILL_VREG]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:12 ; 4-byte Folded Spill
; TOSMEM-DAG: s_mov_b32 [[M0_COPY:s[0-9]+]], m0
-; TOSMEM: s_add_u32 m0, s3, 0x100{{$}}
+; TOSMEM: s_add_u32 m0, s3, 0x300{{$}}
; TOSMEM-NOT: [[M0_COPY]]
; TOSMEM: s_buffer_store_dword [[M0_COPY]], s{{\[}}[[LO]]:[[HI]]], m0 ; 4-byte Folded Spill
; GCN: s_cbranch_scc1 [[ENDIF:BB[0-9]+_[0-9]+]]
; GCN: [[ENDIF]]:
-; TOVGPR: v_readlane_b32 [[M0_RESTORE:s[0-9]+]], [[SPILL_VREG]], 0
+; TOVGPR: v_readlane_b32 [[M0_RESTORE:s[0-9]+]], [[SPILL_VREG]], 2
; TOVGPR: s_mov_b32 m0, [[M0_RESTORE]]
-; TOVMEM: buffer_load_dword [[RELOAD_VREG:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:4 ; 4-byte Folded Reload
+; TOVMEM: buffer_load_dword [[RELOAD_VREG:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:12 ; 4-byte Folded Reload
; TOVMEM: s_waitcnt vmcnt(0)
; TOVMEM: v_readfirstlane_b32 [[M0_RESTORE:s[0-9]+]], [[RELOAD_VREG]]
; TOVMEM: s_mov_b32 m0, [[M0_RESTORE]]
-; TOSMEM: s_add_u32 m0, s3, 0x100{{$}}
+; TOSMEM: s_add_u32 m0, s3, 0x300{{$}}
; TOSMEM: s_buffer_load_dword [[M0_RESTORE:s[0-9]+]], s{{\[}}[[LO]]:[[HI]]], m0 ; 4-byte Folded Reload
; TOSMEM-NOT: [[M0_RESTORE]]
; TOSMEM: s_mov_b32 m0, [[M0_RESTORE]]
@@ -80,7 +80,7 @@ endif:
; TOSMEM: s_branch
; TOSMEM: BB{{[0-9]+_[0-9]+}}:
-; TOSMEM: s_add_u32 m0, s7, 0x400
+; TOSMEM: s_add_u32 m0, s7, 0x500
; TOSMEM-NEXT: s_buffer_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, m0 ; 8-byte Folded Reload
@@ -162,17 +162,17 @@ endif:
; FIXME: RegScavenger::isRegUsed() always returns true if m0 is reserved, so we have to save and restore it
; FIXME-TOSMEM-NOT: m0
; TOSMEM: s_add_u32 m0, s3, 0x100
-; TOSMEM: s_buffer_store_dwordx2 [[REG]], s[88:91], m0 ; 8-byte Folded Spill
-; FIXME-TOSMEM-NOT: m0
-; TOSMEM: s_add_u32 m0, s3, 0x300
; TOSMEM: s_buffer_store_dword s{{[0-9]+}}, s[88:91], m0 ; 4-byte Folded Spill
; FIXME-TOSMEM-NOT: m0
+; TOSMEM: s_add_u32 m0, s3, 0x200
+; TOSMEM: s_buffer_store_dwordx2 [[REG]], s[88:91], m0 ; 8-byte Folded Spill
+; FIXME-TOSMEM-NOT: m0
; TOSMEM: s_cbranch_scc1
; TOSMEM: s_mov_b32 m0, -1
; TOSMEM: s_mov_b32 s0, m0
-; TOSMEM: s_add_u32 m0, s3, 0x100
+; TOSMEM: s_add_u32 m0, s3, 0x200
; TOSMEM: s_buffer_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[88:91], m0 ; 8-byte Folded Reload
; TOSMEM: s_mov_b32 m0, s0
; TOSMEM: s_waitcnt lgkmcnt(0)
@@ -180,7 +180,7 @@ endif:
; TOSMEM: ds_write_b64
; FIXME-TOSMEM-NOT: m0
-; TOSMEM: s_add_u32 m0, s3, 0x300
+; TOSMEM: s_add_u32 m0, s3, 0x100
; TOSMEM: s_buffer_load_dword s0, s[88:91], m0 ; 4-byte Folded Reload
; FIXME-TOSMEM-NOT: m0
; TOSMEM: s_waitcnt lgkmcnt(0)
diff --git a/llvm/test/CodeGen/Mips/atomic.ll b/llvm/test/CodeGen/Mips/atomic.ll
index b58338aa6fd..3d516ea2638 100644
--- a/llvm/test/CodeGen/Mips/atomic.ll
+++ b/llvm/test/CodeGen/Mips/atomic.ll
@@ -2038,10 +2038,10 @@ define i32 @AtomicCmpSwap32(i32 signext %oldval, i32 signext %newval) nounwind {
; MIPS32R6O0-NEXT: beqzc $7, $BB7_1
; MIPS32R6O0-NEXT: $BB7_3: # %entry
; MIPS32R6O0-NEXT: move $2, $6
+; MIPS32R6O0-NEXT: sw $25, 12($sp) # 4-byte Folded Spill
+; MIPS32R6O0-NEXT: sw $1, 8($sp) # 4-byte Folded Spill
; MIPS32R6O0-NEXT: sw $6, 16($sp) # 4-byte Folded Spill
-; MIPS32R6O0-NEXT: sw $1, 12($sp) # 4-byte Folded Spill
-; MIPS32R6O0-NEXT: sw $3, 8($sp) # 4-byte Folded Spill
-; MIPS32R6O0-NEXT: sw $25, 4($sp) # 4-byte Folded Spill
+; MIPS32R6O0-NEXT: sw $3, 4($sp) # 4-byte Folded Spill
; MIPS32R6O0-NEXT: addiu $sp, $sp, 24
; MIPS32R6O0-NEXT: jrc $ra
;
@@ -4550,11 +4550,11 @@ define signext i8 @AtomicCmpSwap8(i8 signext %oldval, i8 signext %newval) nounwi
; MIPS32R6O0-NEXT: srlv $8, $10, $2
; MIPS32R6O0-NEXT: seb $8, $8
; MIPS32R6O0-NEXT: # %bb.4: # %entry
-; MIPS32R6O0-NEXT: sw $1, 12($sp) # 4-byte Folded Spill
-; MIPS32R6O0-NEXT: sw $8, 8($sp) # 4-byte Folded Spill
-; MIPS32R6O0-NEXT: sw $25, 4($sp) # 4-byte Folded Spill
+; MIPS32R6O0-NEXT: sw $25, 12($sp) # 4-byte Folded Spill
+; MIPS32R6O0-NEXT: sw $1, 8($sp) # 4-byte Folded Spill
+; MIPS32R6O0-NEXT: sw $8, 4($sp) # 4-byte Folded Spill
; MIPS32R6O0-NEXT: # %bb.5: # %entry
-; MIPS32R6O0-NEXT: lw $2, 8($sp) # 4-byte Folded Reload
+; MIPS32R6O0-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPS32R6O0-NEXT: addiu $sp, $sp, 16
; MIPS32R6O0-NEXT: jrc $ra
;
@@ -5127,14 +5127,14 @@ define i1 @AtomicCmpSwapRes8(i8* %ptr, i8 signext %oldval, i8 signext %newval) n
; MIPS32R6O0-NEXT: srlv $11, $13, $4
; MIPS32R6O0-NEXT: seb $11, $11
; MIPS32R6O0-NEXT: # %bb.4: # %entry
-; MIPS32R6O0-NEXT: sw $11, 20($sp) # 4-byte Folded Spill
-; MIPS32R6O0-NEXT: sw $5, 16($sp) # 4-byte Folded Spill
-; MIPS32R6O0-NEXT: sw $3, 12($sp) # 4-byte Folded Spill
-; MIPS32R6O0-NEXT: sw $1, 8($sp) # 4-byte Folded Spill
-; MIPS32R6O0-NEXT: sw $2, 4($sp) # 4-byte Folded Spill
+; MIPS32R6O0-NEXT: sw $5, 20($sp) # 4-byte Folded Spill
+; MIPS32R6O0-NEXT: sw $1, 16($sp) # 4-byte Folded Spill
+; MIPS32R6O0-NEXT: sw $2, 12($sp) # 4-byte Folded Spill
+; MIPS32R6O0-NEXT: sw $3, 8($sp) # 4-byte Folded Spill
+; MIPS32R6O0-NEXT: sw $11, 4($sp) # 4-byte Folded Spill
; MIPS32R6O0-NEXT: # %bb.5: # %entry
-; MIPS32R6O0-NEXT: lw $1, 20($sp) # 4-byte Folded Reload
-; MIPS32R6O0-NEXT: lw $2, 16($sp) # 4-byte Folded Reload
+; MIPS32R6O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload
+; MIPS32R6O0-NEXT: lw $2, 20($sp) # 4-byte Folded Reload
; MIPS32R6O0-NEXT: xor $1, $1, $2
; MIPS32R6O0-NEXT: sltiu $2, $1, 1
; MIPS32R6O0-NEXT: addiu $sp, $sp, 24
@@ -5282,7 +5282,7 @@ define i1 @AtomicCmpSwapRes8(i8* %ptr, i8 signext %oldval, i8 signext %newval) n
;
; MIPS64R6O0-LABEL: AtomicCmpSwapRes8:
; MIPS64R6O0: # %bb.0: # %entry
-; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16
+; MIPS64R6O0-NEXT: daddiu $sp, $sp, -32
; MIPS64R6O0-NEXT: move $1, $6
; MIPS64R6O0-NEXT: move $2, $5
; MIPS64R6O0-NEXT: move $5, $4
@@ -5313,15 +5313,15 @@ define i1 @AtomicCmpSwapRes8(i8* %ptr, i8 signext %oldval, i8 signext %newval) n
; MIPS64R6O0-NEXT: srlv $10, $12, $3
; MIPS64R6O0-NEXT: seb $10, $10
; MIPS64R6O0-NEXT: # %bb.4: # %entry
-; MIPS64R6O0-NEXT: sd $5, 8($sp) # 8-byte Folded Spill
-; MIPS64R6O0-NEXT: sw $10, 4($sp) # 4-byte Folded Spill
-; MIPS64R6O0-NEXT: sw $2, 0($sp) # 4-byte Folded Spill
+; MIPS64R6O0-NEXT: sw $2, 28($sp) # 4-byte Folded Spill
+; MIPS64R6O0-NEXT: sd $5, 16($sp) # 8-byte Folded Spill
+; MIPS64R6O0-NEXT: sw $10, 12($sp) # 4-byte Folded Spill
; MIPS64R6O0-NEXT: # %bb.5: # %entry
-; MIPS64R6O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload
-; MIPS64R6O0-NEXT: lw $2, 0($sp) # 4-byte Folded Reload
+; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload
+; MIPS64R6O0-NEXT: lw $2, 28($sp) # 4-byte Folded Reload
; MIPS64R6O0-NEXT: xor $1, $1, $2
; MIPS64R6O0-NEXT: sltiu $2, $1, 1
-; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16
+; MIPS64R6O0-NEXT: daddiu $sp, $sp, 32
; MIPS64R6O0-NEXT: jrc $ra
;
; MM32-LABEL: AtomicCmpSwapRes8:
@@ -6233,20 +6233,20 @@ define {i16, i1} @foo(i16* %addr, i16 %l, i16 %r, i16 %new) {
; MIPS32R6O0-NEXT: srlv $12, $14, $4
; MIPS32R6O0-NEXT: seh $12, $12
; MIPS32R6O0-NEXT: # %bb.4:
-; MIPS32R6O0-NEXT: sw $12, 20($sp) # 4-byte Folded Spill
-; MIPS32R6O0-NEXT: sw $3, 16($sp) # 4-byte Folded Spill
-; MIPS32R6O0-NEXT: sw $8, 12($sp) # 4-byte Folded Spill
-; MIPS32R6O0-NEXT: sw $5, 8($sp) # 4-byte Folded Spill
-; MIPS32R6O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
-; MIPS32R6O0-NEXT: sw $2, 0($sp) # 4-byte Folded Spill
+; MIPS32R6O0-NEXT: sw $1, 20($sp) # 4-byte Folded Spill
+; MIPS32R6O0-NEXT: sw $2, 16($sp) # 4-byte Folded Spill
+; MIPS32R6O0-NEXT: sw $3, 12($sp) # 4-byte Folded Spill
+; MIPS32R6O0-NEXT: sw $8, 8($sp) # 4-byte Folded Spill
+; MIPS32R6O0-NEXT: sw $5, 4($sp) # 4-byte Folded Spill
+; MIPS32R6O0-NEXT: sw $12, 0($sp) # 4-byte Folded Spill
; MIPS32R6O0-NEXT: # %bb.5:
-; MIPS32R6O0-NEXT: lw $1, 8($sp) # 4-byte Folded Reload
+; MIPS32R6O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload
; MIPS32R6O0-NEXT: seh $2, $1
-; MIPS32R6O0-NEXT: lw $3, 20($sp) # 4-byte Folded Reload
+; MIPS32R6O0-NEXT: lw $3, 0($sp) # 4-byte Folded Reload
; MIPS32R6O0-NEXT: xor $2, $3, $2
; MIPS32R6O0-NEXT: sltiu $3, $2, 1
; MIPS32R6O0-NEXT: sync
-; MIPS32R6O0-NEXT: lw $2, 20($sp) # 4-byte Folded Reload
+; MIPS32R6O0-NEXT: lw $2, 0($sp) # 4-byte Folded Reload
; MIPS32R6O0-NEXT: addiu $sp, $sp, 24
; MIPS32R6O0-NEXT: jrc $ra
;
@@ -6449,17 +6449,17 @@ define {i16, i1} @foo(i16* %addr, i16 %l, i16 %r, i16 %new) {
; MIPS64R6O0-NEXT: srlv $11, $13, $3
; MIPS64R6O0-NEXT: seh $11, $11
; MIPS64R6O0-NEXT: # %bb.4:
-; MIPS64R6O0-NEXT: sw $2, 12($sp) # 4-byte Folded Spill
-; MIPS64R6O0-NEXT: sw $11, 8($sp) # 4-byte Folded Spill
-; MIPS64R6O0-NEXT: sd $5, 0($sp) # 8-byte Folded Spill
+; MIPS64R6O0-NEXT: sd $5, 8($sp) # 8-byte Folded Spill
+; MIPS64R6O0-NEXT: sw $2, 4($sp) # 4-byte Folded Spill
+; MIPS64R6O0-NEXT: sw $11, 0($sp) # 4-byte Folded Spill
; MIPS64R6O0-NEXT: # %bb.5:
-; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload
+; MIPS64R6O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload
; MIPS64R6O0-NEXT: seh $2, $1
-; MIPS64R6O0-NEXT: lw $3, 8($sp) # 4-byte Folded Reload
+; MIPS64R6O0-NEXT: lw $3, 0($sp) # 4-byte Folded Reload
; MIPS64R6O0-NEXT: xor $2, $3, $2
; MIPS64R6O0-NEXT: sltiu $3, $2, 1
; MIPS64R6O0-NEXT: sync
-; MIPS64R6O0-NEXT: lw $2, 8($sp) # 4-byte Folded Reload
+; MIPS64R6O0-NEXT: lw $2, 0($sp) # 4-byte Folded Reload
; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16
; MIPS64R6O0-NEXT: jrc $ra
;
@@ -7016,8 +7016,8 @@ define i32 @zeroreg() nounwind {
; MIPS32O0-NEXT: xor $2, $5, $2
; MIPS32O0-NEXT: sltiu $2, $2, 1
; MIPS32O0-NEXT: andi $2, $2, 1
-; MIPS32O0-NEXT: sw $3, 8($sp) # 4-byte Folded Spill
; MIPS32O0-NEXT: sw $5, 12($sp) # 4-byte Folded Spill
+; MIPS32O0-NEXT: sw $3, 8($sp) # 4-byte Folded Spill
; MIPS32O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
; MIPS32O0-NEXT: addiu $sp, $sp, 16
; MIPS32O0-NEXT: jr $ra
@@ -7099,8 +7099,8 @@ define i32 @zeroreg() nounwind {
; MIPS32R6O0-NEXT: xor $1, $5, $1
; MIPS32R6O0-NEXT: sltiu $2, $1, 1
; MIPS32R6O0-NEXT: sync
-; MIPS32R6O0-NEXT: sw $3, 0($sp) # 4-byte Folded Spill
; MIPS32R6O0-NEXT: sw $5, 4($sp) # 4-byte Folded Spill
+; MIPS32R6O0-NEXT: sw $3, 0($sp) # 4-byte Folded Spill
; MIPS32R6O0-NEXT: addiu $sp, $sp, 8
; MIPS32R6O0-NEXT: jrc $ra
;
@@ -7234,8 +7234,8 @@ define i32 @zeroreg() nounwind {
; MIPS64R6O0-NEXT: xor $2, $6, $3
; MIPS64R6O0-NEXT: sltiu $2, $2, 1
; MIPS64R6O0-NEXT: sync
-; MIPS64R6O0-NEXT: sw $4, 8($sp) # 4-byte Folded Spill
; MIPS64R6O0-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64R6O0-NEXT: sw $4, 8($sp) # 4-byte Folded Spill
; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16
; MIPS64R6O0-NEXT: jrc $ra
;
diff --git a/llvm/test/CodeGen/Mips/atomic64.ll b/llvm/test/CodeGen/Mips/atomic64.ll
index aa8442d488b..8e5002b38b0 100644
--- a/llvm/test/CodeGen/Mips/atomic64.ll
+++ b/llvm/test/CodeGen/Mips/atomic64.ll
@@ -1289,8 +1289,8 @@ define i64 @AtomicCmpSwap64(i64 signext %oldval, i64 signext %newval) nounwind {
; MIPS64R6O0-NEXT: .LBB7_3: # %entry
; MIPS64R6O0-NEXT: sd $2, 24($sp) # 8-byte Folded Spill
; MIPS64R6O0-NEXT: move $2, $6
-; MIPS64R6O0-NEXT: sd $6, 32($sp) # 8-byte Folded Spill
; MIPS64R6O0-NEXT: sd $25, 16($sp) # 8-byte Folded Spill
+; MIPS64R6O0-NEXT: sd $6, 32($sp) # 8-byte Folded Spill
; MIPS64R6O0-NEXT: sd $3, 8($sp) # 8-byte Folded Spill
; MIPS64R6O0-NEXT: daddiu $sp, $sp, 48
; MIPS64R6O0-NEXT: jrc $ra
diff --git a/llvm/test/CodeGen/Mips/atomicCmpSwapPW.ll b/llvm/test/CodeGen/Mips/atomicCmpSwapPW.ll
index 10610e34e71..973f3a5bf0b 100644
--- a/llvm/test/CodeGen/Mips/atomicCmpSwapPW.ll
+++ b/llvm/test/CodeGen/Mips/atomicCmpSwapPW.ll
@@ -32,10 +32,10 @@ define void @foo(i32 %new, i32 %old) {
; O32-NEXT: nop
; O32-NEXT: $BB0_3: # %entry
; O32-NEXT: sync
+; O32-NEXT: sw $1, 8($sp) # 4-byte Folded Spill
+; O32-NEXT: sw $2, 4($sp) # 4-byte Folded Spill
; O32-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
-; O32-NEXT: sw $6, 8($sp) # 4-byte Folded Spill
-; O32-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
-; O32-NEXT: sw $2, 0($sp) # 4-byte Folded Spill
+; O32-NEXT: sw $6, 0($sp) # 4-byte Folded Spill
; O32-NEXT: addiu $sp, $sp, 16
; O32-NEXT: jr $ra
; O32-NEXT: nop
diff --git a/llvm/test/CodeGen/X86/atomic32.ll b/llvm/test/CodeGen/X86/atomic32.ll
index 5e78444eea7..519b169c0f9 100644
--- a/llvm/test/CodeGen/X86/atomic32.ll
+++ b/llvm/test/CodeGen/X86/atomic32.ll
@@ -71,8 +71,8 @@ define void @atomic_fetch_and32() nounwind {
; X64-NEXT: sete %dl
; X64-NEXT: testb $1, %dl
; X64-NEXT: movl %eax, %ecx
-; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: jne .LBB2_2
; X64-NEXT: jmp .LBB2_1
; X64-NEXT: .LBB2_2: # %atomicrmw.end
@@ -95,8 +95,8 @@ define void @atomic_fetch_and32() nounwind {
; X86-NEXT: sete %dl
; X86-NEXT: testb $1, %dl
; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-NEXT: jne .LBB2_2
; X86-NEXT: jmp .LBB2_1
; X86-NEXT: .LBB2_2: # %atomicrmw.end
@@ -125,8 +125,8 @@ define void @atomic_fetch_or32() nounwind {
; X64-NEXT: sete %dl
; X64-NEXT: testb $1, %dl
; X64-NEXT: movl %eax, %ecx
-; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: jne .LBB3_2
; X64-NEXT: jmp .LBB3_1
; X64-NEXT: .LBB3_2: # %atomicrmw.end
@@ -149,8 +149,8 @@ define void @atomic_fetch_or32() nounwind {
; X86-NEXT: sete %dl
; X86-NEXT: testb $1, %dl
; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-NEXT: jne .LBB3_2
; X86-NEXT: jmp .LBB3_1
; X86-NEXT: .LBB3_2: # %atomicrmw.end
@@ -179,8 +179,8 @@ define void @atomic_fetch_xor32() nounwind {
; X64-NEXT: sete %dl
; X64-NEXT: testb $1, %dl
; X64-NEXT: movl %eax, %ecx
-; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: jne .LBB4_2
; X64-NEXT: jmp .LBB4_1
; X64-NEXT: .LBB4_2: # %atomicrmw.end
@@ -203,8 +203,8 @@ define void @atomic_fetch_xor32() nounwind {
; X86-NEXT: sete %dl
; X86-NEXT: testb $1, %dl
; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-NEXT: jne .LBB4_2
; X86-NEXT: jmp .LBB4_1
; X86-NEXT: .LBB4_2: # %atomicrmw.end
@@ -285,8 +285,8 @@ define void @atomic_fetch_max32(i32 %x) nounwind {
; X64-NEXT: lock cmpxchgl %edx, {{.*}}(%rip)
; X64-NEXT: sete %sil
; X64-NEXT: testb $1, %sil
-; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: jne .LBB6_2
; X64-NEXT: jmp .LBB6_1
; X64-NEXT: .LBB6_2: # %atomicrmw.end
@@ -310,8 +310,8 @@ define void @atomic_fetch_max32(i32 %x) nounwind {
; X86-CMOV-NEXT: lock cmpxchgl %edx, sc32
; X86-CMOV-NEXT: sete %bl
; X86-CMOV-NEXT: testb $1, %bl
-; X86-CMOV-NEXT: movl %ecx, (%esp) # 4-byte Spill
; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-CMOV-NEXT: movl %ecx, (%esp) # 4-byte Spill
; X86-CMOV-NEXT: jne .LBB6_2
; X86-CMOV-NEXT: jmp .LBB6_1
; X86-CMOV-NEXT: .LBB6_2: # %atomicrmw.end
@@ -381,8 +381,8 @@ define void @atomic_fetch_min32(i32 %x) nounwind {
; X64-NEXT: lock cmpxchgl %edx, {{.*}}(%rip)
; X64-NEXT: sete %sil
; X64-NEXT: testb $1, %sil
-; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: jne .LBB7_2
; X64-NEXT: jmp .LBB7_1
; X64-NEXT: .LBB7_2: # %atomicrmw.end
@@ -406,8 +406,8 @@ define void @atomic_fetch_min32(i32 %x) nounwind {
; X86-CMOV-NEXT: lock cmpxchgl %edx, sc32
; X86-CMOV-NEXT: sete %bl
; X86-CMOV-NEXT: testb $1, %bl
-; X86-CMOV-NEXT: movl %ecx, (%esp) # 4-byte Spill
; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-CMOV-NEXT: movl %ecx, (%esp) # 4-byte Spill
; X86-CMOV-NEXT: jne .LBB7_2
; X86-CMOV-NEXT: jmp .LBB7_1
; X86-CMOV-NEXT: .LBB7_2: # %atomicrmw.end
@@ -477,8 +477,8 @@ define void @atomic_fetch_umax32(i32 %x) nounwind {
; X64-NEXT: lock cmpxchgl %edx, {{.*}}(%rip)
; X64-NEXT: sete %sil
; X64-NEXT: testb $1, %sil
-; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: jne .LBB8_2
; X64-NEXT: jmp .LBB8_1
; X64-NEXT: .LBB8_2: # %atomicrmw.end
@@ -502,8 +502,8 @@ define void @atomic_fetch_umax32(i32 %x) nounwind {
; X86-CMOV-NEXT: lock cmpxchgl %edx, sc32
; X86-CMOV-NEXT: sete %bl
; X86-CMOV-NEXT: testb $1, %bl
-; X86-CMOV-NEXT: movl %ecx, (%esp) # 4-byte Spill
; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-CMOV-NEXT: movl %ecx, (%esp) # 4-byte Spill
; X86-CMOV-NEXT: jne .LBB8_2
; X86-CMOV-NEXT: jmp .LBB8_1
; X86-CMOV-NEXT: .LBB8_2: # %atomicrmw.end
@@ -573,8 +573,8 @@ define void @atomic_fetch_umin32(i32 %x) nounwind {
; X64-NEXT: lock cmpxchgl %edx, {{.*}}(%rip)
; X64-NEXT: sete %sil
; X64-NEXT: testb $1, %sil
-; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: jne .LBB9_2
; X64-NEXT: jmp .LBB9_1
; X64-NEXT: .LBB9_2: # %atomicrmw.end
@@ -598,8 +598,8 @@ define void @atomic_fetch_umin32(i32 %x) nounwind {
; X86-CMOV-NEXT: lock cmpxchgl %edx, sc32
; X86-CMOV-NEXT: sete %bl
; X86-CMOV-NEXT: testb $1, %bl
-; X86-CMOV-NEXT: movl %ecx, (%esp) # 4-byte Spill
; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-CMOV-NEXT: movl %ecx, (%esp) # 4-byte Spill
; X86-CMOV-NEXT: jne .LBB9_2
; X86-CMOV-NEXT: jmp .LBB9_1
; X86-CMOV-NEXT: .LBB9_2: # %atomicrmw.end
diff --git a/llvm/test/CodeGen/X86/avx-load-store.ll b/llvm/test/CodeGen/X86/avx-load-store.ll
index d55bbac5dc1..ea42aa34d8c 100644
--- a/llvm/test/CodeGen/X86/avx-load-store.ll
+++ b/llvm/test/CodeGen/X86/avx-load-store.ll
@@ -13,15 +13,15 @@ define void @test_256_load(double* nocapture %d, float* nocapture %f, <4 x i64>*
; CHECK-NEXT: movq %rsi, %r15
; CHECK-NEXT: movq %rdi, %rbx
; CHECK-NEXT: vmovaps (%rdi), %ymm0
-; CHECK-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) # 32-byte Spill
+; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: vmovaps (%rsi), %ymm1
-; CHECK-NEXT: vmovups %ymm1, {{[0-9]+}}(%rsp) # 32-byte Spill
+; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: vmovaps (%rdx), %ymm2
; CHECK-NEXT: vmovups %ymm2, (%rsp) # 32-byte Spill
; CHECK-NEXT: callq dummy
-; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
+; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; CHECK-NEXT: vmovaps %ymm0, (%rbx)
-; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
+; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; CHECK-NEXT: vmovaps %ymm0, (%r15)
; CHECK-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
; CHECK-NEXT: vmovaps %ymm0, (%r14)
@@ -38,21 +38,21 @@ define void @test_256_load(double* nocapture %d, float* nocapture %f, <4 x i64>*
; CHECK_O0-NEXT: vmovapd (%rdi), %ymm0
; CHECK_O0-NEXT: vmovaps (%rsi), %ymm1
; CHECK_O0-NEXT: vmovdqa (%rdx), %ymm2
-; CHECK_O0-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) # 32-byte Spill
-; CHECK_O0-NEXT: vmovups %ymm1, {{[0-9]+}}(%rsp) # 32-byte Spill
-; CHECK_O0-NEXT: vmovups %ymm2, {{[0-9]+}}(%rsp) # 32-byte Spill
-; CHECK_O0-NEXT: movq %rsi, {{[0-9]+}}(%rsp) # 8-byte Spill
-; CHECK_O0-NEXT: movq %rdi, {{[0-9]+}}(%rsp) # 8-byte Spill
-; CHECK_O0-NEXT: movq %rdx, {{[0-9]+}}(%rsp) # 8-byte Spill
+; CHECK_O0-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; CHECK_O0-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; CHECK_O0-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; CHECK_O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK_O0-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK_O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK_O0-NEXT: callq dummy
-; CHECK_O0-NEXT: movq {{[0-9]+}}(%rsp), %rdx # 8-byte Reload
-; CHECK_O0-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
+; CHECK_O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; CHECK_O0-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; CHECK_O0-NEXT: vmovapd %ymm0, (%rdx)
-; CHECK_O0-NEXT: movq {{[0-9]+}}(%rsp), %rsi # 8-byte Reload
-; CHECK_O0-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm1 # 32-byte Reload
+; CHECK_O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; CHECK_O0-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
; CHECK_O0-NEXT: vmovaps %ymm1, (%rsi)
-; CHECK_O0-NEXT: movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload
-; CHECK_O0-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm2 # 32-byte Reload
+; CHECK_O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; CHECK_O0-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload
; CHECK_O0-NEXT: vmovdqa %ymm2, (%rdi)
; CHECK_O0-NEXT: addq $152, %rsp
; CHECK_O0-NEXT: vzeroupper
diff --git a/llvm/test/CodeGen/X86/avx512-mask-zext-bugfix.ll b/llvm/test/CodeGen/X86/avx512-mask-zext-bugfix.ll
index 4d939bd5b8c..fed87ebf6eb 100755
--- a/llvm/test/CodeGen/X86/avx512-mask-zext-bugfix.ll
+++ b/llvm/test/CodeGen/X86/avx512-mask-zext-bugfix.ll
@@ -17,50 +17,50 @@ declare i32 @check_mask16(i16 zeroext %res_mask, i16 zeroext %exp_mask, i8* %fna
define void @test_xmm(i32 %shift, i32 %mulp, <2 x i64> %a,i8* %arraydecay,i8* %fname){
; CHECK-LABEL: test_xmm:
; CHECK: ## %bb.0:
-; CHECK-NEXT: subq $56, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: subq $72, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 80
; CHECK-NEXT: vpmovw2m %xmm0, %k0
; CHECK-NEXT: movl $2, %esi
; CHECK-NEXT: movl $8, %eax
; CHECK-NEXT: movq %rdx, %rdi
-; CHECK-NEXT: movq %rdx, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; CHECK-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
; CHECK-NEXT: movl %eax, %edx
-; CHECK-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ## 2-byte Spill
-; CHECK-NEXT: movq %rcx, {{[0-9]+}}(%rsp) ## 8-byte Spill
-; CHECK-NEXT: vmovaps %xmm0, {{[0-9]+}}(%rsp) ## 16-byte Spill
+; CHECK-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
+; CHECK-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; CHECK-NEXT: callq _calc_expected_mask_val
; CHECK-NEXT: movl %eax, %edx
; CHECK-NEXT: movw %dx, %r8w
; CHECK-NEXT: movzwl %r8w, %esi
-; CHECK-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Reload
+; CHECK-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 2-byte Reload
; CHECK-NEXT: kmovb %k0, %edi
-; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rdx ## 8-byte Reload
-; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rcx ## 8-byte Reload
+; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx ## 8-byte Reload
+; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx ## 8-byte Reload
; CHECK-NEXT: callq _check_mask16
-; CHECK-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 ## 16-byte Reload
+; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
; CHECK-NEXT: vpmovd2m %xmm0, %k0
; CHECK-NEXT: kmovq %k0, %k1
; CHECK-NEXT: kmovd %k0, %esi
; CHECK-NEXT: movb %sil, %r9b
; CHECK-NEXT: movzbl %r9b, %esi
; CHECK-NEXT: movw %si, %r8w
-; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rdi ## 8-byte Reload
+; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi ## 8-byte Reload
; CHECK-NEXT: movl $4, %esi
-; CHECK-NEXT: movl %esi, {{[0-9]+}}(%rsp) ## 4-byte Spill
-; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %edx ## 4-byte Reload
-; CHECK-NEXT: movl %eax, {{[0-9]+}}(%rsp) ## 4-byte Spill
-; CHECK-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Spill
-; CHECK-NEXT: movw %r8w, {{[0-9]+}}(%rsp) ## 2-byte Spill
+; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
+; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edx ## 4-byte Reload
+; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
+; CHECK-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
+; CHECK-NEXT: movw %r8w, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; CHECK-NEXT: callq _calc_expected_mask_val
; CHECK-NEXT: movw %ax, %r8w
-; CHECK-NEXT: movw {{[0-9]+}}(%rsp), %r10w ## 2-byte Reload
+; CHECK-NEXT: movw {{[-0-9]+}}(%r{{[sb]}}p), %r10w ## 2-byte Reload
; CHECK-NEXT: movzwl %r10w, %edi
; CHECK-NEXT: movzwl %r8w, %esi
-; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rdx ## 8-byte Reload
-; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rcx ## 8-byte Reload
+; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx ## 8-byte Reload
+; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx ## 8-byte Reload
; CHECK-NEXT: callq _check_mask16
-; CHECK-NEXT: movl %eax, (%rsp) ## 4-byte Spill
-; CHECK-NEXT: addq $56, %rsp
+; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
+; CHECK-NEXT: addq $72, %rsp
; CHECK-NEXT: retq
%d2 = bitcast <2 x i64> %a to <8 x i16>
%m2 = call i8 @llvm.x86.avx512.cvtw2mask.128(<8 x i16> %d2)
diff --git a/llvm/test/CodeGen/X86/pr30430.ll b/llvm/test/CodeGen/X86/pr30430.ll
index 94deca3a292..a81e26c51a1 100644
--- a/llvm/test/CodeGen/X86/pr30430.ll
+++ b/llvm/test/CodeGen/X86/pr30430.ll
@@ -116,14 +116,14 @@ define <16 x float> @makefloat(float %f1, float %f2, float %f3, float %f4, float
; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm24, %zmm24
; CHECK-NEXT: vmovaps %zmm24, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovaps {{[0-9]+}}(%rsp), %zmm0
-; CHECK-NEXT: vmovss %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: vmovss %xmm8, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: vmovss %xmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: vmovss %xmm10, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: vmovss %xmm11, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: vmovss %xmm12, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: vmovss %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; CHECK-NEXT: vmovss %xmm14, (%rsp) # 4-byte Spill
+; CHECK-NEXT: vmovss %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT: vmovss %xmm15, (%rsp) # 4-byte Spill
; CHECK-NEXT: movq %rbp, %rsp
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa %rsp, 8
diff --git a/llvm/test/CodeGen/X86/pr32284.ll b/llvm/test/CodeGen/X86/pr32284.ll
index ab6680cf45a..3998fcec9c7 100644
--- a/llvm/test/CodeGen/X86/pr32284.ll
+++ b/llvm/test/CodeGen/X86/pr32284.ll
@@ -222,8 +222,8 @@ define void @f1() {
; 686-O0-NEXT: movl %ebp, _ZN8struct_210member_2_0E
; 686-O0-NEXT: movl $0, _ZN8struct_210member_2_0E+4
; 686-O0-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; 686-O0-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; 686-O0-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; 686-O0-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; 686-O0-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; 686-O0-NEXT: movl %edi, (%esp) # 4-byte Spill
; 686-O0-NEXT: addl $24, %esp
diff --git a/llvm/test/CodeGen/X86/pr32345.ll b/llvm/test/CodeGen/X86/pr32345.ll
index 3a2db27727a..65fcf055f28 100644
--- a/llvm/test/CodeGen/X86/pr32345.ll
+++ b/llvm/test/CodeGen/X86/pr32345.ll
@@ -77,8 +77,8 @@ define void @foo() {
; 6860-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; 6860-NEXT: shrdl %cl, %edi, %esi
; 6860-NEXT: testb $32, %bl
-; 6860-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; 6860-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; 6860-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; 6860-NEXT: jne .LBB0_2
; 6860-NEXT: # %bb.1: # %bb
; 6860-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
diff --git a/llvm/test/CodeGen/X86/pr34592.ll b/llvm/test/CodeGen/X86/pr34592.ll
index 34e80fb23c4..b010429d973 100644
--- a/llvm/test/CodeGen/X86/pr34592.ll
+++ b/llvm/test/CodeGen/X86/pr34592.ll
@@ -53,12 +53,12 @@ define <16 x i64> @pluto(<16 x i64> %arg, <16 x i64> %arg1, <16 x i64> %arg2, <1
; CHECK-NEXT: vmovaps %ymm5, %ymm1
; CHECK-NEXT: vmovaps %ymm3, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: vmovaps %ymm9, %ymm3
-; CHECK-NEXT: vmovaps %ymm15, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: vmovaps %ymm10, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: vmovaps %ymm12, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: vmovaps %ymm13, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; CHECK-NEXT: vmovaps %ymm4, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; CHECK-NEXT: vmovaps %ymm14, (%rsp) # 32-byte Spill
+; CHECK-NEXT: vmovaps %ymm14, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; CHECK-NEXT: vmovaps %ymm15, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; CHECK-NEXT: vmovaps %ymm4, (%rsp) # 32-byte Spill
; CHECK-NEXT: movq %rbp, %rsp
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa %rsp, 8
diff --git a/llvm/test/CodeGen/X86/pr34653.ll b/llvm/test/CodeGen/X86/pr34653.ll
index 54d2e714635..3578806596f 100644
--- a/llvm/test/CodeGen/X86/pr34653.ll
+++ b/llvm/test/CodeGen/X86/pr34653.ll
@@ -130,23 +130,12 @@ define void @pr34653() {
; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
-; CHECK-NEXT: vmovsd %xmm8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: vmovsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: vmovsd %xmm30, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm24, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm25, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: vmovsd %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm28, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm27, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm31, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: vmovsd %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: vmovsd %xmm12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: vmovsd %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm26, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: vmovsd %xmm16, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: vmovsd %xmm17, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: vmovsd %xmm18, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
@@ -155,12 +144,23 @@ define void @pr34653() {
; CHECK-NEXT: vmovsd %xmm21, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: vmovsd %xmm22, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: vmovsd %xmm23, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: vmovsd %xmm24, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: vmovsd %xmm25, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: vmovsd %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: vmovsd %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: vmovsd %xmm11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: vmovsd %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: vmovsd %xmm26, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: vmovsd %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: vmovsd %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: vmovsd %xmm31, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: vmovsd %xmm27, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: vmovsd %xmm28, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: vmovsd %xmm29, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: vmovsd %xmm10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: vmovsd %xmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: vmovsd %xmm8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: vmovsd %xmm7, (%rsp) # 8-byte Spill
; CHECK-NEXT: movq %rbp, %rsp
; CHECK-NEXT: popq %rbp