-rw-r--r--  llvm/lib/CodeGen/RegAllocFast.cpp                      | 167
-rw-r--r--  llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll  |  33
-rw-r--r--  llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll | 342
-rw-r--r--  llvm/test/CodeGen/AMDGPU/spill-m0.ll                   |  24
-rw-r--r--  llvm/test/CodeGen/Mips/atomic.ll                       |  78
-rw-r--r--  llvm/test/CodeGen/Mips/atomic64.ll                     |   2
-rw-r--r--  llvm/test/CodeGen/Mips/atomicCmpSwapPW.ll              |   6
-rw-r--r--  llvm/test/CodeGen/X86/atomic32.ll                      |  28
-rw-r--r--  llvm/test/CodeGen/X86/avx-load-store.ll                |  32
-rwxr-xr-x  llvm/test/CodeGen/X86/avx512-mask-zext-bugfix.ll       |  42
-rw-r--r--  llvm/test/CodeGen/X86/pr30430.ll                       |   4
-rw-r--r--  llvm/test/CodeGen/X86/pr32284.ll                       |   2
-rw-r--r--  llvm/test/CodeGen/X86/pr32345.ll                       |   2
-rw-r--r--  llvm/test/CodeGen/X86/pr34592.ll                       |   6
-rw-r--r--  llvm/test/CodeGen/X86/pr34653.ll                       |  30
15 files changed, 374 insertions, 424 deletions
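The functional change below is confined to llvm/lib/CodeGen/RegAllocFast.cpp: a killed or spilled virtual register is no longer erased from LiveVirtRegs. Its LiveReg entry stays in the map with PhysReg set to 0, callers test that field instead, and the isBulkSpilling flag and the iterator-returning helper signatures go away; the test updates only track the resulting spill-slot and lane-number reshuffling. The sketch that follows is a hypothetical, simplified illustration of that invariant (stand-in types, not the LLVM code itself):

// Minimal sketch, assuming simplified stand-in types: a LiveReg entry
// survives being killed, and PhysReg == 0 marks it as "no physreg assigned".
#include <map>

struct LiveReg {
  unsigned VirtReg = 0;  // virtual register described by this entry
  unsigned PhysReg = 0;  // 0 means not currently assigned to a physical register
  bool Dirty = false;    // value in PhysReg differs from the stack slot
};

using LiveRegMap = std::map<unsigned, LiveReg>;

// The old code erased the map entry (guarded by isBulkSpilling inside spillAll);
// the new code only clears PhysReg, so no iterator is ever invalidated.
void killVirtReg(LiveReg &LR) { LR.PhysReg = 0; }

void spillAll(LiveRegMap &LiveVirtRegs) {
  for (auto &KV : LiveVirtRegs) {
    LiveReg &LR = KV.second;
    if (!LR.PhysReg)       // already dead: nothing to spill
      continue;
    // ... store the value in LR.PhysReg to the stack slot of LR.VirtReg ...
    killVirtReg(LR);       // entry remains in the map, marked unassigned
  }
  LiveVirtRegs.clear();
}
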
diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp index e849bcec199..ea7f247214d 100644 --- a/llvm/lib/CodeGen/RegAllocFast.cpp +++ b/llvm/lib/CodeGen/RegAllocFast.cpp @@ -149,10 +149,6 @@ namespace { return false; } - /// This flag is set when LiveRegMap will be cleared completely after - /// spilling all live registers. LiveRegMap entries should not be erased. - bool isBulkSpilling = false; - enum : unsigned { spillClean = 50, spillDirty = 100, @@ -186,9 +182,9 @@ namespace { bool isLastUseOfLocalReg(const MachineOperand &MO) const; void addKillFlag(const LiveReg &LRI); - void killVirtReg(LiveRegMap::iterator LRI); + void killVirtReg(LiveReg &LR); void killVirtReg(unsigned VirtReg); - void spillVirtReg(MachineBasicBlock::iterator MI, LiveRegMap::iterator); + void spillVirtReg(MachineBasicBlock::iterator MI, LiveReg &LR); void spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg); void usePhysReg(MachineOperand &MO); @@ -205,13 +201,11 @@ namespace { return LiveVirtRegs.find(TargetRegisterInfo::virtReg2Index(VirtReg)); } - LiveRegMap::iterator assignVirtToPhysReg(unsigned VirtReg, MCPhysReg PhysReg); - LiveRegMap::iterator allocVirtReg(MachineInstr &MI, LiveRegMap::iterator, - unsigned Hint); - LiveRegMap::iterator defineVirtReg(MachineInstr &MI, unsigned OpNum, - unsigned VirtReg, unsigned Hint); - LiveRegMap::iterator reloadVirtReg(MachineInstr &MI, unsigned OpNum, - unsigned VirtReg, unsigned Hint); + void allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint); + MCPhysReg defineVirtReg(MachineInstr &MI, unsigned OpNum, unsigned VirtReg, + unsigned Hint); + LiveReg &reloadVirtReg(MachineInstr &MI, unsigned OpNum, unsigned VirtReg, + unsigned Hint); void spillAll(MachineBasicBlock::iterator MI); bool setPhysReg(MachineInstr &MI, unsigned OpNum, MCPhysReg PhysReg); @@ -330,14 +324,12 @@ void RegAllocFast::addKillFlag(const LiveReg &LR) { } /// Mark virtreg as no longer available. -void RegAllocFast::killVirtReg(LiveRegMap::iterator LRI) { - addKillFlag(*LRI); - assert(PhysRegState[LRI->PhysReg] == LRI->VirtReg && +void RegAllocFast::killVirtReg(LiveReg &LR) { + addKillFlag(LR); + assert(PhysRegState[LR.PhysReg] == LR.VirtReg && "Broken RegState mapping"); - setPhysRegState(LRI->PhysReg, regFree); - // Erase from LiveVirtRegs unless we're spilling in bulk. - if (!isBulkSpilling) - LiveVirtRegs.erase(LRI); + setPhysRegState(LR.PhysReg, regFree); + LR.PhysReg = 0; } /// Mark virtreg as no longer available. @@ -345,8 +337,8 @@ void RegAllocFast::killVirtReg(unsigned VirtReg) { assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "killVirtReg needs a virtual register"); LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg); - if (LRI != LiveVirtRegs.end()) - killVirtReg(LRI); + if (LRI != LiveVirtRegs.end() && LRI->PhysReg) + killVirtReg(*LRI); } /// This method spills the value specified by VirtReg into the corresponding @@ -356,15 +348,14 @@ void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI, assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Spilling a physical register is illegal!"); LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg); - assert(LRI != LiveVirtRegs.end() && "Spilling unmapped virtual register"); - spillVirtReg(MI, LRI); + assert(LRI != LiveVirtRegs.end() && LRI->PhysReg && + "Spilling unmapped virtual register"); + spillVirtReg(MI, *LRI); } /// Do the actual work of spilling. 
-void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI, - LiveRegMap::iterator LRI) { - LiveReg &LR = *LRI; - assert(PhysRegState[LR.PhysReg] == LRI->VirtReg && "Broken RegState mapping"); +void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI, LiveReg &LR) { + assert(PhysRegState[LR.PhysReg] == LR.VirtReg && "Broken RegState mapping"); if (LR.Dirty) { // If this physreg is used by the instruction, we want to kill it on the @@ -372,25 +363,25 @@ void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI, bool SpillKill = MachineBasicBlock::iterator(LR.LastUse) != MI; LR.Dirty = false; - spill(MI, LRI->VirtReg, LR.PhysReg, SpillKill); + spill(MI, LR.VirtReg, LR.PhysReg, SpillKill); if (SpillKill) LR.LastUse = nullptr; // Don't kill register again } - killVirtReg(LRI); + killVirtReg(LR); } /// Spill all dirty virtregs without killing them. void RegAllocFast::spillAll(MachineBasicBlock::iterator MI) { if (LiveVirtRegs.empty()) return; - isBulkSpilling = true; // The LiveRegMap is keyed by an unsigned (the virtreg number), so the order // of spilling here is deterministic, if arbitrary. - for (LiveRegMap::iterator I = LiveVirtRegs.begin(), E = LiveVirtRegs.end(); - I != E; ++I) - spillVirtReg(MI, I); + for (LiveReg &LR : LiveVirtRegs) { + if (!LR.PhysReg) + continue; + spillVirtReg(MI, LR); + } LiveVirtRegs.clear(); - isBulkSpilling = false; } /// Handle the direct use of a physical register. Check that the register is @@ -519,9 +510,10 @@ unsigned RegAllocFast::calcSpillCost(MCPhysReg PhysReg) const { << printReg(PhysReg, TRI) << " is reserved already.\n"); return spillImpossible; default: { - LiveRegMap::const_iterator I = findLiveVirtReg(VirtReg); - assert(I != LiveVirtRegs.end() && "Missing VirtReg entry"); - return I->Dirty ? spillDirty : spillClean; + LiveRegMap::const_iterator LRI = findLiveVirtReg(VirtReg); + assert(LRI != LiveVirtRegs.end() && LRI->PhysReg && + "Missing VirtReg entry"); + return LRI->Dirty ? spillDirty : spillClean; } } @@ -539,9 +531,10 @@ unsigned RegAllocFast::calcSpillCost(MCPhysReg PhysReg) const { case regReserved: return spillImpossible; default: { - LiveRegMap::const_iterator I = findLiveVirtReg(VirtReg); - assert(I != LiveVirtRegs.end() && "Missing VirtReg entry"); - Cost += I->Dirty ? spillDirty : spillClean; + LiveRegMap::const_iterator LRI = findLiveVirtReg(VirtReg); + assert(LRI != LiveVirtRegs.end() && LRI->PhysReg && + "Missing VirtReg entry"); + Cost += LRI->Dirty ? spillDirty : spillClean; break; } } @@ -562,18 +555,9 @@ void RegAllocFast::assignVirtToPhysReg(LiveReg &LR, MCPhysReg PhysReg) { setPhysRegState(PhysReg, VirtReg); } -RegAllocFast::LiveRegMap::iterator -RegAllocFast::assignVirtToPhysReg(unsigned VirtReg, MCPhysReg PhysReg) { - LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg); - assert(LRI != LiveVirtRegs.end() && "VirtReg disappeared"); - assignVirtToPhysReg(*LRI, PhysReg); - return LRI; -} - /// Allocates a physical register for VirtReg. 
-RegAllocFast::LiveRegMap::iterator RegAllocFast::allocVirtReg(MachineInstr &MI, - LiveRegMap::iterator LRI, unsigned Hint) { - const unsigned VirtReg = LRI->VirtReg; +void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint) { + const unsigned VirtReg = LR.VirtReg; assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Can only allocate virtual registers"); @@ -590,9 +574,8 @@ RegAllocFast::LiveRegMap::iterator RegAllocFast::allocVirtReg(MachineInstr &MI, if (Cost < spillDirty) { if (Cost) definePhysReg(MI, Hint, regFree); - // definePhysReg may kill virtual registers and modify LiveVirtRegs. - // That invalidates LRI, so run a new lookup for VirtReg. - return assignVirtToPhysReg(VirtReg, Hint); + assignVirtToPhysReg(LR, Hint); + return; } } @@ -600,8 +583,8 @@ RegAllocFast::LiveRegMap::iterator RegAllocFast::allocVirtReg(MachineInstr &MI, ArrayRef<MCPhysReg> AllocationOrder = RegClassInfo.getOrder(&RC); for (MCPhysReg PhysReg : AllocationOrder) { if (PhysRegState[PhysReg] == regFree && !isRegUsedInInstr(PhysReg)) { - assignVirtToPhysReg(*LRI, PhysReg); - return LRI; + assignVirtToPhysReg(LR, PhysReg); + return; } } @@ -616,8 +599,8 @@ RegAllocFast::LiveRegMap::iterator RegAllocFast::allocVirtReg(MachineInstr &MI, LLVM_DEBUG(dbgs() << "Cost: " << Cost << " BestCost: " << BestCost << '\n'); // Cost is 0 when all aliases are already disabled. if (Cost == 0) { - assignVirtToPhysReg(*LRI, PhysReg); - return LRI; + assignVirtToPhysReg(LR, PhysReg); + return; } if (Cost < BestCost) { BestReg = PhysReg; @@ -632,26 +615,23 @@ RegAllocFast::LiveRegMap::iterator RegAllocFast::allocVirtReg(MachineInstr &MI, else MI.emitError("ran out of registers during register allocation"); definePhysReg(MI, *AllocationOrder.begin(), regFree); - return assignVirtToPhysReg(VirtReg, *AllocationOrder.begin()); + assignVirtToPhysReg(LR, *AllocationOrder.begin()); + return; } definePhysReg(MI, BestReg, regFree); - // definePhysReg may kill virtual registers and modify LiveVirtRegs. - // That invalidates LRI, so run a new lookup for VirtReg. - return assignVirtToPhysReg(VirtReg, BestReg); + assignVirtToPhysReg(LR, BestReg); } /// Allocates a register for VirtReg and mark it as dirty. -RegAllocFast::LiveRegMap::iterator RegAllocFast::defineVirtReg(MachineInstr &MI, - unsigned OpNum, - unsigned VirtReg, - unsigned Hint) { +MCPhysReg RegAllocFast::defineVirtReg(MachineInstr &MI, unsigned OpNum, + unsigned VirtReg, unsigned Hint) { assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Not a virtual register"); LiveRegMap::iterator LRI; bool New; std::tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg)); - if (New) { + if (!LRI->PhysReg) { // If there is no hint, peek at the only use of this register. if ((!Hint || !TargetRegisterInfo::isPhysicalRegister(Hint)) && MRI->hasOneNonDBGUse(VirtReg)) { @@ -660,7 +640,7 @@ RegAllocFast::LiveRegMap::iterator RegAllocFast::defineVirtReg(MachineInstr &MI, if (UseMI.isCopyLike()) Hint = UseMI.getOperand(0).getReg(); } - LRI = allocVirtReg(MI, LRI, Hint); + allocVirtReg(MI, *LRI, Hint); } else if (LRI->LastUse) { // Redefining a live register - kill at the last use, unless it is this // instruction defining VirtReg multiple times. @@ -672,22 +652,22 @@ RegAllocFast::LiveRegMap::iterator RegAllocFast::defineVirtReg(MachineInstr &MI, LRI->LastOpNum = OpNum; LRI->Dirty = true; markRegUsedInInstr(LRI->PhysReg); - return LRI; + return LRI->PhysReg; } /// Make sure VirtReg is available in a physreg and return it. 
-RegAllocFast::LiveRegMap::iterator RegAllocFast::reloadVirtReg(MachineInstr &MI, - unsigned OpNum, - unsigned VirtReg, - unsigned Hint) { +RegAllocFast::LiveReg &RegAllocFast::reloadVirtReg(MachineInstr &MI, + unsigned OpNum, + unsigned VirtReg, + unsigned Hint) { assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Not a virtual register"); LiveRegMap::iterator LRI; bool New; std::tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg)); MachineOperand &MO = MI.getOperand(OpNum); - if (New) { - LRI = allocVirtReg(MI, LRI, Hint); + if (!LRI->PhysReg) { + allocVirtReg(MI, *LRI, Hint); reload(MI, VirtReg, LRI->PhysReg); } else if (LRI->Dirty) { if (isLastUseOfLocalReg(MO)) { @@ -718,7 +698,7 @@ RegAllocFast::LiveRegMap::iterator RegAllocFast::reloadVirtReg(MachineInstr &MI, LRI->LastUse = &MI; LRI->LastOpNum = OpNum; markRegUsedInInstr(LRI->PhysReg); - return LRI; + return *LRI; } /// Changes operand OpNum in MI the refer the PhysReg, considering subregs. This @@ -798,8 +778,8 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI, LLVM_DEBUG(dbgs() << "Operand " << I << "(" << MO << ") is tied to operand " << MI.findTiedOperandIdx(I) << ".\n"); - LiveRegMap::iterator LRI = reloadVirtReg(MI, I, Reg, 0); - MCPhysReg PhysReg = LRI->PhysReg; + LiveReg &LR = reloadVirtReg(MI, I, Reg, 0); + MCPhysReg PhysReg = LR.PhysReg; setPhysReg(MI, I, PhysReg); // Note: we don't update the def operand yet. That would cause the normal // def-scan to attempt spilling. @@ -807,8 +787,8 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI, LLVM_DEBUG(dbgs() << "Partial redefine: " << MO << '\n'); // Reload the register, but don't assign to the operand just yet. // That would confuse the later phys-def processing pass. - LiveRegMap::iterator LRI = reloadVirtReg(MI, I, Reg, 0); - PartialDefs.push_back(LRI->PhysReg); + LiveReg &LR = reloadVirtReg(MI, I, Reg, 0); + PartialDefs.push_back(LR.PhysReg); } } @@ -821,8 +801,7 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI, if (!MO.isEarlyClobber()) continue; // Note: defineVirtReg may invalidate MO. - LiveRegMap::iterator LRI = defineVirtReg(MI, I, Reg, 0); - MCPhysReg PhysReg = LRI->PhysReg; + MCPhysReg PhysReg = defineVirtReg(MI, I, Reg, 0); if (setPhysReg(MI, I, PhysReg)) VirtDead.push_back(Reg); } @@ -856,11 +835,12 @@ void RegAllocFast::dumpState() { break; default: { dbgs() << '=' << printReg(PhysRegState[Reg]); - LiveRegMap::iterator I = findLiveVirtReg(PhysRegState[Reg]); - assert(I != LiveVirtRegs.end() && "Missing VirtReg entry"); - if (I->Dirty) + LiveRegMap::iterator LRI = findLiveVirtReg(PhysRegState[Reg]); + assert(LRI != LiveVirtRegs.end() && LRI->PhysReg && + "Missing VirtReg entry"); + if (LRI->Dirty) dbgs() << "*"; - assert(I->PhysReg == Reg && "Bad inverse map"); + assert(LRI->PhysReg == Reg && "Bad inverse map"); break; } } @@ -869,6 +849,8 @@ void RegAllocFast::dumpState() { // Check that LiveVirtRegs is the inverse. for (LiveRegMap::iterator i = LiveVirtRegs.begin(), e = LiveVirtRegs.end(); i != e; ++i) { + if (!i->PhysReg) + continue; assert(TargetRegisterInfo::isVirtualRegister(i->VirtReg) && "Bad map key"); assert(TargetRegisterInfo::isPhysicalRegister(i->PhysReg) && @@ -916,7 +898,7 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) { // See if this virtual register has already been allocated to a physical // register or spilled to a stack slot. 
LiveRegMap::iterator LRI = findLiveVirtReg(Reg); - if (LRI != LiveVirtRegs.end()) + if (LRI != LiveVirtRegs.end() && LRI->PhysReg) setPhysReg(*DebugMI, 0, LRI->PhysReg); else { int SS = StackSlotForVirtReg[Reg]; @@ -1026,11 +1008,11 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) { unsigned Reg = MO.getReg(); if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; if (MO.isUse()) { - LiveRegMap::iterator LRI = reloadVirtReg(MI, I, Reg, CopyDstReg); - MCPhysReg PhysReg = LRI->PhysReg; + LiveReg &LR = reloadVirtReg(MI, I, Reg, CopyDstReg); + MCPhysReg PhysReg = LR.PhysReg; CopySrcReg = (CopySrcReg == Reg || CopySrcReg == PhysReg) ? PhysReg : 0; if (setPhysReg(MI, I, PhysReg)) - killVirtReg(LRI); + killVirtReg(LR); } } @@ -1074,8 +1056,7 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) { definePhysReg(MI, Reg, MO.isDead() ? regFree : regReserved); continue; } - LiveRegMap::iterator LRI = defineVirtReg(MI, I, Reg, CopySrcReg); - MCPhysReg PhysReg = LRI->PhysReg; + MCPhysReg PhysReg = defineVirtReg(MI, I, Reg, CopySrcReg); if (setPhysReg(MI, I, PhysReg)) { VirtDead.push_back(Reg); CopyDstReg = 0; // cancel coalescing; diff --git a/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll b/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll index d19072a6c4e..41ecdd403d7 100644 --- a/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll +++ b/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll @@ -21,18 +21,17 @@ ; GCN: s_mov_b64 s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, exec ; GCN: s_and_b64 s{{\[}}[[ANDEXEC_LO:[0-9]+]]:[[ANDEXEC_HI:[0-9]+]]{{\]}}, s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}}, [[CMP0]] +; Spill load +; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s7 offset:[[LOAD0_OFFSET:[0-9]+]] ; 4-byte Folded Spill + ; Spill saved exec ; VGPR: v_writelane_b32 [[SPILL_VGPR:v[0-9]+]], s[[SAVEEXEC_LO]], [[SAVEEXEC_LO_LANE:[0-9]+]] ; VGPR: v_writelane_b32 [[SPILL_VGPR]], s[[SAVEEXEC_HI]], [[SAVEEXEC_HI_LANE:[0-9]+]] - ; VMEM: v_mov_b32_e32 v[[V_SAVEEXEC_LO:[0-9]+]], s[[SAVEEXEC_LO]] -; VMEM: buffer_store_dword v[[V_SAVEEXEC_LO]], off, s[0:3], s7 offset:4 ; 4-byte Folded Spill +; VMEM: buffer_store_dword v[[V_SAVEEXEC_LO]], off, s[0:3], s7 offset:20 ; 4-byte Folded Spill ; VMEM: v_mov_b32_e32 v[[V_SAVEEXEC_HI:[0-9]+]], s[[SAVEEXEC_HI]] -; VMEM: buffer_store_dword v[[V_SAVEEXEC_HI]], off, s[0:3], s7 offset:8 ; 4-byte Folded Spill - -; Spill load -; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s7 offset:[[LOAD0_OFFSET:[0-9]+]] ; 4-byte Folded Spill +; VMEM: buffer_store_dword v[[V_SAVEEXEC_HI]], off, s[0:3], s7 offset:24 ; 4-byte Folded Spill ; GCN: s_mov_b64 exec, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}} @@ -57,11 +56,11 @@ -; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_LO:[0-9]+]], off, s[0:3], s7 offset:4 ; 4-byte Folded Reload +; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_LO:[0-9]+]], off, s[0:3], s7 offset:20 ; 4-byte Folded Reload ; VMEM: s_waitcnt vmcnt(0) ; VMEM: v_readfirstlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], v[[V_RELOAD_SAVEEXEC_LO]] -; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_HI:[0-9]+]], off, s[0:3], s7 offset:8 ; 4-byte Folded Reload +; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_HI:[0-9]+]], off, s[0:3], s7 offset:24 ; 4-byte Folded Reload ; VMEM: s_waitcnt vmcnt(0) ; VMEM: v_readfirstlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], v[[V_RELOAD_SAVEEXEC_HI]] @@ -103,7 +102,7 @@ endif: ; GCN: s_and_b64 s{{\[}}[[ANDEXEC_LO:[0-9]+]]:[[ANDEXEC_HI:[0-9]+]]{{\]}}, 
s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, [[CMP0]] ; Spill load -; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s7 offset:4 ; 4-byte Folded Spill +; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s7 offset:[[LOAD0_OFFSET:[0-9]+]] ; 4-byte Folded Spill ; Spill saved exec ; VGPR: v_writelane_b32 [[SPILL_VGPR:v[0-9]+]], s[[SAVEEXEC_LO]], [[SAVEEXEC_LO_LANE:[0-9]+]] @@ -111,9 +110,9 @@ endif: ; VMEM: v_mov_b32_e32 v[[V_SAVEEXEC_LO:[0-9]+]], s[[SAVEEXEC_LO]] -; VMEM: buffer_store_dword v[[V_SAVEEXEC_LO]], off, s[0:3], s7 offset:20 ; 4-byte Folded Spill +; VMEM: buffer_store_dword v[[V_SAVEEXEC_LO]], off, s[0:3], s7 offset:24 ; 4-byte Folded Spill ; VMEM: v_mov_b32_e32 v[[V_SAVEEXEC_HI:[0-9]+]], s[[SAVEEXEC_HI]] -; VMEM: buffer_store_dword v[[V_SAVEEXEC_HI]], off, s[0:3], s7 offset:24 ; 4-byte Folded Spill +; VMEM: buffer_store_dword v[[V_SAVEEXEC_HI]], off, s[0:3], s7 offset:28 ; 4-byte Folded Spill ; GCN: s_mov_b64 exec, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}} @@ -122,7 +121,7 @@ endif: ; GCN: [[LOOP:BB[0-9]+_[0-9]+]]: -; GCN: buffer_load_dword v[[VAL_LOOP_RELOAD:[0-9]+]], off, s[0:3], s7 offset:4 ; 4-byte Folded Reload +; GCN: buffer_load_dword v[[VAL_LOOP_RELOAD:[0-9]+]], off, s[0:3], s7 offset:[[LOAD0_OFFSET]] ; 4-byte Folded Reload ; GCN: v_subrev_i32_e32 [[VAL_LOOP:v[0-9]+]], vcc, v{{[0-9]+}}, v[[VAL_LOOP_RELOAD]] ; GCN: v_cmp_ne_u32_e32 vcc, ; GCN: s_and_b64 vcc, exec, vcc @@ -134,11 +133,11 @@ endif: ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_LO_LANE]] ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_HI_LANE]] -; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_LO:[0-9]+]], off, s[0:3], s7 offset:20 ; 4-byte Folded Reload +; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_LO:[0-9]+]], off, s[0:3], s7 offset:24 ; 4-byte Folded Reload ; VMEM: s_waitcnt vmcnt(0) ; VMEM: v_readfirstlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], v[[V_RELOAD_SAVEEXEC_LO]] -; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_HI:[0-9]+]], off, s[0:3], s7 offset:24 ; 4-byte Folded Reload +; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_HI:[0-9]+]], off, s[0:3], s7 offset:28 ; 4-byte Folded Reload ; VMEM: s_waitcnt vmcnt(0) ; VMEM: v_readfirstlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], v[[V_RELOAD_SAVEEXEC_HI]] @@ -182,7 +181,7 @@ end: ; GCN: s_xor_b64 s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}}, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}}, s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}} ; Spill load -; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s7 offset:4 ; 4-byte Folded Spill +; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s7 offset:[[LOAD0_OFFSET:[0-9]+]] ; 4-byte Folded Spill ; Spill saved exec ; VGPR: v_writelane_b32 [[SPILL_VGPR:v[0-9]+]], s[[SAVEEXEC_LO]], [[SAVEEXEC_LO_LANE:[0-9]+]] @@ -237,13 +236,13 @@ end: ; GCN: BB{{[0-9]+}}_2: ; %if ; GCN: ds_read_b32 -; GCN: buffer_load_dword v[[LOAD0_RELOAD:[0-9]+]], off, s[0:3], s7 offset:4 ; 4-byte Folded Reload +; GCN: buffer_load_dword v[[LOAD0_RELOAD:[0-9]+]], off, s[0:3], s7 offset:[[LOAD0_OFFSET]] ; 4-byte Folded Reload ; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, v{{[0-9]+}}, v[[LOAD0_RELOAD]] ; GCN: buffer_store_dword [[ADD]], off, s[0:3], s7 offset:[[RESULT_OFFSET]] ; 4-byte Folded Spill ; GCN-NEXT: s_branch [[ENDIF:BB[0-9]+_[0-9]+]] ; GCN: [[ELSE]]: ; %else -; GCN: buffer_load_dword v[[LOAD0_RELOAD:[0-9]+]], off, s[0:3], s7 offset:4 ; 4-byte Folded Reload +; GCN: buffer_load_dword v[[LOAD0_RELOAD:[0-9]+]], off, s[0:3], s7 offset:[[LOAD0_OFFSET]] ; 4-byte Folded Reload 
; GCN: v_subrev_i32_e32 [[SUB:v[0-9]+]], vcc, v{{[0-9]+}}, v[[LOAD0_RELOAD]] ; GCN: buffer_store_dword [[ADD]], off, s[0:3], s7 offset:[[FLOW_RESULT_OFFSET:[0-9]+]] ; 4-byte Folded Spill ; GCN-NEXT: s_branch [[FLOW]] diff --git a/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll b/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll index d31d636cc41..a38bacd97a6 100644 --- a/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll +++ b/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll @@ -82,95 +82,95 @@ ; GCN-NEXT: v_writelane_b32 v0, s[[TMP_HI]], 47 ; GCN: def s{{\[}}[[TMP_LO]]:[[TMP_HI]]{{\]}} -; GCN: v_writelane_b32 v0, s[[TMP_LO]], 48 -; GCN-NEXT: v_writelane_b32 v0, s{{[0-9]+}}, 49 -; GCN-NEXT: v_writelane_b32 v0, s{{[0-9]+}}, 50 -; GCN-NEXT: v_writelane_b32 v0, s{{[0-9]+}}, 51 -; GCN-NEXT: v_writelane_b32 v0, s{{[0-9]+}}, 52 -; GCN-NEXT: v_writelane_b32 v0, s9, 53 -; GCN-NEXT: v_writelane_b32 v0, s10, 54 -; GCN-NEXT: v_writelane_b32 v0, s[[TMP_HI]], 55 - -; GCN-NEXT: v_writelane_b32 v0, s84, 56 -; GCN-NEXT: v_writelane_b32 v0, s85, 57 -; GCN-NEXT: v_writelane_b32 v0, s86, 58 -; GCN-NEXT: v_writelane_b32 v0, s87, 59 -; GCN-NEXT: v_writelane_b32 v0, s88, 60 -; GCN-NEXT: v_writelane_b32 v0, s89, 61 -; GCN-NEXT: v_writelane_b32 v0, s90, 62 -; GCN-NEXT: v_writelane_b32 v0, s91, 63 -; GCN-NEXT: v_writelane_b32 v1, s12, 0 -; GCN-NEXT: v_writelane_b32 v1, s13, 1 -; GCN-NEXT: v_writelane_b32 v1, s14, 2 -; GCN-NEXT: v_writelane_b32 v1, s15, 3 -; GCN-NEXT: v_writelane_b32 v1, s16, 4 -; GCN-NEXT: v_writelane_b32 v1, s17, 5 -; GCN-NEXT: v_writelane_b32 v1, s18, 6 -; GCN-NEXT: v_writelane_b32 v1, s19, 7 -; GCN-NEXT: v_writelane_b32 v1, s20, 8 -; GCN-NEXT: v_writelane_b32 v1, s21, 9 -; GCN-NEXT: v_writelane_b32 v1, s22, 10 -; GCN-NEXT: v_writelane_b32 v1, s23, 11 -; GCN-NEXT: v_writelane_b32 v1, s24, 12 -; GCN-NEXT: v_writelane_b32 v1, s25, 13 -; GCN-NEXT: v_writelane_b32 v1, s26, 14 -; GCN-NEXT: v_writelane_b32 v1, s27, 15 -; GCN-NEXT: v_writelane_b32 v1, s28, 16 -; GCN-NEXT: v_writelane_b32 v1, s29, 17 -; GCN-NEXT: v_writelane_b32 v1, s30, 18 -; GCN-NEXT: v_writelane_b32 v1, s31, 19 -; GCN-NEXT: v_writelane_b32 v1, s32, 20 -; GCN-NEXT: v_writelane_b32 v1, s33, 21 -; GCN-NEXT: v_writelane_b32 v1, s34, 22 -; GCN-NEXT: v_writelane_b32 v1, s35, 23 -; GCN-NEXT: v_writelane_b32 v1, s36, 24 -; GCN-NEXT: v_writelane_b32 v1, s37, 25 -; GCN-NEXT: v_writelane_b32 v1, s38, 26 -; GCN-NEXT: v_writelane_b32 v1, s39, 27 -; GCN-NEXT: v_writelane_b32 v1, s40, 28 -; GCN-NEXT: v_writelane_b32 v1, s41, 29 -; GCN-NEXT: v_writelane_b32 v1, s42, 30 -; GCN-NEXT: v_writelane_b32 v1, s43, 31 -; GCN-NEXT: v_writelane_b32 v1, s44, 32 -; GCN-NEXT: v_writelane_b32 v1, s45, 33 -; GCN-NEXT: v_writelane_b32 v1, s46, 34 -; GCN-NEXT: v_writelane_b32 v1, s47, 35 -; GCN-NEXT: v_writelane_b32 v1, s48, 36 -; GCN-NEXT: v_writelane_b32 v1, s49, 37 -; GCN-NEXT: v_writelane_b32 v1, s50, 38 -; GCN-NEXT: v_writelane_b32 v1, s51, 39 -; GCN-NEXT: v_writelane_b32 v1, s52, 40 -; GCN-NEXT: v_writelane_b32 v1, s53, 41 -; GCN-NEXT: v_writelane_b32 v1, s54, 42 -; GCN-NEXT: v_writelane_b32 v1, s55, 43 -; GCN-NEXT: v_writelane_b32 v1, s56, 44 -; GCN-NEXT: v_writelane_b32 v1, s57, 45 -; GCN-NEXT: v_writelane_b32 v1, s58, 46 -; GCN-NEXT: v_writelane_b32 v1, s59, 47 -; GCN-NEXT: v_writelane_b32 v1, s60, 48 -; GCN-NEXT: v_writelane_b32 v1, s61, 49 -; GCN-NEXT: v_writelane_b32 v1, s62, 50 -; GCN-NEXT: v_writelane_b32 v1, s63, 51 -; GCN-NEXT: v_writelane_b32 v1, s64, 52 -; GCN-NEXT: v_writelane_b32 v1, s65, 53 -; GCN-NEXT: v_writelane_b32 
v1, s66, 54 -; GCN-NEXT: v_writelane_b32 v1, s67, 55 -; GCN-NEXT: v_writelane_b32 v1, s68, 56 -; GCN-NEXT: v_writelane_b32 v1, s69, 57 -; GCN-NEXT: v_writelane_b32 v1, s70, 58 -; GCN-NEXT: v_writelane_b32 v1, s71, 59 -; GCN-NEXT: v_writelane_b32 v1, s72, 60 -; GCN-NEXT: v_writelane_b32 v1, s73, 61 -; GCN-NEXT: v_writelane_b32 v1, s74, 62 -; GCN-NEXT: v_writelane_b32 v1, s75, 63 -; GCN-NEXT: v_writelane_b32 v2, s76, 0 -; GCN-NEXT: v_writelane_b32 v2, s77, 1 -; GCN-NEXT: v_writelane_b32 v2, s78, 2 -; GCN-NEXT: v_writelane_b32 v2, s79, 3 -; GCN-NEXT: v_writelane_b32 v2, s80, 4 -; GCN-NEXT: v_writelane_b32 v2, s81, 5 -; GCN-NEXT: v_writelane_b32 v2, s82, 6 -; GCN-NEXT: v_writelane_b32 v2, s83, 7 +; GCN: v_writelane_b32 v0, s12, 48 +; GCN-NEXT: v_writelane_b32 v0, s13, 49 +; GCN-NEXT: v_writelane_b32 v0, s14, 50 +; GCN-NEXT: v_writelane_b32 v0, s15, 51 +; GCN-NEXT: v_writelane_b32 v0, s16, 52 +; GCN-NEXT: v_writelane_b32 v0, s17, 53 +; GCN-NEXT: v_writelane_b32 v0, s18, 54 +; GCN-NEXT: v_writelane_b32 v0, s19, 55 + +; GCN-NEXT: v_writelane_b32 v0, s20, 56 +; GCN-NEXT: v_writelane_b32 v0, s21, 57 +; GCN-NEXT: v_writelane_b32 v0, s22, 58 +; GCN-NEXT: v_writelane_b32 v0, s23, 59 +; GCN-NEXT: v_writelane_b32 v0, s24, 60 +; GCN-NEXT: v_writelane_b32 v0, s25, 61 +; GCN-NEXT: v_writelane_b32 v0, s26, 62 +; GCN-NEXT: v_writelane_b32 v0, s27, 63 +; GCN-NEXT: v_writelane_b32 v1, s28, 0 +; GCN-NEXT: v_writelane_b32 v1, s29, 1 +; GCN-NEXT: v_writelane_b32 v1, s30, 2 +; GCN-NEXT: v_writelane_b32 v1, s31, 3 +; GCN-NEXT: v_writelane_b32 v1, s32, 4 +; GCN-NEXT: v_writelane_b32 v1, s33, 5 +; GCN-NEXT: v_writelane_b32 v1, s34, 6 +; GCN-NEXT: v_writelane_b32 v1, s35, 7 +; GCN-NEXT: v_writelane_b32 v1, s36, 8 +; GCN-NEXT: v_writelane_b32 v1, s37, 9 +; GCN-NEXT: v_writelane_b32 v1, s38, 10 +; GCN-NEXT: v_writelane_b32 v1, s39, 11 +; GCN-NEXT: v_writelane_b32 v1, s40, 12 +; GCN-NEXT: v_writelane_b32 v1, s41, 13 +; GCN-NEXT: v_writelane_b32 v1, s42, 14 +; GCN-NEXT: v_writelane_b32 v1, s43, 15 +; GCN-NEXT: v_writelane_b32 v1, s44, 16 +; GCN-NEXT: v_writelane_b32 v1, s45, 17 +; GCN-NEXT: v_writelane_b32 v1, s46, 18 +; GCN-NEXT: v_writelane_b32 v1, s47, 19 +; GCN-NEXT: v_writelane_b32 v1, s48, 20 +; GCN-NEXT: v_writelane_b32 v1, s49, 21 +; GCN-NEXT: v_writelane_b32 v1, s50, 22 +; GCN-NEXT: v_writelane_b32 v1, s51, 23 +; GCN-NEXT: v_writelane_b32 v1, s52, 24 +; GCN-NEXT: v_writelane_b32 v1, s53, 25 +; GCN-NEXT: v_writelane_b32 v1, s54, 26 +; GCN-NEXT: v_writelane_b32 v1, s55, 27 +; GCN-NEXT: v_writelane_b32 v1, s56, 28 +; GCN-NEXT: v_writelane_b32 v1, s57, 29 +; GCN-NEXT: v_writelane_b32 v1, s58, 30 +; GCN-NEXT: v_writelane_b32 v1, s59, 31 +; GCN-NEXT: v_writelane_b32 v1, s60, 32 +; GCN-NEXT: v_writelane_b32 v1, s61, 33 +; GCN-NEXT: v_writelane_b32 v1, s62, 34 +; GCN-NEXT: v_writelane_b32 v1, s63, 35 +; GCN-NEXT: v_writelane_b32 v1, s64, 36 +; GCN-NEXT: v_writelane_b32 v1, s65, 37 +; GCN-NEXT: v_writelane_b32 v1, s66, 38 +; GCN-NEXT: v_writelane_b32 v1, s67, 39 +; GCN-NEXT: v_writelane_b32 v1, s68, 40 +; GCN-NEXT: v_writelane_b32 v1, s69, 41 +; GCN-NEXT: v_writelane_b32 v1, s70, 42 +; GCN-NEXT: v_writelane_b32 v1, s71, 43 +; GCN-NEXT: v_writelane_b32 v1, s72, 44 +; GCN-NEXT: v_writelane_b32 v1, s73, 45 +; GCN-NEXT: v_writelane_b32 v1, s74, 46 +; GCN-NEXT: v_writelane_b32 v1, s75, 47 +; GCN-NEXT: v_writelane_b32 v1, s76, 48 +; GCN-NEXT: v_writelane_b32 v1, s77, 49 +; GCN-NEXT: v_writelane_b32 v1, s78, 50 +; GCN-NEXT: v_writelane_b32 v1, s79, 51 +; GCN-NEXT: v_writelane_b32 v1, s80, 52 +; GCN-NEXT: v_writelane_b32 v1, s81, 
53 +; GCN-NEXT: v_writelane_b32 v1, s82, 54 +; GCN-NEXT: v_writelane_b32 v1, s83, 55 +; GCN-NEXT: v_writelane_b32 v1, s84, 56 +; GCN-NEXT: v_writelane_b32 v1, s85, 57 +; GCN-NEXT: v_writelane_b32 v1, s86, 58 +; GCN-NEXT: v_writelane_b32 v1, s87, 59 +; GCN-NEXT: v_writelane_b32 v1, s88, 60 +; GCN-NEXT: v_writelane_b32 v1, s89, 61 +; GCN-NEXT: v_writelane_b32 v1, s90, 62 +; GCN-NEXT: v_writelane_b32 v1, s91, 63 +; GCN-NEXT: v_writelane_b32 v2, s4, 0 +; GCN-NEXT: v_writelane_b32 v2, s5, 1 +; GCN-NEXT: v_writelane_b32 v2, s6, 2 +; GCN-NEXT: v_writelane_b32 v2, s7, 3 +; GCN-NEXT: v_writelane_b32 v2, s8, 4 +; GCN-NEXT: v_writelane_b32 v2, s9, 5 +; GCN-NEXT: v_writelane_b32 v2, s10, 6 +; GCN-NEXT: v_writelane_b32 v2, s11, 7 ; GCN: s_cbranch_scc1 @@ -184,6 +184,25 @@ ; GCN-NEXT: v_readlane_b32 s[[USE_TMP_HI:[0-9]+]], v0, 7 ; GCN: ; use s{{\[}}[[USE_TMP_LO]]:[[USE_TMP_HI]]{{\]}} +; GCN: v_readlane_b32 s[[USE_TMP_LO:[0-9]+]], v0, 48 +; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 49 +; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 50 +; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 51 +; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 52 +; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 53 +; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 54 +; GCN-NEXT: v_readlane_b32 s[[USE_TMP_HI:[0-9]+]], v0, 55 +; GCN: ; use s{{\[}}[[USE_TMP_LO]]:[[USE_TMP_HI]]{{\]}} + +; GCN: v_readlane_b32 s[[USE_TMP_LO:[0-9]+]], v0, 56 +; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 57 +; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 58 +; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 59 +; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 60 +; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 61 +; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 62 +; GCN-NEXT: v_readlane_b32 s[[USE_TMP_HI:[0-9]+]], v0, 63 +; GCN: ; use s{{\[}}[[USE_TMP_LO]]:[[USE_TMP_HI]]{{\]}} ; GCN: v_readlane_b32 s[[USE_TMP_LO]], v1, 0 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 1 @@ -265,26 +284,6 @@ ; GCN-NEXT: v_readlane_b32 s[[USE_TMP_HI]], v1, 63 ; GCN: ; use s{{\[}}[[USE_TMP_LO]]:[[USE_TMP_HI]]{{\]}} -; GCN: v_readlane_b32 s{{[0-9]+}}, v2, 0 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v2, 1 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v2, 2 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v2, 3 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v2, 4 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v2, 5 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v2, 6 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v2, 7 -; GCN: ; use s{{\[}}[[USE_TMP_LO]]:[[USE_TMP_HI]]{{\]}} - -; GCN: v_readlane_b32 s{{[0-9]+}}, v0, 56 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 57 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 58 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 59 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 60 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 61 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 62 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 63 -; GCN: ; use s{{\[}}[[USE_TMP_LO]]:[[USE_TMP_HI]]{{\]}} - ; GCN: v_readlane_b32 s{{[0-9]+}}, v0, 8 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 9 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 10 @@ -335,14 +334,14 @@ ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 47 ; GCN: ; use s{{\[}}[[USE_TMP_LO]]:[[USE_TMP_HI]]{{\]}} -; GCN: v_readlane_b32 s{{[0-9]+}}, v0, 48 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 49 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 50 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 51 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 52 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 53 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 54 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 55 +; GCN: v_readlane_b32 
s{{[0-9]+}}, v2, 0 +; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v2, 1 +; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v2, 2 +; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v2, 3 +; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v2, 4 +; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v2, 5 +; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v2, 6 +; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v2, 7 ; GCN: ; use s{{\[}}[[USE_TMP_LO]]:[[USE_TMP_HI]]{{\]}} define amdgpu_kernel void @spill_sgprs_to_multiple_vgprs(i32 addrspace(1)* %out, i32 %in) #0 { %wide.sgpr0 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0 @@ -396,39 +395,39 @@ ret: ; GCN: def s[4:19] ; GCN: def s[20:35] -; GCN: v_writelane_b32 v0, s4, 50 -; GCN-NEXT: v_writelane_b32 v0, s5, 51 -; GCN-NEXT: v_writelane_b32 v0, s6, 52 -; GCN-NEXT: v_writelane_b32 v0, s7, 53 -; GCN-NEXT: v_writelane_b32 v0, s8, 54 -; GCN-NEXT: v_writelane_b32 v0, s9, 55 -; GCN-NEXT: v_writelane_b32 v0, s10, 56 -; GCN-NEXT: v_writelane_b32 v0, s11, 57 -; GCN-NEXT: v_writelane_b32 v0, s12, 58 -; GCN-NEXT: v_writelane_b32 v0, s13, 59 -; GCN-NEXT: v_writelane_b32 v0, s14, 60 -; GCN-NEXT: v_writelane_b32 v0, s15, 61 -; GCN-NEXT: v_writelane_b32 v0, s16, 62 -; GCN-NEXT: v_writelane_b32 v0, s17, 63 -; GCN-NEXT: v_writelane_b32 v1, s18, 0 -; GCN-NEXT: v_writelane_b32 v1, s19, 1 - -; GCN: v_readlane_b32 s4, v0, 50 -; GCN-NEXT: v_readlane_b32 s5, v0, 51 -; GCN-NEXT: v_readlane_b32 s6, v0, 52 -; GCN-NEXT: v_readlane_b32 s7, v0, 53 -; GCN-NEXT: v_readlane_b32 s8, v0, 54 -; GCN-NEXT: v_readlane_b32 s9, v0, 55 -; GCN-NEXT: v_readlane_b32 s10, v0, 56 -; GCN-NEXT: v_readlane_b32 s11, v0, 57 -; GCN-NEXT: v_readlane_b32 s12, v0, 58 -; GCN-NEXT: v_readlane_b32 s13, v0, 59 -; GCN-NEXT: v_readlane_b32 s14, v0, 60 -; GCN-NEXT: v_readlane_b32 s15, v0, 61 -; GCN-NEXT: v_readlane_b32 s16, v0, 62 -; GCN-NEXT: v_readlane_b32 s17, v0, 63 -; GCN-NEXT: v_readlane_b32 s18, v1, 0 -; GCN-NEXT: v_readlane_b32 s19, v1, 1 +; GCN: v_writelane_b32 v0, s4, 48 +; GCN-NEXT: v_writelane_b32 v0, s5, 49 +; GCN-NEXT: v_writelane_b32 v0, s6, 50 +; GCN-NEXT: v_writelane_b32 v0, s7, 51 +; GCN-NEXT: v_writelane_b32 v0, s8, 52 +; GCN-NEXT: v_writelane_b32 v0, s9, 53 +; GCN-NEXT: v_writelane_b32 v0, s10, 54 +; GCN-NEXT: v_writelane_b32 v0, s11, 55 +; GCN-NEXT: v_writelane_b32 v0, s12, 56 +; GCN-NEXT: v_writelane_b32 v0, s13, 57 +; GCN-NEXT: v_writelane_b32 v0, s14, 58 +; GCN-NEXT: v_writelane_b32 v0, s15, 59 +; GCN-NEXT: v_writelane_b32 v0, s16, 60 +; GCN-NEXT: v_writelane_b32 v0, s17, 61 +; GCN-NEXT: v_writelane_b32 v0, s18, 62 +; GCN-NEXT: v_writelane_b32 v0, s19, 63 + +; GCN: v_readlane_b32 s4, v0, 48 +; GCN-NEXT: v_readlane_b32 s5, v0, 49 +; GCN-NEXT: v_readlane_b32 s6, v0, 50 +; GCN-NEXT: v_readlane_b32 s7, v0, 51 +; GCN-NEXT: v_readlane_b32 s8, v0, 52 +; GCN-NEXT: v_readlane_b32 s9, v0, 53 +; GCN-NEXT: v_readlane_b32 s10, v0, 54 +; GCN-NEXT: v_readlane_b32 s11, v0, 55 +; GCN-NEXT: v_readlane_b32 s12, v0, 56 +; GCN-NEXT: v_readlane_b32 s13, v0, 57 +; GCN-NEXT: v_readlane_b32 s14, v0, 58 +; GCN-NEXT: v_readlane_b32 s15, v0, 59 +; GCN-NEXT: v_readlane_b32 s16, v0, 60 +; GCN-NEXT: v_readlane_b32 s17, v0, 61 +; GCN-NEXT: v_readlane_b32 s18, v0, 62 +; GCN-NEXT: v_readlane_b32 s19, v0, 63 define amdgpu_kernel void @split_sgpr_spill_2_vgprs(i32 addrspace(1)* %out, i32 %in) #1 { %wide.sgpr0 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0 %wide.sgpr1 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0 @@ -493,8 +492,8 @@ ret: ; GCN-NEXT: v_writelane_b32 v23, s{{[[0-9]+}}, 31 ; GCN: def s[0:1] -; GCN: v_writelane_b32 v23, s0, 32 -; GCN-NEXT: 
v_writelane_b32 v23, s1, 33 +; GCN: v_writelane_b32 v23, s20, 32 +; GCN-NEXT: v_writelane_b32 v23, s21, 33 ; GCN-NEXT: v_writelane_b32 v23, s{{[[0-9]+}}, 34 ; GCN-NEXT: v_writelane_b32 v23, s{{[[0-9]+}}, 35 @@ -515,20 +514,6 @@ ret: ; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} ; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} -; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} -; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} -; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} -; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} -; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} -; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} -; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} -; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} -; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} -; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} -; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} -; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} -; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} -; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} ; GCN: s_cbranch_scc1 @@ -551,7 +536,9 @@ ret: ; GCN: ; use s{{\[}}[[USE_TMP_LO]]:[[USE_TMP_HI]]{{\]}} -; GCN: v_readlane_b32 s[[USE_TMP_LO:[0-9]+]], v23, 34 +; GCN: v_readlane_b32 s[[USE_TMP_LO:[0-9]+]], v23, 32 +; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 33 +; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 34 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 35 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 36 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 37 @@ -564,9 +551,7 @@ ret: ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 44 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 45 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 46 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 47 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 48 -; GCN-NEXT: v_readlane_b32 s[[USE_TMP_HI:[0-9]+]], v23, 49 +; GCN-NEXT: v_readlane_b32 s[[USE_TMP_HI:[0-9]+]], v23, 47 ; GCN: ; use s{{\[}}[[USE_TMP_LO]]:[[USE_TMP_HI]]{{\]}} ; GCN: v_readlane_b32 s[[USE_TMP_LO:[0-9]+]], v23, 16 @@ -589,23 +574,8 @@ ret: ; GCN: buffer_load_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} ; GCN: buffer_load_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} -; GCN: buffer_load_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} -; GCN: buffer_load_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} -; GCN: buffer_load_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} -; GCN: buffer_load_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} -; GCN: buffer_load_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} -; GCN: buffer_load_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} -; GCN: buffer_load_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} -; GCN: buffer_load_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} -; GCN: buffer_load_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} -; GCN: buffer_load_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} -; GCN: buffer_load_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} -; GCN: 
buffer_load_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} -; GCN: buffer_load_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} -; GCN: buffer_load_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} -; GCN: v_readlane_b32 s0, v23, 32 -; GCN: v_readlane_b32 s1, v23, 33 +; GCN: v_readfirstlane_b32 s1, v0 ; GCN: ;;#ASMSTART ; GCN: ; use s[0:1] define amdgpu_kernel void @no_vgprs_last_sgpr_spill(i32 addrspace(1)* %out, i32 %in) #1 { diff --git a/llvm/test/CodeGen/AMDGPU/spill-m0.ll b/llvm/test/CodeGen/AMDGPU/spill-m0.ll index ab54f9096cf..509b7a2dd68 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-m0.ll +++ b/llvm/test/CodeGen/AMDGPU/spill-m0.ll @@ -13,29 +13,29 @@ ; GCN-DAG: s_cmp_lg_u32 ; TOVGPR-DAG: s_mov_b32 [[M0_COPY:s[0-9]+]], m0 -; TOVGPR: v_writelane_b32 [[SPILL_VREG:v[0-9]+]], [[M0_COPY]], 0 +; TOVGPR: v_writelane_b32 [[SPILL_VREG:v[0-9]+]], [[M0_COPY]], 2 ; TOVMEM-DAG: s_mov_b32 [[M0_COPY:s[0-9]+]], m0 ; TOVMEM-DAG: v_mov_b32_e32 [[SPILL_VREG:v[0-9]+]], [[M0_COPY]] -; TOVMEM: buffer_store_dword [[SPILL_VREG]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:4 ; 4-byte Folded Spill +; TOVMEM: buffer_store_dword [[SPILL_VREG]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:12 ; 4-byte Folded Spill ; TOSMEM-DAG: s_mov_b32 [[M0_COPY:s[0-9]+]], m0 -; TOSMEM: s_add_u32 m0, s3, 0x100{{$}} +; TOSMEM: s_add_u32 m0, s3, 0x300{{$}} ; TOSMEM-NOT: [[M0_COPY]] ; TOSMEM: s_buffer_store_dword [[M0_COPY]], s{{\[}}[[LO]]:[[HI]]], m0 ; 4-byte Folded Spill ; GCN: s_cbranch_scc1 [[ENDIF:BB[0-9]+_[0-9]+]] ; GCN: [[ENDIF]]: -; TOVGPR: v_readlane_b32 [[M0_RESTORE:s[0-9]+]], [[SPILL_VREG]], 0 +; TOVGPR: v_readlane_b32 [[M0_RESTORE:s[0-9]+]], [[SPILL_VREG]], 2 ; TOVGPR: s_mov_b32 m0, [[M0_RESTORE]] -; TOVMEM: buffer_load_dword [[RELOAD_VREG:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:4 ; 4-byte Folded Reload +; TOVMEM: buffer_load_dword [[RELOAD_VREG:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:12 ; 4-byte Folded Reload ; TOVMEM: s_waitcnt vmcnt(0) ; TOVMEM: v_readfirstlane_b32 [[M0_RESTORE:s[0-9]+]], [[RELOAD_VREG]] ; TOVMEM: s_mov_b32 m0, [[M0_RESTORE]] -; TOSMEM: s_add_u32 m0, s3, 0x100{{$}} +; TOSMEM: s_add_u32 m0, s3, 0x300{{$}} ; TOSMEM: s_buffer_load_dword [[M0_RESTORE:s[0-9]+]], s{{\[}}[[LO]]:[[HI]]], m0 ; 4-byte Folded Reload ; TOSMEM-NOT: [[M0_RESTORE]] ; TOSMEM: s_mov_b32 m0, [[M0_RESTORE]] @@ -80,7 +80,7 @@ endif: ; TOSMEM: s_branch ; TOSMEM: BB{{[0-9]+_[0-9]+}}: -; TOSMEM: s_add_u32 m0, s7, 0x400 +; TOSMEM: s_add_u32 m0, s7, 0x500 ; TOSMEM-NEXT: s_buffer_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, m0 ; 8-byte Folded Reload @@ -162,17 +162,17 @@ endif: ; FIXME: RegScavenger::isRegUsed() always returns true if m0 is reserved, so we have to save and restore it ; FIXME-TOSMEM-NOT: m0 ; TOSMEM: s_add_u32 m0, s3, 0x100 -; TOSMEM: s_buffer_store_dwordx2 [[REG]], s[88:91], m0 ; 8-byte Folded Spill -; FIXME-TOSMEM-NOT: m0 -; TOSMEM: s_add_u32 m0, s3, 0x300 ; TOSMEM: s_buffer_store_dword s{{[0-9]+}}, s[88:91], m0 ; 4-byte Folded Spill ; FIXME-TOSMEM-NOT: m0 +; TOSMEM: s_add_u32 m0, s3, 0x200 +; TOSMEM: s_buffer_store_dwordx2 [[REG]], s[88:91], m0 ; 8-byte Folded Spill +; FIXME-TOSMEM-NOT: m0 ; TOSMEM: s_cbranch_scc1 ; TOSMEM: s_mov_b32 m0, -1 ; TOSMEM: s_mov_b32 s0, m0 -; TOSMEM: s_add_u32 m0, s3, 0x100 +; TOSMEM: s_add_u32 m0, s3, 0x200 ; TOSMEM: s_buffer_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[88:91], m0 ; 8-byte Folded Reload ; TOSMEM: s_mov_b32 m0, s0 ; TOSMEM: s_waitcnt lgkmcnt(0) @@ -180,7 +180,7 @@ endif: ; TOSMEM: 
ds_write_b64 ; FIXME-TOSMEM-NOT: m0 -; TOSMEM: s_add_u32 m0, s3, 0x300 +; TOSMEM: s_add_u32 m0, s3, 0x100 ; TOSMEM: s_buffer_load_dword s0, s[88:91], m0 ; 4-byte Folded Reload ; FIXME-TOSMEM-NOT: m0 ; TOSMEM: s_waitcnt lgkmcnt(0) diff --git a/llvm/test/CodeGen/Mips/atomic.ll b/llvm/test/CodeGen/Mips/atomic.ll index b58338aa6fd..3d516ea2638 100644 --- a/llvm/test/CodeGen/Mips/atomic.ll +++ b/llvm/test/CodeGen/Mips/atomic.ll @@ -2038,10 +2038,10 @@ define i32 @AtomicCmpSwap32(i32 signext %oldval, i32 signext %newval) nounwind { ; MIPS32R6O0-NEXT: beqzc $7, $BB7_1 ; MIPS32R6O0-NEXT: $BB7_3: # %entry ; MIPS32R6O0-NEXT: move $2, $6 +; MIPS32R6O0-NEXT: sw $25, 12($sp) # 4-byte Folded Spill +; MIPS32R6O0-NEXT: sw $1, 8($sp) # 4-byte Folded Spill ; MIPS32R6O0-NEXT: sw $6, 16($sp) # 4-byte Folded Spill -; MIPS32R6O0-NEXT: sw $1, 12($sp) # 4-byte Folded Spill -; MIPS32R6O0-NEXT: sw $3, 8($sp) # 4-byte Folded Spill -; MIPS32R6O0-NEXT: sw $25, 4($sp) # 4-byte Folded Spill +; MIPS32R6O0-NEXT: sw $3, 4($sp) # 4-byte Folded Spill ; MIPS32R6O0-NEXT: addiu $sp, $sp, 24 ; MIPS32R6O0-NEXT: jrc $ra ; @@ -4550,11 +4550,11 @@ define signext i8 @AtomicCmpSwap8(i8 signext %oldval, i8 signext %newval) nounwi ; MIPS32R6O0-NEXT: srlv $8, $10, $2 ; MIPS32R6O0-NEXT: seb $8, $8 ; MIPS32R6O0-NEXT: # %bb.4: # %entry -; MIPS32R6O0-NEXT: sw $1, 12($sp) # 4-byte Folded Spill -; MIPS32R6O0-NEXT: sw $8, 8($sp) # 4-byte Folded Spill -; MIPS32R6O0-NEXT: sw $25, 4($sp) # 4-byte Folded Spill +; MIPS32R6O0-NEXT: sw $25, 12($sp) # 4-byte Folded Spill +; MIPS32R6O0-NEXT: sw $1, 8($sp) # 4-byte Folded Spill +; MIPS32R6O0-NEXT: sw $8, 4($sp) # 4-byte Folded Spill ; MIPS32R6O0-NEXT: # %bb.5: # %entry -; MIPS32R6O0-NEXT: lw $2, 8($sp) # 4-byte Folded Reload +; MIPS32R6O0-NEXT: lw $2, 4($sp) # 4-byte Folded Reload ; MIPS32R6O0-NEXT: addiu $sp, $sp, 16 ; MIPS32R6O0-NEXT: jrc $ra ; @@ -5127,14 +5127,14 @@ define i1 @AtomicCmpSwapRes8(i8* %ptr, i8 signext %oldval, i8 signext %newval) n ; MIPS32R6O0-NEXT: srlv $11, $13, $4 ; MIPS32R6O0-NEXT: seb $11, $11 ; MIPS32R6O0-NEXT: # %bb.4: # %entry -; MIPS32R6O0-NEXT: sw $11, 20($sp) # 4-byte Folded Spill -; MIPS32R6O0-NEXT: sw $5, 16($sp) # 4-byte Folded Spill -; MIPS32R6O0-NEXT: sw $3, 12($sp) # 4-byte Folded Spill -; MIPS32R6O0-NEXT: sw $1, 8($sp) # 4-byte Folded Spill -; MIPS32R6O0-NEXT: sw $2, 4($sp) # 4-byte Folded Spill +; MIPS32R6O0-NEXT: sw $5, 20($sp) # 4-byte Folded Spill +; MIPS32R6O0-NEXT: sw $1, 16($sp) # 4-byte Folded Spill +; MIPS32R6O0-NEXT: sw $2, 12($sp) # 4-byte Folded Spill +; MIPS32R6O0-NEXT: sw $3, 8($sp) # 4-byte Folded Spill +; MIPS32R6O0-NEXT: sw $11, 4($sp) # 4-byte Folded Spill ; MIPS32R6O0-NEXT: # %bb.5: # %entry -; MIPS32R6O0-NEXT: lw $1, 20($sp) # 4-byte Folded Reload -; MIPS32R6O0-NEXT: lw $2, 16($sp) # 4-byte Folded Reload +; MIPS32R6O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload +; MIPS32R6O0-NEXT: lw $2, 20($sp) # 4-byte Folded Reload ; MIPS32R6O0-NEXT: xor $1, $1, $2 ; MIPS32R6O0-NEXT: sltiu $2, $1, 1 ; MIPS32R6O0-NEXT: addiu $sp, $sp, 24 @@ -5282,7 +5282,7 @@ define i1 @AtomicCmpSwapRes8(i8* %ptr, i8 signext %oldval, i8 signext %newval) n ; ; MIPS64R6O0-LABEL: AtomicCmpSwapRes8: ; MIPS64R6O0: # %bb.0: # %entry -; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16 +; MIPS64R6O0-NEXT: daddiu $sp, $sp, -32 ; MIPS64R6O0-NEXT: move $1, $6 ; MIPS64R6O0-NEXT: move $2, $5 ; MIPS64R6O0-NEXT: move $5, $4 @@ -5313,15 +5313,15 @@ define i1 @AtomicCmpSwapRes8(i8* %ptr, i8 signext %oldval, i8 signext %newval) n ; MIPS64R6O0-NEXT: srlv $10, $12, $3 ; MIPS64R6O0-NEXT: seb $10, $10 ; 
MIPS64R6O0-NEXT: # %bb.4: # %entry -; MIPS64R6O0-NEXT: sd $5, 8($sp) # 8-byte Folded Spill -; MIPS64R6O0-NEXT: sw $10, 4($sp) # 4-byte Folded Spill -; MIPS64R6O0-NEXT: sw $2, 0($sp) # 4-byte Folded Spill +; MIPS64R6O0-NEXT: sw $2, 28($sp) # 4-byte Folded Spill +; MIPS64R6O0-NEXT: sd $5, 16($sp) # 8-byte Folded Spill +; MIPS64R6O0-NEXT: sw $10, 12($sp) # 4-byte Folded Spill ; MIPS64R6O0-NEXT: # %bb.5: # %entry -; MIPS64R6O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload -; MIPS64R6O0-NEXT: lw $2, 0($sp) # 4-byte Folded Reload +; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload +; MIPS64R6O0-NEXT: lw $2, 28($sp) # 4-byte Folded Reload ; MIPS64R6O0-NEXT: xor $1, $1, $2 ; MIPS64R6O0-NEXT: sltiu $2, $1, 1 -; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16 +; MIPS64R6O0-NEXT: daddiu $sp, $sp, 32 ; MIPS64R6O0-NEXT: jrc $ra ; ; MM32-LABEL: AtomicCmpSwapRes8: @@ -6233,20 +6233,20 @@ define {i16, i1} @foo(i16* %addr, i16 %l, i16 %r, i16 %new) { ; MIPS32R6O0-NEXT: srlv $12, $14, $4 ; MIPS32R6O0-NEXT: seh $12, $12 ; MIPS32R6O0-NEXT: # %bb.4: -; MIPS32R6O0-NEXT: sw $12, 20($sp) # 4-byte Folded Spill -; MIPS32R6O0-NEXT: sw $3, 16($sp) # 4-byte Folded Spill -; MIPS32R6O0-NEXT: sw $8, 12($sp) # 4-byte Folded Spill -; MIPS32R6O0-NEXT: sw $5, 8($sp) # 4-byte Folded Spill -; MIPS32R6O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill -; MIPS32R6O0-NEXT: sw $2, 0($sp) # 4-byte Folded Spill +; MIPS32R6O0-NEXT: sw $1, 20($sp) # 4-byte Folded Spill +; MIPS32R6O0-NEXT: sw $2, 16($sp) # 4-byte Folded Spill +; MIPS32R6O0-NEXT: sw $3, 12($sp) # 4-byte Folded Spill +; MIPS32R6O0-NEXT: sw $8, 8($sp) # 4-byte Folded Spill +; MIPS32R6O0-NEXT: sw $5, 4($sp) # 4-byte Folded Spill +; MIPS32R6O0-NEXT: sw $12, 0($sp) # 4-byte Folded Spill ; MIPS32R6O0-NEXT: # %bb.5: -; MIPS32R6O0-NEXT: lw $1, 8($sp) # 4-byte Folded Reload +; MIPS32R6O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload ; MIPS32R6O0-NEXT: seh $2, $1 -; MIPS32R6O0-NEXT: lw $3, 20($sp) # 4-byte Folded Reload +; MIPS32R6O0-NEXT: lw $3, 0($sp) # 4-byte Folded Reload ; MIPS32R6O0-NEXT: xor $2, $3, $2 ; MIPS32R6O0-NEXT: sltiu $3, $2, 1 ; MIPS32R6O0-NEXT: sync -; MIPS32R6O0-NEXT: lw $2, 20($sp) # 4-byte Folded Reload +; MIPS32R6O0-NEXT: lw $2, 0($sp) # 4-byte Folded Reload ; MIPS32R6O0-NEXT: addiu $sp, $sp, 24 ; MIPS32R6O0-NEXT: jrc $ra ; @@ -6449,17 +6449,17 @@ define {i16, i1} @foo(i16* %addr, i16 %l, i16 %r, i16 %new) { ; MIPS64R6O0-NEXT: srlv $11, $13, $3 ; MIPS64R6O0-NEXT: seh $11, $11 ; MIPS64R6O0-NEXT: # %bb.4: -; MIPS64R6O0-NEXT: sw $2, 12($sp) # 4-byte Folded Spill -; MIPS64R6O0-NEXT: sw $11, 8($sp) # 4-byte Folded Spill -; MIPS64R6O0-NEXT: sd $5, 0($sp) # 8-byte Folded Spill +; MIPS64R6O0-NEXT: sd $5, 8($sp) # 8-byte Folded Spill +; MIPS64R6O0-NEXT: sw $2, 4($sp) # 4-byte Folded Spill +; MIPS64R6O0-NEXT: sw $11, 0($sp) # 4-byte Folded Spill ; MIPS64R6O0-NEXT: # %bb.5: -; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload +; MIPS64R6O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload ; MIPS64R6O0-NEXT: seh $2, $1 -; MIPS64R6O0-NEXT: lw $3, 8($sp) # 4-byte Folded Reload +; MIPS64R6O0-NEXT: lw $3, 0($sp) # 4-byte Folded Reload ; MIPS64R6O0-NEXT: xor $2, $3, $2 ; MIPS64R6O0-NEXT: sltiu $3, $2, 1 ; MIPS64R6O0-NEXT: sync -; MIPS64R6O0-NEXT: lw $2, 8($sp) # 4-byte Folded Reload +; MIPS64R6O0-NEXT: lw $2, 0($sp) # 4-byte Folded Reload ; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16 ; MIPS64R6O0-NEXT: jrc $ra ; @@ -7016,8 +7016,8 @@ define i32 @zeroreg() nounwind { ; MIPS32O0-NEXT: xor $2, $5, $2 ; MIPS32O0-NEXT: sltiu $2, $2, 1 ; MIPS32O0-NEXT: andi $2, $2, 1 -; MIPS32O0-NEXT: sw $3, 8($sp) # 
4-byte Folded Spill ; MIPS32O0-NEXT: sw $5, 12($sp) # 4-byte Folded Spill +; MIPS32O0-NEXT: sw $3, 8($sp) # 4-byte Folded Spill ; MIPS32O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32O0-NEXT: addiu $sp, $sp, 16 ; MIPS32O0-NEXT: jr $ra @@ -7099,8 +7099,8 @@ define i32 @zeroreg() nounwind { ; MIPS32R6O0-NEXT: xor $1, $5, $1 ; MIPS32R6O0-NEXT: sltiu $2, $1, 1 ; MIPS32R6O0-NEXT: sync -; MIPS32R6O0-NEXT: sw $3, 0($sp) # 4-byte Folded Spill ; MIPS32R6O0-NEXT: sw $5, 4($sp) # 4-byte Folded Spill +; MIPS32R6O0-NEXT: sw $3, 0($sp) # 4-byte Folded Spill ; MIPS32R6O0-NEXT: addiu $sp, $sp, 8 ; MIPS32R6O0-NEXT: jrc $ra ; @@ -7234,8 +7234,8 @@ define i32 @zeroreg() nounwind { ; MIPS64R6O0-NEXT: xor $2, $6, $3 ; MIPS64R6O0-NEXT: sltiu $2, $2, 1 ; MIPS64R6O0-NEXT: sync -; MIPS64R6O0-NEXT: sw $4, 8($sp) # 4-byte Folded Spill ; MIPS64R6O0-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64R6O0-NEXT: sw $4, 8($sp) # 4-byte Folded Spill ; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16 ; MIPS64R6O0-NEXT: jrc $ra ; diff --git a/llvm/test/CodeGen/Mips/atomic64.ll b/llvm/test/CodeGen/Mips/atomic64.ll index aa8442d488b..8e5002b38b0 100644 --- a/llvm/test/CodeGen/Mips/atomic64.ll +++ b/llvm/test/CodeGen/Mips/atomic64.ll @@ -1289,8 +1289,8 @@ define i64 @AtomicCmpSwap64(i64 signext %oldval, i64 signext %newval) nounwind { ; MIPS64R6O0-NEXT: .LBB7_3: # %entry ; MIPS64R6O0-NEXT: sd $2, 24($sp) # 8-byte Folded Spill ; MIPS64R6O0-NEXT: move $2, $6 -; MIPS64R6O0-NEXT: sd $6, 32($sp) # 8-byte Folded Spill ; MIPS64R6O0-NEXT: sd $25, 16($sp) # 8-byte Folded Spill +; MIPS64R6O0-NEXT: sd $6, 32($sp) # 8-byte Folded Spill ; MIPS64R6O0-NEXT: sd $3, 8($sp) # 8-byte Folded Spill ; MIPS64R6O0-NEXT: daddiu $sp, $sp, 48 ; MIPS64R6O0-NEXT: jrc $ra diff --git a/llvm/test/CodeGen/Mips/atomicCmpSwapPW.ll b/llvm/test/CodeGen/Mips/atomicCmpSwapPW.ll index 10610e34e71..973f3a5bf0b 100644 --- a/llvm/test/CodeGen/Mips/atomicCmpSwapPW.ll +++ b/llvm/test/CodeGen/Mips/atomicCmpSwapPW.ll @@ -32,10 +32,10 @@ define void @foo(i32 %new, i32 %old) { ; O32-NEXT: nop ; O32-NEXT: $BB0_3: # %entry ; O32-NEXT: sync +; O32-NEXT: sw $1, 8($sp) # 4-byte Folded Spill +; O32-NEXT: sw $2, 4($sp) # 4-byte Folded Spill ; O32-NEXT: sw $7, 12($sp) # 4-byte Folded Spill -; O32-NEXT: sw $6, 8($sp) # 4-byte Folded Spill -; O32-NEXT: sw $1, 4($sp) # 4-byte Folded Spill -; O32-NEXT: sw $2, 0($sp) # 4-byte Folded Spill +; O32-NEXT: sw $6, 0($sp) # 4-byte Folded Spill ; O32-NEXT: addiu $sp, $sp, 16 ; O32-NEXT: jr $ra ; O32-NEXT: nop diff --git a/llvm/test/CodeGen/X86/atomic32.ll b/llvm/test/CodeGen/X86/atomic32.ll index 5e78444eea7..519b169c0f9 100644 --- a/llvm/test/CodeGen/X86/atomic32.ll +++ b/llvm/test/CodeGen/X86/atomic32.ll @@ -71,8 +71,8 @@ define void @atomic_fetch_and32() nounwind { ; X64-NEXT: sete %dl ; X64-NEXT: testb $1, %dl ; X64-NEXT: movl %eax, %ecx -; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: jne .LBB2_2 ; X64-NEXT: jmp .LBB2_1 ; X64-NEXT: .LBB2_2: # %atomicrmw.end @@ -95,8 +95,8 @@ define void @atomic_fetch_and32() nounwind { ; X86-NEXT: sete %dl ; X86-NEXT: testb $1, %dl ; X86-NEXT: movl %eax, %ecx -; X86-NEXT: movl %eax, (%esp) # 4-byte Spill ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %eax, (%esp) # 4-byte Spill ; X86-NEXT: jne .LBB2_2 ; X86-NEXT: jmp .LBB2_1 ; X86-NEXT: .LBB2_2: # %atomicrmw.end @@ -125,8 +125,8 @@ define void @atomic_fetch_or32() nounwind { ; 
X64-NEXT: sete %dl ; X64-NEXT: testb $1, %dl ; X64-NEXT: movl %eax, %ecx -; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: jne .LBB3_2 ; X64-NEXT: jmp .LBB3_1 ; X64-NEXT: .LBB3_2: # %atomicrmw.end @@ -149,8 +149,8 @@ define void @atomic_fetch_or32() nounwind { ; X86-NEXT: sete %dl ; X86-NEXT: testb $1, %dl ; X86-NEXT: movl %eax, %ecx -; X86-NEXT: movl %eax, (%esp) # 4-byte Spill ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %eax, (%esp) # 4-byte Spill ; X86-NEXT: jne .LBB3_2 ; X86-NEXT: jmp .LBB3_1 ; X86-NEXT: .LBB3_2: # %atomicrmw.end @@ -179,8 +179,8 @@ define void @atomic_fetch_xor32() nounwind { ; X64-NEXT: sete %dl ; X64-NEXT: testb $1, %dl ; X64-NEXT: movl %eax, %ecx -; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: jne .LBB4_2 ; X64-NEXT: jmp .LBB4_1 ; X64-NEXT: .LBB4_2: # %atomicrmw.end @@ -203,8 +203,8 @@ define void @atomic_fetch_xor32() nounwind { ; X86-NEXT: sete %dl ; X86-NEXT: testb $1, %dl ; X86-NEXT: movl %eax, %ecx -; X86-NEXT: movl %eax, (%esp) # 4-byte Spill ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %eax, (%esp) # 4-byte Spill ; X86-NEXT: jne .LBB4_2 ; X86-NEXT: jmp .LBB4_1 ; X86-NEXT: .LBB4_2: # %atomicrmw.end @@ -285,8 +285,8 @@ define void @atomic_fetch_max32(i32 %x) nounwind { ; X64-NEXT: lock cmpxchgl %edx, {{.*}}(%rip) ; X64-NEXT: sete %sil ; X64-NEXT: testb $1, %sil -; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: jne .LBB6_2 ; X64-NEXT: jmp .LBB6_1 ; X64-NEXT: .LBB6_2: # %atomicrmw.end @@ -310,8 +310,8 @@ define void @atomic_fetch_max32(i32 %x) nounwind { ; X86-CMOV-NEXT: lock cmpxchgl %edx, sc32 ; X86-CMOV-NEXT: sete %bl ; X86-CMOV-NEXT: testb $1, %bl -; X86-CMOV-NEXT: movl %ecx, (%esp) # 4-byte Spill ; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-CMOV-NEXT: movl %ecx, (%esp) # 4-byte Spill ; X86-CMOV-NEXT: jne .LBB6_2 ; X86-CMOV-NEXT: jmp .LBB6_1 ; X86-CMOV-NEXT: .LBB6_2: # %atomicrmw.end @@ -381,8 +381,8 @@ define void @atomic_fetch_min32(i32 %x) nounwind { ; X64-NEXT: lock cmpxchgl %edx, {{.*}}(%rip) ; X64-NEXT: sete %sil ; X64-NEXT: testb $1, %sil -; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: jne .LBB7_2 ; X64-NEXT: jmp .LBB7_1 ; X64-NEXT: .LBB7_2: # %atomicrmw.end @@ -406,8 +406,8 @@ define void @atomic_fetch_min32(i32 %x) nounwind { ; X86-CMOV-NEXT: lock cmpxchgl %edx, sc32 ; X86-CMOV-NEXT: sete %bl ; X86-CMOV-NEXT: testb $1, %bl -; X86-CMOV-NEXT: movl %ecx, (%esp) # 4-byte Spill ; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-CMOV-NEXT: movl %ecx, (%esp) # 4-byte Spill ; X86-CMOV-NEXT: jne .LBB7_2 ; X86-CMOV-NEXT: jmp .LBB7_1 ; X86-CMOV-NEXT: .LBB7_2: # %atomicrmw.end @@ -477,8 +477,8 @@ define void @atomic_fetch_umax32(i32 %x) nounwind { ; X64-NEXT: lock cmpxchgl %edx, {{.*}}(%rip) ; X64-NEXT: sete %sil ; X64-NEXT: testb $1, %sil -; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: movl %eax, 
{{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: jne .LBB8_2
; X64-NEXT: jmp .LBB8_1
; X64-NEXT: .LBB8_2: # %atomicrmw.end
@@ -502,8 +502,8 @@ define void @atomic_fetch_umax32(i32 %x) nounwind {
; X86-CMOV-NEXT: lock cmpxchgl %edx, sc32
; X86-CMOV-NEXT: sete %bl
; X86-CMOV-NEXT: testb $1, %bl
-; X86-CMOV-NEXT: movl %ecx, (%esp) # 4-byte Spill
; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-CMOV-NEXT: movl %ecx, (%esp) # 4-byte Spill
; X86-CMOV-NEXT: jne .LBB8_2
; X86-CMOV-NEXT: jmp .LBB8_1
; X86-CMOV-NEXT: .LBB8_2: # %atomicrmw.end
@@ -573,8 +573,8 @@ define void @atomic_fetch_umin32(i32 %x) nounwind {
; X64-NEXT: lock cmpxchgl %edx, {{.*}}(%rip)
; X64-NEXT: sete %sil
; X64-NEXT: testb $1, %sil
-; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: jne .LBB9_2
; X64-NEXT: jmp .LBB9_1
; X64-NEXT: .LBB9_2: # %atomicrmw.end
@@ -598,8 +598,8 @@ define void @atomic_fetch_umin32(i32 %x) nounwind {
; X86-CMOV-NEXT: lock cmpxchgl %edx, sc32
; X86-CMOV-NEXT: sete %bl
; X86-CMOV-NEXT: testb $1, %bl
-; X86-CMOV-NEXT: movl %ecx, (%esp) # 4-byte Spill
; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-CMOV-NEXT: movl %ecx, (%esp) # 4-byte Spill
; X86-CMOV-NEXT: jne .LBB9_2
; X86-CMOV-NEXT: jmp .LBB9_1
; X86-CMOV-NEXT: .LBB9_2: # %atomicrmw.end
diff --git a/llvm/test/CodeGen/X86/avx-load-store.ll b/llvm/test/CodeGen/X86/avx-load-store.ll
index d55bbac5dc1..ea42aa34d8c 100644
--- a/llvm/test/CodeGen/X86/avx-load-store.ll
+++ b/llvm/test/CodeGen/X86/avx-load-store.ll
@@ -13,15 +13,15 @@ define void @test_256_load(double* nocapture %d, float* nocapture %f, <4 x i64>*
; CHECK-NEXT: movq %rsi, %r15
; CHECK-NEXT: movq %rdi, %rbx
; CHECK-NEXT: vmovaps (%rdi), %ymm0
-; CHECK-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) # 32-byte Spill
+; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: vmovaps (%rsi), %ymm1
-; CHECK-NEXT: vmovups %ymm1, {{[0-9]+}}(%rsp) # 32-byte Spill
+; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: vmovaps (%rdx), %ymm2
; CHECK-NEXT: vmovups %ymm2, (%rsp) # 32-byte Spill
; CHECK-NEXT: callq dummy
-; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
+; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; CHECK-NEXT: vmovaps %ymm0, (%rbx)
-; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
+; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; CHECK-NEXT: vmovaps %ymm0, (%r15)
; CHECK-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
; CHECK-NEXT: vmovaps %ymm0, (%r14)
@@ -38,21 +38,21 @@ define void @test_256_load(double* nocapture %d, float* nocapture %f, <4 x i64>*
; CHECK_O0-NEXT: vmovapd (%rdi), %ymm0
; CHECK_O0-NEXT: vmovaps (%rsi), %ymm1
; CHECK_O0-NEXT: vmovdqa (%rdx), %ymm2
-; CHECK_O0-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) # 32-byte Spill
-; CHECK_O0-NEXT: vmovups %ymm1, {{[0-9]+}}(%rsp) # 32-byte Spill
-; CHECK_O0-NEXT: vmovups %ymm2, {{[0-9]+}}(%rsp) # 32-byte Spill
-; CHECK_O0-NEXT: movq %rsi, {{[0-9]+}}(%rsp) # 8-byte Spill
-; CHECK_O0-NEXT: movq %rdi, {{[0-9]+}}(%rsp) # 8-byte Spill
-; CHECK_O0-NEXT: movq %rdx, {{[0-9]+}}(%rsp) # 8-byte Spill
+; CHECK_O0-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; CHECK_O0-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; CHECK_O0-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; CHECK_O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK_O0-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK_O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK_O0-NEXT: callq dummy
-; CHECK_O0-NEXT: movq {{[0-9]+}}(%rsp), %rdx # 8-byte Reload
-; CHECK_O0-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
+; CHECK_O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; CHECK_O0-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; CHECK_O0-NEXT: vmovapd %ymm0, (%rdx)
-; CHECK_O0-NEXT: movq {{[0-9]+}}(%rsp), %rsi # 8-byte Reload
-; CHECK_O0-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm1 # 32-byte Reload
+; CHECK_O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; CHECK_O0-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
; CHECK_O0-NEXT: vmovaps %ymm1, (%rsi)
-; CHECK_O0-NEXT: movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload
-; CHECK_O0-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm2 # 32-byte Reload
+; CHECK_O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; CHECK_O0-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload
; CHECK_O0-NEXT: vmovdqa %ymm2, (%rdi)
; CHECK_O0-NEXT: addq $152, %rsp
; CHECK_O0-NEXT: vzeroupper
diff --git a/llvm/test/CodeGen/X86/avx512-mask-zext-bugfix.ll b/llvm/test/CodeGen/X86/avx512-mask-zext-bugfix.ll
index 4d939bd5b8c..fed87ebf6eb 100755
--- a/llvm/test/CodeGen/X86/avx512-mask-zext-bugfix.ll
+++ b/llvm/test/CodeGen/X86/avx512-mask-zext-bugfix.ll
@@ -17,50 +17,50 @@ declare i32 @check_mask16(i16 zeroext %res_mask, i16 zeroext %exp_mask, i8* %fna
define void @test_xmm(i32 %shift, i32 %mulp, <2 x i64> %a,i8* %arraydecay,i8* %fname){
; CHECK-LABEL: test_xmm:
; CHECK: ## %bb.0:
-; CHECK-NEXT: subq $56, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: subq $72, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 80
; CHECK-NEXT: vpmovw2m %xmm0, %k0
; CHECK-NEXT: movl $2, %esi
; CHECK-NEXT: movl $8, %eax
; CHECK-NEXT: movq %rdx, %rdi
-; CHECK-NEXT: movq %rdx, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; CHECK-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
; CHECK-NEXT: movl %eax, %edx
-; CHECK-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ## 2-byte Spill
-; CHECK-NEXT: movq %rcx, {{[0-9]+}}(%rsp) ## 8-byte Spill
-; CHECK-NEXT: vmovaps %xmm0, {{[0-9]+}}(%rsp) ## 16-byte Spill
+; CHECK-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
+; CHECK-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; CHECK-NEXT: callq _calc_expected_mask_val
; CHECK-NEXT: movl %eax, %edx
; CHECK-NEXT: movw %dx, %r8w
; CHECK-NEXT: movzwl %r8w, %esi
-; CHECK-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Reload
+; CHECK-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 2-byte Reload
; CHECK-NEXT: kmovb %k0, %edi
-; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rdx ## 8-byte Reload
-; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rcx ## 8-byte Reload
+; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx ## 8-byte Reload
+; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx ## 8-byte Reload
; CHECK-NEXT: callq _check_mask16
-; CHECK-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 ## 16-byte Reload
+; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
; CHECK-NEXT: vpmovd2m %xmm0, %k0
; CHECK-NEXT: kmovq %k0, %k1
; CHECK-NEXT: kmovd %k0, %esi
; CHECK-NEXT: movb %sil, %r9b
; CHECK-NEXT: movzbl %r9b, %esi
; CHECK-NEXT: movw %si, %r8w
-; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rdi ## 8-byte Reload
+; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi ## 8-byte Reload
; CHECK-NEXT: movl $4, %esi
-; CHECK-NEXT: movl %esi, {{[0-9]+}}(%rsp) ## 4-byte Spill
-; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %edx ## 4-byte Reload
-; CHECK-NEXT: movl %eax, {{[0-9]+}}(%rsp) ## 4-byte Spill
-; CHECK-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Spill
-; CHECK-NEXT: movw %r8w, {{[0-9]+}}(%rsp) ## 2-byte Spill
+; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
+; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edx ## 4-byte Reload
+; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
+; CHECK-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
+; CHECK-NEXT: movw %r8w, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; CHECK-NEXT: callq _calc_expected_mask_val
; CHECK-NEXT: movw %ax, %r8w
-; CHECK-NEXT: movw {{[0-9]+}}(%rsp), %r10w ## 2-byte Reload
+; CHECK-NEXT: movw {{[-0-9]+}}(%r{{[sb]}}p), %r10w ## 2-byte Reload
; CHECK-NEXT: movzwl %r10w, %edi
; CHECK-NEXT: movzwl %r8w, %esi
-; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rdx ## 8-byte Reload
-; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rcx ## 8-byte Reload
+; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx ## 8-byte Reload
+; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx ## 8-byte Reload
; CHECK-NEXT: callq _check_mask16
-; CHECK-NEXT: movl %eax, (%rsp) ## 4-byte Spill
-; CHECK-NEXT: addq $56, %rsp
+; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
+; CHECK-NEXT: addq $72, %rsp
; CHECK-NEXT: retq
%d2 = bitcast <2 x i64> %a to <8 x i16>
%m2 = call i8 @llvm.x86.avx512.cvtw2mask.128(<8 x i16> %d2)
diff --git a/llvm/test/CodeGen/X86/pr30430.ll b/llvm/test/CodeGen/X86/pr30430.ll
index 94deca3a292..a81e26c51a1 100644
--- a/llvm/test/CodeGen/X86/pr30430.ll
+++ b/llvm/test/CodeGen/X86/pr30430.ll
@@ -116,14 +116,14 @@ define <16 x float> @makefloat(float %f1, float %f2, float %f3, float %f4, float
; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm24, %zmm24
; CHECK-NEXT: vmovaps %zmm24, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovaps {{[0-9]+}}(%rsp), %zmm0
-; CHECK-NEXT: vmovss %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: vmovss %xmm8, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: vmovss %xmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: vmovss %xmm10, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: vmovss %xmm11, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: vmovss %xmm12, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: vmovss %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; CHECK-NEXT: vmovss %xmm14, (%rsp) # 4-byte Spill
+; CHECK-NEXT: vmovss %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT: vmovss %xmm15, (%rsp) # 4-byte Spill
; CHECK-NEXT: movq %rbp, %rsp
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa %rsp, 8
diff --git a/llvm/test/CodeGen/X86/pr32284.ll b/llvm/test/CodeGen/X86/pr32284.ll
index ab6680cf45a..3998fcec9c7 100644
--- a/llvm/test/CodeGen/X86/pr32284.ll
+++ b/llvm/test/CodeGen/X86/pr32284.ll
@@ -222,8 +222,8 @@ define void @f1() {
; 686-O0-NEXT: movl %ebp, _ZN8struct_210member_2_0E
; 686-O0-NEXT: movl $0, _ZN8struct_210member_2_0E+4
; 686-O0-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; 686-O0-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; 686-O0-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; 686-O0-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; 686-O0-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; 686-O0-NEXT: movl %edi, (%esp) # 4-byte Spill
; 686-O0-NEXT: addl $24, %esp
diff --git a/llvm/test/CodeGen/X86/pr32345.ll b/llvm/test/CodeGen/X86/pr32345.ll
index 3a2db27727a..65fcf055f28 100644
--- a/llvm/test/CodeGen/X86/pr32345.ll
+++ b/llvm/test/CodeGen/X86/pr32345.ll
@@ -77,8 +77,8 @@ define void @foo() {
; 6860-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; 6860-NEXT: shrdl %cl, %edi, %esi
; 6860-NEXT: testb $32, %bl
-; 6860-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; 6860-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; 6860-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; 6860-NEXT: jne .LBB0_2
; 6860-NEXT: # %bb.1: # %bb
; 6860-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
diff --git a/llvm/test/CodeGen/X86/pr34592.ll b/llvm/test/CodeGen/X86/pr34592.ll
index 34e80fb23c4..b010429d973 100644
--- a/llvm/test/CodeGen/X86/pr34592.ll
+++ b/llvm/test/CodeGen/X86/pr34592.ll
@@ -53,12 +53,12 @@ define <16 x i64> @pluto(<16 x i64> %arg, <16 x i64> %arg1, <16 x i64> %arg2, <1
; CHECK-NEXT: vmovaps %ymm5, %ymm1
; CHECK-NEXT: vmovaps %ymm3, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: vmovaps %ymm9, %ymm3
-; CHECK-NEXT: vmovaps %ymm15, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: vmovaps %ymm10, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: vmovaps %ymm12, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: vmovaps %ymm13, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; CHECK-NEXT: vmovaps %ymm4, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; CHECK-NEXT: vmovaps %ymm14, (%rsp) # 32-byte Spill
+; CHECK-NEXT: vmovaps %ymm14, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; CHECK-NEXT: vmovaps %ymm15, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; CHECK-NEXT: vmovaps %ymm4, (%rsp) # 32-byte Spill
; CHECK-NEXT: movq %rbp, %rsp
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa %rsp, 8
diff --git a/llvm/test/CodeGen/X86/pr34653.ll b/llvm/test/CodeGen/X86/pr34653.ll
index 54d2e714635..3578806596f 100644
--- a/llvm/test/CodeGen/X86/pr34653.ll
+++ b/llvm/test/CodeGen/X86/pr34653.ll
@@ -130,23 +130,12 @@ define void @pr34653() {
; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
-; CHECK-NEXT: vmovsd %xmm8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: vmovsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: vmovsd %xmm30, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm24, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm25, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: vmovsd %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm28, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm27, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm31, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: vmovsd %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: vmovsd %xmm12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: vmovsd %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm26, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: vmovsd %xmm16, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: vmovsd %xmm17, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: vmovsd %xmm18, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
@@ -155,12 +144,23 @@ define void @pr34653() {
; CHECK-NEXT: vmovsd %xmm21, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: vmovsd %xmm22, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: vmovsd %xmm23, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: vmovsd %xmm24, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: vmovsd %xmm25, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: vmovsd %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: vmovsd %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: vmovsd %xmm11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: vmovsd %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: vmovsd %xmm26, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: vmovsd %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: vmovsd %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: vmovsd %xmm31, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: vmovsd %xmm27, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: vmovsd %xmm28, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: vmovsd %xmm29, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: vmovsd %xmm10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: vmovsd %xmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: vmovsd %xmm8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: vmovsd %xmm7, (%rsp) # 8-byte Spill
; CHECK-NEXT: movq %rbp, %rsp
; CHECK-NEXT: popq %rbp