diff options
| author | Tom Stellard <thomas.stellard@amd.com> | 2013-11-27 21:23:35 +0000 |
|---|---|---|
| committer | Tom Stellard <thomas.stellard@amd.com> | 2013-11-27 21:23:35 +0000 |
| commit | c149dc02d36d1d4ed210b460f947dc5a3651454c (patch) | |
| tree | 7df11ea1f7be99b990ca4ed14ca25c11e19b0a75 /llvm/lib/Target | |
| parent | 859199dad89cefbf2c42962afccd07e532f1cc7f (diff) | |
| download | bcm5719-llvm-c149dc02d36d1d4ed210b460f947dc5a3651454c.tar.gz bcm5719-llvm-c149dc02d36d1d4ed210b460f947dc5a3651454c.zip | |
R600/SI: Implement spilling of SGPRs v5
SGPRs are spilled into VGPRs using the {READ,WRITE}LANE_B32 instructions.
v2:
- Fix encoding of Lane Mask
- Use correct register flags, so we don't overwrite the low dword
when restoring multi-dword registers.
v3:
- Register spilling seems to hang the GPU, so replace all shaders
that need spilling with a dummy shader.
v4:
- Fix *LANE definitions
- Change destination reg class for 32-bit SMRD instructions
v5:
- Remove small optimization that was crashing Serious Sam 3.
https://bugs.freedesktop.org/show_bug.cgi?id=68224
https://bugs.freedesktop.org/show_bug.cgi?id=71285
NOTE: This is a candidate for the 3.4 branch.
llvm-svn: 195880
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/R600/AMDGPUInstrInfo.h | 20 | ||||
| -rw-r--r-- | llvm/lib/Target/R600/SIInstrInfo.cpp | 62 | ||||
| -rw-r--r-- | llvm/lib/Target/R600/SIInstrInfo.h | 12 | ||||
| -rw-r--r-- | llvm/lib/Target/R600/SIInstructions.td | 17 | ||||
| -rw-r--r-- | llvm/lib/Target/R600/SIMachineFunctionInfo.cpp | 35 | ||||
| -rw-r--r-- | llvm/lib/Target/R600/SIMachineFunctionInfo.h | 28 |
6 files changed, 161 insertions, 13 deletions
diff --git a/llvm/lib/Target/R600/AMDGPUInstrInfo.h b/llvm/lib/Target/R600/AMDGPUInstrInfo.h index ce5b58c6923..426910c8fc2 100644 --- a/llvm/lib/Target/R600/AMDGPUInstrInfo.h +++ b/llvm/lib/Target/R600/AMDGPUInstrInfo.h @@ -78,18 +78,18 @@ public: unsigned DestReg, unsigned SrcReg, bool KillSrc) const = 0; - void storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned SrcReg, bool isKill, int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const; - void loadRegFromStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned DestReg, int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const; virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const; + virtual void storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned SrcReg, bool isKill, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; + virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned DestReg, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; protected: MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, diff --git a/llvm/lib/Target/R600/SIInstrInfo.cpp b/llvm/lib/Target/R600/SIInstrInfo.cpp index ab55c1b173c..cf84df860a6 100644 --- a/llvm/lib/Target/R600/SIInstrInfo.cpp +++ b/llvm/lib/Target/R600/SIInstrInfo.cpp @@ -16,6 +16,7 @@ #include "SIInstrInfo.h" #include "AMDGPUTargetMachine.h" #include "SIDefines.h" +#include "SIMachineFunctionInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/MC/MCInstrDesc.h" @@ -185,6 +186,67 @@ unsigned SIInstrInfo::commuteOpcode(unsigned Opcode) const { return Opcode; } +void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned SrcReg, bool isKill, + int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { + MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + SIMachineFunctionInfo *MFI = MBB.getParent()->getInfo<SIMachineFunctionInfo>(); + DebugLoc DL = MBB.findDebugLoc(MI); + unsigned KillFlag = isKill ? RegState::Kill : 0; + + if (TRI->getCommonSubClass(RC, &AMDGPU::SGPR_32RegClass)) { + unsigned Lane = MFI->SpillTracker.getNextLane(MRI); + BuildMI(MBB, MI, DL, get(AMDGPU::V_WRITELANE_B32), + MFI->SpillTracker.LaneVGPR) + .addReg(SrcReg, KillFlag) + .addImm(Lane); + MFI->SpillTracker.addSpilledReg(FrameIndex, MFI->SpillTracker.LaneVGPR, + Lane); + } else { + for (unsigned i = 0, e = RC->getSize() / 4; i != e; ++i) { + unsigned SubReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); + BuildMI(MBB, MI, MBB.findDebugLoc(MI), get(AMDGPU::COPY), SubReg) + .addReg(SrcReg, 0, RI.getSubRegFromChannel(i)); + storeRegToStackSlot(MBB, MI, SubReg, isKill, FrameIndex + i, + &AMDGPU::SReg_32RegClass, TRI); + } + } +} + +void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned DestReg, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { + MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + SIMachineFunctionInfo *MFI = MBB.getParent()->getInfo<SIMachineFunctionInfo>(); + DebugLoc DL = MBB.findDebugLoc(MI); + if (TRI->getCommonSubClass(RC, &AMDGPU::SReg_32RegClass)) { + SIMachineFunctionInfo::SpilledReg Spill = + MFI->SpillTracker.getSpilledReg(FrameIndex); + assert(Spill.VGPR); + BuildMI(MBB, MI, DL, get(AMDGPU::V_READLANE_B32), DestReg) + .addReg(Spill.VGPR) + .addImm(Spill.Lane); + } else { + for (unsigned i = 0, e = RC->getSize() / 4; i != e; ++i) { + unsigned Flags = RegState::Define; + if (i == 0) { + Flags |= RegState::Undef; + } + unsigned SubReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); + loadRegFromStackSlot(MBB, MI, SubReg, FrameIndex + i, + &AMDGPU::SReg_32RegClass, TRI); + BuildMI(MBB, MI, DL, get(AMDGPU::COPY)) + .addReg(DestReg, Flags, RI.getSubRegFromChannel(i)) + .addReg(SubReg); + } + } +} + MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { diff --git a/llvm/lib/Target/R600/SIInstrInfo.h b/llvm/lib/Target/R600/SIInstrInfo.h index 4af63481e3a..618deafbde5 100644 --- a/llvm/lib/Target/R600/SIInstrInfo.h +++ b/llvm/lib/Target/R600/SIInstrInfo.h @@ -43,6 +43,18 @@ public: unsigned DestReg, unsigned SrcReg, bool KillSrc) const; + void storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned SrcReg, bool isKill, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; + + void loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned DestReg, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; + unsigned commuteOpcode(unsigned Opcode) const; virtual MachineInstr *commuteInstruction(MachineInstr *MI, diff --git a/llvm/lib/Target/R600/SIInstructions.td b/llvm/lib/Target/R600/SIInstructions.td index 76f05eb4965..a0d49d7bccf 100644 --- a/llvm/lib/Target/R600/SIInstructions.td +++ b/llvm/lib/Target/R600/SIInstructions.td @@ -876,8 +876,21 @@ def : Pat < $src2), sub1) >; -defm V_READLANE_B32 : VOP2_32 <0x00000001, "V_READLANE_B32", []>; -defm V_WRITELANE_B32 : VOP2_32 <0x00000002, "V_WRITELANE_B32", []>; +def V_READLANE_B32 : VOP2 < + 0x00000001, + (outs SReg_32:$vdst), + (ins VReg_32:$src0, SSrc_32:$vsrc1), + "V_READLANE_B32 $vdst, $src0, $vsrc1", + [] +>; + +def V_WRITELANE_B32 : VOP2 < + 0x00000002, + (outs VReg_32:$vdst), + (ins SReg_32:$src0, SSrc_32:$vsrc1), + "V_WRITELANE_B32 $vdst, $src0, $vsrc1", + [] +>; let isCommutable = 1 in { defm V_ADD_F32 : VOP2_32 <0x00000003, "V_ADD_F32", diff --git a/llvm/lib/Target/R600/SIMachineFunctionInfo.cpp b/llvm/lib/Target/R600/SIMachineFunctionInfo.cpp index 071f9fa43a1..ea04346e509 100644 --- a/llvm/lib/Target/R600/SIMachineFunctionInfo.cpp +++ b/llvm/lib/Target/R600/SIMachineFunctionInfo.cpp @@ -10,6 +10,10 @@ #include "SIMachineFunctionInfo.h" +#include "SIRegisterInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" + +#define MAX_LANES 64 using namespace llvm; @@ -19,4 +23,33 @@ void SIMachineFunctionInfo::anchor() {} SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) : AMDGPUMachineFunction(MF), - PSInputAddr(0) { } + PSInputAddr(0), + SpillTracker() { } + +static unsigned createLaneVGPR(MachineRegisterInfo &MRI) { + return MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass); +} + +unsigned SIMachineFunctionInfo::RegSpillTracker::getNextLane(MachineRegisterInfo &MRI) { + if (!LaneVGPR) { + LaneVGPR = createLaneVGPR(MRI); + } else { + CurrentLane++; + if (CurrentLane == MAX_LANES) { + CurrentLane = 0; + LaneVGPR = createLaneVGPR(MRI); + } + } + return CurrentLane; +} + +void SIMachineFunctionInfo::RegSpillTracker::addSpilledReg(unsigned FrameIndex, + unsigned Reg, + int Lane) { + SpilledRegisters[FrameIndex] = SpilledReg(Reg, Lane); +} + +const SIMachineFunctionInfo::SpilledReg& +SIMachineFunctionInfo::RegSpillTracker::getSpilledReg(unsigned FrameIndex) { + return SpilledRegisters[FrameIndex]; +} diff --git a/llvm/lib/Target/R600/SIMachineFunctionInfo.h b/llvm/lib/Target/R600/SIMachineFunctionInfo.h index 2f1961cafdb..8dc82a0b425 100644 --- a/llvm/lib/Target/R600/SIMachineFunctionInfo.h +++ b/llvm/lib/Target/R600/SIMachineFunctionInfo.h @@ -16,16 +16,44 @@ #define SIMACHINEFUNCTIONINFO_H_ #include "AMDGPUMachineFunction.h" +#include <map> namespace llvm { +class MachineRegisterInfo; + /// This class keeps track of the SPI_SP_INPUT_ADDR config register, which /// tells the hardware which interpolation parameters to load. class SIMachineFunctionInfo : public AMDGPUMachineFunction { virtual void anchor(); public: + + struct SpilledReg { + unsigned VGPR; + int Lane; + SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) { } + SpilledReg() : VGPR(0), Lane(-1) { } + bool hasLane() { return Lane != -1;} + }; + + struct RegSpillTracker { + private: + unsigned CurrentLane; + std::map<unsigned, SpilledReg> SpilledRegisters; + public: + unsigned LaneVGPR; + RegSpillTracker() : CurrentLane(0), SpilledRegisters(), LaneVGPR(0) { } + unsigned getNextLane(MachineRegisterInfo &MRI); + void addSpilledReg(unsigned FrameIndex, unsigned Reg, int Lane = -1); + const SpilledReg& getSpilledReg(unsigned FrameIndex); + bool programSpillsRegisters() { return !SpilledRegisters.empty(); } + }; + + // SIMachineFunctionInfo definition + SIMachineFunctionInfo(const MachineFunction &MF); unsigned PSInputAddr; + struct RegSpillTracker SpillTracker; }; } // End namespace llvm |

