summary refs log tree commit diff stats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
author Tom Stellard <thomas.stellard@amd.com> 2013-11-27 21:23:35 +0000
committer Tom Stellard <thomas.stellard@amd.com> 2013-11-27 21:23:35 +0000
commit c149dc02d36d1d4ed210b460f947dc5a3651454c (patch)
tree 7df11ea1f7be99b990ca4ed14ca25c11e19b0a75 /llvm/lib/Target
parent 859199dad89cefbf2c42962afccd07e532f1cc7f (diff)
downloadbcm5719-llvm-c149dc02d36d1d4ed210b460f947dc5a3651454c.tar.gz
bcm5719-llvm-c149dc02d36d1d4ed210b460f947dc5a3651454c.zip
R600/SI: Implement spilling of SGPRs v5
SGPRs are spilled into VGPRs using the {READ,WRITE}LANE_B32 instructions.

v2:
  - Fix encoding of Lane Mask
  - Use correct register flags, so we don't overwrite the low dword when restoring multi-dword registers.

v3:
  - Register spilling seems to hang the GPU, so replace all shaders that need spilling with a dummy shader.

v4:
  - Fix *LANE definitions
  - Change destination reg class for 32-bit SMRD instructions

v5:
  - Remove small optimization that was crashing Serious Sam 3.

https://bugs.freedesktop.org/show_bug.cgi?id=68224
https://bugs.freedesktop.org/show_bug.cgi?id=71285

NOTE: This is a candidate for the 3.4 branch.

llvm-svn: 195880
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- llvm/lib/Target/R600/AMDGPUInstrInfo.h 20
-rw-r--r-- llvm/lib/Target/R600/SIInstrInfo.cpp 62
-rw-r--r-- llvm/lib/Target/R600/SIInstrInfo.h 12
-rw-r--r-- llvm/lib/Target/R600/SIInstructions.td 17
-rw-r--r-- llvm/lib/Target/R600/SIMachineFunctionInfo.cpp 35
-rw-r--r-- llvm/lib/Target/R600/SIMachineFunctionInfo.h 28
6 files changed, 161 insertions, 13 deletions
diff --git a/llvm/lib/Target/R600/AMDGPUInstrInfo.h b/llvm/lib/Target/R600/AMDGPUInstrInfo.h
index ce5b58c6923..426910c8fc2 100644
--- a/llvm/lib/Target/R600/AMDGPUInstrInfo.h
+++ b/llvm/lib/Target/R600/AMDGPUInstrInfo.h
@@ -78,18 +78,18 @@ public:
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const = 0;
- void storeRegToStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned SrcReg, bool isKill, int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const;
- void loadRegFromStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned DestReg, int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const;
virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const;
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB, // re-added as virtual; SIInstrInfo.h declares the same signature
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, // re-added as virtual; SIInstrInfo.h declares the same signature
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
protected:
MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
diff --git a/llvm/lib/Target/R600/SIInstrInfo.cpp b/llvm/lib/Target/R600/SIInstrInfo.cpp
index ab55c1b173c..cf84df860a6 100644
--- a/llvm/lib/Target/R600/SIInstrInfo.cpp
+++ b/llvm/lib/Target/R600/SIInstrInfo.cpp
@@ -16,6 +16,7 @@
#include "SIInstrInfo.h"
#include "AMDGPUTargetMachine.h"
#include "SIDefines.h"
+#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/MC/MCInstrDesc.h"
@@ -185,6 +186,67 @@ unsigned SIInstrInfo::commuteOpcode(unsigned Opcode) const {
return Opcode;
}
+/// Spill \p SrcReg to the slot \p FrameIndex by writing it into one lane of
+/// a VGPR with V_WRITELANE_B32.
+///
+/// A register class that shares a subclass with SGPR_32 is written directly,
+/// and the (VGPR, lane) pair is recorded in the function's SpillTracker under
+/// \p FrameIndex.  Any other class is split into RC->getSize() / 4 pieces:
+/// piece i is copied into a fresh SReg_32 virtual register and spilled
+/// recursively under FrameIndex + i.
+///
+/// NOTE(review): FrameIndex + i is used as the tracker key for piece i.  It
+/// is not visible here whether those indices are reserved for this spill --
+/// confirm they cannot collide with another register's slot.
+void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+                                      MachineBasicBlock::iterator MI,
+                                      unsigned SrcReg, bool isKill,
+                                      int FrameIndex,
+                                      const TargetRegisterClass *RC,
+                                      const TargetRegisterInfo *TRI) const {
+  MachineFunction *MF = MBB.getParent();
+  MachineRegisterInfo &RegInfo = MF->getRegInfo();
+  SIMachineFunctionInfo *FuncInfo = MF->getInfo<SIMachineFunctionInfo>();
+  DebugLoc Loc = MBB.findDebugLoc(MI);
+
+  if (!TRI->getCommonSubClass(RC, &AMDGPU::SGPR_32RegClass)) {
+    // Wide register: peel off one 32-bit channel at a time and spill it.
+    const unsigned NumPieces = RC->getSize() / 4;
+    for (unsigned Chan = 0; Chan != NumPieces; ++Chan) {
+      unsigned Piece = RegInfo.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+      BuildMI(MBB, MI, Loc, get(AMDGPU::COPY), Piece)
+          .addReg(SrcReg, 0, RI.getSubRegFromChannel(Chan));
+      storeRegToStackSlot(MBB, MI, Piece, isKill, FrameIndex + Chan,
+                          &AMDGPU::SReg_32RegClass, TRI);
+    }
+    return;
+  }
+
+  // Single 32-bit register.  getNextLane() may allocate a new lane VGPR, so
+  // LaneVGPR must only be read after it has been called.
+  SIMachineFunctionInfo::RegSpillTracker &Tracker = FuncInfo->SpillTracker;
+  unsigned Lane = Tracker.getNextLane(RegInfo);
+  unsigned LaneReg = Tracker.LaneVGPR;
+  unsigned SrcFlags = 0;
+  if (isKill)
+    SrcFlags = RegState::Kill;
+
+  BuildMI(MBB, MI, Loc, get(AMDGPU::V_WRITELANE_B32), LaneReg)
+      .addReg(SrcReg, SrcFlags)
+      .addImm(Lane);
+  Tracker.addSpilledReg(FrameIndex, LaneReg, Lane);
+}
+
+/// Restore \p DestReg from the slot \p FrameIndex by reading it back out of
+/// the VGPR lane recorded by storeRegToStackSlot().
+///
+/// A register class that shares a subclass with SReg_32 is reloaded with a
+/// single V_READLANE_B32; the tracker entry for \p FrameIndex must name a
+/// VGPR (asserted).  Any other class is rebuilt from RC->getSize() / 4
+/// pieces: piece i is reloaded from FrameIndex + i into a fresh SReg_32
+/// virtual register and then copied into sub-register channel i of
+/// \p DestReg.
+void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+                                       MachineBasicBlock::iterator MI,
+                                       unsigned DestReg, int FrameIndex,
+                                       const TargetRegisterClass *RC,
+                                       const TargetRegisterInfo *TRI) const {
+  MachineFunction *MF = MBB.getParent();
+  MachineRegisterInfo &RegInfo = MF->getRegInfo();
+  SIMachineFunctionInfo *FuncInfo = MF->getInfo<SIMachineFunctionInfo>();
+  DebugLoc Loc = MBB.findDebugLoc(MI);
+
+  if (TRI->getCommonSubClass(RC, &AMDGPU::SReg_32RegClass)) {
+    SIMachineFunctionInfo::SpilledReg Spill =
+        FuncInfo->SpillTracker.getSpilledReg(FrameIndex);
+    assert(Spill.VGPR);
+    BuildMI(MBB, MI, Loc, get(AMDGPU::V_READLANE_B32), DestReg)
+        .addReg(Spill.VGPR)
+        .addImm(Spill.Lane);
+    return;
+  }
+
+  const unsigned NumPieces = RC->getSize() / 4;
+  for (unsigned Chan = 0; Chan != NumPieces; ++Chan) {
+    unsigned Piece = RegInfo.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+    loadRegFromStackSlot(MBB, MI, Piece, FrameIndex + Chan,
+                         &AMDGPU::SReg_32RegClass, TRI);
+
+    // Only the first partial definition is marked Undef; later pieces are
+    // plain defs so earlier channels are not treated as overwritten.
+    unsigned DefFlags = (Chan == 0) ? (RegState::Define | RegState::Undef)
+                                    : RegState::Define;
+    BuildMI(MBB, MI, Loc, get(AMDGPU::COPY))
+        .addReg(DestReg, DefFlags, RI.getSubRegFromChannel(Chan))
+        .addReg(Piece);
+  }
+}
+
MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
bool NewMI) const {
diff --git a/llvm/lib/Target/R600/SIInstrInfo.h b/llvm/lib/Target/R600/SIInstrInfo.h
index 4af63481e3a..618deafbde5 100644
--- a/llvm/lib/Target/R600/SIInstrInfo.h
+++ b/llvm/lib/Target/R600/SIInstrInfo.h
@@ -43,6 +43,18 @@ public:
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const;
+ void storeRegToStackSlot(MachineBasicBlock &MBB, // defined in SIInstrInfo.cpp; spills via V_WRITELANE_B32
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ void loadRegFromStackSlot(MachineBasicBlock &MBB, // defined in SIInstrInfo.cpp; reloads via V_READLANE_B32
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
unsigned commuteOpcode(unsigned Opcode) const;
virtual MachineInstr *commuteInstruction(MachineInstr *MI,
diff --git a/llvm/lib/Target/R600/SIInstructions.td b/llvm/lib/Target/R600/SIInstructions.td
index 76f05eb4965..a0d49d7bccf 100644
--- a/llvm/lib/Target/R600/SIInstructions.td
+++ b/llvm/lib/Target/R600/SIInstructions.td
@@ -876,8 +876,21 @@ def : Pat <
$src2), sub1)
>;
-defm V_READLANE_B32 : VOP2_32 <0x00000001, "V_READLANE_B32", []>;
-defm V_WRITELANE_B32 : VOP2_32 <0x00000002, "V_WRITELANE_B32", []>;
+def V_READLANE_B32 : VOP2 < // replaces the removed generic VOP2_32 defm with explicit operand classes
+ 0x00000001, // same value the removed defm passed; presumably the VOP2 opcode
+ (outs SReg_32:$vdst), // result operand uses the SReg_32 class
+ (ins VReg_32:$src0, SSrc_32:$vsrc1), // SIInstrInfo.cpp passes the spill VGPR as $src0 and the lane as $vsrc1
+ "V_READLANE_B32 $vdst, $src0, $vsrc1",
+ [] // empty pattern list; SIInstrInfo.cpp creates this instruction directly with BuildMI
+>;
+
+def V_WRITELANE_B32 : VOP2 < // replaces the removed generic VOP2_32 defm with explicit operand classes
+ 0x00000002, // same value the removed defm passed; presumably the VOP2 opcode
+ (outs VReg_32:$vdst), // result operand uses the VReg_32 class
+ (ins SReg_32:$src0, SSrc_32:$vsrc1), // SIInstrInfo.cpp passes the spilled register as $src0 and the lane as $vsrc1
+ "V_WRITELANE_B32 $vdst, $src0, $vsrc1",
+ [] // empty pattern list; SIInstrInfo.cpp creates this instruction directly with BuildMI
+>;
let isCommutable = 1 in {
defm V_ADD_F32 : VOP2_32 <0x00000003, "V_ADD_F32",
diff --git a/llvm/lib/Target/R600/SIMachineFunctionInfo.cpp b/llvm/lib/Target/R600/SIMachineFunctionInfo.cpp
index 071f9fa43a1..ea04346e509 100644
--- a/llvm/lib/Target/R600/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/R600/SIMachineFunctionInfo.cpp
@@ -10,6 +10,10 @@
#include "SIMachineFunctionInfo.h"
+#include "SIRegisterInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+#define MAX_LANES 64
using namespace llvm;
@@ -19,4 +23,33 @@ void SIMachineFunctionInfo::anchor() {}
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
: AMDGPUMachineFunction(MF),
- PSInputAddr(0) { }
+ PSInputAddr(0),
+ SpillTracker() { } // RegSpillTracker's default ctor: CurrentLane 0, LaneVGPR 0, no recorded spills
+
+/// Create a fresh VReg_32 virtual register in \p MRI and return it; the spill
+/// tracker stores the result as its current lane VGPR.
+static unsigned createLaneVGPR(MachineRegisterInfo &MRI) {
+  const unsigned NewVGPR = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
+  return NewVGPR;
+}
+
+/// Return the lane to use for the next spill, creating a lane VGPR first when
+/// needed.
+///
+/// The first call creates the lane VGPR and returns CurrentLane unchanged.
+/// Later calls advance CurrentLane; on reaching MAX_LANES the counter resets
+/// to 0 and a new lane VGPR replaces LaneVGPR.
+unsigned SIMachineFunctionInfo::RegSpillTracker::getNextLane(MachineRegisterInfo &MRI) {
+  if (!LaneVGPR) {
+    // No lane VGPR yet: create one and hand out the current lane as-is.
+    LaneVGPR = createLaneVGPR(MRI);
+    return CurrentLane;
+  }
+
+  if (++CurrentLane == MAX_LANES) {
+    // Current VGPR is full; start again at lane 0 of a new one.
+    CurrentLane = 0;
+    LaneVGPR = createLaneVGPR(MRI);
+  }
+  return CurrentLane;
+}
+
+/// Record that the value for \p FrameIndex lives in lane \p Lane of VGPR
+/// \p Reg.  An existing entry for the same \p FrameIndex is overwritten.
+void SIMachineFunctionInfo::RegSpillTracker::addSpilledReg(unsigned FrameIndex,
+                                                           unsigned Reg,
+                                                           int Lane) {
+  SpilledReg &Slot = SpilledRegisters[FrameIndex];
+  Slot = SpilledReg(Reg, Lane);
+}
+
+/// Look up the spill location recorded for \p FrameIndex.
+///
+/// \returns the recorded entry, or a default-constructed SpilledReg (VGPR 0,
+/// Lane -1) when nothing was recorded for \p FrameIndex.
+///
+/// The lookup uses find() rather than operator[]: operator[] would insert a
+/// default entry on a miss, growing the map and making
+/// programSpillsRegisters() report true for a function that spilled nothing.
+const SIMachineFunctionInfo::SpilledReg&
+SIMachineFunctionInfo::RegSpillTracker::getSpilledReg(unsigned FrameIndex) {
+  // Shared "nothing recorded" result, so a reference can still be returned.
+  static const SpilledReg NoSpill;
+
+  const std::map<unsigned, SpilledReg> &Spills = SpilledRegisters;
+  std::map<unsigned, SpilledReg>::const_iterator It = Spills.find(FrameIndex);
+  if (It == Spills.end())
+    return NoSpill;
+  return It->second;
+}
diff --git a/llvm/lib/Target/R600/SIMachineFunctionInfo.h b/llvm/lib/Target/R600/SIMachineFunctionInfo.h
index 2f1961cafdb..8dc82a0b425 100644
--- a/llvm/lib/Target/R600/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/R600/SIMachineFunctionInfo.h
@@ -16,16 +16,44 @@
#define SIMACHINEFUNCTIONINFO_H_
#include "AMDGPUMachineFunction.h"
+#include <map>
namespace llvm {
+class MachineRegisterInfo;
+
/// This class keeps track of the SPI_SP_INPUT_ADDR config register, which
/// tells the hardware which interpolation parameters to load.
class SIMachineFunctionInfo : public AMDGPUMachineFunction {
  virtual void anchor();
public:
+
+  /// Location of one spilled 32-bit value: the VGPR that holds it and the
+  /// lane inside that VGPR.  A default-constructed entry (VGPR 0, Lane -1)
+  /// means nothing has been recorded.
+  struct SpilledReg {
+    unsigned VGPR;
+    int Lane;
+    SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) { }
+    SpilledReg() : VGPR(0), Lane(-1) { }
+    /// True when a lane has been assigned.  Const so it can be called on the
+    /// const reference returned by RegSpillTracker::getSpilledReg().
+    bool hasLane() const { return Lane != -1;}
+  };
+
+  /// Hands out VGPR lanes for spilled registers and remembers, per frame
+  /// index, which (VGPR, lane) pair holds the spilled value.
+  struct RegSpillTracker {
+  private:
+    /// Lane most recently handed out by getNextLane().
+    unsigned CurrentLane;
+    /// Frame index -> recorded spill location.
+    std::map<unsigned, SpilledReg> SpilledRegisters;
+  public:
+    /// VGPR currently used as spill storage; 0 until the first
+    /// getNextLane() call.
+    unsigned LaneVGPR;
+    RegSpillTracker() : CurrentLane(0), SpilledRegisters(), LaneVGPR(0) { }
+    /// Returns the lane for the next spill; may replace LaneVGPR.
+    unsigned getNextLane(MachineRegisterInfo &MRI);
+    /// Records (Reg, Lane) as the spill location for FrameIndex.
+    void addSpilledReg(unsigned FrameIndex, unsigned Reg, int Lane = -1);
+    /// Returns the spill location recorded for FrameIndex.
+    const SpilledReg& getSpilledReg(unsigned FrameIndex);
+    /// True when at least one spill location is stored in the map.
+    bool programSpillsRegisters() const { return !SpilledRegisters.empty(); }
+  };
+
+  // SIMachineFunctionInfo definition
+
  SIMachineFunctionInfo(const MachineFunction &MF);
  unsigned PSInputAddr;
+  struct RegSpillTracker SpillTracker;
};
} // End namespace llvm
OpenPOWER on IntegriCloud