author     Matt Arsenault <Matthew.Arsenault@amd.com>   2016-06-24 06:30:11 +0000
committer  Matt Arsenault <Matthew.Arsenault@amd.com>   2016-06-24 06:30:11 +0000
commit     43e92fe306ac1fa4fb36062a458a18a9aed23855
tree       275b08407e8fb1478bd185b851b497c43fbe0877  /llvm/lib/Target
parent     f11b9798f4cd1d3dbcae7e0003d79c7b428b4d04
AMDGPU: Cleanup subtarget handling.
Split AMDGPUSubtarget into amdgcn/r600 specific subclasses.
This removes most of the static_casting of the basic codegen
classes everywhere, and tries to keep features from being
visible on the wrong target.
llvm-svn: 273652
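
To make the message concrete, this is the representative change the patch applies at call sites throughout the backend (condensed from the AMDGPUAsmPrinter hunks below; the code is an editorial illustration, but the names are taken from the patch itself):

// Before: callers received the generic subtarget and had to cast
// the codegen objects to the target-specific types themselves.
const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>();
const SIInstrInfo *TII =
    static_cast<const SIInstrInfo *>(STM.getInstrInfo());

// After: asking for the concrete subtarget class makes the
// accessors return the derived types, so the casts disappear.
const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
const SIInstrInfo *TII = STM.getInstrInfo();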
Diffstat (limited to 'llvm/lib/Target')
58 files changed, 879 insertions, 706 deletions
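
The mechanism behind the typed accessors is visible in the new AMDGPUSubtarget.h hunks below: the base class declares the accessors and defines them inline at the bottom of the header, downcasting to the final subclasses keyed on the generation, so the one remaining static_cast per accessor lives in a single place. A minimal, self-contained sketch of that shape — using invented *Stub names, not the real LLVM classes — follows:

#include <cassert>
#include <cstdio>

struct InstrInfoBase { virtual ~InstrInfoBase() = default; };
struct R600InstrInfoStub final : InstrInfoBase {};
struct SIInstrInfoStub final : InstrInfoBase {};

class AMDGPUSubtargetStub {
public:
  enum Generation { R600 = 0, NORTHERN_ISLANDS = 3, SOUTHERN_ISLANDS = 4 };
  explicit AMDGPUSubtargetStub(Generation G) : Gen(G) {}
  virtual ~AMDGPUSubtargetStub() = default;
  Generation getGeneration() const { return Gen; }
  const InstrInfoBase *getInstrInfo() const; // defined inline below
private:
  Generation Gen;
};

class R600SubtargetStub final : public AMDGPUSubtargetStub {
public:
  R600SubtargetStub() : AMDGPUSubtargetStub(R600) {}
  // Typed accessor; the subtarget owns its codegen objects by value,
  // mirroring how the patch embeds InstrInfo/FrameLowering/TLInfo.
  const R600InstrInfoStub *getInstrInfo() const { return &InstrInfo; }
private:
  R600InstrInfoStub InstrInfo;
};

class SISubtargetStub final : public AMDGPUSubtargetStub {
public:
  SISubtargetStub() : AMDGPUSubtargetStub(SOUTHERN_ISLANDS) {}
  const SIInstrInfoStub *getInstrInfo() const { return &InstrInfo; }
private:
  SIInstrInfoStub InstrInfo;
};

// The single place that still casts: generic code asking through the
// base class is routed to the right final subclass by generation.
inline const InstrInfoBase *AMDGPUSubtargetStub::getInstrInfo() const {
  if (getGeneration() >= SOUTHERN_ISLANDS)
    return static_cast<const SISubtargetStub *>(this)->getInstrInfo();
  return static_cast<const R600SubtargetStub *>(this)->getInstrInfo();
}

int main() {
  SISubtargetStub SI;
  const AMDGPUSubtargetStub &Base = SI;
  assert(Base.getInstrInfo() != nullptr);         // generic path still works
  const SIInstrInfoStub *TII = SI.getInstrInfo(); // typed path, no cast
  (void)TII;
  std::puts("dispatch ok");
  return 0;
}

The same shape is repeated in the patch for getFrameLowering, getTargetLowering, and getRegisterInfo, and again at the TargetMachine level, where AMDGPUTargetMachine::getSubtargetImpl dispatches on the triple to the R600 or GCN target machine's concrete subtarget.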
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index eba5e4a8938..b18c97acd72 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -63,7 +63,7 @@ using namespace llvm; // instructions to run at the double precision rate for the device so it's // probably best to just report no single precision denormals. static uint32_t getFPMode(const MachineFunction &F) { - const AMDGPUSubtarget& ST = F.getSubtarget<AMDGPUSubtarget>(); + const SISubtarget& ST = F.getSubtarget<SISubtarget>(); // TODO: Is there any real use for the flush in only / flush out only modes? uint32_t FP32Denormals = @@ -243,9 +243,8 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { void AMDGPUAsmPrinter::EmitProgramInfoR600(const MachineFunction &MF) { unsigned MaxGPR = 0; bool killPixel = false; - const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>(); - const R600RegisterInfo *RI = - static_cast<const R600RegisterInfo *>(STM.getRegisterInfo()); + const R600Subtarget &STM = MF.getSubtarget<R600Subtarget>(); + const R600RegisterInfo *RI = STM.getRegisterInfo(); const R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>(); for (const MachineBasicBlock &MBB : MF) { @@ -268,7 +267,7 @@ void AMDGPUAsmPrinter::EmitProgramInfoR600(const MachineFunction &MF) { } unsigned RsrcReg; - if (STM.getGeneration() >= AMDGPUSubtarget::EVERGREEN) { + if (STM.getGeneration() >= R600Subtarget::EVERGREEN) { // Evergreen / Northern Islands switch (MF.getFunction()->getCallingConv()) { default: // Fall through @@ -302,17 +301,15 @@ void AMDGPUAsmPrinter::EmitProgramInfoR600(const MachineFunction &MF) { void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, const MachineFunction &MF) const { - const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>(); + const SISubtarget &STM = MF.getSubtarget<SISubtarget>(); const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); uint64_t CodeSize = 0; unsigned MaxSGPR = 0; unsigned MaxVGPR = 0; bool VCCUsed = false; bool FlatUsed = false; - const SIRegisterInfo *RI = - static_cast<const SIRegisterInfo *>(STM.getRegisterInfo()); - const SIInstrInfo *TII = - static_cast<const SIInstrInfo *>(STM.getInstrInfo()); + const SIRegisterInfo *RI = STM.getRegisterInfo(); + const SIInstrInfo *TII = STM.getInstrInfo(); for (const MachineBasicBlock &MBB : MF) { for (const MachineInstr &MI : MBB) { @@ -425,7 +422,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, if (VCCUsed) ExtraSGPRs = 2; - if (STM.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS) { + if (STM.getGeneration() < SISubtarget::VOLCANIC_ISLANDS) { if (FlatUsed) ExtraSGPRs = 4; } else { @@ -453,7 +450,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, ProgInfo.NumSGPR = MaxSGPR + 1; if (STM.hasSGPRInitBug()) { - if (ProgInfo.NumSGPR > AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG) { + if (ProgInfo.NumSGPR > SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG) { LLVMContext &Ctx = MF.getFunction()->getContext(); DiagnosticInfoResourceLimit Diag(*MF.getFunction(), "SGPRs with SGPR init bug", @@ -461,7 +458,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, Ctx.diagnose(Diag); } - ProgInfo.NumSGPR = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG; + ProgInfo.NumSGPR = SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG; } if (MFI->NumUserSGPRs > STM.getMaxNumUserSGPRs()) { @@ -497,7 +494,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo 
&ProgInfo, ProgInfo.CodeLen = CodeSize; unsigned LDSAlignShift; - if (STM.getGeneration() < AMDGPUSubtarget::SEA_ISLANDS) { + if (STM.getGeneration() < SISubtarget::SEA_ISLANDS) { // LDS is allocated in 64 dword blocks. LDSAlignShift = 8; } else { @@ -564,7 +561,7 @@ static unsigned getRsrcReg(CallingConv::ID CallConv) { void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF, const SIProgramInfo &KernelInfo) { - const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>(); + const SISubtarget &STM = MF.getSubtarget<SISubtarget>(); const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); unsigned RsrcReg = getRsrcReg(MF.getFunction()->getCallingConv()); @@ -618,7 +615,7 @@ static amd_element_byte_size_t getElementByteSizeValue(unsigned Size) { void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF, const SIProgramInfo &KernelInfo) const { const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); - const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>(); + const SISubtarget &STM = MF.getSubtarget<SISubtarget>(); amd_kernel_code_t header; AMDGPU::initDefaultAMDKernelCodeT(header, STM.getFeatureBits()); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUFrameLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUFrameLowering.cpp index f7582f42c35..bbc28b88572 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUFrameLowering.cpp @@ -12,7 +12,8 @@ //===----------------------------------------------------------------------===// #include "AMDGPUFrameLowering.h" #include "AMDGPURegisterInfo.h" -#include "R600MachineFunctionInfo.h" +#include "AMDGPUSubtarget.h" + #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/IR/Instructions.h" @@ -75,7 +76,8 @@ int AMDGPUFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); - const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); + const AMDGPURegisterInfo *RI + = MF.getSubtarget<AMDGPUSubtarget>().getRegisterInfo(); // Fill in FrameReg output argument. FrameReg = RI->getFrameRegister(MF); @@ -100,19 +102,3 @@ int AMDGPUFrameLowering::getFrameIndexReference(const MachineFunction &MF, return OffsetBytes / (getStackWidth(MF) * 4); } -const TargetFrameLowering::SpillSlot * -AMDGPUFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const { - NumEntries = 0; - return nullptr; -} -void AMDGPUFrameLowering::emitPrologue(MachineFunction &MF, - MachineBasicBlock &MBB) const {} -void -AMDGPUFrameLowering::emitEpilogue(MachineFunction &MF, - MachineBasicBlock &MBB) const { -} - -bool -AMDGPUFrameLowering::hasFP(const MachineFunction &MF) const { - return false; -} diff --git a/llvm/lib/Target/AMDGPU/AMDGPUFrameLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUFrameLowering.h index 257a3da4058..44196e2db49 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUFrameLowering.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUFrameLowering.h @@ -32,13 +32,18 @@ public: /// \returns The number of 32-bit sub-registers that are used when storing /// values to the stack. 
unsigned getStackWidth(const MachineFunction &MF) const; + int getFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg) const override; - const SpillSlot * - getCalleeSavedSpillSlots(unsigned &NumEntries) const override; - void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; - void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; - bool hasFP(const MachineFunction &MF) const override; + + const SpillSlot *getCalleeSavedSpillSlots(unsigned &NumEntries) const override { + NumEntries = 0; + return nullptr; + } + + bool hasFP(const MachineFunction &MF) const override { + return false; + } }; } // namespace llvm #endif diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 99c2f977c7d..cfecd29853b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -177,7 +177,7 @@ AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM) : SelectionDAGISel(TM) {} bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { - Subtarget = &static_cast<const AMDGPUSubtarget &>(MF.getSubtarget()); + Subtarget = &MF.getSubtarget<AMDGPUSubtarget>(); return SelectionDAGISel::runOnMachineFunction(MF); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index ddc86da742f..6d162bede1d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -64,7 +64,7 @@ EVT AMDGPUTargetLowering::getEquivalentLoadRegType(LLVMContext &Ctx, EVT VT) { return EVT::getVectorVT(Ctx, MVT::i32, StoreSize / 32); } -AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM, +AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, const AMDGPUSubtarget &STI) : TargetLowering(TM), Subtarget(&STI) { // Lower floating point store/load to integer store/load to reduce the number diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h index e5acf364b54..4648438d856 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -116,7 +116,7 @@ protected: const SmallVectorImpl<ISD::OutputArg> &Outs) const; public: - AMDGPUTargetLowering(TargetMachine &TM, const AMDGPUSubtarget &STI); + AMDGPUTargetLowering(const TargetMachine &TM, const AMDGPUSubtarget &STI); bool isFAbsFree(EVT VT) const override; bool isFNegFree(EVT VT) const override; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp index 60dba1a063a..82d4eef67c6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp @@ -30,12 +30,8 @@ using namespace llvm; // Pin the vtable to this file. 
void AMDGPUInstrInfo::anchor() {} -AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &st) - : AMDGPUGenInstrInfo(-1, -1), ST(st) {} - -const AMDGPURegisterInfo &AMDGPUInstrInfo::getRegisterInfo() const { - return RI; -} +AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &ST) + : AMDGPUGenInstrInfo(-1, -1), ST(ST) {} bool AMDGPUInstrInfo::enableClusterLoads() const { return true; @@ -111,9 +107,11 @@ int AMDGPUInstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const { return -1; } + const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>(); + const AMDGPUFrameLowering *TFL = ST.getFrameLowering(); + unsigned IgnoredFrameReg; - Offset = MF.getSubtarget().getFrameLowering()->getFrameIndexReference( - MF, -1, IgnoredFrameReg); + Offset = TFL->getFrameIndexReference(MF, -1, IgnoredFrameReg); return getIndirectIndexBegin(MF) + Offset; } @@ -127,35 +125,42 @@ int AMDGPUInstrInfo::getMaskedMIMGOp(uint16_t Opcode, unsigned Channels) const { } } +// This must be kept in sync with the SIEncodingFamily class in SIInstrInfo.td +enum SIEncodingFamily { + SI = 0, + VI = 1 +}; + // Wrapper for Tablegen'd function. enum Subtarget is not defined in any // header files, so we need to wrap it in a function that takes unsigned // instead. namespace llvm { namespace AMDGPU { static int getMCOpcode(uint16_t Opcode, unsigned Gen) { - return getMCOpcodeGen(Opcode, (enum Subtarget)Gen); + return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen)); } } } -// This must be kept in sync with the SISubtarget class in SIInstrInfo.td -enum SISubtarget { - SI = 0, - VI = 1 -}; - -static enum SISubtarget AMDGPUSubtargetToSISubtarget(unsigned Gen) { - switch (Gen) { - default: - return SI; +static SIEncodingFamily subtargetEncodingFamily(const AMDGPUSubtarget &ST) { + switch (ST.getGeneration()) { + case AMDGPUSubtarget::SOUTHERN_ISLANDS: + case AMDGPUSubtarget::SEA_ISLANDS: + return SIEncodingFamily::SI; case AMDGPUSubtarget::VOLCANIC_ISLANDS: - return VI; + return SIEncodingFamily::VI; + + // FIXME: This should never be called for r600 GPUs. + case AMDGPUSubtarget::R600: + case AMDGPUSubtarget::R700: + case AMDGPUSubtarget::EVERGREEN: + case AMDGPUSubtarget::NORTHERN_ISLANDS: + return SIEncodingFamily::SI; } } int AMDGPUInstrInfo::pseudoToMCOpcode(int Opcode) const { - int MCOp = AMDGPU::getMCOpcode( - Opcode, AMDGPUSubtargetToSISubtarget(ST.getGeneration())); + int MCOp = AMDGPU::getMCOpcode(Opcode, subtargetEncodingFamily(ST)); // -1 means that Opcode is already a native instruction. 
if (MCOp == -1) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h index 461dd99a4f5..6203e575bfa 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h @@ -16,7 +16,6 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRINFO_H #define LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRINFO_H -#include "AMDGPURegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" #define GET_INSTRINFO_HEADER @@ -38,16 +37,13 @@ class MachineInstrBuilder; class AMDGPUInstrInfo : public AMDGPUGenInstrInfo { private: - const AMDGPURegisterInfo RI; - virtual void anchor(); -protected: const AMDGPUSubtarget &ST; -public: - explicit AMDGPUInstrInfo(const AMDGPUSubtarget &st); - virtual const AMDGPURegisterInfo &getRegisterInfo() const = 0; + virtual void anchor(); public: + explicit AMDGPUInstrInfo(const AMDGPUSubtarget &st); + /// \returns the smallest register index that will be accessed by an indirect /// read or write or -1 if indirect addressing is not used by this program. int getIndirectIndexBegin(const MachineFunction &MF) const; @@ -80,7 +76,6 @@ public: /// \brief Given a MIMG \p Opcode that writes all 4 channels, return the /// equivalent opcode that writes \p Channels Channels. int getMaskedMIMGOp(uint16_t Opcode, unsigned Channels) const; - }; namespace AMDGPU { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp index 154e992590e..9453fb06bb5 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp @@ -15,6 +15,7 @@ #include "AMDGPUMCInstLower.h" #include "AMDGPUAsmPrinter.h" +#include "AMDGPUSubtarget.h" #include "AMDGPUTargetMachine.h" #include "InstPrinter/AMDGPUInstPrinter.h" #include "SIInstrInfo.h" @@ -36,8 +37,7 @@ using namespace llvm; AMDGPUMCInstLower::AMDGPUMCInstLower(MCContext &ctx, const AMDGPUSubtarget &st): - Ctx(ctx), ST(st) -{ } + Ctx(ctx), ST(st) { } void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const { @@ -140,10 +140,9 @@ void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) { raw_string_ostream DisasmStream(DisasmLine); AMDGPUInstPrinter InstPrinter(*TM.getMCAsmInfo(), - *MF->getSubtarget().getInstrInfo(), - *MF->getSubtarget().getRegisterInfo()); - InstPrinter.printInst(&TmpInst, DisasmStream, StringRef(), - MF->getSubtarget()); + *STI.getInstrInfo(), + *STI.getRegisterInfo()); + InstPrinter.printInst(&TmpInst, DisasmStream, StringRef(), STI); // Disassemble instruction/operands to hex representation. SmallVector<MCFixup, 4> Fixups; diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp index 3ca0eca3417..2157c8faa1d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp @@ -24,10 +24,11 @@ AMDGPURegisterInfo::AMDGPURegisterInfo() : AMDGPUGenRegisterInfo(0) {} // they are not supported at this time. //===----------------------------------------------------------------------===// -const MCPhysReg AMDGPURegisterInfo::CalleeSavedReg = AMDGPU::NoRegister; +// Dummy to not crash RegisterClassInfo. 
+static const MCPhysReg CalleeSavedReg = AMDGPU::NoRegister; -const MCPhysReg* -AMDGPURegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { +const MCPhysReg *AMDGPURegisterInfo::getCalleeSavedRegs( + const MachineFunction *) const { return &CalleeSavedReg; } @@ -55,7 +56,6 @@ unsigned AMDGPURegisterInfo::getSubRegFromChannel(unsigned Channel) const { } unsigned AMDGPURegisterInfo::getIndirectSubReg(unsigned IndirectIndex) const { - return getSubRegFromChannel(IndirectIndex); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.h b/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.h index 441d53625b9..e780ca0ad0d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.h @@ -29,18 +29,8 @@ class AMDGPUSubtarget; class TargetInstrInfo; struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo { - static const MCPhysReg CalleeSavedReg; - AMDGPURegisterInfo(); - BitVector getReservedRegs(const MachineFunction &MF) const override { - assert(!"Unimplemented"); return BitVector(); - } - - virtual unsigned getHWRegIndex(unsigned Reg) const { - assert(!"Unimplemented"); return 0; - } - /// \returns the sub reg enum value for the given \p Channel /// (e.g. getSubRegFromChannel(0) -> AMDGPU::sub0) unsigned getSubRegFromChannel(unsigned Channel) const; @@ -52,7 +42,6 @@ struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo { unsigned getFrameRegister(const MachineFunction &MF) const override; unsigned getIndirectSubReg(unsigned IndirectIndex) const; - }; } // End namespace llvm diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index 70603374bc9..6b5a88db477 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -44,6 +44,8 @@ struct AMDGPUGISelActualAccessor : public GISelAccessor { } // End anonymous namespace. #endif +AMDGPUSubtarget::~AMDGPUSubtarget() {} + AMDGPUSubtarget & AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT, StringRef GPU, StringRef FS) { @@ -79,82 +81,56 @@ AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT, } AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS, - TargetMachine &TM) - : AMDGPUGenSubtargetInfo(TT, GPU, FS), - DumpCode(false), R600ALUInst(false), HasVertexCache(false), - TexVTXClauseSize(0), - Gen(TT.getArch() == Triple::amdgcn ? SOUTHERN_ISLANDS : R600), - FP64(false), - FP64Denormals(false), FP32Denormals(false), FPExceptions(false), - FastFMAF32(false), HalfRate64Ops(false), CaymanISA(false), - FlatAddressSpace(false), FlatForGlobal(false), EnableIRStructurizer(true), - EnablePromoteAlloca(false), - EnableIfCvt(true), EnableLoadStoreOpt(false), - EnableUnsafeDSOffsetFolding(false), - EnableXNACK(false), - WavefrontSize(64), CFALUBug(false), - LocalMemorySize(0), MaxPrivateElementSize(0), - EnableVGPRSpilling(false), SGPRInitBug(false), IsGCN(false), - GCN1Encoding(false), GCN3Encoding(false), CIInsts(false), - HasSMemRealTime(false), Has16BitInsts(false), - LDSBankCount(0), - IsaVersion(ISAVersion0_0_0), - EnableSIScheduler(false), - DebuggerInsertNops(false), DebuggerReserveRegs(false), - FrameLowering(nullptr), - GISel(), - InstrItins(getInstrItineraryForCPU(GPU)), TargetTriple(TT) { - + const TargetMachine &TM) + : AMDGPUGenSubtargetInfo(TT, GPU, FS), + TargetTriple(TT), + Gen(TT.getArch() == Triple::amdgcn ? 
SOUTHERN_ISLANDS : R600), + IsaVersion(ISAVersion0_0_0), + WavefrontSize(64), + LocalMemorySize(0), + LDSBankCount(0), + MaxPrivateElementSize(0), + + FastFMAF32(false), + HalfRate64Ops(false), + + FP32Denormals(false), + FP64Denormals(false), + FPExceptions(false), + FlatForGlobal(false), + EnableXNACK(false), + DebuggerInsertNops(false), + DebuggerReserveRegs(false), + + EnableVGPRSpilling(false), + EnableIRStructurizer(true), + EnablePromoteAlloca(false), + EnableIfCvt(true), + EnableLoadStoreOpt(false), + EnableUnsafeDSOffsetFolding(false), + EnableSIScheduler(false), + DumpCode(false), + + FP64(false), + IsGCN(false), + GCN1Encoding(false), + GCN3Encoding(false), + CIInsts(false), + SGPRInitBug(false), + HasSMemRealTime(false), + Has16BitInsts(false), + FlatAddressSpace(false), + + R600ALUInst(false), + CaymanISA(false), + CFALUBug(false), + HasVertexCache(false), + TexVTXClauseSize(0), + + FeatureDisable(false), + + InstrItins(getInstrItineraryForCPU(GPU)) { initializeSubtargetDependencies(TT, GPU, FS); - - // Scratch is allocated in 256 dword per wave blocks. - const unsigned StackAlign = 4 * 256 / getWavefrontSize(); - - if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) { - InstrInfo.reset(new R600InstrInfo(*this)); - TLInfo.reset(new R600TargetLowering(TM, *this)); - - // FIXME: Should have R600 specific FrameLowering - FrameLowering.reset(new AMDGPUFrameLowering( - TargetFrameLowering::StackGrowsUp, - StackAlign, - 0)); - } else { - InstrInfo.reset(new SIInstrInfo(*this)); - TLInfo.reset(new SITargetLowering(TM, *this)); - FrameLowering.reset(new SIFrameLowering( - TargetFrameLowering::StackGrowsUp, - StackAlign, - 0)); -#ifndef LLVM_BUILD_GLOBAL_ISEL - GISelAccessor *GISel = new GISelAccessor(); -#else - AMDGPUGISelActualAccessor *GISel = - new AMDGPUGISelActualAccessor(); - GISel->CallLoweringInfo.reset( - new AMDGPUCallLowering(*getTargetLowering())); -#endif - setGISelAccessor(*GISel); - } -} - -const CallLowering *AMDGPUSubtarget::getCallLowering() const { - assert(GISel && "Access to GlobalISel APIs not set"); - return GISel->getCallLowering(); -} - -unsigned AMDGPUSubtarget::getStackEntrySize() const { - assert(getGeneration() <= NORTHERN_ISLANDS); - switch(getWavefrontSize()) { - case 16: - return 8; - case 32: - return hasCaymanISA() ? 4 : 8; - case 64: - return 4; - default: - llvm_unreachable("Illegal wavefront size."); - } } // FIXME: These limits are for SI. 
Did they change with the larger maximum LDS @@ -215,40 +191,75 @@ unsigned AMDGPUSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes) const { return 1; } -unsigned AMDGPUSubtarget::getAmdKernelCodeChipID() const { - switch(getGeneration()) { - default: llvm_unreachable("ChipID unknown"); - case SEA_ISLANDS: return 12; - } -} - -AMDGPU::IsaVersion AMDGPUSubtarget::getIsaVersion() const { - return AMDGPU::getIsaVersion(getFeatureBits()); +R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS, + const TargetMachine &TM) : + AMDGPUSubtarget(TT, GPU, FS, TM), + InstrInfo(*this), + FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0), + TLInfo(TM, *this) {} + +SISubtarget::SISubtarget(const Triple &TT, StringRef GPU, StringRef FS, + const TargetMachine &TM) : + AMDGPUSubtarget(TT, GPU, FS, TM), + InstrInfo(*this), + FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0), + TLInfo(TM, *this) { +#ifndef LLVM_BUILD_GLOBAL_ISEL + GISelAccessor *GISel = new GISelAccessor(); +#else + AMDGPUGISelActualAccessor *GISel = + new AMDGPUGISelActualAccessor(); + GISel->CallLoweringInfo.reset( + new AMDGPUCallLowering(*getTargetLowering())); +#endif + setGISelAccessor(*GISel); } -bool AMDGPUSubtarget::isVGPRSpillingEnabled(const Function& F) const { - return !AMDGPU::isShader(F.getCallingConv()) || EnableVGPRSpilling; +unsigned R600Subtarget::getStackEntrySize() const { + switch (getWavefrontSize()) { + case 16: + return 8; + case 32: + return hasCaymanISA() ? 4 : 8; + case 64: + return 4; + default: + llvm_unreachable("Illegal wavefront size."); + } } -void AMDGPUSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy, +void SISubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy, MachineInstr *begin, MachineInstr *end, unsigned NumRegionInstrs) const { - if (getGeneration() >= SOUTHERN_ISLANDS) { - - // Track register pressure so the scheduler can try to decrease - // pressure once register usage is above the threshold defined by - // SIRegisterInfo::getRegPressureSetLimit() - Policy.ShouldTrackPressure = true; + // Track register pressure so the scheduler can try to decrease + // pressure once register usage is above the threshold defined by + // SIRegisterInfo::getRegPressureSetLimit() + Policy.ShouldTrackPressure = true; + + // Enabling both top down and bottom up scheduling seems to give us less + // register spills than just using one of these approaches on its own. + Policy.OnlyTopDown = false; + Policy.OnlyBottomUp = false; + + // Enabling ShouldTrackLaneMasks crashes the SI Machine Scheduler. + if (!enableSIScheduler()) + Policy.ShouldTrackLaneMasks = true; +} - // Enabling both top down and bottom up scheduling seems to give us less - // register spills than just using one of these approaches on its own. - Policy.OnlyTopDown = false; - Policy.OnlyBottomUp = false; +bool SISubtarget::isVGPRSpillingEnabled(const Function& F) const { + return EnableVGPRSpilling || !AMDGPU::isShader(F.getCallingConv()); +} - // Enabling ShouldTrackLaneMasks crashes the SI Machine Scheduler. 
- if (!enableSIScheduler()) - Policy.ShouldTrackLaneMasks = true; +unsigned SISubtarget::getAmdKernelCodeChipID() const { + switch (getGeneration()) { + case SEA_ISLANDS: + return 12; + default: + llvm_unreachable("ChipID unknown"); } } +AMDGPU::IsaVersion SISubtarget::getIsaVersion() const { + return AMDGPU::getIsaVersion(getFeatureBits()); +} diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h index 96cdc9f84b7..300a92e4bec 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -16,10 +16,12 @@ #define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H #include "AMDGPU.h" -#include "AMDGPUFrameLowering.h" -#include "AMDGPUISelLowering.h" -#include "AMDGPUInstrInfo.h" -#include "AMDGPUSubtarget.h" +#include "R600InstrInfo.h" +#include "R600ISelLowering.h" +#include "R600FrameLowering.h" +#include "SIInstrInfo.h" +#include "SIISelLowering.h" +#include "SIFrameLowering.h" #include "Utils/AMDGPUBaseInfo.h" #include "llvm/CodeGen/GlobalISel/GISelAccessor.h" #include "llvm/Target/TargetSubtargetInfo.h" @@ -29,11 +31,10 @@ namespace llvm { -class StringRef; class SIMachineFunctionInfo; +class StringRef; class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo { - public: enum Generation { R600 = 0, @@ -46,10 +47,6 @@ public: }; enum { - FIXED_SGPR_COUNT_FOR_INIT_BUG = 80 - }; - - enum { ISAVersion0_0_0, ISAVersion7_0_0, ISAVersion7_0_1, @@ -58,113 +55,104 @@ public: ISAVersion8_0_3 }; -private: - bool DumpCode; - bool R600ALUInst; - bool HasVertexCache; - short TexVTXClauseSize; +protected: + // Basic subtarget description. + Triple TargetTriple; Generation Gen; - bool FP64; - bool FP64Denormals; - bool FP32Denormals; - bool FPExceptions; + unsigned IsaVersion; + unsigned WavefrontSize; + int LocalMemorySize; + int LDSBankCount; + unsigned MaxPrivateElementSize; + + // Possibly statically set by tablegen, but may want to be overridden. bool FastFMAF32; bool HalfRate64Ops; - bool CaymanISA; - bool FlatAddressSpace; + + // Dynamially set bits that enable features. + bool FP32Denormals; + bool FP64Denormals; + bool FPExceptions; bool FlatForGlobal; + bool EnableXNACK; + bool DebuggerInsertNops; + bool DebuggerReserveRegs; + + // Used as options. + bool EnableVGPRSpilling; bool EnableIRStructurizer; bool EnablePromoteAlloca; bool EnableIfCvt; bool EnableLoadStoreOpt; bool EnableUnsafeDSOffsetFolding; - bool EnableXNACK; - unsigned WavefrontSize; - bool CFALUBug; - int LocalMemorySize; - unsigned MaxPrivateElementSize; - bool EnableVGPRSpilling; - bool SGPRInitBug; + bool EnableSIScheduler; + bool DumpCode; + + // Subtarget statically properties set by tablegen + bool FP64; bool IsGCN; bool GCN1Encoding; bool GCN3Encoding; bool CIInsts; + bool SGPRInitBug; bool HasSMemRealTime; bool Has16BitInsts; + bool FlatAddressSpace; + bool R600ALUInst; + bool CaymanISA; + bool CFALUBug; + bool HasVertexCache; + short TexVTXClauseSize; + + // Dummy feature to use for assembler in tablegen. 
bool FeatureDisable; - int LDSBankCount; - unsigned IsaVersion; - bool EnableSIScheduler; - bool DebuggerInsertNops; - bool DebuggerReserveRegs; - std::unique_ptr<AMDGPUFrameLowering> FrameLowering; - std::unique_ptr<AMDGPUTargetLowering> TLInfo; - std::unique_ptr<AMDGPUInstrInfo> InstrInfo; - std::unique_ptr<GISelAccessor> GISel; InstrItineraryData InstrItins; - Triple TargetTriple; public: - AMDGPUSubtarget(const Triple &TT, StringRef CPU, StringRef FS, - TargetMachine &TM); + AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS, + const TargetMachine &TM); + virtual ~AMDGPUSubtarget(); AMDGPUSubtarget &initializeSubtargetDependencies(const Triple &TT, StringRef GPU, StringRef FS); - void setGISelAccessor(GISelAccessor &GISel) { - this->GISel.reset(&GISel); - } + const AMDGPUInstrInfo *getInstrInfo() const override; + const AMDGPUFrameLowering *getFrameLowering() const override; + const AMDGPUTargetLowering *getTargetLowering() const override; + const AMDGPURegisterInfo *getRegisterInfo() const override; - const AMDGPUFrameLowering *getFrameLowering() const override { - return FrameLowering.get(); - } - const AMDGPUInstrInfo *getInstrInfo() const override { - return InstrInfo.get(); - } - const AMDGPURegisterInfo *getRegisterInfo() const override { - return &InstrInfo->getRegisterInfo(); - } - AMDGPUTargetLowering *getTargetLowering() const override { - return TLInfo.get(); - } const InstrItineraryData *getInstrItineraryData() const override { return &InstrItins; } - const CallLowering *getCallLowering() const override; - void ParseSubtargetFeatures(StringRef CPU, StringRef FS); - bool hasVertexCache() const { - return HasVertexCache; - } - - short getTexVTXClauseSize() const { - return TexVTXClauseSize; + bool isAmdHsaOS() const { + return TargetTriple.getOS() == Triple::AMDHSA; } Generation getGeneration() const { return Gen; } - bool hasHWFP64() const { - return FP64; + unsigned getWavefrontSize() const { + return WavefrontSize; } - bool hasCaymanISA() const { - return CaymanISA; + int getLocalMemorySize() const { + return LocalMemorySize; } - bool hasFP32Denormals() const { - return FP32Denormals; + int getLDSBankCount() const { + return LDSBankCount; } - bool hasFP64Denormals() const { - return FP64Denormals; + unsigned getMaxPrivateElementSize() const { + return MaxPrivateElementSize; } - bool hasFPExceptions() const { - return FPExceptions; + bool hasHWFP64() const { + return FP64; } bool hasFastFMAF32() const { @@ -175,22 +163,6 @@ public: return HalfRate64Ops; } - bool hasFlatAddressSpace() const { - return FlatAddressSpace; - } - - bool hasSMemRealTime() const { - return HasSMemRealTime; - } - - bool has16BitInsts() const { - return Has16BitInsts; - } - - bool useFlatForGlobal() const { - return FlatForGlobal; - } - bool hasAddr64() const { return (getGeneration() < VOLCANIC_ISLANDS); } @@ -242,6 +214,10 @@ public: return (getGeneration() >= EVERGREEN); } + bool hasCaymanISA() const { + return CaymanISA; + } + bool IsIRStructurizerEnabled() const { return EnableIRStructurizer; } @@ -254,23 +230,12 @@ public: return EnableIfCvt; } - bool loadStoreOptEnabled() const { - return EnableLoadStoreOpt; - } - bool unsafeDSOffsetFoldingEnabled() const { return EnableUnsafeDSOffsetFolding; } - unsigned getWavefrontSize() const { - return WavefrontSize; - } - - unsigned getStackEntrySize() const; - - bool hasCFAluBug() const { - assert(getGeneration() <= NORTHERN_ISLANDS); - return CFALUBug; + bool dumpCode() const { + return DumpCode; } /// Return the amount of LDS that can be 
used that will not restrict the @@ -282,89 +247,212 @@ public: unsigned getOccupancyWithLocalMemSize(uint32_t Bytes) const; - int getLocalMemorySize() const { - return LocalMemorySize; + bool hasFP32Denormals() const { + return FP32Denormals; } - unsigned getMaxPrivateElementSize() const { - return MaxPrivateElementSize; + bool hasFP64Denormals() const { + return FP64Denormals; } - bool hasSGPRInitBug() const { - return SGPRInitBug; + bool hasFPExceptions() const { + return FPExceptions; } - int getLDSBankCount() const { - return LDSBankCount; + bool useFlatForGlobal() const { + return FlatForGlobal; } - unsigned getAmdKernelCodeChipID() const; + bool isXNACKEnabled() const { + return EnableXNACK; + } - AMDGPU::IsaVersion getIsaVersion() const; + unsigned getMaxWavesPerCU() const { + if (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) + return 10; + + // FIXME: Not sure what this is for other subtagets. + return 8; + } + + /// \brief Returns the offset in bytes from the start of the input buffer + /// of the first explicit kernel argument. + unsigned getExplicitKernelArgOffset() const { + return isAmdHsaOS() ? 0 : 36; + } + + unsigned getStackAlignment() const { + // Scratch is allocated in 256 dword per wave blocks. + return 4 * 256 / getWavefrontSize(); + } bool enableMachineScheduler() const override { return true; } - void overrideSchedPolicy(MachineSchedPolicy &Policy, - MachineInstr *begin, MachineInstr *end, - unsigned NumRegionInstrs) const override; + bool enableSubRegLiveness() const override { + return true; + } +}; - // Helper functions to simplify if statements - bool isTargetELF() const { - return false; +class R600Subtarget final : public AMDGPUSubtarget { +private: + R600InstrInfo InstrInfo; + R600FrameLowering FrameLowering; + R600TargetLowering TLInfo; + +public: + R600Subtarget(const Triple &TT, StringRef CPU, StringRef FS, + const TargetMachine &TM); + + const R600InstrInfo *getInstrInfo() const override { + return &InstrInfo; } - bool enableSIScheduler() const { - return EnableSIScheduler; + const R600FrameLowering *getFrameLowering() const override { + return &FrameLowering; } - bool debuggerInsertNops() const { - return DebuggerInsertNops; + const R600TargetLowering *getTargetLowering() const override { + return &TLInfo; } - bool debuggerReserveRegs() const { - return DebuggerReserveRegs; + const R600RegisterInfo *getRegisterInfo() const override { + return &InstrInfo.getRegisterInfo(); } - bool dumpCode() const { - return DumpCode; + bool hasCFAluBug() const { + return CFALUBug; } - bool r600ALUEncoding() const { - return R600ALUInst; + + bool hasVertexCache() const { + return HasVertexCache; } - bool isAmdHsaOS() const { - return TargetTriple.getOS() == Triple::AMDHSA; + + short getTexVTXClauseSize() const { + return TexVTXClauseSize; } - bool isVGPRSpillingEnabled(const Function& F) const; - bool isXNACKEnabled() const { - return EnableXNACK; + unsigned getStackEntrySize() const; +}; + +class SISubtarget final : public AMDGPUSubtarget { +public: + enum { + FIXED_SGPR_COUNT_FOR_INIT_BUG = 80 + }; + +private: + SIInstrInfo InstrInfo; + SIFrameLowering FrameLowering; + SITargetLowering TLInfo; + std::unique_ptr<GISelAccessor> GISel; + +public: + SISubtarget(const Triple &TT, StringRef CPU, StringRef FS, + const TargetMachine &TM); + + const SIInstrInfo *getInstrInfo() const override { + return &InstrInfo; } - unsigned getMaxWavesPerCU() const { - if (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) - return 10; + const SIFrameLowering *getFrameLowering() 
const override { + return &FrameLowering; + } - // FIXME: Not sure what this is for other subtagets. - return 8; + const SITargetLowering *getTargetLowering() const override { + return &TLInfo; } - bool enableSubRegLiveness() const override { - return true; + const CallLowering *getCallLowering() const override { + assert(GISel && "Access to GlobalISel APIs not set"); + return GISel->getCallLowering(); } - /// \brief Returns the offset in bytes from the start of the input buffer - /// of the first explicit kernel argument. - unsigned getExplicitKernelArgOffset() const { - return isAmdHsaOS() ? 0 : 36; + const SIRegisterInfo *getRegisterInfo() const override { + return &InstrInfo.getRegisterInfo(); } + void setGISelAccessor(GISelAccessor &GISel) { + this->GISel.reset(&GISel); + } + + void overrideSchedPolicy(MachineSchedPolicy &Policy, + MachineInstr *Begin, MachineInstr *End, + unsigned NumRegionInstrs) const override; + + bool isVGPRSpillingEnabled(const Function& F) const; + + unsigned getAmdKernelCodeChipID() const; + + AMDGPU::IsaVersion getIsaVersion() const; + unsigned getMaxNumUserSGPRs() const { return 16; } + + bool hasFlatAddressSpace() const { + return FlatAddressSpace; + } + + bool hasSMemRealTime() const { + return HasSMemRealTime; + } + + bool has16BitInsts() const { + return Has16BitInsts; + } + + bool enableSIScheduler() const { + return EnableSIScheduler; + } + + bool debuggerInsertNops() const { + return DebuggerInsertNops; + } + + bool debuggerReserveRegs() const { + return DebuggerReserveRegs; + } + + bool loadStoreOptEnabled() const { + return EnableLoadStoreOpt; + } + + bool hasSGPRInitBug() const { + return SGPRInitBug; + } }; + +inline const AMDGPUInstrInfo *AMDGPUSubtarget::getInstrInfo() const { + if (getGeneration() >= SOUTHERN_ISLANDS) + return static_cast<const SISubtarget *>(this)->getInstrInfo(); + + return static_cast<const R600Subtarget *>(this)->getInstrInfo(); +} + +inline const AMDGPUFrameLowering *AMDGPUSubtarget::getFrameLowering() const { + if (getGeneration() >= SOUTHERN_ISLANDS) + return static_cast<const SISubtarget *>(this)->getFrameLowering(); + + return static_cast<const R600Subtarget *>(this)->getFrameLowering(); +} + +inline const AMDGPUTargetLowering *AMDGPUSubtarget::getTargetLowering() const { + if (getGeneration() >= SOUTHERN_ISLANDS) + return static_cast<const SISubtarget *>(this)->getTargetLowering(); + + return static_cast<const R600Subtarget *>(this)->getTargetLowering(); +} + +inline const AMDGPURegisterInfo *AMDGPUSubtarget::getRegisterInfo() const { + if (getGeneration() >= SOUTHERN_ISLANDS) + return static_cast<const SISubtarget *>(this)->getRegisterInfo(); + + return static_cast<const R600Subtarget *>(this)->getRegisterInfo(); +} + } // End namespace llvm #endif diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index a8428ade53c..8bc999b9b15 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -37,7 +37,6 @@ #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/GVN.h" -#include "llvm/CodeGen/Passes.h" using namespace llvm; @@ -118,10 +117,10 @@ AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT, Optional<Reloc::Model> RM, CodeModel::Model CM, CodeGenOpt::Level OptLevel) - : LLVMTargetMachine(T, computeDataLayout(TT), TT, getGPUOrDefault(TT, CPU), - FS, Options, getEffectiveRelocModel(RM), CM, OptLevel), - 
TLOF(createTLOF(getTargetTriple())), - Subtarget(TT, getTargetCPU(), FS, *this), IntrinsicInfo() { + : LLVMTargetMachine(T, computeDataLayout(TT), TT, getGPUOrDefault(TT, CPU), + FS, Options, getEffectiveRelocModel(RM), CM, OptLevel), + TLOF(createTLOF(getTargetTriple())), + IntrinsicInfo() { setRequiresStructuredCFG(true); initAsmInfo(); } @@ -137,7 +136,8 @@ R600TargetMachine::R600TargetMachine(const Target &T, const Triple &TT, TargetOptions Options, Optional<Reloc::Model> RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {} + : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), + Subtarget(TT, getTargetCPU(), FS, *this) {} //===----------------------------------------------------------------------===// // GCN Target Machine (SI+) @@ -148,7 +148,8 @@ GCNTargetMachine::GCNTargetMachine(const Target &T, const Triple &TT, TargetOptions Options, Optional<Reloc::Model> RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {} + : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), + Subtarget(TT, getTargetCPU(), FS, *this) {} //===----------------------------------------------------------------------===// // AMDGPU Pass Setup @@ -171,16 +172,6 @@ public: return getTM<AMDGPUTargetMachine>(); } - ScheduleDAGInstrs * - createMachineScheduler(MachineSchedContext *C) const override { - const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl(); - if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) - return createR600MachineScheduler(C); - else if (ST.enableSIScheduler()) - return createSIMachineScheduler(C); - return nullptr; - } - void addEarlyCSEOrGVNPass(); void addStraightLineScalarOptimizationPasses(); void addIRPasses() override; @@ -194,6 +185,11 @@ public: R600PassConfig(TargetMachine *TM, PassManagerBase &PM) : AMDGPUPassConfig(TM, PM) { } + ScheduleDAGInstrs *createMachineScheduler( + MachineSchedContext *C) const override { + return createR600MachineScheduler(C); + } + bool addPreISel() override; void addPreRegAlloc() override; void addPreSched2() override; @@ -204,6 +200,19 @@ class GCNPassConfig final : public AMDGPUPassConfig { public: GCNPassConfig(TargetMachine *TM, PassManagerBase &PM) : AMDGPUPassConfig(TM, PM) { } + + GCNTargetMachine &getGCNTargetMachine() const { + return getTM<GCNTargetMachine>(); + } + + ScheduleDAGInstrs * + createMachineScheduler(MachineSchedContext *C) const override { + const SISubtarget *ST = getGCNTargetMachine().getSubtargetImpl(); + if (ST->enableSIScheduler()) + return createSIMachineScheduler(C); + return nullptr; + } + bool addPreISel() override; void addMachineSSAOptimization() override; bool addInstSelector() override; @@ -296,8 +305,7 @@ void AMDGPUPassConfig::addIRPasses() { addEarlyCSEOrGVNPass(); } -bool -AMDGPUPassConfig::addPreISel() { +bool AMDGPUPassConfig::addPreISel() { addPass(createFlattenCFGPass()); return false; } @@ -401,7 +409,7 @@ bool GCNPassConfig::addRegBankSelect() { #endif void GCNPassConfig::addPreRegAlloc() { - const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl(); + const SISubtarget &ST = *getGCNTargetMachine().getSubtargetImpl(); // This needs to be run directly before register allocation because // earlier passes might recompute live intervals. 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h index 639d65cc255..77dfd4fdc06 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h @@ -15,12 +15,8 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETMACHINE_H #define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETMACHINE_H -#include "AMDGPUFrameLowering.h" -#include "AMDGPUInstrInfo.h" #include "AMDGPUIntrinsicInfo.h" #include "AMDGPUSubtarget.h" -#include "R600ISelLowering.h" -#include "llvm/IR/DataLayout.h" namespace llvm { @@ -29,11 +25,8 @@ namespace llvm { //===----------------------------------------------------------------------===// class AMDGPUTargetMachine : public LLVMTargetMachine { -private: - protected: std::unique_ptr<TargetLoweringObjectFile> TLOF; - AMDGPUSubtarget Subtarget; AMDGPUIntrinsicInfo IntrinsicInfo; public: @@ -43,10 +36,9 @@ public: CodeGenOpt::Level OL); ~AMDGPUTargetMachine(); - const AMDGPUSubtarget *getSubtargetImpl() const { return &Subtarget; } - const AMDGPUSubtarget *getSubtargetImpl(const Function &) const override { - return &Subtarget; - } + const AMDGPUSubtarget *getSubtargetImpl() const; + const AMDGPUSubtarget *getSubtargetImpl(const Function &) const override; + const AMDGPUIntrinsicInfo *getIntrinsicInfo() const override { return &IntrinsicInfo; } @@ -62,6 +54,8 @@ public: //===----------------------------------------------------------------------===// class R600TargetMachine final : public AMDGPUTargetMachine { +private: + R600Subtarget Subtarget; public: R600TargetMachine(const Target &T, const Triple &TT, StringRef CPU, @@ -70,6 +64,14 @@ public: CodeGenOpt::Level OL); TargetPassConfig *createPassConfig(PassManagerBase &PM) override; + + const R600Subtarget *getSubtargetImpl() const { + return &Subtarget; + } + + const R600Subtarget *getSubtargetImpl(const Function &) const override { + return &Subtarget; + } }; //===----------------------------------------------------------------------===// @@ -77,6 +79,8 @@ public: //===----------------------------------------------------------------------===// class GCNTargetMachine final : public AMDGPUTargetMachine { +private: + SISubtarget Subtarget; public: GCNTargetMachine(const Target &T, const Triple &TT, StringRef CPU, @@ -85,8 +89,29 @@ public: CodeGenOpt::Level OL); TargetPassConfig *createPassConfig(PassManagerBase &PM) override; + + const SISubtarget *getSubtargetImpl() const { + return &Subtarget; + } + + const SISubtarget *getSubtargetImpl(const Function &) const override { + return &Subtarget; + } }; +inline const AMDGPUSubtarget *AMDGPUTargetMachine::getSubtargetImpl() const { + if (getTargetTriple().getArch() == Triple::amdgcn) + return static_cast<const GCNTargetMachine *>(this)->getSubtargetImpl(); + return static_cast<const R600TargetMachine *>(this)->getSubtargetImpl(); +} + +inline const AMDGPUSubtarget *AMDGPUTargetMachine::getSubtargetImpl( + const Function &F) const { + if (getTargetTriple().getArch() == Triple::amdgcn) + return static_cast<const GCNTargetMachine *>(this)->getSubtargetImpl(F); + return static_cast<const R600TargetMachine *>(this)->getSubtargetImpl(F); +} + } // End namespace llvm #endif diff --git a/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp b/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp index 45f700db33c..16f19d840f6 100644 --- a/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp +++ b/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp @@ -160,7 +160,7 @@ public: bool prepare(); bool 
runOnMachineFunction(MachineFunction &MF) override { - TII = static_cast<const R600InstrInfo *>(MF.getSubtarget().getInstrInfo()); + TII = MF.getSubtarget<R600Subtarget>().getInstrInfo(); TRI = &TII->getRegisterInfo(); DEBUG(MF.dump();); OrderedBlks.clear(); diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt index ad70664473d..f3701022ed2 100644 --- a/llvm/lib/Target/AMDGPU/CMakeLists.txt +++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt @@ -52,6 +52,7 @@ add_llvm_target(AMDGPUCodeGen R600ControlFlowFinalizer.cpp R600EmitClauseMarkers.cpp R600ExpandSpecialInstrs.cpp + R600FrameLowering.cpp R600InstrInfo.cpp R600ISelLowering.cpp R600MachineFunctionInfo.cpp diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index 4b6cc6524f5..29b1f79187d 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -25,7 +25,8 @@ using namespace llvm; GCNHazardRecognizer::GCNHazardRecognizer(const MachineFunction &MF) : CurrCycleInstr(nullptr), - MF(MF) { + MF(MF), + ST(MF.getSubtarget<SISubtarget>()) { MaxLookAhead = 5; } @@ -81,8 +82,7 @@ void GCNHazardRecognizer::AdvanceCycle() { if (!CurrCycleInstr) return; - const SIInstrInfo *TII = - static_cast<const SIInstrInfo*>(MF.getSubtarget().getInstrInfo()); + const SIInstrInfo *TII = ST.getInstrInfo(); unsigned NumWaitStates = TII->getNumWaitStates(*CurrCycleInstr); // Keep track of emitted instructions @@ -114,8 +114,7 @@ void GCNHazardRecognizer::RecedeCycle() { int GCNHazardRecognizer::getWaitStatesSinceDef( unsigned Reg, function_ref<bool(MachineInstr *)> IsHazardDef) { - const TargetRegisterInfo *TRI = - MF.getSubtarget<AMDGPUSubtarget>().getRegisterInfo(); + const SIRegisterInfo *TRI = ST.getRegisterInfo(); int WaitStates = -1; for (MachineInstr *MI : EmittedInstrs) { @@ -141,10 +140,8 @@ static void addRegsToSet(iterator_range<MachineInstr::const_mop_iterator> Ops, } int GCNHazardRecognizer::checkSMEMSoftClauseHazards(MachineInstr *SMEM) { - const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>(); - // SMEM soft clause are only present on VI+ - if (ST.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS) + if (ST.getGeneration() < SISubtarget::VOLCANIC_ISLANDS) return 0; // A soft-clause is any group of consecutive SMEM instructions. The @@ -198,14 +195,14 @@ int GCNHazardRecognizer::checkSMEMSoftClauseHazards(MachineInstr *SMEM) { } int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) { - const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>(); - const SIInstrInfo *TII = static_cast<const SIInstrInfo*>(ST.getInstrInfo()); + const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); + const SIInstrInfo *TII = ST.getInstrInfo(); int WaitStatesNeeded = 0; WaitStatesNeeded = checkSMEMSoftClauseHazards(SMRD); // This SMRD hazard only affects SI. 
- if (ST.getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS) + if (ST.getGeneration() != SISubtarget::SOUTHERN_ISLANDS) return WaitStatesNeeded; // A read of an SGPR by SMRD instruction requires 4 wait states when the @@ -224,10 +221,9 @@ int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) { } int GCNHazardRecognizer::checkVMEMHazards(MachineInstr* VMEM) { - const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>(); - const SIInstrInfo *TII = static_cast<const SIInstrInfo*>(ST.getInstrInfo()); + const SIInstrInfo *TII = ST.getInstrInfo(); - if (ST.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS) + if (ST.getGeneration() < SISubtarget::VOLCANIC_ISLANDS) return 0; const SIRegisterInfo &TRI = TII->getRegisterInfo(); @@ -250,9 +246,7 @@ int GCNHazardRecognizer::checkVMEMHazards(MachineInstr* VMEM) { } int GCNHazardRecognizer::checkDPPHazards(MachineInstr *DPP) { - const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>(); - const SIRegisterInfo *TRI = - static_cast<const SIRegisterInfo*>(ST.getRegisterInfo()); + const SIRegisterInfo *TRI = ST.getRegisterInfo(); // Check for DPP VGPR read after VALU VGPR write. int DppVgprWaitStates = 2; diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h index 3c0a80844b5..d82041c5f17 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h @@ -24,15 +24,16 @@ class MachineFunction; class MachineInstr; class ScheduleDAG; class SIInstrInfo; +class SISubtarget; class GCNHazardRecognizer final : public ScheduleHazardRecognizer { - - // This variable stores the instruction that has been emitted this cycle. - // It will be added to EmittedInstrs, when AdvanceCycle() or RecedeCycle() is + // This variable stores the instruction that has been emitted this cycle. It + // will be added to EmittedInstrs, when AdvanceCycle() or RecedeCycle() is // called. 
MachineInstr *CurrCycleInstr; std::list<MachineInstr*> EmittedInstrs; const MachineFunction &MF; + const SISubtarget &ST; int getWaitStatesSinceDef(unsigned Reg, function_ref<bool(MachineInstr *)> IsHazardDef = diff --git a/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp b/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp index ca2c7ee4fc9..ef645f908b3 100644 --- a/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp +++ b/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp @@ -171,7 +171,9 @@ bool R600ClauseMergePass::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(*MF.getFunction())) return false; - TII = static_cast<const R600InstrInfo *>(MF.getSubtarget().getInstrInfo()); + const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>(); + TII = ST.getInstrInfo(); + for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); BB != BB_E; ++BB) { MachineBasicBlock &MBB = *BB; diff --git a/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp b/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp index 560e2e21224..fd2a688852d 100644 --- a/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp +++ b/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp @@ -39,14 +39,14 @@ struct CFStack { FIRST_NON_WQM_PUSH_W_FULL_ENTRY = 3 }; - const AMDGPUSubtarget *ST; + const R600Subtarget *ST; std::vector<StackItem> BranchStack; std::vector<StackItem> LoopStack; unsigned MaxStackSize; unsigned CurrentEntries; unsigned CurrentSubEntries; - CFStack(const AMDGPUSubtarget *st, CallingConv::ID cc) : ST(st), + CFStack(const R600Subtarget *st, CallingConv::ID cc) : ST(st), // We need to reserve a stack entry for CALL_FS in vertex shaders. MaxStackSize(cc == CallingConv::AMDGPU_VS ? 1 : 0), CurrentEntries(0), CurrentSubEntries(0) { } @@ -119,7 +119,7 @@ unsigned CFStack::getSubEntrySize(CFStack::StackItem Item) { return 0; case CFStack::FIRST_NON_WQM_PUSH: assert(!ST->hasCaymanISA()); - if (ST->getGeneration() <= AMDGPUSubtarget::R700) { + if (ST->getGeneration() <= R600Subtarget::R700) { // +1 For the push operation. // +2 Extra space required. return 3; @@ -132,7 +132,7 @@ unsigned CFStack::getSubEntrySize(CFStack::StackItem Item) { return 2; } case CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY: - assert(ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN); + assert(ST->getGeneration() >= R600Subtarget::EVERGREEN); // +1 For the push operation. // +1 Extra space required. return 2; @@ -159,7 +159,7 @@ void CFStack::pushBranch(unsigned Opcode, bool isWQM) { // See comment in // CFStack::getSubEntrySize() else if (CurrentEntries > 0 && - ST->getGeneration() > AMDGPUSubtarget::EVERGREEN && + ST->getGeneration() > R600Subtarget::EVERGREEN && !ST->hasCaymanISA() && !branchStackContains(CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY)) Item = CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY; @@ -220,7 +220,7 @@ private: const R600InstrInfo *TII; const R600RegisterInfo *TRI; unsigned MaxFetchInst; - const AMDGPUSubtarget *ST; + const R600Subtarget *ST; bool IsTrivialInst(MachineInstr *MI) const { switch (MI->getOpcode()) { @@ -234,7 +234,7 @@ private: const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const { unsigned Opcode = 0; - bool isEg = (ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN); + bool isEg = (ST->getGeneration() >= R600Subtarget::EVERGREEN); switch (CFI) { case CF_TC: Opcode = isEg ? 
AMDGPU::CF_TC_EG : AMDGPU::CF_TC_R600; @@ -491,10 +491,11 @@ public: : MachineFunctionPass(ID), TII(nullptr), TRI(nullptr), ST(nullptr) {} bool runOnMachineFunction(MachineFunction &MF) override { - ST = &MF.getSubtarget<AMDGPUSubtarget>(); + ST = &MF.getSubtarget<R600Subtarget>(); MaxFetchInst = ST->getTexVTXClauseSize(); - TII = static_cast<const R600InstrInfo *>(ST->getInstrInfo()); - TRI = static_cast<const R600RegisterInfo *>(ST->getRegisterInfo()); + TII = ST->getInstrInfo(); + TRI = ST->getRegisterInfo(); + R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>(); CFStack CFStack(ST, MF.getFunction()->getCallingConv()); diff --git a/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp b/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp index 8c690a7613c..a2d5f5be280 100644 --- a/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp +++ b/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp @@ -298,7 +298,8 @@ public: } bool runOnMachineFunction(MachineFunction &MF) override { - TII = static_cast<const R600InstrInfo *>(MF.getSubtarget().getInstrInfo()); + const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>(); + TII = ST.getInstrInfo(); for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); BB != BB_E; ++BB) { diff --git a/llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp b/llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp index 211d392e8fc..81219aec3ce 100644 --- a/llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp +++ b/llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp @@ -29,7 +29,6 @@ using namespace llvm; namespace { class R600ExpandSpecialInstrsPass : public MachineFunctionPass { - private: static char ID; const R600InstrInfo *TII; @@ -66,7 +65,8 @@ void R600ExpandSpecialInstrsPass::SetFlagInNewMI(MachineInstr *NewMI, } bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) { - TII = static_cast<const R600InstrInfo *>(MF.getSubtarget().getInstrInfo()); + const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>(); + TII = ST.getInstrInfo(); const R600RegisterInfo &TRI = TII->getRegisterInfo(); diff --git a/llvm/lib/Target/AMDGPU/R600FrameLowering.cpp b/llvm/lib/Target/AMDGPU/R600FrameLowering.cpp new file mode 100644 index 00000000000..dd5681ff5e8 --- /dev/null +++ b/llvm/lib/Target/AMDGPU/R600FrameLowering.cpp @@ -0,0 +1,15 @@ +//===----------------------- R600FrameLowering.cpp ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// + +#include "R600FrameLowering.h" + +using namespace llvm; + +R600FrameLowering::~R600FrameLowering() { +} diff --git a/llvm/lib/Target/AMDGPU/R600FrameLowering.h b/llvm/lib/Target/AMDGPU/R600FrameLowering.h new file mode 100644 index 00000000000..5fe4e0d201a --- /dev/null +++ b/llvm/lib/Target/AMDGPU/R600FrameLowering.h @@ -0,0 +1,30 @@ +//===--------------------- R600FrameLowering.h ------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_AMDGPU_R600FRAMELOWERING_H +#define LLVM_LIB_TARGET_AMDGPU_R600FRAMELOWERING_H + +#include "AMDGPUFrameLowering.h" + +namespace llvm { + +class R600FrameLowering : public AMDGPUFrameLowering { +public: + R600FrameLowering(StackDirection D, unsigned StackAl, int LAO, + unsigned TransAl = 1) : + AMDGPUFrameLowering(D, StackAl, LAO, TransAl) {} + virtual ~R600FrameLowering(); + + void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const {} + void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const {} +}; + +} + +#endif diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp index 8cbd2d5c28f..f6e40ca3670 100644 --- a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -30,8 +30,8 @@ using namespace llvm; -R600TargetLowering::R600TargetLowering(TargetMachine &TM, - const AMDGPUSubtarget &STI) +R600TargetLowering::R600TargetLowering(const TargetMachine &TM, + const R600Subtarget &STI) : AMDGPUTargetLowering(TM, STI), Gen(STI.getGeneration()) { addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass); addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass); @@ -199,6 +199,10 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM, setTargetDAGCombine(ISD::INSERT_VECTOR_ELT); } +const R600Subtarget *R600TargetLowering::getSubtarget() const { + return static_cast<const R600Subtarget *>(Subtarget); +} + static inline bool isEOP(MachineBasicBlock::iterator I) { return std::next(I)->getOpcode() == AMDGPU::RETURN; } @@ -208,8 +212,7 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( MachineFunction * MF = BB->getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); MachineBasicBlock::iterator I = *MI; - const R600InstrInfo *TII = - static_cast<const R600InstrInfo *>(Subtarget->getInstrInfo()); + const R600InstrInfo *TII = getSubtarget()->getInstrInfo(); switch (MI->getOpcode()) { default: @@ -966,7 +969,7 @@ SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const { SDValue TrigVal = DAG.getNode(TrigNode, DL, VT, DAG.getNode(ISD::FADD, DL, VT, FractPart, DAG.getConstantFP(-0.5, DL, MVT::f32))); - if (Gen >= AMDGPUSubtarget::R700) + if (Gen >= R600Subtarget::R700) return TrigVal; // On R600 hw, COS/SIN input must be between -Pi and Pi. 
return DAG.getNode(ISD::FMUL, DL, VT, TrigVal, @@ -1439,8 +1442,7 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { // Lowering for indirect addressing const MachineFunction &MF = DAG.getMachineFunction(); - const AMDGPUFrameLowering *TFL = - static_cast<const AMDGPUFrameLowering *>(Subtarget->getFrameLowering()); + const R600FrameLowering *TFL = getSubtarget()->getFrameLowering(); unsigned StackWidth = TFL->getStackWidth(MF); Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG); @@ -1677,8 +1679,7 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { // Lowering for indirect addressing const MachineFunction &MF = DAG.getMachineFunction(); - const AMDGPUFrameLowering *TFL = - static_cast<const AMDGPUFrameLowering *>(Subtarget->getFrameLowering()); + const R600FrameLowering *TFL = getSubtarget()->getFrameLowering(); unsigned StackWidth = TFL->getStackWidth(MF); Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG); @@ -1731,7 +1732,7 @@ SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { SDValue R600TargetLowering::lowerFrameIndex(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); - const AMDGPUFrameLowering *TFL = Subtarget->getFrameLowering(); + const R600FrameLowering *TFL = getSubtarget()->getFrameLowering(); FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op); @@ -2179,13 +2180,14 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N, return AMDGPUTargetLowering::PerformDAGCombine(N, DCI); } -static bool -FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg, - SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) { - const R600InstrInfo *TII = - static_cast<const R600InstrInfo *>(DAG.getSubtarget().getInstrInfo()); +bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx, + SDValue &Src, SDValue &Neg, SDValue &Abs, + SDValue &Sel, SDValue &Imm, + SelectionDAG &DAG) const { + const R600InstrInfo *TII = getSubtarget()->getInstrInfo(); if (!Src.isMachineOpcode()) return false; + switch (Src.getMachineOpcode()) { case AMDGPU::FNEG_R600: if (!Neg.getNode()) @@ -2310,14 +2312,13 @@ FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg, } } - /// \brief Fold the instructions after selecting them SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node, SelectionDAG &DAG) const { - const R600InstrInfo *TII = - static_cast<const R600InstrInfo *>(DAG.getSubtarget().getInstrInfo()); + const R600InstrInfo *TII = getSubtarget()->getInstrInfo(); if (!Node->isMachineOpcode()) return Node; + unsigned Opcode = Node->getMachineOpcode(); SDValue FakeOp; diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.h b/llvm/lib/Target/AMDGPU/R600ISelLowering.h index 8c400c19b88..3fbd72713e3 100644 --- a/llvm/lib/Target/AMDGPU/R600ISelLowering.h +++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.h @@ -20,10 +20,14 @@ namespace llvm { class R600InstrInfo; +class R600Subtarget; class R600TargetLowering final : public AMDGPUTargetLowering { public: - R600TargetLowering(TargetMachine &TM, const AMDGPUSubtarget &STI); + R600TargetLowering(const TargetMachine &TM, const R600Subtarget &STI); + + const R600Subtarget *getSubtarget() const; + MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock * BB) const override; SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; @@ -86,6 +90,10 @@ private: bool isHWTrueValue(SDValue Op) const; bool isHWFalseValue(SDValue Op) const; + bool FoldOperand(SDNode 
*ParentNode, unsigned SrcIdx, SDValue &Src, + SDValue &Neg, SDValue &Abs, SDValue &Sel, SDValue &Imm, + SelectionDAG &DAG) const; + SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const override; }; diff --git a/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp b/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp index 9adb800b8fd..85478e00522 100644 --- a/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp @@ -28,12 +28,8 @@ using namespace llvm; #define GET_INSTRINFO_CTOR_DTOR #include "AMDGPUGenDFAPacketizer.inc" -R600InstrInfo::R600InstrInfo(const AMDGPUSubtarget &st) - : AMDGPUInstrInfo(st), RI() {} - -const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const { - return RI; -} +R600InstrInfo::R600InstrInfo(const R600Subtarget &ST) + : AMDGPUInstrInfo(ST), RI(), ST(ST) {} bool R600InstrInfo::isTrig(const MachineInstr &MI) const { return get(MI.getOpcode()).TSFlags & R600_InstFlag::TRIG; @@ -90,10 +86,9 @@ bool R600InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB, } bool R600InstrInfo::isMov(unsigned Opcode) const { - - switch(Opcode) { - default: return false; + default: + return false; case AMDGPU::MOV: case AMDGPU::MOV_IMM_F32: case AMDGPU::MOV_IMM_I32: @@ -651,7 +646,7 @@ R600InstrInfo::fitsConstReadLimitations(const std::vector<MachineInstr *> &MIs) DFAPacketizer * R600InstrInfo::CreateTargetScheduleState(const TargetSubtargetInfo &STI) const { const InstrItineraryData *II = STI.getInstrItineraryData(); - return static_cast<const AMDGPUSubtarget &>(STI).createDFAPacketizer(II); + return static_cast<const R600Subtarget &>(STI).createDFAPacketizer(II); } static bool @@ -1113,8 +1108,8 @@ bool R600InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { void R600InstrInfo::reserveIndirectRegisters(BitVector &Reserved, const MachineFunction &MF) const { - const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering *>( - MF.getSubtarget().getFrameLowering()); + const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>(); + const R600FrameLowering *TFL = ST.getFrameLowering(); unsigned StackWidth = TFL->getStackWidth(MF); int End = getIndirectIndexEnd(MF); @@ -1290,7 +1285,7 @@ MachineInstr *R600InstrInfo::buildSlotOfVectorInstruction( const { assert (MI->getOpcode() == AMDGPU::DOT_4 && "Not Implemented"); unsigned Opcode; - if (ST.getGeneration() <= AMDGPUSubtarget::R700) + if (ST.getGeneration() <= R600Subtarget::R700) Opcode = AMDGPU::DOT4_r600; else Opcode = AMDGPU::DOT4_eg; diff --git a/llvm/lib/Target/AMDGPU/R600InstrInfo.h b/llvm/lib/Target/AMDGPU/R600InstrInfo.h index de586e74eaf..420bec89f26 100644 --- a/llvm/lib/Target/AMDGPU/R600InstrInfo.h +++ b/llvm/lib/Target/AMDGPU/R600InstrInfo.h @@ -16,23 +16,25 @@ #define LLVM_LIB_TARGET_AMDGPU_R600INSTRINFO_H #include "AMDGPUInstrInfo.h" -#include "R600Defines.h" #include "R600RegisterInfo.h" namespace llvm { - class AMDGPUTargetMachine; - class DFAPacketizer; - class MachineFunction; - class MachineInstr; - class MachineInstrBuilder; - - class R600InstrInfo final : public AMDGPUInstrInfo { - private: +class AMDGPUTargetMachine; +class DFAPacketizer; +class MachineFunction; +class MachineInstr; +class MachineInstrBuilder; +class R600Subtarget; + +class R600InstrInfo final : public AMDGPUInstrInfo { +private: const R600RegisterInfo RI; + const R600Subtarget &ST; - std::vector<std::pair<int, unsigned> > - ExtractSrcs(MachineInstr *MI, const DenseMap<unsigned, unsigned> &PV, unsigned &ConstCount) const; - + std::vector<std::pair<int, unsigned>> + ExtractSrcs(MachineInstr *MI, 
+ const DenseMap<unsigned, unsigned> &PV, + unsigned &ConstCount) const; MachineInstrBuilder buildIndirectRead(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, @@ -41,11 +43,11 @@ namespace llvm { unsigned AddrChan) const; MachineInstrBuilder buildIndirectWrite(MachineBasicBlock *MBB, - MachineBasicBlock::iterator I, - unsigned ValueReg, unsigned Address, - unsigned OffsetReg, - unsigned AddrChan) const; - public: + MachineBasicBlock::iterator I, + unsigned ValueReg, unsigned Address, + unsigned OffsetReg, + unsigned AddrChan) const; +public: enum BankSwizzle { ALU_VEC_012_SCL_210 = 0, ALU_VEC_021_SCL_122, @@ -55,9 +57,12 @@ namespace llvm { ALU_VEC_210 }; - explicit R600InstrInfo(const AMDGPUSubtarget &st); + explicit R600InstrInfo(const R600Subtarget &); + + const R600RegisterInfo &getRegisterInfo() const { + return RI; + } - const R600RegisterInfo &getRegisterInfo() const override; void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, bool KillSrc) const override; @@ -109,7 +114,7 @@ namespace llvm { /// If register is ALU_LITERAL, second member is IMM. /// Otherwise, second member value is undefined. SmallVector<std::pair<MachineOperand *, int64_t>, 3> - getSrcs(MachineInstr *MI) const; + getSrcs(MachineInstr *MI) const; unsigned isLegalUpTo( const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs, @@ -153,10 +158,14 @@ namespace llvm { DFAPacketizer * CreateTargetScheduleState(const TargetSubtargetInfo &) const override; - bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override; + bool ReverseBranchCondition( + SmallVectorImpl<MachineOperand> &Cond) const override; - bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, - SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const override; + bool AnalyzeBranch(MachineBasicBlock &MBB, + MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl<MachineOperand> &Cond, + bool AllowModify) const override; unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond, @@ -168,20 +177,18 @@ namespace llvm { bool isPredicable(MachineInstr &MI) const override; - bool - isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCyles, - BranchProbability Probability) const override; + bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCyles, + BranchProbability Probability) const override; bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCyles, unsigned ExtraPredCycles, BranchProbability Probability) const override ; - bool - isProfitableToIfCvt(MachineBasicBlock &TMBB, - unsigned NumTCycles, unsigned ExtraTCycles, - MachineBasicBlock &FMBB, - unsigned NumFCycles, unsigned ExtraFCycles, - BranchProbability Probability) const override; + bool isProfitableToIfCvt(MachineBasicBlock &TMBB, + unsigned NumTCycles, unsigned ExtraTCycles, + MachineBasicBlock &FMBB, + unsigned NumFCycles, unsigned ExtraFCycles, + BranchProbability Probability) const override; bool DefinesPredicate(MachineInstr &MI, std::vector<MachineOperand> &Pred) const override; @@ -190,7 +197,7 @@ namespace llvm { ArrayRef<MachineOperand> Pred2) const override; bool isProfitableToUnpredicate(MachineBasicBlock &TMBB, - MachineBasicBlock &FMBB) const override; + MachineBasicBlock &FMBB) const override; bool PredicateInstruction(MachineInstr &MI, ArrayRef<MachineOperand> Pred) const override; @@ -240,10 +247,10 @@ namespace 
llvm { unsigned getMaxAlusPerClause() const; - ///buildDefaultInstruction - This function returns a MachineInstr with - /// all the instruction modifiers initialized to their default values. - /// You can use this function to avoid manually specifying each instruction - /// modifier operand when building a new instruction. + /// buildDefaultInstruction - This function returns a MachineInstr with all + /// the instruction modifiers initialized to their default values. You can + /// use this function to avoid manually specifying each instruction modifier + /// operand when building a new instruction. /// /// \returns a MachineInstr with all the instruction modifiers initialized /// to their default values. @@ -260,9 +267,9 @@ namespace llvm { unsigned DstReg) const; MachineInstr *buildMovImm(MachineBasicBlock &BB, - MachineBasicBlock::iterator I, - unsigned DstReg, - uint64_t Imm) const; + MachineBasicBlock::iterator I, + unsigned DstReg, + uint64_t Imm) const; MachineInstr *buildMovInstr(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, @@ -303,7 +310,6 @@ namespace llvm { // Helper functions that check the opcode for status information bool isRegisterStore(const MachineInstr &MI) const; bool isRegisterLoad(const MachineInstr &MI) const; - }; namespace AMDGPU { diff --git a/llvm/lib/Target/AMDGPU/R600Instructions.td b/llvm/lib/Target/AMDGPU/R600Instructions.td index b7a8a808099..431f923f70c 100644 --- a/llvm/lib/Target/AMDGPU/R600Instructions.td +++ b/llvm/lib/Target/AMDGPU/R600Instructions.td @@ -336,11 +336,11 @@ def load_param : LoadParamFrag<load>; def load_param_exti8 : LoadParamFrag<az_extloadi8>; def load_param_exti16 : LoadParamFrag<az_extloadi16>; -def isR600 : Predicate<"Subtarget->getGeneration() <= AMDGPUSubtarget::R700">; +def isR600 : Predicate<"Subtarget->getGeneration() <= R600Subtarget::R700">; def isR600toCayman : Predicate< - "Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS">; + "Subtarget->getGeneration() <= R600Subtarget::NORTHERN_ISLANDS">; //===----------------------------------------------------------------------===// // R600 SDNodes diff --git a/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp b/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp index b3da0d3b6d8..62318fd3781 100644 --- a/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp +++ b/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "R600MachineScheduler.h" +#include "R600InstrInfo.h" #include "AMDGPUSubtarget.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Pass.h" @@ -26,7 +27,7 @@ using namespace llvm; void R600SchedStrategy::initialize(ScheduleDAGMI *dag) { assert(dag->hasVRegLiveness() && "R600SchedStrategy needs vreg liveness"); DAG = static_cast<ScheduleDAGMILive*>(dag); - const AMDGPUSubtarget &ST = DAG->MF.getSubtarget<AMDGPUSubtarget>(); + const R600Subtarget &ST = DAG->MF.getSubtarget<R600Subtarget>(); TII = static_cast<const R600InstrInfo*>(DAG->TII); TRI = static_cast<const R600RegisterInfo*>(DAG->TRI); VLIW5 = !ST.hasCaymanISA(); @@ -48,8 +49,7 @@ void R600SchedStrategy::MoveUnits(std::vector<SUnit *> &QSrc, QSrc.clear(); } -static -unsigned getWFCountLimitedByGPR(unsigned GPRCount) { +static unsigned getWFCountLimitedByGPR(unsigned GPRCount) { assert (GPRCount && "GPRCount cannot be 0"); return 248 / GPRCount; } @@ -349,7 +349,7 @@ void R600SchedStrategy::PrepareNextSlot() { DEBUG(dbgs() << "New Slot\n"); assert (OccupedSlotsMask && "Slot wasn't filled"); 
OccupedSlotsMask = 0; -// if (HwGen == AMDGPUSubtarget::NORTHERN_ISLANDS) +// if (HwGen == R600Subtarget::NORTHERN_ISLANDS) // OccupedSlotsMask |= 16; InstructionsGroupCandidate.clear(); LoadAlu(); diff --git a/llvm/lib/Target/AMDGPU/R600MachineScheduler.h b/llvm/lib/Target/AMDGPU/R600MachineScheduler.h index bc3eafe31f9..16d5d939708 100644 --- a/llvm/lib/Target/AMDGPU/R600MachineScheduler.h +++ b/llvm/lib/Target/AMDGPU/R600MachineScheduler.h @@ -15,15 +15,16 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_R600MACHINESCHEDULER_H #define LLVM_LIB_TARGET_AMDGPU_R600MACHINESCHEDULER_H -#include "R600InstrInfo.h" #include "llvm/CodeGen/MachineScheduler.h" using namespace llvm; namespace llvm { -class R600SchedStrategy final : public MachineSchedStrategy { +class R600InstrInfo; +struct R600RegisterInfo; +class R600SchedStrategy final : public MachineSchedStrategy { const ScheduleDAGMILive *DAG; const R600InstrInfo *TII; const R600RegisterInfo *TRI; diff --git a/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp b/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp index 50bae54e160..d46ff312997 100644 --- a/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp +++ b/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp @@ -29,6 +29,7 @@ #include "AMDGPU.h" #include "AMDGPUSubtarget.h" +#include "R600Defines.h" #include "R600InstrInfo.h" #include "llvm/CodeGen/DFAPacketizer.h" #include "llvm/CodeGen/MachineDominators.h" @@ -317,8 +318,10 @@ bool R600VectorRegMerger::runOnMachineFunction(MachineFunction &Fn) { if (skipFunction(*Fn.getFunction())) return false; - TII = static_cast<const R600InstrInfo *>(Fn.getSubtarget().getInstrInfo()); - MRI = &(Fn.getRegInfo()); + const R600Subtarget &ST = Fn.getSubtarget<R600Subtarget>(); + TII = ST.getInstrInfo(); + MRI = &Fn.getRegInfo(); + for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); MBB != MBBe; ++MBB) { MachineBasicBlock *MB = &*MBB; diff --git a/llvm/lib/Target/AMDGPU/R600Packetizer.cpp b/llvm/lib/Target/AMDGPU/R600Packetizer.cpp index 6a34df20fb1..2f16b117de8 100644 --- a/llvm/lib/Target/AMDGPU/R600Packetizer.cpp +++ b/llvm/lib/Target/AMDGPU/R600Packetizer.cpp @@ -56,7 +56,6 @@ public: char R600Packetizer::ID = 0; class R600PacketizerList : public VLIWPacketizerList { - private: const R600InstrInfo *TII; const R600RegisterInfo &TRI; @@ -148,12 +147,12 @@ private: } public: // Ctor. - R600PacketizerList(MachineFunction &MF, MachineLoopInfo &MLI) + R600PacketizerList(MachineFunction &MF, const R600Subtarget &ST, + MachineLoopInfo &MLI) : VLIWPacketizerList(MF, MLI, nullptr), - TII(static_cast<const R600InstrInfo *>( - MF.getSubtarget().getInstrInfo())), + TII(ST.getInstrInfo()), TRI(TII->getRegisterInfo()) { - VLIW5 = !MF.getSubtarget<AMDGPUSubtarget>().hasCaymanISA(); + VLIW5 = !ST.hasCaymanISA(); } // initPacketizerState - initialize some internal flags. @@ -327,11 +326,13 @@ public: }; bool R600Packetizer::runOnMachineFunction(MachineFunction &Fn) { - const TargetInstrInfo *TII = Fn.getSubtarget().getInstrInfo(); + const R600Subtarget &ST = Fn.getSubtarget<R600Subtarget>(); + const R600InstrInfo *TII = ST.getInstrInfo(); + MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>(); // Instantiate the packetizer. - R600PacketizerList Packetizer(Fn, MLI); + R600PacketizerList Packetizer(Fn, ST, MLI); // DFA state table should not be empty. 
assert(Packetizer.getResourceTracker() && "Empty DFA table!"); diff --git a/llvm/lib/Target/AMDGPU/R600RegisterInfo.cpp b/llvm/lib/Target/AMDGPU/R600RegisterInfo.cpp index fb0359cfc65..4c3a3f730fe 100644 --- a/llvm/lib/Target/AMDGPU/R600RegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/R600RegisterInfo.cpp @@ -28,8 +28,8 @@ R600RegisterInfo::R600RegisterInfo() : AMDGPURegisterInfo() { BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); - const R600InstrInfo *TII = - static_cast<const R600InstrInfo *>(MF.getSubtarget().getInstrInfo()); + const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>(); + const R600InstrInfo *TII = ST.getInstrInfo(); Reserved.set(AMDGPU::ZERO); Reserved.set(AMDGPU::HALF); diff --git a/llvm/lib/Target/AMDGPU/R600RegisterInfo.h b/llvm/lib/Target/AMDGPU/R600RegisterInfo.h index d8102e321d8..c1a1402ebe1 100644 --- a/llvm/lib/Target/AMDGPU/R600RegisterInfo.h +++ b/llvm/lib/Target/AMDGPU/R600RegisterInfo.h @@ -31,7 +31,7 @@ struct R600RegisterInfo final : public AMDGPURegisterInfo { /// \brief get the HW encoding for a register's channel. unsigned getHWRegChan(unsigned reg) const; - unsigned getHWRegIndex(unsigned Reg) const override; + unsigned getHWRegIndex(unsigned Reg) const; /// \brief get the register class of the specified type to use in the /// CFGStructurizer diff --git a/llvm/lib/Target/AMDGPU/SIDebuggerInsertNops.cpp b/llvm/lib/Target/AMDGPU/SIDebuggerInsertNops.cpp index 8883770efe1..65ceff3930a 100644 --- a/llvm/lib/Target/AMDGPU/SIDebuggerInsertNops.cpp +++ b/llvm/lib/Target/AMDGPU/SIDebuggerInsertNops.cpp @@ -20,6 +20,7 @@ //===----------------------------------------------------------------------===// #include "SIInstrInfo.h" +#include "AMDGPUSubtarget.h" #include "llvm/ADT/DenseSet.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -61,7 +62,7 @@ FunctionPass *llvm::createSIDebuggerInsertNopsPass() { bool SIDebuggerInsertNops::runOnMachineFunction(MachineFunction &MF) { // Skip this pass if "amdgpu-debugger-insert-nops" attribute was not // specified. - const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>(); + const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); if (!ST.debuggerInsertNops()) return false; @@ -70,8 +71,7 @@ bool SIDebuggerInsertNops::runOnMachineFunction(MachineFunction &MF) { return false; // Target instruction info. - const SIInstrInfo *TII = - static_cast<const SIInstrInfo*>(MF.getSubtarget().getInstrInfo()); + const SIInstrInfo *TII = ST.getInstrInfo(); // Set containing line numbers that have nop inserted. 
DenseSet<unsigned> NopInserted; diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h index 9a2e8b66bd3..23cde1c4bc1 100644 --- a/llvm/lib/Target/AMDGPU/SIDefines.h +++ b/llvm/lib/Target/AMDGPU/SIDefines.h @@ -48,7 +48,7 @@ namespace llvm { namespace AMDGPU { enum OperandType { /// Operand with register or 32-bit immediate - OPERAND_REG_IMM32 = llvm::MCOI::OPERAND_FIRST_TARGET, + OPERAND_REG_IMM32 = MCOI::OPERAND_FIRST_TARGET, /// Operand with register or inline constant OPERAND_REG_INLINE_C }; diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp index e241dd8fe4b..9e0086b7908 100644 --- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp +++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp @@ -237,11 +237,10 @@ static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI, } bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) { + const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); MachineRegisterInfo &MRI = MF.getRegInfo(); - const SIRegisterInfo *TRI = - static_cast<const SIRegisterInfo *>(MF.getSubtarget().getRegisterInfo()); - const SIInstrInfo *TII = - static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo()); + const SIRegisterInfo *TRI = ST.getRegisterInfo(); + const SIInstrInfo *TII = ST.getInstrInfo(); SmallVector<MachineInstr *, 16> Worklist; diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 236a3f16d5f..4ba9d73e321 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -16,8 +16,6 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/LLVMContext.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" @@ -298,9 +296,10 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(*MF.getFunction())) return false; + const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); + MachineRegisterInfo &MRI = MF.getRegInfo(); - const SIInstrInfo *TII = - static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo()); + const SIInstrInfo *TII = ST.getInstrInfo(); const SIRegisterInfo &TRI = TII->getRegisterInfo(); for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp index 37a47b45c18..1f3b361175e 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -11,6 +11,8 @@ #include "SIInstrInfo.h" #include "SIMachineFunctionInfo.h" #include "SIRegisterInfo.h" +#include "AMDGPUSubtarget.h" + #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -52,10 +54,9 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, if (hasOnlySGPRSpills(MFI, MF.getFrameInfo())) return; - const SIInstrInfo *TII = - static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo()); + const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); + const SIInstrInfo *TII = ST.getInstrInfo(); const SIRegisterInfo *TRI = &TII->getRegisterInfo(); - const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>(); MachineRegisterInfo &MRI = MF.getRegInfo(); MachineBasicBlock::iterator I = MBB.begin(); @@ -263,6 +264,11 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, } } +void 
SIFrameLowering::emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + +} + void SIFrameLowering::processFunctionBeforeFrameFinalized( MachineFunction &MF, RegScavenger *RS) const { diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.h b/llvm/lib/Target/AMDGPU/SIFrameLowering.h index a9152fd8b2a..c2e7a710817 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.h +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.h @@ -23,6 +23,8 @@ public: void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; + void emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const override; void processFunctionBeforeFrameFinalized( MachineFunction &MF, diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 63efbde70c5..60fe8c8bf54 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -52,8 +52,8 @@ static unsigned findFirstFreeSGPR(CCState &CCInfo) { llvm_unreachable("Cannot allocate sgpr"); } -SITargetLowering::SITargetLowering(TargetMachine &TM, - const AMDGPUSubtarget &STI) +SITargetLowering::SITargetLowering(const TargetMachine &TM, + const SISubtarget &STI) : AMDGPUTargetLowering(TM, STI) { addRegisterClass(MVT::i1, &AMDGPU::VReg_1RegClass); addRegisterClass(MVT::i64, &AMDGPU::SReg_64RegClass); @@ -190,7 +190,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM, setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i64, Expand); - if (Subtarget->hasFlatAddressSpace()) { + if (getSubtarget()->hasFlatAddressSpace()) { setOperationAction(ISD::ADDRSPACECAST, MVT::i32, Custom); setOperationAction(ISD::ADDRSPACECAST, MVT::i64, Custom); } @@ -205,7 +205,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM, setOperationAction(ISD::FMINNUM, MVT::f64, Legal); setOperationAction(ISD::FMAXNUM, MVT::f64, Legal); - if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS) { + if (Subtarget->getGeneration() >= SISubtarget::SEA_ISLANDS) { setOperationAction(ISD::FTRUNC, MVT::f64, Legal); setOperationAction(ISD::FCEIL, MVT::f64, Legal); setOperationAction(ISD::FRINT, MVT::f64, Legal); @@ -255,6 +255,10 @@ SITargetLowering::SITargetLowering(TargetMachine &TM, setSchedulingPreference(Sched::RegPressure); } +const SISubtarget *SITargetLowering::getSubtarget() const { + return static_cast<const SISubtarget *>(Subtarget); +} + //===----------------------------------------------------------------------===// // TargetLowering queries //===----------------------------------------------------------------------===// @@ -335,7 +339,7 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL, switch (AS) { case AMDGPUAS::GLOBAL_ADDRESS: { - if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { + if (Subtarget->getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) { // Assume that we will use FLAT for all global memory accesses // on VI. // FIXME: This assumption is currently wrong. On VI we still use @@ -363,16 +367,16 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL, if (DL.getTypeStoreSize(Ty) < 4) return isLegalMUBUFAddressingMode(AM); - if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS) { + if (Subtarget->getGeneration() == SISubtarget::SOUTHERN_ISLANDS) { // SMRD instructions have an 8-bit, dword offset on SI.
if (!isUInt<8>(AM.BaseOffs / 4)) return false; - } else if (Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS) { + } else if (Subtarget->getGeneration() == SISubtarget::SEA_ISLANDS) { // On CI+, this can also be a 32-bit literal constant offset. If it fits // in 8-bits, it can use a smaller encoding. if (!isUInt<32>(AM.BaseOffs / 4)) return false; - } else if (Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS) { + } else if (Subtarget->getGeneration() == SISubtarget::VOLCANIC_ISLANDS) { // On VI, these use the SMEM format and the offset is 20-bit in bytes. if (!isUInt<20>(AM.BaseOffs)) return false; @@ -519,8 +523,7 @@ SITargetLowering::getPreferredVectorAction(EVT VT) const { bool SITargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const { - const SIInstrInfo *TII = - static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo()); + const SIInstrInfo *TII = getSubtarget()->getInstrInfo(); return TII->isInlineConstant(Imm); } @@ -539,8 +542,7 @@ SDValue SITargetLowering::LowerParameterPtr(SelectionDAG &DAG, unsigned Offset) const { const DataLayout &DL = DAG.getDataLayout(); MachineFunction &MF = DAG.getMachineFunction(); - const SIRegisterInfo *TRI = - static_cast<const SIRegisterInfo*>(Subtarget->getRegisterInfo()); + const SIRegisterInfo *TRI = getSubtarget()->getRegisterInfo(); unsigned InputPtrReg = TRI->getPreloadedValue(MF, SIRegisterInfo::KERNARG_SEGMENT_PTR); MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo(); @@ -579,13 +581,12 @@ SDValue SITargetLowering::LowerFormalArguments( SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { - const SIRegisterInfo *TRI = - static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo()); + const SIRegisterInfo *TRI = getSubtarget()->getRegisterInfo(); MachineFunction &MF = DAG.getMachineFunction(); FunctionType *FType = MF.getFunction()->getFunctionType(); SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>(); - const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>(); + const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); if (Subtarget->isAmdHsaOS() && AMDGPU::isShader(CallConv)) { const Function *Fn = MF.getFunction(); @@ -740,7 +741,7 @@ SDValue SITargetLowering::LowerFormalArguments( auto *ParamTy = dyn_cast<PointerType>(FType->getParamType(Ins[i].getOrigArgIndex())); - if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS && + if (Subtarget->getGeneration() == SISubtarget::SOUTHERN_ISLANDS && ParamTy && ParamTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) { // On SI local pointers are just offsets into LDS, so they are always // less than 16-bits. 
On CI and newer they could potentially be @@ -1030,7 +1031,7 @@ unsigned SITargetLowering::getRegisterByName(const char* RegName, EVT VT, } - if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS && + if (Subtarget->getGeneration() == SISubtarget::SOUTHERN_ISLANDS && Subtarget->getRegisterInfo()->regsOverlap(Reg, AMDGPU::FLAT_SCR)) { report_fatal_error(Twine("invalid register \"" + StringRef(RegName) + "\" for subtarget.")); @@ -1062,8 +1063,7 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter( MachineInstr *MI, MachineBasicBlock *BB) const { switch (MI->getOpcode()) { case AMDGPU::SI_INIT_M0: { - const SIInstrInfo *TII = - static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo()); + const SIInstrInfo *TII = getSubtarget()->getInstrInfo(); BuildMI(*BB, MI->getIterator(), MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0) .addOperand(MI->getOperand(0)); @@ -1073,8 +1073,8 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter( case AMDGPU::BRANCH: return BB; case AMDGPU::GET_GROUPSTATICSIZE: { - const SIInstrInfo *TII = - static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo()); + const SIInstrInfo *TII = getSubtarget()->getInstrInfo(); + MachineFunction *MF = BB->getParent(); SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); DebugLoc DL = MI->getDebugLoc(); @@ -1522,8 +1522,7 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); auto MFI = MF.getInfo<SIMachineFunctionInfo>(); - const SIRegisterInfo *TRI = - static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo()); + const SIRegisterInfo *TRI = getSubtarget()->getRegisterInfo(); EVT VT = Op.getValueType(); SDLoc DL(Op); @@ -1562,14 +1561,14 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, case AMDGPUIntrinsic::AMDGPU_rsq: // Legacy name return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1)); case Intrinsic::amdgcn_rsq_legacy: { - if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) + if (Subtarget->getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) return emitRemovedIntrinsicError(DAG, DL, VT); return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1)); } case Intrinsic::amdgcn_rsq_clamp: case AMDGPUIntrinsic::AMDGPU_rsq_clamped: { // Legacy name - if (Subtarget->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS) + if (Subtarget->getGeneration() < SISubtarget::VOLCANIC_ISLANDS) return DAG.getNode(AMDGPUISD::RSQ_CLAMP, DL, VT, Op.getOperand(1)); Type *Type = VT.getTypeForEVT(*DAG.getContext()); @@ -1730,7 +1729,7 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getNode(AMDGPUISD::COS_HW, DL, VT, Op.getOperand(1)); case Intrinsic::amdgcn_log_clamp: { - if (Subtarget->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS) + if (Subtarget->getGeneration() < SISubtarget::VOLCANIC_ISLANDS) return SDValue(); DiagnosticInfoUnsupported BadIntrin( @@ -2129,7 +2128,7 @@ SDValue SITargetLowering::LowerFDIV64(SDValue Op, SelectionDAG &DAG) const { SDValue Scale; - if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS) { + if (Subtarget->getGeneration() == SISubtarget::SOUTHERN_ISLANDS) { // Workaround a hardware bug on SI where the condition output from div_scale // is not usable. 
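The SIISelLowering.cpp hunks above show the two moves this cleanup repeats across the backend: a getSubtarget() accessor that performs the AMDGPUSubtarget-to-SISubtarget cast in exactly one place, and per-generation checks spelled against the subclass enumerators. Below is a minimal, self-contained C++ sketch of that shape; every name in it (AMDGPUSubtargetSketch, SISubtargetSketch, isLegalSMRDOffset) is a hypothetical stand-in for illustration, not LLVM API, and the offset widths only loosely mirror the SMRD checks quoted above.

    // Hypothetical sketch, not LLVM code: centralize the derived-subtarget
    // cast in one accessor so generation checks read cleanly at use sites.
    #include <cstdint>
    #include <iostream>

    struct AMDGPUSubtargetSketch {
      enum Generation { SOUTHERN_ISLANDS, SEA_ISLANDS, VOLCANIC_ISLANDS };
      explicit AMDGPUSubtargetSketch(Generation G) : Gen(G) {}
      virtual ~AMDGPUSubtargetSketch() = default;
      Generation getGeneration() const { return Gen; }
    private:
      Generation Gen;
    };

    struct SISubtargetSketch final : AMDGPUSubtargetSketch {
      using AMDGPUSubtargetSketch::AMDGPUSubtargetSketch;
      // A query that only makes sense on the GCN half of the hierarchy.
      bool hasFlatAddressSpace() const { return getGeneration() >= SEA_ISLANDS; }
    };

    class SITargetLoweringSketch {
      const AMDGPUSubtargetSketch *Subtarget; // shared code stores the base type
    public:
      explicit SITargetLoweringSketch(const SISubtargetSketch &ST)
          : Subtarget(&ST) {}

      // The static_cast happens here once, not at every use site.
      const SISubtargetSketch *getSubtarget() const {
        return static_cast<const SISubtargetSketch *>(Subtarget);
      }

      // Loosely modeled on the SMRD offset checks above: an 8-bit dword
      // offset before VI, a 20-bit byte offset on VI.
      bool isLegalSMRDOffset(uint64_t Offs) const {
        if (getSubtarget()->getGeneration() <
            AMDGPUSubtargetSketch::VOLCANIC_ISLANDS)
          return Offs % 4 == 0 && (Offs / 4) < (1u << 8);
        return Offs < (1u << 20);
      }
    };

    int main() {
      SISubtargetSketch VI(AMDGPUSubtargetSketch::VOLCANIC_ISLANDS);
      SITargetLoweringSketch TL(VI);
      std::cout << TL.isLegalSMRDOffset(4096) << '\n'; // 1: fits in 20 bits
    }

Keeping the stored member at the base type lets code shared with the R600 half still compile, while every SI-only query site stays cast-free.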
@@ -2389,7 +2388,7 @@ SDValue SITargetLowering::performUCharToFloatCombine(SDNode *N, /// \brief Return true if the given offset Size in bytes can be folded into /// the immediate offsets of a memory instruction for the given address space. static bool canFoldOffset(unsigned OffsetSize, unsigned AS, - const AMDGPUSubtarget &STI) { + const SISubtarget &STI) { switch (AS) { case AMDGPUAS::GLOBAL_ADDRESS: { // MUBUF instructions have a 12-bit offset in bytes. @@ -2398,7 +2397,7 @@ static bool canFoldOffset(unsigned OffsetSize, unsigned AS, case AMDGPUAS::CONSTANT_ADDRESS: { // SMRD instructions have an 8-bit offset in dwords on SI and // a 20-bit offset in bytes on VI. - if (STI.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) + if (STI.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) return isUInt<20>(OffsetSize); else return (OffsetSize % 4 == 0) && isUInt<8>(OffsetSize / 4); @@ -2449,7 +2448,7 @@ SDValue SITargetLowering::performSHLPtrCombine(SDNode *N, // If the resulting offset is too large, we can't fold it into the addressing // mode offset. APInt Offset = CAdd->getAPIntValue() << CN1->getAPIntValue(); - if (!canFoldOffset(Offset.getZExtValue(), AddrSpace, *Subtarget)) + if (!canFoldOffset(Offset.getZExtValue(), AddrSpace, *getSubtarget())) return SDValue(); SelectionDAG &DAG = DCI.DAG; @@ -3013,9 +3012,7 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N, /// Returns -1 if it isn't an immediate, 0 if it's an inline immediate /// and the immediate value if it's a literal immediate int32_t SITargetLowering::analyzeImmediate(const SDNode *N) const { - - const SIInstrInfo *TII = - static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo()); + const SIInstrInfo *TII = getSubtarget()->getInstrInfo(); if (const ConstantSDNode *Node = dyn_cast<ConstantSDNode>(N)) { if (TII->isInlineConstant(Node->getAPIntValue())) @@ -3163,8 +3160,7 @@ void SITargetLowering::legalizeTargetIndependentNode(SDNode *Node, /// \brief Fold the instructions after selecting them. SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node, SelectionDAG &DAG) const { - const SIInstrInfo *TII = - static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo()); + const SIInstrInfo *TII = getSubtarget()->getInstrInfo(); unsigned Opcode = Node->getMachineOpcode(); if (TII->isMIMG(Opcode) && !TII->get(Opcode).mayStore()) @@ -3182,8 +3178,7 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node, /// bits set in the writemask void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, SDNode *Node) const { - const SIInstrInfo *TII = - static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo()); + const SIInstrInfo *TII = getSubtarget()->getInstrInfo(); MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); @@ -3260,8 +3255,7 @@ static SDValue buildSMovImm32(SelectionDAG &DAG, const SDLoc &DL, MachineSDNode *SITargetLowering::wrapAddr64Rsrc(SelectionDAG &DAG, const SDLoc &DL, SDValue Ptr) const { - const SIInstrInfo *TII = - static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo()); + const SIInstrInfo *TII = getSubtarget()->getInstrInfo(); // Build the half of the subregister with the constants before building the // full 128-bit register.
If we are building multiple resource descriptors, diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h index 4f709a58e4d..2f013198970 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.h +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -71,7 +71,9 @@ class SITargetLowering final : public AMDGPUTargetLowering { bool isCFIntrinsic(const SDNode *Intr) const; public: - SITargetLowering(TargetMachine &tm, const AMDGPUSubtarget &STI); + SITargetLowering(const TargetMachine &tm, const SISubtarget &STI); + + const SISubtarget *getSubtarget() const; bool getTgtMemIntrinsic(IntrinsicInfo &, const CallInst &, unsigned IntrinsicID) const override; diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp index abc9c4d6184..a6976eeacbe 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp @@ -55,6 +55,7 @@ typedef std::pair<unsigned, unsigned> RegInterval; class SIInsertWaits : public MachineFunctionPass { private: + const SISubtarget *ST; const SIInstrInfo *TII; const SIRegisterInfo *TRI; const MachineRegisterInfo *MRI; @@ -136,6 +137,7 @@ public: SIInsertWaits() : MachineFunctionPass(ID), + ST(nullptr), TII(nullptr), TRI(nullptr), ExpInstrTypesSeen(0), @@ -303,8 +305,7 @@ void SIInsertWaits::pushInstruction(MachineBasicBlock &MBB, return; } - if (MBB.getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration() >= - AMDGPUSubtarget::VOLCANIC_ISLANDS) { + if (ST->getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) { // Any occurrence of consecutive VMEM or SMEM instructions forms a VMEM // or SMEM clause, respectively. // @@ -486,8 +487,7 @@ Counters SIInsertWaits::handleOperands(MachineInstr &MI) { void SIInsertWaits::handleSendMsg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) { - if (MBB.getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration() < - AMDGPUSubtarget::VOLCANIC_ISLANDS) + if (ST->getGeneration() < SISubtarget::VOLCANIC_ISLANDS) return; // There must be "S_NOP 0" between an instruction writing M0 and S_SENDMSG. 
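The SIInsertWaits hunks above, together with the runOnMachineFunction hunk that follows, show the other recurring change: the pass caches one SISubtarget pointer when it starts instead of re-deriving it from MBB.getParent() at every query. A self-contained C++ sketch of that caching pattern follows; the names (SubtargetSketch, MachineFunctionSketch, InsertWaitsSketch) are hypothetical and the S_NOP workaround is stubbed out, assuming only the VI-only early-out visible in the hunk above.

    // Hypothetical sketch, not LLVM code: cache the subtarget once per
    // function instead of re-deriving it at every query point.
    #include <cassert>

    struct SubtargetSketch {
      enum Generation { SEA_ISLANDS, VOLCANIC_ISLANDS };
      Generation Gen = SEA_ISLANDS;
      Generation getGeneration() const { return Gen; }
    };

    struct MachineFunctionSketch {
      SubtargetSketch ST;
      const SubtargetSketch &getSubtarget() const { return ST; }
    };

    class InsertWaitsSketch {
      const SubtargetSketch *ST = nullptr; // set once per function

      void handleSendMsg() const {
        assert(ST && "subtarget not cached yet");
        if (ST->getGeneration() < SubtargetSketch::VOLCANIC_ISLANDS)
          return; // pre-VI targets do not need the workaround
        // ... would insert "S_NOP 0" between an M0 write and S_SENDMSG ...
      }

    public:
      bool runOnMachineFunction(const MachineFunctionSketch &MF) {
        ST = &MF.getSubtarget(); // one lookup for the whole function
        handleSendMsg();
        return false; // the sketch changes nothing
      }
    };

    int main() {
      MachineFunctionSketch MF;
      InsertWaitsSketch Pass;
      Pass.runOnMachineFunction(MF);
    }

One lookup per function also makes the generation checks cheap enough to sit inside per-instruction loops, which is where SIInsertWaits uses them.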
@@ -514,11 +514,9 @@ void SIInsertWaits::handleSendMsg(MachineBasicBlock &MBB, bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) { bool Changes = false; - TII = static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo()); - TRI = - static_cast<const SIRegisterInfo *>(MF.getSubtarget().getRegisterInfo()); - - const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>(); + ST = &MF.getSubtarget<SISubtarget>(); + TII = ST->getInstrInfo(); + TRI = &TII->getRegisterInfo(); MRI = &MF.getRegInfo(); WaitedOn = ZeroCounts; @@ -540,7 +538,7 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) { for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I) { - if (ST.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS) { + if (ST->getGeneration() <= SISubtarget::SEA_ISLANDS) { // There is a hardware bug on CI/SI where SMRD instruction may corrupt // vccz bit, so when we detect that an instruction may read from a // corrupt vccz bit, we need to: diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index d23b14c8d6c..91fbebfbe5c 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -28,8 +28,8 @@ using namespace llvm; -SIInstrInfo::SIInstrInfo(const AMDGPUSubtarget &st) - : AMDGPUInstrInfo(st), RI() {} +SIInstrInfo::SIInstrInfo(const SISubtarget &ST) + : AMDGPUInstrInfo(ST), RI(), ST(ST) {} //===----------------------------------------------------------------------===// // TargetInstrInfo callbacks @@ -730,9 +730,8 @@ unsigned SIInstrInfo::calculateLDSSpillAddress(MachineBasicBlock &MBB, unsigned Size) const { MachineFunction *MF = MBB.getParent(); SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); - const AMDGPUSubtarget &ST = MF->getSubtarget<AMDGPUSubtarget>(); - const SIRegisterInfo *TRI = - static_cast<const SIRegisterInfo*>(ST.getRegisterInfo()); + const SISubtarget &ST = MF->getSubtarget<SISubtarget>(); + const SIRegisterInfo *TRI = ST.getRegisterInfo(); DebugLoc DL = MBB.findDebugLoc(MI); unsigned WorkGroupSize = MFI->getMaximumWorkGroupSize(*MF); unsigned WavefrontSize = ST.getWavefrontSize(); @@ -913,8 +912,8 @@ bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { } case AMDGPU::SI_PC_ADD_REL_OFFSET: { - const SIRegisterInfo *TRI = - static_cast<const SIRegisterInfo *>(ST.getRegisterInfo()); + const SIRegisterInfo *TRI + = static_cast<const SIRegisterInfo *>(ST.getRegisterInfo()); MachineFunction &MF = *MBB.getParent(); unsigned Reg = MI->getOperand(0).getReg(); unsigned RegLo = TRI->getSubReg(Reg, AMDGPU::sub0); @@ -1460,7 +1459,7 @@ bool SIInstrInfo::isSchedulingBoundary(const MachineInstr *MI, // Target-independent instructions do not have an implicit-use of EXEC, even // when they operate on VGPRs. Treating EXEC modifications as scheduling // boundaries prevents incorrect movements of such instructions. - const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + const SIRegisterInfo *TRI = MF.getSubtarget<SISubtarget>().getRegisterInfo(); if (MI->modifiesRegister(AMDGPU::EXEC, TRI)) return true; @@ -2422,8 +2421,8 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const { } else { // This instruction is the _OFFSET variant, so we need to convert it to // ADDR64.
- assert(MBB.getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration() - < AMDGPUSubtarget::VOLCANIC_ISLANDS && + assert(MBB.getParent()->getSubtarget<SISubtarget>().getGeneration() + < SISubtarget::VOLCANIC_ISLANDS && "FIXME: Need to emit flat atomics here"); MachineOperand *VData = getNamedOperand(*MI, AMDGPU::OpName::vdata); @@ -2547,37 +2546,37 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const { } case AMDGPU::S_LSHL_B32: - if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { + if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) { NewOpcode = AMDGPU::V_LSHLREV_B32_e64; swapOperands(Inst); } break; case AMDGPU::S_ASHR_I32: - if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { + if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) { NewOpcode = AMDGPU::V_ASHRREV_I32_e64; swapOperands(Inst); } break; case AMDGPU::S_LSHR_B32: - if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { + if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) { NewOpcode = AMDGPU::V_LSHRREV_B32_e64; swapOperands(Inst); } break; case AMDGPU::S_LSHL_B64: - if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { + if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) { NewOpcode = AMDGPU::V_LSHLREV_B64; swapOperands(Inst); } break; case AMDGPU::S_ASHR_I64: - if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { + if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) { NewOpcode = AMDGPU::V_ASHRREV_I64; swapOperands(Inst); } break; case AMDGPU::S_LSHR_B64: - if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { + if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) { NewOpcode = AMDGPU::V_LSHRREV_B64; swapOperands(Inst); } @@ -3096,7 +3095,7 @@ uint64_t SIInstrInfo::getDefaultRsrcDataFormat() const { if (ST.isAmdHsaOS()) { RsrcDataFormat |= (1ULL << 56); - if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) + if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) // Set MTYPE = 2 RsrcDataFormat |= (2ULL << 59); } @@ -3117,7 +3116,7 @@ uint64_t SIInstrInfo::getScratchRsrcWords23() const { // If TID_ENABLE is set, DATA_FORMAT specifies stride bits [14:17]. // Clear them unless we want a huge stride. - if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) + if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) Rsrc23 &= ~AMDGPU::RSRC_DATA_FORMAT; return Rsrc23; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index a20b8178888..3f6a8d26651 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -25,6 +25,7 @@ namespace llvm { class SIInstrInfo final : public AMDGPUInstrInfo { private: const SIRegisterInfo RI; + const SISubtarget &ST; // The inverse predicate should have the negative value.
enum BranchPredicate { @@ -91,9 +92,9 @@ protected: unsigned OpIdx1) const override; public: - explicit SIInstrInfo(const AMDGPUSubtarget &st); + explicit SIInstrInfo(const SISubtarget &); - const SIRegisterInfo &getRegisterInfo() const override { + const SIRegisterInfo &getRegisterInfo() const { return RI; } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 1802a905d80..301690d5fd2 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -7,9 +7,9 @@ // //===----------------------------------------------------------------------===// def isCI : Predicate<"Subtarget->getGeneration() " - ">= AMDGPUSubtarget::SEA_ISLANDS">; + ">= SISubtarget::SEA_ISLANDS">; def isCIOnly : Predicate<"Subtarget->getGeneration() ==" - "AMDGPUSubtarget::SEA_ISLANDS">, + "SISubtarget::SEA_ISLANDS">, AssemblerPredicate <"FeatureSeaIslands">; def DisableInst : Predicate <"false">, AssemblerPredicate<"FeatureDisable">; @@ -78,9 +78,9 @@ class smrd<bits<8> si, bits<8> vi = si> { field bits<8> VI = vi; } -// Except for the NONE field, this must be kept in sync with the SISubtarget enum -// in AMDGPUInstrInfo.cpp -def SISubtarget { +// Except for the NONE field, this must be kept in sync with the +// SIEncodingFamily enum in AMDGPUInstrInfo.cpp +def SIEncodingFamily { int NONE = -1; int SI = 0; int VI = 1; @@ -425,7 +425,7 @@ class InlineFPImm <ValueType vt> : PatLeaf <(vt fpimm), [{ }]>; class SGPRImm <dag frag> : PatLeaf<frag, [{ - if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) { + if (Subtarget->getGeneration() < SISubtarget::SOUTHERN_ISLANDS) { return false; } const SIRegisterInfo *SIRI = @@ -681,15 +681,15 @@ class EXPCommon : InstSI< multiclass EXP_m { let isPseudo = 1, isCodeGenOnly = 1 in { - def "" : EXPCommon, SIMCInstr <"exp", SISubtarget.NONE> ; + def "" : EXPCommon, SIMCInstr <"exp", SIEncodingFamily.NONE> ; } - def _si : EXPCommon, SIMCInstr <"exp", SISubtarget.SI>, EXPe { + def _si : EXPCommon, SIMCInstr <"exp", SIEncodingFamily.SI>, EXPe { let DecoderNamespace="SICI"; let DisableDecoder = DisableSIDecoder; } - def _vi : EXPCommon, SIMCInstr <"exp", SISubtarget.VI>, EXPe_vi { + def _vi : EXPCommon, SIMCInstr <"exp", SIEncodingFamily.VI>, EXPe_vi { let DecoderNamespace="VI"; let DisableDecoder = DisableVIDecoder; } @@ -701,7 +701,7 @@ multiclass EXP_m { class SOP1_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> : SOP1 <outs, ins, "", pattern>, - SIMCInstr<opName, SISubtarget.NONE> { + SIMCInstr<opName, SIEncodingFamily.NONE> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -709,7 +709,7 @@ class SOP1_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> : class SOP1_Real_si <sop1 op, string opName, dag outs, dag ins, string asm> : SOP1 <outs, ins, asm, []>, SOP1e <op.SI>, - SIMCInstr<opName, SISubtarget.SI> { + SIMCInstr<opName, SIEncodingFamily.SI> { let isCodeGenOnly = 0; let AssemblerPredicates = [isSICI]; let DecoderNamespace = "SICI"; @@ -719,7 +719,7 @@ class SOP1_Real_si <sop1 op, string opName, dag outs, dag ins, string asm> : class SOP1_Real_vi <sop1 op, string opName, dag outs, dag ins, string asm> : SOP1 <outs, ins, asm, []>, SOP1e <op.VI>, - SIMCInstr<opName, SISubtarget.VI> { + SIMCInstr<opName, SIEncodingFamily.VI> { let isCodeGenOnly = 0; let AssemblerPredicates = [isVI]; let DecoderNamespace = "VI"; @@ -791,7 +791,7 @@ multiclass SOP1_64_32 <sop1 op, string opName, list<dag> pattern> : SOP1_m < class SOP2_Pseudo<string opName, dag outs, dag ins, list<dag> pattern>
: SOP2<outs, ins, "", pattern>, - SIMCInstr<opName, SISubtarget.NONE> { + SIMCInstr<opName, SIEncodingFamily.NONE> { let isPseudo = 1; let isCodeGenOnly = 1; let Size = 4; @@ -806,7 +806,7 @@ class SOP2_Pseudo<string opName, dag outs, dag ins, list<dag> pattern> : class SOP2_Real_si<sop2 op, string opName, dag outs, dag ins, string asm> : SOP2<outs, ins, asm, []>, SOP2e<op.SI>, - SIMCInstr<opName, SISubtarget.SI> { + SIMCInstr<opName, SIEncodingFamily.SI> { let AssemblerPredicates = [isSICI]; let DecoderNamespace = "SICI"; let DisableDecoder = DisableSIDecoder; @@ -815,7 +815,7 @@ class SOP2_Real_si<sop2 op, string opName, dag outs, dag ins, string asm> : class SOP2_Real_vi<sop2 op, string opName, dag outs, dag ins, string asm> : SOP2<outs, ins, asm, []>, SOP2e<op.VI>, - SIMCInstr<opName, SISubtarget.VI> { + SIMCInstr<opName, SIEncodingFamily.VI> { let AssemblerPredicates = [isVI]; let DecoderNamespace = "VI"; let DisableDecoder = DisableVIDecoder; @@ -875,7 +875,7 @@ class SOPC_64_32<bits<7> op, string opName, list<dag> pattern = []> class SOPK_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> : SOPK <outs, ins, "", pattern>, - SIMCInstr<opName, SISubtarget.NONE> { + SIMCInstr<opName, SIEncodingFamily.NONE> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -883,7 +883,7 @@ class SOPK_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> : class SOPK_Real_si <sopk op, string opName, dag outs, dag ins, string asm> : SOPK <outs, ins, asm, []>, SOPKe <op.SI>, - SIMCInstr<opName, SISubtarget.SI> { + SIMCInstr<opName, SIEncodingFamily.SI> { let AssemblerPredicates = [isSICI]; let DecoderNamespace = "SICI"; let DisableDecoder = DisableSIDecoder; @@ -893,7 +893,7 @@ class SOPK_Real_si <sopk op, string opName, dag outs, dag ins, string asm> : class SOPK_Real_vi <sopk op, string opName, dag outs, dag ins, string asm> : SOPK <outs, ins, asm, []>, SOPKe <op.VI>, - SIMCInstr<opName, SISubtarget.VI> { + SIMCInstr<opName, SIEncodingFamily.VI> { let AssemblerPredicates = [isVI]; let DecoderNamespace = "VI"; let DisableDecoder = DisableVIDecoder; @@ -951,7 +951,7 @@ multiclass SOPK_IMM32 <sopk op, string opName, dag outs, dag ins, def _si : SOPK <outs, ins, asm, []>, SOPK64e <op.SI>, - SIMCInstr<opName, SISubtarget.SI> { + SIMCInstr<opName, SIEncodingFamily.SI> { let AssemblerPredicates = [isSICI]; let DecoderNamespace = "SICI"; let DisableDecoder = DisableSIDecoder; @@ -960,7 +960,7 @@ multiclass SOPK_IMM32 <sopk op, string opName, dag outs, dag ins, def _vi : SOPK <outs, ins, asm, []>, SOPK64e <op.VI>, - SIMCInstr<opName, SISubtarget.VI> { + SIMCInstr<opName, SIEncodingFamily.VI> { let AssemblerPredicates = [isVI]; let DecoderNamespace = "VI"; let DisableDecoder = DisableVIDecoder; @@ -973,7 +973,7 @@ multiclass SOPK_IMM32 <sopk op, string opName, dag outs, dag ins, class SMRD_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> : SMRD <outs, ins, "", pattern>, - SIMCInstr<opName, SISubtarget.NONE> { + SIMCInstr<opName, SIEncodingFamily.NONE> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -982,7 +982,7 @@ class SMRD_IMM_Real_si <bits<5> op, string opName, dag outs, dag ins, string asm> : SMRD <outs, ins, asm, []>, SMRD_IMMe <op>, - SIMCInstr<opName, SISubtarget.SI> { + SIMCInstr<opName, SIEncodingFamily.SI> { let AssemblerPredicates = [isSICI]; let DecoderNamespace = "SICI"; let DisableDecoder = DisableSIDecoder; @@ -992,7 +992,7 @@ class SMRD_SOFF_Real_si <bits<5> op, string opName, dag outs, dag ins, string asm> : SMRD <outs, ins, asm, []>, SMRD_SOFFe <op>, - SIMCInstr<opName, 
SISubtarget.SI> {
+    SIMCInstr<opName, SIEncodingFamily.SI> {
   let AssemblerPredicates = [isSICI];
   let DecoderNamespace = "SICI";
   let DisableDecoder = DisableSIDecoder;
@@ -1003,7 +1003,7 @@ class SMRD_IMM_Real_vi <bits<8> op, string opName, dag outs, dag ins,
                         string asm, list<dag> pattern = []>
   : SMRD <outs, ins, asm, pattern>,
     SMEM_IMMe_vi <op>,
-    SIMCInstr<opName, SISubtarget.VI> {
+    SIMCInstr<opName, SIEncodingFamily.VI> {
   let AssemblerPredicates = [isVI];
   let DecoderNamespace = "VI";
   let DisableDecoder = DisableVIDecoder;
@@ -1013,7 +1013,7 @@ class SMRD_SOFF_Real_vi <bits<8> op, string opName, dag outs, dag ins,
                          string asm, list<dag> pattern = []>
   : SMRD <outs, ins, asm, pattern>,
     SMEM_SOFFe_vi <op>,
-    SIMCInstr<opName, SISubtarget.VI> {
+    SIMCInstr<opName, SIEncodingFamily.VI> {
   let AssemblerPredicates = [isVI];
   let DecoderNamespace = "VI";
   let DisableDecoder = DisableVIDecoder;
@@ -1342,7 +1342,7 @@ class getAsmDPP <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT =

 class getAsmSDWA <bit HasDst, int NumSrcArgs, bit HasFloatModifiers,
                   ValueType DstVT = i32> {
-  string dst = !if(HasDst, 
+  string dst = !if(HasDst,
                    !if(!eq(DstVT.Size, 1),
                        "$sdst", // use $sdst for VOPC
                        "$vdst"),
@@ -1350,8 +1350,8 @@ class getAsmSDWA <bit HasDst, int NumSrcArgs, bit HasFloatModifiers,
   string src0 = !if(HasFloatModifiers, "$src0_fmodifiers", "$src0_imodifiers");
   string src1 = !if(HasFloatModifiers, "$src1_fmodifiers", "$src1_imodifiers");
   string args = !if(!eq(NumSrcArgs, 0),
-                    "", 
-                    !if(!eq(NumSrcArgs, 1), 
+                    "",
+                    !if(!eq(NumSrcArgs, 1),
                         ", "#src0#"$clamp",
                         ", "#src0#", "#src1#"$clamp"
                     )
@@ -1652,7 +1652,7 @@ class AtomicNoRet <string noRetOp, bit isRet> {
 class VOP1_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
   VOP1Common <outs, ins, "", pattern>,
   VOP <opName>,
-  SIMCInstr <opName#"_e32", SISubtarget.NONE>,
+  SIMCInstr <opName#"_e32", SIEncodingFamily.NONE>,
   MnemonicAlias<opName#"_e32", opName> {
   let isPseudo = 1;
   let isCodeGenOnly = 1;
@@ -1663,7 +1663,7 @@ class VOP1_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :

 class VOP1_Real_si <string opName, vop1 op, dag outs, dag ins, string asm> :
   VOP1<op.SI, outs, ins, asm, []>,
-  SIMCInstr <opName#"_e32", SISubtarget.SI> {
+  SIMCInstr <opName#"_e32", SIEncodingFamily.SI> {
   let AssemblerPredicate = SIAssemblerPredicate;
   let DecoderNamespace = "SICI";
   let DisableDecoder = DisableSIDecoder;
@@ -1671,7 +1671,7 @@ class VOP1_Real_si <string opName, vop1 op, dag outs, dag ins, string asm> :

 class VOP1_Real_vi <string opName, vop1 op, dag outs, dag ins, string asm> :
   VOP1<op.VI, outs, ins, asm, []>,
-  SIMCInstr <opName#"_e32", SISubtarget.VI> {
+  SIMCInstr <opName#"_e32", SIEncodingFamily.VI> {
   let AssemblerPredicates = [isVI];
   let DecoderNamespace = "VI";
   let DisableDecoder = DisableVIDecoder;
@@ -1741,7 +1741,7 @@ multiclass VOP1SI_m <vop1 op, string opName, VOPProfile p, list<dag> pattern,

 class VOP2_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
   VOP2Common <outs, ins, "", pattern>,
   VOP <opName>,
-  SIMCInstr<opName#"_e32", SISubtarget.NONE>,
+  SIMCInstr<opName#"_e32", SIEncodingFamily.NONE>,
   MnemonicAlias<opName#"_e32", opName> {
   let isPseudo = 1;
   let isCodeGenOnly = 1;
@@ -1749,7 +1749,7 @@ class VOP2_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :

 class VOP2_Real_si <string opName, vop2 op, dag outs, dag ins, string asm> :
   VOP2 <op.SI, outs, ins, opName#asm, []>,
-  SIMCInstr <opName#"_e32", SISubtarget.SI> {
+  SIMCInstr <opName#"_e32", SIEncodingFamily.SI> {
   let AssemblerPredicates = [isSICI];
   let DecoderNamespace = "SICI";
   let DisableDecoder = DisableSIDecoder;
@@ -1757,7 +1757,7 @@ class VOP2_Real_si <string opName, vop2 op, dag outs, dag ins, string asm> :

 class VOP2_Real_vi <string opName, vop2 op, dag outs, dag ins, string asm> :
   VOP2 <op.VI, outs, ins, opName#asm, []>,
-  SIMCInstr <opName#"_e32", SISubtarget.VI> {
+  SIMCInstr <opName#"_e32", SIEncodingFamily.VI> {
   let AssemblerPredicates = [isVI];
   let DecoderNamespace = "VI";
   let DisableDecoder = DisableVIDecoder;
@@ -1830,7 +1830,7 @@ class VOP3_Pseudo <dag outs, dag ins, list<dag> pattern, string opName,
                    bit HasMods = 0, bit VOP3Only = 0> :
   VOP3Common <outs, ins, "", pattern, HasMods, VOP3Only>,
   VOP <opName>,
-  SIMCInstr<opName#"_e64", SISubtarget.NONE>,
+  SIMCInstr<opName#"_e64", SIEncodingFamily.NONE>,
   MnemonicAlias<opName#"_e64", opName> {
   let isPseudo = 1;
   let isCodeGenOnly = 1;
@@ -1843,7 +1843,7 @@ class VOP3_Real_si <bits<9> op, dag outs, dag ins, string asm, string opName,
                     bit HasMods = 0, bit VOP3Only = 0> :
   VOP3Common <outs, ins, asm, [], HasMods, VOP3Only>,
   VOP3e <op>,
-  SIMCInstr<opName#"_e64", SISubtarget.SI> {
+  SIMCInstr<opName#"_e64", SIEncodingFamily.SI> {
   let AssemblerPredicates = [isSICI];
   let DecoderNamespace = "SICI";
   let DisableDecoder = DisableSIDecoder;
@@ -1853,7 +1853,7 @@ class VOP3_Real_vi <bits<10> op, dag outs, dag ins, string asm, string opName,
                     bit HasMods = 0, bit VOP3Only = 0> :
   VOP3Common <outs, ins, asm, [], HasMods, VOP3Only>,
   VOP3e_vi <op>,
-  SIMCInstr <opName#"_e64", SISubtarget.VI> {
+  SIMCInstr <opName#"_e64", SIEncodingFamily.VI> {
   let AssemblerPredicates = [isVI];
   let DecoderNamespace = "VI";
   let DisableDecoder = DisableVIDecoder;
@@ -1863,7 +1863,7 @@ class VOP3_C_Real_si <bits<9> op, dag outs, dag ins, string asm, string opName,
                       bit HasMods = 0, bit VOP3Only = 0> :
   VOP3Common <outs, ins, asm, [], HasMods, VOP3Only>,
   VOP3ce <op>,
-  SIMCInstr<opName#"_e64", SISubtarget.SI> {
+  SIMCInstr<opName#"_e64", SIEncodingFamily.SI> {
   let AssemblerPredicates = [isSICI];
   let DecoderNamespace = "SICI";
   let DisableDecoder = DisableSIDecoder;
@@ -1873,7 +1873,7 @@ class VOP3_C_Real_vi <bits<10> op, dag outs, dag ins, string asm, string opName,
                       bit HasMods = 0, bit VOP3Only = 0> :
   VOP3Common <outs, ins, asm, [], HasMods, VOP3Only>,
   VOP3ce_vi <op>,
-  SIMCInstr <opName#"_e64", SISubtarget.VI> {
+  SIMCInstr <opName#"_e64", SIEncodingFamily.VI> {
   let AssemblerPredicates = [isVI];
   let DecoderNamespace = "VI";
   let DisableDecoder = DisableVIDecoder;
@@ -1883,7 +1883,7 @@ class VOP3b_Real_si <bits<9> op, dag outs, dag ins, string asm, string opName,
                      bit HasMods = 0, bit VOP3Only = 0> :
   VOP3Common <outs, ins, asm, [], HasMods, VOP3Only>,
   VOP3be <op>,
-  SIMCInstr<opName#"_e64", SISubtarget.SI> {
+  SIMCInstr<opName#"_e64", SIEncodingFamily.SI> {
   let AssemblerPredicates = [isSICI];
   let DecoderNamespace = "SICI";
   let DisableDecoder = DisableSIDecoder;
@@ -1893,7 +1893,7 @@ class VOP3b_Real_vi <bits<10> op, dag outs, dag ins, string asm, string opName,
                      bit HasMods = 0, bit VOP3Only = 0> :
   VOP3Common <outs, ins, asm, [], HasMods, VOP3Only>,
   VOP3be_vi <op>,
-  SIMCInstr <opName#"_e64", SISubtarget.VI> {
+  SIMCInstr <opName#"_e64", SIEncodingFamily.VI> {
   let AssemblerPredicates = [isVI];
   let DecoderNamespace = "VI";
   let DisableDecoder = DisableVIDecoder;
@@ -1903,7 +1903,7 @@ class VOP3e_Real_si <bits<9> op, dag outs, dag ins, string asm, string opName,
                      bit HasMods = 0, bit VOP3Only = 0> :
   VOP3Common <outs, ins, asm, [], HasMods, VOP3Only>,
   VOP3e <op>,
-  SIMCInstr<opName#"_e64", SISubtarget.SI> {
+  SIMCInstr<opName#"_e64", SIEncodingFamily.SI> {
   let AssemblerPredicates = [isSICI];
   let DecoderNamespace = "SICI";
   let DisableDecoder = DisableSIDecoder;
@@ -1913,7 +1913,7 @@ class VOP3e_Real_vi <bits<10> op, dag outs, dag ins, string asm, string opName,
                      bit HasMods = 0, bit VOP3Only = 0> :
   VOP3Common <outs, ins, asm, [], HasMods, VOP3Only>,
   VOP3e_vi <op>,
-  SIMCInstr <opName#"_e64", SISubtarget.VI> {
+  SIMCInstr <opName#"_e64", SIEncodingFamily.VI> {
   let AssemblerPredicates = [isVI];
   let DecoderNamespace = "VI";
   let DisableDecoder = DisableVIDecoder;
@@ -2039,11 +2039,11 @@ multiclass VOP2SI_3VI_m <vop3 op, string opName, dag outs, dag ins,
                          string asm, list<dag> pattern = []> {
   let isPseudo = 1, isCodeGenOnly = 1 in {
     def "" : VOPAnyCommon <outs, ins, "", pattern>,
-             SIMCInstr<opName, SISubtarget.NONE>;
+             SIMCInstr<opName, SIEncodingFamily.NONE>;
   }

   def _si : VOP2 <op.SI3{5-0}, outs, ins, asm, []>,
-            SIMCInstr <opName, SISubtarget.SI> {
+            SIMCInstr <opName, SIEncodingFamily.SI> {
    let AssemblerPredicates = [isSICI];
    let DecoderNamespace = "SICI";
    let DisableDecoder = DisableSIDecoder;
@@ -2052,7 +2052,7 @@ multiclass VOP2SI_3VI_m <vop3 op, string opName, dag outs, dag ins,
   def _vi : VOP3Common <outs, ins, asm, []>,
             VOP3e_vi <op.VI3>,
             VOP3DisableFields <1, 0, 0>,
-            SIMCInstr <opName, SISubtarget.VI> {
+            SIMCInstr <opName, SIEncodingFamily.VI> {
    let AssemblerPredicates = [isVI];
    let DecoderNamespace = "VI";
    let DisableDecoder = DisableVIDecoder;
@@ -2221,7 +2221,7 @@ multiclass VOP2MADK <vop2 op, string opName, VOPProfile P, list<dag> pattern = [
 let isCodeGenOnly = 0 in {
   def _si : VOP2Common <P.Outs, P.Ins32,
                         !strconcat(opName, P.Asm32), []>,
-            SIMCInstr <opName#"_e32", SISubtarget.SI>,
+            SIMCInstr <opName#"_e32", SIEncodingFamily.SI>,
            VOP2_MADKe <op.SI> {
    let AssemblerPredicates = [isSICI];
    let DecoderNamespace = "SICI";
@@ -2230,7 +2230,7 @@ let isCodeGenOnly = 0 in {

   def _vi : VOP2Common <P.Outs, P.Ins32,
                         !strconcat(opName, P.Asm32), []>,
-            SIMCInstr <opName#"_e32", SISubtarget.VI>,
+            SIMCInstr <opName#"_e32", SIEncodingFamily.VI>,
            VOP2_MADKe <op.VI> {
    let AssemblerPredicates = [isVI];
    let DecoderNamespace = "VI";
@@ -2242,7 +2242,7 @@ let isCodeGenOnly = 0 in {

 class VOPC_Pseudo <dag ins, list<dag> pattern, string opName> :
   VOPCCommon <ins, "", pattern>,
   VOP <opName>,
-  SIMCInstr<opName#"_e32", SISubtarget.NONE> {
+  SIMCInstr<opName#"_e32", SIEncodingFamily.NONE> {
   let isPseudo = 1;
   let isCodeGenOnly = 1;
 }
@@ -2260,7 +2260,7 @@ multiclass VOPC_m <vopc op, dag ins, string op_asm, list<dag> pattern,

   let AssemblerPredicates = [isSICI] in {
     def _si : VOPC<op.SI, ins, asm, []>,
-              SIMCInstr <opName#"_e32", SISubtarget.SI> {
+              SIMCInstr <opName#"_e32", SIEncodingFamily.SI> {
       let Defs = !if(DefExec, [VCC, EXEC], [VCC]);
       let hasSideEffects = DefExec;
       let SchedRW = sched;
@@ -2272,7 +2272,7 @@ multiclass VOPC_m <vopc op, dag ins, string op_asm, list<dag> pattern,

   let AssemblerPredicates = [isVI] in {
     def _vi : VOPC<op.VI, ins, asm, []>,
-              SIMCInstr <opName#"_e32", SISubtarget.VI> {
+              SIMCInstr <opName#"_e32", SIEncodingFamily.VI> {
       let Defs = !if(DefExec, [VCC, EXEC], [VCC]);
       let hasSideEffects = DefExec;
       let SchedRW = sched;
@@ -2459,7 +2459,7 @@ class Vop3ModPat<Instruction Inst, VOPProfile P, SDPatternOperator node> : Pat<

 class VINTRP_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
   VINTRPCommon <outs, ins, "", pattern>,
-  SIMCInstr<opName, SISubtarget.NONE> {
+  SIMCInstr<opName, SIEncodingFamily.NONE> {
   let isPseudo = 1;
   let isCodeGenOnly = 1;
 }
@@ -2468,7 +2468,7 @@ class VINTRP_Real_si <bits <2> op, string opName, dag outs, dag ins,
                       string asm> :
   VINTRPCommon <outs, ins, asm, []>,
   VINTRPe <op>,
-  SIMCInstr<opName, SISubtarget.SI> {
+  SIMCInstr<opName, SIEncodingFamily.SI> {
   let AssemblerPredicate = SIAssemblerPredicate;
   let DecoderNamespace = "SICI";
   let DisableDecoder = DisableSIDecoder;
@@ -2478,7 +2478,7 @@ class VINTRP_Real_vi <bits <2> op, string opName, dag outs, dag ins,
                       string asm> :
   VINTRPCommon <outs, ins, asm, []>,
   VINTRPe_vi <op>,
-  SIMCInstr<opName, SISubtarget.VI> {
+  SIMCInstr<opName, SIEncodingFamily.VI> {
   let AssemblerPredicate = VIAssemblerPredicate;
   let DecoderNamespace = "VI";
   let DisableDecoder = DisableVIDecoder;
@@ -2499,7 +2499,7 @@ multiclass VINTRP_m <bits <2> op, dag outs, dag ins, string asm,

 class DS_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
   DS <outs, ins, "", pattern>,
-  SIMCInstr <opName, SISubtarget.NONE> {
+  SIMCInstr <opName, SIEncodingFamily.NONE> {
   let isPseudo = 1;
   let isCodeGenOnly = 1;
 }
@@ -2507,7 +2507,7 @@ class DS_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :

 class DS_Real_si <bits<8> op, string opName, dag outs, dag ins, string asm> :
   DS <outs, ins, asm, []>,
   DSe <op>,
-  SIMCInstr <opName, SISubtarget.SI> {
+  SIMCInstr <opName, SIEncodingFamily.SI> {
   let isCodeGenOnly = 0;
   let AssemblerPredicates = [isSICI];
   let DecoderNamespace="SICI";
@@ -2517,7 +2517,7 @@ class DS_Real_si <bits<8> op, string opName, dag outs, dag ins, string asm> :

 class DS_Real_vi <bits<8> op, string opName, dag outs, dag ins, string asm> :
   DS <outs, ins, asm, []>,
   DSe_vi <op>,
-  SIMCInstr <opName, SISubtarget.VI> {
+  SIMCInstr <opName, SIEncodingFamily.VI> {
   let isCodeGenOnly = 0;
   let AssemblerPredicates = [isVI];
   let DecoderNamespace="VI";
@@ -2730,7 +2730,7 @@ multiclass DS_1A <bits<8> op, string opName,

 class MTBUF_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
   MTBUF <outs, ins, "", pattern>,
-  SIMCInstr<opName, SISubtarget.NONE> {
+  SIMCInstr<opName, SIEncodingFamily.NONE> {
   let isPseudo = 1;
   let isCodeGenOnly = 1;
 }
@@ -2739,7 +2739,7 @@ class MTBUF_Real_si <bits<3> op, string opName, dag outs, dag ins,
                      string asm> :
   MTBUF <outs, ins, asm, []>,
   MTBUFe <op>,
-  SIMCInstr<opName, SISubtarget.SI> {
+  SIMCInstr<opName, SIEncodingFamily.SI> {
   let DecoderNamespace="SICI";
   let DisableDecoder = DisableSIDecoder;
 }
@@ -2747,7 +2747,7 @@ class MTBUF_Real_si <bits<3> op, string opName, dag outs, dag ins,

 class MTBUF_Real_vi <bits<4> op, string opName, dag outs, dag ins, string asm> :
   MTBUF <outs, ins, asm, []>,
   MTBUFe_vi <op>,
-  SIMCInstr <opName, SISubtarget.VI> {
+  SIMCInstr <opName, SIEncodingFamily.VI> {
   let DecoderNamespace="VI";
   let DisableDecoder = DisableVIDecoder;
 }
@@ -2821,7 +2821,7 @@ class MUBUFAddr64Table <bit is_addr64, string suffix = ""> {

 class MUBUF_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
   MUBUF <outs, ins, "", pattern>,
-  SIMCInstr<opName, SISubtarget.NONE> {
+  SIMCInstr<opName, SIEncodingFamily.NONE> {
   let isPseudo = 1;
   let isCodeGenOnly = 1;

@@ -2839,7 +2839,7 @@ class MUBUF_Real_si <mubuf op, string opName, dag outs, dag ins,
                      string asm> :
   MUBUF <outs, ins, asm, []>,
   MUBUFe <op.SI>,
-  SIMCInstr<opName, SISubtarget.SI> {
+  SIMCInstr<opName, SIEncodingFamily.SI> {
   let lds = 0;
   let AssemblerPredicate = SIAssemblerPredicate;
   let DecoderNamespace="SICI";
@@ -2850,7 +2850,7 @@ class MUBUF_Real_vi <mubuf op, string opName, dag outs, dag ins,
                      string asm> :
   MUBUF <outs, ins, asm, []>,
   MUBUFe_vi <op.VI>,
-  SIMCInstr<opName, SISubtarget.VI> {
+  SIMCInstr<opName, SIEncodingFamily.VI> {
   let lds = 0;
   let AssemblerPredicate = VIAssemblerPredicate;
   let DecoderNamespace="VI";
@@ -3174,21 +3174,21 @@ class flat <bits<7> ci, bits<7> vi = ci> {

 class FLAT_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
   FLAT <0, outs, ins, "", pattern>,
-  SIMCInstr<opName, SISubtarget.NONE> {
+  SIMCInstr<opName, SIEncodingFamily.NONE> {
   let isPseudo = 1;
   let isCodeGenOnly = 1;
 }

 class FLAT_Real_ci <bits<7> op, string opName, dag outs, dag ins, string asm> :
   FLAT <op, outs, ins, asm, []>,
-  SIMCInstr<opName, SISubtarget.SI> {
+  SIMCInstr<opName, SIEncodingFamily.SI> {
   let AssemblerPredicate = isCIOnly;
   let DecoderNamespace="CI";
 }

 class FLAT_Real_vi <bits<7> op, string opName, dag outs, dag ins, string asm> :
   FLAT <op, outs, ins, asm, []>,
-  SIMCInstr<opName, SISubtarget.VI> {
+  SIMCInstr<opName, SIEncodingFamily.VI> {
   let AssemblerPredicate = VIAssemblerPredicate;
   let DecoderNamespace="VI";
   let DisableDecoder = DisableVIDecoder;
@@ -3375,7 +3375,7 @@ class MIMG_Atomic_Helper <string asm, RegisterClass data_rc,
 class MIMG_Atomic_Real_si<mimg op, string name, string asm,
                           RegisterClass data_rc, RegisterClass addr_rc> :
   MIMG_Atomic_Helper<asm, data_rc, addr_rc>,
-  SIMCInstr<name, SISubtarget.SI>,
+  SIMCInstr<name, SIEncodingFamily.SI>,
   MIMGe<op.SI> {
   let isCodeGenOnly = 0;
   let AssemblerPredicates = [isSICI];
@@ -3386,7 +3386,7 @@ class MIMG_Atomic_Real_vi<mimg op, string name, string asm,
                           RegisterClass data_rc, RegisterClass addr_rc> :
   MIMG_Atomic_Helper<asm, data_rc, addr_rc>,
-  SIMCInstr<name, SISubtarget.VI>,
+  SIMCInstr<name, SIEncodingFamily.VI>,
   MIMGe<op.VI> {
   let isCodeGenOnly = 0;
   let AssemblerPredicates = [isVI];
@@ -3398,7 +3398,7 @@ multiclass MIMG_Atomic_Helper_m <mimg op, string name, string asm,
                                  RegisterClass data_rc, RegisterClass addr_rc> {
   let isPseudo = 1, isCodeGenOnly = 1 in {
     def "" : MIMG_Atomic_Helper<asm, data_rc, addr_rc>,
-             SIMCInstr<name, SISubtarget.NONE>;
+             SIMCInstr<name, SIEncodingFamily.NONE>;
   }

   let ssamp = 0 in {
@@ -3573,8 +3573,9 @@ def getMCOpcodeGen : InstrMapping {
   let FilterClass = "SIMCInstr";
   let RowFields = ["PseudoInstr"];
   let ColFields = ["Subtarget"];
-  let KeyCol = [!cast<string>(SISubtarget.NONE)];
-  let ValueCols = [[!cast<string>(SISubtarget.SI)],[!cast<string>(SISubtarget.VI)]];
+  let KeyCol = [!cast<string>(SIEncodingFamily.NONE)];
+  let ValueCols = [[!cast<string>(SIEncodingFamily.SI)],
+                   [!cast<string>(SIEncodingFamily.VI)]];
 }

 def getAddr64Inst : InstrMapping {
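
Aside on the mapping just above: getMCOpcodeGen is an InstrMapping, so TableGen generates a pseudo-to-real opcode table keyed by the SIEncodingFamily column. The standalone C++ sketch below only illustrates the shape of that lookup; the enum values mirror the NONE/SI/VI columns in the hunk, but the table contents and function names here are invented for illustration, not the generated code.

    #include <cstdio>

    // Mirrors the role of the TableGen SIEncodingFamily values: NONE tags the
    // target-independent pseudo (the key column), SI and VI are value columns.
    enum class EncodingFamily : int { NONE = -1, SI = 0, VI = 1 };

    // One row of the (hypothetical) generated table: a pseudo opcode plus the
    // real MC opcode per encoding family; -1 means "no encoding here".
    struct MCOpcodeRow {
      int Pseudo;
      int Real[2]; // indexed by EncodingFamily::SI / EncodingFamily::VI
    };

    // Invented contents -- the real table comes from the .td definitions.
    static const MCOpcodeRow Table[] = {
      {1000, {0x20, 0x30}},
      {1001, {0x21, -1}}, // e.g. an SI/CI-only encoding
    };

    // What a getMCOpcode()-style query conceptually does at lowering time.
    int resolveMCOpcode(int Pseudo, EncodingFamily Fam) {
      if (Fam == EncodingFamily::NONE)
        return Pseudo; // pseudos are the row keys, not mapped values
      for (const MCOpcodeRow &Row : Table)
        if (Row.Pseudo == Pseudo)
          return Row.Real[static_cast<int>(Fam)];
      return -1; // unknown pseudo
    }

    int main() {
      std::printf("%d\n", resolveMCOpcode(1000, EncodingFamily::VI)); // 48
    }

The rename in the commit makes the intent of this column explicit: the value selects an encoding family for MC lowering, not a subtarget object.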
"AMDGPU.h" +#include "AMDGPUSubtarget.h" #include "SIInstrInfo.h" #include "SIRegisterInfo.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" @@ -61,7 +62,6 @@ private: MachineRegisterInfo *MRI; LiveIntervals *LIS; - static bool offsetsCanBeCombined(unsigned Offset0, unsigned Offset1, unsigned EltSize); @@ -411,9 +411,10 @@ bool SILoadStoreOptimizer::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(*MF.getFunction())) return false; - const TargetSubtargetInfo &STM = MF.getSubtarget(); - TRI = static_cast<const SIRegisterInfo *>(STM.getRegisterInfo()); - TII = static_cast<const SIInstrInfo *>(STM.getInstrInfo()); + const SISubtarget &STM = MF.getSubtarget<SISubtarget>(); + TII = STM.getInstrInfo(); + TRI = &TII->getRegisterInfo(); + MRI = &MF.getRegInfo(); LIS = &getAnalysis<LiveIntervals>(); diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp index cd382f5ab26..60a0bc3380e 100644 --- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp @@ -590,9 +590,10 @@ bool SILowerControlFlow::indirectDst(MachineInstr &MI) { } bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) { - TII = static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo()); - TRI = - static_cast<const SIRegisterInfo *>(MF.getSubtarget().getRegisterInfo()); + const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); + TII = ST.getInstrInfo(); + TRI = &TII->getRegisterInfo(); + SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); bool HaveKill = false; diff --git a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp index ca942f60d1c..dc1d20ddb27 100644 --- a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp @@ -66,9 +66,10 @@ FunctionPass *llvm::createSILowerI1CopiesPass() { bool SILowerI1Copies::runOnMachineFunction(MachineFunction &MF) { MachineRegisterInfo &MRI = MF.getRegInfo(); - const SIInstrInfo *TII = - static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo()); - const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); + const SIInstrInfo *TII = ST.getInstrInfo(); + const TargetRegisterInfo *TRI = &TII->getRegisterInfo(); + std::vector<unsigned> I1Defs; for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index faab72ca9f9..6daebbaa0e4 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -79,7 +79,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) WorkItemIDX(false), WorkItemIDY(false), WorkItemIDZ(false) { - const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>(); + const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); const Function *F = MF.getFunction(); PSInputAddr = AMDGPU::getInitialPSInputAddr(*F); @@ -129,7 +129,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) // We don't need to worry about accessing spills with flat instructions. // TODO: On VI where we must use flat for global, we should be able to omit // this if it is never used for generic access. 
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index faab72ca9f9..6daebbaa0e4 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -79,7 +79,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
     WorkItemIDX(false),
     WorkItemIDY(false),
     WorkItemIDZ(false) {
-  const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
+  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
   const Function *F = MF.getFunction();

   PSInputAddr = AMDGPU::getInitialPSInputAddr(*F);
@@ -129,7 +129,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
   // We don't need to worry about accessing spills with flat instructions.
   // TODO: On VI where we must use flat for global, we should be able to omit
   // this if it is never used for generic access.
-  if (HasStackObjects && ST.getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS &&
+  if (HasStackObjects && ST.getGeneration() >= SISubtarget::SEA_ISLANDS &&
       ST.isAmdHsaOS())
     FlatScratchInit = true;

@@ -178,16 +178,17 @@ unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
   return FlatScratchInitUserSGPR;
 }

-SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg(
+SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg (
   MachineFunction *MF,
   unsigned FrameIndex,
   unsigned SubIdx) {
   if (!EnableSpillSGPRToVGPR)
     return SpilledReg();

+  const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
+  const SIRegisterInfo *TRI = ST.getRegisterInfo();
+
   MachineFrameInfo *FrameInfo = MF->getFrameInfo();
-  const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>(
-      MF->getSubtarget<AMDGPUSubtarget>().getRegisterInfo());
   MachineRegisterInfo &MRI = MF->getRegInfo();
   int64_t Offset = FrameInfo->getObjectOffset(FrameIndex);
   Offset += SubIdx * 4;

diff --git a/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp b/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp
index cefacb0dc1b..147eb99dc2a 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp
@@ -12,8 +12,8 @@
 //
 //===----------------------------------------------------------------------===//

+#include "AMDGPU.h"
 #include "SIMachineScheduler.h"
-#include "AMDGPUSubtarget.h"
 #include "llvm/CodeGen/LiveInterval.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index a3a19efcff8..5a934ecec7a 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -15,6 +15,7 @@
 #include "SIRegisterInfo.h"
 #include "SIInstrInfo.h"
 #include "SIMachineFunctionInfo.h"
+#include "AMDGPUSubtarget.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/RegisterScavenging.h"
@@ -24,8 +25,8 @@ using namespace llvm;

 static unsigned getMaxWaveCountPerSIMD(const MachineFunction &MF) {
-  const SIMachineFunctionInfo& MFI = *MF.getInfo<SIMachineFunctionInfo>();
-  const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
+  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
+  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
   unsigned SIMDPerCU = 4;

   unsigned MaxInvocationsPerWave = SIMDPerCU * ST.getWavefrontSize();
@@ -34,13 +35,13 @@ static unsigned getMaxWaveCountPerSIMD(const MachineFunction &MF) {
 }

 static unsigned getMaxWorkGroupSGPRCount(const MachineFunction &MF) {
-  const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
+  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
   unsigned MaxWaveCountPerSIMD = getMaxWaveCountPerSIMD(MF);

   unsigned TotalSGPRCountPerSIMD, AddressableSGPRCount, SGPRUsageAlignment;
   unsigned ReservedSGPRCount;

-  if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+  if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
     TotalSGPRCountPerSIMD = 800;
     AddressableSGPRCount = 102;
     SGPRUsageAlignment = 16;
@@ -56,7 +57,7 @@ static unsigned getMaxWorkGroupSGPRCount(const MachineFunction &MF) {
   MaxSGPRCount = alignDown(MaxSGPRCount, SGPRUsageAlignment);

   if (ST.hasSGPRInitBug())
-    MaxSGPRCount = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;
+    MaxSGPRCount = SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;

   return std::min(MaxSGPRCount - ReservedSGPRCount, AddressableSGPRCount);
 }
@@ -195,7 +196,7 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {

   // Reserve registers for debugger usage if "amdgpu-debugger-reserve-trap-regs"
   // attribute was specified.
-  const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
+  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
   if (ST.debuggerReserveRegs()) {
     unsigned ReservedVGPRFirst =
       MaxWorkGroupVGPRCount - MFI->getDebuggerReservedVGPRCount();
@@ -210,10 +211,9 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {

 unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF,
                                                 unsigned Idx) const {
-  const AMDGPUSubtarget &STI = MF.getSubtarget<AMDGPUSubtarget>();
+  const SISubtarget &STI = MF.getSubtarget<SISubtarget>();
   // FIXME: We should adjust the max number of waves based on LDS size.
-  unsigned SGPRLimit = getNumSGPRsAllowed(STI.getGeneration(),
-                                          STI.getMaxWavesPerCU());
+  unsigned SGPRLimit = getNumSGPRsAllowed(STI, STI.getMaxWavesPerCU());
   unsigned VGPRLimit = getNumVGPRsAllowed(STI.getMaxWavesPerCU());

   unsigned VSLimit = SGPRLimit + VGPRLimit;
@@ -274,8 +274,8 @@ void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
     DL = Ins->getDebugLoc();

   MachineFunction *MF = MBB->getParent();
-  const AMDGPUSubtarget &Subtarget = MF->getSubtarget<AMDGPUSubtarget>();
-  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+  const SISubtarget &Subtarget = MF->getSubtarget<SISubtarget>();
+  const SIInstrInfo *TII = Subtarget.getInstrInfo();

   if (Offset == 0) {
     BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), BaseReg)
@@ -297,9 +297,8 @@ void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,

   MachineBasicBlock *MBB = MI.getParent();
   MachineFunction *MF = MBB->getParent();
-  const AMDGPUSubtarget &Subtarget = MF->getSubtarget<AMDGPUSubtarget>();
-  const SIInstrInfo *TII
-      = static_cast<const SIInstrInfo *>(Subtarget.getInstrInfo());
+  const SISubtarget &Subtarget = MF->getSubtarget<SISubtarget>();
+  const SIInstrInfo *TII = Subtarget.getInstrInfo();

 #ifndef NDEBUG
   // FIXME: Is it possible to be storing a frame index to itself?
@@ -409,10 +408,11 @@ void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI,
   bool IsKill = SrcDst->isKill();
   MachineBasicBlock *MBB = MI->getParent();
   MachineFunction *MF = MI->getParent()->getParent();
-  const SIInstrInfo *TII =
-      static_cast<const SIInstrInfo *>(MF->getSubtarget().getInstrInfo());
+  const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
+  const SIInstrInfo *TII = ST.getInstrInfo();
+
   DebugLoc DL = MI->getDebugLoc();
-  bool IsStore = TII->get(LoadStoreOp).mayStore();
+  bool IsStore = MI->mayStore();

   bool RanOutOfSGPRs = false;
   bool Scavenged = false;
@@ -489,8 +489,8 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
   MachineBasicBlock *MBB = MI->getParent();
   SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
   MachineFrameInfo *FrameInfo = MF->getFrameInfo();
-  const SIInstrInfo *TII =
-      static_cast<const SIInstrInfo *>(MF->getSubtarget().getInstrInfo());
+  const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
+  const SIInstrInfo *TII = ST.getInstrInfo();
   DebugLoc DL = MI->getDebugLoc();

   MachineOperand &FIOp = MI->getOperand(FIOperandNum);
@@ -662,10 +662,6 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
   }
 }

-unsigned SIRegisterInfo::getHWRegIndex(unsigned Reg) const {
-  return getEncodingValue(Reg) & 0xff;
-}
-
 // FIXME: This is very slow. It might be worth creating a map from physreg to
 // register class.
 const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
@@ -900,7 +896,7 @@ unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF,
                                            enum PreloadedValue Value) const {

   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
-  const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
+  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
   (void)ST;
   switch (Value) {
   case SIRegisterInfo::WORKGROUP_ID_X:
@@ -971,9 +967,9 @@ unsigned SIRegisterInfo::getNumVGPRsAllowed(unsigned WaveCount) const {
   }
 }

-unsigned SIRegisterInfo::getNumSGPRsAllowed(AMDGPUSubtarget::Generation gen,
+unsigned SIRegisterInfo::getNumSGPRsAllowed(const SISubtarget &ST,
                                             unsigned WaveCount) const {
-  if (gen >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+  if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
     switch (WaveCount) {
       case 10: return 80;
       case 9:  return 80;

diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
index 45d14e97cda..c02c5a96b01 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -12,17 +12,17 @@
 //
 //===----------------------------------------------------------------------===//

-
 #ifndef LLVM_LIB_TARGET_AMDGPU_SIREGISTERINFO_H
 #define LLVM_LIB_TARGET_AMDGPU_SIREGISTERINFO_H

 #include "AMDGPURegisterInfo.h"
-#include "AMDGPUSubtarget.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Support/Debug.h"

 namespace llvm {

+class SISubtarget;
+class MachineRegisterInfo;
+
 struct SIRegisterInfo final : public AMDGPURegisterInfo {
 private:
   unsigned SGPR32SetID;
@@ -80,7 +80,9 @@ public:
                            unsigned FIOperandNum,
                            RegScavenger *RS) const override;

-  unsigned getHWRegIndex(unsigned Reg) const override;
+  unsigned getHWRegIndex(unsigned Reg) const {
+    return getEncodingValue(Reg) & 0xff;
+  }

   /// \brief Return the 'base' register class for this register.
   /// e.g. SGPR0 => SReg_32, VGPR => VGPR_32 SGPR0_SGPR1 -> SReg_32, etc.
@@ -179,8 +181,7 @@ public:

   /// \brief Give the maximum number of SGPRs that can be used by \p WaveCount
   /// concurrent waves.
-  unsigned getNumSGPRsAllowed(AMDGPUSubtarget::Generation gen,
-                              unsigned WaveCount) const;
+  unsigned getNumSGPRsAllowed(const SISubtarget &ST, unsigned WaveCount) const;

   unsigned findUnusedRegister(const MachineRegisterInfo &MRI,
                               const TargetRegisterClass *RC) const;
diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index 1e13e98f6c7..517533ee734 100644
--- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -199,9 +199,10 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
     return false;

   MachineRegisterInfo &MRI = MF.getRegInfo();
-  const SIInstrInfo *TII =
-      static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
+  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+  const SIInstrInfo *TII = ST.getInstrInfo();
   const SIRegisterInfo &TRI = TII->getRegisterInfo();
+
   std::vector<unsigned> I1Defs;

   for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();

diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
index 79796853497..3315c4cd0e9 100644
--- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -476,8 +476,10 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
   ExecExports.clear();
   LiveMaskQueries.clear();

-  TII = static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
-  TRI = static_cast<const SIRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
+  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+
+  TII = ST.getInstrInfo();
+  TRI = &TII->getRegisterInfo();
   MRI = &MF.getRegInfo();

   char GlobalFlags = analyzeFunction(MF);