author     Ron Lieberman <ronlieb.g@gmail.com>   2018-11-16 01:13:34 +0000
committer  Ron Lieberman <ronlieb.g@gmail.com>   2018-11-16 01:13:34 +0000
commit     cac749ac884cfab87a0b2a805b43530c26a627c8 (patch)
tree       483b52cfd6f80f9842c2ce8132146e9dd1b798e0 /llvm/lib
parent     5d14b72d5c3f5169fd896ce91378e377f464b18b (diff)
[AMDGPU] Add FixupVectorISel pass; currently supports SREGs in GLOBAL LD/ST
Add a pass to fix up various vector ISel issues.
Currently it handles converting GLOBAL_{LOAD|STORE}_*
and GLOBAL_ATOMIC_* instructions into their _SADDR variants.
This involves feeding the scalar base register (sreg) into the saddr
operand of the new instruction.
llvm-svn: 347008
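
The conversion hinges on the new TableGen-generated opcode mapping
getGlobalSaddrOp, declared in SIInstrInfo.h and defined via InstrMapping in
SIInstrInfo.td (both in the diff below). A minimal sketch of the client-side
contract, assuming only what this commit adds — the helper name here is
illustrative, not part of the commit:

    #include "SIInstrInfo.h" // declares AMDGPU::getGlobalSaddrOp()

    // Illustrative only: a GLOBAL_* memop is a rewrite candidate exactly when
    // the generated mapping knows a _SADDR twin for its opcode;
    // getGlobalSaddrOp returns that opcode, or -1 when none is registered.
    static bool hasSaddrVariant(uint16_t Opcode) {
      return AMDGPU::getGlobalSaddrOp(Opcode) != -1;
    }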
Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPU.h                  |   4
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp   |   2
-rw-r--r--  llvm/lib/Target/AMDGPU/CMakeLists.txt            |   1
-rw-r--r--  llvm/lib/Target/AMDGPU/FLATInstructions.td       |  23
-rw-r--r--  llvm/lib/Target/AMDGPU/SIFixupVectorISel.cpp     | 224
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstrInfo.h             |   3
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstrInfo.td            |   9
-rw-r--r--  llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp  |   1
8 files changed, 263 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 07e5d97dff9..07ae2bee49b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -41,6 +41,7 @@ FunctionPass *createSIAnnotateControlFlowPass();
 FunctionPass *createSIFoldOperandsPass();
 FunctionPass *createSIPeepholeSDWAPass();
 FunctionPass *createSILowerI1CopiesPass();
+FunctionPass *createSIFixupVectorISelPass();
 FunctionPass *createSIShrinkInstructionsPass();
 FunctionPass *createSILoadStoreOptimizerPass();
 FunctionPass *createSIWholeQuadModePass();
@@ -122,6 +123,9 @@ extern char &SIFixSGPRCopiesID;
 void initializeSIFixVGPRCopiesPass(PassRegistry &);
 extern char &SIFixVGPRCopiesID;
 
+void initializeSIFixupVectorISelPass(PassRegistry &);
+extern char &SIFixupVectorISelID;
+
 void initializeSILowerI1CopiesPass(PassRegistry &);
 extern char &SILowerI1CopiesID;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 403dace533a..cdd3017e18d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -161,6 +161,7 @@ extern "C" void LLVMInitializeAMDGPUTarget() {
   initializeSILowerI1CopiesPass(*PR);
   initializeSIFixSGPRCopiesPass(*PR);
   initializeSIFixVGPRCopiesPass(*PR);
+  initializeSIFixupVectorISelPass(*PR);
   initializeSIFoldOperandsPass(*PR);
   initializeSIPeepholeSDWAPass(*PR);
   initializeSIShrinkInstructionsPass(*PR);
@@ -813,6 +814,7 @@ bool GCNPassConfig::addInstSelector() {
   AMDGPUPassConfig::addInstSelector();
   addPass(&SIFixSGPRCopiesID);
   addPass(createSILowerI1CopiesPass());
+  addPass(createSIFixupVectorISelPass());
   return false;
 }
diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt
index 3c87dc18827..bb1096bc1de 100644
--- a/llvm/lib/Target/AMDGPU/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt
@@ -96,6 +96,7 @@ add_llvm_target(AMDGPUCodeGen
   SIAnnotateControlFlow.cpp
   SIDebuggerInsertNops.cpp
   SIFixSGPRCopies.cpp
+  SIFixupVectorISel.cpp
   SIFixVGPRCopies.cpp
   SIFixWWMLiveness.cpp
   SIFoldOperands.cpp
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 18e8b8a1c2d..44040d352e6 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -121,6 +121,11 @@ class FLAT_Real <bits<7> op, FLAT_Pseudo ps> :
   let Inst{63-56} = !if(ps.has_vdst, vdst, ?);
 }
 
+class GlobalSaddrTable <bit is_saddr, string Name = ""> {
+  bit IsSaddr = is_saddr;
+  string SaddrOp = Name;
+}
+
 // TODO: Is exec allowed for saddr? The disabled value 0x7f is the
 // same encoding value as exec_hi, so it isn't possible to use that if
 // saddr is 32-bit (which isn't handled here yet).
@@ -171,15 +176,19 @@ class FLAT_Store_Pseudo <string opName, RegisterClass vdataClass,
 multiclass FLAT_Global_Load_Pseudo<string opName, RegisterClass regClass,
                                    bit HasTiedInput = 0> {
   let is_flat_global = 1 in {
-    def "" : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1, 1>;
-    def _SADDR : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1, 1, 1>;
+    def "" : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1, 1>,
+      GlobalSaddrTable<0, opName>;
+    def _SADDR : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1, 1, 1>,
+      GlobalSaddrTable<1, opName>;
   }
 }
 
 multiclass FLAT_Global_Store_Pseudo<string opName, RegisterClass regClass> {
   let is_flat_global = 1 in {
-    def "" : FLAT_Store_Pseudo<opName, regClass, 1, 1>;
-    def _SADDR : FLAT_Store_Pseudo<opName, regClass, 1, 1, 1>;
+    def "" : FLAT_Store_Pseudo<opName, regClass, 1, 1>,
+      GlobalSaddrTable<0, opName>;
+    def _SADDR : FLAT_Store_Pseudo<opName, regClass, 1, 1, 1>,
+      GlobalSaddrTable<1, opName>;
   }
 }
@@ -262,6 +271,7 @@ multiclass FLAT_Atomic_Pseudo<
     (outs),
     (ins VReg_64:$vaddr, data_rc:$vdata, offset_u12:$offset, SLC:$slc),
     " $vaddr, $vdata$offset$slc">,
+    GlobalSaddrTable<0, opName>,
     AtomicNoRet <opName, 0> {
     let PseudoInstr = NAME;
   }
@@ -272,6 +282,7 @@ multiclass FLAT_Atomic_Pseudo<
     " $vdst, $vaddr, $vdata$offset glc$slc",
     [(set vt:$vdst,
       (atomic (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$vdata))]>,
+    GlobalSaddrTable<0, opName#"_rtn">,
     AtomicNoRet <opName, 1>;
 }
@@ -287,6 +298,7 @@ multiclass FLAT_Global_Atomic_Pseudo_NO_RTN<
     (outs),
     (ins VReg_64:$vaddr, data_rc:$vdata, offset_s13:$offset, SLC:$slc),
     " $vaddr, $vdata, off$offset$slc">,
+    GlobalSaddrTable<0, opName>,
     AtomicNoRet <opName, 0> {
     let has_saddr = 1;
     let PseudoInstr = NAME;
@@ -296,6 +308,7 @@ multiclass FLAT_Global_Atomic_Pseudo_NO_RTN<
     (outs),
     (ins VReg_64:$vaddr, data_rc:$vdata, SReg_64:$saddr, offset_s13:$offset, SLC:$slc),
     " $vaddr, $vdata, $saddr$offset$slc">,
+    GlobalSaddrTable<1, opName>,
     AtomicNoRet <opName#"_saddr", 0> {
     let has_saddr = 1;
     let enabled_saddr = 1;
@@ -317,6 +330,7 @@ multiclass FLAT_Global_Atomic_Pseudo_RTN<
     " $vdst, $vaddr, $vdata, off$offset glc$slc",
     [(set vt:$vdst,
       (atomic (FLATSignedAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$vdata))]>,
+    GlobalSaddrTable<0, opName#"_rtn">,
     AtomicNoRet <opName, 1> {
     let has_saddr = 1;
   }
@@ -325,6 +339,7 @@ multiclass FLAT_Global_Atomic_Pseudo_RTN<
     (outs vdst_rc:$vdst),
     (ins VReg_64:$vaddr, data_rc:$vdata, SReg_64:$saddr, offset_s13:$offset, SLC:$slc),
     " $vdst, $vaddr, $vdata, $saddr$offset glc$slc">,
+    GlobalSaddrTable<1, opName#"_rtn">,
     AtomicNoRet <opName#"_saddr", 1> {
     let has_saddr = 1;
     let enabled_saddr = 1;
diff --git a/llvm/lib/Target/AMDGPU/SIFixupVectorISel.cpp b/llvm/lib/Target/AMDGPU/SIFixupVectorISel.cpp
new file mode 100644
index 00000000000..3da043f3709
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/SIFixupVectorISel.cpp
@@ -0,0 +1,224 @@
+//===-- SIFixupVectorISel.cpp - Fixup post ISel vector issues ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+/// The SIFixupVectorISel pass cleans up post-ISel vector issues.
+/// Currently this will convert GLOBAL_{LOAD|STORE}_*
+/// and GLOBAL_ATOMIC_* instructions into their _SADDR variants,
+/// feeding the sreg into the saddr field of the new instruction.
+/// We currently handle a REG_SEQUENCE feeding the vaddr
+/// and decompose it into a base and index.
+///
+/// Transform:
+/// %17:vgpr_32, %19:sreg_64_xexec = V_ADD_I32_e64 %21:sgpr_32, %22:vgpr_32
+/// %18:vgpr_32, %20:sreg_64_xexec = V_ADDC_U32_e64 %25:vgpr_32,
+///    %24:vgpr_32, %19:sreg_64_xexec
+/// %16:vreg_64 = REG_SEQUENCE %17:vgpr_32, %sub0, %18:vgpr_32, %sub1
+/// %11:vreg_64 = COPY %16:vreg_64
+/// %10:vgpr_32 = GLOBAL_LOAD_DWORD killed %11:vreg_64, 16, 0, 0
+/// Into:
+/// %4:sreg_64_xexec = S_LOAD_DWORDX2_IMM %1:sgpr_64, 36, 0
+/// %14:vreg_64 = REG_SEQUENCE %6:vgpr_32, %sub0, %15:vgpr_32, %sub1
+/// %10:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %14:vreg_64, %4:sreg_64_xexec, 16...
+///
+//===----------------------------------------------------------------------===//
+//
+
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetMachine.h"
+
+#define DEBUG_TYPE "si-fixup-vector-isel"
+
+using namespace llvm;
+
+STATISTIC(NumSGPRGlobalOccurs, "Number of global ld/st opportunities");
+STATISTIC(NumSGPRGlobalSaddrs, "Number of global sgpr instructions converted");
+
+namespace {
+
+class SIFixupVectorISel : public MachineFunctionPass {
+public:
+  static char ID;
+
+  SIFixupVectorISel() : MachineFunctionPass(ID) {
+    initializeSIFixupVectorISelPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+};
+
+} // End anonymous namespace.
+
+INITIALIZE_PASS(SIFixupVectorISel, DEBUG_TYPE,
+                "SI Fixup Vector ISel", false, false)
+
+char SIFixupVectorISel::ID = 0;
+
+char &llvm::SIFixupVectorISelID = SIFixupVectorISel::ID;
+
+FunctionPass *llvm::createSIFixupVectorISelPass() {
+  return new SIFixupVectorISel();
+}
+
+static bool findSRegBaseAndIndex(MachineOperand *Op,
+                                 unsigned &BaseReg,
+                                 unsigned &IndexReg,
+                                 MachineRegisterInfo &MRI,
+                                 const SIRegisterInfo *TRI) {
+  SmallVector<MachineOperand *, 8> Worklist;
+  Worklist.push_back(Op);
+  while (!Worklist.empty()) {
+    MachineOperand *WOp = Worklist.pop_back_val();
+    if (!WOp->isReg() ||
+        !TargetRegisterInfo::isVirtualRegister(WOp->getReg()))
+      continue;
+    MachineInstr *DefInst = MRI.getUniqueVRegDef(WOp->getReg());
+    switch (DefInst->getOpcode()) {
+    default:
+      continue;
+    case AMDGPU::COPY:
+      Worklist.push_back(&DefInst->getOperand(1));
+      break;
+    case AMDGPU::REG_SEQUENCE:
+      if (DefInst->getNumOperands() != 5)
+        continue;
+      Worklist.push_back(&DefInst->getOperand(1));
+      Worklist.push_back(&DefInst->getOperand(3));
+      break;
+    case AMDGPU::V_ADD_I32_e64:
+      // The V_ADD_* and its analogous V_ADDC_* are generated by
+      // a previous pass which lowered from an ADD_64_PSEUDO,
+      // which generates subregs to break up the 64 bit args.
+      if (DefInst->getOperand(2).getSubReg() != AMDGPU::NoSubRegister)
+        continue;
+      BaseReg = DefInst->getOperand(2).getReg();
+      if (DefInst->getOperand(3).getSubReg() != AMDGPU::NoSubRegister)
+        continue;
+      IndexReg = DefInst->getOperand(3).getReg();
+      // Chase the IndexReg.
+      MachineInstr *MI = MRI.getUniqueVRegDef(IndexReg);
+      if (!MI || !MI->isCopy())
+        continue;
+      // Make sure the reg class is 64 bit for Index.
+      // If the Index register is a subreg, we want it to reference
+      // a 64 bit register which we will use as the Index reg.
+      const TargetRegisterClass *IdxRC, *BaseRC;
+      IdxRC = MRI.getRegClass(MI->getOperand(1).getReg());
+      if (AMDGPU::getRegBitWidth(IdxRC->getID()) != 64)
+        continue;
+      IndexReg = MI->getOperand(1).getReg();
+      // Chase the BaseReg.
+      MI = MRI.getUniqueVRegDef(BaseReg);
+      if (!MI || !MI->isCopy())
+        continue;
+      // Make sure the register class is 64 bit for Base.
+      BaseReg = MI->getOperand(1).getReg();
+      BaseRC = MRI.getRegClass(BaseReg);
+      if (AMDGPU::getRegBitWidth(BaseRC->getID()) != 64)
+        continue;
+      // Make sure Base is SReg and Index is VReg.
+      if (!TRI->isSGPRReg(MRI, BaseReg))
+        return false;
+      if (!TRI->hasVGPRs(MRI.getRegClass(IndexReg)))
+        return false;
+      // Clear any kill flags on the Index and Base regs; they are used later.
+      MRI.clearKillFlags(IndexReg);
+      MRI.clearKillFlags(BaseReg);
+      return true;
+    }
+  }
+  return false;
+}
+
+// Identify Global LOAD|STORE/ATOMIC and try to convert to _SADDR.
+static bool fixupGlobalSaddr(MachineBasicBlock &MBB,
+                             MachineFunction &MF,
+                             MachineRegisterInfo &MRI,
+                             const GCNSubtarget &ST,
+                             const SIInstrInfo *TII,
+                             const SIRegisterInfo *TRI) {
+  bool FuncModified = false;
+  MachineBasicBlock::iterator I, Next;
+  for (I = MBB.begin(); I != MBB.end(); I = Next) {
+    Next = std::next(I);
+    MachineInstr &MI = *I;
+    int NewOpcd = AMDGPU::getGlobalSaddrOp(MI.getOpcode());
+    if (NewOpcd < 0)
+      continue;
+    // Update our statistics on opportunities seen.
+    ++NumSGPRGlobalOccurs;
+    LLVM_DEBUG(dbgs() << "Global Mem opp " << MI << '\n');
+    // Need a Base and Index, or we can't transform to _SADDR.
+    unsigned BaseReg = 0;
+    unsigned IndexReg = 0;
+    MachineOperand *Op = TII->getNamedOperand(MI, AMDGPU::OpName::vaddr);
+    if (!findSRegBaseAndIndex(Op, BaseReg, IndexReg, MRI, TRI))
+      continue;
+    ++NumSGPRGlobalSaddrs;
+    FuncModified = true;
+    // Create the new _SADDR memory instruction.
+    bool HasVdst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst) != nullptr;
+    MachineOperand *VData = TII->getNamedOperand(MI, AMDGPU::OpName::vdata);
+    MachineInstr *NewGlob =
+        BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcd));
+    if (HasVdst)
+      NewGlob->addOperand(MF, MI.getOperand(0));
+    NewGlob->addOperand(MF, MachineOperand::CreateReg(IndexReg, false));
+    if (VData)
+      NewGlob->addOperand(MF, *VData);
+    NewGlob->addOperand(MF, MachineOperand::CreateReg(BaseReg, false));
+    NewGlob->addOperand(*TII->getNamedOperand(MI, AMDGPU::OpName::offset));
+
+    MachineOperand *Glc = TII->getNamedOperand(MI, AMDGPU::OpName::glc);
+    // Atomics don't have a GLC, so omit the field if not there.
+    if (Glc)
+      NewGlob->addOperand(MF, *Glc);
+    NewGlob->addOperand(*TII->getNamedOperand(MI, AMDGPU::OpName::slc));
+    // _D16 variants have a vdst_in operand; copy it in.
+    MachineOperand *VDstInOp = TII->getNamedOperand(MI,
+                                      AMDGPU::OpName::vdst_in);
+    if (VDstInOp)
+      NewGlob->addOperand(MF, *VDstInOp);
+    NewGlob->copyImplicitOps(MF, MI);
+    NewGlob->cloneMemRefs(MF, MI);
+    // Remove the old Global Memop instruction.
+    MI.eraseFromParent();
+    LLVM_DEBUG(dbgs() << "New Global Mem " << *NewGlob << '\n');
+  }
+  return FuncModified;
+}
+
+bool SIFixupVectorISel::runOnMachineFunction(MachineFunction &MF) {
+  if (skipFunction(MF.getFunction()))
+    return false;
+
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+  const SIInstrInfo *TII = ST.getInstrInfo();
+  const SIRegisterInfo *TRI = ST.getRegisterInfo();
+
+  bool FuncModified = false;
+  for (MachineBasicBlock &MBB : MF) {
+    // Clean up missed saddr opportunities from ISel.
+    FuncModified |= fixupGlobalSaddr(MBB, MF, MRI, ST, TII, TRI);
+  }
+  return FuncModified;
+}
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 2f51b199950..aad9e8402ca 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -954,6 +954,9 @@ namespace AMDGPU {
   LLVM_READONLY
   int getSOPKOp(uint16_t Opcode);
 
+  LLVM_READONLY
+  int getGlobalSaddrOp(uint16_t Opcode);
+
   const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
   const uint64_t RSRC_ELEMENT_SIZE_SHIFT = (32 + 19);
   const uint64_t RSRC_INDEX_STRIDE_SHIFT = (32 + 21);
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 0859989b039..265a05706a8 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -2017,6 +2017,15 @@ def getAtomicNoRetOp : InstrMapping {
   let ValueCols = [["0"]];
 }
 
+// Maps a GLOBAL to its SADDR form.
+def getGlobalSaddrOp : InstrMapping {
+  let FilterClass = "GlobalSaddrTable";
+  let RowFields = ["SaddrOp"];
+  let ColFields = ["IsSaddr"];
+  let KeyCol = ["0"];
+  let ValueCols = [["1"]];
+}
+
 include "SIInstructions.td"
 include "DSInstructions.td"
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index fc1e71299c5..c43389a13b8 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -755,6 +755,7 @@ unsigned getRegBitWidth(unsigned RCID) {
   case AMDGPU::VS_64RegClassID:
   case AMDGPU::SReg_64RegClassID:
   case AMDGPU::VReg_64RegClassID:
+  case AMDGPU::SReg_64_XEXECRegClassID:
     return 64;
   case AMDGPU::VReg_96RegClassID:
     return 96;
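
A note on the InstrMapping above: TableGen pairs the two GlobalSaddrTable rows
that share a SaddrOp string, keyed by the IsSaddr bit, and emits the relation
behind AMDGPU::getGlobalSaddrOp() into AMDGPUGenInstrInfo.inc. Conceptually
the generated mapping behaves like the hand-written sketch below (illustrative
only; the real output is a generated table, and the opcodes shown are just a
sample of the GLOBAL_* pairs registered by FLATInstructions.td):

    // Conceptual stand-in for the TableGen-generated lookup; not real output.
    int getGlobalSaddrOp(uint16_t Opcode) {
      switch (Opcode) {
      case AMDGPU::GLOBAL_LOAD_DWORD:
        return AMDGPU::GLOBAL_LOAD_DWORD_SADDR;
      case AMDGPU::GLOBAL_STORE_DWORD:
        return AMDGPU::GLOBAL_STORE_DWORD_SADDR;
      default:
        return -1; // No _SADDR variant registered for this opcode.
      }
    }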