diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIAddIMGInit.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIAddIMGInit.cpp | 181 |
1 files changed, 181 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIAddIMGInit.cpp b/llvm/lib/Target/AMDGPU/SIAddIMGInit.cpp new file mode 100644 index 00000000000..69cafef4a35 --- /dev/null +++ b/llvm/lib/Target/AMDGPU/SIAddIMGInit.cpp @@ -0,0 +1,181 @@ +//===-- SIAddIMGInit.cpp - Add any required IMG inits ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// Any MIMG instructions that use tfe or lwe require an initialization of the +/// result register that will be written in the case of a memory access failure +/// The required code is also added to tie this init code to the result of the +/// img instruction +/// +//===----------------------------------------------------------------------===// +// + +#include "AMDGPU.h" +#include "AMDGPUSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "SIInstrInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/Debug.h" +#include "llvm/Target/TargetMachine.h" + +#define DEBUG_TYPE "si-img-init" + +using namespace llvm; + +namespace { + +class SIAddIMGInit : public MachineFunctionPass { +public: + static char ID; + +public: + SIAddIMGInit() : MachineFunctionPass(ID) { + initializeSIAddIMGInitPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } +}; + +} // End anonymous namespace. + +INITIALIZE_PASS(SIAddIMGInit, DEBUG_TYPE, "SI Add IMG Init", false, false) + +char SIAddIMGInit::ID = 0; + +char &llvm::SIAddIMGInitID = SIAddIMGInit::ID; + +FunctionPass *llvm::createSIAddIMGInitPass() { return new SIAddIMGInit(); } + +bool SIAddIMGInit::runOnMachineFunction(MachineFunction &MF) { + MachineRegisterInfo &MRI = MF.getRegInfo(); + const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); + const SIInstrInfo *TII = ST.getInstrInfo(); + const SIRegisterInfo *RI = ST.getRegisterInfo(); + bool Changed = false; + + for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE; + ++BI) { + MachineBasicBlock &MBB = *BI; + MachineBasicBlock::iterator I, Next; + for (I = MBB.begin(); I != MBB.end(); I = Next) { + Next = std::next(I); + MachineInstr &MI = *I; + + auto Opcode = MI.getOpcode(); + if (TII->isMIMG(Opcode) && !MI.mayStore()) { + MachineOperand *TFE = TII->getNamedOperand(MI, AMDGPU::OpName::tfe); + MachineOperand *LWE = TII->getNamedOperand(MI, AMDGPU::OpName::lwe); + MachineOperand *D16 = TII->getNamedOperand(MI, AMDGPU::OpName::d16); + + // Check for instructions that don't have tfe or lwe fields + // There shouldn't be any at this point. + assert( (TFE && LWE) && "Expected tfe and lwe operands in instruction"); + + unsigned TFEVal = TFE->getImm(); + unsigned LWEVal = LWE->getImm(); + unsigned D16Val = D16 ? D16->getImm() : 0; + + if (TFEVal || LWEVal) { + // At least one of TFE or LWE are non-zero + // We have to insert a suitable initialization of the result value and + // tie this to the dest of the image instruction. + + const DebugLoc &DL = MI.getDebugLoc(); + + int DstIdx = + AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata); + + // Calculate which dword we have to initialize to 0. + MachineOperand *MO_Dmask = + TII->getNamedOperand(MI, AMDGPU::OpName::dmask); + + // check that dmask operand is found. + assert(MO_Dmask && "Expected dmask operand in instruction"); + + unsigned dmask = MO_Dmask->getImm(); + // Determine the number of active lanes taking into account the + // Gather4 special case + unsigned ActiveLanes = + TII->isGather4(Opcode) ? 4 : countPopulation(dmask); + + // Subreg indices are counted from 1 + // When D16 then we want next whole VGPR after write data. + static_assert(AMDGPU::sub0 == 1 && AMDGPU::sub4 == 5, "Subreg indices different from expected"); + + bool Packed = !ST.hasUnpackedD16VMem(); + + unsigned InitIdx = + D16Val && Packed ? ((ActiveLanes + 1) >> 1) + 1 : ActiveLanes + 1; + + // Abandon attempt if the dst size isn't large enough + // - this is in fact an error but this is picked up elsewhere and + // reported correctly. + uint32_t DstSize = + RI->getRegSizeInBits(*TII->getOpRegClass(MI, DstIdx)) / 32; + if (DstSize < InitIdx) + continue; + + // Create a register for the intialization value. + unsigned PrevDst = + MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx)); + unsigned NewDst = 0; // Final initialized value will be in here + + // If PRTStrictNull feature is enabled (the default) then initialize + // all the result registers to 0, otherwise just the error indication + // register (VGPRn+1) + unsigned SizeLeft = ST.usePRTStrictNull() ? InitIdx : 1; + unsigned CurrIdx = ST.usePRTStrictNull() ? 1 : InitIdx; + + if (DstSize == 1) { + // In this case we can just initialize the result directly + BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), PrevDst) + .addImm(0); + NewDst = PrevDst; + } else { + BuildMI(MBB, MI, DL, TII->get(AMDGPU::IMPLICIT_DEF), PrevDst); + for (; SizeLeft; SizeLeft--, CurrIdx++) { + NewDst = + MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx)); + // Initialize dword + unsigned SubReg = + MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), SubReg) + .addImm(0); + // Insert into the super-reg + BuildMI(MBB, I, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewDst) + .addReg(PrevDst) + .addReg(SubReg) + .addImm(CurrIdx); + + PrevDst = NewDst; + } + } + + // Add as an implicit operand + MachineInstrBuilder(MF, MI).addReg(NewDst, RegState::Implicit); + + // Tie the just added implicit operand to the dst + MI.tieOperands(DstIdx, MI.getNumOperands() - 1); + + Changed = true; + } + } + } + } + + return Changed; +} |