//===- AMDGPURegisterBankInfo.cpp -------------------------------*- C++ -*-==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// \file /// This file implements the targeting of the RegisterBankInfo class for /// AMDGPU. /// \todo This should be generated by TableGen. //===----------------------------------------------------------------------===// #include "AMDGPURegisterBankInfo.h" #include "AMDGPUInstrInfo.h" #include "AMDGPUSubtarget.h" #include "SIMachineFunctionInfo.h" #include "SIRegisterInfo.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/ADT/SmallSet.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/GlobalISel/RegisterBank.h" #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/Constants.h" #define GET_TARGET_REGBANK_IMPL #include "AMDGPUGenRegisterBank.inc" // This file will be TableGen'ed at some point. #include "AMDGPUGenRegisterBankInfo.def" using namespace llvm; AMDGPURegisterBankInfo::AMDGPURegisterBankInfo(const TargetRegisterInfo &TRI) : AMDGPUGenRegisterBankInfo(), TRI(static_cast(&TRI)) { // HACK: Until this is fully tablegen'd. static bool AlreadyInit = false; if (AlreadyInit) return; AlreadyInit = true; const RegisterBank &RBSGPR = getRegBank(AMDGPU::SGPRRegBankID); (void)RBSGPR; assert(&RBSGPR == &AMDGPU::SGPRRegBank); const RegisterBank &RBVGPR = getRegBank(AMDGPU::VGPRRegBankID); (void)RBVGPR; assert(&RBVGPR == &AMDGPU::VGPRRegBank); } unsigned AMDGPURegisterBankInfo::copyCost(const RegisterBank &Dst, const RegisterBank &Src, unsigned Size) const { if (Dst.getID() == AMDGPU::SGPRRegBankID && Src.getID() == AMDGPU::VGPRRegBankID) { return std::numeric_limits::max(); } // SGPRRegBank with size 1 is actually vcc or another 64-bit sgpr written by // the valu. if (Size == 1 && Dst.getID() == AMDGPU::SCCRegBankID && (Src.getID() == AMDGPU::SGPRRegBankID || Src.getID() == AMDGPU::VGPRRegBankID || Src.getID() == AMDGPU::VCCRegBankID)) return std::numeric_limits::max(); if (Dst.getID() == AMDGPU::SCCRegBankID && Src.getID() == AMDGPU::VCCRegBankID) return std::numeric_limits::max(); return RegisterBankInfo::copyCost(Dst, Src, Size); } unsigned AMDGPURegisterBankInfo::getBreakDownCost( const ValueMapping &ValMapping, const RegisterBank *CurBank) const { assert(ValMapping.NumBreakDowns == 2 && ValMapping.BreakDown[0].Length == 32 && ValMapping.BreakDown[0].StartIdx == 0 && ValMapping.BreakDown[1].Length == 32 && ValMapping.BreakDown[1].StartIdx == 32 && ValMapping.BreakDown[0].RegBank == ValMapping.BreakDown[1].RegBank); // 32-bit extract of a 64-bit value is just access of a subregister, so free. // TODO: Cost of 0 hits assert, though it's not clear it's what we really // want. // TODO: 32-bit insert to a 64-bit SGPR may incur a non-free copy due to SGPR // alignment restrictions, but this probably isn't important. return 1; } const RegisterBank &AMDGPURegisterBankInfo::getRegBankFromRegClass( const TargetRegisterClass &RC) const { if (TRI->isSGPRClass(&RC)) return getRegBank(AMDGPU::SGPRRegBankID); return getRegBank(AMDGPU::VGPRRegBankID); } RegisterBankInfo::InstructionMappings AMDGPURegisterBankInfo::getInstrAlternativeMappings( const MachineInstr &MI) const { const MachineFunction &MF = *MI.getParent()->getParent(); const MachineRegisterInfo &MRI = MF.getRegInfo(); InstructionMappings AltMappings; switch (MI.getOpcode()) { case TargetOpcode::G_AND: case TargetOpcode::G_OR: case TargetOpcode::G_XOR: { unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI); if (Size != 64) break; const InstructionMapping &SSMapping = getInstructionMapping( 1, 1, getOperandsMapping( {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size), AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size), AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}), 3); // Num Operands AltMappings.push_back(&SSMapping); const InstructionMapping &VVMapping = getInstructionMapping( 2, 2, getOperandsMapping( {AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size), AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size), AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size)}), 3); // Num Operands AltMappings.push_back(&VVMapping); const InstructionMapping &SVMapping = getInstructionMapping( 3, 3, getOperandsMapping( {AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size), AMDGPU::getValueMappingSGPR64Only(AMDGPU::SGPRRegBankID, Size), AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size)}), 3); // Num Operands AltMappings.push_back(&SVMapping); // SGPR in LHS is slightly preferrable, so make it VS more expnesive than // SV. const InstructionMapping &VSMapping = getInstructionMapping( 3, 4, getOperandsMapping( {AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size), AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size), AMDGPU::getValueMappingSGPR64Only(AMDGPU::SGPRRegBankID, Size)}), 3); // Num Operands AltMappings.push_back(&VSMapping); break; } case TargetOpcode::G_LOAD: { unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI); // FIXME: Should we be hard coding the size for these mappings? const InstructionMapping &SSMapping = getInstructionMapping( 1, 1, getOperandsMapping( {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size), AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64)}), 2); // Num Operands AltMappings.push_back(&SSMapping); const InstructionMapping &VVMapping = getInstructionMapping( 2, 1, getOperandsMapping( {AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size), AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64)}), 2); // Num Operands AltMappings.push_back(&VVMapping); // FIXME: Should this be the pointer-size (64-bits) or the size of the // register that will hold the bufffer resourc (128-bits). const InstructionMapping &VSMapping = getInstructionMapping( 3, 1, getOperandsMapping( {AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size), AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64)}), 2); // Num Operands AltMappings.push_back(&VSMapping); return AltMappings; } case TargetOpcode::G_ICMP: { unsigned Size = getSizeInBits(MI.getOperand(2).getReg(), MRI, *TRI); const InstructionMapping &SSMapping = getInstructionMapping(1, 1, getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, 1), nullptr, // Predicate operand. AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size), AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}), 4); // Num Operands AltMappings.push_back(&SSMapping); const InstructionMapping &SVMapping = getInstructionMapping(2, 1, getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1), nullptr, // Predicate operand. AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size), AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size)}), 4); // Num Operands AltMappings.push_back(&SVMapping); const InstructionMapping &VSMapping = getInstructionMapping(3, 1, getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1), nullptr, // Predicate operand. AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size), AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}), 4); // Num Operands AltMappings.push_back(&VSMapping); const InstructionMapping &VVMapping = getInstructionMapping(4, 1, getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1), nullptr, // Predicate operand. AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size), AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size)}), 4); // Num Operands AltMappings.push_back(&VVMapping); return AltMappings; } case TargetOpcode::G_SELECT: { unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI); const InstructionMapping &SSMapping = getInstructionMapping(1, 1, getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size), AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, 1), AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size), AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}), 4); // Num Operands AltMappings.push_back(&SSMapping); const InstructionMapping &VVMapping = getInstructionMapping(2, 1, getOperandsMapping({AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size), AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1), AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size), AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size)}), 4); // Num Operands AltMappings.push_back(&VVMapping); return AltMappings; } case TargetOpcode::G_UADDE: case TargetOpcode::G_USUBE: case TargetOpcode::G_SADDE: case TargetOpcode::G_SSUBE: { unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI); const InstructionMapping &SSMapping = getInstructionMapping(1, 1, getOperandsMapping( {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size), AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, 1), AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size), AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size), AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, 1)}), 5); // Num Operands AltMappings.push_back(&SSMapping); const InstructionMapping &VVMapping = getInstructionMapping(2, 1, getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size), AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1), AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size), AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size), AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1)}), 5); // Num Operands AltMappings.push_back(&VVMapping); return AltMappings; } case AMDGPU::G_BRCOND: { assert(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() == 1); const InstructionMapping &SMapping = getInstructionMapping( 1, 1, getOperandsMapping( {AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, 1), nullptr}), 2); // Num Operands AltMappings.push_back(&SMapping); const InstructionMapping &VMapping = getInstructionMapping( 1, 1, getOperandsMapping( {AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1), nullptr }), 2); // Num Operands AltMappings.push_back(&VMapping); return AltMappings; } default: break; } return RegisterBankInfo::getInstrAlternativeMappings(MI); } void AMDGPURegisterBankInfo::split64BitValueForMapping( MachineIRBuilder &B, SmallVector &Regs, LLT HalfTy, unsigned Reg) const { assert(HalfTy.getSizeInBits() == 32); MachineRegisterInfo *MRI = B.getMRI(); unsigned LoLHS = MRI->createGenericVirtualRegister(HalfTy); unsigned HiLHS = MRI->createGenericVirtualRegister(HalfTy); const RegisterBank *Bank = getRegBank(Reg, *MRI, *TRI); MRI->setRegBank(LoLHS, *Bank); MRI->setRegBank(HiLHS, *Bank); Regs.push_back(LoLHS); Regs.push_back(HiLHS); B.buildInstr(AMDGPU::G_UNMERGE_VALUES) .addDef(LoLHS) .addDef(HiLHS) .addUse(Reg); } /// Replace the current type each register in \p Regs has with \p NewTy static void setRegsToType(MachineRegisterInfo &MRI, ArrayRef Regs, LLT NewTy) { for (unsigned Reg : Regs) { assert(MRI.getType(Reg).getSizeInBits() == NewTy.getSizeInBits()); MRI.setType(Reg, NewTy); } } static LLT getHalfSizedType(LLT Ty) { if (Ty.isVector()) { assert(Ty.getNumElements() % 2 == 0); return LLT::scalarOrVector(Ty.getNumElements() / 2, Ty.getElementType()); } assert(Ty.getSizeInBits() % 2 == 0); return LLT::scalar(Ty.getSizeInBits() / 2); } /// Legalize instruction \p MI where operands in \p OpIndices must be SGPRs. If /// any of the required SGPR operands are VGPRs, perform a waterfall loop to /// execute the instruction for each unique combination of values in all lanes /// in the wave. The block will be split such that new blocks void AMDGPURegisterBankInfo::executeInWaterfallLoop( MachineInstr &MI, MachineRegisterInfo &MRI, ArrayRef OpIndices) const { MachineFunction *MF = MI.getParent()->getParent(); const GCNSubtarget &ST = MF->getSubtarget(); const SIInstrInfo *TII = ST.getInstrInfo(); MachineBasicBlock::iterator I(MI); MachineBasicBlock &MBB = *MI.getParent(); const DebugLoc &DL = MI.getDebugLoc(); assert(OpIndices.size() == 1 && "need to implement support for multiple operands"); // Use a set to avoid extra readfirstlanes in the case where multiple operands // are the same register. SmallSet SGPROperandRegs; for (unsigned Op : OpIndices) { assert(MI.getOperand(Op).isUse()); unsigned Reg = MI.getOperand(Op).getReg(); const RegisterBank *OpBank = getRegBank(Reg, MRI, *TRI); if (OpBank->getID() == AMDGPU::VGPRRegBankID) SGPROperandRegs.insert(Reg); } // No operands need to be replaced, so no need to loop. if (SGPROperandRegs.empty()) return; MachineIRBuilder B(MI); SmallVector ResultRegs; SmallVector InitResultRegs; SmallVector PhiRegs; for (MachineOperand &Def : MI.defs()) { LLT ResTy = MRI.getType(Def.getReg()); const RegisterBank *DefBank = getRegBank(Def.getReg(), MRI, *TRI); ResultRegs.push_back(Def.getReg()); unsigned InitReg = B.buildUndef(ResTy).getReg(0); unsigned PhiReg = MRI.createGenericVirtualRegister(ResTy); InitResultRegs.push_back(InitReg); PhiRegs.push_back(PhiReg); MRI.setRegBank(PhiReg, *DefBank); MRI.setRegBank(InitReg, *DefBank); } unsigned SaveExecReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass); unsigned InitSaveExecReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass); // Don't bother using generic instructions/registers for the exec mask. B.buildInstr(TargetOpcode::IMPLICIT_DEF) .addDef(InitSaveExecReg); // Save the EXEC mask BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B64_term), SaveExecReg) .addReg(AMDGPU::EXEC); unsigned PhiExec = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); unsigned NewExec = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); unsigned CondReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); // To insert the loop we need to split the block. Move everything before this // point to a new block, and insert a new empty block before this instruction. MachineBasicBlock *LoopBB = MF->CreateMachineBasicBlock(); MachineBasicBlock *RemainderBB = MF->CreateMachineBasicBlock(); MachineBasicBlock *RestoreExecBB = MF->CreateMachineBasicBlock(); MachineFunction::iterator MBBI(MBB); ++MBBI; MF->insert(MBBI, LoopBB); MF->insert(MBBI, RestoreExecBB); MF->insert(MBBI, RemainderBB); LoopBB->addSuccessor(RestoreExecBB); LoopBB->addSuccessor(LoopBB); // Move the rest of the block into a new block. RemainderBB->transferSuccessorsAndUpdatePHIs(&MBB); RemainderBB->splice(RemainderBB->begin(), &MBB, I, MBB.end()); MBB.addSuccessor(LoopBB); RestoreExecBB->addSuccessor(RemainderBB); B.setInsertPt(*LoopBB, LoopBB->end()); B.buildInstr(TargetOpcode::PHI) .addDef(PhiExec) .addReg(InitSaveExecReg) .addMBB(&MBB) .addReg(NewExec) .addMBB(LoopBB); for (auto Result : zip(InitResultRegs, ResultRegs, PhiRegs)) { B.buildInstr(TargetOpcode::G_PHI) .addDef(std::get<2>(Result)) .addReg(std::get<0>(Result)) // Initial value / implicit_def .addMBB(&MBB) .addReg(std::get<1>(Result)) // Mid-loop value. .addMBB(LoopBB); } // Move the instruction into the loop. LoopBB->splice(LoopBB->end(), &MBB, I); I = std::prev(LoopBB->end()); for (MachineOperand &Op : MI.uses()) { if (!Op.isReg()) continue; assert(!Op.isDef()); if (SGPROperandRegs.count(Op.getReg())) { unsigned CurrentLaneOpReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); MRI.setType(CurrentLaneOpReg, LLT::scalar(32)); // FIXME assert(MRI.getType(Op.getReg())== LLT::scalar(32) && "need to implement support for other types"); constrainGenericRegister(Op.getReg(), AMDGPU::VGPR_32RegClass, MRI); // Read the next variant <- also loop target. BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), CurrentLaneOpReg) .addReg(Op.getReg()); // FIXME: Need to and each conditon // Compare the just read SGPR value to all possible operand values. B.buildInstr(AMDGPU::V_CMP_EQ_U32_e64) .addDef(CondReg) .addReg(CurrentLaneOpReg) .addReg(Op.getReg()); Op.setReg(CurrentLaneOpReg); } } // Update EXEC, save the original EXEC value to VCC. B.buildInstr(AMDGPU::S_AND_SAVEEXEC_B64) .addDef(NewExec) .addReg(CondReg, RegState::Kill); MRI.setSimpleHint(NewExec, CondReg); // Update EXEC, switch all done bits to 0 and all todo bits to 1. B.buildInstr(AMDGPU::S_XOR_B64_term) .addDef(AMDGPU::EXEC) .addReg(AMDGPU::EXEC) .addReg(NewExec); // XXX - s_xor_b64 sets scc to 1 if the result is nonzero, so can we use // s_cbranch_scc0? // Loop back to V_READFIRSTLANE_B32 if there are still variants to cover. B.buildInstr(AMDGPU::S_CBRANCH_EXECNZ) .addMBB(LoopBB); // Restore the EXEC mask B.buildInstr(AMDGPU::S_MOV_B64_term) .addDef(AMDGPU::EXEC) .addReg(SaveExecReg); } void AMDGPURegisterBankInfo::applyMappingImpl( const OperandsMapper &OpdMapper) const { MachineInstr &MI = OpdMapper.getMI(); unsigned Opc = MI.getOpcode(); MachineRegisterInfo &MRI = OpdMapper.getMRI(); switch (Opc) { case AMDGPU::G_SELECT: { unsigned DstReg = MI.getOperand(0).getReg(); LLT DstTy = MRI.getType(DstReg); if (DstTy.getSizeInBits() != 64) break; LLT HalfTy = getHalfSizedType(DstTy); SmallVector DefRegs(OpdMapper.getVRegs(0)); SmallVector Src0Regs(OpdMapper.getVRegs(1)); SmallVector Src1Regs(OpdMapper.getVRegs(2)); SmallVector Src2Regs(OpdMapper.getVRegs(3)); // All inputs are SGPRs, nothing special to do. if (DefRegs.empty()) { assert(Src1Regs.empty() && Src2Regs.empty()); break; } MachineIRBuilder B(MI); if (Src0Regs.empty()) Src0Regs.push_back(MI.getOperand(1).getReg()); else { assert(Src0Regs.size() == 1); } if (Src1Regs.empty()) split64BitValueForMapping(B, Src1Regs, HalfTy, MI.getOperand(2).getReg()); else { setRegsToType(MRI, Src1Regs, HalfTy); } if (Src2Regs.empty()) split64BitValueForMapping(B, Src2Regs, HalfTy, MI.getOperand(3).getReg()); else setRegsToType(MRI, Src2Regs, HalfTy); setRegsToType(MRI, DefRegs, HalfTy); B.buildSelect(DefRegs[0], Src0Regs[0], Src1Regs[0], Src2Regs[0]); B.buildSelect(DefRegs[1], Src0Regs[0], Src1Regs[1], Src2Regs[1]); MRI.setRegBank(DstReg, getRegBank(AMDGPU::VGPRRegBankID)); MI.eraseFromParent(); return; } case AMDGPU::G_AND: case AMDGPU::G_OR: case AMDGPU::G_XOR: { // 64-bit and is only available on the SALU, so split into 2 32-bit ops if // there is a VGPR input. unsigned DstReg = MI.getOperand(0).getReg(); LLT DstTy = MRI.getType(DstReg); if (DstTy.getSizeInBits() != 64) break; LLT HalfTy = getHalfSizedType(DstTy); SmallVector DefRegs(OpdMapper.getVRegs(0)); SmallVector Src0Regs(OpdMapper.getVRegs(1)); SmallVector Src1Regs(OpdMapper.getVRegs(2)); // All inputs are SGPRs, nothing special to do. if (DefRegs.empty()) { assert(Src0Regs.empty() && Src1Regs.empty()); break; } assert(DefRegs.size() == 2); assert(Src0Regs.size() == Src1Regs.size() && (Src0Regs.empty() || Src0Regs.size() == 2)); // Depending on where the source registers came from, the generic code may // have decided to split the inputs already or not. If not, we still need to // extract the values. MachineIRBuilder B(MI); if (Src0Regs.empty()) split64BitValueForMapping(B, Src0Regs, HalfTy, MI.getOperand(1).getReg()); else setRegsToType(MRI, Src0Regs, HalfTy); if (Src1Regs.empty()) split64BitValueForMapping(B, Src1Regs, HalfTy, MI.getOperand(2).getReg()); else setRegsToType(MRI, Src1Regs, HalfTy); setRegsToType(MRI, DefRegs, HalfTy); B.buildInstr(Opc) .addDef(DefRegs[0]) .addUse(Src0Regs[0]) .addUse(Src1Regs[0]); B.buildInstr(Opc) .addDef(DefRegs[1]) .addUse(Src0Regs[1]) .addUse(Src1Regs[1]); MRI.setRegBank(DstReg, getRegBank(AMDGPU::VGPRRegBankID)); MI.eraseFromParent(); return; } case AMDGPU::G_EXTRACT_VECTOR_ELT: applyDefaultMapping(OpdMapper); executeInWaterfallLoop(MI, MRI, { 2 }); return; default: break; } return applyDefaultMapping(OpdMapper); } static bool isInstrUniform(const MachineInstr &MI) { if (!MI.hasOneMemOperand()) return false; const MachineMemOperand *MMO = *MI.memoperands_begin(); return AMDGPUInstrInfo::isUniformMMO(MMO); } bool AMDGPURegisterBankInfo::isSALUMapping(const MachineInstr &MI) const { const MachineFunction &MF = *MI.getParent()->getParent(); const MachineRegisterInfo &MRI = MF.getRegInfo(); for (unsigned i = 0, e = MI.getNumOperands();i != e; ++i) { if (!MI.getOperand(i).isReg()) continue; unsigned Reg = MI.getOperand(i).getReg(); if (const RegisterBank *Bank = getRegBank(Reg, MRI, *TRI)) { if (Bank->getID() == AMDGPU::VGPRRegBankID) return false; assert(Bank->getID() == AMDGPU::SGPRRegBankID || Bank->getID() == AMDGPU::SCCRegBankID); } } return true; } const RegisterBankInfo::InstructionMapping & AMDGPURegisterBankInfo::getDefaultMappingSOP(const MachineInstr &MI) const { const MachineFunction &MF = *MI.getParent()->getParent(); const MachineRegisterInfo &MRI = MF.getRegInfo(); SmallVector OpdsMapping(MI.getNumOperands()); for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { unsigned Size = getSizeInBits(MI.getOperand(i).getReg(), MRI, *TRI); unsigned BankID = Size == 1 ? AMDGPU::SCCRegBankID : AMDGPU::SGPRRegBankID; OpdsMapping[i] = AMDGPU::getValueMapping(BankID, Size); } return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping), MI.getNumOperands()); } const RegisterBankInfo::InstructionMapping & AMDGPURegisterBankInfo::getDefaultMappingVOP(const MachineInstr &MI) const { const MachineFunction &MF = *MI.getParent()->getParent(); const MachineRegisterInfo &MRI = MF.getRegInfo(); SmallVector OpdsMapping(MI.getNumOperands()); unsigned OpdIdx = 0; unsigned Size0 = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI); OpdsMapping[OpdIdx++] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size0); if (MI.getOperand(OpdIdx).isIntrinsicID()) OpdsMapping[OpdIdx++] = nullptr; unsigned Reg1 = MI.getOperand(OpdIdx).getReg(); unsigned Size1 = getSizeInBits(Reg1, MRI, *TRI); unsigned DefaultBankID = Size1 == 1 ? AMDGPU::VCCRegBankID : AMDGPU::VGPRRegBankID; unsigned Bank1 = getRegBankID(Reg1, MRI, *TRI, DefaultBankID); OpdsMapping[OpdIdx++] = AMDGPU::getValueMapping(Bank1, Size1); for (unsigned e = MI.getNumOperands(); OpdIdx != e; ++OpdIdx) { unsigned Size = getSizeInBits(MI.getOperand(OpdIdx).getReg(), MRI, *TRI); unsigned BankID = Size == 1 ? AMDGPU::VCCRegBankID : AMDGPU::VGPRRegBankID; OpdsMapping[OpdIdx] = AMDGPU::getValueMapping(BankID, Size); } return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping), MI.getNumOperands()); } const RegisterBankInfo::InstructionMapping & AMDGPURegisterBankInfo::getDefaultMappingAllVGPR(const MachineInstr &MI) const { const MachineFunction &MF = *MI.getParent()->getParent(); const MachineRegisterInfo &MRI = MF.getRegInfo(); SmallVector OpdsMapping(MI.getNumOperands()); for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) { unsigned Size = getSizeInBits(MI.getOperand(I).getReg(), MRI, *TRI); OpdsMapping[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size); } return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping), MI.getNumOperands()); } const RegisterBankInfo::InstructionMapping & AMDGPURegisterBankInfo::getInstrMappingForLoad(const MachineInstr &MI) const { const MachineFunction &MF = *MI.getParent()->getParent(); const MachineRegisterInfo &MRI = MF.getRegInfo(); SmallVector OpdsMapping(MI.getNumOperands()); unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI); unsigned PtrSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI); const ValueMapping *ValMapping; const ValueMapping *PtrMapping; if (isInstrUniform(MI)) { // We have a uniform instruction so we want to use an SMRD load ValMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size); PtrMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, PtrSize); } else { ValMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size); // FIXME: What would happen if we used SGPRRegBankID here? PtrMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, PtrSize); } OpdsMapping[0] = ValMapping; OpdsMapping[1] = PtrMapping; const RegisterBankInfo::InstructionMapping &Mapping = getInstructionMapping( 1, 1, getOperandsMapping(OpdsMapping), MI.getNumOperands()); return Mapping; // FIXME: Do we want to add a mapping for FLAT load, or should we just // handle that during instruction selection? } unsigned AMDGPURegisterBankInfo::getRegBankID(unsigned Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, unsigned Default) const { const RegisterBank *Bank = getRegBank(Reg, MRI, TRI); return Bank ? Bank->getID() : Default; } /// /// This function must return a legal mapping, because /// AMDGPURegisterBankInfo::getInstrAlternativeMappings() is not called /// in RegBankSelect::Mode::Fast. Any mapping that would cause a /// VGPR to SGPR generated is illegal. /// const RegisterBankInfo::InstructionMapping & AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { const RegisterBankInfo::InstructionMapping &Mapping = getInstrMappingImpl(MI); if (Mapping.isValid()) return Mapping; const MachineFunction &MF = *MI.getParent()->getParent(); const MachineRegisterInfo &MRI = MF.getRegInfo(); SmallVector OpdsMapping(MI.getNumOperands()); switch (MI.getOpcode()) { default: return getInvalidInstructionMapping(); case AMDGPU::G_AND: case AMDGPU::G_OR: case AMDGPU::G_XOR: { unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); if (Size == 1) { OpdsMapping[0] = OpdsMapping[1] = OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size); break; } if (Size == 64) { if (isSALUMapping(MI)) { OpdsMapping[0] = getValueMappingSGPR64Only(AMDGPU::SGPRRegBankID, Size); OpdsMapping[1] = OpdsMapping[2] = OpdsMapping[0]; } else { OpdsMapping[0] = getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size); unsigned Bank1 = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI/*, DefaultBankID*/); OpdsMapping[1] = AMDGPU::getValueMapping(Bank1, Size); unsigned Bank2 = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI/*, DefaultBankID*/); OpdsMapping[2] = AMDGPU::getValueMapping(Bank2, Size); } break; } LLVM_FALLTHROUGH; } case AMDGPU::G_GEP: case AMDGPU::G_ADD: case AMDGPU::G_SUB: case AMDGPU::G_MUL: case AMDGPU::G_SHL: case AMDGPU::G_LSHR: case AMDGPU::G_ASHR: case AMDGPU::G_UADDO: case AMDGPU::G_SADDO: case AMDGPU::G_USUBO: case AMDGPU::G_SSUBO: case AMDGPU::G_UADDE: case AMDGPU::G_SADDE: case AMDGPU::G_USUBE: case AMDGPU::G_SSUBE: case AMDGPU::G_UMULH: case AMDGPU::G_SMULH: if (isSALUMapping(MI)) return getDefaultMappingSOP(MI); LLVM_FALLTHROUGH; case AMDGPU::G_FADD: case AMDGPU::G_FSUB: case AMDGPU::G_FPTOSI: case AMDGPU::G_FPTOUI: case AMDGPU::G_FMUL: case AMDGPU::G_FMA: case AMDGPU::G_FSQRT: case AMDGPU::G_SITOFP: case AMDGPU::G_UITOFP: case AMDGPU::G_FPTRUNC: case AMDGPU::G_FPEXT: case AMDGPU::G_FEXP2: case AMDGPU::G_FLOG2: case AMDGPU::G_INTRINSIC_TRUNC: case AMDGPU::G_INTRINSIC_ROUND: return getDefaultMappingVOP(MI); case AMDGPU::G_IMPLICIT_DEF: { unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size); break; } case AMDGPU::G_FCONSTANT: case AMDGPU::G_CONSTANT: case AMDGPU::G_FRAME_INDEX: case AMDGPU::G_BLOCK_ADDR: { unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size); break; } case AMDGPU::G_INSERT: { unsigned BankID = isSALUMapping(MI) ? AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID; unsigned DstSize = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI); unsigned SrcSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI); unsigned EltSize = getSizeInBits(MI.getOperand(2).getReg(), MRI, *TRI); OpdsMapping[0] = AMDGPU::getValueMapping(BankID, DstSize); OpdsMapping[1] = AMDGPU::getValueMapping(BankID, SrcSize); OpdsMapping[2] = AMDGPU::getValueMapping(BankID, EltSize); OpdsMapping[3] = nullptr; break; } case AMDGPU::G_EXTRACT: { unsigned BankID = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI); unsigned DstSize = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI); unsigned SrcSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI); OpdsMapping[0] = AMDGPU::getValueMapping(BankID, DstSize); OpdsMapping[1] = AMDGPU::getValueMapping(BankID, SrcSize); OpdsMapping[2] = nullptr; break; } case AMDGPU::G_MERGE_VALUES: { unsigned Bank = isSALUMapping(MI) ? AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID; unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); unsigned SrcSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); OpdsMapping[0] = AMDGPU::getValueMapping(Bank, DstSize); // Op1 and Dst should use the same register bank. for (unsigned i = 1, e = MI.getNumOperands(); i != e; ++i) OpdsMapping[i] = AMDGPU::getValueMapping(Bank, SrcSize); break; } case AMDGPU::G_BITCAST: case AMDGPU::G_INTTOPTR: case AMDGPU::G_PTRTOINT: case AMDGPU::G_CTLZ: case AMDGPU::G_CTLZ_ZERO_UNDEF: case AMDGPU::G_CTTZ: case AMDGPU::G_CTTZ_ZERO_UNDEF: case AMDGPU::G_CTPOP: case AMDGPU::G_BSWAP: case AMDGPU::G_FABS: case AMDGPU::G_FNEG: { unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); unsigned BankID = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI); OpdsMapping[0] = OpdsMapping[1] = AMDGPU::getValueMapping(BankID, Size); break; } case AMDGPU::G_TRUNC: { unsigned Dst = MI.getOperand(0).getReg(); unsigned Src = MI.getOperand(1).getReg(); unsigned Bank = getRegBankID(Src, MRI, *TRI); unsigned DstSize = getSizeInBits(Dst, MRI, *TRI); unsigned SrcSize = getSizeInBits(Src, MRI, *TRI); OpdsMapping[0] = AMDGPU::getValueMapping(Bank, DstSize); OpdsMapping[1] = AMDGPU::getValueMapping(Bank, SrcSize); break; } case AMDGPU::G_ZEXT: case AMDGPU::G_SEXT: case AMDGPU::G_ANYEXT: { unsigned Dst = MI.getOperand(0).getReg(); unsigned Src = MI.getOperand(1).getReg(); unsigned DstSize = getSizeInBits(Dst, MRI, *TRI); unsigned SrcSize = getSizeInBits(Src, MRI, *TRI); unsigned SrcBank = getRegBankID(Src, MRI, *TRI, SrcSize == 1 ? AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID); unsigned DstBank = SrcBank; if (SrcSize == 1) { if (SrcBank == AMDGPU::SGPRRegBankID) DstBank = AMDGPU::VGPRRegBankID; else DstBank = AMDGPU::SGPRRegBankID; } OpdsMapping[0] = AMDGPU::getValueMapping(DstBank, DstSize); OpdsMapping[1] = AMDGPU::getValueMapping(SrcBank, SrcSize); break; } case AMDGPU::G_FCMP: { unsigned Size = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits(); unsigned Op2Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI); OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1); OpdsMapping[1] = nullptr; // Predicate Operand. OpdsMapping[2] = AMDGPU::getValueMapping(Op2Bank, Size); OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size); break; } case AMDGPU::G_STORE: { assert(MI.getOperand(0).isReg()); unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); // FIXME: We need to specify a different reg bank once scalar stores // are supported. const ValueMapping *ValMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size); // FIXME: Depending on the type of store, the pointer could be in // the SGPR Reg bank. // FIXME: Pointer size should be based on the address space. const ValueMapping *PtrMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64); OpdsMapping[0] = ValMapping; OpdsMapping[1] = PtrMapping; break; } case AMDGPU::G_ICMP: { unsigned Size = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits(); unsigned Op2Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI); unsigned Op3Bank = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI); unsigned Op0Bank = Op2Bank == AMDGPU::SGPRRegBankID && Op3Bank == AMDGPU::SGPRRegBankID ? AMDGPU::SCCRegBankID : AMDGPU::VCCRegBankID; OpdsMapping[0] = AMDGPU::getValueMapping(Op0Bank, 1); OpdsMapping[1] = nullptr; // Predicate Operand. OpdsMapping[2] = AMDGPU::getValueMapping(Op2Bank, Size); OpdsMapping[3] = AMDGPU::getValueMapping(Op3Bank, Size); break; } case AMDGPU::G_EXTRACT_VECTOR_ELT: { unsigned OutputBankID = isSALUMapping(MI) ? AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID; unsigned SrcSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); unsigned IdxSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits(); unsigned IdxBank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI); OpdsMapping[0] = AMDGPU::getValueMapping(OutputBankID, SrcSize); OpdsMapping[1] = AMDGPU::getValueMapping(OutputBankID, SrcSize); // The index can be either if the source vector is VGPR. OpdsMapping[2] = AMDGPU::getValueMapping(IdxBank, IdxSize); break; } case AMDGPU::G_INSERT_VECTOR_ELT: { unsigned OutputBankID = isSALUMapping(MI) ? AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID; unsigned VecSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); unsigned InsertSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits(); unsigned IdxSize = MRI.getType(MI.getOperand(3).getReg()).getSizeInBits(); unsigned InsertEltBank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI); unsigned IdxBank = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI); OpdsMapping[0] = AMDGPU::getValueMapping(OutputBankID, VecSize); OpdsMapping[1] = AMDGPU::getValueMapping(OutputBankID, VecSize); OpdsMapping[2] = AMDGPU::getValueMapping(InsertEltBank, InsertSize); // The index can be either if the source vector is VGPR. OpdsMapping[3] = AMDGPU::getValueMapping(IdxBank, IdxSize); break; } case AMDGPU::G_UNMERGE_VALUES: { unsigned Bank = isSALUMapping(MI) ? AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID; // Op1 and Dst should use the same register bank. // FIXME: Shouldn't this be the default? Why do we need to handle this? for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { unsigned Size = getSizeInBits(MI.getOperand(i).getReg(), MRI, *TRI); OpdsMapping[i] = AMDGPU::getValueMapping(Bank, Size); } break; } case AMDGPU::G_INTRINSIC: { switch (MI.getOperand(1).getIntrinsicID()) { default: return getInvalidInstructionMapping(); case Intrinsic::maxnum: case Intrinsic::minnum: case Intrinsic::amdgcn_cvt_pkrtz: return getDefaultMappingVOP(MI); case Intrinsic::amdgcn_kernarg_segment_ptr: { unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size); break; } case Intrinsic::amdgcn_wqm_vote: { unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); OpdsMapping[0] = OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size); break; } } break; } case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: { switch (MI.getOperand(0).getIntrinsicID()) { default: return getInvalidInstructionMapping(); case Intrinsic::amdgcn_exp_compr: OpdsMapping[0] = nullptr; // IntrinsicID // FIXME: These are immediate values which can't be read from registers. OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32); OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32); // FIXME: Could we support packed types here? OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32); OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32); // FIXME: These are immediate values which can't be read from registers. OpdsMapping[5] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32); OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32); break; case Intrinsic::amdgcn_exp: OpdsMapping[0] = nullptr; // IntrinsicID // FIXME: These are immediate values which can't be read from registers. OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32); OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32); // FIXME: Could we support packed types here? OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32); OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32); OpdsMapping[5] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32); OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32); // FIXME: These are immediate values which can't be read from registers. OpdsMapping[7] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32); OpdsMapping[8] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32); break; } break; } case AMDGPU::G_SELECT: { unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); unsigned Op1Bank = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI, AMDGPU::SGPRRegBankID); unsigned Op2Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI); unsigned Op3Bank = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI); bool SGPRSrcs = Op1Bank == AMDGPU::SCCRegBankID && Op2Bank == AMDGPU::SGPRRegBankID && Op3Bank == AMDGPU::SGPRRegBankID; unsigned Bank = SGPRSrcs ? AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID; Op1Bank = SGPRSrcs ? AMDGPU::SCCRegBankID : AMDGPU::VCCRegBankID; if (Size == 64) { OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(Bank, Size); OpdsMapping[1] = AMDGPU::getValueMapping(Op1Bank, 1); OpdsMapping[2] = AMDGPU::getValueMappingSGPR64Only(Bank, Size); OpdsMapping[3] = AMDGPU::getValueMappingSGPR64Only(Bank, Size); } else { OpdsMapping[0] = AMDGPU::getValueMapping(Bank, Size); OpdsMapping[1] = AMDGPU::getValueMapping(Op1Bank, 1); OpdsMapping[2] = AMDGPU::getValueMapping(Bank, Size); OpdsMapping[3] = AMDGPU::getValueMapping(Bank, Size); } break; } case AMDGPU::G_LOAD: return getInstrMappingForLoad(MI); case AMDGPU::G_ATOMICRMW_XCHG: case AMDGPU::G_ATOMICRMW_ADD: case AMDGPU::G_ATOMICRMW_SUB: case AMDGPU::G_ATOMICRMW_AND: case AMDGPU::G_ATOMICRMW_OR: case AMDGPU::G_ATOMICRMW_XOR: case AMDGPU::G_ATOMICRMW_MAX: case AMDGPU::G_ATOMICRMW_MIN: case AMDGPU::G_ATOMICRMW_UMAX: case AMDGPU::G_ATOMICRMW_UMIN: case AMDGPU::G_ATOMIC_CMPXCHG: { return getDefaultMappingAllVGPR(MI); } case AMDGPU::G_BRCOND: { unsigned Bank = getRegBankID(MI.getOperand(0).getReg(), MRI, *TRI, AMDGPU::SGPRRegBankID); assert(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() == 1); if (Bank != AMDGPU::SCCRegBankID) Bank = AMDGPU::VCCRegBankID; OpdsMapping[0] = AMDGPU::getValueMapping(Bank, 1); break; } } return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping), MI.getNumOperands()); }