diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIInstrInfo.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 310 |
1 files changed, 9 insertions, 301 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index c87b04256f8..92e452a3d6a 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -496,188 +496,6 @@ int SIInstrInfo::commuteOpcode(unsigned Opcode) const { return Opcode; } -void SIInstrInfo::materializeImmediate(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const DebugLoc &DL, unsigned DestReg, - int64_t Value) const { - MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); - const TargetRegisterClass *RegClass = MRI.getRegClass(DestReg); - if (RegClass == &AMDGPU::SReg_32RegClass || - RegClass == &AMDGPU::SGPR_32RegClass || - RegClass == &AMDGPU::SReg_32_XM0RegClass || - RegClass == &AMDGPU::SReg_32_XM0_XEXECRegClass) { - BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg) - .addImm(Value); - return; - } - - if (RegClass == &AMDGPU::SReg_64RegClass || - RegClass == &AMDGPU::SGPR_64RegClass || - RegClass == &AMDGPU::SReg_64_XEXECRegClass) { - BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg) - .addImm(Value); - return; - } - - if (RegClass == &AMDGPU::VGPR_32RegClass) { - BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg) - .addImm(Value); - return; - } - if (RegClass == &AMDGPU::VReg_64RegClass) { - BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B64_PSEUDO), DestReg) - .addImm(Value); - return; - } - - unsigned EltSize = 4; - unsigned Opcode = AMDGPU::V_MOV_B32_e32; - if (RI.isSGPRClass(RegClass)) { - if (RI.getRegSizeInBits(*RegClass) > 32) { - Opcode = AMDGPU::S_MOV_B64; - EltSize = 8; - } else { - Opcode = AMDGPU::S_MOV_B32; - EltSize = 4; - } - } - - ArrayRef<int16_t> SubIndices = RI.getRegSplitParts(RegClass, EltSize); - for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) { - int64_t IdxValue = Idx == 0 ? Value : 0; - - MachineInstrBuilder Builder = BuildMI(MBB, MI, DL, - get(Opcode), RI.getSubReg(DestReg, Idx)); - Builder.addImm(IdxValue); - } -} - -const TargetRegisterClass * -SIInstrInfo::getPreferredSelectRegClass(unsigned Size) const { - return &AMDGPU::VGPR_32RegClass; -} - -void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - const DebugLoc &DL, unsigned DstReg, - ArrayRef<MachineOperand> Cond, - unsigned TrueReg, - unsigned FalseReg) const { - MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); - const TargetRegisterClass *RegClass = MRI.getRegClass(DstReg); - assert(RegClass == &AMDGPU::VGPR_32RegClass && "Not a VGPR32 reg"); - - if (Cond.size() == 1) { - BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) - .addReg(FalseReg) - .addReg(TrueReg) - .add(Cond[0]); - } else if (Cond.size() == 2) { - assert(Cond[0].isImm() && "Cond[0] is not an immediate"); - switch (Cond[0].getImm()) { - case SIInstrInfo::SCC_TRUE: { - unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); - BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg) - .addImm(-1) - .addImm(0); - BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) - .addReg(FalseReg) - .addReg(TrueReg) - .addReg(SReg); - break; - } - case SIInstrInfo::SCC_FALSE: { - unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); - BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg) - .addImm(0) - .addImm(-1); - BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) - .addReg(FalseReg) - .addReg(TrueReg) - .addReg(SReg); - break; - } - case SIInstrInfo::VCCNZ: { - MachineOperand RegOp = Cond[1]; - RegOp.setImplicit(false); - BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) - .addReg(FalseReg) - .addReg(TrueReg) - .add(RegOp); - break; - } - case SIInstrInfo::VCCZ: { - MachineOperand RegOp = Cond[1]; - RegOp.setImplicit(false); - BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) - .addReg(TrueReg) - .addReg(FalseReg) - .add(RegOp); - break; - } - case SIInstrInfo::EXECNZ: { - unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); - unsigned SReg2 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); - BuildMI(MBB, I, DL, get(AMDGPU::S_OR_SAVEEXEC_B64), SReg2) - .addImm(0); - BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg) - .addImm(-1) - .addImm(0); - BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) - .addReg(FalseReg) - .addReg(TrueReg) - .addReg(SReg); - break; - } - case SIInstrInfo::EXECZ: { - unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); - unsigned SReg2 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); - BuildMI(MBB, I, DL, get(AMDGPU::S_OR_SAVEEXEC_B64), SReg2) - .addImm(0); - BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg) - .addImm(0) - .addImm(-1); - BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) - .addReg(FalseReg) - .addReg(TrueReg) - .addReg(SReg); - llvm_unreachable("Unhandled branch predicate EXECZ"); - break; - } - default: - llvm_unreachable("invalid branch predicate"); - } - } else { - llvm_unreachable("Can only handle Cond size 1 or 2"); - } -} - -unsigned SIInstrInfo::insertEQ(MachineBasicBlock *MBB, - MachineBasicBlock::iterator I, - const DebugLoc &DL, - unsigned SrcReg, int Value) const { - MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); - unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); - BuildMI(*MBB, I, DL, get(AMDGPU::V_CMP_EQ_I32_e64), Reg) - .addImm(Value) - .addReg(SrcReg); - - return Reg; -} - -unsigned SIInstrInfo::insertNE(MachineBasicBlock *MBB, - MachineBasicBlock::iterator I, - const DebugLoc &DL, - unsigned SrcReg, int Value) const { - MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); - unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); - BuildMI(*MBB, I, DL, get(AMDGPU::V_CMP_NE_I32_e64), Reg) - .addImm(Value) - .addReg(SrcReg); - - return Reg; -} - unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const { if (RI.getRegSizeInBits(*DstRC) == 32) { @@ -1016,20 +834,6 @@ void SIInstrInfo::insertNoop(MachineBasicBlock &MBB, insertWaitStates(MBB, MI, 1); } -void SIInstrInfo::insertReturn(MachineBasicBlock &MBB) const { - auto MF = MBB.getParent(); - SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>(); - - assert(Info->isEntryFunction()); - - if (MBB.succ_empty()) { - bool HasNoTerminator = MBB.getFirstTerminator() == MBB.end(); - if (HasNoTerminator) - BuildMI(MBB, MBB.end(), DebugLoc(), - get(Info->returnsVoid() ? AMDGPU::S_ENDPGM : AMDGPU::SI_RETURN_TO_EPILOG)); - } -} - unsigned SIInstrInfo::getNumWaitStates(const MachineInstr &MI) const { switch (MI.getOpcode()) { default: return 1; // FIXME: Do wait states equal cycles? @@ -1437,20 +1241,14 @@ bool SIInstrInfo::analyzeBranchImpl(MachineBasicBlock &MBB, return false; } - MachineBasicBlock *CondBB = nullptr; + BranchPredicate Pred = getBranchPredicate(I->getOpcode()); + if (Pred == INVALID_BR) + return true; - if (I->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) { - CondBB = I->getOperand(1).getMBB(); - Cond.push_back(I->getOperand(0)); - } else { - BranchPredicate Pred = getBranchPredicate(I->getOpcode()); - if (Pred == INVALID_BR) - return true; + MachineBasicBlock *CondBB = I->getOperand(0).getMBB(); + Cond.push_back(MachineOperand::CreateImm(Pred)); + Cond.push_back(I->getOperand(1)); // Save the branch register. - CondBB = I->getOperand(0).getMBB(); - Cond.push_back(MachineOperand::CreateImm(Pred)); - Cond.push_back(I->getOperand(1)); // Save the branch register. - } ++I; if (I == MBB.end()) { @@ -1553,13 +1351,6 @@ unsigned SIInstrInfo::insertBranch(MachineBasicBlock &MBB, return 1; } - if(Cond.size() == 1 && Cond[0].isReg()) { - BuildMI(&MBB, DL, get(AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO)) - .add(Cond[0]) - .addMBB(TBB); - return 1; - } - assert(TBB && Cond[0].isImm()); unsigned Opcode @@ -1599,16 +1390,9 @@ unsigned SIInstrInfo::insertBranch(MachineBasicBlock &MBB, bool SIInstrInfo::reverseBranchCondition( SmallVectorImpl<MachineOperand> &Cond) const { - if (Cond.size() != 2) { - return true; - } - - if (Cond[0].isImm()) { - Cond[0].setImm(-Cond[0].getImm()); - return false; - } - - return true; + assert(Cond.size() == 2); + Cond[0].setImm(-Cond[0].getImm()); + return false; } bool SIInstrInfo::canInsertSelect(const MachineBasicBlock &MBB, @@ -4136,82 +3920,6 @@ bool SIInstrInfo::mayAccessFlatAddressSpace(const MachineInstr &MI) const { return false; } -bool SIInstrInfo::isNonUniformBranchInstr(MachineInstr &Branch) const { - return Branch.getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO; -} - -void SIInstrInfo::convertNonUniformIfRegion(MachineBasicBlock *IfEntry, - MachineBasicBlock *IfEnd) const { - MachineBasicBlock::iterator TI = IfEntry->getFirstTerminator(); - assert(TI != IfEntry->end()); - - MachineInstr *Branch = &(*TI); - MachineFunction *MF = IfEntry->getParent(); - MachineRegisterInfo &MRI = IfEntry->getParent()->getRegInfo(); - - if (Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) { - unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); - MachineInstr *SIIF = - BuildMI(*MF, Branch->getDebugLoc(), get(AMDGPU::SI_IF), DstReg) - .add(Branch->getOperand(0)) - .add(Branch->getOperand(1)); - MachineInstr *SIEND = - BuildMI(*MF, Branch->getDebugLoc(), get(AMDGPU::SI_END_CF)) - .addReg(DstReg); - - IfEntry->erase(TI); - IfEntry->insert(IfEntry->end(), SIIF); - IfEnd->insert(IfEnd->getFirstNonPHI(), SIEND); - } -} - -void SIInstrInfo::convertNonUniformLoopRegion( - MachineBasicBlock *LoopEntry, MachineBasicBlock *LoopEnd) const { - MachineBasicBlock::iterator TI = LoopEnd->getFirstTerminator(); - // We expect 2 terminators, one conditional and one unconditional. - assert(TI != LoopEnd->end()); - - MachineInstr *Branch = &(*TI); - MachineFunction *MF = LoopEnd->getParent(); - MachineRegisterInfo &MRI = LoopEnd->getParent()->getRegInfo(); - - if (Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) { - - unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); - unsigned BackEdgeReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); - MachineInstrBuilder HeaderPHIBuilder = - BuildMI(*(MF), Branch->getDebugLoc(), get(TargetOpcode::PHI), DstReg); - for (MachineBasicBlock::pred_iterator PI = LoopEntry->pred_begin(), - E = LoopEntry->pred_end(); - PI != E; ++PI) { - if (*PI == LoopEnd) { - HeaderPHIBuilder.addReg(BackEdgeReg); - } else { - MachineBasicBlock *PMBB = *PI; - unsigned ZeroReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); - materializeImmediate(*PMBB, PMBB->getFirstTerminator(), DebugLoc(), - ZeroReg, 0); - HeaderPHIBuilder.addReg(ZeroReg); - } - HeaderPHIBuilder.addMBB(*PI); - } - MachineInstr *HeaderPhi = HeaderPHIBuilder; - MachineInstr *SIIFBREAK = BuildMI(*(MF), Branch->getDebugLoc(), - get(AMDGPU::SI_IF_BREAK), BackEdgeReg) - .addReg(DstReg) - .add(Branch->getOperand(0)); - MachineInstr *SILOOP = - BuildMI(*(MF), Branch->getDebugLoc(), get(AMDGPU::SI_LOOP)) - .addReg(BackEdgeReg) - .addMBB(LoopEntry); - - LoopEntry->insert(LoopEntry->begin(), HeaderPhi); - LoopEnd->erase(TI); - LoopEnd->insert(LoopEnd->end(), SIIFBREAK); - LoopEnd->insert(LoopEnd->end(), SILOOP); - } -} - ArrayRef<std::pair<int, const char *>> SIInstrInfo::getSerializableTargetIndices() const { static const std::pair<int, const char *> TargetIndices[] = { |