diff options
| author | Tom Stellard <thomas.stellard@amd.com> | 2016-02-12 23:45:29 +0000 |
|---|---|---|
| committer | Tom Stellard <thomas.stellard@amd.com> | 2016-02-12 23:45:29 +0000 |
| commit | bc4497b13ccc73e1c8c156350cc7fe50f9beae93 (patch) | |
| tree | 71e7e13564f64f708f17731941d9b73818cb5be9 /llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | |
| parent | 0de36ec169b8c818487606658ed2504c88f4c0e7 (diff) | |
| download | bcm5719-llvm-bc4497b13ccc73e1c8c156350cc7fe50f9beae93.tar.gz bcm5719-llvm-bc4497b13ccc73e1c8c156350cc7fe50f9beae93.zip | |
AMDGPU/SI: Detect uniform branches and emit s_cbranch instructions
Reviewers: arsenm
Subscribers: mareko, MatzeB, qcolombet, arsenm, llvm-commits
Differential Revision: http://reviews.llvm.org/D16603
llvm-svn: 260765
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIInstrInfo.cpp')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 69 |
1 files changed, 59 insertions, 10 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 7228d40e611..5650098efed 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -1437,6 +1437,16 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI, int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); + // Make sure we don't have SCC live-ins to basic blocks. moveToVALU assumes + // all SCC users are in the same blocks as their defs. + const MachineBasicBlock *MBB = MI->getParent(); + if (MI == &MBB->front()) { + if (MBB->isLiveIn(AMDGPU::SCC)) { + ErrInfo = "scc register cannot be live across blocks."; + return false; + } + } + // Make sure the number of operands is correct. const MCInstrDesc &Desc = get(Opcode); if (!Desc.isVariadic() && @@ -1605,6 +1615,12 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) { case AMDGPU::S_CMP_GE_I32: return AMDGPU::V_CMP_GE_I32_e32; case AMDGPU::S_CMP_LT_I32: return AMDGPU::V_CMP_LT_I32_e32; case AMDGPU::S_CMP_LE_I32: return AMDGPU::V_CMP_LE_I32_e32; + case AMDGPU::S_CMP_EQ_U32: return AMDGPU::V_CMP_EQ_U32_e32; + case AMDGPU::S_CMP_LG_U32: return AMDGPU::V_CMP_NE_U32_e32; + case AMDGPU::S_CMP_GT_U32: return AMDGPU::V_CMP_GT_U32_e32; + case AMDGPU::S_CMP_GE_U32: return AMDGPU::V_CMP_GE_U32_e32; + case AMDGPU::S_CMP_LT_U32: return AMDGPU::V_CMP_LT_U32_e32; + case AMDGPU::S_CMP_LE_U32: return AMDGPU::V_CMP_LE_U32_e32; case AMDGPU::S_LOAD_DWORD_IMM: case AMDGPU::S_LOAD_DWORD_SGPR: case AMDGPU::S_LOAD_DWORD_IMM_ci: @@ -1621,6 +1637,8 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) { case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32; case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32; case AMDGPU::S_FLBIT_I32: return AMDGPU::V_FFBH_I32_e64; + case AMDGPU::S_CBRANCH_SCC0: return AMDGPU::S_CBRANCH_VCCZ; + case AMDGPU::S_CBRANCH_SCC1: return AMDGPU::S_CBRANCH_VCCNZ; } } @@ -1979,7 +1997,7 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const { MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); // Legalize VOP2 - if (isVOP2(*MI)) { + if (isVOP2(*MI) || isVOPC(*MI)) { legalizeOperandsVOP2(MRI, MI); return; } @@ -2568,6 +2586,14 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const { Inst->eraseFromParent(); continue; + case AMDGPU::S_CBRANCH_SCC0: + case AMDGPU::S_CBRANCH_SCC1: + // Clear unused bits of vcc + BuildMI(*MBB, Inst, Inst->getDebugLoc(), get(AMDGPU::S_AND_B64), AMDGPU::VCC) + .addReg(AMDGPU::EXEC) + .addReg(AMDGPU::VCC); + break; + case AMDGPU::S_BFE_U64: case AMDGPU::S_BFM_B64: llvm_unreachable("Moving this op to VALU not implemented"); @@ -2589,8 +2615,10 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const { // both. for (unsigned i = Inst->getNumOperands() - 1; i > 0; --i) { MachineOperand &Op = Inst->getOperand(i); - if (Op.isReg() && Op.getReg() == AMDGPU::SCC) + if (Op.isReg() && Op.getReg() == AMDGPU::SCC) { Inst->RemoveOperand(i); + addSCCDefUsersToVALUWorklist(Inst, Worklist); + } } if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) { @@ -2623,19 +2651,24 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const { Inst->addOperand(MachineOperand::CreateImm(BitWidth)); } - // Update the destination register class. - const TargetRegisterClass *NewDstRC = getDestEquivalentVGPRClass(*Inst); - if (!NewDstRC) - continue; + bool HasDst = Inst->getOperand(0).isReg() && Inst->getOperand(0).isDef(); + unsigned NewDstReg = AMDGPU::NoRegister; + if (HasDst) { + // Update the destination register class. + const TargetRegisterClass *NewDstRC = getDestEquivalentVGPRClass(*Inst); + if (!NewDstRC) + continue; - unsigned DstReg = Inst->getOperand(0).getReg(); - unsigned NewDstReg = MRI.createVirtualRegister(NewDstRC); - MRI.replaceRegWith(DstReg, NewDstReg); + unsigned DstReg = Inst->getOperand(0).getReg(); + NewDstReg = MRI.createVirtualRegister(NewDstRC); + MRI.replaceRegWith(DstReg, NewDstReg); + } // Legalize the operands legalizeOperands(Inst); - addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist); + if (HasDst) + addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist); } } @@ -2910,6 +2943,22 @@ void SIInstrInfo::addUsersToMoveToVALUWorklist( } } +void SIInstrInfo::addSCCDefUsersToVALUWorklist(MachineInstr *SCCDefInst, + SmallVectorImpl<MachineInstr *> &Worklist) const { + // This assumes that all the users of SCC are in the same block + // as the SCC def. + for (MachineBasicBlock::iterator I = SCCDefInst, + E = SCCDefInst->getParent()->end(); I != E; ++I) { + + // Exit if we find another SCC def. + if (I->findRegisterDefOperandIdx(AMDGPU::SCC) != -1) + return; + + if (I->findRegisterUseOperandIdx(AMDGPU::SCC) != -1) + Worklist.push_back(I); + } +} + const TargetRegisterClass *SIInstrInfo::getDestEquivalentVGPRClass( const MachineInstr &Inst) const { const TargetRegisterClass *NewDstRC = getOpRegClass(Inst, 0); |

