diff options
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 33 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstructions.td | 18 |
2 files changed, 37 insertions, 14 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index ee7ad3293d9..52880a282d9 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -1103,10 +1103,18 @@ unsigned SITargetLowering::getRegisterByName(const char* RegName, EVT VT, + StringRef(RegName) + "\".")); } -MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter( - MachineInstr * MI, MachineBasicBlock * BB) const { - +MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter( + MachineInstr *MI, MachineBasicBlock *BB) const { switch (MI->getOpcode()) { + case AMDGPU::SI_INIT_M0: { + const SIInstrInfo *TII = + static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo()); + BuildMI(*BB, MI->getIterator(), MI->getDebugLoc(), + TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0) + .addOperand(MI->getOperand(0)); + MI->eraseFromParent(); + break; + } case AMDGPU::BRANCH: return BB; case AMDGPU::GET_GROUPSTATICSIZE: { @@ -1395,19 +1403,18 @@ SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue SITargetLowering::copyToM0(SelectionDAG &DAG, SDValue Chain, SDLoc DL, SDValue V) const { + // We can't use S_MOV_B32 directly, because there is no way to specify m0 as + // the destination register. + // // We can't use CopyToReg, because MachineCSE won't combine COPY instructions, // so we will end up with redundant moves to m0. // - // We can't use S_MOV_B32, because there is no way to specify m0 as the - // destination register. - // - // We have to use them both. Machine cse will combine all the S_MOV_B32 - // instructions and the register coalescer eliminate the extra copies. - SDNode *M0 = DAG.getMachineNode(AMDGPU::S_MOV_B32, DL, V.getValueType(), V); - return DAG.getCopyToReg(Chain, DL, DAG.getRegister(AMDGPU::M0, MVT::i32), - SDValue(M0, 0), SDValue()); // Glue - // A Null SDValue creates - // a glue result. 
+ // We use a pseudo to ensure we emit s_mov_b32 with m0 as the direct result. + + // Result 0 is the chain (MVT::Other) and result 1 is the glue. + SDNode *M0 = DAG.getMachineNode(AMDGPU::SI_INIT_M0, DL, MVT::Other, MVT::Glue, + V, Chain); + return SDValue(M0, 0); } SDValue SITargetLowering::lowerImplicitZextParam(SelectionDAG &DAG, diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 4620ec05752..7b8a62bc8fb 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -2014,7 +2014,23 @@ def SI_KILL : InstSI < } // End mayLoad = 1, mayStore = 1, hasSideEffects = 1 -let Uses = [EXEC], Defs = [EXEC,VCC,M0] in { +// Used as an isel pseudo to directly emit initialization with an +// s_mov_b32 rather than a copy of another initialized +// register. MachineCSE skips copies, and we don't want to have to +// fold operands before it runs. +def SI_INIT_M0 : InstSI < + (outs), + (ins SSrc_32:$src), "", []> { + let Defs = [M0]; + let usesCustomInserter = 1; + let isPseudo = 1; + let isCodeGenOnly = 1; + let isAsCheapAsAMove = 1; + let SALU = 1; + let isReMaterializable = 1; +} + +let Uses = [EXEC], Defs = [EXEC, VCC, M0] in { class SI_INDIRECT_SRC<RegisterClass rc> : InstSI < (outs VGPR_32:$dst, SReg_64:$temp), |