diff options
| author | Carl Ritson <carl.ritson@amd.com> | 2019-07-26 09:54:12 +0000 |
|---|---|---|
| committer | Carl Ritson <carl.ritson@amd.com> | 2019-07-26 09:54:12 +0000 |
| commit | 00e89b428b9962c9e0317bf6cb54fa081417f878 (patch) | |
| tree | 981e35186f3d90e5f07d19b73981bbde24231962 /llvm/lib/Target | |
| parent | 9758407bf100ec9d1d8a1666046e6cb6675207d5 (diff) | |
| download | bcm5719-llvm-00e89b428b9962c9e0317bf6cb54fa081417f878.tar.gz bcm5719-llvm-00e89b428b9962c9e0317bf6cb54fa081417f878.zip | |
[AMDGPU] Add llvm.amdgcn.softwqm intrinsic
Add llvm.amdgcn.softwqm intrinsic which behaves like llvm.amdgcn.wqm
only if there is other WQM computation in the shader.
Reviewers: nhaehnle, tpr
Reviewed By: nhaehnle
Subscribers: arsenm, kzhuravl, jvesely, wdng, yaxunl, dstuttard, t-tye, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D64935
llvm-svn: 367097
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 21 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp | 1 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 3 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstructions.td | 4 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp | 10 |
5 files changed, 38 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index e69fdb3da7a..ef04732c981 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -282,6 +282,7 @@ private: void SelectDSAppendConsume(SDNode *N, unsigned IntrID); void SelectDS_GWS(SDNode *N, unsigned IntrID); void SelectINTRINSIC_W_CHAIN(SDNode *N); + void SelectINTRINSIC_WO_CHAIN(SDNode *N); void SelectINTRINSIC_VOID(SDNode *N); protected: @@ -908,6 +909,10 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) { SelectINTRINSIC_W_CHAIN(N); return; } + case ISD::INTRINSIC_WO_CHAIN: { + SelectINTRINSIC_WO_CHAIN(N); + return; + } case ISD::INTRINSIC_VOID: { SelectINTRINSIC_VOID(N); return; @@ -2235,6 +2240,22 @@ void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) { SelectCode(N); } +void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) { + unsigned IntrID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); + unsigned Opcode; + switch (IntrID) { + case Intrinsic::amdgcn_softwqm: + Opcode = AMDGPU::SOFT_WQM; + break; + default: + SelectCode(N); + return; + } + + SDValue Src = N->getOperand(1); + CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), {Src}); +} + void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(SDNode *N) { unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); switch (IntrID) { diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp index 624953963cf..861fe747c66 100644 --- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp +++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp @@ -617,6 +617,7 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) { continue; case AMDGPU::COPY: case AMDGPU::WQM: + case AMDGPU::SOFT_WQM: case AMDGPU::WWM: { // If the destination register is a physical register there isn't really // much we can do to fix this. 
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 1fce2dbe774..8df409ad654 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -3631,6 +3631,7 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const { case AMDGPU::PHI: return AMDGPU::PHI; case AMDGPU::INSERT_SUBREG: return AMDGPU::INSERT_SUBREG; case AMDGPU::WQM: return AMDGPU::WQM; + case AMDGPU::SOFT_WQM: return AMDGPU::SOFT_WQM; case AMDGPU::WWM: return AMDGPU::WWM; case AMDGPU::S_MOV_B32: { const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); @@ -5506,6 +5507,7 @@ void SIInstrInfo::addUsersToMoveToVALUWorklist( switch (UseMI.getOpcode()) { case AMDGPU::COPY: case AMDGPU::WQM: + case AMDGPU::SOFT_WQM: case AMDGPU::WWM: case AMDGPU::REG_SEQUENCE: case AMDGPU::PHI: @@ -5623,6 +5625,7 @@ const TargetRegisterClass *SIInstrInfo::getDestEquivalentVGPRClass( case AMDGPU::REG_SEQUENCE: case AMDGPU::INSERT_SUBREG: case AMDGPU::WQM: + case AMDGPU::SOFT_WQM: case AMDGPU::WWM: { const TargetRegisterClass *SrcRC = getOpRegClass(Inst, 1); if (RI.hasAGPRs(SrcRC)) { diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 70f20bb6937..934b50b87de 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -111,6 +111,10 @@ def V_MOV_B64_PSEUDO : VPseudoInstSI <(outs VReg_64:$vdst), // WQM pass processes it. def WQM : PseudoInstSI <(outs unknown:$vdst), (ins unknown:$src0)>; +// Pseudoinstruction for @llvm.amdgcn.softwqm. Like @llvm.amdgcn.wqm it is +// turned into a copy by WQM pass, but does not seed WQM requirements. +def SOFT_WQM : PseudoInstSI <(outs unknown:$vdst), (ins unknown:$src0)>; + // Pseudoinstruction for @llvm.amdgcn.wwm. It is turned into a copy post-RA, so // that the @earlyclobber is respected. 
The @earlyclobber is to make sure that // the instruction that defines $src0 (which is run in WWM) doesn't diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp index 4e07efff55d..332c7176a8c 100644 --- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp +++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp @@ -312,6 +312,7 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF, char GlobalFlags = 0; bool WQMOutputs = MF.getFunction().hasFnAttribute("amdgpu-ps-wqm-outputs"); SmallVector<MachineInstr *, 4> SetInactiveInstrs; + SmallVector<MachineInstr *, 4> SoftWQMInstrs; // We need to visit the basic blocks in reverse post-order so that we visit // defs before uses, in particular so that we don't accidentally mark an @@ -340,6 +341,10 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF, // correct, so we need it to be in WQM. Flags = StateWQM; LowerToCopyInstrs.push_back(&MI); + } else if (Opcode == AMDGPU::SOFT_WQM) { + LowerToCopyInstrs.push_back(&MI); + SoftWQMInstrs.push_back(&MI); + continue; } else if (Opcode == AMDGPU::WWM) { // The WWM intrinsic doesn't make the same guarantee, and plus it needs // to be executed in WQM or Exact so that its copy doesn't clobber @@ -407,9 +412,12 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF, // Mark sure that any SET_INACTIVE instructions are computed in WQM if WQM is // ever used anywhere in the function. This implements the corresponding // semantics of @llvm.amdgcn.set.inactive. + // Similarly for SOFT_WQM instructions, implementing @llvm.amdgcn.softwqm. if (GlobalFlags & StateWQM) { for (MachineInstr *MI : SetInactiveInstrs) markInstruction(*MI, StateWQM, Worklist); + for (MachineInstr *MI : SoftWQMInstrs) + markInstruction(*MI, StateWQM, Worklist); } return GlobalFlags; @@ -885,7 +893,7 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) { unsigned Exec = ST->isWave32() ? 
AMDGPU::EXEC_LO : AMDGPU::EXEC; if (!(GlobalFlags & StateWQM)) { lowerLiveMaskQueries(Exec); - if (!(GlobalFlags & StateWWM)) + if (!(GlobalFlags & StateWWM) && LowerToCopyInstrs.empty()) return !LiveMaskQueries.empty(); } else { // Store a copy of the original live mask when required |

