diff options
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 22 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstructions.td | 14 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp | 31 |
3 files changed, 66 insertions, 1 deletion
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 0b0d0388031..e9492c4cf9c 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -1099,6 +1099,28 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { MI.eraseFromParent(); break; } + case AMDGPU::V_SET_INACTIVE_B32: { + BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC) + .addReg(AMDGPU::EXEC); + BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), MI.getOperand(0).getReg()) + .add(MI.getOperand(2)); + BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC) + .addReg(AMDGPU::EXEC); + MI.eraseFromParent(); + break; + } + case AMDGPU::V_SET_INACTIVE_B64: { + BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC) + .addReg(AMDGPU::EXEC); + MachineInstr *Copy = BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B64_PSEUDO), + MI.getOperand(0).getReg()) + .add(MI.getOperand(2)); + expandPostRAPseudo(*Copy); + BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC) + .addReg(AMDGPU::EXEC); + MI.eraseFromParent(); + break; + } case AMDGPU::V_MOVRELD_B32_V1: case AMDGPU::V_MOVRELD_B32_V2: case AMDGPU::V_MOVRELD_B32_V4: diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index a13c8f32fe6..70ad847fc5e 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -137,6 +137,20 @@ def EXIT_WWM : SPseudoInstSI <(outs SReg_64:$sdst), (ins SReg_64:$src0)> { let mayStore = 0; } +// Invert the exec mask and overwrite the inactive lanes of dst with inactive, +// restoring it after we're done. 
+def V_SET_INACTIVE_B32 : VPseudoInstSI <(outs VGPR_32:$vdst), + (ins VGPR_32: $src, VSrc_b32:$inactive), + [(set i32:$vdst, (int_amdgcn_set_inactive i32:$src, i32:$inactive))]> { + let Constraints = "$src = $vdst"; +} + +def V_SET_INACTIVE_B64 : VPseudoInstSI <(outs VReg_64:$vdst), + (ins VReg_64: $src, VSrc_b64:$inactive), + [(set i64:$vdst, (int_amdgcn_set_inactive i64:$src, i64:$inactive))]> { + let Constraints = "$src = $vdst"; +} + let usesCustomInserter = 1, SALU = 1 in { def GET_GROUPSTATICSIZE : PseudoInstSI <(outs SReg_32:$sdst), (ins), [(set SReg_32:$sdst, (int_amdgcn_groupstaticsize))]>; diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp index 1a0f0f9aca9..8aa57ba7293 100644 --- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp +++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp @@ -303,6 +303,7 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF, std::vector<WorkItem> &Worklist) { char GlobalFlags = 0; bool WQMOutputs = MF.getFunction()->hasFnAttribute("amdgpu-ps-wqm-outputs"); + SmallVector<MachineInstr *, 4> SetInactiveInstrs; // We need to visit the basic blocks in reverse post-order so that we visit // defs before uses, in particular so that we don't accidentally mark an @@ -341,6 +342,23 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF, GlobalFlags |= StateWWM; LowerToCopyInstrs.push_back(&MI); continue; + } else if (Opcode == AMDGPU::V_SET_INACTIVE_B32 || + Opcode == AMDGPU::V_SET_INACTIVE_B64) { + III.Disabled = StateWWM; + MachineOperand &Inactive = MI.getOperand(2); + if (Inactive.isReg()) { + if (Inactive.isUndef()) { + LowerToCopyInstrs.push_back(&MI); + } else { + unsigned Reg = Inactive.getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + for (MachineInstr &DefMI : MRI->def_instructions(Reg)) + markInstruction(DefMI, StateWWM, Worklist); + } + } + } + SetInactiveInstrs.push_back(&MI); + continue; } else if (TII->isDisableWQM(MI)) { BBI.Needs |= StateExact; if 
(!(BBI.InNeeds & StateExact)) { @@ -380,6 +398,14 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF, } } + // Make sure that any SET_INACTIVE instructions are computed in WQM if WQM is + // ever used anywhere in the function. This implements the corresponding + // semantics of @llvm.amdgcn.set.inactive. + if (GlobalFlags & StateWQM) { + for (MachineInstr *MI : SetInactiveInstrs) + markInstruction(*MI, StateWQM, Worklist); + } + return GlobalFlags; } @@ -799,8 +825,11 @@ void SIWholeQuadMode::lowerLiveMaskQueries(unsigned LiveMaskReg) { } void SIWholeQuadMode::lowerCopyInstrs() { - for (MachineInstr *MI : LowerToCopyInstrs) + for (MachineInstr *MI : LowerToCopyInstrs) { + for (unsigned i = MI->getNumExplicitOperands() - 1; i > 1; i--) + MI->RemoveOperand(i); MI->setDesc(TII->get(AMDGPU::COPY)); + } } bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) { |

