summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.cpp22
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstructions.td14
-rw-r--r--llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp31
3 files changed, 66 insertions, 1 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 0b0d0388031..e9492c4cf9c 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -1099,6 +1099,28 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
MI.eraseFromParent();
break;
}
+ case AMDGPU::V_SET_INACTIVE_B32: {
+ BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC)
+ .addReg(AMDGPU::EXEC);
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), MI.getOperand(0).getReg())
+ .add(MI.getOperand(2));
+ BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC)
+ .addReg(AMDGPU::EXEC);
+ MI.eraseFromParent();
+ break;
+ }
+ case AMDGPU::V_SET_INACTIVE_B64: {
+ BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC)
+ .addReg(AMDGPU::EXEC);
+ MachineInstr *Copy = BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B64_PSEUDO),
+ MI.getOperand(0).getReg())
+ .add(MI.getOperand(2));
+ expandPostRAPseudo(*Copy);
+ BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC)
+ .addReg(AMDGPU::EXEC);
+ MI.eraseFromParent();
+ break;
+ }
case AMDGPU::V_MOVRELD_B32_V1:
case AMDGPU::V_MOVRELD_B32_V2:
case AMDGPU::V_MOVRELD_B32_V4:
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index a13c8f32fe6..70ad847fc5e 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -137,6 +137,20 @@ def EXIT_WWM : SPseudoInstSI <(outs SReg_64:$sdst), (ins SReg_64:$src0)> {
let mayStore = 0;
}
+// Invert the exec mask and overwrite the inactive lanes of dst with inactive,
+// restoring exec after we're done.
+def V_SET_INACTIVE_B32 : VPseudoInstSI <(outs VGPR_32:$vdst),
+ (ins VGPR_32: $src, VSrc_b32:$inactive),
+ [(set i32:$vdst, (int_amdgcn_set_inactive i32:$src, i32:$inactive))]> {
+ let Constraints = "$src = $vdst";
+}
+
+def V_SET_INACTIVE_B64 : VPseudoInstSI <(outs VReg_64:$vdst),
+ (ins VReg_64: $src, VSrc_b64:$inactive),
+ [(set i64:$vdst, (int_amdgcn_set_inactive i64:$src, i64:$inactive))]> {
+ let Constraints = "$src = $vdst";
+}
+
let usesCustomInserter = 1, SALU = 1 in {
def GET_GROUPSTATICSIZE : PseudoInstSI <(outs SReg_32:$sdst), (ins),
[(set SReg_32:$sdst, (int_amdgcn_groupstaticsize))]>;
diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
index 1a0f0f9aca9..8aa57ba7293 100644
--- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -303,6 +303,7 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
std::vector<WorkItem> &Worklist) {
char GlobalFlags = 0;
bool WQMOutputs = MF.getFunction()->hasFnAttribute("amdgpu-ps-wqm-outputs");
+ SmallVector<MachineInstr *, 4> SetInactiveInstrs;
// We need to visit the basic blocks in reverse post-order so that we visit
// defs before uses, in particular so that we don't accidentally mark an
@@ -341,6 +342,23 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
GlobalFlags |= StateWWM;
LowerToCopyInstrs.push_back(&MI);
continue;
+ } else if (Opcode == AMDGPU::V_SET_INACTIVE_B32 ||
+ Opcode == AMDGPU::V_SET_INACTIVE_B64) {
+ III.Disabled = StateWWM;
+ MachineOperand &Inactive = MI.getOperand(2);
+ if (Inactive.isReg()) {
+ if (Inactive.isUndef()) {
+ LowerToCopyInstrs.push_back(&MI);
+ } else {
+ unsigned Reg = Inactive.getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ for (MachineInstr &DefMI : MRI->def_instructions(Reg))
+ markInstruction(DefMI, StateWWM, Worklist);
+ }
+ }
+ }
+ SetInactiveInstrs.push_back(&MI);
+ continue;
} else if (TII->isDisableWQM(MI)) {
BBI.Needs |= StateExact;
if (!(BBI.InNeeds & StateExact)) {
@@ -380,6 +398,14 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
}
}
+  // Make sure that any SET_INACTIVE instructions are computed in WQM if WQM is
+ // ever used anywhere in the function. This implements the corresponding
+ // semantics of @llvm.amdgcn.set.inactive.
+ if (GlobalFlags & StateWQM) {
+ for (MachineInstr *MI : SetInactiveInstrs)
+ markInstruction(*MI, StateWQM, Worklist);
+ }
+
return GlobalFlags;
}
@@ -799,8 +825,11 @@ void SIWholeQuadMode::lowerLiveMaskQueries(unsigned LiveMaskReg) {
}
void SIWholeQuadMode::lowerCopyInstrs() {
- for (MachineInstr *MI : LowerToCopyInstrs)
+ for (MachineInstr *MI : LowerToCopyInstrs) {
+ for (unsigned i = MI->getNumExplicitOperands() - 1; i > 1; i--)
+ MI->RemoveOperand(i);
MI->setDesc(TII->get(AMDGPU::COPY));
+ }
}
bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
OpenPOWER on IntegriCloud