summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp27
1 files changed, 22 insertions, 5 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
index 600b80d8300..39f5df76797 100644
--- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -156,6 +156,7 @@ private:
DenseMap<const MachineInstr *, InstrInfo> Instructions;
DenseMap<MachineBasicBlock *, BlockInfo> Blocks;
SmallVector<MachineInstr *, 1> LiveMaskQueries;
+ SmallVector<MachineInstr *, 4> LowerToMovInstrs;
SmallVector<MachineInstr *, 4> LowerToCopyInstrs;
void printInfo();
@@ -352,7 +353,7 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
// inactive lanes.
markInstructionUses(MI, StateWWM, Worklist);
GlobalFlags |= StateWWM;
- LowerToCopyInstrs.push_back(&MI);
+ LowerToMovInstrs.push_back(&MI);
continue;
} else if (Opcode == AMDGPU::V_SET_INACTIVE_B32 ||
Opcode == AMDGPU::V_SET_INACTIVE_B64) {
@@ -852,9 +853,8 @@ void SIWholeQuadMode::lowerLiveMaskQueries(unsigned LiveMaskReg) {
}
void SIWholeQuadMode::lowerCopyInstrs() {
- for (MachineInstr *MI : LowerToCopyInstrs) {
- for (unsigned i = MI->getNumExplicitOperands() - 1; i > 1; i--)
- MI->RemoveOperand(i);
+ for (MachineInstr *MI : LowerToMovInstrs) {
+ assert(MI->getNumExplicitOperands() == 2);
const Register Reg = MI->getOperand(0).getReg();
@@ -872,6 +872,22 @@ void SIWholeQuadMode::lowerCopyInstrs() {
MI->setDesc(TII->get(AMDGPU::COPY));
}
}
+ for (MachineInstr *MI : LowerToCopyInstrs) {
+ if (MI->getOpcode() == AMDGPU::V_SET_INACTIVE_B32 ||
+ MI->getOpcode() == AMDGPU::V_SET_INACTIVE_B64) {
+ assert(MI->getNumExplicitOperands() == 3);
+ // the only reason we should be here is V_SET_INACTIVE has
+ // an undef input so it is being replaced by a simple copy.
+ // There should be a second undef source that we should remove.
+ assert(MI->getOperand(2).isUndef());
+ MI->RemoveOperand(2);
+ MI->untieRegOperand(1);
+ } else {
+ assert(MI->getNumExplicitOperands() == 2);
+ }
+
+ MI->setDesc(TII->get(AMDGPU::COPY));
+ }
}
bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
@@ -879,6 +895,7 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
Blocks.clear();
LiveMaskQueries.clear();
LowerToCopyInstrs.clear();
+ LowerToMovInstrs.clear();
CallingConv = MF.getFunction().getCallingConv();
ST = &MF.getSubtarget<GCNSubtarget>();
@@ -893,7 +910,7 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
unsigned Exec = ST->isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
if (!(GlobalFlags & StateWQM)) {
lowerLiveMaskQueries(Exec);
- if (!(GlobalFlags & StateWWM) && LowerToCopyInstrs.empty())
+ if (!(GlobalFlags & StateWWM) && LowerToCopyInstrs.empty() && LowerToMovInstrs.empty())
return !LiveMaskQueries.empty();
} else {
// Store a copy of the original live mask when required
OpenPOWER on IntegriCloud