summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
diff options
context:
space:
mode:
author: Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> 2017-08-01 23:44:35 +0000
committer: Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> 2017-08-01 23:44:35 +0000
commit: da0edef1bd7b2526faebdaa1aa9924a70f91aa66 (patch)
tree: 5f8d87c2f8dfd248106ceb6e5e4f20ba62229eac /llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
parent: 4f345060dd2ca75316b8b657d02c2129ab85d1a3 (diff)
downloadbcm5719-llvm-da0edef1bd7b2526faebdaa1aa9924a70f91aa66.tar.gz
bcm5719-llvm-da0edef1bd7b2526faebdaa1aa9924a70f91aa66.zip
[AMDGPU] Turn s_and_saveexec_b64 into s_and_b64 if result is unused
With SI_END_CF elimination, for some nested control flow we can now eliminate the saved exec register completely by turning the saveexec version of an instruction into just a logical instruction. Differential Revision: https://reviews.llvm.org/D36007 llvm-svn: 309766
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp42
1 files changed, 41 insertions, 1 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
index 4d2f917278e..46e58a2ca5f 100644
--- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
+++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
@@ -87,6 +87,30 @@ static unsigned isCopyToExec(const MachineInstr &MI) {
return AMDGPU::NoRegister;
}
+/// If \p MI is one of the S_*_B64 logical opcodes below and reads EXEC as
+/// operand 1 or 2, return the register in operand 0; else AMDGPU::NoRegister.
+static unsigned isLogicalOpOnExec(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ case AMDGPU::S_AND_B64:
+ case AMDGPU::S_OR_B64:
+ case AMDGPU::S_XOR_B64:
+ case AMDGPU::S_ANDN2_B64:
+ case AMDGPU::S_ORN2_B64:
+ case AMDGPU::S_NAND_B64:
+ case AMDGPU::S_NOR_B64:
+ case AMDGPU::S_XNOR_B64: {
+ const MachineOperand &Src1 = MI.getOperand(1);
+ if (Src1.isReg() && Src1.getReg() == AMDGPU::EXEC)
+ return MI.getOperand(0).getReg();
+ const MachineOperand &Src2 = MI.getOperand(2);
+ if (Src2.isReg() && Src2.getReg() == AMDGPU::EXEC)
+ return MI.getOperand(0).getReg();
+ } // Neither operand 1 nor 2 is EXEC: leave the switch, return NoRegister.
+ }
+
+ return AMDGPU::NoRegister;
+}
+
static unsigned getSaveExecOp(unsigned Opc) {
switch (Opc) {
case AMDGPU::S_AND_B64:
@@ -209,8 +233,24 @@ bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) {
// Scan backwards to find the def.
auto CopyToExecInst = &*I;
auto CopyFromExecInst = findExecCopy(*TII, MBB, I, CopyToExec);
- if (CopyFromExecInst == E)
+ if (CopyFromExecInst == E) {
+ auto PrepareExecInst = std::next(I);
+ if (PrepareExecInst == E)
+ continue;
+ // Fold exec = COPY (S_AND_B64 reg, exec) -> exec = S_AND_B64 reg, exec
+ if (CopyToExecInst->getOperand(1).isKill() &&
+ isLogicalOpOnExec(*PrepareExecInst) == CopyToExec) {
+ DEBUG(dbgs() << "Fold exec copy: " << *PrepareExecInst);
+
+ PrepareExecInst->getOperand(0).setReg(AMDGPU::EXEC);
+
+ DEBUG(dbgs() << "into: " << *PrepareExecInst << '\n');
+
+ CopyToExecInst->eraseFromParent();
+ }
+
continue;
+ }
if (isLiveOut(MBB, CopyToExec)) {
// The copied register is live out and has a second use in another block.
OpenPOWER on IntegriCloud