summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
diff options
context:
space:
mode:
authorConnor Abbott <cwabbott0@gmail.com>2017-08-04 18:36:54 +0000
committerConnor Abbott <cwabbott0@gmail.com>2017-08-04 18:36:54 +0000
commit66b9bd6e503bdd87616ab04509ffb3f70220c281 (patch)
treeef831c300d054fcfca5e13142fa969db41c3fbf5 /llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
parent92638ab62559ce082c02680efca7280344ca05dd (diff)
downloadbcm5719-llvm-66b9bd6e503bdd87616ab04509ffb3f70220c281.tar.gz
bcm5719-llvm-66b9bd6e503bdd87616ab04509ffb3f70220c281.zip
[AMDGPU] Implement llvm.amdgcn.set.inactive intrinsic
Summary: This intrinsic lets us set inactive lanes to an identity value when implementing wavefront reductions. In combination with Whole Wavefront Mode, it lets inactive lanes be skipped over as required by GLSL/Vulkan. Lowering the intrinsic needs to happen post-RA so that RA knows that the destination isn't completely overwritten due to the EXEC shenanigans, so we need another pseudo-instruction to represent the un-lowered intrinsic. Reviewers: tstellar, arsenm Subscribers: kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye Differential Revision: https://reviews.llvm.org/D34719 llvm-svn: 310088
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIInstrInfo.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.cpp22
1 files changed, 22 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 0b0d0388031..e9492c4cf9c 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -1099,6 +1099,28 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
MI.eraseFromParent();
break;
}
+ case AMDGPU::V_SET_INACTIVE_B32: {
+ BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC)
+ .addReg(AMDGPU::EXEC);
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), MI.getOperand(0).getReg())
+ .add(MI.getOperand(2));
+ BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC)
+ .addReg(AMDGPU::EXEC);
+ MI.eraseFromParent();
+ break;
+ }
+ case AMDGPU::V_SET_INACTIVE_B64: {
+ BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC)
+ .addReg(AMDGPU::EXEC);
+ MachineInstr *Copy = BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B64_PSEUDO),
+ MI.getOperand(0).getReg())
+ .add(MI.getOperand(2));
+ expandPostRAPseudo(*Copy);
+ BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC)
+ .addReg(AMDGPU::EXEC);
+ MI.eraseFromParent();
+ break;
+ }
case AMDGPU::V_MOVRELD_B32_V1:
case AMDGPU::V_MOVRELD_B32_V2:
case AMDGPU::V_MOVRELD_B32_V4:
OpenPOWER on IntegriCloud