[AMDGPU] Add an llvm.amdgcn.wqm intrinsic for WQM

Summary:
Previously, we assumed that certain types of instructions needed WQM in pixel shaders, particularly DS instructions and image sampling instructions. This was ok because with OpenGL, the assumption was correct. But we want to start using DPP instructions for derivatives as well as other things, so the assumption that we can infer whether to use WQM based on the instruction won't continue to hold. This intrinsic lets frontends like Mesa indicate what things need WQM based on their knowledge of the API, rather than second-guessing them in the backend. We need to keep around the old method of enabling WQM, but eventually we should remove it once Mesa catches up. For now, this will let us use DPP instructions for computing derivatives correctly.

Reviewers: arsenm, tpr, nhaehnle

Subscribers: kzhuravl, wdng, yaxunl, dstuttard, llvm-commits, t-tye

Differential Revision: https://reviews.llvm.org/D35167

llvm-svn: 310085
author: Connor Abbott <cwabbott0@gmail.com> 2017-08-04 18:36:49 +0000
committer: Connor Abbott <cwabbott0@gmail.com> 2017-08-04 18:36:49 +0000
commit: 8c217d0a295999583de52377afa1aa4ef4a3ebb4 (patch)
tree: 8850d9668c02ecb9b197aee948860d3606f69a59 /llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
parent: 3bab91332fcb139d8c15c6d6c5a81a06cde7a7d2 (diff)
download: bcm5719-llvm-8c217d0a295999583de52377afa1aa4ef4a3ebb4.tar.gz
bcm5719-llvm-8c217d0a295999583de52377afa1aa4ef4a3ebb4.zip
1 files changed, 15 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
index a613a220e29..62e1b7e84c4 100644
--- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -136,6 +136,7 @@ private:
   DenseMap<const MachineInstr *, InstrInfo> Instructions;
   DenseMap<MachineBasicBlock *, BlockInfo> Blocks;
   SmallVector<MachineInstr *, 1> LiveMaskQueries;
+  SmallVector<MachineInstr *, 4> LowerToCopyInstrs;
 
   void printInfo();
 
@@ -162,6 +163,7 @@ private:
   void processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg, bool isEntry);
 
   void lowerLiveMaskQueries(unsigned LiveMaskReg);
+  void lowerCopyInstrs();
 
 public:
   static char ID;
@@ -294,6 +296,11 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
         markUsesWQM(MI, Worklist);
         GlobalFlags |= StateWQM;
         continue;
+      } else if (Opcode == AMDGPU::WQM) {
+        // The WQM intrinsic requires its output to have all the helper lanes
+        // correct, so we need it to be in WQM.
+        Flags = StateWQM;
+        LowerToCopyInstrs.push_back(&MI);
       } else if (TII->isDisableWQM(MI)) {
         Flags = StateExact;
       } else {
@@ -666,6 +673,11 @@ void SIWholeQuadMode::lowerLiveMaskQueries(unsigned LiveMaskReg) {
   }
 }
 
+void SIWholeQuadMode::lowerCopyInstrs() {
+  for (MachineInstr *MI : LowerToCopyInstrs)
+    MI->setDesc(TII->get(AMDGPU::COPY));
+}
+
 bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
   if (MF.getFunction()->getCallingConv() != CallingConv::AMDGPU_PS)
     return false;
@@ -673,6 +685,7 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
   Instructions.clear();
   Blocks.clear();
   LiveMaskQueries.clear();
+  LowerToCopyInstrs.clear();
 
   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
 
@@ -708,6 +721,7 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
           .addReg(AMDGPU::EXEC);
 
       lowerLiveMaskQueries(LiveMaskReg);
+      lowerCopyInstrs();
       // EntryMI may become invalid here
       return true;
     }
@@ -716,6 +730,7 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
   DEBUG(printInfo());
 
   lowerLiveMaskQueries(LiveMaskReg);
+  lowerCopyInstrs();
 
   // Handle the general case
   for (auto BII : Blocks)
author	Connor Abbott <cwabbott0@gmail.com>	2017-08-04 18:36:49 +0000
committer	Connor Abbott <cwabbott0@gmail.com>	2017-08-04 18:36:49 +0000
commit	8c217d0a295999583de52377afa1aa4ef4a3ebb4 (patch)
tree	8850d9668c02ecb9b197aee948860d3606f69a59 /llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
parent	3bab91332fcb139d8c15c6d6c5a81a06cde7a7d2 (diff)
download	bcm5719-llvm-8c217d0a295999583de52377afa1aa4ef4a3ebb4.tar.gz bcm5719-llvm-8c217d0a295999583de52377afa1aa4ef4a3ebb4.zip