[AMDGPU] gfx1010 MIMG implementation

Differential Revision: https://reviews.llvm.org/D61339 llvm-svn: 359698
author: Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> 2019-05-01 16:32:58 +0000
committer: Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> 2019-05-01 16:32:58 +0000
commit: 692560dc986d90930887c0313249bc9fff798bf8 (patch)
tree: 53610f5e08260b53aa5d8e6a14c0254d33209ae2 /llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
parent: b3203ec078ca72919475093aca9fc19ca58dd272 (diff)
download: bcm5719-llvm-692560dc986d90930887c0313249bc9fff798bf8.tar.gz
bcm5719-llvm-692560dc986d90930887c0313249bc9fff798bf8.zip
1 files changed, 100 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index e589f60b583..0bcbcc8271e 100644
--- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -38,6 +38,8 @@ class SIShrinkInstructions : public MachineFunctionPass {
 public:
   static char ID;
 
+  void shrinkMIMG(MachineInstr &MI);
+
 public:
   SIShrinkInstructions() : MachineFunctionPass(ID) {
   }
@@ -211,6 +213,96 @@ static void shrinkScalarCompare(const SIInstrInfo *TII, MachineInstr &MI) {
   }
 }
 
+// Shrink NSA encoded instructions with contiguous VGPRs to non-NSA encoding.
+void SIShrinkInstructions::shrinkMIMG(MachineInstr &MI) {
+  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
+  if (Info->MIMGEncoding != AMDGPU::MIMGEncGfx10NSA)
+    return;
+
+  MachineFunction *MF = MI.getParent()->getParent();
+  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
+  const SIInstrInfo *TII = ST.getInstrInfo();
+  const SIRegisterInfo &TRI = TII->getRegisterInfo();
+  int VAddr0Idx =
+      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
+  unsigned NewAddrDwords = Info->VAddrDwords;
+  const TargetRegisterClass *RC;
+
+  if (Info->VAddrDwords == 2) {
+    RC = &AMDGPU::VReg_64RegClass;
+  } else if (Info->VAddrDwords == 3) {
+    RC = &AMDGPU::VReg_96RegClass;
+  } else if (Info->VAddrDwords == 4) {
+    RC = &AMDGPU::VReg_128RegClass;
+  } else if (Info->VAddrDwords <= 8) {
+    RC = &AMDGPU::VReg_256RegClass;
+    NewAddrDwords = 8;
+  } else {
+    RC = &AMDGPU::VReg_512RegClass;
+    NewAddrDwords = 16;
+  }
+
+  unsigned VgprBase = 0;
+  bool IsUndef = true;
+  bool IsKill = NewAddrDwords == Info->VAddrDwords;
+  for (unsigned i = 0; i < Info->VAddrDwords; ++i) {
+    const MachineOperand &Op = MI.getOperand(VAddr0Idx + i);
+    unsigned Vgpr = TRI.getHWRegIndex(Op.getReg());
+
+    if (i == 0) {
+      VgprBase = Vgpr;
+    } else if (VgprBase + i != Vgpr)
+      return;
+
+    if (!Op.isUndef())
+      IsUndef = false;
+    if (!Op.isKill())
+      IsKill = false;
+  }
+
+  if (VgprBase + NewAddrDwords > 256)
+    return;
+
+  // Further check for implicit tied operands - this may be present if TFE is
+  // enabled
+  int TFEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe);
+  int LWEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::lwe);
+  unsigned TFEVal = MI.getOperand(TFEIdx).getImm();
+  unsigned LWEVal = MI.getOperand(LWEIdx).getImm();
+  int ToUntie = -1;
+  if (TFEVal || LWEVal) {
+    // TFE/LWE is enabled so we need to deal with an implicit tied operand
+    for (unsigned i = LWEIdx + 1, e = MI.getNumOperands(); i != e; ++i) {
+      if (MI.getOperand(i).isReg() && MI.getOperand(i).isTied() &&
+          MI.getOperand(i).isImplicit()) {
+        // This is the tied operand
+        assert(
+            ToUntie == -1 &&
+            "found more than one tied implicit operand when expecting only 1");
+        ToUntie = i;
+        MI.untieRegOperand(ToUntie);
+      }
+    }
+  }
+
+  unsigned NewOpcode =
+      AMDGPU::getMIMGOpcode(Info->BaseOpcode, AMDGPU::MIMGEncGfx10Default,
+                            Info->VDataDwords, NewAddrDwords);
+  MI.setDesc(TII->get(NewOpcode));
+  MI.getOperand(VAddr0Idx).setReg(RC->getRegister(VgprBase));
+  MI.getOperand(VAddr0Idx).setIsUndef(IsUndef);
+  MI.getOperand(VAddr0Idx).setIsKill(IsKill);
+
+  for (unsigned i = 1; i < Info->VAddrDwords; ++i)
+    MI.RemoveOperand(VAddr0Idx + 1);
+
+  if (ToUntie >= 0) {
+    MI.tieOperands(
+        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata),
+        ToUntie - (Info->VAddrDwords - 1));
+  }
+}
+
 /// Attempt to shink AND/OR/XOR operations requiring non-inlineable literals.
 /// For AND or OR, try using S_BITSET{0,1} to clear or set bits.
 /// If the inverse of the immediate is legal, use ANDN2, ORN2 or
@@ -597,6 +689,14 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
           continue;
       }
 
+      if (TII->isMIMG(MI.getOpcode()) &&
+          ST.getGeneration() >= AMDGPUSubtarget::GFX10 &&
+          MF.getProperties().hasProperty(
+              MachineFunctionProperties::Property::NoVRegs)) {
+        shrinkMIMG(MI);
+        continue;
+      }
+
       if (!TII->hasVALU32BitEncoding(MI.getOpcode()))
         continue;
author	Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>	2019-05-01 16:32:58 +0000
committer	Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>	2019-05-01 16:32:58 +0000
commit	692560dc986d90930887c0313249bc9fff798bf8 (patch)
tree	53610f5e08260b53aa5d8e6a14c0254d33209ae2 /llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
parent	b3203ec078ca72919475093aca9fc19ca58dd272 (diff)
download	bcm5719-llvm-692560dc986d90930887c0313249bc9fff798bf8.tar.gz bcm5719-llvm-692560dc986d90930887c0313249bc9fff798bf8.zip