AMDGPU: Partially fix disassembly of MIMG instructions

Stores failed to decode at all since they didn't have a DecoderNamespace set. Loads worked, but did not change the register width displayed to match the number of enabled channels. The number of printed registers for vaddr is still wrong, but I don't think that's encoded in the instruction so there's not much we can do about that. Image atomics are still broken. MIMG is the same encoding for SI/VI, but the image atomic classes are split up into encoding specific versions unlike every other MIMG instruction. They have isAsmParserOnly set on them for some reason. dmask is also special for these, so we probably should not have it as an explicit operand as it is now. llvm-svn: 320614
author: Matt Arsenault <Matthew.Arsenault@amd.com> 2017-12-13 21:07:51 +0000
committer: Matt Arsenault <Matthew.Arsenault@amd.com> 2017-12-13 21:07:51 +0000
commit: cad7fa857cf6d9949f25dcc5bc1f313f7ec2b0b6 (patch)
tree: f908e4b0cdd17b3597b031b8b96e823f454695f1 /llvm/lib
parent: a9f77c6df755841ba1ae2b5c56d0ca4d53907433 (diff)
download: bcm5719-llvm-cad7fa857cf6d9949f25dcc5bc1f313f7ec2b0b6.tar.gz
bcm5719-llvm-cad7fa857cf6d9949f25dcc5bc1f313f7ec2b0b6.zip
8 files changed, 128 insertions, 78 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
index 49447862b60..8156599528c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
@@ -23,7 +23,6 @@
 using namespace llvm;
 
 #define GET_INSTRINFO_CTOR_DTOR
-#define GET_INSTRMAP_INFO
 #include "AMDGPUGenInstrInfo.inc"
 
 // Pin the vtable to this file.
@@ -56,59 +55,6 @@ bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1,
   return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
 }
 
-static AMDGPU::Channels indexToChannel(unsigned Channel) {
-  switch (Channel) {
-  case 1:
-    return AMDGPU::Channels_1;
-  case 2:
-    return AMDGPU::Channels_2;
-  case 3:
-    return AMDGPU::Channels_3;
-  case 4:
-    return AMDGPU::Channels_4;
-  default:
-    llvm_unreachable("invalid MIMG channel");
-  }
-}
-
-// FIXME: Need to handle d16 images correctly.
-static unsigned rcToChannels(unsigned RCID) {
-  switch (RCID) {
-  case AMDGPU::VGPR_32RegClassID:
-    return 1;
-  case AMDGPU::VReg_64RegClassID:
-    return 2;
-  case AMDGPU::VReg_96RegClassID:
-    return 3;
-  case AMDGPU::VReg_128RegClassID:
-    return 4;
-  default:
-    llvm_unreachable("invalid MIMG register class");
-  }
-}
-
-int AMDGPUInstrInfo::getMaskedMIMGOp(unsigned Opc,
-                                     unsigned NewChannels) const {
-  AMDGPU::Channels Channel = indexToChannel(NewChannels);
-  unsigned OrigChannels = rcToChannels(get(Opc).OpInfo[0].RegClass);
-  if (NewChannels == OrigChannels)
-    return Opc;
-
-  switch (OrigChannels) {
-  case 1:
-    return AMDGPU::getMaskedMIMGOp1(Opc, Channel);
-  case 2:
-    return AMDGPU::getMaskedMIMGOp2(Opc, Channel);
-  case 3:
-    return AMDGPU::getMaskedMIMGOp3(Opc, Channel);
-  case 4:
-    return AMDGPU::getMaskedMIMGOp4(Opc, Channel);
-  default:
-    llvm_unreachable("invalid MIMG channel");
-  }
-}
-
-
 // This must be kept in sync with the SIEncodingFamily class in SIInstrInfo.td
 enum SIEncodingFamily {
   SI = 0,
@@ -118,17 +64,6 @@ enum SIEncodingFamily {
   GFX9 = 4
 };
 
-// Wrapper for Tablegen'd function.  enum Subtarget is not defined in any
-// header files, so we need to wrap it in a function that takes unsigned
-// instead.
-namespace llvm {
-namespace AMDGPU {
-static int getMCOpcode(uint16_t Opcode, unsigned Gen) {
-  return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
-}
-}
-}
-
 static SIEncodingFamily subtargetEncodingFamily(const AMDGPUSubtarget &ST) {
   switch (ST.getGeneration()) {
   case AMDGPUSubtarget::SOUTHERN_ISLANDS:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
index 4ab0515d5ca..a9fcd483463 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
@@ -22,6 +22,7 @@
 
 #define GET_INSTRINFO_HEADER
 #include "AMDGPUGenInstrInfo.inc"
+#undef GET_INSTRINFO_HEADER
 
 namespace llvm {
 
@@ -49,10 +50,6 @@ public:
   /// Return -1 if the target-specific opcode for the pseudo instruction does
   /// not exist. If Opcode is not a pseudo instruction, this is identity.
   int pseudoToMCOpcode(int Opcode) const;
-
-  /// \brief Given a MIMG \p MI that writes any number of channels, return the
-  /// equivalent opcode that writes \p NewChannels Channels.
-  int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) const;
 };
 } // End llvm namespace
 
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index a33670c6403..4a3f2c97517 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -234,6 +234,10 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
                          AMDGPU::OpName::src2_modifiers);
   }
 
+  if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::MIMG)) {
+    Res = convertMIMGInst(MI);
+  }
+
   if (Res && IsSDWA)
     Res = convertSDWAInst(MI);
 
@@ -260,6 +264,42 @@ DecodeStatus AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const {
   return MCDisassembler::Success;
 }
 
+DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
+  int VDataIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
+                                            AMDGPU::OpName::vdata);
+
+  int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
+                                            AMDGPU::OpName::dmask);
+  unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
+  if (DMask == 0)
+    return MCDisassembler::Success;
+
+  unsigned ChannelCount = countPopulation(DMask);
+  if (ChannelCount == 1)
+    return MCDisassembler::Success;
+
+  int NewOpcode = AMDGPU::getMaskedMIMGOp(*MCII, MI.getOpcode(), ChannelCount);
+  assert(NewOpcode != -1 && "could not find matching mimg channel instruction");
+  auto RCID = MCII->get(NewOpcode).OpInfo[VDataIdx].RegClass;
+
+  // Widen the register to the correct number of enabled channels.
+  unsigned Vdata0 = MI.getOperand(VDataIdx).getReg();
+  auto NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0,
+                                          &MRI.getRegClass(RCID));
+  if (NewVdata == AMDGPU::NoRegister) {
+    // It's possible to encode this such that the low register + enabled
+    // components exceeds the register count.
+    return MCDisassembler::Success;
+  }
+
+  MI.setOpcode(NewOpcode);
+  // vaddr will always appear as a single VGPR. This will look different than
+  // how it is usually emitted because the number of register components is not
+  // in the instruction encoding.
+  MI.getOperand(VDataIdx) = MCOperand::createReg(NewVdata);
+  return MCDisassembler::Success;
+}
+
 const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const {
   return getContext().getRegisterInfo()->
     getRegClassName(&AMDGPUMCRegisterClasses[RegClassID]);
@@ -786,7 +826,7 @@ static MCSymbolizer *createAMDGPUSymbolizer(const Triple &/*TT*/,
 static MCDisassembler *createAMDGPUDisassembler(const Target &T,
                                                 const MCSubtargetInfo &STI,
                                                 MCContext &Ctx) {
-  return new AMDGPUDisassembler(STI, Ctx);
+  return new AMDGPUDisassembler(STI, Ctx, T.createMCInstrInfo());
 }
 
 extern "C" void LLVMInitializeAMDGPUDisassembler() {
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
index 18a91356d20..ce396eb68c4 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -17,16 +17,18 @@
 #define LLVM_LIB_TARGET_AMDGPU_DISASSEMBLER_AMDGPUDISASSEMBLER_H
 
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
 #include "llvm/MC/MCDisassembler/MCRelocationInfo.h"
 #include "llvm/MC/MCDisassembler/MCSymbolizer.h"
+
 #include <algorithm>
 #include <cstdint>
 #include <memory>
 
 namespace llvm {
 
-class MCContext;
 class MCInst;
 class MCOperand;
 class MCSubtargetInfo;
@@ -38,13 +40,16 @@ class Twine;
 
 class AMDGPUDisassembler : public MCDisassembler {
 private:
+  std::unique_ptr<MCInstrInfo const> const MCII;
+  const MCRegisterInfo &MRI;
   mutable ArrayRef<uint8_t> Bytes;
   mutable uint32_t Literal;
   mutable bool HasLiteral;
 
 public:
-  AMDGPUDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) :
-    MCDisassembler(STI, Ctx) {}
+  AMDGPUDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
+                     MCInstrInfo const *MCII) :
+    MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()) {}
 
   ~AMDGPUDisassembler() override = default;
 
@@ -64,6 +69,7 @@ public:
                              uint64_t Address) const;
 
   DecodeStatus convertSDWAInst(MCInst &MI) const;
+  DecodeStatus convertMIMGInst(MCInst &MI) const;
 
   MCOperand decodeOperand_VGPR_32(unsigned Val) const;
   MCOperand decodeOperand_VS_32(unsigned Val) const;
diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
index 9622bccddf9..30a2df51038 100644
--- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -63,13 +63,13 @@ multiclass MIMG_NoSampler <bits<7> op, string asm> {
 
 class MIMG_Store_Helper <bits<7> op, string asm,
                          RegisterClass data_rc,
-                         RegisterClass addr_rc> : MIMG_Helper <
+                         RegisterClass addr_rc,
+                         string dns = ""> : MIMG_Helper <
   (outs),
   (ins data_rc:$vdata, addr_rc:$vaddr, SReg_256:$srsrc,
        dmask:$dmask, unorm:$unorm, GLC:$glc, slc:$slc,
        r128:$r128, tfe:$tfe, lwe:$lwe, da:$da),
-  asm#" $vdata, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da"
-     >, MIMGe<op> {
+  asm#" $vdata, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da", dns>, MIMGe<op> {
   let ssamp = 0;
   let mayLoad = 1; // TableGen requires this for matching with the intrinsics
   let mayStore = 1;
@@ -81,7 +81,8 @@ class MIMG_Store_Helper <bits<7> op, string asm,
 multiclass MIMG_Store_Addr_Helper <bits<7> op, string asm,
                                   RegisterClass data_rc,
                                   int channels> {
-  def _V1 : MIMG_Store_Helper <op, asm, data_rc, VGPR_32>,
+  def _V1 : MIMG_Store_Helper <op, asm, data_rc, VGPR_32,
+                               !if(!eq(channels, 1), "AMDGPU", "")>,
             MIMG_Mask<asm#"_V1", channels>;
   def _V2 : MIMG_Store_Helper <op, asm, data_rc, VReg_64>,
             MIMG_Mask<asm#"_V2", channels>;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 92087623cec..d0262105ae8 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -6652,7 +6652,8 @@ SDNode *SITargetLowering::adjustWritemask(MachineSDNode *&Node,
   unsigned BitsSet = countPopulation(NewDmask);
 
   const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
-  int NewOpcode = TII->getMaskedMIMGOp(Node->getMachineOpcode(), BitsSet);
+  int NewOpcode = AMDGPU::getMaskedMIMGOp(*TII,
+                                          Node->getMachineOpcode(), BitsSet);
   assert(NewOpcode != -1 &&
          NewOpcode != static_cast<int>(Node->getMachineOpcode()) &&
          "failed to find equivalent MIMG op");
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 5a59e04c849..819a7add0be 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -23,6 +23,7 @@
 #include "llvm/IR/Module.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCSubtargetInfo.h"
@@ -39,7 +40,9 @@
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 
 #define GET_INSTRINFO_NAMED_OPS
+#define GET_INSTRMAP_INFO
 #include "AMDGPUGenInstrInfo.inc"
+#undef GET_INSTRMAP_INFO
 #undef GET_INSTRINFO_NAMED_OPS
 
 namespace {
@@ -100,6 +103,66 @@ static cl::opt<bool> EnablePackedInlinableLiterals(
 
 namespace AMDGPU {
 
+LLVM_READNONE
+static inline Channels indexToChannel(unsigned Channel) {
+  switch (Channel) {
+  case 1:
+    return AMDGPU::Channels_1;
+  case 2:
+    return AMDGPU::Channels_2;
+  case 3:
+    return AMDGPU::Channels_3;
+  case 4:
+    return AMDGPU::Channels_4;
+  default:
+    llvm_unreachable("invalid MIMG channel");
+  }
+}
+
+
+// FIXME: Need to handle d16 images correctly.
+static unsigned rcToChannels(unsigned RCID) {
+  switch (RCID) {
+  case AMDGPU::VGPR_32RegClassID:
+    return 1;
+  case AMDGPU::VReg_64RegClassID:
+    return 2;
+  case AMDGPU::VReg_96RegClassID:
+    return 3;
+  case AMDGPU::VReg_128RegClassID:
+    return 4;
+  default:
+    llvm_unreachable("invalid MIMG register class");
+  }
+}
+
+int getMaskedMIMGOp(const MCInstrInfo &MII, unsigned Opc, unsigned NewChannels) {
+  AMDGPU::Channels Channel = AMDGPU::indexToChannel(NewChannels);
+  unsigned OrigChannels = rcToChannels(MII.get(Opc).OpInfo[0].RegClass);
+  if (NewChannels == OrigChannels)
+    return Opc;
+
+  switch (OrigChannels) {
+  case 1:
+    return AMDGPU::getMaskedMIMGOp1(Opc, Channel);
+  case 2:
+    return AMDGPU::getMaskedMIMGOp2(Opc, Channel);
+  case 3:
+    return AMDGPU::getMaskedMIMGOp3(Opc, Channel);
+  case 4:
+    return AMDGPU::getMaskedMIMGOp4(Opc, Channel);
+  default:
+    llvm_unreachable("invalid MIMG channel");
+  }
+}
+
+// Wrapper for Tablegen'd function.  enum Subtarget is not defined in any
+// header files, so we need to wrap it in a function that takes unsigned
+// instead.
+int getMCOpcode(uint16_t Opcode, unsigned Gen) {
+  return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
+}
+
 namespace IsaInfo {
 
 IsaVersion getIsaVersion(const FeatureBitset &Features) {
@@ -833,6 +896,7 @@ bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
   return isGCN3Encoding(ST) ?
     isUInt<20>(EncodedOffset) : isUInt<8>(EncodedOffset);
 }
+
 } // end namespace AMDGPU
 
 } // end namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index c4b7779514f..a215b445378 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -156,6 +156,12 @@ unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
 LLVM_READONLY
 int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);
 
+LLVM_READONLY
+int getMaskedMIMGOp(const MCInstrInfo &MII,
+                    unsigned Opc, unsigned NewChannels);
+LLVM_READONLY
+int getMCOpcode(uint16_t Opcode, unsigned Gen);
+
 void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
                                const FeatureBitset &Features);
author	Matt Arsenault <Matthew.Arsenault@amd.com>	2017-12-13 21:07:51 +0000
committer	Matt Arsenault <Matthew.Arsenault@amd.com>	2017-12-13 21:07:51 +0000
commit	cad7fa857cf6d9949f25dcc5bc1f313f7ec2b0b6 (patch)
tree	f908e4b0cdd17b3597b031b8b96e823f454695f1 /llvm/lib
parent	a9f77c6df755841ba1ae2b5c56d0ca4d53907433 (diff)
download	bcm5719-llvm-cad7fa857cf6d9949f25dcc5bc1f313f7ec2b0b6.tar.gz bcm5719-llvm-cad7fa857cf6d9949f25dcc5bc1f313f7ec2b0b6.zip