[AMDGPU] Assembler: support v_mac_f32 DPP and SDWA. Move getNamedOperandIdx to AMDGPUBaseInfo.h

Reviewers: artem.tamazov, tstellarAMD Subscribers: arsenm, kzhuravl, wdng, nhaehnle, yaxunl, tony-tye Differential Revision: https://reviews.llvm.org/D25084 llvm-svn: 283560
author: Sam Kolton <Sam.Kolton@amd.com> 2016-10-07 14:46:06 +0000
committer: Sam Kolton <Sam.Kolton@amd.com> 2016-10-07 14:46:06 +0000
commit: a3ec5c10e236fbc4ce421c0849cb8c37988fcd81 (patch)
tree: 0f904324503cdb5a6663590251295d86cde6d2b0 /llvm/lib
parent: 02f623e74c1e27e8488e49bdf2766d7178e89714 (diff)
download: bcm5719-llvm-a3ec5c10e236fbc4ce421c0849cb8c37988fcd81.tar.gz
bcm5719-llvm-a3ec5c10e236fbc4ce421c0849cb8c37988fcd81.zip
8 files changed, 139 insertions, 54 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
index 9a00ecb24eb..42c7b967f3e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
@@ -23,7 +23,6 @@
 using namespace llvm;
 
 #define GET_INSTRINFO_CTOR_DTOR
-#define GET_INSTRINFO_NAMED_OPS
 #define GET_INSTRMAP_INFO
 #include "AMDGPUGenInstrInfo.inc"
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
index 001e03748ab..de834f453a6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
@@ -17,10 +17,10 @@
 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRINFO_H
 
 #include "llvm/Target/TargetInstrInfo.h"
+#include "Utils/AMDGPUBaseInfo.h"
 
 #define GET_INSTRINFO_HEADER
 #define GET_INSTRINFO_ENUM
-#define GET_INSTRINFO_OPERAND_ENUM
 #include "AMDGPUGenInstrInfo.inc"
 
 namespace llvm {
@@ -54,12 +54,6 @@ public:
   /// equivalent opcode that writes \p Channels Channels.
   int getMaskedMIMGOp(uint16_t Opcode, unsigned Channels) const;
 };
-
-namespace AMDGPU {
-  LLVM_READONLY
-  int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIndex);
-}  // End namespace AMDGPU
-
 } // End llvm namespace
 
 #endif
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 76074dca283..cab545012c0 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1365,6 +1365,16 @@ unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
       getForcedEncodingSize() != 64)
     return Match_PreferE32;
 
+  if (Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa ||
+      Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa) {
+    // v_mac_f32/16 allow only dst_sel == DWORD;
+    auto OpNum = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
+    const auto &Op = Inst.getOperand(OpNum);
+    if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
+      return Match_InvalidOperand;
+    }
+  }
+
   return Match_Success;
 }
 
@@ -2675,6 +2685,17 @@ void AMDGPUAsmParser::cvtVOP3_2_mod(MCInst &Inst, const OperandVector &Operands)
   }
 }
 
+static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
+      // 1. This operand is input modifiers
+  return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
+      // 2. This is not last operand
+      && Desc.NumOperands > (OpNum + 1)
+      // 3. Next operand is register class
+      && Desc.OpInfo[OpNum + 1].RegClass != -1
+      // 4. Next register is not tied to any other operand
+      && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
+}
+
 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
   OptionalImmIndexMap OptionalIdx;
   unsigned I = 1;
@@ -2685,7 +2706,7 @@ void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
 
   for (unsigned E = Operands.size(); I != E; ++I) {
     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
-    if (Desc.OpInfo[Inst.getNumOperands()].OperandType == AMDGPU::OPERAND_INPUT_MODS) {
+    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
     } else if (Op.isImm()) {
       OptionalIdx[Op.getImmTy()] = I;
@@ -2696,6 +2717,19 @@ void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
 
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
+
+  // special case v_mac_f32:
+  // it has src2 register operand that is tied to dst operand
+  // we don't allow modifiers for this operand in assembler so src2_modifiers
+  // should be 0
+  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_e64_si ||
+      Inst.getOpcode() == AMDGPU::V_MAC_F32_e64_vi) {
+    auto it = Inst.begin();
+    std::advance(it, AMDGPU::getNamedOperandIdx(AMDGPU::V_MAC_F32_e64, AMDGPU::OpName::src2_modifiers));
+    it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
+    ++it;
+    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
+  }
 }
 
 //===----------------------------------------------------------------------===//
@@ -2846,7 +2880,7 @@ void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) {
   for (unsigned E = Operands.size(); I != E; ++I) {
     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
     // Add the register arguments
-    if (Desc.OpInfo[Inst.getNumOperands()].OperandType == AMDGPU::OPERAND_INPUT_MODS) {
+    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
     } else if (Op.isDPPCtrl()) {
       Op.addImmOperands(Inst, 1);
@@ -2861,6 +2895,14 @@ void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) {
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
+
+  // special case v_mac_f32:
+  // it has src2 register operand that is tied to dst operand
+  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_dpp) {
+    auto it = Inst.begin();
+    std::advance(it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
+    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
+  }
 }
 
 //===----------------------------------------------------------------------===//
@@ -2870,6 +2912,8 @@ void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) {
 AMDGPUAsmParser::OperandMatchResultTy
 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
                               AMDGPUOperand::ImmTy Type) {
+  using namespace llvm::AMDGPU::SDWA;
+
   SMLoc S = Parser.getTok().getLoc();
   StringRef Value;
   AMDGPUAsmParser::OperandMatchResultTy res;
@@ -2881,13 +2925,13 @@ AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
 
   int64_t Int;
   Int = StringSwitch<int64_t>(Value)
-        .Case("BYTE_0", 0)
-        .Case("BYTE_1", 1)
-        .Case("BYTE_2", 2)
-        .Case("BYTE_3", 3)
-        .Case("WORD_0", 4)
-        .Case("WORD_1", 5)
-        .Case("DWORD", 6)
+        .Case("BYTE_0", SdwaSel::BYTE_0)
+        .Case("BYTE_1", SdwaSel::BYTE_1)
+        .Case("BYTE_2", SdwaSel::BYTE_2)
+        .Case("BYTE_3", SdwaSel::BYTE_3)
+        .Case("WORD_0", SdwaSel::WORD_0)
+        .Case("WORD_1", SdwaSel::WORD_1)
+        .Case("DWORD", SdwaSel::DWORD)
         .Default(0xffffffff);
   Parser.Lex(); // eat last token
 
@@ -2901,6 +2945,8 @@ AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
 
 AMDGPUAsmParser::OperandMatchResultTy
 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
+  using namespace llvm::AMDGPU::SDWA;
+
   SMLoc S = Parser.getTok().getLoc();
   StringRef Value;
   AMDGPUAsmParser::OperandMatchResultTy res;
@@ -2912,9 +2958,9 @@ AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
 
   int64_t Int;
   Int = StringSwitch<int64_t>(Value)
-        .Case("UNUSED_PAD", 0)
-        .Case("UNUSED_SEXT", 1)
-        .Case("UNUSED_PRESERVE", 2)
+        .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
+        .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
+        .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
         .Default(0xffffffff);
   Parser.Lex(); // eat last token
 
@@ -2956,7 +3002,7 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
         Op.Reg.RegNo == AMDGPU::VCC) {
       // VOPC sdwa use "vcc" token as dst. Skip it.
       continue;
-    } else if (Desc.OpInfo[Inst.getNumOperands()].OperandType == AMDGPU::OPERAND_INPUT_MODS) {
+    } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
       Op.addRegOrImmWithInputModsOperands(Inst, 2);
     } else if (Op.isImm()) {
       // Handle optional arguments
@@ -2968,32 +3014,40 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
 
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
   
-  if (Inst.getOpcode() == AMDGPU::V_NOP_sdwa) {
+  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa) {
     // V_NOP_sdwa has no optional sdwa arguments
-    return;
-  }
-  switch (BasicInstType) {
-  case SIInstrFlags::VOP1: {
-    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, 6);
-    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, 2);
-    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, 6);
-    break;
-  }
-  case SIInstrFlags::VOP2: {
-    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, 6);
-    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, 2);
-    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, 6);
-    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, 6);
-    break;
-  }
-  case SIInstrFlags::VOPC: {
-    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, 6);
-    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, 6);
-    break;
+    switch (BasicInstType) {
+    case SIInstrFlags::VOP1: {
+      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, 6);
+      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, 2);
+      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, 6);
+      break;
+    }
+    case SIInstrFlags::VOP2: {
+      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, 6);
+      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, 2);
+      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, 6);
+      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, 6);
+      break;
+    }
+    case SIInstrFlags::VOPC: {
+      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, 6);
+      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, 6);
+      break;
+    }
+    default:
+      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
+    }
   }
-  default:
-    llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
+  
+  // special case v_mac_f32:
+  // it has src2 register operand that is tied to dst operand
+  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa) {
+    auto it = Inst.begin();
+    std::advance(it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
+    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
   }
+
 }
 
 /// Force static initialization.
diff --git a/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
index 6cb1500a827..494b86714c5 100644
--- a/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
@@ -531,15 +531,17 @@ void AMDGPUInstPrinter::printBoundCtrl(const MCInst *MI, unsigned OpNo,
 
 void AMDGPUInstPrinter::printSDWASel(const MCInst *MI, unsigned OpNo,
                                      raw_ostream &O) {
+  using namespace llvm::AMDGPU::SDWA;
+
   unsigned Imm = MI->getOperand(OpNo).getImm();
   switch (Imm) {
-  case 0: O << "BYTE_0"; break;
-  case 1: O << "BYTE_1"; break;
-  case 2: O << "BYTE_2"; break;
-  case 3: O << "BYTE_3"; break;
-  case 4: O << "WORD_0"; break;
-  case 5: O << "WORD_1"; break;
-  case 6: O << "DWORD"; break;
+  case SdwaSel::BYTE_0: O << "BYTE_0"; break;
+  case SdwaSel::BYTE_1: O << "BYTE_1"; break;
+  case SdwaSel::BYTE_2: O << "BYTE_2"; break;
+  case SdwaSel::BYTE_3: O << "BYTE_3"; break;
+  case SdwaSel::WORD_0: O << "WORD_0"; break;
+  case SdwaSel::WORD_1: O << "WORD_1"; break;
+  case SdwaSel::DWORD: O << "DWORD"; break;
   default: llvm_unreachable("Invalid SDWA data select operand");
   }
 }
@@ -568,12 +570,14 @@ void AMDGPUInstPrinter::printSDWASrc1Sel(const MCInst *MI, unsigned OpNo,
 void AMDGPUInstPrinter::printSDWADstUnused(const MCInst *MI, unsigned OpNo,
                                            const MCSubtargetInfo &STI,
                                            raw_ostream &O) {
+  using namespace llvm::AMDGPU::SDWA;
+
   O << "dst_unused:";
   unsigned Imm = MI->getOperand(OpNo).getImm();
   switch (Imm) {
-  case 0: O << "UNUSED_PAD"; break;
-  case 1: O << "UNUSED_SEXT"; break;
-  case 2: O << "UNUSED_PRESERVE"; break;
+  case DstUnused::UNUSED_PAD: O << "UNUSED_PAD"; break;
+  case DstUnused::UNUSED_SEXT: O << "UNUSED_SEXT"; break;
+  case DstUnused::UNUSED_PRESERVE: O << "UNUSED_PRESERVE"; break;
   default: llvm_unreachable("Invalid SDWA dest_unused operand");
   }
 }
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
index 643b8722d91..2e4d0ecc8ce 100644
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -219,6 +219,26 @@ enum WidthMinusOne { // WidthMinusOne, (5) [15:11]
 };
 
 } // namespace Hwreg
+
+namespace SDWA {
+
+enum SdwaSel {
+  BYTE_0 = 0,
+  BYTE_1 = 1,
+  BYTE_2 = 2,
+  BYTE_3 = 3,
+  WORD_0 = 4,
+  WORD_1 = 5,
+  DWORD = 6,
+};
+
+enum DstUnused {
+  UNUSED_PAD = 0,
+  UNUSED_SEXT = 1,
+  UNUSED_PRESERVE = 2,
+};
+
+} // namespace SDWA
 } // namespace AMDGPU
 } // namespace llvm
 
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 9be0e298ca5..63c0c8851a7 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -27,6 +27,12 @@
 #include "AMDGPUGenRegisterInfo.inc"
 #undef GET_REGINFO_ENUM
 
+#define GET_INSTRINFO_NAMED_OPS
+#define GET_INSTRINFO_ENUM
+#include "AMDGPUGenInstrInfo.inc"
+#undef GET_INSTRINFO_NAMED_OPS
+#undef GET_INSTRINFO_ENUM
+
 namespace llvm {
 namespace AMDGPU {
 
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 72fd797b06d..95788766d35 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -13,6 +13,10 @@
 #include "AMDKernelCodeT.h"
 #include "llvm/IR/CallingConv.h"
 
+#define GET_INSTRINFO_OPERAND_ENUM
+#include "AMDGPUGenInstrInfo.inc"
+#undef GET_INSTRINFO_OPERAND_ENUM
+
 namespace llvm {
 
 class FeatureBitset;
@@ -26,6 +30,9 @@ class MCSubtargetInfo;
 
 namespace AMDGPU {
 
+LLVM_READONLY
+int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);
+
 struct IsaVersion {
   unsigned Major;
   unsigned Minor;
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index b0f5d8f9a12..fc13382926d 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -165,6 +165,7 @@ def VOP_MAC : VOPProfile <[f32, f32, f32, f32]> {
   let AsmSDWA = getAsmSDWA<1, 2, HasModifiers, f32>.ret;
   let HasSrc2 = 0;
   let HasSrc2Mods = 0;
+  let HasExt = 1;
 }
 
 // Write out to vcc or arbitrary SGPR.
author	Sam Kolton <Sam.Kolton@amd.com>	2016-10-07 14:46:06 +0000
committer	Sam Kolton <Sam.Kolton@amd.com>	2016-10-07 14:46:06 +0000
commit	a3ec5c10e236fbc4ce421c0849cb8c37988fcd81 (patch)
tree	0f904324503cdb5a6663590251295d86cde6d2b0 /llvm/lib
parent	02f623e74c1e27e8488e49bdf2766d7178e89714 (diff)
download	bcm5719-llvm-a3ec5c10e236fbc4ce421c0849cb8c37988fcd81.tar.gz bcm5719-llvm-a3ec5c10e236fbc4ce421c0849cb8c37988fcd81.zip