[AMDGPU][MC][GFX10] Enabled v_movrel*[sdwa|dpp|dpp8] opcodes

See https://bugs.llvm.org/show_bug.cgi?id=43712

Reviewers: arsenm, rampitec

Differential Revision: https://reviews.llvm.org/D70170
author: Dmitry Preobrazhensky <dmitry.preobrazhensky@amd.com> 2019-11-18 17:23:40 +0300
committer: Dmitry Preobrazhensky <dmitry.preobrazhensky@amd.com> 2019-11-18 17:23:40 +0300
commit: edd9f701638e28c4419658c1daed25ea0c6e8841 (patch)
tree: e315e05517151074f0a88f6fc80b62d9419954b6 /llvm/lib
parent: b622ff39c0c482494a7400ac0256b543025cd449 (diff)
download: bcm5719-llvm-edd9f701638e28c4419658c1daed25ea0c6e8841.tar.gz
bcm5719-llvm-edd9f701638e28c4419658c1daed25ea0c6e8841.zip
2 files changed, 62 insertions, 41 deletions
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 9dd511fab57..1f0f9f238fb 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1320,6 +1320,7 @@ private:
   bool validateIntClampSupported(const MCInst &Inst);
   bool validateMIMGAtomicDMask(const MCInst &Inst);
   bool validateMIMGGatherDMask(const MCInst &Inst);
+  bool validateMovrels(const MCInst &Inst);
   bool validateMIMGDataSize(const MCInst &Inst);
   bool validateMIMGAddrSize(const MCInst &Inst);
   bool validateMIMGD16(const MCInst &Inst);
@@ -3049,6 +3050,41 @@ bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
 }
 
+static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
+{
+  switch (Opcode) {
+  case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
+  case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
+  case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
+    return true; // one of the movrels* SDWA opcodes listed above
+  default:
+    return false;
+  }
+}
+
+// movrels* opcodes should only allow VGPRs as src0.
+// This is specified in the .td description for vop1/vop3,
+// but sdwa is handled differently. See isSDWAOperand.
+bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst) {
+
+  const unsigned Opc = Inst.getOpcode();
+  const MCInstrDesc &Desc = MII.get(Opc);
+
+  if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
+    return true; // not a movrels* SDWA opcode: nothing to check
+
+  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
+  assert(Src0Idx != -1);
+
+  const MCOperand &Src0 = Inst.getOperand(Src0Idx);
+  if (!Src0.isReg())
+    return false; // a non-register src0 is rejected
+
+  auto Reg = Src0.getReg();
+  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
+  return !isSGPR(mc2PseudoReg(Reg), TRI); // reject SGPR src0
+}
+
 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
 
   const unsigned Opc = Inst.getOpcode();
@@ -3469,6 +3505,10 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
       "invalid image_gather dmask: only one bit must be set");
     return false;
   }
+  if (!validateMovrels(Inst)) {
+    Error(IDLoc, "source operand must be a VGPR");
+    return false;
+  }
   if (!validateFlatOffset(Inst, Operands)) {
     return false;
   }
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index a871aba40d4..2d8f488168c 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -260,14 +260,9 @@ defm V_CLREXCP : VOP1Inst <"v_clrexcp", VOP_NO_EXT<VOP_NONE>>;
 }
 
 // Restrict src0 to be VGPR
-def VOP_I32_VI32_NO_EXT : VOPProfile<[i32, i32, untyped, untyped]> {
+def VOP_MOVRELS : VOPProfile<[i32, i32, untyped, untyped]> {
   let Src0RC32 = VRegSrc_32;
   let Src0RC64 = VRegSrc_32;
-
-  let HasExt = 0;
-  let HasExtDPP = 0;
-  let HasExtSDWA = 0;
-  let HasExtSDWA9 = 0;
 }
 
 // Special case because there are no true output operands.  Hack vdst
@@ -281,26 +276,24 @@ class VOP_MOVREL<RegisterOperand Src1RC> : VOPProfile<[untyped, i32, untyped, un
   let Outs = (outs);
   let Ins32 = (ins Src0RC32:$vdst, Src1RC:$src0);
   let Ins64 = (ins Src0RC64:$vdst, Src1RC:$src0);
-  let InsDPP = (ins DstRC:$vdst, DstRC:$old, Src0RC32:$src0,
-                    dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
-                    bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
-  let InsDPP16 = !con(InsDPP, (ins FI:$fi));
+  let Asm32 = getAsm32<1, 1>.ret;
+  let Asm64 = getAsm64<1, 1, 0, 0, 1>.ret;
 
-  let InsSDWA = (ins Src0RC32:$vdst, Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
-                     clampmod:$clamp, omod:$omod, dst_sel:$dst_sel, dst_unused:$dst_unused,
+  let OutsSDWA = (outs Src0RC32:$vdst);
+  let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
+                     clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
                      src0_sel:$src0_sel);
+  let AsmSDWA9 = getAsmSDWA9<1, 0, 1>.ret;
 
-  let Asm32 = getAsm32<1, 1>.ret;
-  let Asm64 = getAsm64<1, 1, 0, 0, 1>.ret;
-  let AsmDPP = getAsmDPP<1, 1, 0>.ret;
+  let OutsDPP = (outs Src0RC32:$vdst);
+  let InsDPP16 = (ins Src0RC32:$old, Src0RC32:$src0,
+                      dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
+                      bank_mask:$bank_mask, bound_ctrl:$bound_ctrl, FI:$fi);
   let AsmDPP16 = getAsmDPP16<1, 1, 0>.ret;
-  let AsmSDWA = getAsmSDWA<1, 1>.ret;
-  let AsmSDWA9 = getAsmSDWA9<1, 0, 1>.ret;
 
-  let HasExt = 0;
-  let HasExtDPP = 0;
-  let HasExtSDWA = 0;
-  let HasExtSDWA9 = 0;
+  let OutsDPP8 = (outs Src0RC32:$vdst);
+  let InsDPP8 = (ins Src0RC32:$old, Src0RC32:$src0, dpp8:$dpp8, FI:$fi);
+  let AsmDPP8 = getAsmDPP8<1, 1, 0>.ret;
 
   let HasDst = 0;
   let EmitDst = 1; // force vdst emission
@@ -310,14 +303,14 @@ def VOP_MOVRELD : VOP_MOVREL<VSrc_b32>;
 def VOP_MOVRELSD : VOP_MOVREL<VRegSrc_32>;
 
 let SubtargetPredicate = HasMovrel, Uses = [M0, EXEC] in {
-// v_movreld_b32 is a special case because the destination output
+ // v_movreld_b32 is a special case because the destination output
  // register is really a source. It isn't actually read (but may be
  // written), and is only to provide the base register to start
  // indexing from. Tablegen seems to not let you define an implicit
  // virtual register output for the super register being written into,
  // so this must have an implicit def of the register added to it.
 defm V_MOVRELD_B32 : VOP1Inst <"v_movreld_b32", VOP_MOVRELD>;
-defm V_MOVRELS_B32 : VOP1Inst <"v_movrels_b32", VOP_I32_VI32_NO_EXT>;
+defm V_MOVRELS_B32 : VOP1Inst <"v_movrels_b32", VOP_MOVRELS>;
 defm V_MOVRELSD_B32 : VOP1Inst <"v_movrelsd_b32", VOP_MOVRELSD>;
 } // End Uses = [M0, EXEC]
 
@@ -528,16 +521,10 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
   }
 } // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10"
 
-multiclass VOP1_Real_gfx10_no_dpp<bits<9> op> :
-  VOP1_Real_e32_gfx10<op>, VOP1_Real_e64_gfx10<op>,
-  VOP1_Real_sdwa_gfx10<op>;
-
-multiclass VOP1_Real_gfx10_no_dpp8<bits<9> op> :
-  VOP1_Real_e32_gfx10<op>, VOP1_Real_e64_gfx10<op>,
-  VOP1_Real_sdwa_gfx10<op>, VOP1_Real_dpp_gfx10<op>;
-
 multiclass VOP1_Real_gfx10<bits<9> op> :
-  VOP1_Real_gfx10_no_dpp8<op>, VOP1_Real_dpp8_gfx10<op>;
+  VOP1_Real_e32_gfx10<op>, VOP1_Real_e64_gfx10<op>,
+  VOP1_Real_sdwa_gfx10<op>, VOP1_Real_dpp_gfx10<op>,
+  VOP1_Real_dpp8_gfx10<op>;
 
 defm V_PIPEFLUSH         : VOP1_Real_gfx10<0x01b>;
 defm V_MOVRELSD_2_B32    : VOP1_Real_gfx10<0x048>;
@@ -620,12 +607,6 @@ multiclass VOP1_Real_gfx6_gfx7<bits<9> op> :
 multiclass VOP1_Real_gfx6_gfx7_gfx10<bits<9> op> :
   VOP1_Real_gfx6_gfx7<op>, VOP1_Real_gfx10<op>;
 
-multiclass VOP1_Real_gfx6_gfx7_gfx10_no_dpp8<bits<9> op> :
-  VOP1_Real_gfx6_gfx7<op>, VOP1_Real_gfx10_no_dpp8<op>;
-
-multiclass VOP1_Real_gfx6_gfx7_gfx10_no_dpp<bits<9> op> :
-  VOP1_Real_gfx6_gfx7<op>, VOP1_Real_gfx10_no_dpp<op>;
-
 defm V_LOG_CLAMP_F32  : VOP1_Real_gfx6_gfx7<0x026>;
 defm V_RCP_CLAMP_F32  : VOP1_Real_gfx6_gfx7<0x028>;
 defm V_RCP_LEGACY_F32 : VOP1_Real_gfx6_gfx7<0x029>;
@@ -683,9 +664,9 @@ defm V_FRACT_F64         : VOP1_Real_gfx6_gfx7_gfx10<0x03e>;
 defm V_FREXP_EXP_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x03f>;
 defm V_FREXP_MANT_F32    : VOP1_Real_gfx6_gfx7_gfx10<0x040>;
 defm V_CLREXCP           : VOP1_Real_gfx6_gfx7_gfx10<0x041>;
-defm V_MOVRELD_B32       : VOP1_Real_gfx6_gfx7_gfx10_no_dpp<0x042>;
-defm V_MOVRELS_B32       : VOP1_Real_gfx6_gfx7_gfx10_no_dpp8<0x043>;
-defm V_MOVRELSD_B32      : VOP1_Real_gfx6_gfx7_gfx10_no_dpp8<0x044>;
+defm V_MOVRELD_B32       : VOP1_Real_gfx6_gfx7_gfx10<0x042>;
+defm V_MOVRELS_B32       : VOP1_Real_gfx6_gfx7_gfx10<0x043>;
+defm V_MOVRELSD_B32      : VOP1_Real_gfx6_gfx7_gfx10<0x044>;
 
 //===----------------------------------------------------------------------===//
 // GFX8, GFX9 (VI).
author	Dmitry Preobrazhensky <dmitry.preobrazhensky@amd.com>	2019-11-18 17:23:40 +0300
committer	Dmitry Preobrazhensky <dmitry.preobrazhensky@amd.com>	2019-11-18 17:23:40 +0300
commit	edd9f701638e28c4419658c1daed25ea0c6e8841 (patch)
tree	e315e05517151074f0a88f6fc80b62d9419954b6 /llvm/lib
parent	b622ff39c0c482494a7400ac0256b543025cd449 (diff)
download	bcm5719-llvm-edd9f701638e28c4419658c1daed25ea0c6e8841.tar.gz bcm5719-llvm-edd9f701638e28c4419658c1daed25ea0c6e8841.zip