From b4a647449fa01bd4e29bce5afef51770cddec664 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Wed, 8 Jan 2020 12:53:15 -0500
Subject: TableGen/GlobalISel: Add way for SDNodeXForm to work on timm

The current implementation assumes there is an instruction associated
with the transform, but this is not the case for
timm/TargetConstant/immarg values. These transforms should directly
operate on a specific MachineOperand in the source
instruction. TableGen would assert if you attempted to define an
equivalent GISDNodeXFormEquiv using timm when it failed to find the
instruction matcher.

Specially recognize SDNodeXForms on timm, and pass the operand index
to the render function.

Ideally this would be a separate render function type that looks like
void renderFoo(MachineInstrBuilder, const MachineOperand&), but this
proved to be somewhat mechanically painful. Instead, add an optional
operand index argument, which is passed only when the transform should
look at just the one source operand.

Theoretically it would also be possible to only ever pass the
MachineOperand, and the existing renderers would check the parent. I
think that would be somewhat ugly for the standard usage which may
want to inspect other operands, and I also think MachineOperand should
eventually not carry a pointer to the parent instruction.

Use it in one sample pattern. This isn't a great example, since the
transform exists to satisfy DAG type constraints. This could also be
avoided by just changing the MachineInstr's arbitrary choice of
operand type from i16 to i32. Other patterns have nontrivial uses, but
this serves as the simplest example.

One remaining flaw: if you try to use an SDNodeXForm defined for imm
while the source pattern uses timm, you still hit the "Failed to
lookup instruction" assert. However, there is now a way to avoid it:
define the transform on timm instead, as as_i16timm does here.
---
 llvm/lib/Target/AMDGPU/AMDGPUGISel.td              |  3 +++
 .../Target/AMDGPU/AMDGPUInstructionSelector.cpp    | 31 +++++++++++++++++-----
 llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h | 19 +++++++------
 llvm/lib/Target/AMDGPU/DSInstructions.td           |  2 +-
 llvm/lib/Target/AMDGPU/SIInstrInfo.td              |  4 +++
 5 files changed, 43 insertions(+), 16 deletions(-)

(limited to 'llvm/lib/Target/AMDGPU')
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index f9983693a99..d420aa02ac2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -209,6 +209,9 @@ foreach Ty = [i64, p0, p1, p4] in {
 def gi_as_i32timm : GICustomOperandRenderer<"renderTruncImm32">,
   GISDNodeXFormEquiv<as_i32timm>;
 
+def gi_as_i16timm : GICustomOperandRenderer<"renderTruncTImm">,
+  GISDNodeXFormEquiv<as_i16timm>;
+
 def gi_NegateImm : GICustomOperandRenderer<"renderNegateImm">,
   GISDNodeXFormEquiv<NegateImm>;
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 132c51c9e08..72ccf0df4f3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -2106,21 +2106,28 @@ AMDGPUInstructionSelector::selectDS1Addr1Offset(MachineOperand &Root) const {
 }
 
 void AMDGPUInstructionSelector::renderTruncImm32(MachineInstrBuilder &MIB,
-                                                 const MachineInstr &MI) const {
-  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT");
+                                                 const MachineInstr &MI,
+                                                 int OpIdx) const {
+  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
+         "Expected G_CONSTANT");
   Optional<int64_t> CstVal = getConstantVRegVal(MI.getOperand(0).getReg(), *MRI);
   assert(CstVal && "Expected constant value");
   MIB.addImm(CstVal.getValue());
 }
 
 void AMDGPUInstructionSelector::renderNegateImm(MachineInstrBuilder &MIB,
-                                                const MachineInstr &MI) const {
-  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT");
+                                                const MachineInstr &MI,
+                                                int OpIdx) const {
+  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
+         "Expected G_CONSTANT");
   MIB.addImm(-MI.getOperand(1).getCImm()->getSExtValue());
 }
 
 void AMDGPUInstructionSelector::renderBitcastImm(MachineInstrBuilder &MIB,
-                                                 const MachineInstr &MI) const {
+                                                 const MachineInstr &MI,
+                                                 int OpIdx) const {
+  assert(OpIdx == -1);
+
   const MachineOperand &Op = MI.getOperand(1);
   if (MI.getOpcode() == TargetOpcode::G_FCONSTANT)
     MIB.addImm(Op.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
@@ -2131,11 +2138,21 @@ void AMDGPUInstructionSelector::renderBitcastImm(MachineInstrBuilder &MIB,
 }
 
 void AMDGPUInstructionSelector::renderPopcntImm(MachineInstrBuilder &MIB,
-                                                const MachineInstr &MI) const {
-  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT");
+                                                const MachineInstr &MI,
+                                                int OpIdx) const {
+  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
+         "Expected G_CONSTANT");
   MIB.addImm(MI.getOperand(1).getCImm()->getValue().countPopulation());
 }
 
+/// This only really exists to satisfy DAG type checking machinery, so is a
+/// no-op here.
+void AMDGPUInstructionSelector::renderTruncTImm(MachineInstrBuilder &MIB,
+                                                const MachineInstr &MI,
+                                                int OpIdx) const {
+  MIB.addImm(MI.getOperand(OpIdx).getImm());
+}
+
 bool AMDGPUInstructionSelector::isInlineImmediate16(int64_t Imm) const {
   return AMDGPU::isInlinableLiteral16(Imm, STI.hasInv2PiInlineImm());
 }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index 0799ace0867..633c4d35137 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -168,17 +168,20 @@ private:
   InstructionSelector::ComplexRendererFns
   selectDS1Addr1Offset(MachineOperand &Root) const;
 
-  void renderTruncImm32(MachineInstrBuilder &MIB,
-                        const MachineInstr &MI) const;
+  void renderTruncImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
+                        int OpIdx = -1) const;
 
-  void renderNegateImm(MachineInstrBuilder &MIB,
-                       const MachineInstr &MI) const;
+  void renderTruncTImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
+                       int OpIdx) const;
 
-  void renderBitcastImm(MachineInstrBuilder &MIB,
-                        const MachineInstr &MI) const;
+  void renderNegateImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
+                       int OpIdx) const;
 
-  void renderPopcntImm(MachineInstrBuilder &MIB,
-                       const MachineInstr &MI) const;
+  void renderBitcastImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
+                        int OpIdx) const;
+
+  void renderPopcntImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
+                       int OpIdx) const;
 
   bool isInlineImmediate16(int64_t Imm) const;
   bool isInlineImmediate32(int64_t Imm) const;
diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td
index f4e50e3a15e..fe7faca8b15 100644
--- a/llvm/lib/Target/AMDGPU/DSInstructions.td
+++ b/llvm/lib/Target/AMDGPU/DSInstructions.td
@@ -619,7 +619,7 @@ def DS_ADD_SRC2_F32 : DS_1A<"ds_add_src2_f32">;
 
 def : GCNPat <
   (int_amdgcn_ds_swizzle i32:$src, timm:$offset16),
-  (DS_SWIZZLE_B32 VGPR_32:$src, (as_i16imm $offset16), (i1 0))
+  (DS_SWIZZLE_B32 VGPR_32:$src, (as_i16timm $offset16), (i1 0))
 >;
 
 class DSReadPat <DS_Pseudo inst, ValueType vt, PatFrag frag, int gds=0> : GCNPat <
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 1518beafc7a..85e8d0582dc 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -677,6 +677,10 @@ def as_i16imm : SDNodeXForm<imm, [{
   return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16);
 }]>;
 
+def as_i16timm : SDNodeXForm<timm, [{
+  return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16);
+}]>;
+
 def as_i32imm: SDNodeXForm<imm, [{
   return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32);
 }]>;
-- 
cgit v1.2.3