TableGen/GlobalISel: Add way for SDNodeXForm to work on timm

The current implementation assumes there is an instruction associated with the transform, but this is not the case for timm/TargetConstant/immarg values. These transforms should directly operate on a specific MachineOperand in the source instruction. TableGen would assert if you attempted to define an equivalent GISDNodeXFormEquiv using timm when it failed to find the instruction matcher. Specially recognize SDNodeXForms on timm, and pass the operand index to the render function. Ideally this would be a separate render function type that looks like void renderFoo(MachineInstrBuilder, const MachineOperand&), but this proved to be somewhat mechanically painful. Add an optional operand index which will only be passed if the transform should only look at the one source operand. Theoretically it would also be possible to only ever pass the MachineOperand, and the existing renderers would check the parent. I think that would be somewhat ugly for the standard usage which may want to inspect other operands, and I also think MachineOperand should eventually not carry a pointer to the parent instruction. Use it in one sample pattern. This isn't a great example, since the transform exists to satisfy DAG type constraints. This could also be avoided by just changing the MachineInstr's arbitrary choice of operand type from i16 to i32. Other patterns have nontrivial uses, but this serves as the simplest example. One flaw this still has is if you try to use an SDNodeXForm defined for imm, but the source pattern uses timm, you still see the "Failed to lookup instruction" assert. However, there is now a way to avoid it.
author: Matt Arsenault <Matthew.Arsenault@amd.com> 2020-01-08 12:53:15 -0500
committer: Matt Arsenault <arsenm2@gmail.com> 2020-01-09 17:37:52 -0500
commit: b4a647449fa01bd4e29bce5afef51770cddec664 (patch)
tree: 2b73e8bf2fb3b3e6a18121a77c11378fc11c3fb5 /llvm/lib
parent: 0ea3c7291fb8d463d9c7ae6aaec7a432ef366a51 (diff)
download: bcm5719-llvm-b4a647449fa01bd4e29bce5afef51770cddec664.tar.gz
bcm5719-llvm-b4a647449fa01bd4e29bce5afef51770cddec664.zip
7 files changed, 68 insertions, 31 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
index 45075646444..bf2a23fb26e 100644
--- a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -260,9 +260,12 @@ private:
                                              MachineIRBuilder &MIB) const;
   ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
 
-  void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI) const;
-  void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I) const;
-  void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I) const;
+  void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
+                      int OpIdx = -1) const;
+  void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
+                          int OpIdx = -1) const;
+  void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
+                          int OpIdx = -1) const;
 
   // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
   void materializeLargeCMVal(MachineInstr &I, const Value *V,
@@ -4851,25 +4854,29 @@ AArch64InstructionSelector::selectArithExtendedRegister(
 }
 
 void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
-                                                const MachineInstr &MI) const {
+                                                const MachineInstr &MI,
+                                                int OpIdx) const {
   const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
-  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT");
+  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
+         "Expected G_CONSTANT");
   Optional<int64_t> CstVal = getConstantVRegVal(MI.getOperand(0).getReg(), MRI);
   assert(CstVal && "Expected constant value");
   MIB.addImm(CstVal.getValue());
 }
 
 void AArch64InstructionSelector::renderLogicalImm32(
-    MachineInstrBuilder &MIB, const MachineInstr &I) const {
-  assert(I.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT");
+  MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
+  assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
+         "Expected G_CONSTANT");
   uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
   uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 32);
   MIB.addImm(Enc);
 }
 
 void AArch64InstructionSelector::renderLogicalImm64(
-    MachineInstrBuilder &MIB, const MachineInstr &I) const {
-  assert(I.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT");
+  MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
+  assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
+         "Expected G_CONSTANT");
   uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
   uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 64);
   MIB.addImm(Enc);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index f9983693a99..d420aa02ac2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -209,6 +209,9 @@ foreach Ty = [i64, p0, p1, p4] in {
 def gi_as_i32timm : GICustomOperandRenderer<"renderTruncImm32">,
   GISDNodeXFormEquiv<as_i32timm>;
 
+def gi_as_i16timm : GICustomOperandRenderer<"renderTruncTImm">,
+  GISDNodeXFormEquiv<as_i16timm>;
+
 def gi_NegateImm : GICustomOperandRenderer<"renderNegateImm">,
   GISDNodeXFormEquiv<NegateImm>;
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 132c51c9e08..72ccf0df4f3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -2106,21 +2106,28 @@ AMDGPUInstructionSelector::selectDS1Addr1Offset(MachineOperand &Root) const {
 }
 
 void AMDGPUInstructionSelector::renderTruncImm32(MachineInstrBuilder &MIB,
-                                                 const MachineInstr &MI) const {
-  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT");
+                                                 const MachineInstr &MI,
+                                                 int OpIdx) const {
+  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
+         "Expected G_CONSTANT");
   Optional<int64_t> CstVal = getConstantVRegVal(MI.getOperand(0).getReg(), *MRI);
   assert(CstVal && "Expected constant value");
   MIB.addImm(CstVal.getValue());
 }
 
 void AMDGPUInstructionSelector::renderNegateImm(MachineInstrBuilder &MIB,
-                                                const MachineInstr &MI) const {
-  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT");
+                                                const MachineInstr &MI,
+                                                int OpIdx) const {
+  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
+         "Expected G_CONSTANT");
   MIB.addImm(-MI.getOperand(1).getCImm()->getSExtValue());
 }
 
 void AMDGPUInstructionSelector::renderBitcastImm(MachineInstrBuilder &MIB,
-                                                 const MachineInstr &MI) const {
+                                                 const MachineInstr &MI,
+                                                 int OpIdx) const {
+  assert(OpIdx == -1);
+
   const MachineOperand &Op = MI.getOperand(1);
   if (MI.getOpcode() == TargetOpcode::G_FCONSTANT)
     MIB.addImm(Op.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
@@ -2131,11 +2138,21 @@ void AMDGPUInstructionSelector::renderBitcastImm(MachineInstrBuilder &MIB,
 }
 
 void AMDGPUInstructionSelector::renderPopcntImm(MachineInstrBuilder &MIB,
-                                                const MachineInstr &MI) const {
-  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT");
+                                                const MachineInstr &MI,
+                                                int OpIdx) const {
+  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
+         "Expected G_CONSTANT");
   MIB.addImm(MI.getOperand(1).getCImm()->getValue().countPopulation());
 }
 
+/// This only really exists to satisfy DAG type checking machinery, so is a
+/// no-op here.
+void AMDGPUInstructionSelector::renderTruncTImm(MachineInstrBuilder &MIB,
+                                                const MachineInstr &MI,
+                                                int OpIdx) const {
+  MIB.addImm(MI.getOperand(OpIdx).getImm());
+}
+
 bool AMDGPUInstructionSelector::isInlineImmediate16(int64_t Imm) const {
   return AMDGPU::isInlinableLiteral16(Imm, STI.hasInv2PiInlineImm());
 }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index 0799ace0867..633c4d35137 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -168,17 +168,20 @@ private:
   InstructionSelector::ComplexRendererFns
   selectDS1Addr1Offset(MachineOperand &Root) const;
 
-  void renderTruncImm32(MachineInstrBuilder &MIB,
-                        const MachineInstr &MI) const;
+  void renderTruncImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
+                        int OpIdx = -1) const;
 
-  void renderNegateImm(MachineInstrBuilder &MIB,
-                       const MachineInstr &MI) const;
+  void renderTruncTImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
+                       int OpIdx) const;
 
-  void renderBitcastImm(MachineInstrBuilder &MIB,
-                        const MachineInstr &MI) const;
+  void renderNegateImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
+                       int OpIdx) const;
 
-  void renderPopcntImm(MachineInstrBuilder &MIB,
-                       const MachineInstr &MI) const;
+  void renderBitcastImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
+                        int OpIdx) const;
+
+  void renderPopcntImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
+                       int OpIdx) const;
 
   bool isInlineImmediate16(int64_t Imm) const;
   bool isInlineImmediate32(int64_t Imm) const;
diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td
index f4e50e3a15e..fe7faca8b15 100644
--- a/llvm/lib/Target/AMDGPU/DSInstructions.td
+++ b/llvm/lib/Target/AMDGPU/DSInstructions.td
@@ -619,7 +619,7 @@ def DS_ADD_SRC2_F32 : DS_1A<"ds_add_src2_f32">;
 
 def : GCNPat <
   (int_amdgcn_ds_swizzle i32:$src, timm:$offset16),
-  (DS_SWIZZLE_B32 VGPR_32:$src, (as_i16imm $offset16), (i1 0))
+  (DS_SWIZZLE_B32 VGPR_32:$src, (as_i16timm $offset16), (i1 0))
 >;
 
 class DSReadPat <DS_Pseudo inst, ValueType vt, PatFrag frag, int gds=0> : GCNPat <
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 1518beafc7a..85e8d0582dc 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -677,6 +677,10 @@ def as_i16imm : SDNodeXForm<imm, [{
   return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16);
 }]>;
 
+def as_i16timm : SDNodeXForm<timm, [{
+  return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16);
+}]>;
+
 def as_i32imm: SDNodeXForm<imm, [{
   return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32);
 }]>;
diff --git a/llvm/lib/Target/ARM/ARMInstructionSelector.cpp b/llvm/lib/Target/ARM/ARMInstructionSelector.cpp
index 601a529dd41..67816bc2103 100644
--- a/llvm/lib/Target/ARM/ARMInstructionSelector.cpp
+++ b/llvm/lib/Target/ARM/ARMInstructionSelector.cpp
@@ -138,8 +138,10 @@ private:
   unsigned selectLoadStoreOpCode(unsigned Opc, unsigned RegBank,
                                  unsigned Size) const;
 
-  void renderVFPF32Imm(MachineInstrBuilder &New, const MachineInstr &Old) const;
-  void renderVFPF64Imm(MachineInstrBuilder &New, const MachineInstr &Old) const;
+  void renderVFPF32Imm(MachineInstrBuilder &New, const MachineInstr &Old,
+                       int OpIdx = -1) const;
+  void renderVFPF64Imm(MachineInstrBuilder &New, const MachineInstr &Old,
+                       int OpIdx = -1) const;
 
 #define GET_GLOBALISEL_PREDICATES_DECL
 #include "ARMGenGlobalISel.inc"
@@ -811,9 +813,10 @@ bool ARMInstructionSelector::selectShift(unsigned ShiftOpc,
 }
 
 void ARMInstructionSelector::renderVFPF32Imm(
-    MachineInstrBuilder &NewInstBuilder, const MachineInstr &OldInst) const {
+  MachineInstrBuilder &NewInstBuilder, const MachineInstr &OldInst,
+  int OpIdx) const {
   assert(OldInst.getOpcode() == TargetOpcode::G_FCONSTANT &&
-         "Expected G_FCONSTANT");
+         OpIdx == -1 && "Expected G_FCONSTANT");
 
   APFloat FPImmValue = OldInst.getOperand(1).getFPImm()->getValueAPF();
   int FPImmEncoding = ARM_AM::getFP32Imm(FPImmValue);
@@ -823,9 +826,9 @@ void ARMInstructionSelector::renderVFPF32Imm(
 }
 
 void ARMInstructionSelector::renderVFPF64Imm(
-    MachineInstrBuilder &NewInstBuilder, const MachineInstr &OldInst) const {
+  MachineInstrBuilder &NewInstBuilder, const MachineInstr &OldInst, int OpIdx) const {
   assert(OldInst.getOpcode() == TargetOpcode::G_FCONSTANT &&
-         "Expected G_FCONSTANT");
+         OpIdx == -1 && "Expected G_FCONSTANT");
 
   APFloat FPImmValue = OldInst.getOperand(1).getFPImm()->getValueAPF();
   int FPImmEncoding = ARM_AM::getFP64Imm(FPImmValue);
author	Matt Arsenault <Matthew.Arsenault@amd.com>	2020-01-08 12:53:15 -0500
committer	Matt Arsenault <arsenm2@gmail.com>	2020-01-09 17:37:52 -0500
commit	b4a647449fa01bd4e29bce5afef51770cddec664 (patch)
tree	2b73e8bf2fb3b3e6a18121a77c11378fc11c3fb5 /llvm/lib
parent	0ea3c7291fb8d463d9c7ae6aaec7a432ef366a51 (diff)
download	bcm5719-llvm-b4a647449fa01bd4e29bce5afef51770cddec664.tar.gz bcm5719-llvm-b4a647449fa01bd4e29bce5afef51770cddec664.zip