From b4a647449fa01bd4e29bce5afef51770cddec664 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 8 Jan 2020 12:53:15 -0500 Subject: TableGen/GlobalISel: Add way for SDNodeXForm to work on timm The current implementation assumes there is an instruction associated with the transform, but this is not the case for timm/TargetConstant/immarg values. These transforms should directly operate on a specific MachineOperand in the source instruction. TableGen would assert if you attempted to define an equivalent GISDNodeXFormEquiv using timm when it failed to find the instruction matcher. Specially recognize SDNodeXForms on timm, and pass the operand index to the render function. Ideally this would be a separate render function type that looks like void renderFoo(MachineInstrBuilder, const MachineOperand&), but this proved to be somewhat mechanically painful. Add an optional operand index which will only be passed if the transform should only look at the one source operand. Theoretically it would also be possible to only ever pass the MachineOperand, and the existing renderers would check the parent. I think that would be somewhat ugly for the standard usage which may want to inspect other operands, and I also think MachineOperand should eventually not carry a pointer to the parent instruction. Use it in one sample pattern. This isn't a great example, since the transform exists to satisfy DAG type constraints. This could also be avoided by just changing the MachineInstr's arbitrary choice of operand type from i16 to i32. Other patterns have nontrivial uses, but this serves as the simplest example. One flaw this still has is if you try to use an SDNodeXForm defined for imm, but the source pattern uses timm, you still see the "Failed to lookup instruction" assert. However, there is now a way to avoid it. --- llvm/lib/Target/AMDGPU/AMDGPUGISel.td | 3 +++ .../Target/AMDGPU/AMDGPUInstructionSelector.cpp | 31 +++++++++++++++++----- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h | 19 +++++++------ llvm/lib/Target/AMDGPU/DSInstructions.td | 2 +- llvm/lib/Target/AMDGPU/SIInstrInfo.td | 4 +++ 5 files changed, 43 insertions(+), 16 deletions(-) (limited to 'llvm/lib/Target/AMDGPU') diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td index f9983693a99..d420aa02ac2 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td @@ -209,6 +209,9 @@ foreach Ty = [i64, p0, p1, p4] in { def gi_as_i32timm : GICustomOperandRenderer<"renderTruncImm32">, GISDNodeXFormEquiv; +def gi_as_i16timm : GICustomOperandRenderer<"renderTruncTImm">, + GISDNodeXFormEquiv; + def gi_NegateImm : GICustomOperandRenderer<"renderNegateImm">, GISDNodeXFormEquiv; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 132c51c9e08..72ccf0df4f3 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -2106,21 +2106,28 @@ AMDGPUInstructionSelector::selectDS1Addr1Offset(MachineOperand &Root) const { } void AMDGPUInstructionSelector::renderTruncImm32(MachineInstrBuilder &MIB, - const MachineInstr &MI) const { - assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT"); + const MachineInstr &MI, + int OpIdx) const { + assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 && + "Expected G_CONSTANT"); Optional CstVal = getConstantVRegVal(MI.getOperand(0).getReg(), *MRI); assert(CstVal && "Expected constant value"); MIB.addImm(CstVal.getValue()); } void AMDGPUInstructionSelector::renderNegateImm(MachineInstrBuilder &MIB, - const MachineInstr &MI) const { - assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT"); + const MachineInstr &MI, + int OpIdx) const { + assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 && + "Expected G_CONSTANT"); MIB.addImm(-MI.getOperand(1).getCImm()->getSExtValue()); } void AMDGPUInstructionSelector::renderBitcastImm(MachineInstrBuilder &MIB, - const MachineInstr &MI) const { + const MachineInstr &MI, + int OpIdx) const { + assert(OpIdx == -1); + const MachineOperand &Op = MI.getOperand(1); if (MI.getOpcode() == TargetOpcode::G_FCONSTANT) MIB.addImm(Op.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue()); @@ -2131,11 +2138,21 @@ void AMDGPUInstructionSelector::renderBitcastImm(MachineInstrBuilder &MIB, } void AMDGPUInstructionSelector::renderPopcntImm(MachineInstrBuilder &MIB, - const MachineInstr &MI) const { - assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT"); + const MachineInstr &MI, + int OpIdx) const { + assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 && + "Expected G_CONSTANT"); MIB.addImm(MI.getOperand(1).getCImm()->getValue().countPopulation()); } +/// This only really exists to satisfy DAG type checking machinery, so is a +/// no-op here. +void AMDGPUInstructionSelector::renderTruncTImm(MachineInstrBuilder &MIB, + const MachineInstr &MI, + int OpIdx) const { + MIB.addImm(MI.getOperand(OpIdx).getImm()); +} + bool AMDGPUInstructionSelector::isInlineImmediate16(int64_t Imm) const { return AMDGPU::isInlinableLiteral16(Imm, STI.hasInv2PiInlineImm()); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h index 0799ace0867..633c4d35137 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -168,17 +168,20 @@ private: InstructionSelector::ComplexRendererFns selectDS1Addr1Offset(MachineOperand &Root) const; - void renderTruncImm32(MachineInstrBuilder &MIB, - const MachineInstr &MI) const; + void renderTruncImm32(MachineInstrBuilder &MIB, const MachineInstr &MI, + int OpIdx = -1) const; - void renderNegateImm(MachineInstrBuilder &MIB, - const MachineInstr &MI) const; + void renderTruncTImm(MachineInstrBuilder &MIB, const MachineInstr &MI, + int OpIdx) const; - void renderBitcastImm(MachineInstrBuilder &MIB, - const MachineInstr &MI) const; + void renderNegateImm(MachineInstrBuilder &MIB, const MachineInstr &MI, + int OpIdx) const; - void renderPopcntImm(MachineInstrBuilder &MIB, - const MachineInstr &MI) const; + void renderBitcastImm(MachineInstrBuilder &MIB, const MachineInstr &MI, + int OpIdx) const; + + void renderPopcntImm(MachineInstrBuilder &MIB, const MachineInstr &MI, + int OpIdx) const; bool isInlineImmediate16(int64_t Imm) const; bool isInlineImmediate32(int64_t Imm) const; diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td index f4e50e3a15e..fe7faca8b15 100644 --- a/llvm/lib/Target/AMDGPU/DSInstructions.td +++ b/llvm/lib/Target/AMDGPU/DSInstructions.td @@ -619,7 +619,7 @@ def DS_ADD_SRC2_F32 : DS_1A<"ds_add_src2_f32">; def : GCNPat < (int_amdgcn_ds_swizzle i32:$src, timm:$offset16), - (DS_SWIZZLE_B32 VGPR_32:$src, (as_i16imm $offset16), (i1 0)) + (DS_SWIZZLE_B32 VGPR_32:$src, (as_i16timm $offset16), (i1 0)) >; class DSReadPat : GCNPat < diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 1518beafc7a..85e8d0582dc 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -677,6 +677,10 @@ def as_i16imm : SDNodeXFormgetTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16); }]>; +def as_i16timm : SDNodeXFormgetTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16); +}]>; + def as_i32imm: SDNodeXFormgetTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32); }]>; -- cgit v1.2.3