diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2020-01-08 12:53:15 -0500 |
---|---|---|
committer | Matt Arsenault <arsenm2@gmail.com> | 2020-01-09 17:37:52 -0500 |
commit | b4a647449fa01bd4e29bce5afef51770cddec664 (patch) | |
tree | 2b73e8bf2fb3b3e6a18121a77c11378fc11c3fb5 /llvm/lib | |
parent | 0ea3c7291fb8d463d9c7ae6aaec7a432ef366a51 (diff) | |
download | bcm5719-llvm-b4a647449fa01bd4e29bce5afef51770cddec664.tar.gz bcm5719-llvm-b4a647449fa01bd4e29bce5afef51770cddec664.zip |
TableGen/GlobalISel: Add way for SDNodeXForm to work on timm
The current implementation assumes there is an instruction associated
with the transform, but this is not the case for
timm/TargetConstant/immarg values. These transforms should directly
operate on a specific MachineOperand in the source
instruction. TableGen would assert if you attempted to define an
equivalent GISDNodeXFormEquiv using timm when it failed to find the
instruction matcher.
Specially recognize SDNodeXForms on timm, and pass the operand index
to the render function.
Ideally this would be a separate render function type that looks like
`void renderFoo(MachineInstrBuilder, const MachineOperand&)`, but this
proved to be somewhat mechanically painful. Instead, add an optional
operand index, which is passed only when the transform should look at
just the one source operand.
Theoretically it would also be possible to only ever pass the
MachineOperand, and the existing renderers would check the parent. I
think that would be somewhat ugly for the standard usage which may
want to inspect other operands, and I also think MachineOperand should
eventually not carry a pointer to the parent instruction.
Use it in one sample pattern. This isn't a great example, since the
transform exists to satisfy DAG type constraints. This could also be
avoided by just changing the MachineInstr's arbitrary choice of
operand type from i16 to i32. Other patterns have nontrivial uses, but
this serves as the simplest example.
One remaining flaw is that if you try to use an SDNodeXForm defined
for imm while the source pattern uses timm, you still hit the "Failed
to lookup instruction" assert. However, there is now a way to avoid
it: define the SDNodeXForm on timm instead, as as_i16timm does here.
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp | 25 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUGISel.td | 3 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp | 31 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h | 19 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/DSInstructions.td | 2 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.td | 4 | ||||
-rw-r--r-- | llvm/lib/Target/ARM/ARMInstructionSelector.cpp | 15 |
7 files changed, 68 insertions, 31 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp index 45075646444..bf2a23fb26e 100644 --- a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp @@ -260,9 +260,12 @@ private: MachineIRBuilder &MIB) const; ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const; - void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI) const; - void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I) const; - void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I) const; + void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI, + int OpIdx = -1) const; + void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I, + int OpIdx = -1) const; + void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I, + int OpIdx = -1) const; // Materialize a GlobalValue or BlockAddress using a movz+movk sequence. 
void materializeLargeCMVal(MachineInstr &I, const Value *V, @@ -4851,25 +4854,29 @@ AArch64InstructionSelector::selectArithExtendedRegister( } void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB, - const MachineInstr &MI) const { + const MachineInstr &MI, + int OpIdx) const { const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); - assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT"); + assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 && + "Expected G_CONSTANT"); Optional<int64_t> CstVal = getConstantVRegVal(MI.getOperand(0).getReg(), MRI); assert(CstVal && "Expected constant value"); MIB.addImm(CstVal.getValue()); } void AArch64InstructionSelector::renderLogicalImm32( - MachineInstrBuilder &MIB, const MachineInstr &I) const { - assert(I.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT"); + MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const { + assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 && + "Expected G_CONSTANT"); uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue(); uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 32); MIB.addImm(Enc); } void AArch64InstructionSelector::renderLogicalImm64( - MachineInstrBuilder &MIB, const MachineInstr &I) const { - assert(I.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT"); + MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const { + assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 && + "Expected G_CONSTANT"); uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue(); uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 64); MIB.addImm(Enc); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td index f9983693a99..d420aa02ac2 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td @@ -209,6 +209,9 @@ foreach Ty = [i64, p0, p1, p4] in { def gi_as_i32timm : 
GICustomOperandRenderer<"renderTruncImm32">, GISDNodeXFormEquiv<as_i32timm>; +def gi_as_i16timm : GICustomOperandRenderer<"renderTruncTImm">, + GISDNodeXFormEquiv<as_i16timm>; + def gi_NegateImm : GICustomOperandRenderer<"renderNegateImm">, GISDNodeXFormEquiv<NegateImm>; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 132c51c9e08..72ccf0df4f3 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -2106,21 +2106,28 @@ AMDGPUInstructionSelector::selectDS1Addr1Offset(MachineOperand &Root) const { } void AMDGPUInstructionSelector::renderTruncImm32(MachineInstrBuilder &MIB, - const MachineInstr &MI) const { - assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT"); + const MachineInstr &MI, + int OpIdx) const { + assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 && + "Expected G_CONSTANT"); Optional<int64_t> CstVal = getConstantVRegVal(MI.getOperand(0).getReg(), *MRI); assert(CstVal && "Expected constant value"); MIB.addImm(CstVal.getValue()); } void AMDGPUInstructionSelector::renderNegateImm(MachineInstrBuilder &MIB, - const MachineInstr &MI) const { - assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT"); + const MachineInstr &MI, + int OpIdx) const { + assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 && + "Expected G_CONSTANT"); MIB.addImm(-MI.getOperand(1).getCImm()->getSExtValue()); } void AMDGPUInstructionSelector::renderBitcastImm(MachineInstrBuilder &MIB, - const MachineInstr &MI) const { + const MachineInstr &MI, + int OpIdx) const { + assert(OpIdx == -1); + const MachineOperand &Op = MI.getOperand(1); if (MI.getOpcode() == TargetOpcode::G_FCONSTANT) MIB.addImm(Op.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue()); @@ -2131,11 +2138,21 @@ void AMDGPUInstructionSelector::renderBitcastImm(MachineInstrBuilder &MIB, } void 
AMDGPUInstructionSelector::renderPopcntImm(MachineInstrBuilder &MIB, - const MachineInstr &MI) const { - assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT"); + const MachineInstr &MI, + int OpIdx) const { + assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 && + "Expected G_CONSTANT"); MIB.addImm(MI.getOperand(1).getCImm()->getValue().countPopulation()); } +/// This only really exists to satisfy DAG type checking machinery, so is a +/// no-op here. +void AMDGPUInstructionSelector::renderTruncTImm(MachineInstrBuilder &MIB, + const MachineInstr &MI, + int OpIdx) const { + MIB.addImm(MI.getOperand(OpIdx).getImm()); +} + bool AMDGPUInstructionSelector::isInlineImmediate16(int64_t Imm) const { return AMDGPU::isInlinableLiteral16(Imm, STI.hasInv2PiInlineImm()); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h index 0799ace0867..633c4d35137 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -168,17 +168,20 @@ private: InstructionSelector::ComplexRendererFns selectDS1Addr1Offset(MachineOperand &Root) const; - void renderTruncImm32(MachineInstrBuilder &MIB, - const MachineInstr &MI) const; + void renderTruncImm32(MachineInstrBuilder &MIB, const MachineInstr &MI, + int OpIdx = -1) const; - void renderNegateImm(MachineInstrBuilder &MIB, - const MachineInstr &MI) const; + void renderTruncTImm(MachineInstrBuilder &MIB, const MachineInstr &MI, + int OpIdx) const; - void renderBitcastImm(MachineInstrBuilder &MIB, - const MachineInstr &MI) const; + void renderNegateImm(MachineInstrBuilder &MIB, const MachineInstr &MI, + int OpIdx) const; - void renderPopcntImm(MachineInstrBuilder &MIB, - const MachineInstr &MI) const; + void renderBitcastImm(MachineInstrBuilder &MIB, const MachineInstr &MI, + int OpIdx) const; + + void renderPopcntImm(MachineInstrBuilder &MIB, const MachineInstr &MI, + int OpIdx) const; 
bool isInlineImmediate16(int64_t Imm) const; bool isInlineImmediate32(int64_t Imm) const; diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td index f4e50e3a15e..fe7faca8b15 100644 --- a/llvm/lib/Target/AMDGPU/DSInstructions.td +++ b/llvm/lib/Target/AMDGPU/DSInstructions.td @@ -619,7 +619,7 @@ def DS_ADD_SRC2_F32 : DS_1A<"ds_add_src2_f32">; def : GCNPat < (int_amdgcn_ds_swizzle i32:$src, timm:$offset16), - (DS_SWIZZLE_B32 VGPR_32:$src, (as_i16imm $offset16), (i1 0)) + (DS_SWIZZLE_B32 VGPR_32:$src, (as_i16timm $offset16), (i1 0)) >; class DSReadPat <DS_Pseudo inst, ValueType vt, PatFrag frag, int gds=0> : GCNPat < diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 1518beafc7a..85e8d0582dc 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -677,6 +677,10 @@ def as_i16imm : SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16); }]>; +def as_i16timm : SDNodeXForm<timm, [{ + return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16); +}]>; + def as_i32imm: SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32); }]>; diff --git a/llvm/lib/Target/ARM/ARMInstructionSelector.cpp b/llvm/lib/Target/ARM/ARMInstructionSelector.cpp index 601a529dd41..67816bc2103 100644 --- a/llvm/lib/Target/ARM/ARMInstructionSelector.cpp +++ b/llvm/lib/Target/ARM/ARMInstructionSelector.cpp @@ -138,8 +138,10 @@ private: unsigned selectLoadStoreOpCode(unsigned Opc, unsigned RegBank, unsigned Size) const; - void renderVFPF32Imm(MachineInstrBuilder &New, const MachineInstr &Old) const; - void renderVFPF64Imm(MachineInstrBuilder &New, const MachineInstr &Old) const; + void renderVFPF32Imm(MachineInstrBuilder &New, const MachineInstr &Old, + int OpIdx = -1) const; + void renderVFPF64Imm(MachineInstrBuilder &New, const MachineInstr &Old, + int OpIdx = -1) const; #define 
GET_GLOBALISEL_PREDICATES_DECL #include "ARMGenGlobalISel.inc" @@ -811,9 +813,10 @@ bool ARMInstructionSelector::selectShift(unsigned ShiftOpc, } void ARMInstructionSelector::renderVFPF32Imm( - MachineInstrBuilder &NewInstBuilder, const MachineInstr &OldInst) const { + MachineInstrBuilder &NewInstBuilder, const MachineInstr &OldInst, + int OpIdx) const { assert(OldInst.getOpcode() == TargetOpcode::G_FCONSTANT && - "Expected G_FCONSTANT"); + OpIdx == -1 && "Expected G_FCONSTANT"); APFloat FPImmValue = OldInst.getOperand(1).getFPImm()->getValueAPF(); int FPImmEncoding = ARM_AM::getFP32Imm(FPImmValue); @@ -823,9 +826,9 @@ void ARMInstructionSelector::renderVFPF32Imm( } void ARMInstructionSelector::renderVFPF64Imm( - MachineInstrBuilder &NewInstBuilder, const MachineInstr &OldInst) const { + MachineInstrBuilder &NewInstBuilder, const MachineInstr &OldInst, int OpIdx) const { assert(OldInst.getOpcode() == TargetOpcode::G_FCONSTANT && - "Expected G_FCONSTANT"); + OpIdx == -1 && "Expected G_FCONSTANT"); APFloat FPImmValue = OldInst.getOperand(1).getFPImm()->getValueAPF(); int FPImmEncoding = ARM_AM::getFP64Imm(FPImmValue); |