diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp | 65 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h | 5 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp | 42 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h | 12 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/MIMGInstructions.td | 9 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 3 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 64 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 6 |
8 files changed, 128 insertions, 78 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp index 49447862b60..8156599528c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp @@ -23,7 +23,6 @@ using namespace llvm; #define GET_INSTRINFO_CTOR_DTOR -#define GET_INSTRMAP_INFO #include "AMDGPUGenInstrInfo.inc" // Pin the vtable to this file. @@ -56,59 +55,6 @@ bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, return (NumLoads <= 16 && (Offset1 - Offset0) < 64); } -static AMDGPU::Channels indexToChannel(unsigned Channel) { - switch (Channel) { - case 1: - return AMDGPU::Channels_1; - case 2: - return AMDGPU::Channels_2; - case 3: - return AMDGPU::Channels_3; - case 4: - return AMDGPU::Channels_4; - default: - llvm_unreachable("invalid MIMG channel"); - } -} - -// FIXME: Need to handle d16 images correctly. -static unsigned rcToChannels(unsigned RCID) { - switch (RCID) { - case AMDGPU::VGPR_32RegClassID: - return 1; - case AMDGPU::VReg_64RegClassID: - return 2; - case AMDGPU::VReg_96RegClassID: - return 3; - case AMDGPU::VReg_128RegClassID: - return 4; - default: - llvm_unreachable("invalid MIMG register class"); - } -} - -int AMDGPUInstrInfo::getMaskedMIMGOp(unsigned Opc, - unsigned NewChannels) const { - AMDGPU::Channels Channel = indexToChannel(NewChannels); - unsigned OrigChannels = rcToChannels(get(Opc).OpInfo[0].RegClass); - if (NewChannels == OrigChannels) - return Opc; - - switch (OrigChannels) { - case 1: - return AMDGPU::getMaskedMIMGOp1(Opc, Channel); - case 2: - return AMDGPU::getMaskedMIMGOp2(Opc, Channel); - case 3: - return AMDGPU::getMaskedMIMGOp3(Opc, Channel); - case 4: - return AMDGPU::getMaskedMIMGOp4(Opc, Channel); - default: - llvm_unreachable("invalid MIMG channel"); - } -} - - // This must be kept in sync with the SIEncodingFamily class in SIInstrInfo.td enum SIEncodingFamily { SI = 0, @@ -118,17 +64,6 @@ enum SIEncodingFamily { GFX9 = 4 }; -// Wrapper for Tablegen'd function. enum Subtarget is not defined in any -// header files, so we need to wrap it in a function that takes unsigned -// instead. -namespace llvm { -namespace AMDGPU { -static int getMCOpcode(uint16_t Opcode, unsigned Gen) { - return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen)); -} -} -} - static SIEncodingFamily subtargetEncodingFamily(const AMDGPUSubtarget &ST) { switch (ST.getGeneration()) { case AMDGPUSubtarget::SOUTHERN_ISLANDS: diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h index 4ab0515d5ca..a9fcd483463 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h @@ -22,6 +22,7 @@ #define GET_INSTRINFO_HEADER #include "AMDGPUGenInstrInfo.inc" +#undef GET_INSTRINFO_HEADER namespace llvm { @@ -49,10 +50,6 @@ public: /// Return -1 if the target-specific opcode for the pseudo instruction does /// not exist. If Opcode is not a pseudo instruction, this is identity. int pseudoToMCOpcode(int Opcode) const; - - /// \brief Given a MIMG \p MI that writes any number of channels, return the - /// equivalent opcode that writes \p NewChannels Channels. - int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) const; }; } // End llvm namespace diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index a33670c6403..4a3f2c97517 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -234,6 +234,10 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, AMDGPU::OpName::src2_modifiers); } + if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::MIMG)) { + Res = convertMIMGInst(MI); + } + if (Res && IsSDWA) Res = convertSDWAInst(MI); @@ -260,6 +264,42 @@ DecodeStatus AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const { return MCDisassembler::Success; } +DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const { + int VDataIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), + AMDGPU::OpName::vdata); + + int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), + AMDGPU::OpName::dmask); + unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf; + if (DMask == 0) + return MCDisassembler::Success; + + unsigned ChannelCount = countPopulation(DMask); + if (ChannelCount == 1) + return MCDisassembler::Success; + + int NewOpcode = AMDGPU::getMaskedMIMGOp(*MCII, MI.getOpcode(), ChannelCount); + assert(NewOpcode != -1 && "could not find matching mimg channel instruction"); + auto RCID = MCII->get(NewOpcode).OpInfo[VDataIdx].RegClass; + + // Widen the register to the correct number of enabled channels. + unsigned Vdata0 = MI.getOperand(VDataIdx).getReg(); + auto NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0, + &MRI.getRegClass(RCID)); + if (NewVdata == AMDGPU::NoRegister) { + // It's possible to encode this such that the low register + enabled + // components exceeds the register count. + return MCDisassembler::Success; + } + + MI.setOpcode(NewOpcode); + // vaddr will be always appear as a single VGPR. This will look different than + // how it is usually emitted because the number of register components is not + // in the instruction encoding. + MI.getOperand(VDataIdx) = MCOperand::createReg(NewVdata); + return MCDisassembler::Success; +} + const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const { return getContext().getRegisterInfo()-> getRegClassName(&AMDGPUMCRegisterClasses[RegClassID]); @@ -786,7 +826,7 @@ static MCSymbolizer *createAMDGPUSymbolizer(const Triple &/*TT*/, static MCDisassembler *createAMDGPUDisassembler(const Target &T, const MCSubtargetInfo &STI, MCContext &Ctx) { - return new AMDGPUDisassembler(STI, Ctx); + return new AMDGPUDisassembler(STI, Ctx, T.createMCInstrInfo()); } extern "C" void LLVMInitializeAMDGPUDisassembler() { diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h index 18a91356d20..ce396eb68c4 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h @@ -17,16 +17,18 @@ #define LLVM_LIB_TARGET_AMDGPU_DISASSEMBLER_AMDGPUDISASSEMBLER_H #include "llvm/ADT/ArrayRef.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCDisassembler/MCRelocationInfo.h" #include "llvm/MC/MCDisassembler/MCSymbolizer.h" + #include <algorithm> #include <cstdint> #include <memory> namespace llvm { -class MCContext; class MCInst; class MCOperand; class MCSubtargetInfo; @@ -38,13 +40,16 @@ class Twine; class AMDGPUDisassembler : public MCDisassembler { private: + std::unique_ptr<MCInstrInfo const> const MCII; + const MCRegisterInfo &MRI; mutable ArrayRef<uint8_t> Bytes; mutable uint32_t Literal; mutable bool HasLiteral; public: - AMDGPUDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) : - MCDisassembler(STI, Ctx) {} + AMDGPUDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, + MCInstrInfo const *MCII) : + MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()) {} ~AMDGPUDisassembler() override = default; @@ -64,6 +69,7 @@ public: uint64_t Address) const; DecodeStatus convertSDWAInst(MCInst &MI) const; + DecodeStatus convertMIMGInst(MCInst &MI) const; MCOperand decodeOperand_VGPR_32(unsigned Val) const; MCOperand decodeOperand_VS_32(unsigned Val) const; diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td index 9622bccddf9..30a2df51038 100644 --- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td +++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td @@ -63,13 +63,13 @@ multiclass MIMG_NoSampler <bits<7> op, string asm> { class MIMG_Store_Helper <bits<7> op, string asm, RegisterClass data_rc, - RegisterClass addr_rc> : MIMG_Helper < + RegisterClass addr_rc, + string dns = ""> : MIMG_Helper < (outs), (ins data_rc:$vdata, addr_rc:$vaddr, SReg_256:$srsrc, dmask:$dmask, unorm:$unorm, GLC:$glc, slc:$slc, r128:$r128, tfe:$tfe, lwe:$lwe, da:$da), - asm#" $vdata, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da" - >, MIMGe<op> { + asm#" $vdata, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da", dns>, MIMGe<op> { let ssamp = 0; let mayLoad = 1; // TableGen requires this for matching with the intrinsics let mayStore = 1; @@ -81,7 +81,8 @@ class MIMG_Store_Helper <bits<7> op, string asm, multiclass MIMG_Store_Addr_Helper <bits<7> op, string asm, RegisterClass data_rc, int channels> { - def _V1 : MIMG_Store_Helper <op, asm, data_rc, VGPR_32>, + def _V1 : MIMG_Store_Helper <op, asm, data_rc, VGPR_32, + !if(!eq(channels, 1), "AMDGPU", "")>, MIMG_Mask<asm#"_V1", channels>; def _V2 : MIMG_Store_Helper <op, asm, data_rc, VReg_64>, MIMG_Mask<asm#"_V2", channels>; diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 92087623cec..d0262105ae8 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -6652,7 +6652,8 @@ SDNode *SITargetLowering::adjustWritemask(MachineSDNode *&Node, unsigned BitsSet = countPopulation(NewDmask); const SIInstrInfo *TII = getSubtarget()->getInstrInfo(); - int NewOpcode = TII->getMaskedMIMGOp(Node->getMachineOpcode(), BitsSet); + int NewOpcode = AMDGPU::getMaskedMIMGOp(*TII, + Node->getMachineOpcode(), BitsSet); assert(NewOpcode != -1 && NewOpcode != static_cast<int>(Node->getMachineOpcode()) && "failed to find equivalent MIMG op"); diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 5a59e04c849..819a7add0be 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -23,6 +23,7 @@ #include "llvm/IR/Module.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSubtargetInfo.h" @@ -39,7 +40,9 @@ #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #define GET_INSTRINFO_NAMED_OPS +#define GET_INSTRMAP_INFO #include "AMDGPUGenInstrInfo.inc" +#undef GET_INSTRMAP_INFO #undef GET_INSTRINFO_NAMED_OPS namespace { @@ -100,6 +103,66 @@ static cl::opt<bool> EnablePackedInlinableLiterals( namespace AMDGPU { +LLVM_READNONE +static inline Channels indexToChannel(unsigned Channel) { + switch (Channel) { + case 1: + return AMDGPU::Channels_1; + case 2: + return AMDGPU::Channels_2; + case 3: + return AMDGPU::Channels_3; + case 4: + return AMDGPU::Channels_4; + default: + llvm_unreachable("invalid MIMG channel"); + } +} + + +// FIXME: Need to handle d16 images correctly. +static unsigned rcToChannels(unsigned RCID) { + switch (RCID) { + case AMDGPU::VGPR_32RegClassID: + return 1; + case AMDGPU::VReg_64RegClassID: + return 2; + case AMDGPU::VReg_96RegClassID: + return 3; + case AMDGPU::VReg_128RegClassID: + return 4; + default: + llvm_unreachable("invalid MIMG register class"); + } +} + +int getMaskedMIMGOp(const MCInstrInfo &MII, unsigned Opc, unsigned NewChannels) { + AMDGPU::Channels Channel = AMDGPU::indexToChannel(NewChannels); + unsigned OrigChannels = rcToChannels(MII.get(Opc).OpInfo[0].RegClass); + if (NewChannels == OrigChannels) + return Opc; + + switch (OrigChannels) { + case 1: + return AMDGPU::getMaskedMIMGOp1(Opc, Channel); + case 2: + return AMDGPU::getMaskedMIMGOp2(Opc, Channel); + case 3: + return AMDGPU::getMaskedMIMGOp3(Opc, Channel); + case 4: + return AMDGPU::getMaskedMIMGOp4(Opc, Channel); + default: + llvm_unreachable("invalid MIMG channel"); + } +} + +// Wrapper for Tablegen'd function. enum Subtarget is not defined in any +// header files, so we need to wrap it in a function that takes unsigned +// instead. +int getMCOpcode(uint16_t Opcode, unsigned Gen) { + return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen)); +} + namespace IsaInfo { IsaVersion getIsaVersion(const FeatureBitset &Features) { @@ -833,6 +896,7 @@ bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) { return isGCN3Encoding(ST) ? isUInt<20>(EncodedOffset) : isUInt<8>(EncodedOffset); } + } // end namespace AMDGPU } // end namespace llvm diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index c4b7779514f..a215b445378 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -156,6 +156,12 @@ unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU); LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx); +LLVM_READONLY +int getMaskedMIMGOp(const MCInstrInfo &MII, + unsigned Opc, unsigned NewChannels); +LLVM_READONLY +int getMCOpcode(uint16_t Opcode, unsigned Gen); + void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, const FeatureBitset &Features); |